Jlm
intrusive-hash.hpp
Go to the documentation of this file.
1 /*
2  * Copyright 2014 Helge Bahmann <hcb@chaoticmind.net>
3  * Copyright 2014 Nico Reißmann <nico.reissmann@gmail.com>
4  * See COPYING for terms of redistribution.
5  */
6 
7 #ifndef JLM_UTIL_INTRUSIVE_HASH_HPP
8 #define JLM_UTIL_INTRUSIVE_HASH_HPP
9 
10 #include <functional>
11 #include <memory>
12 #include <string>
13 #include <utility>
14 #include <vector>
15 
16 /*
17  * Implementation of intrusive hash data structure
18  *
19  * An intrusive hash is a data structure linking multiple objects such that
20  * can be looked up efficiently by a specific key. The intrusive hash is not
21  * a container: It does not "own" the objects referenced. Additionally it
22  * also does not manage memory to represent the linkage: The anchor data
23  * structure for forming the linkage are required to be members of the
24  * objects themselves. Any object can be member of an arbitrary number of
25  * such intrusive hash collections.
26  *
27  * Usage:
28  *
29  * For a class declared as the following:
30  *
31  * class X {
32  * private:
33  * int num;
34  * jlm::util::IntrusiveHashAnchor<X> num_hash_anchor;
35  * public:
36  * typedef jlm::util::IntrusiveHashAccessor<
37  * int, // key type
38  * X, // element type
39  * &X::num, // key member
40  * &X::num_hash_anchor // anchor member
41  * > num_hash_accessor;
42  * };
43  *
44  * an intrusive hash data structure can be declared in the following way:
45  *
46  * typedef jlm::util::IntrusiveHash<
47  * int,
48  * X,
49  * X::num_hash_accessor
50  * > num_hash;
51  *
52  * It is possible to implement a custom accessor instead of using the
53  * template-generated one. In this case the get_key, get_prev, get_next,
54  * set_prev and set_next members must be implemented appropriately.
55  *
56  * An object h of num_hash class then supports STL-style operations
57  * - num_hash::Iterator, num_hash::ConstIterator for iteration
58  * - h.begin(), h.end() and const-qualified variants
59  * - h.find(key) performs lookup and yields an Iterator
60  * - h.insert(element) links an object into the data structure
61  * - h.erase(element) or h.erase(Iterator) unlinks an object from the data
62  * structure
63  * - h.size() and h.empty() testing
64  *
65  * The implementation requires the following guarantees:
66  * - the accessor to key and anchor are noexcept
67  * - the hash function is noexcept
68  * - while linked to the data structure, an objects key may not change
69  *
70  * The implementation provides the following guarantees
71  * - insert is O(1) amortized
72  * - erase, empty, size are O(1) and noexcept
73  * - find is O(1) average and noexcept
74  * - end is O(1)
75  * - begin is O(n)
76  * - inserting a new object does not invalidate iterators, but may
77  * invalidate traversal order
78  * - erase an object does not invalidate iterators except those pointing
79  * to the object removed and does not invalidate traversal order
80  *
81  * An additional template "OwnerIntrusiveHash" implements the same
82  * interface, but in addition assumes "ownership" of the objects it contains.
83  * This means that upon destruction of the container, the elements will
84  * be deleted as well. In particular, it differs in the following ways:
85  *
86  * - insert expects a std::unique_ptr as argument
87  * - erase is only supported on keys and iterators and will cause the
88  * element in question to be deleted
89  * - a new method "unlink" removes an element from the hash and returns
90  * a std::unique_ptr to it
91  */
92 
93 namespace jlm::util
94 {
95 
96 // FIXME: for some weird reason, std::equal_to does not specify noexcept, so
97 // define our own equality comparison operator here
98 template<typename T>
99 struct SafeEqual
100 {
101  inline bool
102  operator()(const T & a, const T & b) const
103  noexcept(noexcept(std::declval<const T &>() == std::declval<const T &>()))
104  {
105  return a == b;
106  }
107 };
108 
109 template<>
110 struct SafeEqual<std::string>
111 {
112  inline bool
113  operator()(const std::string & a, const std::string & b) const noexcept
114  {
115  // C++11 lacks "noexcept" specification for
116  // std::string::operator==, but implementations lacking
117  // this would be a weird breed. So declare it noexcept
118  // by "fiat".
119  return a == b;
120  }
121 };
122 
123 template<
124  typename KeyType,
125  typename ElementType,
126  typename Accessor,
127  typename KeyHash = std::hash<KeyType>,
128  typename KeyEqual = SafeEqual<KeyType>>
130 {
131 private:
132  struct BucketType
133  {
134  constexpr inline BucketType() noexcept
135  : first(nullptr),
136  last(nullptr)
137  {}
138 
139  ElementType * first;
140  ElementType * last;
141  };
142 
143 public:
144  static_assert(noexcept(KeyHash()(std::declval<KeyType &>())), "require noexcept hash");
145  static_assert(noexcept(Accessor().get_key(nullptr)), "require noexcept get_key");
146  static_assert(noexcept(Accessor().get_prev(nullptr)), "require noexcept get_prev");
147  static_assert(noexcept(Accessor().get_next(nullptr)), "require noexcept get_next");
148  static_assert(noexcept(Accessor().set_prev(nullptr, nullptr)), "require noexcept set_prev");
149  static_assert(noexcept(Accessor().set_next(nullptr, nullptr)), "require noexcept set_next");
150  static_assert(
151  noexcept(KeyEqual()(std::declval<KeyType &>(), std::declval<KeyType &>())),
152  "require noexcept key equality");
153  class ConstIterator;
154 
155  class Iterator
156  {
157  public:
158  typedef ElementType value_type;
159  typedef ElementType * pointer;
160  typedef ElementType & reference;
161  typedef std::forward_iterator_tag iterator_category;
162  typedef size_t size_type;
163  typedef ssize_t difference_type;
164 
165  constexpr Iterator() noexcept
166  : map_(nullptr),
167  element_(nullptr)
168  {}
169 
170  constexpr Iterator(const IntrusiveHash * map, ElementType * object)
171  : map_(map),
172  element_(object)
173  {}
174 
175  const Iterator &
176  operator++() noexcept
177  {
178  ElementType * next = map_->accessor_.get_next(element_);
179  if (next == nullptr)
180  {
181  size_t hash = map_->hash_(map_->accessor_.get_key(element_));
182  size_t index = (hash & map_->mask_) + 1;
183  while (next == nullptr && index < map_->buckets_.size())
184  {
185  next = map_->buckets_[index].first;
186  ++index;
187  }
188  }
189  element_ = next;
190  return *this;
191  }
192 
193  Iterator
194  operator++(int) noexcept
195  {
196  Iterator i = *this;
197  ++*this;
198  return i;
199  }
200 
201  inline ElementType &
202  operator*() const noexcept
203  {
204  return *element_;
205  }
206 
207  inline ElementType *
208  operator->() const noexcept
209  {
210  return element_;
211  }
212 
213  inline bool
214  operator==(const Iterator & other) const noexcept
215  {
216  return element_ == other.element_;
217  }
218 
219  inline bool
220  operator!=(const Iterator & other) const noexcept
221  {
222  return element_ != other.element_;
223  }
224 
225  inline ElementType *
226  ptr() const noexcept
227  {
228  return element_;
229  }
230 
231  private:
233  ElementType * element_;
234  friend class ConstIterator;
235  };
236 
238  {
239  public:
240  typedef const ElementType value_type;
241  typedef const ElementType * pointer;
242  typedef const ElementType & reference;
243  typedef std::forward_iterator_tag iterator_category;
244  typedef size_t size_type;
245  typedef ssize_t difference_type;
246 
247  constexpr ConstIterator(const ConstIterator & other) noexcept = default;
248 
249  constexpr ConstIterator(const Iterator & other) noexcept
250  : map_(other.map_),
251  element_(other.element_)
252  {}
253 
254  constexpr ConstIterator() noexcept
255  : map_(nullptr),
256  element_(nullptr)
257  {}
258 
259  constexpr ConstIterator(const IntrusiveHash * map, const ElementType * object)
260  : map_(map),
261  element_(object)
262  {}
263 
264  inline const ConstIterator &
265  operator++() noexcept
266  {
267  ElementType * next = map_->accessor_.get_next(element_);
268  if (next == nullptr)
269  {
270  size_t hash = map_->hash_(map_->accessor_.get_key(element_));
271  size_t index = (hash & map_->mask_) + 1;
272  while (next == nullptr && index < map_->buckets_.size())
273  {
274  next = map_->buckets_[index].first;
275  ++index;
276  }
277  }
278  element_ = next;
279  return *this;
280  }
281 
282  inline ConstIterator
283  operator++(int) noexcept
284  {
285  ConstIterator i = *this;
286  ++*this;
287  return i;
288  }
289 
290  inline const ElementType &
291  operator*() const noexcept
292  {
293  return *element_;
294  }
295 
296  inline const ElementType *
297  operator->() const noexcept
298  {
299  return element_;
300  }
301 
302  inline bool
303  operator==(const ConstIterator & other) const noexcept
304  {
305  return element_ == other.element_;
306  }
307 
308  inline bool
309  operator!=(const ConstIterator & other) const noexcept
310  {
311  return element_ != other.element_;
312  }
313 
314  inline const ElementType *
315  ptr() const noexcept
316  {
317  return element_;
318  }
319 
320  private:
322  const ElementType * element_;
323  };
324 
325  typedef ElementType value_type;
326  typedef ElementType mapped_type;
327  typedef KeyType key_type;
328  typedef size_t size_type;
329 
330  constexpr IntrusiveHash() noexcept
331  : size_(0),
332  mask_(0)
333  {}
334 
335  IntrusiveHash(const IntrusiveHash & other) = delete;
336 
337  void
338  operator=(const IntrusiveHash & other) = delete;
339 
340  IntrusiveHash(IntrusiveHash && other) noexcept
341  : IntrusiveHash()
342  {
343  swap(other);
344  }
345 
346  void
347  swap(IntrusiveHash & other) noexcept
348  {
349  buckets_.swap(other.buckets_);
350  std::swap(size_, other.size_);
351  std::swap(mask_, other.mask_);
352  }
353 
354  void
355  clear() noexcept
356  {
357  for (BucketType & bucket : buckets_)
358  {
359  bucket.first = nullptr;
360  bucket.last = nullptr;
361  size_ = 0;
362  }
363  }
364 
365  Iterator
366  insert(ElementType * element)
367  {
368  ++size_;
369  if (size_ > buckets_.size())
370  {
371  rehash();
372  }
374 
375  return Iterator(this, element);
376  }
377 
378  inline void
379  erase(ElementType * element) noexcept
380  {
381  size_t index = hash_(accessor_.get_key(element)) & mask_;
382  BucketType & b = buckets_[index];
383  ElementType * prev = accessor_.get_prev(element);
384  ElementType * next = accessor_.get_next(element);
385  if (prev)
386  {
387  accessor_.set_next(prev, next);
388  }
389  else
390  {
391  b.first = next;
392  }
393  if (next)
394  {
395  accessor_.set_prev(next, prev);
396  }
397  else
398  {
399  b.last = prev;
400  }
401  --size_;
402  }
403 
404  inline void
405  erase(Iterator i) noexcept
406  {
407  erase(i.ptr());
408  }
409 
410  inline void
411  erase(const KeyType & key) noexcept
412  {
413  Iterator i = find(key);
414  if (i != end())
415  {
416  erase(i);
417  }
418  }
419 
420  inline void
422  {
423  while (begin != end)
424  {
425  ElementType * element = begin.ptr();
426  ++begin;
427  erase(element);
428  }
429  }
430 
431  inline size_type
432  size() const noexcept
433  {
434  return size_;
435  }
436 
437  inline bool
438  empty() const noexcept
439  {
440  return size() == 0;
441  }
442 
443  Iterator
444  begin() noexcept
445  {
446  return Iterator(this, first_object());
447  }
448 
449  Iterator
450  end() noexcept
451  {
452  return Iterator(this, nullptr);
453  }
454 
455  ConstIterator
456  cbegin() const noexcept
457  {
458  return const_iterator(this, first_object());
459  }
460 
461  ConstIterator
462  cend() const noexcept
463  {
464  return const_iterator(this, nullptr);
465  }
466 
467  ConstIterator
468  begin() const noexcept
469  {
470  return cbegin();
471  }
472 
473  ConstIterator
474  end() const noexcept
475  {
476  return cend();
477  }
478 
479  Iterator
480  find(const KeyType & key) noexcept
481  {
482  return Iterator(this, lookup(key));
483  }
484 
485  inline ConstIterator
486  find(const KeyType & key) const noexcept
487  {
488  return const_iterator(this, lookup(key));
489  }
490 
491 private:
492  ElementType *
493  first_object() const noexcept
494  {
495  for (const BucketType & bucket : buckets_)
496  {
497  if (bucket.first)
498  {
499  return bucket.first;
500  }
501  }
502  return nullptr;
503  }
504 
505  size_t size_;
506  size_t mask_;
507  std::vector<BucketType> buckets_;
508 
509  inline void
511  std::vector<BucketType> & bucket_types,
512  size_t mask,
513  ElementType * element) noexcept
514  {
515  size_t index = hash_(accessor_.get_key(element)) & mask;
516  accessor_.set_prev(element, bucket_types[index].last);
517  accessor_.set_next(element, nullptr);
518  if (bucket_types[index].last)
519  {
520  accessor_.set_next(bucket_types[index].last, element);
521  }
522  else
523  {
524  bucket_types[index].first = element;
525  }
526  bucket_types[index].last = element;
527  }
528 
529  void
531  {
532  std::vector<BucketType> new_buckets(
533  std::max(typename decltype(buckets_)::size_type(1), buckets_.size() * 2),
534  BucketType());
535  size_t new_mask = new_buckets.size() - 1;
536 
537  for (BucketType & old_bucket_type : buckets_)
538  {
539  ElementType * element = old_bucket_type.first;
540  while (element)
541  {
542  ElementType * next = accessor_.get_next(element);
543  private_insert_into(new_buckets, new_mask, element);
544  element = next;
545  }
546  }
547 
548  buckets_.swap(new_buckets);
549  mask_ = new_mask;
550  }
551 
552  inline ElementType *
553  lookup(const KeyType & key) const noexcept
554  {
555  if (empty())
556  {
557  return nullptr;
558  }
559 
560  size_t index = hash_(key) & mask_;
561  ElementType * element = buckets_[index].first;
562  while (element && !equal_(key, accessor_.get_key(element)))
563  {
564  element = accessor_.get_next(element);
565  }
566 
567  return element;
568  }
569 
570  Accessor accessor_;
571  KeyHash hash_;
572  KeyEqual equal_;
573 };
574 
575 template<typename ElementType>
577 {
578 public:
579  ElementType * prev;
580  ElementType * next;
581 };
582 
583 template<
584  typename KeyType,
585  typename ElementType,
586  KeyType ElementType::*key_member,
587  IntrusiveHashAnchor<ElementType> ElementType::*anchor_member>
589 {
590 public:
591  inline KeyType
592  get_key(const ElementType * element) const noexcept
593  {
594  return element->*key_member;
595  }
596 
597  inline ElementType *
598  get_prev(const ElementType * element) const noexcept
599  {
600  return (element->*anchor_member).prev;
601  }
602 
603  inline void
604  set_prev(ElementType * element, ElementType * prev) const noexcept
605  {
606  (element->*anchor_member).prev = prev;
607  }
608 
609  inline ElementType *
610  get_next(const ElementType * element) const noexcept
611  {
612  return (element->*anchor_member).next;
613  }
614 
615  inline void
616  set_next(ElementType * element, ElementType * next) const noexcept
617  {
618  (element->*anchor_member).next = next;
619  }
620 };
621 
622 template<
623  typename KeyType,
624  typename ElementType,
625  typename Accessor,
626  typename KeyHash = std::hash<KeyType>,
627  typename KeyEqual = SafeEqual<KeyType>>
629 {
631 
632 public:
633  static_assert(
634  noexcept(std::declval<ElementType &>().~ElementType()),
635  "Require noexcept destructor for ElementType");
636  typedef typename internal_hash_type::const_iterator const_iterator;
637  typedef typename internal_hash_type::iterator iterator;
642 
644  {
645  clear();
646  }
647 
648  inline constexpr OwnerIntrusiveHash() noexcept
649  {}
650 
651  OwnerIntrusiveHash(const OwnerIntrusiveHash & other) = delete;
652 
653  void
654  operator=(const OwnerIntrusiveHash & other) = delete;
655 
657  : internal_hash_(std::move(other.internal_hash_))
658  {}
659 
660  void
661  swap(OwnerIntrusiveHash & other) noexcept
662  {
663  internal_hash_.swap(other.internal_hash_);
664  }
665 
666  void
667  clear() noexcept
668  {
669  iterator i = begin();
670  while (i != end())
671  {
672  iterator j = i;
673  ++i;
674  erase(j);
675  }
676  }
677 
678  inline iterator
679  insert(std::unique_ptr<ElementType> element)
680  {
681  iterator i = internal_hash_.insert(element.get());
682  element.release();
683  return i;
684  }
685 
686  inline void
687  erase(ElementType * element) noexcept
688  {
689  internal_hash_.erase(element);
690  delete element;
691  }
692 
693  inline void
694  erase(iterator i) noexcept
695  {
696  erase(i.ptr());
697  }
698 
699  inline void
700  erase(const KeyType & key) noexcept
701  {
702  iterator i = find(key);
703  if (i != end())
704  {
705  erase(i);
706  }
707  }
708 
709  inline std::unique_ptr<ElementType>
710  unlink(iterator i) noexcept
711  {
712  std::unique_ptr<ElementType> e = i.ptr();
714  return e;
715  }
716 
717  inline void
719  {
721  }
722 
723  inline size_type
724  size() const noexcept
725  {
726  return internal_hash_.size();
727  }
728 
729  inline bool
730  empty() const noexcept
731  {
732  return internal_hash_.empty();
733  }
734 
735  iterator
736  begin() noexcept
737  {
738  return internal_hash_.begin();
739  }
740 
741  iterator
742  end() noexcept
743  {
744  return internal_hash_.end();
745  }
746 
748  cbegin() const noexcept
749  {
750  return internal_hash_.cbegin();
751  }
752 
754  cend() const noexcept
755  {
756  return internal_hash_.cend();
757  }
758 
760  begin() const noexcept
761  {
762  return internal_hash_.begin();
763  }
764 
766  end() const noexcept
767  {
768  return internal_hash_.end();
769  }
770 
771  inline iterator
772  find(const KeyType & key) noexcept
773  {
774  return internal_hash_.find(key);
775  }
776 
777  inline const_iterator
778  find(const KeyType & key) const noexcept
779  {
780  return internal_hash_.find(key);
781  }
782 
783 private:
785 };
786 
787 }
788 
789 #endif
void set_next(ElementType *element, ElementType *next) const noexcept
KeyType get_key(const ElementType *element) const noexcept
ElementType * get_next(const ElementType *element) const noexcept
void set_prev(ElementType *element, ElementType *prev) const noexcept
ElementType * get_prev(const ElementType *element) const noexcept
const ConstIterator & operator++() noexcept
const ElementType * operator->() const noexcept
std::forward_iterator_tag iterator_category
const ElementType & operator*() const noexcept
bool operator==(const ConstIterator &other) const noexcept
ConstIterator operator++(int) noexcept
constexpr ConstIterator(const IntrusiveHash *map, const ElementType *object)
bool operator!=(const ConstIterator &other) const noexcept
constexpr ConstIterator(const ConstIterator &other) noexcept=default
constexpr ConstIterator(const Iterator &other) noexcept
const ElementType * ptr() const noexcept
ElementType & operator*() const noexcept
ElementType * operator->() const noexcept
ElementType * ptr() const noexcept
const Iterator & operator++() noexcept
std::forward_iterator_tag iterator_category
bool operator==(const Iterator &other) const noexcept
bool operator!=(const Iterator &other) const noexcept
constexpr Iterator(const IntrusiveHash *map, ElementType *object)
ConstIterator cend() const noexcept
Iterator end() noexcept
Iterator begin() noexcept
ConstIterator cbegin() const noexcept
ConstIterator find(const KeyType &key) const noexcept
size_type size() const noexcept
ElementType * first_object() const noexcept
void private_insert_into(std::vector< BucketType > &bucket_types, size_t mask, ElementType *element) noexcept
std::vector< BucketType > buckets_
void swap(IntrusiveHash &other) noexcept
ConstIterator end() const noexcept
IntrusiveHash(IntrusiveHash &&other) noexcept
ConstIterator begin() const noexcept
Iterator insert(ElementType *element)
void erase(ElementType *element) noexcept
void erase(const KeyType &key) noexcept
Iterator find(const KeyType &key) noexcept
void erase(Iterator i) noexcept
void operator=(const IntrusiveHash &other)=delete
bool empty() const noexcept
constexpr IntrusiveHash() noexcept
void erase(Iterator begin, Iterator end) noexcept
ElementType * lookup(const KeyType &key) const noexcept
IntrusiveHash(const IntrusiveHash &other)=delete
iterator find(const KeyType &key) noexcept
bool empty() const noexcept
void operator=(const OwnerIntrusiveHash &other)=delete
internal_hash_type::value_type value_type
OwnerIntrusiveHash(const OwnerIntrusiveHash &other)=delete
void erase(ElementType *element) noexcept
IntrusiveHash< KeyType, ElementType, Accessor, KeyHash, KeyEqual > internal_hash_type
const_iterator cbegin() const noexcept
const_iterator cend() const noexcept
const_iterator find(const KeyType &key) const noexcept
void erase(iterator i) noexcept
const_iterator end() const noexcept
internal_hash_type::size_type size_type
internal_hash_type::key_type key_type
internal_hash_type::iterator iterator
internal_hash_type::mapped_type mapped_type
void swap(OwnerIntrusiveHash &other) noexcept
OwnerIntrusiveHash(OwnerIntrusiveHash &&other) noexcept
void erase(const KeyType &key) noexcept
const_iterator begin() const noexcept
size_type size() const noexcept
std::unique_ptr< ElementType > unlink(iterator i) noexcept
internal_hash_type internal_hash_
internal_hash_type::const_iterator const_iterator
void erase(iterator begin, iterator end) noexcept
iterator insert(std::unique_ptr< ElementType > element)
constexpr OwnerIntrusiveHash() noexcept
bool operator()(const std::string &a, const std::string &b) const noexcept
bool operator()(const T &a, const T &b) const noexcept(noexcept(std::declval< const T & >()==std::declval< const T & >()))