ModErn Text Analysis
META Enumerates Textual Applications
hash_storage.h
Go to the documentation of this file.
1 
6 #include <cassert>
7 #include <cmath>
8 #include <functional>
9 #include <iterator>
10 #include <type_traits>
11 #include <utility>
12 #include <vector>
13 
14 #include "meta/config.h"
16 #include "meta/util/optional.h"
17 
18 #ifndef META_HASHING_HASH_STORAGE_H_
19 #define META_HASHING_HASH_STORAGE_H_
20 
21 namespace meta
22 {
23 namespace hashing
24 {
25 
26 template <class T>
27 struct key_traits;
28 
34 template <class K, class V>
35 class kv_pair
36 {
37  public:
38  kv_pair(const K& key, V& value) : key_{key}, value_{value}
39  {
40  // nothing
41  }
42 
43  const K& key() const
44  {
45  return key_.get();
46  }
47 
48  const V& value() const
49  {
50  return value_.get();
51  }
52 
53  V& value()
54  {
55  return value_.get();
56  }
57 
58  template <class K1, class V1>
59  operator std::pair<K1, V1>() const
60  {
61  return {key(), value()};
62  }
63 
64  private:
65  std::reference_wrapper<const K> key_;
66  std::reference_wrapper<V> value_;
67 };
68 
69 template <class Pair>
70 struct kv_traits;
71 
72 template <class K, class V>
73 struct kv_traits<std::pair<K, V>>
74 {
75  static const K& key(const std::pair<K, V>& pr)
76  {
77  return pr.first;
78  }
79 
80  static const V& value(const std::pair<K, V>& pr)
81  {
82  return pr.second;
83  }
84 };
85 
86 template <class K, class V>
87 struct kv_traits<kv_pair<K, V>>
88 {
89  static const K& key(const kv_pair<K, V>& pr)
90  {
91  return pr.key();
92  }
93 
94  static const V& value(const kv_pair<K, V>& pr)
95  {
96  return pr.value();
97  }
98 };
99 
100 template <class Key>
101 const Key& get_key(const Key& key)
102 {
103  return key;
104 }
105 
106 template <class K, class V>
107 const K& get_key(const kv_pair<K, V>& kv)
108 {
109  return kv.key();
110 }
111 
115 template <class Storage>
117 {
118  public:
119  constexpr static bool is_const = std::is_const<Storage>::value;
120 
122 
123  using storage_type = Storage;
124 
125  using difference_type = std::ptrdiff_t;
126  using iterator_category = std::forward_iterator_tag;
127 
128  using reference = typename Storage::reference;
129  using const_reference = typename Storage::const_reference;
130 
131  using value_type = typename std::remove_reference<reference>::type;
132  using const_value_type = typename std::add_const<value_type>::type;
133 
134  using pointer = typename std::add_pointer<value_type>::type;
135  using const_pointer = typename std::add_pointer<const_value_type>::type;
136 
137  key_storage_iterator(Storage& stor) : table_{stor}, idx_{stor.capacity()}
138  {
139  // nothing
140  }
141 
142  key_storage_iterator(Storage& stor, std::size_t idx)
143  : table_{stor}, idx_{idx}
144  {
145  if (idx < table_.get().capacity() && !table_.get().occupied(idx))
146  ++(*this);
147  }
148 
149  template <class Iterator,
150  class U = typename std::
151  enable_if<is_const
152  && std::is_same<
153  typename Iterator::storage_type,
154  typename std::remove_const<Storage>::type>::
155  value>::type>
156  key_storage_iterator(Iterator&& it) : table_{it.table_}, idx_{it.idx_}
157  {
158  // nothing
159  }
160 
164  key_storage_iterator& operator++()
165  {
166  do
167  {
168  ++idx_;
169  } while (idx_ < table_.get().capacity()
170  && !table_.get().occupied(idx_));
171 
172  return *this;
173  }
174 
178  const_reference operator*() const
179  {
180  return (table_.get())[idx_];
181  }
182 
186  const_pointer operator->() const
187  {
188  return &(table_.get())[idx_];
189  }
190 
195  bool operator==(const key_storage_iterator& rhs)
196  {
197  return &(table_.get()) == &(rhs.table_.get()) && idx_ == rhs.idx_;
198  }
199 
204  bool operator!=(const key_storage_iterator& rhs)
205  {
206  return !(*this == rhs);
207  }
208 
209  private:
211  std::reference_wrapper<Storage> table_;
212 
214  std::size_t idx_;
215 };
216 
220 template <class Storage>
222 {
223  public:
224  constexpr static bool is_const = std::is_const<Storage>::value;
225  using difference_type = std::ptrdiff_t;
226  using iterator_category = std::forward_iterator_tag;
227  using value_type =
228  typename std::conditional<is_const, typename Storage::const_value_type,
229  typename Storage::value_type>::type;
230  using const_value_type = typename std::add_const<value_type>::type;
231  using reference = typename std::add_lvalue_reference<value_type>::type;
232  using const_reference =
233  typename std::add_lvalue_reference<const_value_type>::type;
234  using pointer = typename std::add_pointer<value_type>::type;
235  using const_pointer = typename std::add_pointer<const_value_type>::type;
236 
237  key_value_storage_iterator(Storage& stor)
238  : table_{stor}, idx_{stor.capacity()}
239  {
240  // nothing
241  }
242 
243  key_value_storage_iterator(Storage& stor, std::size_t idx)
244  : table_{stor}, idx_{idx}
245  {
246  if (idx < table_.get().capacity())
247  {
248  if (!table_.get().occupied(idx))
249  ++(*this);
250  else
251  pair_ = table_.get()[idx];
252  }
253  }
254 
256  : table_{other.table_}, idx_{other.idx_}, pair_{other.pair_}
257  {
258  // nothing
259  }
260 
262  {
263  table_ = rhs.table_;
264  idx_ = rhs.idx_;
265  pair_ = rhs.pair_;
266  return *this;
267  }
268 
273  {
274  do
275  {
276  ++idx_;
277  } while (idx_ < table_.get().capacity()
278  && !table_.get().occupied(idx_));
279 
280  if (idx_ < table_.get().capacity() && table_.get().occupied(idx_))
281  pair_ = table_.get()[idx_];
282 
283  return *this;
284  }
285 
289  const_reference operator*() const
290  {
291  assert(pair_);
292  return *pair_;
293  }
294 
298  reference operator*()
299  {
300  assert(pair_);
301  return *pair_;
302  }
303 
308  const_pointer operator->() const
309  {
310  return &*pair_;
311  }
312 
317  pointer operator->()
318  {
319  return &*pair_;
320  }
321 
327  {
328  return &(table_.get()) == &(rhs.table_.get()) && idx_ == rhs.idx_;
329  }
330 
336  {
337  return !(*this == rhs);
338  }
339 
340  private:
342  std::reference_wrapper<Storage> table_;
343 
345  std::size_t idx_;
346 
349 };
350 
356 struct hash_idx
357 {
358  std::size_t hc = 0;
359  std::size_t idx = 0;
360 };
361 
372 template <class Storage>
374 
378 template <class Derived>
380 {
381  public:
382  using iterator = typename storage_traits<Derived>::iterator;
383  using const_iterator = typename storage_traits<Derived>::const_iterator;
384  using key_type = typename storage_traits<Derived>::key_type;
385  using stored_type = typename storage_traits<Derived>::stored_type;
386  using probing_strategy = typename storage_traits<Derived>::probing_strategy;
387  using hash_type = typename storage_traits<Derived>::hash_type;
388  using equal_type = typename storage_traits<Derived>::equal_type;
389 
390  constexpr static double default_max_load_factor()
391  {
392  return 0.85;
393  }
394 
395  constexpr static double default_resize_ratio()
396  {
397  return 1.5;
398  }
399 
400  iterator begin()
401  {
402  return {as_derived(), 0};
403  }
404 
405  iterator end()
406  {
407  return {as_derived()};
408  }
409 
410  const_iterator begin() const
411  {
412  return {as_derived(), 0};
413  }
414 
415  const_iterator end() const
416  {
417  return {as_derived()};
418  }
419 
423  double max_load_factor() const
424  {
425  return max_load_factor_;
426  }
427 
432  void max_load_factor(double mlf)
433  {
434  max_load_factor_ = mlf;
435  }
436 
440  double resize_ratio() const
441  {
442  return resize_ratio_;
443  }
444 
449  void resize_ratio(double rratio)
450  {
451  resize_ratio_ = rratio;
452  }
453 
461  std::size_t get_idx(const key_type& key,
462  typename hash_type::result_type hc) const
463  {
464  probing_strategy strategy(hc, as_derived().capacity());
465  auto idx = strategy.probe();
466  while (as_derived().occupied(idx) && !as_derived().equal(idx, hc, key))
467  {
468  idx = strategy.probe();
469  }
470  return idx;
471  }
472 
477  template <class... Args>
478  iterator emplace(Args&&... args)
479  {
480  if (next_load_factor() >= max_load_factor())
481  as_derived().resize(next_size());
482 
483  stored_type stored(std::forward<Args>(args)...);
484  const auto& key = storage_traits<Derived>::get_key(stored);
485  auto hc = hash_(key);
486  auto idx = get_idx(key, hc);
487  as_derived().put(idx, hc, std::move(stored));
488 
489  return {as_derived(), idx};
490  }
491 
497  const_iterator find(const key_type& key) const
498  {
499  auto hc = hash_(key);
500  auto idx = get_idx(key, hc);
501 
502  if (!as_derived().occupied(idx))
503  return end();
504 
505  return {as_derived(), idx};
506  }
507 
513  iterator find(const key_type& key)
514  {
515  auto hc = hash_(key);
516  auto idx = get_idx(key, hc);
517 
518  if (!as_derived().occupied(idx))
519  return end();
520 
521  return {as_derived(), idx};
522  }
523 
527  bool empty() const
528  {
529  return as_derived().size() == 0;
530  }
531 
532  double next_load_factor() const
533  {
534  return (as_derived().size() + 1)
535  / static_cast<double>(as_derived().capacity());
536  }
537 
538  std::size_t next_size() const
539  {
540  return static_cast<std::size_t>(
541  std::ceil(as_derived().capacity() * resize_ratio()));
542  }
543 
544  bool key_equal(const key_type& k1, const key_type& k2) const
545  {
546  return equal_(k1, k2);
547  }
548 
549  std::size_t hash(const key_type& key) const
550  {
551  return hash_(key);
552  }
553 
554  private:
555  Derived& as_derived()
556  {
557  return static_cast<Derived&>(*this);
558  }
559 
560  const Derived& as_derived() const
561  {
562  return static_cast<const Derived&>(*this);
563  }
564 
565  hash_type hash_;
566  equal_type equal_;
567  double max_load_factor_ = default_max_load_factor();
568  double resize_ratio_ = default_resize_ratio();
569 };
570 
574 template <class T, class ProbingStrategy, class Hash, class KeyEqual>
576  : public storage_base<external_key_storage<T, ProbingStrategy, Hash,
577  KeyEqual>>
578 {
579  public:
580  using reference = T&;
581  using const_reference = const T&;
582 
583  using idx_vector_type = util::aligned_vector<hash_idx>;
584  using key_vector_type = util::aligned_vector<T>;
585 
586  external_key_storage(std::size_t capacity) : table_(capacity)
587  {
588  // nothing
589  }
590 
591  bool occupied(std::size_t idx) const
592  {
593  return table_[idx].idx != 0;
594  }
595 
596  bool equal(std::size_t idx, std::size_t hc, const_reference key) const
597  {
598  return table_[idx].hc == hc && this->key_equal((*this)[idx], key);
599  }
600 
601  const_reference operator[](std::size_t idx) const
602  {
603  return keys_[table_[idx].idx - 1];
604  }
605 
606  reference operator[](std::size_t idx)
607  {
608  return keys_[table_[idx].idx - 1];
609  }
610 
611  template <class... Args>
612  void put(std::size_t idx, std::size_t hc, Args&&... args)
613  {
614  if (occupied(idx))
615  {
616  keys_[table_[idx].idx - 1] = T(std::forward<Args>(args)...);
617  }
618  else
619  {
620  table_[idx].idx = keys_.size() + 1;
621  keys_.emplace_back(std::forward<Args>(args)...);
622  }
623  table_[idx].hc = hc;
624  }
625 
626  std::size_t size() const
627  {
628  return keys_.size();
629  }
630 
631  std::size_t capacity() const
632  {
633  return table_.size();
634  }
635 
636  void clear()
637  {
638  key_vector_type{}.swap(keys_);
639  std::fill(std::begin(table_), std::end(table_), hash_idx{});
640  }
641 
642  void resize(std::size_t new_cap)
643  {
644  assert(new_cap > capacity());
645 
646  table_.resize(new_cap);
647  std::fill(std::begin(table_), std::end(table_), hash_idx{});
648 
649  for (std::size_t i = 0; i < keys_.size(); ++i)
650  {
651  auto hc = this->hash(keys_[i]);
652  auto nidx = this->get_idx(keys_[i], hc);
653  table_[nidx].hc = hc;
654  table_[nidx].idx = i + 1;
655  }
656  }
657 
658  std::size_t bytes_used() const
659  {
660  return sizeof(hash_idx) * table_.capacity()
661  + sizeof(T) * keys_.capacity();
662  }
663 
664  key_vector_type extract_keys()
665  {
666  auto res = std::move(keys_);
667  clear();
668  return res;
669  }
670 
671  idx_vector_type table_;
672  key_vector_type keys_;
673 };
674 
679 template <class T, class ProbingStrategy, class Hash, class KeyEqual>
680 struct storage_traits<external_key_storage<T, ProbingStrategy, Hash, KeyEqual>>
681 {
685  using stored_type = T;
686  using key_type = T;
687  using probing_strategy = ProbingStrategy;
688  using hash_type = Hash;
689  using equal_type = KeyEqual;
690 
691  static const T& get_key(const T& key)
692  {
693  return key;
694  }
695 };
696 
701 template <class T, class ProbingStrategy, class Hash, class KeyEqual>
703  : public storage_base<inline_key_storage<T, ProbingStrategy, Hash,
704  KeyEqual>>
705 {
706  public:
707  using reference = T&;
708  using const_reference = const T&;
709  using vector_type = util::aligned_vector<T>;
710 
711  inline_key_storage(std::size_t capacity)
712  : table_(capacity, key_traits<T>::sentinel()), size_{0}
713  {
714  // nothing
715  }
716 
717  bool occupied(std::size_t idx) const
718  {
719  return !this->key_equal(table_[idx], key_traits<T>::sentinel());
720  }
721 
722  bool equal(std::size_t idx, std::size_t /*hc*/, const_reference key) const
723  {
724  return this->key_equal(table_[idx], key);
725  }
726 
727  const_reference operator[](std::size_t idx) const
728  {
729  return table_[idx];
730  }
731 
732  reference operator[](std::size_t idx)
733  {
734  return table_[idx];
735  }
736 
737  template <class... Args>
738  void put(std::size_t idx, std::size_t /*hc*/, Args&&... args)
739  {
740  if (!occupied(idx))
741  ++size_;
742  table_[idx] = T(std::forward<Args>(args)...);
743  }
744 
745  std::size_t size() const
746  {
747  return size_;
748  }
749 
750  std::size_t capacity() const
751  {
752  return table_.size();
753  }
754 
755  void clear()
756  {
757  std::fill(std::begin(table_), std::end(table_),
759  size_ = 0;
760  }
761 
762  void resize(std::size_t new_cap)
763  {
764  assert(new_cap > capacity());
765 
766  vector_type temptable(new_cap, key_traits<T>::sentinel());
767  using std::swap;
768  swap(table_, temptable);
769 
770  for (std::size_t idx = 0; idx < temptable.size(); ++idx)
771  {
772  if (!this->key_equal(temptable[idx], key_traits<T>::sentinel()))
773  {
774  auto hc = this->hash(temptable[idx]);
775  table_[this->get_idx(temptable[idx], hc)]
776  = std::move(temptable[idx]);
777  }
778  }
779  }
780 
781  std::size_t bytes_used() const
782  {
783  return sizeof(T) * table_.capacity() + sizeof(std::size_t);
784  }
785 
786  std::vector<T> extract_keys()
787  {
788  std::vector<T> res;
789  res.reserve(size_);
790  for (std::size_t idx = 0; idx < table_.size(); ++idx)
791  {
792  if (occupied(idx))
793  res.emplace_back(std::move(table_[idx]));
794  }
795  clear();
796  return res;
797  }
798 
799  vector_type table_;
800  std::size_t size_;
801 };
802 
807 template <class T, class ProbingStrategy, class Hash, class KeyEqual>
808 struct storage_traits<inline_key_storage<T, ProbingStrategy, Hash, KeyEqual>>
809 {
813  using stored_type = T;
814  using key_type = T;
815  using probing_strategy = ProbingStrategy;
816  using hash_type = Hash;
817  using equal_type = KeyEqual;
818 
819  static const T& get_key(const T& key)
820  {
821  return key;
822  }
823 };
824 
829 template <class K, class V, class ProbingStrategy, class Hash, class KeyEqual>
831  : public storage_base<inline_key_value_storage<K, V, ProbingStrategy, Hash,
832  KeyEqual>>
833 {
834  public:
835  using value_type = kv_pair<K, V>;
837  using vector_type = util::aligned_vector<std::pair<K, V>>;
838 
839  inline_key_value_storage(std::size_t capacity)
840  : table_(capacity, std::make_pair(key_traits<K>::sentinel(),
842  size_{0}
843  {
844  // nothing
845  }
846 
847  bool occupied(std::size_t idx) const
848  {
849  return !this->key_equal(table_[idx].first, key_traits<K>::sentinel());
850  }
851 
852  bool equal(std::size_t idx, std::size_t /*hc*/, const K& key) const
853  {
854  return this->key_equal(table_[idx].first, key);
855  }
856 
857  const_value_type operator[](std::size_t idx) const
858  {
859  const auto& pr = table_[idx];
860  return {pr.first, pr.second};
861  }
862 
863  value_type operator[](std::size_t idx)
864  {
865  auto& pr = table_[idx];
866  return {pr.first, pr.second};
867  }
868 
869  template <class... Args>
870  void put(std::size_t idx, std::size_t /*hc*/, Args&&... args)
871  {
872  if (!occupied(idx))
873  ++size_;
874  table_[idx] = std::pair<K, V>(std::forward<Args>(args)...);
875  }
876 
877  std::size_t size() const
878  {
879  return size_;
880  }
881 
882  std::size_t capacity() const
883  {
884  return table_.size();
885  }
886 
887  void clear()
888  {
889  std::fill(std::begin(table_), std::end(table_),
890  std::make_pair(key_traits<K>::sentinel(),
892  size_ = 0;
893  }
894 
895  void resize(std::size_t new_cap)
896  {
897  assert(new_cap > capacity());
898 
899  vector_type temptable(new_cap,
900  std::make_pair(key_traits<K>::sentinel(),
902  using std::swap;
903  swap(table_, temptable);
904 
905  for (std::size_t i = 0; i < temptable.size(); ++i)
906  {
907  if (!this->key_equal(temptable[i].first, key_traits<K>::sentinel()))
908  {
909  auto hc = this->hash(temptable[i].first);
910  table_[this->get_idx(temptable[i].first, hc)]
911  = std::move(temptable[i]);
912  }
913  }
914  }
915 
916  std::size_t bytes_used() const
917  {
918  return sizeof(std::pair<K, V>) * table_.capacity()
919  + sizeof(std::size_t);
920  }
921 
922  vector_type extract() &&
923  {
924  // get rid of all blank cells
925  table_.erase(std::remove_if(table_.begin(), table_.end(),
926  [this](const std::pair<K, V>& pr) {
927  return this->key_equal(
928  pr.first,
930  }),
931  table_.end());
932  return std::move(table_);
933  }
934 
935  vector_type table_;
936  std::size_t size_;
937 };
938 
943 template <class K, class V, class ProbingStrategy, class Hash, class KeyEqual>
944 struct storage_traits<inline_key_value_storage<K, V, ProbingStrategy, Hash,
945  KeyEqual>>
946 {
947  using type
951  using stored_type = std::pair<K, V>;
952  using key_type = K;
953  using probing_strategy = ProbingStrategy;
954  using hash_type = Hash;
955  using equal_type = KeyEqual;
956 
957  static const key_type& get_key(const stored_type& st)
958  {
959  return st.first;
960  }
961 };
962 
967 template <class K, class V, class ProbingStrategy, class Hash, class KeyEqual>
969  : public storage_base<inline_key_external_value_storage<K, V,
970  ProbingStrategy,
971  Hash, KeyEqual>>
972 {
973  public:
974  using value_type = kv_pair<K, V>;
976 
977  using key_vector_type = util::aligned_vector<std::pair<K, std::size_t>>;
978  using value_vector_type = util::aligned_vector<V>;
979 
980  inline_key_external_value_storage(std::size_t capacity)
981  : table_(capacity,
982  std::make_pair(key_traits<K>::sentinel(), std::size_t{0}))
983  {
984  // nothing
985  }
986 
987  bool occupied(std::size_t idx) const
988  {
989  return !this->key_equal(table_[idx].first, key_traits<K>::sentinel());
990  }
991 
992  bool equal(std::size_t idx, std::size_t /*hc*/, const K& key) const
993  {
994  return this->key_equal(table_[idx].first, key);
995  }
996 
997  const_value_type operator[](std::size_t idx) const
998  {
999  const auto& pr = table_[idx];
1000  return {pr.first, values_[pr.second]};
1001  }
1002 
1003  value_type operator[](std::size_t idx)
1004  {
1005  auto& pr = table_[idx];
1006  return {pr.first, values_[pr.second]};
1007  }
1008 
1009  template <class... Args>
1010  void put(std::size_t idx, std::size_t /*hc*/, Args&&... args)
1011  {
1012  auto pr = std::pair<K, V>(std::forward<Args>(args)...);
1013  if (occupied(idx))
1014  {
1015  values_[table_[idx].second] = std::move(pr.second);
1016  }
1017  else
1018  {
1019  table_[idx].second = values_.size();
1020  values_.emplace_back(std::move(pr.second));
1021  }
1022  table_[idx].first = std::move(pr.first);
1023  }
1024 
1025  std::size_t size() const
1026  {
1027  return values_.size();
1028  }
1029 
1030  std::size_t capacity() const
1031  {
1032  return table_.size();
1033  }
1034 
1035  void clear()
1036  {
1037  std::fill(std::begin(table_), std::end(table_),
1038  std::make_pair(key_traits<K>::sentinel(), std::size_t{0}));
1039  std::vector<V>{}.swap(values_);
1040  }
1041 
1042  void resize(std::size_t new_cap)
1043  {
1044  assert(new_cap > capacity());
1045 
1046  key_vector_type temptable(new_cap,
1047  std::make_pair(key_traits<K>::sentinel(), 0));
1048  using std::swap;
1049  swap(table_, temptable);
1050 
1051  for (std::size_t i = 0; i < temptable.size(); ++i)
1052  {
1053  if (!this->key_equal(temptable[i].first, key_traits<K>::sentinel()))
1054  {
1055  auto hc = this->hash(temptable[i].first);
1056  table_[this->get_idx(temptable[i].first, hc)]
1057  = std::move(temptable[i]);
1058  }
1059  }
1060  }
1061 
1062  std::size_t bytes_used() const
1063  {
1064  return sizeof(std::pair<K, std::size_t>) * table_.capacity()
1065  + sizeof(V) * values_.capacity();
1066  }
1067 
1068  std::vector<std::pair<K, V>> extract() &&
1069  {
1070  std::vector<std::pair<K, V>> ret;
1071  ret.reserve(values_.size());
1072 
1073  for (auto& key_pr : table_)
1074  {
1075  if (!this->key_equal(key_pr.first, key_traits<K>::sentinel()))
1076  {
1077  ret.emplace_back(std::move(key_pr.first),
1078  std::move(values_[key_pr.second]));
1079  }
1080  }
1081 
1082  key_vector_type{}.swap(table_);
1083  value_vector_type{}.swap(values_);
1084 
1085  return ret;
1086  }
1087 
1088  key_vector_type table_;
1089  value_vector_type values_;
1090 };
1091 
1096 template <class K, class V, class ProbingStrategy, class Hash, class KeyEqual>
1098  Hash, KeyEqual>>
1099 {
1100  using type = inline_key_external_value_storage<K, V, ProbingStrategy, Hash,
1101  KeyEqual>;
1104  using stored_type = std::pair<K, V>;
1105  using key_type = K;
1106  using probing_strategy = ProbingStrategy;
1107  using hash_type = Hash;
1108  using equal_type = KeyEqual;
1109 
1110  static const key_type& get_key(const stored_type& st)
1111  {
1112  return st.first;
1113  }
1114 };
1115 
1120 template <class K, class V, class ProbingStrategy, class Hash, class KeyEqual>
1122  : public storage_base<external_key_value_storage<K, V, ProbingStrategy,
1123  Hash, KeyEqual>>
1124 {
1125  public:
1126  using value_type = kv_pair<K, V>;
1128 
1129  using idx_vector_type = util::aligned_vector<hash_idx>;
1130  using kv_vector_type = util::aligned_vector<std::pair<K, V>>;
1131 
1132  external_key_value_storage(std::size_t capacity) : table_(capacity)
1133  {
1134  // nothing
1135  }
1136 
1137  bool occupied(std::size_t idx) const
1138  {
1139  return table_[idx].idx != 0;
1140  }
1141 
1142  bool equal(std::size_t idx, std::size_t hc, const K& key) const
1143  {
1144  return table_[idx].hc == hc && this->key_equal((*this)[idx].key(), key);
1145  }
1146 
1147  const_value_type operator[](std::size_t idx) const
1148  {
1149  const auto& pr = storage_[table_[idx].idx - 1];
1150  return {pr.first, pr.second};
1151  }
1152 
1153  value_type operator[](std::size_t idx)
1154  {
1155  auto& pr = storage_[table_[idx].idx - 1];
1156  return {pr.first, pr.second};
1157  }
1158 
1159  template <class... Args>
1160  void put(std::size_t idx, std::size_t hc, Args&&... args)
1161  {
1162  if (occupied(idx))
1163  {
1164  storage_[table_[idx].idx - 1]
1165  = std::pair<K, V>(std::forward<Args>(args)...);
1166  }
1167  else
1168  {
1169  table_[idx].idx = storage_.size() + 1;
1170  storage_.emplace_back(std::forward<Args>(args)...);
1171  }
1172  table_[idx].hc = hc;
1173  }
1174 
1175  std::size_t size() const
1176  {
1177  return storage_.size();
1178  }
1179 
1180  std::size_t capacity() const
1181  {
1182  return table_.size();
1183  }
1184 
1185  void clear()
1186  {
1187  kv_vector_type{}.swap(storage_);
1188  std::fill(std::begin(table_), std::end(table_), hash_idx{});
1189  }
1190 
1191  void resize(std::size_t new_cap)
1192  {
1193  assert(new_cap > capacity());
1194 
1195  table_.resize(new_cap);
1196  std::fill(std::begin(table_), std::end(table_), hash_idx{});
1197 
1198  for (std::size_t i = 0; i < storage_.size(); ++i)
1199  {
1200  auto hc = this->hash(storage_[i].first);
1201  auto nidx = this->get_idx(storage_[i].first, hc);
1202  table_[nidx].hc = hc;
1203  table_[nidx].idx = i + 1;
1204  }
1205  }
1206 
1207  std::size_t bytes_used() const
1208  {
1209  return sizeof(hash_idx) * table_.capacity()
1210  + sizeof(std::pair<K, V>) * storage_.capacity();
1211  }
1212 
1213  kv_vector_type extract() &&
1214  {
1215  idx_vector_type{}.swap(table_);
1216  kv_vector_type ret;
1217  storage_.swap(ret);
1218  return ret;
1219  }
1220 
1221  idx_vector_type table_;
1222  kv_vector_type storage_;
1223 };
1224 
1229 template <class K, class V, class ProbingStrategy, class Hash, class KeyEqual>
1230 struct storage_traits<external_key_value_storage<K, V, ProbingStrategy, Hash,
1231  KeyEqual>>
1232 {
1233  using type
1237  using stored_type = std::pair<K, V>;
1238  using key_type = K;
1239  using probing_strategy = ProbingStrategy;
1240  using hash_type = Hash;
1241  using equal_type = KeyEqual;
1242 
1243  static const key_type& get_key(const stored_type& st)
1244  {
1245  return st.first;
1246  }
1247 };
1248 }
1249 }
1250 #endif
void max_load_factor(double mlf)
Sets the maximum allowed load factor for this table.
Definition: hash_storage.h:432
Storage class for hash tables with key, value pairs that should be stored externally from the probing...
Definition: hash_storage.h:1121
std::string remove_if(const std::string &str, Predicate &&pred)
Removes UTF-32 codepoints that match the given function.
Definition: utf.h:121
Storage class for hash tables with keys that can be inlined into the probing table, but values that should be stored externally.
Definition: hash_storage.h:968
bool operator!=(const key_storage_iterator &rhs)
Definition: hash_storage.h:204
A generic, randomly seeded hash function.
Definition: hash.h:343
Storage class for hash sets that have non-inlineable keys.
Definition: hash_storage.h:575
STL namespace.
std::size_t idx_
The current index into the table.
Definition: hash_storage.h:345
uint64_t bytes_used(const T &elem, typename std::enable_if< std::is_same< T, std::string >::value >::type *=nullptr)
Gets the bytes used by a std::string.
Definition: postings_buffer.h:33
std::size_t get_idx(const key_type &key, typename hash_type::result_type hc) const
Uses the specified ProbingStrategy to find the specified element (or the next open slot)...
Definition: hash_storage.h:461
double resize_ratio() const
Definition: hash_storage.h:440
const_pointer operator->() const
Definition: hash_storage.h:186
bool empty() const
Definition: hash_storage.h:527
std::size_t idx_
The current index into the table.
Definition: hash_storage.h:214
const_reference operator*() const
Definition: hash_storage.h:289
iterator find(const key_type &key)
Definition: hash_storage.h:513
double max_load_factor() const
Definition: hash_storage.h:423
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
const_iterator find(const key_type &key) const
Definition: hash_storage.h:497
pointer operator->()
Definition: hash_storage.h:317
Used by probing arrays where keys (and possibly values) are stored externally from the probing array ...
Definition: hash_storage.h:356
Storage class for hash sets with keys that can be inlined into the probing table. ...
Definition: hash_storage.h:702
A traits class indicating whether a type can be inlined as a key in a probing-based hash table...
Definition: hash_storage.h:27
void resize_ratio(double rratio)
Definition: hash_storage.h:449
A generic iterator over probing tables that store only keys.
Definition: hash_storage.h:116
std::reference_wrapper< Storage > table_
The probe_set this iterator is iterating over.
Definition: hash_storage.h:211
A traits class used internally for configuring the following types for the storage classes: ...
Definition: hash_storage.h:373
util::optional< value_type > pair_
The buffered pair.
Definition: hash_storage.h:348
bool operator==(const key_storage_iterator &rhs)
Definition: hash_storage.h:195
std::reference_wrapper< Storage > table_
The probe_set this iterator is iterating over.
Definition: hash_storage.h:342
A CRTP base class for generic probing hash table/set storage.
Definition: hash_storage.h:379
iterator emplace(Args &&... args)
Definition: hash_storage.h:478
A generic iterator over probing tables that store keys and values.
Definition: hash_storage.h:221
bool operator!=(const key_value_storage_iterator &rhs)
Definition: hash_storage.h:335
Definition: hash_storage.h:70
key_storage_iterator & operator++()
Definition: hash_storage.h:164
const_reference operator*() const
Definition: hash_storage.h:178
reference operator*()
Definition: hash_storage.h:298
key_value_storage_iterator & operator++()
Definition: hash_storage.h:272
Pair class used by the hash tables.
Definition: hash_storage.h:35
const_pointer operator->() const
Definition: hash_storage.h:308
bool operator==(const key_value_storage_iterator &rhs)
Definition: hash_storage.h:326
Storage class for hash tables with keys and values that can both be inlined into the probing table...
Definition: hash_storage.h:830