ModErn Text Analysis
META Enumerates Textual Applications
postings_data.h
Go to the documentation of this file.
1 
10 #ifndef META_POSTINGS_DATA_
11 #define META_POSTINGS_DATA_
12 
13 #include <fstream>
14 #include <limits>
15 #include <utility>
16 #include <vector>
17 
18 #include "meta/config.h"
19 #include "meta/meta.h"
21 
22 namespace meta
23 {
24 namespace index
25 {
26 
35 template <class PrimaryKey, class SecondaryKey, class FeatureValue = uint64_t>
36 class postings_data
37 {
38  public:
39  using primary_key_type = PrimaryKey;
40  using secondary_key_type = SecondaryKey;
41  using pair_t = std::pair<SecondaryKey, FeatureValue>;
42  using count_t = std::vector<pair_t>;
43 
48  static_assert(
49  (util::is_numeric<PrimaryKey>::value
50  || std::is_same<PrimaryKey, std::string>::value)
51  && (util::is_numeric<SecondaryKey>::value),
52  "primary and secondary keys in postings data must be numeric types");
53 
57  postings_data() = default;
58 
63  postings_data(PrimaryKey p_id);
64 
68  postings_data(const postings_data&) = default;
69 
73  postings_data(postings_data&&) = default;
74 
78  postings_data& operator=(const postings_data&) = default;
79 
84 
89  template <class Container>
90  void merge_with(Container&& cont);
91 
97  void increase_count(SecondaryKey s_id, FeatureValue amount);
98 
104  FeatureValue count(SecondaryKey s_id) const;
105 
110  const count_t& counts() const;
111 
115  void set_counts(const count_t& counts);
116 
120  void set_counts(count_t&& counts);
121 
127  template <class InputIterator>
128  void set_counts(InputIterator begin, InputIterator end);
129 
135  bool operator<(const postings_data& other) const;
136 
143  uint64_t write_packed(std::ostream& out) const;
144 
152  uint64_t write_packed_counts(std::ostream& out) const;
153 
160  template <class InputStream>
161  uint64_t read_packed(InputStream& in);
162 
166  const PrimaryKey& primary_key() const;
167 
171  void set_primary_key(PrimaryKey new_key);
172 
176  uint64_t inverse_frequency() const;
177 
181  uint64_t bytes_used() const;
182 
183  private:
185  PrimaryKey p_id_;
186 
189 };
190 
197 template <class PrimaryKey, class SecondaryKey, class FeatureValue>
198 bool operator==(
201 }
202 }
203 
204 namespace std
205 {
210 template <class PrimaryKey, class SecondaryKey, class FeatureValue>
211 struct hash<meta::index::postings_data<PrimaryKey, SecondaryKey, FeatureValue>>
212 {
213  using pdata_t
219  size_t operator()(const pdata_t& pd) const
220  {
221  return std::hash<PrimaryKey>{}(pd.primary_key());
222  }
223 };
224 }
225 
227 #endif
PrimaryKey p_id_
Primary id this postings_data represents.
Definition: postings_data.h:185
Contains top-level namespace documentation for the META toolkit.
bool operator==(const postings_data< PrimaryKey, SecondaryKey, FeatureValue > &lhs, const postings_data< PrimaryKey, SecondaryKey, FeatureValue > &rhs)
Definition: postings_data.tcc:126
postings_data()=default
PrimaryKeys may only be integral types or strings; SecondaryKeys may only be integral types...
const PrimaryKey & primary_key() const
Definition: postings_data.tcc:134
uint64_t write_packed_counts(std::ostream &out) const
Writes this postings data's counts to an output stream in a packed binary format. ...
Definition: postings_data.tcc:153
STL namespace.
uint64_t inverse_frequency() const
postings_data & operator=(const postings_data &)=default
Postings data is copy assignable.
const count_t & counts() const
Definition: postings_data.tcc:78
uint64_t read_packed(InputStream &in)
Reads a postings data object from an input stream in a packed binary format.
Definition: postings_data.tcc:198
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
void set_counts(const count_t &counts)
Definition: postings_data.tcc:85
FeatureValue count(SecondaryKey s_id) const
Definition: postings_data.tcc:71
bool operator<(const postings_data &other) const
Definition: postings_data.tcc:119
A class to represent the per-PrimaryKey data in an index's postings file.
Definition: forward_index.h:34
util::sparse_vector< SecondaryKey, FeatureValue > counts_
The (secondary_key_type, count) pairs.
Definition: postings_data.h:188
uint64_t write_packed(std::ostream &out) const
Writes this postings data to an output stream in a packed binary format.
Definition: postings_data.tcc:140
void merge_with(Container &&cont)
Definition: postings_data.tcc:27
uint64_t bytes_used() const
Definition: postings_data.tcc:236
void increase_count(SecondaryKey s_id, FeatureValue amount)
Definition: postings_data.tcc:64
void set_primary_key(PrimaryKey new_key)
Definition: postings_data.tcc:111
size_t operator()(const pdata_t &pd) const
Definition: postings_data.h:219