ModErn Text Analysis
META Enumerates Textual Applications
postings_stream.h
Go to the documentation of this file.
1 
10 #ifndef META_INDEX_POSTINGS_STREAM_H_
11 #define META_INDEX_POSTINGS_STREAM_H_
12 
13 #include <iterator>
14 #include <tuple>
15 #include <utility>
16 
17 #include "meta/config.h"
18 #include "meta/io/packed.h"
19 #include "meta/util/optional.h"
20 
21 namespace meta
22 {
23 namespace index
24 {
25 
31 template <class SecondaryKey, class FeatureValue = uint64_t>
33 {
34  private:
36  {
37  char_input_stream(const char* input) : input_{input}
38  {
39  // nothing
40  }
41 
42  char get()
43  {
44  return *input_++;
45  }
46 
47  const char* input_;
48  };
49 
50  public:
58  postings_stream(const char* buffer) : start_{buffer}
59  {
60  char_input_stream stream{start_};
61 
62  io::packed::read(stream, size_);
63  io::packed::read(stream, total_counts_);
64  start_ = stream.input_;
65  }
66 
73  postings_stream(const char* buffer, uint64_t size,
74  FeatureValue total_counts)
75  : start_{buffer}, size_{size}, total_counts_{total_counts}
76  {
77  // nothing
78  }
79 
83  uint64_t size() const
84  {
85  return size_;
86  }
87 
92  FeatureValue total_counts() const
93  {
94  return total_counts_;
95  }
96 
101  template <class OutputStream>
102  uint64_t write_packed(OutputStream& os) const
103  {
104  auto bytes = io::packed::write(os, size_);
105  bytes += io::packed::write(os, total_counts_);
106  for (const auto& pr : *this)
107  {
108  bytes += io::packed::write(os, pr.first);
109  bytes
110  += io::packed::write(os, static_cast<FeatureValue>(pr.second));
111  }
112  return bytes;
113  }
114 
119  class iterator
120  {
121  public:
122  using value_type = std::pair<SecondaryKey, FeatureValue>;
123  using reference = const value_type&;
124  using pointer = const value_type*;
125  using iterator_category = std::input_iterator_tag;
126  using difference_type = std::ptrdiff_t;
127 
128  friend postings_stream;
129 
130  iterator() : stream_{nullptr}, size_{0}, pos_{0}
131  {
132  // nothing
133  }
134 
135  iterator& operator++()
136  {
137  if (pos_ == size_)
138  {
139  stream_ = {nullptr};
140  size_ = 0;
141  pos_ = 0;
142  }
143  else
144  {
145  uint64_t id;
146  io::packed::read(stream_, id);
147  // gap encoding
148  count_.first += id;
149  io::packed::read(stream_, count_.second);
150  ++pos_;
151  }
152  return *this;
153  }
154 
155  util::optional<value_type> operator++(int)
156  {
157  auto proxy = *(*this);
158  ++(*this);
159  return proxy;
160  }
161 
162  reference operator*() const
163  {
164  return count_;
165  }
166 
167  pointer operator->() const
168  {
169  return &count_;
170  }
171 
172  bool operator==(const iterator& other)
173  {
174  return std::tie(stream_.input_, size_, pos_)
175  == std::tie(other.stream_.input_, other.size_, other.pos_);
176  }
177 
178  bool operator!=(const iterator& other)
179  {
180  return !(*this == other);
181  }
182 
183  private:
184  iterator(const char* start, uint64_t size)
185  : stream_{start},
186  size_{size},
187  pos_{0},
188  count_{std::make_pair(SecondaryKey{0}, 0.0)}
189  {
190  ++(*this);
191  }
192 
193  char_input_stream stream_;
194  uint64_t size_;
195  uint64_t pos_;
196  value_type count_;
197  };
198 
202  iterator begin() const
203  {
204  return {start_, size_};
205  }
206 
210  iterator end() const
211  {
212  return {};
213  }
214 
215  private:
216  const char* start_;
217  uint64_t size_;
218  FeatureValue total_counts_;
219 };
220 }
221 }
222 #endif
An iterator over the (SecondaryKey, FeatureValue) pairs of this postings list.
Definition: postings_stream.h:119
bool operator==(const postings_data< PrimaryKey, SecondaryKey, FeatureValue > &lhs, const postings_data< PrimaryKey, SecondaryKey, FeatureValue > &rhs)
Definition: postings_data.tcc:126
postings_stream(const char *buffer)
Creates a postings stream reading from the given buffer.
Definition: postings_stream.h:58
Helper input stream (char_input_stream) that reads characters one at a time from a raw buffer.
Definition: postings_stream.h:35
uint64_t size() const
Definition: postings_stream.h:83
iterator begin() const
Definition: postings_stream.h:202
iterator end() const
Definition: postings_stream.h:210
FeatureValue total_counts() const
Definition: postings_stream.h:92
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
A stream for extracting the postings list for a specific key in a postings file.
Definition: postings_stream.h:32
postings_stream(const char *buffer, uint64_t size, FeatureValue total_counts)
Creates a postings stream reading from the given buffer.
Definition: postings_stream.h:73
uint64_t write_packed(OutputStream &os) const
Writes this postings stream to an output stream in packed format.
Definition: postings_stream.h:102