ModErn Text Analysis
META Enumerates Textual Applications
postings_file.h
Go to the documentation of this file.
1 
10 #ifndef META_INDEX_POSTINGS_FILE_H_
11 #define META_INDEX_POSTINGS_FILE_H_
12 
13 #include "meta/config.h"
16 #include "meta/io/mmap_file.h"
17 #include "meta/util/disk_vector.h"
18 #include "meta/util/optional.h"
19 
20 namespace meta
21 {
22 namespace index
23 {
24 
30 template <class PrimaryKey, class SecondaryKey, class FeatureValue = uint64_t>
32 {
33  public:
34  using postings_data_type
36 
41  postings_file(const std::string& filename)
42  : postings_{filename}, byte_locations_{filename + "_index"}
43  {
44  // nothing to do: both members are initialized from filename above
45  }
46 
54  find_stream(PrimaryKey pk) const
55  {
56  if (pk < byte_locations_.size())
58  postings_.begin() + byte_locations_.at(pk)};
59  return util::nullopt;
60  }
61 
68  std::shared_ptr<postings_data_type> find(PrimaryKey pk) const
69  {
70  auto pdata = std::make_shared<postings_data_type>(pk);
71  uint64_t idx{pk};
72 
73  // if pk is in-bounds of byte_locations_, populate counts from its stream; otherwise pdata is returned as constructed from pk
74  if (idx < byte_locations_.size())
75  {
76  auto stream = find_stream(pk);
77  pdata->set_counts(stream->begin(), stream->end());
78  }
79 
80  return pdata;
81  }
82 
83  private:
84  io::mmap_file postings_;
85  util::disk_vector<uint64_t> byte_locations_;
86 };
87 }
88 }
89 #endif
A class for representing optional values.
Definition: optional.h:115
util::optional< postings_stream< SecondaryKey, FeatureValue > > find_stream(PrimaryKey pk) const
Obtains a postings stream object for the given primary key.
Definition: postings_file.h:54
Memory maps a text file readonly.
Definition: mmap_file.h:27
char * begin() const
Definition: mmap_file.cpp:58
T & at(uint64_t idx)
Definition: disk_vector.tcc:106
postings_file(const std::string &filename)
Opens a postings file.
Definition: postings_file.h:41
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
constexpr nullopt_t nullopt
A global nullopt_t constant.
Definition: optional.h:56
File that stores the postings list for an index on disk.
Definition: postings_file.h:31
A stream for extracting the postings list for a specific key in a postings file.
Definition: postings_stream.h:32
A class to represent the per-PrimaryKey data in an index's postings file.
Definition: forward_index.h:34
uint64_t size() const
Definition: disk_vector.tcc:126
std::shared_ptr< postings_data_type > find(PrimaryKey pk) const
Obtains a postings data object for the given primary key.
Definition: postings_file.h:68