ModErn Text Analysis
META Enumerates Textual Applications
inverted_index.h
Go to the documentation of this file.
1 
11 #ifndef META_INVERTED_INDEX_H_
12 #define META_INVERTED_INDEX_H_
13 
14 #include <queue>
15 #include <stdexcept>
16 
18 #include "meta/config.h"
19 #include "meta/index/disk_index.h"
20 #include "meta/index/make_index.h"
22 
// Forward declarations only: the names below are declared here so this header
// can refer to them without their full definitions.
23 namespace meta
24 {
25 
26 namespace corpus
27 {
    // Referenced by the make_index friend overload and create_index() further
    // down in this header.
28 class corpus;
29 class document;
30 }
31 
32 namespace index
33 {
34 
    // NOTE(review): listing line 36 is absent from this extract, so the
    // declaration that belongs to this template header is not visible.
    // Presumably it is cached_index, which is used at listing line 92 -- confirm
    // against the real header.
35 template <class>
37 
    // Class template taking three type parameters.
38 template <class, class, class>
39 class postings_data;
40 }
41 }
42 
43 namespace meta
44 {
45 namespace index
46 {
// Basic exception for inverted_index interactions.
// Adds no members of its own; it exists so callers can catch this type
// separately from other std::runtime_error failures.
50 class inverted_index_exception : public std::runtime_error
51 {
52  public:
    // Inherit the std::runtime_error constructors unchanged.
53  using std::runtime_error::runtime_error;
54 };
55 
// The inverted_index class stores information on a corpus indexed by term_ids.
// It derives from disk_index, which holds the data structures and functions
// shared by inverted_index and forward_index.
// Copying is disabled (copy constructor and copy assignment are deleted); a
// move assignment operator is declared.
65 class inverted_index : public disk_index
66 {
67  public:
    // Key types: the primary key is a term, the secondary key is a document.
68  using primary_key_type = term_id;
69  using secondary_key_type = doc_id;
73 
    // The make_index factory overloads are friends. The constructor below is
    // protected, so they need this access to construct an index.
77  template <class Index, class... Args>
78  friend std::shared_ptr<Index> make_index(const cpptoml::table&, Args&&...);
79 
    // Factory overload that additionally receives the corpus.
83  template <class Index, class... Args>
84  friend std::shared_ptr<Index> make_index(const cpptoml::table&,
85  corpus::corpus& docs, Args&&...);
86 
    // Factory overload that returns the index wrapped in cached_index<Index, Cache>.
91  template <class Index, template <class, class> class Cache, class... Args>
92  friend std::shared_ptr<cached_index<Index, Cache>>
93  make_index(const cpptoml::table& config, Args&&... args);
94 
95  protected:
    // Constructs the index from a configuration table. Protected, so outside
    // code cannot call it directly; the friend factories above can.
100  inverted_index(const cpptoml::table& config);
101 
102  public:
    // NOTE(review): listing lines 103-106 are absent from this extract;
    // presumably the move constructor is declared here -- confirm against the
    // real header.
107 
    // Move assignment.
111  inverted_index& operator=(inverted_index&&);
112 
    // Copy construction is disabled.
116  inverted_index(const inverted_index&) = delete;
117 
    // Copy assignment is disabled.
121  inverted_index& operator=(const inverted_index&) = delete;
122 
    // Virtual destructor; defined out of line.
126  virtual ~inverted_index();
127 
    // NOTE(review): listing lines 128-132 are absent from this extract.
133 
    // Looks up the postings data for a term id and returns it through a
    // shared_ptr. postings_data_type is not declared in the visible text.
138  virtual std::shared_ptr<postings_data_type>
139  search_primary(term_id t_id) const;
140 
    // Returns an optional postings stream over doc_ids for the given term.
    // Presumably empty when the term has no postings -- TODO confirm.
145  util::optional<postings_stream<doc_id>> stream_for(term_id t_id) const;
146 
    // Presumably the number of documents containing t_id -- confirm against
    // inverted_index.cpp.
152  uint64_t doc_freq(term_id t_id) const;
153 
    // Presumably the number of times t_id occurs in document d_id -- confirm.
158  uint64_t term_freq(term_id t_id, doc_id d_id) const;
159 
    // Presumably the total number of term occurrences in the corpus -- confirm.
    // Not const-qualified, unlike the per-term queries above.
163  uint64_t total_corpus_terms();
164 
    // Presumably the number of occurrences of t_id across the whole corpus --
    // confirm. The identifier's spelling ("occurences") is part of the public
    // interface.
169  uint64_t total_num_occurences(term_id t_id) const;
170 
    // Presumably the average document length -- confirm. Returns float and is
    // not const-qualified.
174  float avg_doc_length();
175 
176  private:
    // Presumably loads an already-built index -- confirm against
    // inverted_index.cpp.
180  void load_index();
181 
    // Presumably builds the index from the corpus using the given
    // configuration -- confirm against inverted_index.cpp.
188  void create_index(const cpptoml::table& config, corpus::corpus& docs);
189 
    // Validity check; the criteria are defined out of line.
193  bool valid() const;
194 
    // Implementation of an inverted_index, defined in inverted_index.cpp.
    // Per this page's tooltips the class holds it as
    // `util::pimpl<impl> inv_impl_` (a pointer-to-implementation helper); that
    // member declaration (listing line 198) is absent from this extract.
196  class impl;
199 };
200 }
201 }
202 
203 #endif
A class for representing optional values.
Definition: optional.h:115
The inverted_index class stores information on a corpus indexed by term_ids.
Definition: inverted_index.h:65
Holds generic data structures and functions that inverted_index and forward_index both use...
Definition: disk_index.h:53
util::pimpl< impl > inv_impl_
Implementation of this index.
Definition: inverted_index.h:196
Basic exception for inverted_index interactions.
Definition: inverted_index.h:50
Definition: inverted_index.h:36
Class to assist in simple pointer-to-implementation classes.
Definition: pimpl.h:28
Represents an indexable document.
Definition: document.h:34
Provides an interface to multiple corpus input formats.
Definition: corpus.h:58
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
std::shared_ptr< Index > make_index(const cpptoml::table &config, corpus::corpus &docs, Args &&... args)
Factory method for creating indexes.
Definition: make_index.h:62
A class to represent the per-PrimaryKey data in an index's postings file.
Definition: forward_index.h:34
An insert-only probing hash table.
Definition: probe_map.h:40
Implementation of an inverted_index.
Definition: inverted_index.cpp:26