ModErn Text Analysis
META Enumerates Textual Applications
forward_index.h
Go to the documentation of this file.
1 
10 #ifndef META_FORWARD_INDEX_H_
11 #define META_FORWARD_INDEX_H_
12 
13 #include <stdexcept>
14 
15 #include "meta/config.h"
16 #include "meta/index/disk_index.h"
17 #include "meta/index/make_index.h"
19 #include "meta/learn/instance.h"
20 #include "meta/meta.h"
21 #include "meta/util/disk_vector.h"
22 #include "meta/util/optional.h"
23 
// Forward declarations of types that this header refers to by name.
24 namespace meta
25 {
26 namespace corpus
27 {
// Declared here without a definition; this header uses it as the
// `corpus::corpus&` parameter type of make_index and create_index.
28 class corpus;
29 }
30 
31 namespace index
32 {
// NOTE(review): the listing skips its line 34 here, so the declaration that
// this three-parameter template header introduces is not visible in this
// extract. Confirm the declared name against the original header.
33 template <class, class, class>
35 }
36 }
37 
38 namespace meta
39 {
40 namespace index
41 {
/**
 * Basic exception for forward_index interactions.
 *
 * Publicly derives from std::runtime_error and declares no members of its
 * own, so it can be caught as a std::runtime_error.
 */
45 class forward_index_exception : public std::runtime_error
46 {
47  public:
    /// Inherit every constructor of std::runtime_error unchanged.
48  using std::runtime_error::runtime_error;
49 };
50 
/**
 * The forward_index stores information on a corpus by doc_ids.
 *
 * Publicly derives from disk_index. Copy construction and copy assignment
 * are deleted; a move assignment operator is declared. The constructor
 * taking a configuration table is protected, and the make_index factory
 * overloads are declared as friends.
 *
 * NOTE(review): this listing is an extract. Some declarations appear to be
 * missing or truncated; see the notes on individual members below.
 */
56 class forward_index : public disk_index
57 {
58  public:
    /**
     * Friend declaration of the make_index factory overload that takes only
     * a configuration table plus forwarded arguments.
     */
62  template <class Index, class... Args>
63  friend std::shared_ptr<Index> make_index(const cpptoml::table& config,
64  Args&&... args);
65 
    /**
     * Friend declaration of the make_index factory overload that also takes
     * the corpus (`docs`) in addition to the configuration table.
     */
69  template <class Index, class... Args>
70  friend std::shared_ptr<Index> make_index(const cpptoml::table& config,
71  corpus::corpus& docs,
72  Args&&... args);
    /**
     * Friend declaration of the make_index factory overload that returns a
     * cached_index<Index, Cache> wrapping the index.
     */
77  template <class Index, template <class, class> class Cache, class... Args>
78  friend std::shared_ptr<cached_index<Index, Cache>>
79  make_index(const cpptoml::table& config_file, Args&&... args);
80 
    /// Primary key type of this index: documents (doc_id).
81  using primary_key_type = doc_id;
    /// Secondary key type of this index: terms (term_id).
82  using secondary_key_type = term_id;
    // NOTE(review): the listing skips its lines 83-86 here. The
    // `postings_data_type` name used by search_primary() below is not
    // declared in any visible line; it may be declared in the skipped lines
    // or inherited. Confirm against the original header.
87 
88  protected:
    /**
     * Constructs the index from a configuration table.
     * Protected, so it is not part of the public interface; the friend
     * make_index overloads above are granted access to it.
     * @param config The configuration table for this index
     */
93  forward_index(const cpptoml::table& config);
94 
95  public:
    // NOTE(review): the listing skips to its line 100 and shows no
    // declaration there. Since move assignment is declared next, a move
    // constructor was presumably declared here; confirm against the
    // original header.
100 
    /**
     * Move assignment operator.
     * @return a reference to this forward_index
     */
105  forward_index& operator=(forward_index&&);
106 
    /// Copy construction is disabled.
110  forward_index(const forward_index&) = delete;
111 
    /// Copy assignment is disabled.
115  forward_index& operator=(const forward_index&) = delete;
116 
    /// Virtual destructor; declared here, defined outside this header.
120  virtual ~forward_index();
121 
    /**
     * @param d_id The doc_id (the primary key of this index) to look up
     * @return a shared pointer to the postings_data_type for d_id
     */
126  virtual std::shared_ptr<postings_data_type>
127  search_primary(doc_id d_id) const;
128 
    /**
     * @param d_id The doc_id to look up
     * NOTE(review): the return-type line of this declaration (listing
     * lines 129-133) is not visible in this extract; confirm the return
     * type against the original header.
     */
134  stream_for(doc_id d_id) const;
135 
    /**
     * @param d_id The doc_id to produce data for
     * @return a string for document d_id; presumably its contents in
     * liblinear format, judging by the name (confirm against the
     * implementation)
     */
140  std::string liblinear_data(doc_id d_id) const;
141 
    /**
     * Overrides a virtual function declared in a base class.
     * @return a count as uint64_t; presumably the number of unique terms in
     * the index, judging by the name (confirm against the implementation)
     */
145  virtual uint64_t unique_terms() const override;
146 
    /**
     * Non-const member function.
     * @param doc The document to process
     * @return a learn::feature_vector for doc; presumably built by running
     * the document through this index's analyzers (confirm against the
     * implementation)
     */
151  learn::feature_vector tokenize(const corpus::document& doc);
152 
153  private:
    /// Presumably loads an already-built index; confirm against the implementation.
157  void load_index();
158 
    /**
     * Presumably builds the index from a corpus; confirm against the
     * implementation.
     * @param config The configuration table
     * @param docs The corpus to read documents from
     */
165  void create_index(const cpptoml::table& config, corpus::corpus& docs);
166 
    /// @return a bool; presumably whether the index is usable (confirm against the implementation)
170  bool valid() const;
171 
    /// Forward declaration of the nested implementation class; it is not defined in this header.
    // NOTE(review): no data member follows in this extract (the listing
    // skips its lines 174-175). The cross-reference text at the end of this
    // page lists `util::pimpl< impl > fwd_impl_`, so that member was
    // presumably declared here; confirm against the original header.
173  class impl;
176 };
177 }
178 }
179 
180 #endif
Contains top-level namespace documentation for the META toolkit.
A class for representing optional values.
Definition: optional.h:115
Implementation of a forward_index.
Definition: forward_index.cpp:31
Holds generic data structures and functions that inverted_index and forward_index both use...
Definition: disk_index.h:53
Class to assist in simple pointer-to-implementation classes.
Definition: pimpl.h:28
Basic exception for forward_index interactions.
Definition: forward_index.h:45
Represents an indexable document.
Definition: document.h:34
Provides an interface to multiple corpus input formats.
Definition: corpus.h:58
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
std::shared_ptr< Index > make_index(const cpptoml::table &config, corpus::corpus &docs, Args &&... args)
Factory method for creating indexes.
Definition: make_index.h:62
A class to represent the per-PrimaryKey data in an index's postings file.
Definition: forward_index.h:34
The forward_index stores information on a corpus by doc_ids.
Definition: forward_index.h:56
util::pimpl< impl > fwd_impl_
Implementation of this index.
Definition: forward_index.h:173