ModErn Text Analysis
META Enumerates Textual Applications
disk_index.h
Go to the documentation of this file.
1 
11 #ifndef META_DISK_INDEX_H_
12 #define META_DISK_INDEX_H_
13 
14 #include <memory>
15 #include <vector>
16 
17 #include "meta/config.h"
18 #include "meta/corpus/metadata.h"
19 #include "meta/meta.h"
20 #include "meta/util/pimpl.h"
21 
// Forward declaration of cpptoml::table. In the visible part of this header
// the type is only used as a `const cpptoml::table&` parameter of the
// disk_index constructor, so a declaration (rather than the full definition)
// is sufficient here.
22 namespace cpptoml
23 {
24 class table;
25 }
26 
27 namespace meta
28 {
29 
// Forward declarations of two index-side helper types. Neither is referenced
// by any declaration in the visible part of this header.
// NOTE(review): presumably they are used by the implementation class --
// confirm against disk_index_impl.h.
30 namespace index
31 {
32 class string_list;
33 class vocabulary_map;
34 }
35 
// Forward declaration of the util::disk_vector class template.
// The `class disk_vector;` line (listing line 39) had been dropped from this
// extracted listing, leaving a dangling `template <class>` directly before
// the closing brace. It is restored from the cross-reference entry further
// down this page ("disk_vector represents a large constant-size vector ...
// Definition: disk_index.h:39").
36 namespace util
37 {
38 template <class>
39 class disk_vector;
40 }
41 }
42 
43 namespace meta
44 {
45 namespace index
46 {
47 
/// Common base class for on-disk indexes. Per this page's cross-reference
/// text it "holds generic data structures and functions that inverted_index
/// and forward_index both use".
///
/// The class is move-only (copy operations are deleted, move operations are
/// defaulted) and can only be constructed by derived classes (the
/// constructor is protected).
///
/// NOTE(review): none of the member functions except the metadata<T>()
/// template are defined in this header, so the one-line descriptions below
/// are taken from the member names and signatures only -- confirm against
/// the definitions before relying on them.
///
/// NOTE(review): the class head (listing line 53) and the impl_ member
/// (listing line 175) were missing from this extracted listing and have been
/// restored from the "Definition: disk_index.h:53" / "disk_index.h:175"
/// cross-reference entries further down this page.
53 class disk_index
54 {
55  public:
    /// Virtual, defaulted destructor: allows destruction through a
    /// disk_index pointer or reference to a derived index.
59  virtual ~disk_index() = default;
60 
    /// @return the name of this index
64  std::string index_name() const;
65 
    /// @return the number of documents in this index
69  uint64_t num_docs() const;
70 
    /// Deprecated: the attached message directs callers to metadata().
    /// @param d_id the document to look up
75  META_DEPRECATED("use metadata() instead")
76  std::string doc_name(doc_id d_id) const;
77 
    /// Deprecated: the attached message directs callers to metadata().
    /// @param d_id the document to look up
82  META_DEPRECATED("use metadata() instead")
83  std::string doc_path(doc_id d_id) const;
84 
    /// @return the ids of the documents in this index (returned by value)
88  std::vector<doc_id> docs() const;
89 
    /// @param d_id the document to look up
    /// @return the size of that document
    /// NOTE(review): unit of "size" (e.g. term count) is not visible here.
95  uint64_t doc_size(doc_id d_id) const;
96 
    /// @param d_id the document to look up
    /// @return the class label of that document
102  class_label label(doc_id d_id) const;
103 
    /// @param d_id the document to look up
    /// @return the label id of that document
108  label_id lbl_id(doc_id d_id) const;
109 
    /// @param label the class label to convert
    /// @return the label id corresponding to that class label
114  label_id id(class_label label) const;
115 
    /// Inverse direction of id(): label id to class label.
    /// @param l_id the label id to convert
120  class_label class_label_from_id(label_id l_id) const;
121 
    /// @return the number of labels known to this index
125  uint64_t num_labels() const;
126 
    /// @return the class labels known to this index (returned by value)
131  std::vector<class_label> class_labels() const;
132 
    /// @param d_id the document to look up
    /// @return that document's metadata; corpus::metadata is described on
    /// this page as "the collection of metadata for a document"
137  corpus::metadata metadata(doc_id d_id) const;
138 
    /// Convenience overload: fetches the metadata object for `d_id` via the
    /// non-template metadata() above and forwards to its get<T>(name).
    /// @tparam T the type requested for the metadata field
    /// @param d_id the document to look up
    /// @param name the metadata field name passed to get<T>()
    /// @return whatever corpus::metadata::get<T>(name) returns
    /// NOTE(review): presumably an empty optional when the field is absent
    /// -- confirm in metadata.h.
144  template <class T>
145  util::optional<T> metadata(doc_id d_id, const std::string& name) const
146  {
147  return metadata(d_id).get<T>(name);
148  }
149 
    /// Virtual so that derived index types may override it.
    /// @param d_id the document to look up
    /// @return the number of unique terms in that document
154  virtual uint64_t unique_terms(doc_id d_id) const;
155 
    /// Virtual so that derived index types may override it.
    /// @return the number of unique terms in the index
159  virtual uint64_t unique_terms() const;
160 
    /// @param term the term text to look up
    /// @return the id for that term
    /// NOTE(review): unlike the other lookups this member is not const;
    /// the reason is not visible in this header -- confirm before calling
    /// it where a const index is expected.
165  term_id get_term_id(const std::string& term);
166 
    /// @param t_id the term id to look up
    /// @return the text of that term
171  std::string term_text(term_id t_id) const;
172 
173  protected:
    /// Forward declaration of the implementation class so that impl_ below
    /// can name it.
    /// NOTE(review): placement on listing line 174 is assumed; this page
    /// only states that the class itself is defined in disk_index_impl.h.
174  class disk_index_impl;
    /// Implementation of this disk_index (pointer-to-implementation
    /// wrapper from meta/util/pimpl.h).
175  util::pimpl<disk_index_impl> impl_;
178 
    /// Protected constructor: only derived index classes can create a
    /// disk_index.
    /// @param config the configuration table for the index
    /// @param name the name of the index
184  disk_index(const cpptoml::table& config, const std::string& name);
185 
    /// Copy construction is disabled.
189  disk_index(const disk_index&) = delete;
190 
    /// Copy assignment is disabled.
194  disk_index& operator=(const disk_index&) = delete;
195 
196  public:
    /// Move construction is public and compiler-generated.
200  disk_index(disk_index&&) = default;
201 
    /// Move assignment is public and compiler-generated.
205  disk_index& operator=(disk_index&&) = default;
206 };
207 }
208 }
209 
210 #endif
Contains top-level namespace documentation for the META toolkit.
A class for representing optional values.
Definition: optional.h:115
Holds generic data structures and functions that inverted_index and forward_index both use...
Definition: disk_index.h:53
class_label label(const std::string &text)
Extracts a class_label from a string in libsvm format.
Definition: libsvm_parser.cpp:19
Class to assist in simple pointer-to-implementation classes.
Definition: pimpl.h:28
The implementation of a disk_index.
Definition: disk_index_impl.h:49
disk_vector represents a large constant-size vector that does not necessarily fit in memory...
Definition: disk_index.h:39
util::optional< T > metadata(doc_id d_id, const std::string &name) const
Definition: disk_index.h:145
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
util::pimpl< disk_index_impl > impl_
Implementation of this disk_index.
Definition: disk_index.h:175
util::optional< T > get(const std::string &name) const
Definition: metadata.h:80
Represents the collection of metadata for a document.
Definition: metadata.h:30
Definition: analyzer.h:20