ModErn Text Analysis
META Enumerates Textual Applications
document.h
Go to the documentation of this file.
1 
10 #ifndef META_DOCUMENT_H_
11 #define META_DOCUMENT_H_
12 
13 #include <string>
14 #include <unordered_map>
15 #include <vector>
16 
17 #include "meta/config.h"
18 #include "meta/corpus/metadata.h"
19 #include "meta/meta.h"
20 #include "meta/util/optional.h"
21 
22 namespace meta
23 {
24 namespace corpus
25 {
26 
27 /**
28  * Represents an indexable document.
29  *
30  * Holds a document id, a class label, optional string content together
31  * with the name of that content's encoding, and a list of additional
32  * metadata fields.
33  */
34 class document
35 {
36  public:
37  /**
38  * Constructor.
39  * @param d_id The document id to assign; defaults to doc_id{0}
40  * @param label The class label to assign; defaults to "[NONE]"
41  */
42  document(doc_id d_id = doc_id{0},
43  const class_label& label = class_label{"[NONE]"});
44 
45  /**
46  * @return the class label of this document
47  */
48  const class_label& label() const;
49 
50  /**
51  * Sets the content of this document.
52  * @param content The string content to assign to this document
53  * @param encoding Name of the encoding of content; defaults to "utf-8"
54  */
55  void content(const std::string& content,
56  const std::string& encoding = "utf-8");
57 
58  /**
59  * Sets the encoding of this document.
60  * @param encoding Name of the encoding to assign
61  */
62  void encoding(const std::string& encoding);
63 
64  /**
65  * @return the content of this document
66  */
67  const std::string& content() const;
68 
69  /**
70  * @return the name of the encoding of this document's content
71  */
72  const std::string& encoding() const;
73 
74  /**
75  * @return the document id of this document
76  */
77  doc_id id() const;
78 
79  /**
80  * @return whether this document holds content (presumably: content_ set)
81  */
82  bool contains_content() const;
83 
84  /**
85  * Sets the class label of this document.
86  * @param label The new class label
87  */
88  void label(class_label label);
89 
90  /**
91  * @return the additional metadata fields of this document
92  */
93  const std::vector<metadata::field>& mdata() const;
94 
95  /**
96  * Sets the additional metadata fields of this document.
97  * @param metadata The new metadata fields, taken by rvalue reference
98  */
99  void mdata(std::vector<metadata::field>&& metadata);
100 
101  private:
102  /// The document id for this document.
103  doc_id d_id_;
104 
105  /// Which category this document would be classified into.
106  class_label label_;
107 
108  /// Other metadata fields for this document.
109  std::vector<metadata::field> mdata_;
110 
111  /// What the document contains.
112  util::optional<std::string> content_;
113 
114  /// The encoding for the content.
115  std::string encoding_;
116 };
117 }
118 }
119 
120 #endif
doc_id id() const
Definition: document.cpp:50
Contains top-level namespace documentation for the META toolkit.
const std::string & content() const
Definition: document.cpp:38
class_label label_
Which category this document would be classified into.
Definition: document.h:106
const class_label & label() const
Definition: document.cpp:21
std::string encoding_
The encoding for the content.
Definition: document.h:115
std::vector< metadata::field > mdata_
Other metadata fields for this document.
Definition: document.h:109
Represents an indexable document.
Definition: document.h:34
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
bool contains_content() const
Definition: document.cpp:55
document(doc_id d_id=doc_id{0}, const class_label &label=class_label{"[NONE]"})
Constructor.
Definition: document.cpp:15
doc_id d_id_
The document id for this document.
Definition: document.h:103
const std::string & encoding() const
Definition: document.cpp:45
const std::vector< metadata::field > & mdata() const
Definition: document.cpp:65
util::optional< std::string > content_
What the document contains.
Definition: document.h:112
Represents the collection of metadata for a document.
Definition: metadata.h:30