ModErn Text Analysis
META Enumerates Textual Applications
metadata.h
Go to the documentation of this file.
1 
10 #ifndef META_CORPUS_METADATA_H_
11 #define META_CORPUS_METADATA_H_
12 
#include <cstdint>
#include <new>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "cpptoml.h"
#include "meta/config.h"
#include "meta/io/packed.h"
#include "meta/util/optional.h"
21 
22 namespace meta
23 {
24 namespace corpus
25 {
26 
30 class metadata
31 {
32  public:
/**
 * Type tag for a field: names which kind of value a metadata field
 * holds. The underlying type is a single byte (uint8_t).
 */
enum class field_type : uint8_t
{
    SIGNED_INT = 0,
    UNSIGNED_INT = 1,
    DOUBLE = 2,
    STRING = 3
};
43 
47  struct field_info
48  {
49  std::string name;
50  field_type type;
51 
52  field_info() = default;
53  field_info(std::string n, field_type ft) : name{std::move(n)}, type{ft}
54  {
55  // nothing
56  }
57  field_info(const field_info&) = default;
58  field_info(field_info&&) = default;
59  field_info& operator=(const field_info&) = default;
60  field_info& operator=(field_info&&) = default;
61  ~field_info() = default;
62  };
63 
64  // I want the below to be a const field_info, but g++ gives a cryptic
65  // compiler error in that case... clang++ accepts it just fine. -sigh-
66  using schema_type = std::vector<field_info>;
67 
68  metadata(const char* start, const schema_type& sch)
69  : schema_{&sch}, start_{start}
70  {
71  // nothing
72  }
73 
79  template <class T>
80  util::optional<T> get(const std::string& name) const
81  {
83  for (uint64_t i = 0; i < schema_->size(); ++i)
84  {
85  switch ((*schema_)[i].type)
86  {
87  case field_type::SIGNED_INT:
88  {
89  int64_t si;
90  io::packed::read(stream, si);
91  if ((*schema_)[i].name == name)
92  return {field{si}};
93  break;
94  }
95 
96  case field_type::UNSIGNED_INT:
97  {
98  uint64_t ui;
99  io::packed::read(stream, ui);
100  if ((*schema_)[i].name == name)
101  return {field{ui}};
102  break;
103  }
104 
105  case field_type::DOUBLE:
106  {
107  double d;
108  io::packed::read(stream, d);
109  if ((*schema_)[i].name == name)
110  return {field{d}};
111  break;
112  }
113 
114  case field_type::STRING:
115  {
116  std::string s{stream.input_};
117  stream.input_ += s.size() + 1;
118  if ((*schema_)[i].name == name)
119  return {field{std::move(s)}};
120  break;
121  }
122  }
123  }
124 
125  return util::nullopt;
126  }
127 
131  const schema_type& schema() const
132  {
133  return *schema_;
134  }
135 
139  struct field
140  {
141  union {
142  int64_t sign_int;
143  uint64_t usign_int;
144  double doub;
145  std::string str;
146  };
147 
148  field_type type;
149 
150  field(int64_t sgn) : sign_int{sgn}, type{field_type::SIGNED_INT}
151  {
152  // nothing
153  }
154 
155  field(uint64_t usgn) : usign_int{usgn}, type{field_type::UNSIGNED_INT}
156  {
157  // nothing
158  }
159 
160  field(double d) : doub{d}, type{field_type::DOUBLE}
161  {
162  // nothing
163  }
164 
165  field(std::string s) : type{field_type::STRING}
166  {
167  new (&str) std::string(std::move(s));
168  }
169 
170  field(field&& other) : type{other.type}
171  {
172  switch (type)
173  {
174  case field_type::SIGNED_INT:
175  sign_int = other.sign_int;
176  break;
177 
178  case field_type::UNSIGNED_INT:
179  usign_int = other.usign_int;
180  break;
181 
182  case field_type::DOUBLE:
183  doub = other.doub;
184  break;
185 
186  case field_type::STRING:
187  new (&str) std::string(std::move(other.str));
188  break;
189  }
190  }
191 
192  field(const field& other) : type{other.type}
193  {
194  switch (type)
195  {
196  case field_type::SIGNED_INT:
197  sign_int = other.sign_int;
198  break;
199 
200  case field_type::UNSIGNED_INT:
201  usign_int = other.usign_int;
202  break;
203 
204  case field_type::DOUBLE:
205  doub = other.doub;
206  break;
207 
208  case field_type::STRING:
209  new (&str) std::string(other.str);
210  break;
211  }
212  }
213 
214  field& operator=(field&& other)
215  {
216  if (type == field_type::STRING)
217  (&str)->~basic_string();
218 
219  switch (other.type)
220  {
221  case field_type::SIGNED_INT:
222  sign_int = other.sign_int;
223  break;
224 
225  case field_type::UNSIGNED_INT:
226  usign_int = other.usign_int;
227  break;
228 
229  case field_type::DOUBLE:
230  doub = other.doub;
231  break;
232 
233  case field_type::STRING:
234  new (&str) std::string(std::move(other.str));
235  break;
236  }
237 
238  type = other.type;
239  return *this;
240  }
241 
242  field& operator=(const field& other)
243  {
244  if (type == field_type::STRING)
245  (&str)->~basic_string();
246 
247  switch (other.type)
248  {
249  case field_type::SIGNED_INT:
250  sign_int = other.sign_int;
251  break;
252 
253  case field_type::UNSIGNED_INT:
254  usign_int = other.usign_int;
255  break;
256 
257  case field_type::DOUBLE:
258  doub = other.doub;
259  break;
260 
261  case field_type::STRING:
262  new (&str) std::string(other.str);
263  break;
264  }
265 
266  return *this;
267  }
268 
269  ~field()
270  {
271  // invoke string destructor if needed
272  if (type == field_type::STRING)
273  (&str)->~basic_string();
274  }
275 
276  operator int64_t() const
277  {
278  return sign_int;
279  }
280 
281  operator uint64_t() const
282  {
283  return usign_int;
284  }
285 
286  operator double() const
287  {
288  return doub;
289  }
290 
291  operator std::string() const
292  {
293  return str;
294  }
295  };
296 
297  private:
/**
 * Minimal character source over a raw buffer: a cursor that hands out one
 * character at a time. No bounds are tracked, so the caller is
 * responsible for not reading past the end of the underlying data.
 * NOTE(review): presumably get() is the interface io::packed::read
 * expects of its input -- confirm against meta/io/packed.h.
 */
struct metadata_input_stream
{
    /// @param input the position at which reading starts
    metadata_input_stream(const char* input) : input_{input}
    {
        // nothing
    }

    /// @return the character at the cursor; the cursor then moves on by one
    char get()
    {
        return *input_++;
    }

    /// current read position within the buffer
    const char* input_;
};
312 
314  const schema_type* schema_;
315 
317  const char* start_;
318 };
319 
325 metadata::schema_type metadata_schema(const cpptoml::table& config);
326 
/**
 * Exception class for metadata operations. Behaves exactly like
 * std::runtime_error and exists so that metadata failures can be caught
 * by their own type.
 */
class metadata_exception : public std::runtime_error
{
  public:
    // inherit the base class constructors unchanged
    using runtime_error::runtime_error;
};
335 }
336 }
337 #endif
A class for representing optional values.
Definition: optional.h:115
const schema_type * schema_
pointer to the metadata_file's schema
Definition: metadata.h:314
Exception class for metadata operations.
Definition: metadata.h:330
metadata::schema_type metadata_schema(const cpptoml::table &config)
Extracts a metadata schema from a configuration file.
Definition: metadata.cpp:13
const schema_type & schema() const
Returns the schema for this metadata object.
Definition: metadata.h:131
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
constexpr nullopt_t nullopt
A global nullopt_t constant.
Definition: optional.h:56
Tagged union to represent a single metadata field.
Definition: metadata.h:139
field_type
Type tag for a field.
Definition: metadata.h:36
const char * start_
the start of the metadata within the metadata_file
Definition: metadata.h:317
Pair for storing the schema: contains its name and type.
Definition: metadata.h:47
Represents the collection of metadata for a document.
Definition: metadata.h:30