ModErn Text Analysis
META Enumerates Textual Applications
packed.h
Go to the documentation of this file.
1 
10 #ifndef META_IO_PACKED_H_
11 #define META_IO_PACKED_H_
12 
#include <cassert>
#include <cmath>
#include <cstdint>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "meta/config.h"
#include "meta/util/identifiers.h"
#include "meta/util/string_view.h"
21 
22 namespace meta
23 {
24 namespace io
25 {
26 namespace packed
27 {
28 
/**
 * Writes an unsigned integer to a stream in a variable-length, base-128
 * representation: seven payload bits per byte, least-significant group
 * first, with the high bit set on every byte except the final one.
 *
 * @param stream The stream to write to (must provide `put(char)`)
 * @param value The unsigned integer to write
 * @return the number of bytes written to the stream
 */
template <class OutputStream, class T>
typename std::enable_if<!std::is_floating_point<T>::value
                            && std::is_unsigned<T>::value
                            && !std::is_same<T, bool>::value,
                        uint64_t>::type
packed_write(OutputStream& stream, T value)
{
    // The terminating byte is always emitted, so start the count at one.
    uint64_t written = 1;
    for (; value > 127; value >>= 7)
    {
        // Low seven bits plus the continuation flag.
        stream.put(static_cast<char>((value & 127) | 128));
        ++written;
    }
    stream.put(static_cast<char>(value));
    return written;
}
55 
/**
 * Writes a boolean to a stream as a single packed unsigned byte
 * (1 for true, 0 for false).
 *
 * @param stream The stream to write to
 * @param value The boolean to write
 * @return the number of bytes written to the stream
 */
template <class OutputStream, class T>
typename std::enable_if<std::is_same<T, bool>::value, uint64_t>::type
packed_write(OutputStream& stream, T value)
{
    const uint8_t flag = value ? uint8_t{1} : uint8_t{0};
    return packed_write(stream, flag);
}
68 
/**
 * Writes a signed integer to a stream in a packed representation. The
 * value is first mapped to an unsigned integer with a zig-zag encoding
 * (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...) so that values of small
 * magnitude, positive or negative, produce short encodings; the result
 * is then written with the unsigned overload of packed_write.
 *
 * The mapping is computed entirely on the unsigned counterpart of T so
 * that no signed left shift (undefined for negative or overflowing
 * operands before C++20) and no signed right shift of a negative value
 * (implementation-defined before C++20) is performed.
 *
 * @param stream The stream to write to
 * @param value The signed integer to write
 * @return the number of bytes written to the stream
 */
template <class OutputStream, class T>
typename std::enable_if<!std::is_floating_point<T>::value
                            && std::is_signed<T>::value,
                        uint64_t>::type
packed_write(OutputStream& stream, T value)
{
    using unsigned_type = typename std::make_unsigned<T>::type;

    // Reinterpret the value's bits modulo 2^N; well-defined for all inputs.
    const auto bits = static_cast<unsigned_type>(value);
    // All ones for negative inputs, all zeros otherwise (what an
    // arithmetic right shift by N-1 would have produced).
    const unsigned_type sign_mask
        = value < 0 ? static_cast<unsigned_type>(~unsigned_type{0})
                    : unsigned_type{0};

    const auto elem = static_cast<unsigned_type>((bits << 1) ^ sign_mask);
    return packed_write(stream, elem);
}
91 
/**
 * Writes a floating point value to a stream in a packed representation.
 * The value is decomposed with std::frexp into an integral mantissa and
 * a base-2 exponent, each of which is then written with the signed
 * integer overload of packed_write (mantissa first, exponent second).
 *
 * @param stream The stream to write to
 * @param value The floating point value to write
 * @return the number of bytes written to the stream
 */
template <class OutputStream, class T>
typename std::enable_if<std::is_floating_point<T>::value, uint64_t>::type
packed_write(OutputStream& stream, T value)
{
    const auto digits = std::numeric_limits<T>::digits;

    int binary_exp;
    const auto fraction = std::frexp(value, &binary_exp);

    // Scale the fraction so that every significant bit lands in the
    // integer part, and compensate in the exponent.
    auto mantissa = static_cast<int64_t>(fraction * (uint64_t{1} << digits));
    int64_t exponent = binary_exp - digits;

    // Following the dlib approach referenced by the original code: drop
    // trailing zero bytes of the mantissa (moving them into the exponent)
    // so that the mantissa serializes more compactly.
    uint8_t stripped = 0;
    while (stripped < sizeof(mantissa) && (mantissa & 0xFF) == 0)
    {
        mantissa >>= 8;
        exponent += 8;
        ++stripped;
    }

    const auto mantissa_bytes = packed_write(stream, mantissa);
    const auto exponent_bytes = packed_write(stream, exponent);
    return mantissa_bytes + exponent_bytes;
}
128 
138 template <class OutputStream>
139 uint64_t packed_write(OutputStream& stream, util::string_view value)
140 {
141  for (const auto& c : value)
142  {
143  stream.put(c);
144  }
145  stream.put('\0');
146  return value.size() + 1;
147 }
148 
/**
 * Writes an enumeration value to a stream by converting it to its
 * underlying integral type and writing that in packed form.
 *
 * @param stream The stream to write to
 * @param value The enumerator to write
 * @return the number of bytes written to the stream
 */
template <class OutputStream, class T>
typename std::enable_if<std::is_enum<T>::value, uint64_t>::type
packed_write(OutputStream& stream, T value)
{
    using underlying = typename std::underlying_type<T>::type;
    return packed_write(stream, static_cast<underlying>(value));
}
164 
173 template <class OutputStream, class Tag, class T>
174 uint64_t packed_write(OutputStream& stream,
175  const util::identifier<Tag, T>& value)
176 {
177  return packed_write(stream, static_cast<const T&>(value));
178 }
179 
/**
 * Writes a pair to a stream in a packed representation: the first
 * element is written, then the second.
 *
 * The two writes are performed as separate statements. Combining them as
 * operands of a single `+` expression would leave their relative order
 * unspecified, which could emit the second element before the first.
 *
 * @param os The stream to write to
 * @param pr The pair to write
 * @return the total number of bytes written to the stream
 */
template <class OutputStream, class T1, class T2>
uint64_t packed_write(OutputStream& os, const std::pair<T1, T2>& pr)
{
    uint64_t bytes = packed_write(os, pr.first);
    bytes += packed_write(os, pr.second);
    return bytes;
}
192 
/**
 * Writes a vector to a stream in a packed representation: the element
 * count is written first, followed by each element in order.
 *
 * @param os The stream to write to
 * @param vec The vector to write
 * @return the total number of bytes written to the stream
 */
template <class OutputStream, class T, class Alloc>
uint64_t packed_write(OutputStream& os, const std::vector<T, Alloc>& vec)
{
    uint64_t total = packed_write(os, vec.size());
    for (auto it = vec.begin(); it != vec.end(); ++it)
    {
        total += packed_write(os, *it);
    }
    return total;
}
208 
/**
 * Wrapper function for enabling ADL for io::packed::write. Forwards to
 * an unqualified call to packed_write.
 *
 * @param os The stream to write to
 * @param value The value to write
 * @return the number of bytes written, as reported by packed_write
 */
template <class OutputStream, class T>
uint64_t write(OutputStream& os, const T& value)
{
    const uint64_t bytes = packed_write(os, value);
    return bytes;
}
221 
/**
 * Reads an unsigned integer from its packed, variable-length (base-128)
 * representation: seven payload bits per byte, least-significant group
 * first, with the high bit of a byte signalling that another byte
 * follows.
 *
 * At most ceil(bits(T) / 7) bytes are consumed. A well-formed encoding of
 * a T never needs more, and the bound guarantees termination (and keeps
 * every shift within the width of T) when the input is truncated or
 * corrupt, e.g. when `stream.get()` keeps yielding an end-of-file
 * marker whose low byte has the continuation bit set.
 *
 * @param stream The stream to read from (must provide `get()`)
 * @param value The element to store the decoded integer in
 * @return the number of bytes consumed from the stream
 */
template <class InputStream, class T>
typename std::enable_if<!std::is_floating_point<T>::value
                            && std::is_unsigned<T>::value
                            && !std::is_same<T, bool>::value,
                        uint64_t>::type
packed_read(InputStream& stream, T& value)
{
    // Longest possible encoding of a T: ceil(bit width / 7) bytes.
    constexpr uint64_t max_bytes = (std::numeric_limits<T>::digits + 6) / 7;

    value = 0;
    uint64_t size = 0;
    uint8_t byte;
    do
    {
        byte = static_cast<uint8_t>(stream.get());
        value |= static_cast<T>(static_cast<T>(byte & 127) << (7 * size));
        ++size;
    } while ((byte & 128) && size < max_bytes);
    return size;
}
247 
/**
 * Reads a boolean from its packed representation: one packed unsigned
 * byte is decoded, and any non-zero value is treated as true.
 *
 * @param stream The stream to read from
 * @param value The element to store the decoded boolean in
 * @return the number of bytes consumed from the stream
 */
template <class InputStream, class T>
typename std::enable_if<std::is_same<T, bool>::value, uint64_t>::type
packed_read(InputStream& stream, T& value)
{
    uint8_t raw;
    const auto consumed = packed_read(stream, raw);
    value = (raw != 0);
    return consumed;
}
264 
/**
 * Reads a signed integer from its packed representation. The unsigned
 * counterpart of T is decoded first and then mapped back from its
 * zig-zag form (0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...).
 *
 * @param stream The stream to read from
 * @param value The element to store the decoded integer in
 * @return the number of bytes consumed from the stream
 */
template <class InputStream, class T>
typename std::enable_if<!std::is_floating_point<T>::value
                            && std::is_signed<T>::value,
                        uint64_t>::type
packed_read(InputStream& stream, T& value)
{
    using unsigned_type = typename std::make_unsigned<T>::type;

    unsigned_type encoded;
    const auto consumed = packed_read(stream, encoded);

    // The lowest bit carries the sign; the remaining bits the magnitude.
    const auto magnitude = encoded >> 1;
    const auto sign = -(encoded & 1);
    value = magnitude ^ sign;

    return consumed;
}
289 
/**
 * Reads a floating point value from its packed representation: a signed
 * integral mantissa followed by a signed base-2 exponent, combined as
 * mantissa * 2^exponent.
 *
 * The scaling is done with std::ldexp rather than by multiplying with
 * std::pow(2.0, exponent): the power on its own can underflow to zero
 * (exponent below -1074 for double) even though the final product is
 * representable, which would turn subnormal values into zero.
 *
 * @param stream The stream to read from
 * @param value The element to store the decoded value in
 * @return the number of bytes consumed from the stream
 */
template <class InputStream, class T>
typename std::enable_if<std::is_floating_point<T>::value, uint64_t>::type
packed_read(InputStream& stream, T& value)
{
    int64_t mantissa;
    int64_t exponent;

    auto bytes = packed_read(stream, mantissa);
    bytes += packed_read(stream, exponent);

    // std::ldexp takes an int exponent; saturate rather than wrap if a
    // corrupt stream yields something outside that range.
    constexpr int64_t min_exp = std::numeric_limits<int>::min();
    constexpr int64_t max_exp = std::numeric_limits<int>::max();
    const int64_t clamped
        = exponent < min_exp ? min_exp
                             : (exponent > max_exp ? max_exp : exponent);

    value = static_cast<T>(std::ldexp(static_cast<double>(mantissa),
                                      static_cast<int>(clamped)));
    return bytes;
}
309 
/**
 * Reads a string from its packed representation: characters are taken
 * from the stream until a null byte is encountered. The terminator is
 * consumed but not stored. Any previous content of `value` is discarded.
 *
 * @param stream The stream to read from (must provide `get()`)
 * @param value The string to store the decoded characters in
 * @return the number of bytes consumed (the string's size plus one for
 * the terminator)
 */
template <class InputStream>
uint64_t packed_read(InputStream& stream, std::string& value)
{
    value.clear();

    auto c = stream.get();
    while (c != '\0')
    {
        value += static_cast<char>(c);
        c = stream.get();
    }

    return value.size() + 1;
}
325 
/**
 * Reads an enumeration value from its packed representation by decoding
 * a value of the enum's underlying integral type and converting it.
 *
 * @param stream The stream to read from
 * @param value The element to store the decoded enumerator in
 * @return the number of bytes consumed from the stream
 */
template <class InputStream, class T>
typename std::enable_if<std::is_enum<T>::value, uint64_t>::type
packed_read(InputStream& stream, T& value)
{
    using underlying = typename std::underlying_type<T>::type;

    underlying raw;
    const auto consumed = packed_read(stream, raw);
    value = static_cast<T>(raw);
    return consumed;
}
343 
352 template <class InputStream, class Tag, class T>
353 uint64_t packed_read(InputStream& stream, util::identifier<Tag, T>& value)
354 {
355  return packed_read(stream, static_cast<T&>(value));
356 }
357 
/**
 * Reads a pair from its packed representation: the first element is
 * decoded, then the second.
 *
 * The two reads are performed as separate statements. Combining them as
 * operands of a single `+` expression would leave their relative order
 * unspecified, which could decode the second element from the bytes
 * that belong to the first.
 *
 * @param is The stream to read from
 * @param pr The pair to store the decoded elements in
 * @return the total number of bytes consumed from the stream
 */
template <class InputStream, class T1, class T2>
uint64_t packed_read(InputStream& is, std::pair<T1, T2>& pr)
{
    uint64_t bytes = packed_read(is, pr.first);
    bytes += packed_read(is, pr.second);
    return bytes;
}
370 
/**
 * Reads a vector from its packed representation: an element count
 * followed by that many packed elements. Any previous content of `vec`
 * is discarded.
 *
 * @param is The stream to read from
 * @param vec The vector to store the decoded elements in
 * @return the total number of bytes consumed from the stream
 */
template <class InputStream, class T, class Alloc>
uint64_t packed_read(InputStream& is, std::vector<T, Alloc>& vec)
{
    uint64_t size;
    auto bytes = packed_read(is, size);

    vec.clear();
    vec.reserve(size);

    for (uint64_t i = 0; i < size; ++i)
    {
        T val;
        bytes += packed_read(is, val);
        // The temporary is not used again, so hand its contents over
        // instead of copying them (matters for strings, nested vectors).
        vec.emplace_back(std::move(val));
    }
    assert(vec.size() == size);
    return bytes;
}
395 
/**
 * Wrapper that forwards to an unqualified call to packed_read, so that
 * overloads can be found via argument-dependent lookup.
 *
 * @param is The stream to read from
 * @param value The element to store the decoded value in
 * @return the number of bytes consumed, as reported by packed_read
 */
template <class InputStream, class T>
uint64_t read(InputStream& is, T& value)
{
    const uint64_t bytes = packed_read(is, value);
    return bytes;
}
409 
/**
 * Convenience overload that decodes a value of type T from the stream
 * and returns it by value. T must be default-constructible; the byte
 * count reported by packed_read is discarded.
 *
 * @param stream The stream to read from
 * @return the decoded value
 */
template <class T, class InputStream>
T read(InputStream& stream)
{
    T result;
    packed_read(stream, result);
    return result;
}
423 }
424 }
425 }
426 #endif
uint64_t write(OutputStream &os, const T &value)
Wrapper function for enabling ADL for io::packed::write.
Definition: packed.h:216
uint64_t packed_read(InputStream &is, std::vector< T, Alloc > &vec)
Reads a vector type from a packed representation.
Definition: packed.h:379
A non-owning reference to a string.
Definition: string_view.h:51
uint64_t packed_write(OutputStream &os, const std::vector< T, Alloc > &vec)
Writes a vector type in a packed representation.
Definition: packed.h:201
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25
Base template that denotes an identifier.
Definition: identifiers.h:49