ModErn Text Analysis
META Enumerates Textual Applications
string_view.h
Go to the documentation of this file.
1 
10 #ifndef META_UTIL_STRING_VIEW_H_
11 #define META_UTIL_STRING_VIEW_H_
12 
13 #include "meta/config.h"
14 #include "meta/hashing/hash.h"
15 
16 #if META_HAS_EXPERIMENTAL_STRING_VIEW
17 #include <experimental/string_view>
namespace meta
{
namespace util
{
/**
 * Alias for the library-provided view from <experimental/string_view>,
 * selected when META_HAS_EXPERIMENTAL_STRING_VIEW is set.
 */
template <class Char, class Traits = std::char_traits<Char>>
using basic_string_view = std::experimental::basic_string_view<Char, Traits>;

/// Convenience aliases for the common character types.
using string_view = basic_string_view<char>;
using u16string_view = basic_string_view<char16_t>;
using u32string_view = basic_string_view<char32_t>;
using wstring_view = basic_string_view<wchar_t>;
}
}
31 #else
32 
33 #include <algorithm>
34 #include <stdexcept>
35 #include <string>
36 
37 namespace meta
38 {
39 namespace util
40 {
41 
/**
 * A non-owning reference to a contiguous sequence of characters.
 *
 * The view stores only a pointer and a length; it never owns or copies the
 * characters, so the referenced storage must outlive the view. The search
 * functions follow the semantics of the corresponding std::basic_string
 * members.
 */
template <class Char, class Traits = std::char_traits<Char>>
class basic_string_view
{
  public:
    using traits_type = Traits;
    using value_type = Char;
    using pointer = Char*;
    using const_pointer = const Char*;
    using reference = Char&;
    using const_reference = const Char&;
    using const_iterator = const_pointer;
    using iterator = const_iterator;
    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
    using reverse_iterator = const_reverse_iterator;
    using size_type = std::size_t;
    using difference_type = std::ptrdiff_t;

    /// Sentinel meaning "not found" or "until the end of the view".
    static constexpr size_type npos = size_type(-1);

    /// Constructs an empty view (null data pointer, zero length).
    constexpr basic_string_view() noexcept : data_{nullptr}, size_{0}
    {
        // nothing
    }

    constexpr basic_string_view(const basic_string_view&) noexcept = default;
    basic_string_view& operator=(const basic_string_view&) noexcept = default;

    /// Views the characters of a std::basic_string (implicit on purpose).
    template <class Allocator>
    basic_string_view(
        const std::basic_string<Char, Traits, Allocator>& str) noexcept
        : data_{str.data()},
          size_{str.size()}
    {
        // nothing
    }

    /// Views a null-terminated string; the length comes from Traits::length.
    constexpr basic_string_view(const Char* str)
        : data_{str}, size_{Traits::length(str)}
    {
        // nothing
    }

    /// Views the len characters starting at str.
    constexpr basic_string_view(const Char* str, size_type len)
        : data_{str}, size_{len}
    {
        // nothing
    }

    constexpr const_iterator begin() const noexcept
    {
        return data_;
    }

    constexpr const_iterator end() const noexcept
    {
        return data_ + size_;
    }

    constexpr const_iterator cbegin() const noexcept
    {
        return begin();
    }

    constexpr const_iterator cend() const noexcept
    {
        return end();
    }

    const_reverse_iterator rbegin() const noexcept
    {
        return const_reverse_iterator{end()};
    }

    const_reverse_iterator rend() const noexcept
    {
        return const_reverse_iterator{begin()};
    }

    const_reverse_iterator crbegin() const noexcept
    {
        return rbegin();
    }

    const_reverse_iterator crend() const noexcept
    {
        return rend();
    }

    /// @return the number of characters in the view
    constexpr size_type size() const noexcept
    {
        return size_;
    }

    /// @return the number of characters in the view (same as size())
    constexpr size_type length() const noexcept
    {
        return size();
    }

    /**
     * @return the largest number of characters any view could refer to
     * (not the current size)
     */
    constexpr size_type max_size() const noexcept
    {
        return npos / sizeof(value_type);
    }

    constexpr bool empty() const noexcept
    {
        return size() == 0;
    }

    /// Unchecked element access; pos must be less than size().
    constexpr const_reference operator[](size_type pos) const
    {
        return data_[pos];
    }

    /**
     * Checked element access.
     * @throws std::out_of_range if pos >= size()
     */
    const_reference at(size_type pos) const
    {
        if (pos >= size())
            throw std::out_of_range{"index out of bounds"};
        return data_[pos];
    }

    /// First character; the view must not be empty.
    constexpr const_reference front() const
    {
        return data_[0];
    }

    /// Last character; the view must not be empty.
    constexpr const_reference back() const
    {
        return data_[size_ - 1];
    }

    /// @return pointer to the first character (not null-terminated)
    constexpr const_pointer data() const noexcept
    {
        return data_;
    }

    /// Resets the view to the empty state.
    void clear() noexcept
    {
        data_ = nullptr;
        size_ = 0;
    }

    /// Drops the first n characters; no bounds check is performed.
    void remove_prefix(size_type n)
    {
        data_ += n;
        size_ -= n;
    }

    /// Drops the last n characters; no bounds check is performed.
    void remove_suffix(size_type n)
    {
        size_ -= n;
    }

    void swap(basic_string_view& s) noexcept
    {
        using ::std::swap;
        swap(data_, s.data_);
        swap(size_, s.size_);
    }

    /// Explicit conversion to an owning string (copies the characters).
    template <class Allocator>
    explicit operator std::basic_string<Char, Traits, Allocator>() const
    {
        return {begin(), end()};
    }

    /// @return an owning copy of the viewed characters, allocated with a
    template <class Allocator = std::allocator<Char>>
    std::basic_string<Char, Traits, Allocator> to_string(const Allocator& a
                                                         = Allocator{}) const
    {
        return {begin(), end(), a};
    }

    /**
     * Copies at most n characters starting at pos into s.
     * @return the number of characters copied
     * @throws std::out_of_range if pos > size()
     */
    size_type copy(Char* s, size_type n, size_type pos = 0) const
    {
        if (pos > size())
            throw std::out_of_range{"index out of bounds"};

        auto rlen = std::min(n, size() - pos);
        std::copy_n(begin() + pos, rlen, s);
        return rlen;
    }

    /**
     * @return a view of at most n characters starting at pos
     * @throws std::out_of_range if pos > size()
     */
    constexpr basic_string_view substr(size_type pos = 0,
                                       size_type n = npos) const
    {
        return pos > size()
                   ? throw std::out_of_range{"index out of bounds"}
                   : basic_string_view{data() + pos,
                                       std::min(n, size() - pos)};
    }

    /**
     * Lexicographic three-way comparison using Traits::compare on the
     * common prefix, with the shorter view ordered first on a tie.
     * @return a negative value, zero, or a positive value
     */
    int compare(basic_string_view s) const noexcept
    {
        auto cmp
            = Traits::compare(data(), s.data(), std::min(size(), s.size()));
        if (cmp != 0)
            return cmp;

        if (size() < s.size())
            return -1;

        if (size() == s.size())
            return 0;

        return 1;
    }

    constexpr int compare(size_type pos1, size_type n1,
                          basic_string_view s) const
    {
        return substr(pos1, n1).compare(s);
    }

    constexpr int compare(size_type pos1, size_type n1, basic_string_view s,
                          size_type pos2, size_type n2) const
    {
        return substr(pos1, n1).compare(s.substr(pos2, n2));
    }

    constexpr int compare(const Char* s) const
    {
        return compare(basic_string_view{s});
    }

    constexpr int compare(size_type pos1, size_type n1, const Char* s) const
    {
        return substr(pos1, n1).compare(basic_string_view{s});
    }

    constexpr int compare(size_type pos1, size_type n1, const Char* s,
                          size_type n2) const
    {
        return substr(pos1, n1).compare(basic_string_view{s, n2});
    }

    /**
     * @return the lowest index >= pos at which s occurs, or npos. An empty
     * s matches at any pos <= size().
     */
    size_type find(basic_string_view s, size_type pos = 0) const noexcept
    {
        if (pos > size())
            return npos;

        // an empty needle matches immediately, including at pos == size()
        if (s.empty())
            return pos;

        auto it
            = std::search(begin() + pos, end(), s.begin(), s.end(), Traits::eq);
        if (it == end())
            return npos;
        return static_cast<size_type>(std::distance(begin(), it));
    }

    constexpr size_type find(Char c, size_type pos = 0) const noexcept
    {
        return find(basic_string_view{&c, 1}, pos);
    }

    constexpr size_type find(const Char* s, size_type pos, size_type n) const
    {
        return find(basic_string_view{s, n}, pos);
    }

    constexpr size_type find(const Char* s, size_type pos = 0) const
    {
        return find(basic_string_view{s}, pos);
    }

    /**
     * @return the highest index <= pos at which s occurs, or npos. An empty
     * s matches at min(pos, size()).
     */
    size_type rfind(basic_string_view s, size_type pos = npos) const noexcept
    {
        if (size() < s.size())
            return npos;

        pos = std::min(pos, size());

        // std::find_end reports "not found" for an empty needle
        if (s.empty())
            return pos;

        // a match starting at pos may extend s.size() characters past it
        if (s.size() < size() - pos)
            pos += s.size();
        else
            pos = size();

        auto it = std::find_end(begin(), begin() + pos, s.begin(), s.end(),
                                Traits::eq);

        if (it == begin() + pos)
            return npos;
        return static_cast<size_type>(std::distance(begin(), it));
    }

    constexpr size_type rfind(Char c, size_type pos = npos) const noexcept
    {
        return rfind(basic_string_view{&c, 1}, pos);
    }

    constexpr size_type rfind(const Char* s, size_type pos, size_type n) const
    {
        return rfind(basic_string_view{s, n}, pos);
    }

    constexpr size_type rfind(const Char* s, size_type pos = npos) const
    {
        return rfind(basic_string_view{s}, pos);
    }

    /**
     * @return the lowest index >= pos holding a character that is in s,
     * or npos
     */
    size_type find_first_of(basic_string_view s, size_type pos = 0) const
        noexcept
    {
        if (pos >= size())
            return npos;

        auto it = std::find_first_of(begin() + pos, end(), s.begin(), s.end(),
                                     Traits::eq);
        if (it == end())
            return npos;
        return static_cast<size_type>(std::distance(begin(), it));
    }

    constexpr size_type find_first_of(Char c, size_type pos = 0) const noexcept
    {
        return find_first_of(basic_string_view{&c, 1}, pos);
    }

    constexpr size_type find_first_of(const Char* s, size_type pos,
                                      size_type n) const
    {
        return find_first_of(basic_string_view{s, n}, pos);
    }

    constexpr size_type find_first_of(const Char* s, size_type pos = 0) const
    {
        return find_first_of(basic_string_view{s}, pos);
    }

    /**
     * @return the highest index <= pos holding a character that is in s,
     * or npos. A pos at or beyond the end searches the whole view.
     */
    size_type find_last_of(basic_string_view s, size_type pos = npos) const
        noexcept
    {
        if (empty())
            return npos;

        // clamp to the last valid index so the default pos = npos scans
        // the entire view
        const size_type last = std::min(pos, size() - 1);
        const auto skip = static_cast<difference_type>(size() - 1 - last);
        auto it = std::find_first_of(rbegin() + skip, rend(), s.begin(),
                                     s.end(), Traits::eq);
        if (it == rend())
            return npos;
        return size() - 1 - static_cast<size_type>(std::distance(rbegin(), it));
    }

    constexpr size_type find_last_of(Char c, size_type pos = npos) const
        noexcept
    {
        return find_last_of(basic_string_view{&c, 1}, pos);
    }

    constexpr size_type find_last_of(const Char* s, size_type pos,
                                     size_type n) const
    {
        return find_last_of(basic_string_view{s, n}, pos);
    }

    constexpr size_type find_last_of(const Char* s, size_type pos = npos) const
    {
        return find_last_of(basic_string_view{s}, pos);
    }

    /**
     * @return the lowest index >= pos holding a character that is not in
     * s, or npos
     */
    size_type find_first_not_of(basic_string_view s, size_type pos = 0) const
        noexcept
    {
        if (pos >= size())
            return npos;

        // start at pos, not at the beginning of the view
        auto it = std::find_if(begin() + pos, end(), [&](const_reference c) {
            return !matches_any(s, c);
        });
        if (it == end())
            return npos;
        return static_cast<size_type>(std::distance(begin(), it));
    }

    constexpr size_type find_first_not_of(Char c, size_type pos = 0) const
        noexcept
    {
        return find_first_not_of(basic_string_view{&c, 1}, pos);
    }

    constexpr size_type find_first_not_of(const Char* s, size_type pos,
                                          size_type n) const
    {
        return find_first_not_of(basic_string_view{s, n}, pos);
    }

    constexpr size_type find_first_not_of(const Char* s,
                                          size_type pos = 0) const
    {
        return find_first_not_of(basic_string_view{s}, pos);
    }

    /**
     * @return the highest index <= pos holding a character that is not in
     * s, or npos. A pos at or beyond the end searches the whole view.
     */
    size_type find_last_not_of(basic_string_view s, size_type pos = npos) const
        noexcept
    {
        if (empty())
            return npos;

        // clamp to the last valid index so the default pos = npos scans
        // the entire view
        const size_type last = std::min(pos, size() - 1);
        const auto skip = static_cast<difference_type>(size() - 1 - last);
        auto it = std::find_if(rbegin() + skip, rend(), [&](const_reference c) {
            return !matches_any(s, c);
        });
        if (it == rend())
            return npos;
        return size() - 1 - static_cast<size_type>(std::distance(rbegin(), it));
    }

    constexpr size_type find_last_not_of(Char c, size_type pos = npos) const
        noexcept
    {
        return find_last_not_of(basic_string_view{&c, 1}, pos);
    }

    constexpr size_type find_last_not_of(const Char* s, size_type pos,
                                         size_type n) const
    {
        return find_last_not_of(basic_string_view{s, n}, pos);
    }

    constexpr size_type find_last_not_of(const Char* s,
                                         size_type pos = npos) const
    {
        return find_last_not_of(basic_string_view{s}, pos);
    }

  private:
    /// @return whether c equals (per Traits::eq) any character of s
    static bool matches_any(basic_string_view s, const_reference c) noexcept
    {
        return std::find_if(
                   s.begin(), s.end(),
                   [&](const_reference sc) { return Traits::eq(c, sc); })
               != s.end();
    }

    /// First viewed character; nullptr for a default-constructed view.
    const_pointer data_;
    /// Number of viewed characters.
    size_type size_;
};
480 
485 
// NOTE(review): an unnamed namespace in a header gives every translation
// unit its own copy of this alias; consider a named detail namespace.
namespace
{
/**
 * Maps T to std::decay<T>::type. Because the result is spelled through a
 * nested type, a function parameter written as identity<X> is a
 * non-deduced context for X's template arguments.
 */
template <class T>
using identity = typename std::decay<T>::type;
}
491 
492 template <class Char, class Traits>
493 constexpr bool operator==(basic_string_view<Char, Traits> lhs,
494  basic_string_view<Char, Traits> rhs) noexcept
495 {
496  return lhs.compare(rhs) == 0;
497 }
498 
499 template <class Char, class Traits>
500 constexpr bool
501 operator==(basic_string_view<Char, Traits> lhs,
502  identity<basic_string_view<Char, Traits>> rhs) noexcept
503 {
504  return lhs.compare(rhs) == 0;
505 }
506 
507 template <class Char, class Traits>
508 constexpr bool operator==(identity<basic_string_view<Char, Traits>> lhs,
509  basic_string_view<Char, Traits> rhs) noexcept
510 {
511  return lhs.compare(rhs) == 0;
512 }
513 
514 template <class Char, class Traits>
515 constexpr bool operator!=(basic_string_view<Char, Traits> lhs,
516  basic_string_view<Char, Traits> rhs) noexcept
517 {
518  return lhs.compare(rhs) != 0;
519 }
520 
521 template <class Char, class Traits>
522 constexpr bool
523 operator!=(basic_string_view<Char, Traits> lhs,
524  identity<basic_string_view<Char, Traits>> rhs) noexcept
525 {
526  return lhs.compare(rhs) != 0;
527 }
528 
529 template <class Char, class Traits>
530 constexpr bool operator!=(identity<basic_string_view<Char, Traits>> lhs,
531  basic_string_view<Char, Traits> rhs) noexcept
532 {
533  return lhs.compare(rhs) != 0;
534 }
535 
536 template <class Char, class Traits>
537 constexpr bool operator<(basic_string_view<Char, Traits> lhs,
538  basic_string_view<Char, Traits> rhs) noexcept
539 {
540  return lhs.compare(rhs) < 0;
541 }
542 
543 template <class Char, class Traits>
544 constexpr bool operator<(basic_string_view<Char, Traits> lhs,
545  identity<basic_string_view<Char, Traits>> rhs) noexcept
546 {
547  return lhs.compare(rhs) < 0;
548 }
549 
550 template <class Char, class Traits>
551 constexpr bool operator<(identity<basic_string_view<Char, Traits>> lhs,
552  basic_string_view<Char, Traits> rhs) noexcept
553 {
554  return lhs.compare(rhs) < 0;
555 }
556 
557 template <class Char, class Traits>
558 constexpr bool operator>(basic_string_view<Char, Traits> lhs,
559  basic_string_view<Char, Traits> rhs) noexcept
560 {
561  return lhs.compare(rhs) > 0;
562 }
563 
564 template <class Char, class Traits>
565 constexpr bool operator>(basic_string_view<Char, Traits> lhs,
566  identity<basic_string_view<Char, Traits>> rhs) noexcept
567 {
568  return lhs.compare(rhs) > 0;
569 }
570 
571 template <class Char, class Traits>
572 constexpr bool operator>(identity<basic_string_view<Char, Traits>> lhs,
573  basic_string_view<Char, Traits> rhs) noexcept
574 {
575  return lhs.compare(rhs) > 0;
576 }
577 
578 template <class Char, class Traits>
579 constexpr bool operator<=(basic_string_view<Char, Traits> lhs,
580  basic_string_view<Char, Traits> rhs) noexcept
581 {
582  return lhs.compare(rhs) <= 0;
583 }
584 
585 template <class Char, class Traits>
586 constexpr bool
587 operator<=(basic_string_view<Char, Traits> lhs,
588  identity<basic_string_view<Char, Traits>> rhs) noexcept
589 {
590  return lhs.compare(rhs) <= 0;
591 }
592 
593 template <class Char, class Traits>
594 constexpr bool operator<=(identity<basic_string_view<Char, Traits>> lhs,
595  basic_string_view<Char, Traits> rhs) noexcept
596 {
597  return lhs.compare(rhs) <= 0;
598 }
599 
600 template <class Char, class Traits>
601 constexpr bool operator>=(basic_string_view<Char, Traits> lhs,
602  basic_string_view<Char, Traits> rhs) noexcept
603 {
604  return lhs.compare(rhs) >= 0;
605 }
606 
607 template <class Char, class Traits>
608 constexpr bool
609 operator>=(basic_string_view<Char, Traits> lhs,
610  identity<basic_string_view<Char, Traits>> rhs) noexcept
611 {
612  return lhs.compare(rhs) >= 0;
613 }
614 
615 template <class Char, class Traits>
616 constexpr bool operator>=(identity<basic_string_view<Char, Traits>> lhs,
617  basic_string_view<Char, Traits> rhs) noexcept
618 {
619  return lhs.compare(rhs) >= 0;
620 }
621 
622 template <class Char, class Traits>
623 std::basic_ostream<Char, Traits>&
624 operator<<(std::basic_ostream<Char, Traits>& os,
626 {
627  return os << str.to_string();
628 }
629 }
630 }
631 
632 namespace std
633 {
634 template <class Char, class Traits>
635 struct hash<meta::util::basic_string_view<Char, Traits>>
636  : public meta::hashing::hash<>
637 {
638 };
639 }
640 #endif // !META_HAS_EXPERIMENTAL_STRING_VIEW
641 
642 namespace meta
643 {
644 
645 namespace util
646 {
647 inline string_view make_string_view(std::string::const_iterator begin,
648  std::string::const_iterator end)
649 {
650  return string_view{&*begin,
651  static_cast<string_view::size_type>(end - begin)};
652 }
653 }
654 
655 namespace hashing
656 {
657 template <class HashAlgorithm, class Char, class Traits>
658 typename std::enable_if<is_contiguously_hashable<Char>::value>::type
659 hash_append(HashAlgorithm& h, const util::basic_string_view<Char, Traits>& s)
660 {
661  h(s.data(), s.size() * sizeof(Char));
662  hash_append(h, s.size());
663 }
664 
665 template <class HashAlgorithm, class Char, class Traits>
666 typename std::enable_if<!is_contiguously_hashable<Char>::value>::type
667 hash_append(HashAlgorithm& h, const util::basic_string_view<Char, Traits>& s)
668 {
669  for (const auto& c : s)
670  hash_append(h, c);
671  hash_append(h, s.size());
672 }
673 }
674 }
675 #endif // META_UTIL_STRING_VIEW_H_
A generic, randomly seeded hash function.
Definition: hash.h:343
uint64_t length(const std::string &str)
Definition: utf.cpp:125
STL namespace.
void pos(const std::string &file, const cpptoml::table &config, bool replace)
Performs part-of-speech tagging on a text file.
Definition: profile.cpp:133
A non-owning reference to a string.
Definition: string_view.h:51
The ModErn Text Analysis toolkit is a suite of natural language processing, classification, information retrieval, data mining, and other applications of text processing.
Definition: analyzer.h:25