reader.h 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. // UTF-16 code points reader
  2. //
  3. // Platform: ISO C++ 98/11
  4. // $Id$
  5. //
  6. // (c) __vic 2017
  7. #ifndef __VIC_UTF16_READER_H
  8. #define __VIC_UTF16_READER_H
  9. #include<__vic/defs.h>
  10. #include<__vic/unicode.h>
  11. #include<__vic/utf16/defs.h>
  12. #include<__vic/utf16/status.h>
  13. namespace __vic { namespace utf16 {
  14. //////////////////////////////////////////////////////////////////////////////
  15. // Desired CodeUnitReader's interface:
  16. //
  17. // struct CodeUnitReader
  18. // {
  19. // // Returns status::ok, status::eof or status::truncated_code_unit
  20. // status_t read_unit(code_unit_t & );
  21. // };
  22. //////////////////////////////////////////////////////////////////////////////
  23. template<class CodeUnitReader>
  24. class reader
  25. {
  26. CodeUnitReader r;
  27. status_t read_unit(code_unit_t &u) { return r.read_unit(u); }
  28. public:
  29. typedef CodeUnitReader code_unit_reader_type;
  30. CodeUnitReader &get_code_unit_reader() { return r; }
  31. const CodeUnitReader &get_code_unit_reader() const { return r; }
  32. #if __cpp_variadic_templates && __cpp_rvalue_references
  33. template<class... Args>
  34. explicit reader(Args&&... args) : r(std::forward<Args>(args)...) {}
  35. #else
  36. reader() {}
  37. explicit reader(CodeUnitReader r) : r(r) {}
  38. #endif
  39. status_t parse(unicode_t & );
  40. bool read(unicode_t &cp) { return throw_if_error(parse(cp)); }
  41. };
  42. //////////////////////////////////////////////////////////////////////////////
  43. //----------------------------------------------------------------------------
  44. template<class CodeUnitReader>
  45. status_t reader<CodeUnitReader>::parse(unicode_t &cp)
  46. {
  47. code_unit_t c16;
  48. status_t st;
  49. if((st = read_unit(c16)) != status::ok) return st;
  50. if(c16 < hi_surrogate_min || c16 > lo_surrogate_max)
  51. {
  52. cp = c16;
  53. return status::ok;
  54. }
  55. else if((c16 & 0xFC00) == hi_surrogate_min) // 110110xx xxxxxxxx
  56. {
  57. unicode_t c32 = c16 & 0x3FF;
  58. c32 <<= 10;
  59. switch(st = read_unit(c16))
  60. {
  61. case status::ok: break;
  62. case status::eof: return status::truncated_code_point;
  63. default: return st;
  64. }
  65. if((c16 & 0xFC00) == lo_surrogate_min) // 110111xx xxxxxxxx
  66. {
  67. cp = (c32 | (c16 & 0x3FF)) + 0x10000;
  68. return status::ok;
  69. }
  70. }
  71. return status::invalid_sequence;
  72. }
  73. //----------------------------------------------------------------------------
  74. template<class CodeUnitReader>
  75. inline reader<CodeUnitReader> make_reader(CodeUnitReader r)
  76. {
  77. return reader<CodeUnitReader>(r);
  78. }
  79. //----------------------------------------------------------------------------
  80. }} // namespace
  81. #endif // header guard