writer.h 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. // UTF-8 code points writer
  2. //
  3. // Platform: ISO C++ 98/11
  4. // $Id$
  5. //
  6. // (c) __vic 2013
  7. #ifndef __VIC_UTF8_WRITER_H
  8. #define __VIC_UTF8_WRITER_H
  #include<__vic/defs.h>
  #include<__vic/unicode.h>
  #include<__vic/bits.h>
  #include<cstddef>
  #include<utility>
  13. namespace __vic { namespace utf8 {
  14. //////////////////////////////////////////////////////////////////////////////
  15. template<class ByteWriter>
  16. class writer
  17. {
  18. ByteWriter w;
  19. void write_byte(unsigned char b) { w.write(b); }
  20. void write_bytes(const unsigned char *p, size_t len)
  21. { while(len--) write_byte(*p++); }
  22. public:
  23. typedef ByteWriter byte_writer_type;
  24. ByteWriter &get_byte_writer() { return w; }
  25. const ByteWriter &get_byte_writer() const { return w; }
  26. #if __cpp_variadic_templates && __cpp_rvalue_references
  27. template<class... Args>
  28. explicit writer(Args&&... args) : w(std::forward<Args>(args)...) {}
  29. #else
  30. writer() {}
  31. explicit writer(ByteWriter w) : w(w) {}
  32. #endif
  33. void write(unicode_t );
  34. };
  35. //////////////////////////////////////////////////////////////////////////////
  36. //----------------------------------------------------------------------------
  37. template<class ByteWriter>
  38. void writer<ByteWriter>::write(unicode_t cp)
  39. {
  40. if(cp < 0x80) // single byte (ASCII)
  41. write_byte(cp); // 0xxxxxxx
  42. else if(cp < 0x0800) // 2 bytes
  43. {
  44. write_byte(0xC0 | (cp >> 6)); // 110xxxxx
  45. write_byte(0x80 | (cp & 0x3F)); // 10xxxxxx
  46. }
  47. else // more bytes
  48. {
  49. // Cases with 1 & 2 bytes can be processed here
  50. // They are processed specifically only for optimization reasons
  51. unsigned char utf8_cp[6];
  52. size_t len =
  53. //cp < 0x80 ? 1 :
  54. //cp < 0x0800 ? 2 :
  55. cp < 0x10000 ? 3 :
  56. cp < 0x200000 ? 4 :
  57. cp < 0x4000000 ? 5 :
  58. 6 ;
  59. for(int i = len - 1; i; i--, cp >>= 6)
  60. utf8_cp[i] = 0x80 | (cp & 0x3F); // ... | __vic::get_lsbs(cp, 6)
  61. utf8_cp[0] = __vic::msb_ones<unicode_t>(len) | cp; // ... | __vic::get_lsbs(cp, 7 - len)
  62. write_bytes(utf8_cp, len);
  63. }
  64. }
  65. //----------------------------------------------------------------------------
  66. template<class ByteWriter>
  67. inline writer<ByteWriter> make_writer(ByteWriter w)
  68. {
  69. return writer<ByteWriter>(w);
  70. }
  71. //----------------------------------------------------------------------------
  72. }} // namespace
  73. #endif // header guard