utils_rtl.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. #!/usr/bin/env python3
  2. # ***** BEGIN GPL LICENSE BLOCK *****
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # of the License, or (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software Foundation,
  16. # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  17. #
  18. # ***** END GPL LICENSE BLOCK *****
  19. # <pep8 compliant>
  20. # Preprocess right-to-left languages.
  21. # You can use it either standalone, or through import_po_from_branches or
  22. # update_trunk.
  23. #
# Notes: This has been tested on Linux; it is not 100% sure that it will work
# nicely on Windows or OS X.
  26. # This uses ctypes, as there is no py3 binding for fribidi currently.
  27. # This implies you only need the compiled C library to run it.
  28. # Finally, note that it handles some formatting/escape codes (like
  29. # \", %s, %x12, %.4f, etc.), protecting them from ugly (evil) fribidi,
  30. # which seems completely unaware of such things (as unicode is...).
  31. import sys
  32. import ctypes
  33. import re
  34. #define FRIBIDI_MASK_NEUTRAL 0x00000040L /* Is neutral */
  35. FRIBIDI_PAR_ON = 0x00000040
  36. #define FRIBIDI_FLAG_SHAPE_MIRRORING 0x00000001
  37. #define FRIBIDI_FLAG_REORDER_NSM 0x00000002
  38. #define FRIBIDI_FLAG_SHAPE_ARAB_PRES 0x00000100
  39. #define FRIBIDI_FLAG_SHAPE_ARAB_LIGA 0x00000200
  40. #define FRIBIDI_FLAG_SHAPE_ARAB_CONSOLE 0x00000400
  41. #define FRIBIDI_FLAG_REMOVE_BIDI 0x00010000
  42. #define FRIBIDI_FLAG_REMOVE_JOINING 0x00020000
  43. #define FRIBIDI_FLAG_REMOVE_SPECIALS 0x00040000
  44. #define FRIBIDI_FLAGS_DEFAULT ( \
  45. # FRIBIDI_FLAG_SHAPE_MIRRORING | \
  46. # FRIBIDI_FLAG_REORDER_NSM | \
  47. # FRIBIDI_FLAG_REMOVE_SPECIALS )
  48. #define FRIBIDI_FLAGS_ARABIC ( \
  49. # FRIBIDI_FLAG_SHAPE_ARAB_PRES | \
  50. # FRIBIDI_FLAG_SHAPE_ARAB_LIGA )
  51. FRIBIDI_FLAG_SHAPE_MIRRORING = 0x00000001
  52. FRIBIDI_FLAG_REORDER_NSM = 0x00000002
  53. FRIBIDI_FLAG_REMOVE_SPECIALS = 0x00040000
  54. FRIBIDI_FLAG_SHAPE_ARAB_PRES = 0x00000100
  55. FRIBIDI_FLAG_SHAPE_ARAB_LIGA = 0x00000200
  56. FRIBIDI_FLAGS_DEFAULT = FRIBIDI_FLAG_SHAPE_MIRRORING | FRIBIDI_FLAG_REORDER_NSM | FRIBIDI_FLAG_REMOVE_SPECIALS
  57. FRIBIDI_FLAGS_ARABIC = FRIBIDI_FLAG_SHAPE_ARAB_PRES | FRIBIDI_FLAG_SHAPE_ARAB_LIGA
  58. MENU_DETECT_REGEX = re.compile("%x\\d+\\|")
  59. ##### Kernel processing funcs. #####
  60. def protect_format_seq(msg):
  61. """
  62. Find some specific escaping/formatting sequences (like \", %s, etc.,
  63. and protect them from any modification!
  64. """
  65. # LRM = "\u200E"
  66. # RLM = "\u200F"
  67. LRE = "\u202A"
  68. RLE = "\u202B"
  69. PDF = "\u202C"
  70. LRO = "\u202D"
  71. RLO = "\u202E"
  72. uctrl = {LRE, RLE, PDF, LRO, RLO}
  73. # Most likely incomplete, but seems to cover current needs.
  74. format_codes = set("tslfd")
  75. digits = set(".0123456789")
  76. if not msg:
  77. return msg
  78. elif MENU_DETECT_REGEX.search(msg):
  79. # An ugly "menu" message, just force it whole LRE if not yet done.
  80. if msg[0] not in {LRE, LRO}:
  81. msg = LRE + msg
  82. idx = 0
  83. ret = []
  84. ln = len(msg)
  85. while idx < ln:
  86. dlt = 1
  87. # # If we find a control char, skip any additional protection!
  88. # if msg[idx] in uctrl:
  89. # ret.append(msg[idx:])
  90. # break
  91. # \" or \'
  92. if idx < (ln - 1) and msg[idx] == '\\' and msg[idx + 1] in "\"\'":
  93. dlt = 2
  94. # %x12|
  95. elif idx < (ln - 2) and msg[idx] == '%' and msg[idx + 1] in "x" and msg[idx + 2] in digits:
  96. dlt = 2
  97. while (idx + dlt) < ln and msg[idx + dlt] in digits:
  98. dlt += 1
  99. if (idx + dlt) < ln and msg[idx + dlt] == '|':
  100. dlt += 1
  101. # %.4f
  102. elif idx < (ln - 3) and msg[idx] == '%' and msg[idx + 1] in digits:
  103. dlt = 2
  104. while (idx + dlt) < ln and msg[idx + dlt] in digits:
  105. dlt += 1
  106. if (idx + dlt) < ln and msg[idx + dlt] in format_codes:
  107. dlt += 1
  108. else:
  109. dlt = 1
  110. # %s
  111. elif idx < (ln - 1) and msg[idx] == '%' and msg[idx + 1] in format_codes:
  112. dlt = 2
  113. if dlt > 1:
  114. ret.append(LRE)
  115. ret += msg[idx:idx + dlt]
  116. idx += dlt
  117. if dlt > 1:
  118. ret.append(PDF)
  119. return "".join(ret)
  120. def log2vis(msgs, settings):
  121. """
  122. Globally mimics deprecated fribidi_log2vis.
  123. msgs should be an iterable of messages to rtl-process.
  124. """
  125. fbd = ctypes.CDLL(settings.FRIBIDI_LIB)
  126. for msg in msgs:
  127. msg = protect_format_seq(msg)
  128. fbc_str = ctypes.create_unicode_buffer(msg)
  129. ln = len(fbc_str) - 1
  130. # print(fbc_str.value, ln)
  131. btypes = (ctypes.c_int * ln)()
  132. embed_lvl = (ctypes.c_uint8 * ln)()
  133. pbase_dir = ctypes.c_int(FRIBIDI_PAR_ON)
  134. jtypes = (ctypes.c_uint8 * ln)()
  135. flags = FRIBIDI_FLAGS_DEFAULT | FRIBIDI_FLAGS_ARABIC
  136. # Find out direction of each char.
  137. fbd.fribidi_get_bidi_types(fbc_str, ln, ctypes.byref(btypes))
  138. # print(*btypes)
  139. fbd.fribidi_get_par_embedding_levels(btypes, ln,
  140. ctypes.byref(pbase_dir),
  141. embed_lvl)
  142. # print(*embed_lvl)
  143. # Joinings for arabic chars.
  144. fbd.fribidi_get_joining_types(fbc_str, ln, jtypes)
  145. # print(*jtypes)
  146. fbd.fribidi_join_arabic(btypes, ln, embed_lvl, jtypes)
  147. # print(*jtypes)
  148. # Final Shaping!
  149. fbd.fribidi_shape(flags, embed_lvl, ln, jtypes, fbc_str)
  150. # print(fbc_str.value)
  151. # print(*(ord(c) for c in fbc_str))
  152. # And now, the reordering.
  153. # Note that here, we expect a single line, so no need to do
  154. # fancy things...
  155. fbd.fribidi_reorder_line(flags, btypes, ln, 0, pbase_dir, embed_lvl,
  156. fbc_str, None)
  157. # print(fbc_str.value)
  158. # print(*(ord(c) for c in fbc_str))
  159. yield fbc_str.value