/* Header for coding system handler.
   Copyright (C) 2001-2012  Free Software Foundation, Inc.
   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
     2005, 2006, 2007, 2008, 2009, 2010, 2011
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H14PRO021
   Copyright (C) 2003
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H13PRO009

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef EMACS_CODING_H
#define EMACS_CODING_H
  /* Index to arguments of Fdefine_coding_system_internal.  These are
     the arguments shared by every coding-system type; coding_arg_max
     is their count, and the type-specific argument enums in this
     header start numbering from it.  */
  enum define_coding_system_arg_index
    {
      coding_arg_name,
      coding_arg_mnemonic,
      coding_arg_coding_type,
      coding_arg_charset_list,
      coding_arg_ascii_compatible_p,
      coding_arg_decode_translation_table,
      coding_arg_encode_translation_table,
      coding_arg_post_read_conversion,
      coding_arg_pre_write_conversion,
      coding_arg_default_char,
      coding_arg_for_unibyte,
      coding_arg_plist,
      coding_arg_eol_type,
      coding_arg_max
    };
  41. enum define_coding_iso2022_arg_index
  42. {
  43. coding_arg_iso2022_initial = coding_arg_max,
  44. coding_arg_iso2022_reg_usage,
  45. coding_arg_iso2022_request,
  46. coding_arg_iso2022_flags,
  47. coding_arg_iso2022_max
  48. };
  49. enum define_coding_utf8_arg_index
  50. {
  51. coding_arg_utf8_bom = coding_arg_max,
  52. coding_arg_utf8_max
  53. };
  54. enum define_coding_utf16_arg_index
  55. {
  56. coding_arg_utf16_bom = coding_arg_max,
  57. coding_arg_utf16_endian,
  58. coding_arg_utf16_max
  59. };
  60. enum define_coding_ccl_arg_index
  61. {
  62. coding_arg_ccl_decoder = coding_arg_max,
  63. coding_arg_ccl_encoder,
  64. coding_arg_ccl_valids,
  65. coding_arg_ccl_max
  66. };
  67. /* Hash table for all coding systems. Keys are coding system symbols
  68. and values are spec vectors of the corresponding coding system. A
  69. spec vector has the form [ ATTRS ALIASES EOL-TYPE ]. ATTRS is a
  70. vector of attribute of the coding system. ALIASES is a list of
  71. aliases (symbols) of the coding system. EOL-TYPE is `unix', `dos',
  72. `mac' or a vector of coding systems (symbols). */
  73. extern Lisp_Object Vcoding_system_hash_table;
  /* Enumeration of coding system type.  coding_type_max is the number
     of types, not a type itself.  */
  enum coding_system_type
    {
      coding_type_charset,
      coding_type_utf_8,
      coding_type_utf_16,
      coding_type_iso_2022,
      coding_type_emacs_mule,
      coding_type_sjis,
      coding_type_ccl,
      coding_type_raw_text,
      coding_type_undecided,
      coding_type_max
    };
  /* Enumeration of end-of-line format type.  eol_type_max is the number
     of formats, not a format itself.  */
  enum end_of_line_type
    {
      eol_lf,           /* Line-feed only, same as Emacs' internal
                           format.  */
      eol_crlf,         /* Sequence of carriage-return and
                           line-feed.  */
      eol_cr,           /* Carriage-return only.  */
      eol_any,          /* Accept any of above.  Produce line-feed
                           only.  */
      eol_undecided,    /* This value is used to denote that the
                           eol-type is not yet decided.  */
      eol_type_max
    };
  /* Enumeration of index to an attribute vector of a coding system.
     coding_attr_last_index is one past the last valid index.  */
  enum coding_attr_index
    {
      coding_attr_base_name,
      coding_attr_docstring,
      coding_attr_mnemonic,
      coding_attr_type,
      coding_attr_charset_list,
      coding_attr_ascii_compat,
      coding_attr_decode_tbl,
      coding_attr_encode_tbl,
      coding_attr_trans_tbl,
      coding_attr_post_read,
      coding_attr_pre_write,
      coding_attr_default_char,
      coding_attr_for_unibyte,
      coding_attr_plist,

      coding_attr_category,
      coding_attr_safe_charsets,

      /* The following are extra attributes for each type.  */
      coding_attr_charset_valids,

      coding_attr_ccl_decoder,
      coding_attr_ccl_encoder,
      coding_attr_ccl_valids,

      coding_attr_iso_initial,
      coding_attr_iso_usage,
      coding_attr_iso_request,
      coding_attr_iso_flags,

      coding_attr_utf_bom,
      coding_attr_utf_16_endian,

      coding_attr_emacs_mule_full,

      coding_attr_last_index
    };
  /* Macros to access an element of an attribute vector.  ATTRS is the
     attribute vector; each macro expands to AREF with the matching
     coding_attr_* index from enum coding_attr_index.

     The former CODING_ATTR_FLUSHING accessor is intentionally absent:
     it expanded to coding_attr_flushing, which is not a member of enum
     coding_attr_index, so any use of it could not compile.  */
  #define CODING_ATTR_BASE_NAME(attrs)     AREF (attrs, coding_attr_base_name)
  #define CODING_ATTR_TYPE(attrs)          AREF (attrs, coding_attr_type)
  #define CODING_ATTR_CHARSET_LIST(attrs)  AREF (attrs, coding_attr_charset_list)
  #define CODING_ATTR_MNEMONIC(attrs)      AREF (attrs, coding_attr_mnemonic)
  #define CODING_ATTR_DOCSTRING(attrs)     AREF (attrs, coding_attr_docstring)
  #define CODING_ATTR_ASCII_COMPAT(attrs)  AREF (attrs, coding_attr_ascii_compat)
  #define CODING_ATTR_DECODE_TBL(attrs)    AREF (attrs, coding_attr_decode_tbl)
  #define CODING_ATTR_ENCODE_TBL(attrs)    AREF (attrs, coding_attr_encode_tbl)
  #define CODING_ATTR_TRANS_TBL(attrs)     AREF (attrs, coding_attr_trans_tbl)
  #define CODING_ATTR_POST_READ(attrs)     AREF (attrs, coding_attr_post_read)
  #define CODING_ATTR_PRE_WRITE(attrs)     AREF (attrs, coding_attr_pre_write)
  #define CODING_ATTR_DEFAULT_CHAR(attrs)  AREF (attrs, coding_attr_default_char)
  #define CODING_ATTR_FOR_UNIBYTE(attrs)   AREF (attrs, coding_attr_for_unibyte)
  #define CODING_ATTR_PLIST(attrs)         AREF (attrs, coding_attr_plist)
  #define CODING_ATTR_CATEGORY(attrs)      AREF (attrs, coding_attr_category)
  #define CODING_ATTR_SAFE_CHARSETS(attrs) AREF (attrs, coding_attr_safe_charsets)
  /* The macros below take ID, an index into Vcoding_system_hash_table.
     The table's values are spec vectors; elements 0, 1 and 2 of a spec
     vector are read as the attribute vector, the alias list and the
     eol-type respectively.  */

  /* Return the name of a coding system specified by ID.  */
  #define CODING_ID_NAME(id) \
    (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id))

  /* Return the attribute vector of a coding system specified by ID.  */
  #define CODING_ID_ATTRS(id) \
    (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0))

  /* Return the list of aliases of a coding system specified by ID.  */
  #define CODING_ID_ALIASES(id) \
    (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1))

  /* Return the eol-type of a coding system specified by ID.  */
  #define CODING_ID_EOL_TYPE(id) \
    (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2))
  /* Return the spec vector of CODING_SYSTEM_SYMBOL, or Qnil when the
     symbol is not a key of Vcoding_system_hash_table.  */
  #define CODING_SYSTEM_SPEC(coding_system_symbol) \
    (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil))

  /* Return the ID of CODING_SYSTEM_SYMBOL, as computed by hash_lookup
     on Vcoding_system_hash_table.  The other macros in this header
     treat a negative result as "not found".  */
  #define CODING_SYSTEM_ID(coding_system_symbol) \
    (hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \
                  coding_system_symbol, NULL))
  /* Return 1 if CODING_SYSTEM_SYMBOL is a coding system, else 0.  The
     hash table is consulted first; Fcoding_system_p is called only when
     that lookup fails and the symbol is non-nil.  The argument is
     evaluated more than once.  */
  #define CODING_SYSTEM_P(coding_system_symbol) \
    (CODING_SYSTEM_ID (coding_system_symbol) >= 0 \
     || (! NILP (coding_system_symbol) \
         && ! NILP (Fcoding_system_p (coding_system_symbol))))
  /* Check if X is a coding system or not.  If the hash-table lookup
     fails and Fcheck_coding_system returns nil as well, call
     wrong_type_argument with Qcoding_system_p.  X is evaluated more
     than once.  */
  #define CHECK_CODING_SYSTEM(x) \
    do { \
      if (CODING_SYSTEM_ID (x) < 0 \
          && NILP (Fcheck_coding_system (x))) \
        wrong_type_argument (Qcoding_system_p, (x)); \
    } while (0)
  /* Check if X is a coding system or not.  If it is, set SPEC to the
     spec vector of the coding system.  When the first lookup yields
     nil, Fcheck_coding_system is called and the lookup is retried; if
     SPEC is still nil, wrong_type_argument is called.  SPEC must be an
     lvalue; X is evaluated more than once.  */
  #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \
    do { \
      (spec) = CODING_SYSTEM_SPEC (x); \
      if (NILP (spec)) \
        { \
          Fcheck_coding_system (x); \
          (spec) = CODING_SYSTEM_SPEC (x); \
        } \
      if (NILP (spec)) \
        wrong_type_argument (Qcoding_system_p, (x)); \
    } while (0)
  /* Check if X is a coding system or not.  If it is, set ID to the
     ID of the coding system.  When the first lookup is negative,
     Fcheck_coding_system is called and the lookup is retried; if ID is
     still negative, wrong_type_argument is called.  ID must be an
     lvalue; X is evaluated more than once.  */
  #define CHECK_CODING_SYSTEM_GET_ID(x, id) \
    do \
      { \
        (id) = CODING_SYSTEM_ID (x); \
        if ((id) < 0) \
          { \
            Fcheck_coding_system (x); \
            (id) = CODING_SYSTEM_ID (x); \
          } \
        if ((id) < 0) \
          wrong_type_argument (Qcoding_system_p, (x)); \
      } while (0)
  /*** GENERAL section ***/

  /* Enumeration of result code of code conversion.  This is the type of
     the `result' member of struct coding_system.  */
  enum coding_result_code
    {
      CODING_RESULT_SUCCESS,
      CODING_RESULT_INSUFFICIENT_SRC,
      CODING_RESULT_INSUFFICIENT_DST,
      CODING_RESULT_INCONSISTENT_EOL,
      CODING_RESULT_INVALID_SRC,
      CODING_RESULT_INTERRUPT,
      CODING_RESULT_INSUFFICIENT_MEM
    };
  /* Macros used for the member `mode' of the struct coding_system.
     Each one is a distinct single bit, so they may be OR-ed together.  */

  /* If set, recover the original CR or LF of the already decoded text
     when the decoding routine encounters an inconsistent eol format.  */
  #define CODING_MODE_INHIBIT_INCONSISTENT_EOL 0x01

  /* If set, the decoding/encoding routines treat the current data as
     the last block of the whole text to be converted, and do the
     appropriate finishing job.  */
  #define CODING_MODE_LAST_BLOCK               0x02

  /* If set, it means that the current source text is in a buffer which
     enables selective display.  */
  #define CODING_MODE_SELECTIVE_DISPLAY        0x04

  /* This flag is used by the decoding/encoding routines on the fly.  If
     set, it means that right-to-left text is being processed.  */
  #define CODING_MODE_DIRECTION                0x08

  /* NOTE(review): undocumented in this header; by its name it presumably
     marks a destination that must not be reallocated -- confirm against
     the users of this flag.  */
  #define CODING_MODE_FIXED_DESTINATION        0x10

  /* If set, it means that the encoding routines produce some safe
     ASCII characters (usually '?') for unsupported characters.  */
  #define CODING_MODE_SAFE_ENCODING            0x20
  /* For handling composition sequence.  */
  #include "composite.h"
  /* States recorded in the `state' member of struct
     composition_status.  */
  enum composition_state
    {
      COMPOSING_NO,
      COMPOSING_CHAR,
      COMPOSING_RULE,
      COMPOSING_COMPONENT_CHAR,
      COMPOSING_COMPONENT_RULE
    };
  251. /* Structure for the current composition status. */
  252. struct composition_status
  253. {
  254. enum composition_state state;
  255. enum composition_method method;
  256. int old_form; /* 0:pre-21 form, 1:post-21 form */
  257. int length; /* number of elements produced in charbuf */
  258. int nchars; /* number of characters composed */
  259. int ncomps; /* number of composition components */
  260. /* Maximum carryover is for the case of COMPOSITION_WITH_RULE_ALTCHARS.
  261. See the comment in coding.c. */
  262. int carryover[4 /* annotation header */
  263. + MAX_COMPOSITION_COMPONENTS * 3 - 2 /* ALTs and RULEs */
  264. + 2 /* intermediate -1 -1 */
  265. + MAX_COMPOSITION_COMPONENTS /* CHARs */
  266. ];
  267. };
  268. /* Structure of the field `spec.iso_2022' in the structure
  269. `coding_system'. */
  270. struct iso_2022_spec
  271. {
  272. /* Bit-wise-or of CODING_ISO_FLAG_XXX. */
  273. unsigned flags;
  274. /* The current graphic register invoked to each graphic plane. */
  275. int current_invocation[2];
  276. /* The current charset designated to each graphic register. The
  277. value -1 means that not charset is designated, -2 means that
  278. there was an invalid designation previously. */
  279. int current_designation[4];
  280. /* Set to 1 temporarily only when graphic register 2 or 3 is invoked
  281. by single-shift while encoding. */
  282. int single_shifting;
  283. /* Set to 1 temporarily only when processing at beginning of line. */
  284. int bol;
  285. /* If positive, we are now scanning CTEXT extended segment. */
  286. int ctext_extended_segment_len;
  287. /* If nonzero, we are now scanning embedded UTF-8 sequence. */
  288. int embedded_utf_8;
  289. /* The current composition. */
  290. struct composition_status cmp_status;
  291. };
  292. struct emacs_mule_spec
  293. {
  294. int full_support;
  295. struct composition_status cmp_status;
  296. };
  297. struct ccl_spec;
  /* Byte-order-mark handling modes, as used by the `bom' member of
     struct utf_16_spec and the `utf_8_bom' member of the `spec' union
     in struct coding_system.  */
  enum utf_bom_type
    {
      utf_detect_bom,
      utf_without_bom,
      utf_with_bom
    };
  /* Byte orders, as used by the `endian' member of struct
     utf_16_spec.  */
  enum utf_16_endian_type
    {
      utf_16_big_endian,
      utf_16_little_endian
    };
  309. struct utf_16_spec
  310. {
  311. enum utf_bom_type bom;
  312. enum utf_16_endian_type endian;
  313. int surrogate;
  314. };
  /* Detection state handed to the `detector' function of struct
     coding_system.  */
  struct coding_detection_info
  {
    /* Values of these members are bitwise-OR of CATEGORY_MASK_XXXs.  */

    /* Which categories are already checked.  */
    int checked;
    /* Which categories are strongly found.  */
    int found;
    /* Which categories are rejected.  */
    int rejected;
  };
  325. struct coding_system
  326. {
  327. /* ID number of the coding system. This is an index to
  328. Vcoding_system_hash_table. This value is set by
  329. setup_coding_system. At the early stage of building time, this
  330. value is -1 in the array coding_categories to indicate that no
  331. coding-system of that category is yet defined. */
  332. ptrdiff_t id;
  333. /* Flag bits of the coding system. The meaning of each bit is common
  334. to all types of coding systems. */
  335. int common_flags;
  336. /* Mode bits of the coding system. See the comments of the macros
  337. CODING_MODE_XXX. */
  338. unsigned int mode;
  339. /* Detailed information specific to each type of coding system. */
  340. union
  341. {
  342. struct iso_2022_spec iso_2022;
  343. struct ccl_spec *ccl; /* Defined in ccl.h. */
  344. struct utf_16_spec utf_16;
  345. enum utf_bom_type utf_8_bom;
  346. struct emacs_mule_spec emacs_mule;
  347. } spec;
  348. int max_charset_id;
  349. unsigned char *safe_charsets;
  350. /* The following two members specify how binary 8-bit code 128..255
  351. are represented in source and destination text respectively. 1
  352. means they are represented by 2-byte sequence, 0 means they are
  353. represented by 1-byte as is (see the comment in character.h). */
  354. unsigned src_multibyte : 1;
  355. unsigned dst_multibyte : 1;
  356. /* How may heading bytes we can skip for decoding. This is set to
  357. -1 in setup_coding_system, and updated by detect_coding. So,
  358. when this is equal to the byte length of the text being
  359. converted, we can skip the actual conversion process. */
  360. EMACS_INT head_ascii;
  361. /* The following members are set by encoding/decoding routine. */
  362. EMACS_INT produced, produced_char, consumed, consumed_char;
  363. /* Number of error source data found in a decoding routine. */
  364. int errors;
  365. /* Store the positions of error source data. */
  366. EMACS_INT *error_positions;
  367. /* Finish status of code conversion. */
  368. enum coding_result_code result;
  369. EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes;
  370. Lisp_Object src_object;
  371. const unsigned char *source;
  372. EMACS_INT dst_pos, dst_pos_byte, dst_bytes;
  373. Lisp_Object dst_object;
  374. unsigned char *destination;
  375. /* Set to 1 if the source of conversion is not in the member
  376. `charbuf', but at `src_object'. */
  377. int chars_at_source;
  378. /* If an element is non-negative, it is a character code.
  379. If it is in the range -128..-1, it is a 8-bit character code
  380. minus 256.
  381. If it is less than -128, it specifies the start of an annotation
  382. chunk. The length of the chunk is -128 minus the value of the
  383. element. The following elements are OFFSET, ANNOTATION-TYPE, and
  384. a sequence of actual data for the annotation. OFFSET is a
  385. character position offset from dst_pos or src_pos,
  386. ANNOTATION-TYPE specifies the meaning of the annotation and how to
  387. handle the following data.. */
  388. int *charbuf;
  389. int charbuf_size, charbuf_used;
  390. /* Set to 1 if charbuf contains an annotation. */
  391. int annotated;
  392. unsigned char carryover[64];
  393. int carryover_bytes;
  394. int default_char;
  395. int (*detector) (struct coding_system *,
  396. struct coding_detection_info *);
  397. void (*decoder) (struct coding_system *);
  398. int (*encoder) (struct coding_system *);
  399. };
  /* Meanings of bits in the member `common_flags' of the structure
     coding_system.  The lowest 8 bits are reserved for various kinds of
     annotations (currently two of them are used).  */
  #define CODING_ANNOTATION_MASK                  0x00FF
  #define CODING_ANNOTATE_COMPOSITION_MASK        0x0001
  #define CODING_ANNOTATE_DIRECTION_MASK          0x0002
  /* NOTE(review): 0x0003 is the union of the two bits above rather than
     a bit of its own, so testing this mask cannot tell it apart from
     composition or direction.  The value is left as is because changing
     it would alter behavior; confirm whether 0x0004 was intended.  */
  #define CODING_ANNOTATE_CHARSET_MASK            0x0003
  #define CODING_FOR_UNIBYTE_MASK                 0x0100
  #define CODING_REQUIRE_FLUSHING_MASK            0x0200
  #define CODING_REQUIRE_DECODING_MASK            0x0400
  #define CODING_REQUIRE_ENCODING_MASK            0x0800
  #define CODING_REQUIRE_DETECTION_MASK           0x1000
  #define CODING_RESET_AT_BOL_MASK                0x2000
  /* Predicates on a coding context.  CODING is a pointer to a struct
     coding_system.  The single-mask predicates expand to the masked
     bits themselves, so the result is nonzero (not necessarily 1) when
     the condition holds; test them for truth, do not compare with 1.  */

  /* Return nonzero if the coding context CODING requires annotation
     handling.  */
  #define CODING_REQUIRE_ANNOTATION(coding) \
    ((coding)->common_flags & CODING_ANNOTATION_MASK)

  /* Return nonzero if the coding context CODING prefers decoding into
     unibyte.  */
  #define CODING_FOR_UNIBYTE(coding) \
    ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK)

  /* Return nonzero if the coding context CODING requires specific code
     to be attached at the tail of converted text.  */
  #define CODING_REQUIRE_FLUSHING(coding) \
    ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK)

  /* Return 1 if the coding context CODING requires code conversion on
     decoding, else 0.  */
  #define CODING_REQUIRE_DECODING(coding) \
    ((coding)->dst_multibyte \
     || ((coding)->common_flags & CODING_REQUIRE_DECODING_MASK))

  /* Return 1 if the coding context CODING requires code conversion on
     encoding, else 0.
     The non-multibyte part of the condition is to support encoding of
     unibyte strings/buffers generated by string-as-unibyte or
     (set-buffer-multibyte nil) from multibyte strings/buffers.  */
  #define CODING_REQUIRE_ENCODING(coding) \
    ((coding)->src_multibyte \
     || ((coding)->common_flags & CODING_REQUIRE_ENCODING_MASK) \
     || ((coding)->mode & CODING_MODE_SELECTIVE_DISPLAY))

  /* Return nonzero if the coding context CODING requires some kind of
     code detection.  */
  #define CODING_REQUIRE_DETECTION(coding) \
    ((coding)->common_flags & CODING_REQUIRE_DETECTION_MASK)

  /* Return 1 if the coding context CODING requires code conversion on
     decoding or some kind of code detection, else 0.  */
  #define CODING_MAY_REQUIRE_DECODING(coding) \
    (CODING_REQUIRE_DECODING (coding) \
     || CODING_REQUIRE_DETECTION (coding))
  /* Macros to decode or encode a character of JISX0208 in SJIS.  Each
     macro takes one argument CODE, an integer lvalue holding a two-byte
     code as (first byte << 8) | second byte, and overwrites it in place
     with the converted two-byte code.  CODE is evaluated more than once,
     so it must not have side effects.  */

  /* Convert CODE from SJIS to JIS.  The temporaries carry a trailing
     underscore so that they cannot capture a caller variable named s1,
     s2, j1 or j2 passed as CODE.  */
  #define SJIS_TO_JIS(code) \
    do { \
      int sjis_hi_, sjis_lo_, jis_hi_, jis_lo_; \
      \
      sjis_hi_ = (code) >> 8, sjis_lo_ = (code) & 0xFF; \
      \
      if (sjis_lo_ >= 0x9F) \
        (jis_hi_ = sjis_hi_ * 2 - (sjis_hi_ >= 0xE0 ? 0x160 : 0xE0), \
         jis_lo_ = sjis_lo_ - 0x7E); \
      else \
        (jis_hi_ = sjis_hi_ * 2 - ((sjis_hi_ >= 0xE0) ? 0x161 : 0xE1), \
         jis_lo_ = sjis_lo_ - ((sjis_lo_ >= 0x7F) ? 0x20 : 0x1F)); \
      (code) = (jis_hi_ << 8) | jis_lo_; \
    } while (0)
  /* Convert CODE, an integer lvalue holding (first byte << 8) | second
     byte, in place using the mapping for first bytes 0xF0 and above.
     NOTE(review): presumably the second-plane variant of SJIS_TO_JIS --
     confirm against the callers.  CODE is evaluated more than once.
     The temporaries carry a trailing underscore so that they cannot
     capture a caller variable named s1, s2, j1 or j2 passed as CODE.  */
  #define SJIS_TO_JIS2(code) \
    do { \
      int sjis_hi_, sjis_lo_, jis_hi_, jis_lo_; \
      \
      sjis_hi_ = (code) >> 8, sjis_lo_ = (code) & 0xFF; \
      \
      if (sjis_lo_ >= 0x9F) \
        { \
          jis_hi_ = (sjis_hi_ == 0xF0 ? 0x28 \
                     : sjis_hi_ == 0xF1 ? 0x24 \
                     : sjis_hi_ == 0xF2 ? 0x2C \
                     : sjis_hi_ == 0xF3 ? 0x2E \
                     : 0x6E + (sjis_hi_ - 0xF4) * 2); \
          jis_lo_ = sjis_lo_ - 0x7E; \
        } \
      else \
        { \
          jis_hi_ = (sjis_hi_ <= 0xF2 ? 0x21 + (sjis_hi_ - 0xF0) * 2 \
                     : sjis_hi_ <= 0xF4 ? 0x2D + (sjis_hi_ - 0xF3) * 2 \
                     : 0x6F + (sjis_hi_ - 0xF5) * 2); \
          jis_lo_ = sjis_lo_ - ((sjis_lo_ >= 0x7F ? 0x20 : 0x1F)); \
        } \
      (code) = (jis_hi_ << 8) | jis_lo_; \
    } while (0)
  /* Convert CODE, an integer lvalue holding a JIS two-byte code as
     (first byte << 8) | second byte, to the SJIS code in place; the
     inverse of SJIS_TO_JIS.  CODE is evaluated more than once.  The
     temporaries carry a trailing underscore so that they cannot capture
     a caller variable named s1, s2, j1 or j2 passed as CODE.  */
  #define JIS_TO_SJIS(code) \
    do { \
      int sjis_hi_, sjis_lo_, jis_hi_, jis_lo_; \
      \
      jis_hi_ = (code) >> 8, jis_lo_ = (code) & 0xFF; \
      if (jis_hi_ & 1) \
        (sjis_hi_ = jis_hi_ / 2 + ((jis_hi_ < 0x5F) ? 0x71 : 0xB1), \
         sjis_lo_ = jis_lo_ + ((jis_lo_ >= 0x60) ? 0x20 : 0x1F)); \
      else \
        (sjis_hi_ = jis_hi_ / 2 + ((jis_hi_ < 0x5F) ? 0x70 : 0xB0), \
         sjis_lo_ = jis_lo_ + 0x7E); \
      (code) = (sjis_hi_ << 8) | sjis_lo_; \
    } while (0)
  /* Convert CODE, an integer lvalue holding (first byte << 8) | second
     byte, in place; the inverse of SJIS_TO_JIS2, producing first bytes
     0xF0 and above.  CODE is evaluated more than once.  The temporaries
     carry a trailing underscore so that they cannot capture a caller
     variable named s1, s2, j1 or j2 passed as CODE.  */
  #define JIS_TO_SJIS2(code) \
    do { \
      int sjis_hi_, sjis_lo_, jis_hi_, jis_lo_; \
      \
      jis_hi_ = (code) >> 8, jis_lo_ = (code) & 0xFF; \
      if (jis_hi_ & 1) \
        { \
          sjis_hi_ = (jis_hi_ <= 0x25 ? 0xF0 + (jis_hi_ - 0x21) / 2 \
                      : jis_hi_ <= 0x2F ? 0xF3 + (jis_hi_ - 0x2D) / 2 \
                      : 0xF5 + (jis_hi_ - 0x6F) / 2); \
          sjis_lo_ = jis_lo_ + ((jis_lo_ >= 0x60) ? 0x20 : 0x1F); \
        } \
      else \
        { \
          sjis_hi_ = (jis_hi_ == 0x28 ? 0xF0 \
                      : jis_hi_ == 0x24 ? 0xF1 \
                      : jis_hi_ == 0x2C ? 0xF2 \
                      : jis_hi_ == 0x2E ? 0xF3 \
                      : 0xF4 + (jis_hi_ - 0x6E) / 2); \
          sjis_lo_ = jis_lo_ + 0x7E; \
        } \
      (code) = (sjis_hi_ << 8) | sjis_lo_; \
    } while (0)
  /* Encode the file name NAME using the specified coding system
     for file names, if any.  Vfile_name_coding_system is used when it
     is neither nil nor the integer 0; otherwise
     Vdefault_file_name_coding_system is tried under the same test;
     otherwise NAME is returned unchanged.  NAME may be evaluated more
     than once.  */
  #define ENCODE_FILE(name) \
    (! NILP (Vfile_name_coding_system) \
     && !EQ (Vfile_name_coding_system, make_number (0)) \
     ? code_convert_string_norecord ((name), Vfile_name_coding_system, 1) \
     : (! NILP (Vdefault_file_name_coding_system) \
        && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
        ? code_convert_string_norecord ((name), \
                                        Vdefault_file_name_coding_system, 1) \
        : (name)))
  /* Decode the file name NAME using the specified coding system
     for file names, if any.  Vfile_name_coding_system is used when it
     is neither nil nor the integer 0; otherwise
     Vdefault_file_name_coding_system is tried under the same test;
     otherwise NAME is returned unchanged.  NAME may be evaluated more
     than once.  */
  #define DECODE_FILE(name) \
    (! NILP (Vfile_name_coding_system) \
     && !EQ (Vfile_name_coding_system, make_number (0)) \
     ? code_convert_string_norecord ((name), Vfile_name_coding_system, 0) \
     : (! NILP (Vdefault_file_name_coding_system) \
        && !EQ (Vdefault_file_name_coding_system, make_number (0)) \
        ? code_convert_string_norecord ((name), \
                                        Vdefault_file_name_coding_system, 0) \
        : (name)))
  /* Encode the string STR using the specified coding system
     for system functions, if any.  Vlocale_coding_system is used when
     it is neither nil nor the integer 0; otherwise STR is returned
     unchanged.  */
  #define ENCODE_SYSTEM(str) \
    (! NILP (Vlocale_coding_system) \
     && !EQ (Vlocale_coding_system, make_number (0)) \
     ? code_convert_string_norecord ((str), Vlocale_coding_system, 1) \
     : (str))
  /* Decode the string STR using the specified coding system
     for system functions, if any.  Vlocale_coding_system is used when
     it is neither nil nor the integer 0; otherwise STR is returned
     unchanged.  */
  #define DECODE_SYSTEM(str) \
    (! NILP (Vlocale_coding_system) \
     && !EQ (Vlocale_coding_system, make_number (0)) \
     ? code_convert_string_norecord ((str), Vlocale_coding_system, 0) \
     : (str))
  /* Encode the string STR with the coding system Qutf_8.
     Note that this encodes utf-8, not utf-8-emacs, so it's not a no-op.  */
  #define ENCODE_UTF_8(str) code_convert_string_norecord ((str), Qutf_8, 1)
  561. /* Extern declarations. */
  562. extern Lisp_Object code_conversion_save (int, int);
  563. extern int decoding_buffer_size (struct coding_system *, int);
  564. extern int encoding_buffer_size (struct coding_system *, int);
  565. extern void setup_coding_system (Lisp_Object, struct coding_system *);
  566. extern Lisp_Object coding_charset_list (struct coding_system *);
  567. extern Lisp_Object coding_system_charset_list (Lisp_Object);
  568. extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
  569. Lisp_Object, int, int, int);
  570. extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
  571. int);
  572. extern Lisp_Object raw_text_coding_system (Lisp_Object);
  573. extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
  574. extern Lisp_Object complement_process_encoding_system (Lisp_Object);
  575. extern int decode_coding_gap (struct coding_system *,
  576. EMACS_INT, EMACS_INT);
  577. extern void decode_coding_object (struct coding_system *,
  578. Lisp_Object, EMACS_INT, EMACS_INT,
  579. EMACS_INT, EMACS_INT, Lisp_Object);
  580. extern void encode_coding_object (struct coding_system *,
  581. Lisp_Object, EMACS_INT, EMACS_INT,
  582. EMACS_INT, EMACS_INT, Lisp_Object);
  /* Macros for backward compatibility.  */

  /* Decode the current buffer's text between character positions FROM
     and TO with CODING, using the current buffer as both source and
     destination object.  FROM and TO are each evaluated twice.  */
  #define decode_coding_region(coding, from, to) \
    decode_coding_object ((coding), Fcurrent_buffer (), \
                          (from), CHAR_TO_BYTE (from), \
                          (to), CHAR_TO_BYTE (to), Fcurrent_buffer ())

  /* Encode the current buffer's text between character positions FROM
     and TO with CODING, using the current buffer as both source and
     destination object.  FROM and TO are each evaluated twice.  */
  #define encode_coding_region(coding, from, to) \
    encode_coding_object ((coding), Fcurrent_buffer (), \
                          (from), CHAR_TO_BYTE (from), \
                          (to), CHAR_TO_BYTE (to), Fcurrent_buffer ())
  /* Decode the whole of STRING with CODING, passing Qt as the
     destination object.  NOCOPY is accepted for compatibility and is
     not used.  STRING is evaluated more than once.  */
  #define decode_coding_string(coding, string, nocopy) \
    decode_coding_object ((coding), (string), 0, 0, SCHARS (string), \
                          SBYTES (string), Qt)

  /* If STRING is multibyte, encode the whole of it with CODING (Qt as
     the destination object) and yield CODING's dst_object; otherwise
     yield STRING itself, unconverted.  NOCOPY is accepted for
     compatibility and is not used.  CODING and STRING are evaluated
     more than once.  */
  #define encode_coding_string(coding, string, nocopy) \
    (STRING_MULTIBYTE (string) \
     ? (encode_coding_object ((coding), (string), 0, 0, SCHARS (string), \
                              SBYTES (string), Qt), \
        (coding)->dst_object) \
     : (string))
  /* Decode BYTES bytes starting at the C pointer SRC with CODING into
     DST_OBJECT.  The source pointer and sizes are stored into CODING
     first, then decode_coding_object is called with Qnil as the source
     object.  CODING and BYTES are evaluated more than once.  */
  #define decode_coding_c_string(coding, src, bytes, dst_object) \
    do { \
      (coding)->source = (src); \
      (coding)->src_chars = (coding)->src_bytes = (bytes); \
      decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \
                            (dst_object)); \
    } while (0)
  607. extern Lisp_Object preferred_coding_system (void);
  608. extern Lisp_Object Qutf_8, Qutf_8_emacs;
  609. extern Lisp_Object Qcoding_category_index;
  610. extern Lisp_Object Qcoding_system_p;
  611. extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided;
  612. extern Lisp_Object Qbuffer_file_coding_system;
  613. extern Lisp_Object Qunix, Qdos, Qmac;
  614. extern Lisp_Object Qtranslation_table;
  615. extern Lisp_Object Qtranslation_table_id;
  /* Declarations visible only when the macro `emacs' is defined.  */
  #ifdef emacs
  extern Lisp_Object Qfile_coding_system;
  extern Lisp_Object Qcall_process, Qcall_process_region;
  extern Lisp_Object Qstart_process, Qopen_network_stream;
  extern Lisp_Object Qwrite_region;

  extern char *emacs_strerror (int);

  /* Coding system to be used to encode text for terminal display when
     terminal coding system is nil.  */
  extern struct coding_system safe_terminal_coding;
  #endif
  626. /* Error signaled when there's a problem with detecting coding system */
  627. extern Lisp_Object Qcoding_system_error;
  628. extern char emacs_mule_bytes[256];
  629. extern int emacs_mule_string_char (unsigned char *);
#endif /* EMACS_CODING_H */