Mercurial > hg > xemacs-beta
comparison src/file-coding.h @ 396:6719134a07c2 r21-2-13
Import from CVS: tag r21-2-13
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:12:05 +0200 |
parents | 8626e4521993 |
children | 74fd4e045ea6 |
comparison
equal
deleted
inserted
replaced
395:de2c2a7459d2 | 396:6719134a07c2 |
---|---|
40 CODESYS_SHIFT_JIS, /* Shift-JIS; Hankaku (half-width) KANA | 40 CODESYS_SHIFT_JIS, /* Shift-JIS; Hankaku (half-width) KANA |
41 is also supported. */ | 41 is also supported. */ |
42 CODESYS_ISO2022, /* Any ISO2022-compliant coding system. | 42 CODESYS_ISO2022, /* Any ISO2022-compliant coding system. |
43 Includes JIS, EUC, CTEXT */ | 43 Includes JIS, EUC, CTEXT */ |
44 CODESYS_BIG5, /* BIG5 (used for Taiwanese). */ | 44 CODESYS_BIG5, /* BIG5 (used for Taiwanese). */ |
| | 45 CODESYS_UCS4, /* ISO 10646 UCS-4 */ |
| | 46 CODESYS_UTF8, /* ISO 10646 UTF-8 */ |
45 CODESYS_CCL, /* Converter written in CCL. */ | 47 CODESYS_CCL, /* Converter written in CCL. */ |
46 #endif | 48 #endif |
47 CODESYS_NO_CONVERSION /* "No conversion"; used for binary files. | 49 CODESYS_NO_CONVERSION /* "No conversion"; used for binary files. |
48 We use quotes because there really | 50 We use quotes because there really |
49 is some conversion being applied, | 51 is some conversion being applied, |
59 EOL_AUTODETECT, | 61 EOL_AUTODETECT, |
60 EOL_LF, | 62 EOL_LF, |
61 EOL_CRLF, | 63 EOL_CRLF, |
62 EOL_CR | 64 EOL_CR |
63 }; | 65 }; |
| | 66 typedef enum eol_type eol_type_t; |
64 | 67 |
65 #ifdef MULE | 68 #ifdef MULE |
66 typedef struct charset_conversion_spec charset_conversion_spec; | 69 typedef struct charset_conversion_spec charset_conversion_spec; |
67 struct charset_conversion_spec | 70 struct charset_conversion_spec |
68 { | 71 { |
129 decoding (input) and encoding (output). */ | 132 decoding (input) and encoding (output). */ |
130 Lisp_Object decode, encode; | 133 Lisp_Object decode, encode; |
131 } ccl; | 134 } ccl; |
132 #endif | 135 #endif |
133 }; | 136 }; |
| | 137 typedef struct Lisp_Coding_System Lisp_Coding_System; |
134 | 138 |
135 DECLARE_LRECORD (coding_system, struct Lisp_Coding_System); | 139 DECLARE_LRECORD (coding_system, struct Lisp_Coding_System); |
136 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, struct Lisp_Coding_System) | 140 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, struct Lisp_Coding_System) |
137 #define XSETCODING_SYSTEM(x, p) XSETRECORD (x, p, coding_system) | 141 #define XSETCODING_SYSTEM(x, p) XSETRECORD (x, p, coding_system) |
138 #define CODING_SYSTEMP(x) RECORDP (x, coding_system) | 142 #define CODING_SYSTEMP(x) RECORDP (x, coding_system) |
243 EXFUN (Fmake_coding_system, 4); | 247 EXFUN (Fmake_coding_system, 4); |
244 EXFUN (Fset_coding_category_system, 2); | 248 EXFUN (Fset_coding_category_system, 2); |
245 EXFUN (Fset_coding_priority_list, 1); | 249 EXFUN (Fset_coding_priority_list, 1); |
246 EXFUN (Fsubsidiary_coding_system, 2); | 250 EXFUN (Fsubsidiary_coding_system, 2); |
247 | 251 |
| | 252 extern Lisp_Object Qucs4, Qutf8; |
248 extern Lisp_Object Qbig5, Qbuffer_file_coding_system, Qccl, Qcharset_g0; | 253 extern Lisp_Object Qbig5, Qbuffer_file_coding_system, Qccl, Qcharset_g0; |
249 extern Lisp_Object Qcharset_g1, Qcharset_g2, Qcharset_g3, Qcoding_system_error; | 254 extern Lisp_Object Qcharset_g1, Qcharset_g2, Qcharset_g3, Qcoding_system_error; |
250 extern Lisp_Object Qcoding_system_p, Qcr, Qcrlf, Qctext, Qdecode, Qencode; | 255 extern Lisp_Object Qcoding_system_p, Qcr, Qcrlf, Qctext, Qdecode, Qencode; |
251 extern Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf, Qeol_type, Qescape_quoted; | 256 extern Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf, Qeol_type, Qescape_quoted; |
252 extern Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; | 257 extern Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; |
303 If both CODING_STATE_SS2 and | 308 If both CODING_STATE_SS2 and |
304 CODING_STATE_SS3 are set, | 309 CODING_STATE_SS3 are set, |
305 CODING_STATE_SS2 overrides; but | 310 CODING_STATE_SS2 overrides; but |
306 this probably indicates an error | 311 this probably indicates an error |
307 in the text encoding. */ | 312 in the text encoding. */ |
| | 313 #ifdef ENABLE_COMPOSITE_CHARS |
308 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing | 314 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing |
309 a composite character (i.e. a | 315 a composite character (i.e. a |
310 character constructed by | 316 character constructed by |
311 overstriking two or more | 317 overstriking two or more |
312 characters). */ | 318 characters). */ |
| | 319 #endif /* ENABLE_COMPOSITE_CHARS */ |
313 | 320 |
314 | 321 |
315 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until | 322 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until |
316 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are | 323 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are |
317 turned off at the end of processing each character or escape sequence. */ | 324 turned off at the end of processing each character or escape sequence. */ |
| | 325 #ifdef ENABLE_COMPOSITE_CHARS |
318 # define CODING_STATE_ISO2022_LOCK \ | 326 # define CODING_STATE_ISO2022_LOCK \ |
319 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L) | 327 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L) |
320 #define CODING_STATE_BIG5_LOCK \ | 328 #else |
321 CODING_STATE_END | 329 # define CODING_STATE_ISO2022_LOCK (CODING_STATE_END | CODING_STATE_R2L) |
| | 330 #endif |
| | 331 |
| | 332 #define CODING_STATE_BIG5_LOCK CODING_STATE_END |
322 | 333 |
323 /* Flags indicating what we've seen so far when parsing an | 334 /* Flags indicating what we've seen so far when parsing an |
324 ISO2022 escape sequence. */ | 335 ISO2022 escape sequence. */ |
325 enum iso_esc_flag | 336 enum iso_esc_flag |
326 { | 337 { |
359 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */ | 370 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */ |
360 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This | 371 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This |
361 starts a directionality-control | 372 starts a directionality-control |
362 sequence. The next character | 373 sequence. The next character |
363 must be 0, 1, 2, or ]. */ | 374 must be 0, 1, 2, or ]. */ |
364 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next | 375 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next character must be ]. */ |
365 character must be ]. */ | 376 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next character must be ]. */ |
366 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next | 377 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next character must be ]. */ |
367 character must be ]. */ | |
368 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next | |
369 character must be ]. */ | |
370 | 378 |
371 /* Full sequences. */ | 379 /* Full sequences. */ |
| | 380 #ifdef ENABLE_COMPOSITE_CHARS |
372 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */ | 381 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */ |
373 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */ | 382 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */ |
| | 383 #endif /* ENABLE_COMPOSITE_CHARS */ |
374 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */ | 384 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */ |
375 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */ | 385 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */ |
376 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */ | 386 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */ |
377 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality | 387 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality |
378 sequence. */ | 388 sequence. */ |
390 #define ISO_CODE_DEL 0x7F /* delete */ | 400 #define ISO_CODE_DEL 0x7F /* delete */ |
391 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ | 401 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ |
392 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ | 402 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ |
393 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ | 403 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ |
394 #endif /* MULE */ | 404 #endif /* MULE */ |
395 | |
396 /* Macros to access an encoding stream or decoding stream */ | |
397 | |
398 #define CODING_STREAM_DECOMPOSE(str, flags, ch) \ | |
399 do { \ | |
400 flags = (str)->flags; \ | |
401 ch = (str)->ch; \ | |
402 } while (0) | |
403 | |
404 #define CODING_STREAM_COMPOSE(str, flags, ch) \ | |
405 do { \ | |
406 (str)->flags = flags; \ | |
407 (str)->ch = ch; \ | |
408 } while (0) | |
409 | |
410 | 405 |
411 /* For detecting the encoding of text */ | 406 /* For detecting the encoding of text */ |
412 enum coding_category_type | 407 enum coding_category_type |
413 { | 408 { |
414 #ifdef MULE | 409 #ifdef MULE |
424 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes, | 419 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes, |
425 no locking shift, no designation sequences, | 420 no locking shift, no designation sequences, |
426 two-dimension characters in the upper half. */ | 421 two-dimension characters in the upper half. */ |
427 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */ | 422 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */ |
428 CODING_CATEGORY_BIG5, | 423 CODING_CATEGORY_BIG5, |
| | 424 CODING_CATEGORY_UCS4, |
| | 425 CODING_CATEGORY_UTF8, |
429 #endif /* MULE */ | 426 #endif /* MULE */ |
430 CODING_CATEGORY_NO_CONVERSION | 427 CODING_CATEGORY_NO_CONVERSION |
431 }; | 428 }; |
432 | 429 |
433 #define CODING_CATEGORY_LAST CODING_CATEGORY_NO_CONVERSION | 430 #define CODING_CATEGORY_LAST CODING_CATEGORY_NO_CONVERSION |
445 (1 << CODING_CATEGORY_ISO_8_2) | 442 (1 << CODING_CATEGORY_ISO_8_2) |
446 #define CODING_CATEGORY_ISO_LOCK_SHIFT_MASK \ | 443 #define CODING_CATEGORY_ISO_LOCK_SHIFT_MASK \ |
447 (1 << CODING_CATEGORY_ISO_LOCK_SHIFT) | 444 (1 << CODING_CATEGORY_ISO_LOCK_SHIFT) |
448 #define CODING_CATEGORY_BIG5_MASK \ | 445 #define CODING_CATEGORY_BIG5_MASK \ |
449 (1 << CODING_CATEGORY_BIG5) | 446 (1 << CODING_CATEGORY_BIG5) |
| | 447 #define CODING_CATEGORY_UCS4_MASK \ |
| | 448 (1 << CODING_CATEGORY_UCS4) |
| | 449 #define CODING_CATEGORY_UTF8_MASK \ |
| | 450 (1 << CODING_CATEGORY_UTF8) |
450 #endif | 451 #endif |
451 #define CODING_CATEGORY_NO_CONVERSION_MASK \ | 452 #define CODING_CATEGORY_NO_CONVERSION_MASK \ |
452 (1 << CODING_CATEGORY_NO_CONVERSION) | 453 (1 << CODING_CATEGORY_NO_CONVERSION) |
453 #define CODING_CATEGORY_NOT_FINISHED_MASK \ | 454 #define CODING_CATEGORY_NOT_FINISHED_MASK \ |
454 (1 << 30) | 455 (1 << 30) |
503 | 504 |
504 | 505 |
505 #ifndef MULE | 506 #ifndef MULE |
506 #define MIN_LEADING_BYTE 0x80 | 507 #define MIN_LEADING_BYTE 0x80 |
507 /* These need special treatment in a string and/or character */ | 508 /* These need special treatment in a string and/or character */ |
| | 509 #ifdef ENABLE_COMPOSITE_CHARS |
508 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ | 510 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ |
| | 511 #endif |
509 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ | 512 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ |
510 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ | 513 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ |
511 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20) | 514 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20) |
512 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0) | 515 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0) |
513 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c) | 516 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c) |