comparison src/file-coding.h @ 396:6719134a07c2 r21-2-13

Import from CVS: tag r21-2-13
author cvs
date Mon, 13 Aug 2007 11:12:05 +0200
parents 8626e4521993
children 74fd4e045ea6
comparison
equal deleted inserted replaced
395:de2c2a7459d2 396:6719134a07c2
40 CODESYS_SHIFT_JIS, /* Shift-JIS; Hankaku (half-width) KANA 40 CODESYS_SHIFT_JIS, /* Shift-JIS; Hankaku (half-width) KANA
41 is also supported. */ 41 is also supported. */
42 CODESYS_ISO2022, /* Any ISO2022-compliant coding system. 42 CODESYS_ISO2022, /* Any ISO2022-compliant coding system.
43 Includes JIS, EUC, CTEXT */ 43 Includes JIS, EUC, CTEXT */
44 CODESYS_BIG5, /* BIG5 (used for Taiwanese). */ 44 CODESYS_BIG5, /* BIG5 (used for Taiwanese). */
45 CODESYS_UCS4, /* ISO 10646 UCS-4 */
46 CODESYS_UTF8, /* ISO 10646 UTF-8 */
45 CODESYS_CCL, /* Converter written in CCL. */ 47 CODESYS_CCL, /* Converter written in CCL. */
46 #endif 48 #endif
47 CODESYS_NO_CONVERSION /* "No conversion"; used for binary files. 49 CODESYS_NO_CONVERSION /* "No conversion"; used for binary files.
48 We use quotes because there really 50 We use quotes because there really
49 is some conversion being applied, 51 is some conversion being applied,
59 EOL_AUTODETECT, 61 EOL_AUTODETECT,
60 EOL_LF, 62 EOL_LF,
61 EOL_CRLF, 63 EOL_CRLF,
62 EOL_CR 64 EOL_CR
63 }; 65 };
66 typedef enum eol_type eol_type_t;
64 67
65 #ifdef MULE 68 #ifdef MULE
66 typedef struct charset_conversion_spec charset_conversion_spec; 69 typedef struct charset_conversion_spec charset_conversion_spec;
67 struct charset_conversion_spec 70 struct charset_conversion_spec
68 { 71 {
129 decoding (input) and encoding (output). */ 132 decoding (input) and encoding (output). */
130 Lisp_Object decode, encode; 133 Lisp_Object decode, encode;
131 } ccl; 134 } ccl;
132 #endif 135 #endif
133 }; 136 };
137 typedef struct Lisp_Coding_System Lisp_Coding_System;
134 138
135 DECLARE_LRECORD (coding_system, struct Lisp_Coding_System); 139 DECLARE_LRECORD (coding_system, struct Lisp_Coding_System);
136 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, struct Lisp_Coding_System) 140 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, struct Lisp_Coding_System)
137 #define XSETCODING_SYSTEM(x, p) XSETRECORD (x, p, coding_system) 141 #define XSETCODING_SYSTEM(x, p) XSETRECORD (x, p, coding_system)
138 #define CODING_SYSTEMP(x) RECORDP (x, coding_system) 142 #define CODING_SYSTEMP(x) RECORDP (x, coding_system)
243 EXFUN (Fmake_coding_system, 4); 247 EXFUN (Fmake_coding_system, 4);
244 EXFUN (Fset_coding_category_system, 2); 248 EXFUN (Fset_coding_category_system, 2);
245 EXFUN (Fset_coding_priority_list, 1); 249 EXFUN (Fset_coding_priority_list, 1);
246 EXFUN (Fsubsidiary_coding_system, 2); 250 EXFUN (Fsubsidiary_coding_system, 2);
247 251
252 extern Lisp_Object Qucs4, Qutf8;
248 extern Lisp_Object Qbig5, Qbuffer_file_coding_system, Qccl, Qcharset_g0; 253 extern Lisp_Object Qbig5, Qbuffer_file_coding_system, Qccl, Qcharset_g0;
249 extern Lisp_Object Qcharset_g1, Qcharset_g2, Qcharset_g3, Qcoding_system_error; 254 extern Lisp_Object Qcharset_g1, Qcharset_g2, Qcharset_g3, Qcoding_system_error;
250 extern Lisp_Object Qcoding_system_p, Qcr, Qcrlf, Qctext, Qdecode, Qencode; 255 extern Lisp_Object Qcoding_system_p, Qcr, Qcrlf, Qctext, Qdecode, Qencode;
251 extern Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf, Qeol_type, Qescape_quoted; 256 extern Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf, Qeol_type, Qescape_quoted;
252 extern Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; 257 extern Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
303 If both CODING_STATE_SS2 and 308 If both CODING_STATE_SS2 and
304 CODING_STATE_SS3 are set, 309 CODING_STATE_SS3 are set,
305 CODING_STATE_SS2 overrides; but 310 CODING_STATE_SS2 overrides; but
306 this probably indicates an error 311 this probably indicates an error
307 in the text encoding. */ 312 in the text encoding. */
313 #ifdef ENABLE_COMPOSITE_CHARS
308 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing 314 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing
309 a composite character (i.e. a 315 a composite character (i.e. a
310 character constructed by 316 character constructed by
311 overstriking two or more 317 overstriking two or more
312 characters). */ 318 characters). */
319 #endif /* ENABLE_COMPOSITE_CHARS */
313 320
314 321
315 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until 322 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until
316 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are 323 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are
317 turned off at the end of processing each character or escape sequence. */ 324 turned off at the end of processing each character or escape sequence. */
325 #ifdef ENABLE_COMPOSITE_CHARS
318 # define CODING_STATE_ISO2022_LOCK \ 326 # define CODING_STATE_ISO2022_LOCK \
319 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L) 327 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L)
320 #define CODING_STATE_BIG5_LOCK \ 328 #else
321 CODING_STATE_END 329 # define CODING_STATE_ISO2022_LOCK (CODING_STATE_END | CODING_STATE_R2L)
330 #endif
331
332 #define CODING_STATE_BIG5_LOCK CODING_STATE_END
322 333
323 /* Flags indicating what we've seen so far when parsing an 334 /* Flags indicating what we've seen so far when parsing an
324 ISO2022 escape sequence. */ 335 ISO2022 escape sequence. */
325 enum iso_esc_flag 336 enum iso_esc_flag
326 { 337 {
359 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */ 370 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */
360 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This 371 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This
361 starts a directionality-control 372 starts a directionality-control
362 sequence. The next character 373 sequence. The next character
363 must be 0, 1, 2, or ]. */ 374 must be 0, 1, 2, or ]. */
364 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next 375 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next character must be ]. */
365 character must be ]. */ 376 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next character must be ]. */
366 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next 377 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next character must be ]. */
367 character must be ]. */
368 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next
369 character must be ]. */
370 378
371 /* Full sequences. */ 379 /* Full sequences. */
380 #ifdef ENABLE_COMPOSITE_CHARS
372 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */ 381 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */
373 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */ 382 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */
383 #endif /* ENABLE_COMPOSITE_CHARS */
374 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */ 384 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */
375 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */ 385 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */
376 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */ 386 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */
377 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality 387 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality
378 sequence. */ 388 sequence. */
390 #define ISO_CODE_DEL 0x7F /* delete */ 400 #define ISO_CODE_DEL 0x7F /* delete */
391 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ 401 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
392 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ 402 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
393 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ 403 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
394 #endif /* MULE */ 404 #endif /* MULE */
395
396 /* Macros to access an encoding stream or decoding stream */
397
398 #define CODING_STREAM_DECOMPOSE(str, flags, ch) \
399 do { \
400 flags = (str)->flags; \
401 ch = (str)->ch; \
402 } while (0)
403
404 #define CODING_STREAM_COMPOSE(str, flags, ch) \
405 do { \
406 (str)->flags = flags; \
407 (str)->ch = ch; \
408 } while (0)
409
410 405
411 /* For detecting the encoding of text */ 406 /* For detecting the encoding of text */
412 enum coding_category_type 407 enum coding_category_type
413 { 408 {
414 #ifdef MULE 409 #ifdef MULE
424 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes, 419 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes,
425 no locking shift, no designation sequences, 420 no locking shift, no designation sequences,
426 two-dimension characters in the upper half. */ 421 two-dimension characters in the upper half. */
427 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */ 422 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */
428 CODING_CATEGORY_BIG5, 423 CODING_CATEGORY_BIG5,
424 CODING_CATEGORY_UCS4,
425 CODING_CATEGORY_UTF8,
429 #endif /* MULE */ 426 #endif /* MULE */
430 CODING_CATEGORY_NO_CONVERSION 427 CODING_CATEGORY_NO_CONVERSION
431 }; 428 };
432 429
433 #define CODING_CATEGORY_LAST CODING_CATEGORY_NO_CONVERSION 430 #define CODING_CATEGORY_LAST CODING_CATEGORY_NO_CONVERSION
445 (1 << CODING_CATEGORY_ISO_8_2) 442 (1 << CODING_CATEGORY_ISO_8_2)
446 #define CODING_CATEGORY_ISO_LOCK_SHIFT_MASK \ 443 #define CODING_CATEGORY_ISO_LOCK_SHIFT_MASK \
447 (1 << CODING_CATEGORY_ISO_LOCK_SHIFT) 444 (1 << CODING_CATEGORY_ISO_LOCK_SHIFT)
448 #define CODING_CATEGORY_BIG5_MASK \ 445 #define CODING_CATEGORY_BIG5_MASK \
449 (1 << CODING_CATEGORY_BIG5) 446 (1 << CODING_CATEGORY_BIG5)
447 #define CODING_CATEGORY_UCS4_MASK \
448 (1 << CODING_CATEGORY_UCS4)
449 #define CODING_CATEGORY_UTF8_MASK \
450 (1 << CODING_CATEGORY_UTF8)
450 #endif 451 #endif
451 #define CODING_CATEGORY_NO_CONVERSION_MASK \ 452 #define CODING_CATEGORY_NO_CONVERSION_MASK \
452 (1 << CODING_CATEGORY_NO_CONVERSION) 453 (1 << CODING_CATEGORY_NO_CONVERSION)
453 #define CODING_CATEGORY_NOT_FINISHED_MASK \ 454 #define CODING_CATEGORY_NOT_FINISHED_MASK \
454 (1 << 30) 455 (1 << 30)
503 504
504 505
505 #ifndef MULE 506 #ifndef MULE
506 #define MIN_LEADING_BYTE 0x80 507 #define MIN_LEADING_BYTE 0x80
507 /* These need special treatment in a string and/or character */ 508 /* These need special treatment in a string and/or character */
509 #ifdef ENABLE_COMPOSITE_CHARS
508 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ 510 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
511 #endif
509 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ 512 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
510 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ 513 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */
511 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20) 514 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20)
512 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0) 515 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0)
513 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c) 516 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c)