comparison src/file-coding.h @ 371:cc15677e0335 r21-2b1

Import from CVS: tag r21-2b1
author cvs
date Mon, 13 Aug 2007 11:03:08 +0200
parents a4f53d9b3154
children 8626e4521993
comparison
equal deleted inserted replaced
370:bd866891f083 371:cc15677e0335
303 If both CODING_STATE_SS2 and 303 If both CODING_STATE_SS2 and
304 CODING_STATE_SS3 are set, 304 CODING_STATE_SS3 are set,
305 CODING_STATE_SS2 overrides; but 305 CODING_STATE_SS2 overrides; but
306 this probably indicates an error 306 this probably indicates an error
307 in the text encoding. */ 307 in the text encoding. */
308 #ifdef ENABLE_COMPOSITE_CHARS
309 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing 308 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing
310 a composite character (i.e. a 309 a composite character (i.e. a
311 character constructed by 310 character constructed by
312 overstriking two or more 311 overstriking two or more
313 characters). */ 312 characters). */
314 #endif /* ENABLE_COMPOSITE_CHARS */
315 313
316 314
317 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until 315 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until
318 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are 316 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are
319 turned off at the end of processing each character or escape sequence. */ 317 turned off at the end of processing each character or escape sequence. */
320 #ifdef ENABLE_COMPOSITE_CHARS
321 # define CODING_STATE_ISO2022_LOCK \ 318 # define CODING_STATE_ISO2022_LOCK \
322 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L) 319 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L)
323 #else 320 #define CODING_STATE_BIG5_LOCK \
324 # define CODING_STATE_ISO2022_LOCK (CODING_STATE_END | CODING_STATE_R2L) 321 CODING_STATE_END
325 #endif
326
327 #define CODING_STATE_BIG5_LOCK CODING_STATE_END
328 322
329 /* Flags indicating what we've seen so far when parsing an 323 /* Flags indicating what we've seen so far when parsing an
330 ISO2022 escape sequence. */ 324 ISO2022 escape sequence. */
331 enum iso_esc_flag 325 enum iso_esc_flag
332 { 326 {
373 character must be ]. */ 367 character must be ]. */
374 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next 368 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next
375 character must be ]. */ 369 character must be ]. */
376 370
377 /* Full sequences. */ 371 /* Full sequences. */
378 #ifdef ENABLE_COMPOSITE_CHARS
379 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */ 372 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */
380 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */ 373 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */
381 #endif /* ENABLE_COMPOSITE_CHARS */
382 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */ 374 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */
383 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */ 375 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */
384 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */ 376 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */
385 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality 377 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality
386 sequence. */ 378 sequence. */
415 (str)->ch = ch; \ 407 (str)->ch = ch; \
416 } while (0) 408 } while (0)
417 409
418 410
419 /* For detecting the encoding of text */ 411 /* For detecting the encoding of text */
420 /* The order is chosen so that by default only ISO 2022 7-bit encodings
421 are auto-detected. These are needed for Mule files. */
422 enum coding_category_type 412 enum coding_category_type
423 { 413 {
424 #ifdef MULE 414 #ifdef MULE
415 CODING_CATEGORY_SHIFT_JIS,
425 CODING_CATEGORY_ISO_7, /* ISO2022 system using only seven-bit bytes, 416 CODING_CATEGORY_ISO_7, /* ISO2022 system using only seven-bit bytes,
426 no locking shift */ 417 no locking shift */
427 CODING_CATEGORY_NO_CONVERSION, 418 CODING_CATEGORY_ISO_8_DESIGNATE, /* ISO2022 system using eight-bit bytes,
419 no locking shift, no single shift,
420 using designation to switch charsets */
428 CODING_CATEGORY_ISO_8_1, /* ISO2022 system using eight-bit bytes, 421 CODING_CATEGORY_ISO_8_1, /* ISO2022 system using eight-bit bytes,
429 no locking shift, no designation sequences, 422 no locking shift, no designation sequences,
430 one-dimension characters in the upper half. */ 423 one-dimension characters in the upper half. */
431 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes, 424 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes,
432 no locking shift, no designation sequences, 425 no locking shift, no designation sequences,
433 two-dimension characters in the upper half. */ 426 two-dimension characters in the upper half. */
434 CODING_CATEGORY_ISO_8_DESIGNATE, /* ISO2022 system using eight-bit bytes,
435 no locking shift, no single shift,
436 using designation to switch charsets */
437 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */ 427 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */
438 CODING_CATEGORY_BIG5, 428 CODING_CATEGORY_BIG5,
439 CODING_CATEGORY_SHIFT_JIS 429 #endif /* MULE */
440 #else /* MULE */
441 CODING_CATEGORY_NO_CONVERSION 430 CODING_CATEGORY_NO_CONVERSION
442 #endif /* not MULE */
443 }; 431 };
444 432
445 #ifdef MULE
446 #define CODING_CATEGORY_LAST CODING_CATEGORY_SHIFT_JIS
447 #else
448 #define CODING_CATEGORY_LAST CODING_CATEGORY_NO_CONVERSION 433 #define CODING_CATEGORY_LAST CODING_CATEGORY_NO_CONVERSION
449 #endif
450 434
451 #ifdef MULE 435 #ifdef MULE
452 #define CODING_CATEGORY_SHIFT_JIS_MASK \ 436 #define CODING_CATEGORY_SHIFT_JIS_MASK \
453 (1 << CODING_CATEGORY_SHIFT_JIS) 437 (1 << CODING_CATEGORY_SHIFT_JIS)
454 #define CODING_CATEGORY_ISO_7_MASK \ 438 #define CODING_CATEGORY_ISO_7_MASK \
519 503
520 504
521 #ifndef MULE 505 #ifndef MULE
522 #define MIN_LEADING_BYTE 0x80 506 #define MIN_LEADING_BYTE 0x80
523 /* These need special treatment in a string and/or character */ 507 /* These need special treatment in a string and/or character */
524 #ifdef ENABLE_COMPOSITE_CHARS
525 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ 508 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
526 #endif
527 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ 509 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
528 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ 510 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */
529 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20) 511 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20)
530 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0) 512 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0)
531 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c) 513 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c)