Mercurial > hg > xemacs-beta
diff src/mule-coding.c @ 74:54cc21c15cbb r20-0b32
Import from CVS: tag r20-0b32
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:04:33 +0200 |
parents | 131b0175ea99 |
children | dbb370e3c29e |
line wrap: on
line diff
--- a/src/mule-coding.c Mon Aug 13 09:03:47 2007 +0200 +++ b/src/mule-coding.c Mon Aug 13 09:04:33 2007 +0200 @@ -123,34 +123,34 @@ sequence literally into the output stream, and later on insert the corresponding direction-restoring escape sequence literally also. */ - int switched_dir_and_no_valid_charset_yet :1; - int invalid_switch_dir :1; + unsigned int switched_dir_and_no_valid_charset_yet :1; + unsigned int invalid_switch_dir :1; /* Tells the decoder to output the escape sequence literally even though it was valid. Used in the games we play to avoid lossage when we encounter invalid designations. */ - int output_literally :1; + unsigned int output_literally :1; /* We encountered a direction switch followed by an invalid designation. We didn't output the direction switch literally because we didn't know about the invalid designation; but we have to do so now. */ - int output_direction_sequence :1; + unsigned int output_direction_sequence :1; }; Lisp_Object Fcopy_coding_system (Lisp_Object old_coding_system, Lisp_Object new_name); struct detection_state; -static int detect_coding_shift_jis (struct detection_state *st, - CONST unsigned char *src, - unsigned int n); -static void decode_coding_shift_jis (Lstream *decoding, - CONST unsigned char *src, - unsigned_char_dynarr *dst, - unsigned int n); -static void encode_coding_shift_jis (Lstream *encoding, - CONST unsigned char *src, - unsigned_char_dynarr *dst, - unsigned int n); +static int detect_coding_sjis (struct detection_state *st, + CONST unsigned char *src, + unsigned int n); +static void decode_coding_sjis (Lstream *decoding, + CONST unsigned char *src, + unsigned_char_dynarr *dst, + unsigned int n); +static void encode_coding_sjis (Lstream *encoding, + CONST unsigned char *src, + unsigned_char_dynarr *dst, + unsigned int n); static int detect_coding_big5 (struct detection_state *st, CONST unsigned char *src, unsigned int n); @@ -1232,10 +1232,10 @@ struct iso2022_decoder iso; unsigned int flags; int high_byte_count; - int saw_single_shift:1; + unsigned int saw_single_shift:1; } iso2022; - + struct { int seen_anything; @@ -1354,7 +1354,7 @@ if (!mask_has_at_most_one_bit_p (st->iso2022.mask)) st->iso2022.mask = detect_coding_iso2022 (st, src, n); if (!mask_has_at_most_one_bit_p (st->shift_jis.mask)) - st->shift_jis.mask = detect_coding_shift_jis (st, src, n); + st->shift_jis.mask = detect_coding_sjis (st, src, n); if (!mask_has_at_most_one_bit_p (st->big5.mask)) st->big5.mask = detect_coding_big5 (st, src, n); @@ -1588,7 +1588,7 @@ } \ else \ { \ - Dynarr_add (dst, LEADING_BYTE_LATIN_1); \ + Dynarr_add (dst, LEADING_BYTE_LATIN_ISO8859_1); \ Dynarr_add (dst, c); \ } \ } while (0) @@ -1602,14 +1602,12 @@ } \ } while (0) -#define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)\ -do { \ - DECODE_OUTPUT_PARTIAL_CHAR (ch); \ - if (flags & CODING_STATE_END) \ - { \ - if (flags & CODING_STATE_CR) \ - Dynarr_add (dst, '\r'); \ - } \ +#define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst) \ +do { \ + DECODE_OUTPUT_PARTIAL_CHAR (ch); \ + if ((flags & CODING_STATE_END) && \ + (flags & CODING_STATE_CR)) \ + Dynarr_add (dst, '\r'); \ } while (0) #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding) @@ -1950,7 +1948,7 @@ decode_coding_no_conversion (decoding, src, dst, n); break; case CODESYS_SHIFT_JIS: - decode_coding_shift_jis (decoding, src, dst, n); + decode_coding_sjis (decoding, src, dst, n); break; case CODESYS_BIG5: decode_coding_big5 (decoding, src, dst, n); @@ -1994,7 +1992,7 @@ get_buffer_range_char (buf, start, end, &b, &e, 0); coding_system = Fget_coding_system (coding_system); - instream = make_lisp_buffer_input_stream (buf, b, e, 0); + instream = make_lisp_buffer_input_stream (buf, b, e, 0); outstream = make_lisp_buffer_output_stream (buf, b, 0); outstream = make_decoding_output_stream (XLSTREAM (outstream), coding_system); @@ -2349,7 +2347,7 @@ encode_coding_no_conversion (encoding, src, dst, n); break; case CODESYS_SHIFT_JIS: - encode_coding_shift_jis (encoding, src, dst, n); + encode_coding_sjis (encoding, src, dst, n); break; case CODESYS_BIG5: encode_coding_big5 (encoding, src, dst, n); @@ -2382,7 +2380,7 @@ get_buffer_range_char (buf, start, end, &b, &e, 0); coding_system = Fget_coding_system (coding_system); - instream = make_lisp_buffer_input_stream (buf, b, e, 0); + instream = make_lisp_buffer_input_stream (buf, b, e, 0); outstream = make_lisp_buffer_output_stream (buf, b, 0); outstream = make_decoding_output_stream (XLSTREAM (outstream), Fget_coding_system (Qbinary)); @@ -2396,7 +2394,6 @@ ------> [ENCODE AS SPECIFIED] ------> [DECODE AS BINARY] ------> [BUFFER] - */ while (1) { @@ -2449,15 +2446,15 @@ /* Is this the first byte of a Shift-JIS two-byte char? */ -#define BYTE_SHIFT_JIS_TWO_BYTE_1_P(c) \ +#define BYTE_SJIS_TWO_BYTE_1_P(c) \ (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xEF)) /* Is this the second byte of a Shift-JIS two-byte char? */ -#define BYTE_SHIFT_JIS_TWO_BYTE_2_P(c) \ +#define BYTE_SJIS_TWO_BYTE_2_P(c) \ (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC)) -#define BYTE_SHIFT_JIS_KATAKANA_P(c) \ +#define BYTE_SJIS_KATAKANA_P(c) \ ((c) >= 0xA1 && (c) <= 0xDF) /* Code conversion macros. These are macros because they are used in @@ -2472,61 +2469,35 @@ /* Convert shift-JIS code (sj1, sj2) into internal string representation (c1, c2). (The leading byte is assumed.) */ -#define DECODE_SHIFT_JIS(sj1, sj2, c1, c2) do \ -{ \ - int I1 = sj1, I2 = sj2; \ - if (I2 >= 0x9f) \ - { \ - if (I1 >= 0xe0) \ - c1 = (I1 << 1) - 0xe0; \ - else \ - c1 = (I1 << 1) - 0x60; \ - c2 = I2 + 2; \ - } \ - else \ - { \ - if (I1 >= 0xe0) \ - c1 = (I1 << 1) - 0xe1; \ - else \ - c1 = (I1 << 1) - 0x61; \ - if (I2 >= 0x7f) \ - c2 = I2 + 0x60; \ - else \ - c2 = I2 + 0x61; \ - } \ +#define DECODE_SJIS(sj1, sj2, c1, c2) \ +do { \ + int I1 = sj1, I2 = sj2; \ + if (I2 >= 0x9f) \ + c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe0 : 0x60), \ + c2 = I2 + 2; \ + else \ + c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe1 : 0x61), \ + c2 = I2 + ((I2 >= 0x7f) ? 0x60 : 0x61); \ } while (0) /* Convert the internal string representation of a Shift-JIS character (c1, c2) into Shift-JIS code (sj1, sj2). The leading byte is assumed. */ -#define ENCODE_SHIFT_JIS(c1, c2, sj1, sj2) do \ -{ \ - int I1 = c1, I2 = sj2; \ - if (I1 & 1) \ - { \ - if (I1 < 0xdf) \ - sj1 = (I1 >> 1) + 0x31; \ - else \ - sj1 = (I1 >> 1) + 0x71; \ - if (I2 >= 0xe0) \ - sj2 = I2 - 0x60; \ - else \ - sj2 = I2 - 0x61; \ - } \ - else \ - { \ - if (I1 < 0xdf) \ - sj1 = (I1 >> 1) + 0x30; \ - else \ - sj1 = (I1 >> 1) + 0x70; \ - sj2 = I2 - 2; \ - } \ +#define ENCODE_SJIS(c1, c2, sj1, sj2) \ +do { \ + int I1 = c1, I2 = sj2; \ + if (I1 & 1) \ + sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x31 : 0x71), \ + sj2 = I2 - ((I2 >= 0xe0) ? 0x60 : 0x61); \ + else \ + sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x30 : 0x70), \ + sj2 = I2 - 2; \ } while (0) static int -detect_coding_shift_jis (struct detection_state *st, CONST unsigned char *src, - unsigned int n) +detect_coding_sjis (struct detection_state *st, CONST unsigned char *src, + unsigned int n) { int c; @@ -2550,8 +2521,8 @@ /* Convert Shift-JIS data to internal format. */ static void -decode_coding_shift_jis (Lstream *decoding, CONST unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +decode_coding_sjis (Lstream *decoding, CONST unsigned char *src, + unsigned_char_dynarr *dst, unsigned int n) { unsigned char c; unsigned int flags, ch; @@ -2568,12 +2539,12 @@ if (ch) { /* Previous character was first byte of Shift-JIS Kanji char. */ - if (BYTE_SHIFT_JIS_TWO_BYTE_2_P (c)) + if (BYTE_SJIS_TWO_BYTE_2_P (c)) { unsigned char e1, e2; Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); - DECODE_SHIFT_JIS (ch, c, e1, e2); + DECODE_SJIS (ch, c, e1, e2); Dynarr_add (dst, e1); Dynarr_add (dst, e2); } @@ -2587,11 +2558,11 @@ else { DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst); - if (BYTE_SHIFT_JIS_TWO_BYTE_1_P (c)) + if (BYTE_SJIS_TWO_BYTE_1_P (c)) ch = c; - else if (BYTE_SHIFT_JIS_KATAKANA_P (c)) + else if (BYTE_SJIS_KATAKANA_P (c)) { - Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0201_KANA); + Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201); Dynarr_add (dst, c); } else @@ -2608,8 +2579,8 @@ /* Convert internally-formatted data to Shift-JIS. */ static void -encode_coding_shift_jis (Lstream *encoding, CONST unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +encode_coding_sjis (Lstream *encoding, CONST unsigned char *src, + unsigned_char_dynarr *dst, unsigned int n) { unsigned char c; struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); @@ -2636,12 +2607,12 @@ ch = 0; } else if (BUFBYTE_LEADING_BYTE_P (c)) - ch = (c == LEADING_BYTE_JAPANESE_JISX0201_KANA || + ch = (c == LEADING_BYTE_KATAKANA_JISX0201 || c == LEADING_BYTE_JAPANESE_JISX0208_1978 || c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; else if (ch) { - if (ch == LEADING_BYTE_JAPANESE_JISX0201_KANA) + if (ch == LEADING_BYTE_KATAKANA_JISX0201) { Dynarr_add (dst, c); ch = 0; @@ -2652,7 +2623,7 @@ else { unsigned char j1, j2; - ENCODE_SHIFT_JIS (ch, c, j1, j2); + ENCODE_SJIS (ch, c, j1, j2); Dynarr_add (dst, j1); Dynarr_add (dst, j2); ch = 0; @@ -2677,10 +2648,10 @@ CHECK_INT (XCDR (code)); s1 = XINT (XCAR (code)); s2 = XINT (XCDR (code)); - if (BYTE_SHIFT_JIS_TWO_BYTE_1_P (s1) && - BYTE_SHIFT_JIS_TWO_BYTE_2_P (s2)) + if (BYTE_SJIS_TWO_BYTE_1_P (s1) && + BYTE_SJIS_TWO_BYTE_2_P (s2)) { - DECODE_SHIFT_JIS (s1, s2, c1, c2); + DECODE_SJIS (s1, s2, c1, c2); return make_char (MAKE_CHAR (Vcharset_japanese_jisx0208, c1 & 0x7F, c2 & 0x7F)); } @@ -2701,7 +2672,7 @@ BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); if (EQ (charset, Vcharset_japanese_jisx0208)) { - ENCODE_SHIFT_JIS (c1 | 0x80, c2 | 0x80, s1, s2); + ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); return Fcons (make_int (s1), make_int (s2)); } else @@ -3405,7 +3376,7 @@ { int jj; - /* If we are in the thrall of in invalid designation, + /* If we are in the thrall of an invalid designation, then stick the directionality sequence literally into the output stream so it ends up in the original text again. */ for (jj = 0; jj < 4; jj++) @@ -3563,7 +3534,7 @@ } static int -detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src, +detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src, unsigned int n) { int c; @@ -4423,14 +4394,15 @@ else if (BUFBYTE_LEADING_BYTE_P (c)) { assert (ch == 0); - if (c == LEADING_BYTE_LATIN_1 || c == LEADING_BYTE_CONTROL_1) + if (c == LEADING_BYTE_LATIN_ISO8859_1 || + c == LEADING_BYTE_CONTROL_1) ch = c; else Dynarr_add (dst, '~'); /* untranslatable character */ } else { - if (ch == LEADING_BYTE_LATIN_1) + if (ch == LEADING_BYTE_LATIN_ISO8859_1) Dynarr_add (dst, c); else if (ch == LEADING_BYTE_CONTROL_1) { @@ -4488,9 +4460,9 @@ for (; ptr < end;) { Bufbyte c = - (BYTE_ASCII_P (*ptr)) ? *ptr : - (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) : - (*ptr == LEADING_BYTE_LATIN_1) ? (*(ptr+1)) : + (BYTE_ASCII_P (*ptr)) ? *ptr : + (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) : + (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) : '~'; Dynarr_add (conversion_out_dynarr, (Extbyte) c);