Mercurial > hg > xemacs-beta
comparison src/mule-coding.c @ 74:54cc21c15cbb r20-0b32
Import from CVS: tag r20-0b32
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:04:33 +0200 |
parents | 131b0175ea99 |
children | dbb370e3c29e |
comparison
equal
deleted
inserted
replaced
73:e2d7a37b7c8d | 74:54cc21c15cbb |
---|---|
121 just after a direction switch (i.e. no valid designation | 121 just after a direction switch (i.e. no valid designation |
122 encountered yet), we insert the direction-switch escape | 122 encountered yet), we insert the direction-switch escape |
123 sequence literally into the output stream, and later on | 123 sequence literally into the output stream, and later on |
124 insert the corresponding direction-restoring escape sequence | 124 insert the corresponding direction-restoring escape sequence |
125 literally also. */ | 125 literally also. */ |
126 int switched_dir_and_no_valid_charset_yet :1; | 126 unsigned int switched_dir_and_no_valid_charset_yet :1; |
127 int invalid_switch_dir :1; | 127 unsigned int invalid_switch_dir :1; |
128 | 128 |
129 /* Tells the decoder to output the escape sequence literally | 129 /* Tells the decoder to output the escape sequence literally |
130 even though it was valid. Used in the games we play to | 130 even though it was valid. Used in the games we play to |
131 avoid lossage when we encounter invalid designations. */ | 131 avoid lossage when we encounter invalid designations. */ |
132 int output_literally :1; | 132 unsigned int output_literally :1; |
133 /* We encountered a direction switch followed by an invalid | 133 /* We encountered a direction switch followed by an invalid |
134 designation. We didn't output the direction switch | 134 designation. We didn't output the direction switch |
135 literally because we didn't know about the invalid designation; | 135 literally because we didn't know about the invalid designation; |
136 but we have to do so now. */ | 136 but we have to do so now. */ |
137 int output_direction_sequence :1; | 137 unsigned int output_direction_sequence :1; |
138 }; | 138 }; |
139 | 139 |
140 Lisp_Object Fcopy_coding_system (Lisp_Object old_coding_system, | 140 Lisp_Object Fcopy_coding_system (Lisp_Object old_coding_system, |
141 Lisp_Object new_name); | 141 Lisp_Object new_name); |
142 struct detection_state; | 142 struct detection_state; |
143 static int detect_coding_shift_jis (struct detection_state *st, | 143 static int detect_coding_sjis (struct detection_state *st, |
144 CONST unsigned char *src, | 144 CONST unsigned char *src, |
145 unsigned int n); | 145 unsigned int n); |
146 static void decode_coding_shift_jis (Lstream *decoding, | 146 static void decode_coding_sjis (Lstream *decoding, |
147 CONST unsigned char *src, | 147 CONST unsigned char *src, |
148 unsigned_char_dynarr *dst, | 148 unsigned_char_dynarr *dst, |
149 unsigned int n); | 149 unsigned int n); |
150 static void encode_coding_shift_jis (Lstream *encoding, | 150 static void encode_coding_sjis (Lstream *encoding, |
151 CONST unsigned char *src, | 151 CONST unsigned char *src, |
152 unsigned_char_dynarr *dst, | 152 unsigned_char_dynarr *dst, |
153 unsigned int n); | 153 unsigned int n); |
154 static int detect_coding_big5 (struct detection_state *st, | 154 static int detect_coding_big5 (struct detection_state *st, |
155 CONST unsigned char *src, | 155 CONST unsigned char *src, |
156 unsigned int n); | 156 unsigned int n); |
157 static void decode_coding_big5 (Lstream *decoding, | 157 static void decode_coding_big5 (Lstream *decoding, |
158 CONST unsigned char *src, | 158 CONST unsigned char *src, |
1230 int mask; | 1230 int mask; |
1231 int initted; | 1231 int initted; |
1232 struct iso2022_decoder iso; | 1232 struct iso2022_decoder iso; |
1233 unsigned int flags; | 1233 unsigned int flags; |
1234 int high_byte_count; | 1234 int high_byte_count; |
1235 int saw_single_shift:1; | 1235 unsigned int saw_single_shift:1; |
1236 } | 1236 } |
1237 iso2022; | 1237 iso2022; |
1238 | 1238 |
1239 struct | 1239 struct |
1240 { | 1240 { |
1241 int seen_anything; | 1241 int seen_anything; |
1242 int just_saw_cr; | 1242 int just_saw_cr; |
1243 } | 1243 } |
1352 return 0; | 1352 return 0; |
1353 | 1353 |
1354 if (!mask_has_at_most_one_bit_p (st->iso2022.mask)) | 1354 if (!mask_has_at_most_one_bit_p (st->iso2022.mask)) |
1355 st->iso2022.mask = detect_coding_iso2022 (st, src, n); | 1355 st->iso2022.mask = detect_coding_iso2022 (st, src, n); |
1356 if (!mask_has_at_most_one_bit_p (st->shift_jis.mask)) | 1356 if (!mask_has_at_most_one_bit_p (st->shift_jis.mask)) |
1357 st->shift_jis.mask = detect_coding_shift_jis (st, src, n); | 1357 st->shift_jis.mask = detect_coding_sjis (st, src, n); |
1358 if (!mask_has_at_most_one_bit_p (st->big5.mask)) | 1358 if (!mask_has_at_most_one_bit_p (st->big5.mask)) |
1359 st->big5.mask = detect_coding_big5 (st, src, n); | 1359 st->big5.mask = detect_coding_big5 (st, src, n); |
1360 | 1360 |
1361 st->mask = st->iso2022.mask | st->shift_jis.mask | st->big5.mask; | 1361 st->mask = st->iso2022.mask | st->shift_jis.mask | st->big5.mask; |
1362 | 1362 |
1586 Dynarr_add (dst, LEADING_BYTE_CONTROL_1); \ | 1586 Dynarr_add (dst, LEADING_BYTE_CONTROL_1); \ |
1587 Dynarr_add (dst, c + 0x20); \ | 1587 Dynarr_add (dst, c + 0x20); \ |
1588 } \ | 1588 } \ |
1589 else \ | 1589 else \ |
1590 { \ | 1590 { \ |
1591 Dynarr_add (dst, LEADING_BYTE_LATIN_1); \ | 1591 Dynarr_add (dst, LEADING_BYTE_LATIN_ISO8859_1); \ |
1592 Dynarr_add (dst, c); \ | 1592 Dynarr_add (dst, c); \ |
1593 } \ | 1593 } \ |
1594 } while (0) | 1594 } while (0) |
1595 | 1595 |
1596 #define DECODE_OUTPUT_PARTIAL_CHAR(ch) \ | 1596 #define DECODE_OUTPUT_PARTIAL_CHAR(ch) \ |
1600 DECODE_ADD_BINARY_CHAR (ch, dst); \ | 1600 DECODE_ADD_BINARY_CHAR (ch, dst); \ |
1601 ch = 0; \ | 1601 ch = 0; \ |
1602 } \ | 1602 } \ |
1603 } while (0) | 1603 } while (0) |
1604 | 1604 |
1605 #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst)\ | 1605 #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst) \ |
1606 do { \ | 1606 do { \ |
1607 DECODE_OUTPUT_PARTIAL_CHAR (ch); \ | 1607 DECODE_OUTPUT_PARTIAL_CHAR (ch); \ |
1608 if (flags & CODING_STATE_END) \ | 1608 if ((flags & CODING_STATE_END) && \ |
1609 { \ | 1609 (flags & CODING_STATE_CR)) \ |
1610 if (flags & CODING_STATE_CR) \ | 1610 Dynarr_add (dst, '\r'); \ |
1611 Dynarr_add (dst, '\r'); \ | |
1612 } \ | |
1613 } while (0) | 1611 } while (0) |
1614 | 1612 |
1615 #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding) | 1613 #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding) |
1616 | 1614 |
1617 struct decoding_stream | 1615 struct decoding_stream |
1948 system, then do no conversion. */ | 1946 system, then do no conversion. */ |
1949 case CODESYS_NO_CONVERSION: | 1947 case CODESYS_NO_CONVERSION: |
1950 decode_coding_no_conversion (decoding, src, dst, n); | 1948 decode_coding_no_conversion (decoding, src, dst, n); |
1951 break; | 1949 break; |
1952 case CODESYS_SHIFT_JIS: | 1950 case CODESYS_SHIFT_JIS: |
1953 decode_coding_shift_jis (decoding, src, dst, n); | 1951 decode_coding_sjis (decoding, src, dst, n); |
1954 break; | 1952 break; |
1955 case CODESYS_BIG5: | 1953 case CODESYS_BIG5: |
1956 decode_coding_big5 (decoding, src, dst, n); | 1954 decode_coding_big5 (decoding, src, dst, n); |
1957 break; | 1955 break; |
1958 case CODESYS_CCL: | 1956 case CODESYS_CCL: |
1992 int speccount = specpdl_depth (); | 1990 int speccount = specpdl_depth (); |
1993 struct gcpro gcpro1, gcpro2; | 1991 struct gcpro gcpro1, gcpro2; |
1994 | 1992 |
1995 get_buffer_range_char (buf, start, end, &b, &e, 0); | 1993 get_buffer_range_char (buf, start, end, &b, &e, 0); |
1996 coding_system = Fget_coding_system (coding_system); | 1994 coding_system = Fget_coding_system (coding_system); |
1997 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | 1995 instream = make_lisp_buffer_input_stream (buf, b, e, 0); |
1998 outstream = make_lisp_buffer_output_stream (buf, b, 0); | 1996 outstream = make_lisp_buffer_output_stream (buf, b, 0); |
1999 outstream = make_decoding_output_stream (XLSTREAM (outstream), | 1997 outstream = make_decoding_output_stream (XLSTREAM (outstream), |
2000 coding_system); | 1998 coding_system); |
2001 outstream = make_encoding_output_stream (XLSTREAM (outstream), | 1999 outstream = make_encoding_output_stream (XLSTREAM (outstream), |
2002 Fget_coding_system (Qbinary)); | 2000 Fget_coding_system (Qbinary)); |
2347 system, then do no conversion. */ | 2345 system, then do no conversion. */ |
2348 case CODESYS_NO_CONVERSION: | 2346 case CODESYS_NO_CONVERSION: |
2349 encode_coding_no_conversion (encoding, src, dst, n); | 2347 encode_coding_no_conversion (encoding, src, dst, n); |
2350 break; | 2348 break; |
2351 case CODESYS_SHIFT_JIS: | 2349 case CODESYS_SHIFT_JIS: |
2352 encode_coding_shift_jis (encoding, src, dst, n); | 2350 encode_coding_sjis (encoding, src, dst, n); |
2353 break; | 2351 break; |
2354 case CODESYS_BIG5: | 2352 case CODESYS_BIG5: |
2355 encode_coding_big5 (encoding, src, dst, n); | 2353 encode_coding_big5 (encoding, src, dst, n); |
2356 break; | 2354 break; |
2357 case CODESYS_CCL: | 2355 case CODESYS_CCL: |
2380 int speccount = specpdl_depth (); | 2378 int speccount = specpdl_depth (); |
2381 struct gcpro gcpro1, gcpro2; | 2379 struct gcpro gcpro1, gcpro2; |
2382 | 2380 |
2383 get_buffer_range_char (buf, start, end, &b, &e, 0); | 2381 get_buffer_range_char (buf, start, end, &b, &e, 0); |
2384 coding_system = Fget_coding_system (coding_system); | 2382 coding_system = Fget_coding_system (coding_system); |
2385 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | 2383 instream = make_lisp_buffer_input_stream (buf, b, e, 0); |
2386 outstream = make_lisp_buffer_output_stream (buf, b, 0); | 2384 outstream = make_lisp_buffer_output_stream (buf, b, 0); |
2387 outstream = make_decoding_output_stream (XLSTREAM (outstream), | 2385 outstream = make_decoding_output_stream (XLSTREAM (outstream), |
2388 Fget_coding_system (Qbinary)); | 2386 Fget_coding_system (Qbinary)); |
2389 outstream = make_encoding_output_stream (XLSTREAM (outstream), | 2387 outstream = make_encoding_output_stream (XLSTREAM (outstream), |
2390 coding_system); | 2388 coding_system); |
2394 | 2392 |
2395 [BUFFER] <----- send through | 2393 [BUFFER] <----- send through |
2396 ------> [ENCODE AS SPECIFIED] | 2394 ------> [ENCODE AS SPECIFIED] |
2397 ------> [DECODE AS BINARY] | 2395 ------> [DECODE AS BINARY] |
2398 ------> [BUFFER] | 2396 ------> [BUFFER] |
2399 | |
2400 */ | 2397 */ |
2401 while (1) | 2398 while (1) |
2402 { | 2399 { |
2403 int size_in_bytes; | 2400 int size_in_bytes; |
2404 Bufpos oldpos, newpos, even_newer_pos; | 2401 Bufpos oldpos, newpos, even_newer_pos; |
2447 | 2444 |
2448 */ | 2445 */ |
2449 | 2446 |
2450 /* Is this the first byte of a Shift-JIS two-byte char? */ | 2447 /* Is this the first byte of a Shift-JIS two-byte char? */ |
2451 | 2448 |
2452 #define BYTE_SHIFT_JIS_TWO_BYTE_1_P(c) \ | 2449 #define BYTE_SJIS_TWO_BYTE_1_P(c) \ |
2453 (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xEF)) | 2450 (((c) >= 0x81 && (c) <= 0x9F) || ((c) >= 0xE0 && (c) <= 0xEF)) |
2454 | 2451 |
2455 /* Is this the second byte of a Shift-JIS two-byte char? */ | 2452 /* Is this the second byte of a Shift-JIS two-byte char? */ |
2456 | 2453 |
2457 #define BYTE_SHIFT_JIS_TWO_BYTE_2_P(c) \ | 2454 #define BYTE_SJIS_TWO_BYTE_2_P(c) \ |
2458 (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC)) | 2455 (((c) >= 0x40 && (c) <= 0x7E) || ((c) >= 0x80 && (c) <= 0xFC)) |
2459 | 2456 |
2460 #define BYTE_SHIFT_JIS_KATAKANA_P(c) \ | 2457 #define BYTE_SJIS_KATAKANA_P(c) \ |
2461 ((c) >= 0xA1 && (c) <= 0xDF) | 2458 ((c) >= 0xA1 && (c) <= 0xDF) |
2462 | 2459 |
2463 /* Code conversion macros. These are macros because they are used in | 2460 /* Code conversion macros. These are macros because they are used in |
2464 inner loops during code conversion. | 2461 inner loops during code conversion. |
2465 | 2462 |
2470 (e.g. Qstring). */ | 2467 (e.g. Qstring). */ |
2471 | 2468 |
2472 /* Convert shift-JIS code (sj1, sj2) into internal string | 2469 /* Convert shift-JIS code (sj1, sj2) into internal string |
2473 representation (c1, c2). (The leading byte is assumed.) */ | 2470 representation (c1, c2). (The leading byte is assumed.) */ |
2474 | 2471 |
2475 #define DECODE_SHIFT_JIS(sj1, sj2, c1, c2) do \ | 2472 #define DECODE_SJIS(sj1, sj2, c1, c2) \ |
2476 { \ | 2473 do { \ |
2477 int I1 = sj1, I2 = sj2; \ | 2474 int I1 = sj1, I2 = sj2; \ |
2478 if (I2 >= 0x9f) \ | 2475 if (I2 >= 0x9f) \ |
2479 { \ | 2476 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe0 : 0x60), \ |
2480 if (I1 >= 0xe0) \ | 2477 c2 = I2 + 2; \ |
2481 c1 = (I1 << 1) - 0xe0; \ | 2478 else \ |
2482 else \ | 2479 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe1 : 0x61), \ |
2483 c1 = (I1 << 1) - 0x60; \ | 2480 c2 = I2 + ((I2 >= 0x7f) ? 0x60 : 0x61); \ |
2484 c2 = I2 + 2; \ | |
2485 } \ | |
2486 else \ | |
2487 { \ | |
2488 if (I1 >= 0xe0) \ | |
2489 c1 = (I1 << 1) - 0xe1; \ | |
2490 else \ | |
2491 c1 = (I1 << 1) - 0x61; \ | |
2492 if (I2 >= 0x7f) \ | |
2493 c2 = I2 + 0x60; \ | |
2494 else \ | |
2495 c2 = I2 + 0x61; \ | |
2496 } \ | |
2497 } while (0) | 2481 } while (0) |
2498 | 2482 |
2499 /* Convert the internal string representation of a Shift-JIS character | 2483 /* Convert the internal string representation of a Shift-JIS character |
2500 (c1, c2) into Shift-JIS code (sj1, sj2). The leading byte is | 2484 (c1, c2) into Shift-JIS code (sj1, sj2). The leading byte is |
2501 assumed. */ | 2485 assumed. */ |
2502 | 2486 |
2503 #define ENCODE_SHIFT_JIS(c1, c2, sj1, sj2) do \ | 2487 #define ENCODE_SJIS(c1, c2, sj1, sj2) \ |
2504 { \ | 2488 do { \ |
2505 int I1 = c1, I2 = sj2; \ | 2489 int I1 = c1, I2 = sj2; \ |
2506 if (I1 & 1) \ | 2490 if (I1 & 1) \ |
2507 { \ | 2491 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x31 : 0x71), \ |
2508 if (I1 < 0xdf) \ | 2492 sj2 = I2 - ((I2 >= 0xe0) ? 0x60 : 0x61); \ |
2509 sj1 = (I1 >> 1) + 0x31; \ | 2493 else \ |
2510 else \ | 2494 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x30 : 0x70), \ |
2511 sj1 = (I1 >> 1) + 0x71; \ | 2495 sj2 = I2 - 2; \ |
2512 if (I2 >= 0xe0) \ | |
2513 sj2 = I2 - 0x60; \ | |
2514 else \ | |
2515 sj2 = I2 - 0x61; \ | |
2516 } \ | |
2517 else \ | |
2518 { \ | |
2519 if (I1 < 0xdf) \ | |
2520 sj1 = (I1 >> 1) + 0x30; \ | |
2521 else \ | |
2522 sj1 = (I1 >> 1) + 0x70; \ | |
2523 sj2 = I2 - 2; \ | |
2524 } \ | |
2525 } while (0) | 2496 } while (0) |
2526 | 2497 |
2527 static int | 2498 static int |
2528 detect_coding_shift_jis (struct detection_state *st, CONST unsigned char *src, | 2499 detect_coding_sjis (struct detection_state *st, CONST unsigned char *src, |
2529 unsigned int n) | 2500 unsigned int n) |
2530 { | 2501 { |
2531 int c; | 2502 int c; |
2532 | 2503 |
2533 while (n--) | 2504 while (n--) |
2534 { | 2505 { |
2548 } | 2519 } |
2549 | 2520 |
2550 /* Convert Shift-JIS data to internal format. */ | 2521 /* Convert Shift-JIS data to internal format. */ |
2551 | 2522 |
2552 static void | 2523 static void |
2553 decode_coding_shift_jis (Lstream *decoding, CONST unsigned char *src, | 2524 decode_coding_sjis (Lstream *decoding, CONST unsigned char *src, |
2554 unsigned_char_dynarr *dst, unsigned int n) | 2525 unsigned_char_dynarr *dst, unsigned int n) |
2555 { | 2526 { |
2556 unsigned char c; | 2527 unsigned char c; |
2557 unsigned int flags, ch; | 2528 unsigned int flags, ch; |
2558 int eol; | 2529 int eol; |
2559 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 2530 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
2566 c = *src++; | 2537 c = *src++; |
2567 | 2538 |
2568 if (ch) | 2539 if (ch) |
2569 { | 2540 { |
2570 /* Previous character was first byte of Shift-JIS Kanji char. */ | 2541 /* Previous character was first byte of Shift-JIS Kanji char. */ |
2571 if (BYTE_SHIFT_JIS_TWO_BYTE_2_P (c)) | 2542 if (BYTE_SJIS_TWO_BYTE_2_P (c)) |
2572 { | 2543 { |
2573 unsigned char e1, e2; | 2544 unsigned char e1, e2; |
2574 | 2545 |
2575 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); | 2546 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); |
2576 DECODE_SHIFT_JIS (ch, c, e1, e2); | 2547 DECODE_SJIS (ch, c, e1, e2); |
2577 Dynarr_add (dst, e1); | 2548 Dynarr_add (dst, e1); |
2578 Dynarr_add (dst, e2); | 2549 Dynarr_add (dst, e2); |
2579 } | 2550 } |
2580 else | 2551 else |
2581 { | 2552 { |
2585 ch = 0; | 2556 ch = 0; |
2586 } | 2557 } |
2587 else | 2558 else |
2588 { | 2559 { |
2589 DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst); | 2560 DECODE_HANDLE_EOL_TYPE (eol, c, flags, dst); |
2590 if (BYTE_SHIFT_JIS_TWO_BYTE_1_P (c)) | 2561 if (BYTE_SJIS_TWO_BYTE_1_P (c)) |
2591 ch = c; | 2562 ch = c; |
2592 else if (BYTE_SHIFT_JIS_KATAKANA_P (c)) | 2563 else if (BYTE_SJIS_KATAKANA_P (c)) |
2593 { | 2564 { |
2594 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0201_KANA); | 2565 Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201); |
2595 Dynarr_add (dst, c); | 2566 Dynarr_add (dst, c); |
2596 } | 2567 } |
2597 else | 2568 else |
2598 DECODE_ADD_BINARY_CHAR (c, dst); | 2569 DECODE_ADD_BINARY_CHAR (c, dst); |
2599 } | 2570 } |
2606 } | 2577 } |
2607 | 2578 |
2608 /* Convert internally-formatted data to Shift-JIS. */ | 2579 /* Convert internally-formatted data to Shift-JIS. */ |
2609 | 2580 |
2610 static void | 2581 static void |
2611 encode_coding_shift_jis (Lstream *encoding, CONST unsigned char *src, | 2582 encode_coding_sjis (Lstream *encoding, CONST unsigned char *src, |
2612 unsigned_char_dynarr *dst, unsigned int n) | 2583 unsigned_char_dynarr *dst, unsigned int n) |
2613 { | 2584 { |
2614 unsigned char c; | 2585 unsigned char c; |
2615 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 2586 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
2616 unsigned int flags, ch; | 2587 unsigned int flags, ch; |
2617 int eol; | 2588 int eol; |
2634 { | 2605 { |
2635 Dynarr_add (dst, c); | 2606 Dynarr_add (dst, c); |
2636 ch = 0; | 2607 ch = 0; |
2637 } | 2608 } |
2638 else if (BUFBYTE_LEADING_BYTE_P (c)) | 2609 else if (BUFBYTE_LEADING_BYTE_P (c)) |
2639 ch = (c == LEADING_BYTE_JAPANESE_JISX0201_KANA || | 2610 ch = (c == LEADING_BYTE_KATAKANA_JISX0201 || |
2640 c == LEADING_BYTE_JAPANESE_JISX0208_1978 || | 2611 c == LEADING_BYTE_JAPANESE_JISX0208_1978 || |
2641 c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; | 2612 c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; |
2642 else if (ch) | 2613 else if (ch) |
2643 { | 2614 { |
2644 if (ch == LEADING_BYTE_JAPANESE_JISX0201_KANA) | 2615 if (ch == LEADING_BYTE_KATAKANA_JISX0201) |
2645 { | 2616 { |
2646 Dynarr_add (dst, c); | 2617 Dynarr_add (dst, c); |
2647 ch = 0; | 2618 ch = 0; |
2648 } | 2619 } |
2649 else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 || | 2620 else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 || |
2650 ch == LEADING_BYTE_JAPANESE_JISX0208) | 2621 ch == LEADING_BYTE_JAPANESE_JISX0208) |
2651 ch = c; | 2622 ch = c; |
2652 else | 2623 else |
2653 { | 2624 { |
2654 unsigned char j1, j2; | 2625 unsigned char j1, j2; |
2655 ENCODE_SHIFT_JIS (ch, c, j1, j2); | 2626 ENCODE_SJIS (ch, c, j1, j2); |
2656 Dynarr_add (dst, j1); | 2627 Dynarr_add (dst, j1); |
2657 Dynarr_add (dst, j2); | 2628 Dynarr_add (dst, j2); |
2658 ch = 0; | 2629 ch = 0; |
2659 } | 2630 } |
2660 } | 2631 } |
2675 CHECK_CONS (code); | 2646 CHECK_CONS (code); |
2676 CHECK_INT (XCAR (code)); | 2647 CHECK_INT (XCAR (code)); |
2677 CHECK_INT (XCDR (code)); | 2648 CHECK_INT (XCDR (code)); |
2678 s1 = XINT (XCAR (code)); | 2649 s1 = XINT (XCAR (code)); |
2679 s2 = XINT (XCDR (code)); | 2650 s2 = XINT (XCDR (code)); |
2680 if (BYTE_SHIFT_JIS_TWO_BYTE_1_P (s1) && | 2651 if (BYTE_SJIS_TWO_BYTE_1_P (s1) && |
2681 BYTE_SHIFT_JIS_TWO_BYTE_2_P (s2)) | 2652 BYTE_SJIS_TWO_BYTE_2_P (s2)) |
2682 { | 2653 { |
2683 DECODE_SHIFT_JIS (s1, s2, c1, c2); | 2654 DECODE_SJIS (s1, s2, c1, c2); |
2684 return make_char (MAKE_CHAR (Vcharset_japanese_jisx0208, | 2655 return make_char (MAKE_CHAR (Vcharset_japanese_jisx0208, |
2685 c1 & 0x7F, c2 & 0x7F)); | 2656 c1 & 0x7F, c2 & 0x7F)); |
2686 } | 2657 } |
2687 else | 2658 else |
2688 return Qnil; | 2659 return Qnil; |
2699 | 2670 |
2700 CHECK_CHAR_COERCE_INT (ch); | 2671 CHECK_CHAR_COERCE_INT (ch); |
2701 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); | 2672 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); |
2702 if (EQ (charset, Vcharset_japanese_jisx0208)) | 2673 if (EQ (charset, Vcharset_japanese_jisx0208)) |
2703 { | 2674 { |
2704 ENCODE_SHIFT_JIS (c1 | 0x80, c2 | 0x80, s1, s2); | 2675 ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); |
2705 return Fcons (make_int (s1), make_int (s2)); | 2676 return Fcons (make_int (s1), make_int (s2)); |
2706 } | 2677 } |
2707 else | 2678 else |
2708 return Qnil; | 2679 return Qnil; |
2709 } | 2680 } |
3403 } | 3374 } |
3404 else | 3375 else |
3405 { | 3376 { |
3406 int jj; | 3377 int jj; |
3407 | 3378 |
3408 /* If we are in the thrall of in invalid designation, | 3379 /* If we are in the thrall of an invalid designation, |
3409 then stick the directionality sequence literally into the | 3380 then stick the directionality sequence literally into the |
3410 output stream so it ends up in the original text again. */ | 3381 output stream so it ends up in the original text again. */ |
3411 for (jj = 0; jj < 4; jj++) | 3382 for (jj = 0; jj < 4; jj++) |
3412 if (iso->invalid_designated[jj]) | 3383 if (iso->invalid_designated[jj]) |
3413 break; | 3384 break; |
3561 iso->switched_dir_and_no_valid_charset_yet = 0; | 3532 iso->switched_dir_and_no_valid_charset_yet = 0; |
3562 return 1; | 3533 return 1; |
3563 } | 3534 } |
3564 | 3535 |
3565 static int | 3536 static int |
3566 detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src, | 3537 detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src, |
3567 unsigned int n) | 3538 unsigned int n) |
3568 { | 3539 { |
3569 int c; | 3540 int c; |
3570 int mask; | 3541 int mask; |
3571 | 3542 |
4421 Dynarr_add (dst, c); | 4392 Dynarr_add (dst, c); |
4422 } | 4393 } |
4423 else if (BUFBYTE_LEADING_BYTE_P (c)) | 4394 else if (BUFBYTE_LEADING_BYTE_P (c)) |
4424 { | 4395 { |
4425 assert (ch == 0); | 4396 assert (ch == 0); |
4426 if (c == LEADING_BYTE_LATIN_1 || c == LEADING_BYTE_CONTROL_1) | 4397 if (c == LEADING_BYTE_LATIN_ISO8859_1 || |
4398 c == LEADING_BYTE_CONTROL_1) | |
4427 ch = c; | 4399 ch = c; |
4428 else | 4400 else |
4429 Dynarr_add (dst, '~'); /* untranslatable character */ | 4401 Dynarr_add (dst, '~'); /* untranslatable character */ |
4430 } | 4402 } |
4431 else | 4403 else |
4432 { | 4404 { |
4433 if (ch == LEADING_BYTE_LATIN_1) | 4405 if (ch == LEADING_BYTE_LATIN_ISO8859_1) |
4434 Dynarr_add (dst, c); | 4406 Dynarr_add (dst, c); |
4435 else if (ch == LEADING_BYTE_CONTROL_1) | 4407 else if (ch == LEADING_BYTE_CONTROL_1) |
4436 { | 4408 { |
4437 assert (c < 0xC0); | 4409 assert (c < 0xC0); |
4438 Dynarr_add (dst, c - 0x20); | 4410 Dynarr_add (dst, c - 0x20); |
4486 CONST Bufbyte *end = ptr + len; | 4458 CONST Bufbyte *end = ptr + len; |
4487 | 4459 |
4488 for (; ptr < end;) | 4460 for (; ptr < end;) |
4489 { | 4461 { |
4490 Bufbyte c = | 4462 Bufbyte c = |
4491 (BYTE_ASCII_P (*ptr)) ? *ptr : | 4463 (BYTE_ASCII_P (*ptr)) ? *ptr : |
4492 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) : | 4464 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) : |
4493 (*ptr == LEADING_BYTE_LATIN_1) ? (*(ptr+1)) : | 4465 (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) : |
4494 '~'; | 4466 '~'; |
4495 | 4467 |
4496 Dynarr_add (conversion_out_dynarr, (Extbyte) c); | 4468 Dynarr_add (conversion_out_dynarr, (Extbyte) c); |
4497 INC_CHARPTR (ptr); | 4469 INC_CHARPTR (ptr); |
4498 } | 4470 } |