comparison src/file-coding.c @ 444:576fb035e263 r21-2-37

Import from CVS: tag r21-2-37
author cvs
date Mon, 13 Aug 2007 11:36:19 +0200
parents abe6d1db359e
children 3078fd1074e8
comparison
equal deleted inserted replaced
443:a8296e22da4e 444:576fb035e263
174 #endif /* MULE */ 174 #endif /* MULE */
175 EXFUN (Fcopy_coding_system, 2); 175 EXFUN (Fcopy_coding_system, 2);
176 #ifdef MULE 176 #ifdef MULE
177 struct detection_state; 177 struct detection_state;
178 static int detect_coding_sjis (struct detection_state *st, 178 static int detect_coding_sjis (struct detection_state *st,
179 const unsigned char *src, 179 const Extbyte *src, size_t n);
180 unsigned int n); 180 static void decode_coding_sjis (Lstream *decoding, const Extbyte *src,
181 static void decode_coding_sjis (Lstream *decoding, 181 unsigned_char_dynarr *dst, size_t n);
182 const unsigned char *src, 182 static void encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
183 unsigned_char_dynarr *dst, 183 unsigned_char_dynarr *dst, size_t n);
184 unsigned int n);
185 static void encode_coding_sjis (Lstream *encoding,
186 const unsigned char *src,
187 unsigned_char_dynarr *dst,
188 unsigned int n);
189 static int detect_coding_big5 (struct detection_state *st, 184 static int detect_coding_big5 (struct detection_state *st,
190 const unsigned char *src, 185 const Extbyte *src, size_t n);
191 unsigned int n); 186 static void decode_coding_big5 (Lstream *decoding, const Extbyte *src,
192 static void decode_coding_big5 (Lstream *decoding, 187 unsigned_char_dynarr *dst, size_t n);
193 const unsigned char *src, 188 static void encode_coding_big5 (Lstream *encoding, const Bufbyte *src,
194 unsigned_char_dynarr *dst, unsigned int n); 189 unsigned_char_dynarr *dst, size_t n);
195 static void encode_coding_big5 (Lstream *encoding,
196 const unsigned char *src,
197 unsigned_char_dynarr *dst, unsigned int n);
198 static int detect_coding_ucs4 (struct detection_state *st, 190 static int detect_coding_ucs4 (struct detection_state *st,
199 const unsigned char *src, 191 const Extbyte *src, size_t n);
200 unsigned int n); 192 static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
201 static void decode_coding_ucs4 (Lstream *decoding, 193 unsigned_char_dynarr *dst, size_t n);
202 const unsigned char *src, 194 static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
203 unsigned_char_dynarr *dst, unsigned int n); 195 unsigned_char_dynarr *dst, size_t n);
204 static void encode_coding_ucs4 (Lstream *encoding,
205 const unsigned char *src,
206 unsigned_char_dynarr *dst, unsigned int n);
207 static int detect_coding_utf8 (struct detection_state *st, 196 static int detect_coding_utf8 (struct detection_state *st,
208 const unsigned char *src, 197 const Extbyte *src, size_t n);
209 unsigned int n); 198 static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
210 static void decode_coding_utf8 (Lstream *decoding, 199 unsigned_char_dynarr *dst, size_t n);
211 const unsigned char *src, 200 static void encode_coding_utf8 (Lstream *encoding, const Bufbyte *src,
212 unsigned_char_dynarr *dst, unsigned int n); 201 unsigned_char_dynarr *dst, size_t n);
213 static void encode_coding_utf8 (Lstream *encoding,
214 const unsigned char *src,
215 unsigned_char_dynarr *dst, unsigned int n);
216 static int postprocess_iso2022_mask (int mask); 202 static int postprocess_iso2022_mask (int mask);
217 static void reset_iso2022 (Lisp_Object coding_system, 203 static void reset_iso2022 (Lisp_Object coding_system,
218 struct iso2022_decoder *iso); 204 struct iso2022_decoder *iso);
219 static int detect_coding_iso2022 (struct detection_state *st, 205 static int detect_coding_iso2022 (struct detection_state *st,
220 const unsigned char *src, 206 const Extbyte *src, size_t n);
221 unsigned int n); 207 static void decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
222 static void decode_coding_iso2022 (Lstream *decoding, 208 unsigned_char_dynarr *dst, size_t n);
223 const unsigned char *src, 209 static void encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src,
224 unsigned_char_dynarr *dst, unsigned int n); 210 unsigned_char_dynarr *dst, size_t n);
225 static void encode_coding_iso2022 (Lstream *encoding,
226 const unsigned char *src,
227 unsigned_char_dynarr *dst, unsigned int n);
228 #endif /* MULE */ 211 #endif /* MULE */
229 static void decode_coding_no_conversion (Lstream *decoding, 212 static void decode_coding_no_conversion (Lstream *decoding, const Extbyte *src,
230 const unsigned char *src, 213 unsigned_char_dynarr *dst, size_t n);
231 unsigned_char_dynarr *dst, 214 static void encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
232 unsigned int n); 215 unsigned_char_dynarr *dst, size_t n);
233 static void encode_coding_no_conversion (Lstream *encoding, 216 static void mule_decode (Lstream *decoding, const Extbyte *src,
234 const unsigned char *src, 217 unsigned_char_dynarr *dst, size_t n);
235 unsigned_char_dynarr *dst, 218 static void mule_encode (Lstream *encoding, const Bufbyte *src,
236 unsigned int n); 219 unsigned_char_dynarr *dst, size_t n);
237 static void mule_decode (Lstream *decoding, const unsigned char *src,
238 unsigned_char_dynarr *dst, unsigned int n);
239 static void mule_encode (Lstream *encoding, const unsigned char *src,
240 unsigned_char_dynarr *dst, unsigned int n);
241 220
242 typedef struct codesys_prop codesys_prop; 221 typedef struct codesys_prop codesys_prop;
243 struct codesys_prop 222 struct codesys_prop
244 { 223 {
245 Lisp_Object sym; 224 Lisp_Object sym;
787 converted to nil when stored internally, and 766 converted to nil when stored internally, and
788 `coding-system-property' will return nil.) 767 `coding-system-property' will return nil.)
789 768
790 'post-read-conversion 769 'post-read-conversion
791 Function called after a file has been read in, to perform the 770 Function called after a file has been read in, to perform the
792 decoding. Called with two arguments, BEG and END, denoting 771 decoding. Called with two arguments, START and END, denoting
793 a region of the current buffer to be decoded. 772 a region of the current buffer to be decoded.
794 773
795 'pre-write-conversion 774 'pre-write-conversion
796 Function called before a file is written out, to perform the 775 Function called before a file is written out, to perform the
797 encoding. Called with two arguments, BEG and END, denoting 776 encoding. Called with two arguments, START and END, denoting
798 a region of the current buffer to be encoded. 777 a region of the current buffer to be encoded.
799 778
800 779
801 The following additional properties are recognized if TYPE is 'iso2022: 780 The following additional properties are recognized if TYPE is 'iso2022:
802 781
981 else 960 else
982 signal_simple_error ("Unrecognized property", key); 961 signal_simple_error ("Unrecognized property", key);
983 } 962 }
984 else if (EQ (type, Qccl)) 963 else if (EQ (type, Qccl))
985 { 964 {
965 Lisp_Object sym;
966 struct ccl_program test_ccl;
967 Extbyte *suffix;
968
969 /* Check key first. */
986 if (EQ (key, Qdecode)) 970 if (EQ (key, Qdecode))
971 suffix = "-ccl-decode";
972 else if (EQ (key, Qencode))
973 suffix = "-ccl-encode";
974 else
975 signal_simple_error ("Unrecognized property", key);
976
977 /* If value is vector, register it as a ccl program
978 associated with an newly created symbol for
979 backward compatibility. */
980 if (VECTORP (value))
987 { 981 {
988 CHECK_VECTOR (value); 982 sym = Fintern (concat2 (Fsymbol_name (name),
989 CODING_SYSTEM_CCL_DECODE (codesys) = value; 983 build_string (suffix)),
990 } 984 Qnil);
991 else if (EQ (key, Qencode)) 985 Fregister_ccl_program (sym, value);
992 {
993 CHECK_VECTOR (value);
994 CODING_SYSTEM_CCL_ENCODE (codesys) = value;
995 } 986 }
996 else 987 else
997 signal_simple_error ("Unrecognized property", key); 988 {
989 CHECK_SYMBOL (value);
990 sym = value;
991 }
992 /* check if the given ccl programs are valid. */
993 if (setup_ccl_program (&test_ccl, sym) < 0)
994 signal_simple_error ("Invalid CCL program", value);
995
996 if (EQ (key, Qdecode))
997 CODING_SYSTEM_CCL_DECODE (codesys) = sym;
998 else if (EQ (key, Qencode))
999 CODING_SYSTEM_CCL_ENCODE (codesys) = sym;
1000
998 } 1001 }
999 #endif /* MULE */ 1002 #endif /* MULE */
1000 else 1003 else
1001 signal_simple_error ("Unrecognized property", key); 1004 signal_simple_error ("Unrecognized property", key);
1002 } 1005 }
1626 technical interviews */ 1629 technical interviews */
1627 return (mask & (mask - 1)) == 0; 1630 return (mask & (mask - 1)) == 0;
1628 } 1631 }
1629 1632
1630 static eol_type_t 1633 static eol_type_t
1631 detect_eol_type (struct detection_state *st, const unsigned char *src, 1634 detect_eol_type (struct detection_state *st, const Extbyte *src,
1632 unsigned int n) 1635 size_t n)
1633 { 1636 {
1634 int c;
1635
1636 while (n--) 1637 while (n--)
1637 { 1638 {
1638 c = *src++; 1639 unsigned char c = *(unsigned char *)src++;
1639 if (c == '\n') 1640 if (c == '\n')
1640 { 1641 {
1641 if (st->eol.just_saw_cr) 1642 if (st->eol.just_saw_cr)
1642 return EOL_CRLF; 1643 return EOL_CRLF;
1643 else if (st->eol.seen_anything) 1644 else if (st->eol.seen_anything)
1672 1 == definitive answers are here for both st->eol_type and st->mask 1673 1 == definitive answers are here for both st->eol_type and st->mask
1673 */ 1674 */
1674 1675
1675 static int 1676 static int
1676 detect_coding_type (struct detection_state *st, const Extbyte *src, 1677 detect_coding_type (struct detection_state *st, const Extbyte *src,
1677 unsigned int n, int just_do_eol) 1678 size_t n, int just_do_eol)
1678 { 1679 {
1679 int c;
1680
1681 if (st->eol_type == EOL_AUTODETECT) 1680 if (st->eol_type == EOL_AUTODETECT)
1682 st->eol_type = detect_eol_type (st, src, n); 1681 st->eol_type = detect_eol_type (st, src, n);
1683 1682
1684 if (just_do_eol) 1683 if (just_do_eol)
1685 return st->eol_type != EOL_AUTODETECT; 1684 return st->eol_type != EOL_AUTODETECT;
1686 1685
1687 if (!st->seen_non_ascii) 1686 if (!st->seen_non_ascii)
1688 { 1687 {
1689 for (; n; n--, src++) 1688 for (; n; n--, src++)
1690 { 1689 {
1691 c = *src; 1690 unsigned char c = *(unsigned char *) src;
1692 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) 1691 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80)
1693 { 1692 {
1694 st->seen_non_ascii = 1; 1693 st->seen_non_ascii = 1;
1695 #ifdef MULE 1694 #ifdef MULE
1696 st->shift_jis.mask = ~0; 1695 st->shift_jis.mask = ~0;
1912 Lstream_rewind (stream); 1911 Lstream_rewind (stream);
1913 } 1912 }
1914 1913
1915 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* 1914 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
1916 Detect coding system of the text in the region between START and END. 1915 Detect coding system of the text in the region between START and END.
1917 Returned a list of possible coding systems ordered by priority. 1916 Return a list of possible coding systems ordered by priority.
1918 If only ASCII characters are found, it returns 'undecided or one of 1917 If only ASCII characters are found, return 'undecided or one of
1919 its subsidiary coding systems according to a detected end-of-line 1918 its subsidiary coding systems according to a detected end-of-line
1920 type. Optional arg BUFFER defaults to the current buffer. 1919 type. Optional arg BUFFER defaults to the current buffer.
1921 */ 1920 */
1922 (start, end, buffer)) 1921 (start, end, buffer))
1923 { 1922 {
1938 xzero (decst); 1937 xzero (decst);
1939 decst.eol_type = EOL_AUTODETECT; 1938 decst.eol_type = EOL_AUTODETECT;
1940 decst.mask = ~0; 1939 decst.mask = ~0;
1941 while (1) 1940 while (1)
1942 { 1941 {
1943 unsigned char random_buffer[4096]; 1942 Extbyte random_buffer[4096];
1944 ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer)); 1943 ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));
1945 1944
1946 if (!nread) 1945 if (!nread)
1947 break; 1946 break;
1948 if (detect_coding_type (&decst, random_buffer, nread, 0)) 1947 if (detect_coding_type (&decst, random_buffer, nread, 0))
2194 } 2193 }
2195 if (read_size == 0) 2194 if (read_size == 0)
2196 /* There might be some more end data produced in the translation. 2195 /* There might be some more end data produced in the translation.
2197 See the comment above. */ 2196 See the comment above. */
2198 str->flags |= CODING_STATE_END; 2197 str->flags |= CODING_STATE_END;
2199 mule_decode (stream, data, str->runoff, read_size); 2198 mule_decode (stream, (Extbyte *) data, str->runoff, read_size);
2200 } 2199 }
2201 2200
2202 if (data - orig_data == 0) 2201 if (data - orig_data == 0)
2203 return error_occurred ? -1 : 0; 2202 return error_occurred ? -1 : 0;
2204 else 2203 else
2212 ssize_t retval; 2211 ssize_t retval;
2213 2212
2214 /* Decode all our data into the runoff, and then attempt to write 2213 /* Decode all our data into the runoff, and then attempt to write
2215 it all out to the other end. Remove whatever chunk we succeeded 2214 it all out to the other end. Remove whatever chunk we succeeded
2216 in writing. */ 2215 in writing. */
2217 mule_decode (stream, data, str->runoff, size); 2216 mule_decode (stream, (Extbyte *) data, str->runoff, size);
2218 retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0), 2217 retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0),
2219 Dynarr_length (str->runoff)); 2218 Dynarr_length (str->runoff));
2220 if (retval > 0) 2219 if (retval > 0)
2221 Dynarr_delete_many (str->runoff, 0, retval); 2220 Dynarr_delete_many (str->runoff, 0, retval);
2222 /* Do NOT return retval. The return value indicates how much 2221 /* Do NOT return retval. The return value indicates how much
2364 written to that stream; that is handled in decoding_reader() 2363 written to that stream; that is handled in decoding_reader()
2365 or decoding_writer(). This allows the same functions to 2364 or decoding_writer(). This allows the same functions to
2366 be used for both reading and writing. */ 2365 be used for both reading and writing. */
2367 2366
2368 static void 2367 static void
2369 mule_decode (Lstream *decoding, const unsigned char *src, 2368 mule_decode (Lstream *decoding, const Extbyte *src,
2370 unsigned_char_dynarr *dst, unsigned int n) 2369 unsigned_char_dynarr *dst, size_t n)
2371 { 2370 {
2372 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 2371 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
2373 2372
2374 /* If necessary, do encoding-detection now. We do this when 2373 /* If necessary, do encoding-detection now. We do this when
2375 we're a writing stream or a non-seekable reading stream, 2374 we're a writing stream or a non-seekable reading stream,
2429 case CODESYS_UTF8: 2428 case CODESYS_UTF8:
2430 decode_coding_utf8 (decoding, src, dst, n); 2429 decode_coding_utf8 (decoding, src, dst, n);
2431 break; 2430 break;
2432 case CODESYS_CCL: 2431 case CODESYS_CCL:
2433 str->ccl.last_block = str->flags & CODING_STATE_END; 2432 str->ccl.last_block = str->flags & CODING_STATE_END;
2434 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING); 2433 /* When applying ccl program to stream, MUST NOT set NULL
2434 pointer to src. */
2435 ccl_driver (&str->ccl, (src ? (unsigned char *)src : (unsigned char*)""),
2436 dst, n, 0, CCL_MODE_DECODING);
2435 break; 2437 break;
2436 case CODESYS_ISO2022: 2438 case CODESYS_ISO2022:
2437 decode_coding_iso2022 (decoding, src, dst, n); 2439 decode_coding_iso2022 (decoding, src, dst, n);
2438 break; 2440 break;
2439 #endif /* MULE */ 2441 #endif /* MULE */
2808 /* Convert N bytes of internally-formatted data stored in SRC to an 2810 /* Convert N bytes of internally-formatted data stored in SRC to an
2809 external format, according to the encoding stream ENCODING. 2811 external format, according to the encoding stream ENCODING.
2810 Store the encoded data into DST. */ 2812 Store the encoded data into DST. */
2811 2813
2812 static void 2814 static void
2813 mule_encode (Lstream *encoding, const unsigned char *src, 2815 mule_encode (Lstream *encoding, const Bufbyte *src,
2814 unsigned_char_dynarr *dst, unsigned int n) 2816 unsigned_char_dynarr *dst, size_t n)
2815 { 2817 {
2816 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 2818 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
2817 2819
2818 switch (CODING_SYSTEM_TYPE (str->codesys)) 2820 switch (CODING_SYSTEM_TYPE (str->codesys))
2819 { 2821 {
2841 case CODESYS_UTF8: 2843 case CODESYS_UTF8:
2842 encode_coding_utf8 (encoding, src, dst, n); 2844 encode_coding_utf8 (encoding, src, dst, n);
2843 break; 2845 break;
2844 case CODESYS_CCL: 2846 case CODESYS_CCL:
2845 str->ccl.last_block = str->flags & CODING_STATE_END; 2847 str->ccl.last_block = str->flags & CODING_STATE_END;
2846 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING); 2848 /* When applying ccl program to stream, MUST NOT set NULL
2849 pointer to src. */
2850 ccl_driver (&str->ccl, ((src) ? src : (unsigned char*)""),
2851 dst, n, 0, CCL_MODE_ENCODING);
2847 break; 2852 break;
2848 case CODESYS_ISO2022: 2853 case CODESYS_ISO2022:
2849 encode_coding_iso2022 (encoding, src, dst, n); 2854 encode_coding_iso2022 (encoding, src, dst, n);
2850 break; 2855 break;
2851 #endif /* MULE */ 2856 #endif /* MULE */
2955 2960
2956 #define BYTE_SJIS_KATAKANA_P(c) \ 2961 #define BYTE_SJIS_KATAKANA_P(c) \
2957 ((c) >= 0xA1 && (c) <= 0xDF) 2962 ((c) >= 0xA1 && (c) <= 0xDF)
2958 2963
2959 static int 2964 static int
2960 detect_coding_sjis (struct detection_state *st, const unsigned char *src, 2965 detect_coding_sjis (struct detection_state *st, const Extbyte *src, size_t n)
2961 unsigned int n) 2966 {
2962 {
2963 int c;
2964
2965 while (n--) 2967 while (n--)
2966 { 2968 {
2967 c = *src++; 2969 unsigned char c = *(unsigned char *)src++;
2968 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 2970 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
2969 return 0; 2971 return 0;
2970 if (st->shift_jis.in_second_byte) 2972 if (st->shift_jis.in_second_byte)
2971 { 2973 {
2972 st->shift_jis.in_second_byte = 0; 2974 st->shift_jis.in_second_byte = 0;
2980 } 2982 }
2981 2983
2982 /* Convert Shift-JIS data to internal format. */ 2984 /* Convert Shift-JIS data to internal format. */
2983 2985
2984 static void 2986 static void
2985 decode_coding_sjis (Lstream *decoding, const unsigned char *src, 2987 decode_coding_sjis (Lstream *decoding, const Extbyte *src,
2986 unsigned_char_dynarr *dst, unsigned int n) 2988 unsigned_char_dynarr *dst, size_t n)
2987 { 2989 {
2988 unsigned char c;
2989 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 2990 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
2990 unsigned int flags = str->flags; 2991 unsigned int flags = str->flags;
2991 unsigned int ch = str->ch; 2992 unsigned int ch = str->ch;
2992 eol_type_t eol_type = str->eol_type; 2993 eol_type_t eol_type = str->eol_type;
2993 2994
2994 while (n--) 2995 while (n--)
2995 { 2996 {
2996 c = *src++; 2997 unsigned char c = *(unsigned char *)src++;
2997 2998
2998 if (ch) 2999 if (ch)
2999 { 3000 {
3000 /* Previous character was first byte of Shift-JIS Kanji char. */ 3001 /* Previous character was first byte of Shift-JIS Kanji char. */
3001 if (BYTE_SJIS_TWO_BYTE_2_P (c)) 3002 if (BYTE_SJIS_TWO_BYTE_2_P (c))
3037 } 3038 }
3038 3039
3039 /* Convert internally-formatted data to Shift-JIS. */ 3040 /* Convert internally-formatted data to Shift-JIS. */
3040 3041
3041 static void 3042 static void
3042 encode_coding_sjis (Lstream *encoding, const unsigned char *src, 3043 encode_coding_sjis (Lstream *encoding, const Bufbyte *src,
3043 unsigned_char_dynarr *dst, unsigned int n) 3044 unsigned_char_dynarr *dst, size_t n)
3044 { 3045 {
3045 unsigned char c;
3046 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3046 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3047 unsigned int flags = str->flags; 3047 unsigned int flags = str->flags;
3048 unsigned int ch = str->ch; 3048 unsigned int ch = str->ch;
3049 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); 3049 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
3050 3050
3051 while (n--) 3051 while (n--)
3052 { 3052 {
3053 c = *src++; 3053 Bufbyte c = *src++;
3054 if (c == '\n') 3054 if (c == '\n')
3055 { 3055 {
3056 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) 3056 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT)
3057 Dynarr_add (dst, '\r'); 3057 Dynarr_add (dst, '\r');
3058 if (eol_type != EOL_CR) 3058 if (eol_type != EOL_CR)
3117 else 3117 else
3118 return Qnil; 3118 return Qnil;
3119 } 3119 }
3120 3120
3121 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* 3121 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /*
3122 Encode a JISX0208 character CHAR to SHIFT-JIS coding-system. 3122 Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system.
3123 Return the corresponding character code in SHIFT-JIS as a cons of two bytes. 3123 Return the corresponding character code in SHIFT-JIS as a cons of two bytes.
3124 */ 3124 */
3125 (ch)) 3125 (character))
3126 { 3126 {
3127 Lisp_Object charset; 3127 Lisp_Object charset;
3128 int c1, c2, s1, s2; 3128 int c1, c2, s1, s2;
3129 3129
3130 CHECK_CHAR_COERCE_INT (ch); 3130 CHECK_CHAR_COERCE_INT (character);
3131 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); 3131 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3132 if (EQ (charset, Vcharset_japanese_jisx0208)) 3132 if (EQ (charset, Vcharset_japanese_jisx0208))
3133 { 3133 {
3134 ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); 3134 ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2);
3135 return Fcons (make_int (s1), make_int (s2)); 3135 return Fcons (make_int (s1), make_int (s2));
3136 } 3136 }
3241 b2 = I % BIG5_SAME_ROW; \ 3241 b2 = I % BIG5_SAME_ROW; \
3242 b2 += b2 < 0x3F ? 0x40 : 0x62; \ 3242 b2 += b2 < 0x3F ? 0x40 : 0x62; \
3243 } while (0) 3243 } while (0)
3244 3244
3245 static int 3245 static int
3246 detect_coding_big5 (struct detection_state *st, const unsigned char *src, 3246 detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n)
3247 unsigned int n) 3247 {
3248 {
3249 int c;
3250
3251 while (n--) 3248 while (n--)
3252 { 3249 {
3253 c = *src++; 3250 unsigned char c = *(unsigned char *)src++;
3254 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO || 3251 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO ||
3255 (c >= 0x80 && c <= 0xA0)) 3252 (c >= 0x80 && c <= 0xA0))
3256 return 0; 3253 return 0;
3257 if (st->big5.in_second_byte) 3254 if (st->big5.in_second_byte)
3258 { 3255 {
3267 } 3264 }
3268 3265
3269 /* Convert Big5 data to internal format. */ 3266 /* Convert Big5 data to internal format. */
3270 3267
3271 static void 3268 static void
3272 decode_coding_big5 (Lstream *decoding, const unsigned char *src, 3269 decode_coding_big5 (Lstream *decoding, const Extbyte *src,
3273 unsigned_char_dynarr *dst, unsigned int n) 3270 unsigned_char_dynarr *dst, size_t n)
3274 { 3271 {
3275 unsigned char c;
3276 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3272 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3277 unsigned int flags = str->flags; 3273 unsigned int flags = str->flags;
3278 unsigned int ch = str->ch; 3274 unsigned int ch = str->ch;
3279 eol_type_t eol_type = str->eol_type; 3275 eol_type_t eol_type = str->eol_type;
3280 3276
3281 while (n--) 3277 while (n--)
3282 { 3278 {
3283 c = *src++; 3279 unsigned char c = *(unsigned char *)src++;
3284 if (ch) 3280 if (ch)
3285 { 3281 {
3286 /* Previous character was first byte of Big5 char. */ 3282 /* Previous character was first byte of Big5 char. */
3287 if (BYTE_BIG5_TWO_BYTE_2_P (c)) 3283 if (BYTE_BIG5_TWO_BYTE_2_P (c))
3288 { 3284 {
3317 } 3313 }
3318 3314
3319 /* Convert internally-formatted data to Big5. */ 3315 /* Convert internally-formatted data to Big5. */
3320 3316
3321 static void 3317 static void
3322 encode_coding_big5 (Lstream *encoding, const unsigned char *src, 3318 encode_coding_big5 (Lstream *encoding, const Bufbyte *src,
3323 unsigned_char_dynarr *dst, unsigned int n) 3319 unsigned_char_dynarr *dst, size_t n)
3324 { 3320 {
3325 unsigned char c; 3321 unsigned char c;
3326 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3322 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3327 unsigned int flags = str->flags; 3323 unsigned int flags = str->flags;
3328 unsigned int ch = str->ch; 3324 unsigned int ch = str->ch;
3405 else 3401 else
3406 return Qnil; 3402 return Qnil;
3407 } 3403 }
3408 3404
3409 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* 3405 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /*
3410 Encode the Big5 character CH to BIG5 coding-system. 3406 Encode the Big5 character CHARACTER in the BIG5 coding-system.
3411 Return the corresponding character code in Big5. 3407 Return the corresponding character code in Big5.
3412 */ 3408 */
3413 (ch)) 3409 (character))
3414 { 3410 {
3415 Lisp_Object charset; 3411 Lisp_Object charset;
3416 int c1, c2, b1, b2; 3412 int c1, c2, b1, b2;
3417 3413
3418 CHECK_CHAR_COERCE_INT (ch); 3414 CHECK_CHAR_COERCE_INT (character);
3419 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); 3415 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
3420 if (EQ (charset, Vcharset_chinese_big5_1) || 3416 if (EQ (charset, Vcharset_chinese_big5_1) ||
3421 EQ (charset, Vcharset_chinese_big5_2)) 3417 EQ (charset, Vcharset_chinese_big5_2))
3422 { 3418 {
3423 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80, 3419 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80,
3424 b1, b2); 3420 b1, b2);
3584 Dynarr_add (dst, (code >> 8) & 255); 3580 Dynarr_add (dst, (code >> 8) & 255);
3585 Dynarr_add (dst, code & 255); 3581 Dynarr_add (dst, code & 255);
3586 } 3582 }
3587 3583
3588 static int 3584 static int
3589 detect_coding_ucs4 (struct detection_state *st, const unsigned char *src, 3585 detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, size_t n)
3590 unsigned int n)
3591 { 3586 {
3592 while (n--) 3587 while (n--)
3593 { 3588 {
3594 int c = *src++; 3589 unsigned char c = *(unsigned char *)src++;
3595 switch (st->ucs4.in_byte) 3590 switch (st->ucs4.in_byte)
3596 { 3591 {
3597 case 0: 3592 case 0:
3598 if (c >= 128) 3593 if (c >= 128)
3599 return 0; 3594 return 0;
3609 } 3604 }
3610 return CODING_CATEGORY_UCS4_MASK; 3605 return CODING_CATEGORY_UCS4_MASK;
3611 } 3606 }
3612 3607
3613 static void 3608 static void
3614 decode_coding_ucs4 (Lstream *decoding, const unsigned char *src, 3609 decode_coding_ucs4 (Lstream *decoding, const Extbyte *src,
3615 unsigned_char_dynarr *dst, unsigned int n) 3610 unsigned_char_dynarr *dst, size_t n)
3616 { 3611 {
3617 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3612 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3618 unsigned int flags = str->flags; 3613 unsigned int flags = str->flags;
3619 unsigned int ch = str->ch; 3614 unsigned int ch = str->ch;
3620 unsigned char counter = str->counter; 3615 unsigned char counter = str->counter;
3621 3616
3622 while (n--) 3617 while (n--)
3623 { 3618 {
3624 unsigned char c = *src++; 3619 unsigned char c = *(unsigned char *)src++;
3625 switch (counter) 3620 switch (counter)
3626 { 3621 {
3627 case 0: 3622 case 0:
3628 ch = c; 3623 ch = c;
3629 counter = 3; 3624 counter = 3;
3645 str->ch = ch; 3640 str->ch = ch;
3646 str->counter = counter; 3641 str->counter = counter;
3647 } 3642 }
3648 3643
3649 static void 3644 static void
3650 encode_coding_ucs4 (Lstream *encoding, const unsigned char *src, 3645 encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src,
3651 unsigned_char_dynarr *dst, unsigned int n) 3646 unsigned_char_dynarr *dst, size_t n)
3652 { 3647 {
3653 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3648 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3654 unsigned int flags = str->flags; 3649 unsigned int flags = str->flags;
3655 unsigned int ch = str->ch; 3650 unsigned int ch = str->ch;
3656 unsigned char char_boundary = str->iso2022.current_char_boundary; 3651 unsigned char char_boundary = str->iso2022.current_char_boundary;
3711 { 3706 {
3712 if (in_composite) 3707 if (in_composite)
3713 { 3708 {
3714 /* #### Bother! We don't know how to 3709 /* #### Bother! We don't know how to
3715 handle this yet. */ 3710 handle this yet. */
3716 Dynarr_add (dst, 0); 3711 Dynarr_add (dst, '\0');
3717 Dynarr_add (dst, 0); 3712 Dynarr_add (dst, '\0');
3718 Dynarr_add (dst, 0); 3713 Dynarr_add (dst, '\0');
3719 Dynarr_add (dst, '~'); 3714 Dynarr_add (dst, '~');
3720 } 3715 }
3721 else 3716 else
3722 { 3717 {
3723 Emchar emch = MAKE_CHAR (Vcharset_composite, 3718 Emchar emch = MAKE_CHAR (Vcharset_composite,
3784 /************************************************************************/ 3779 /************************************************************************/
3785 /* UTF-8 methods */ 3780 /* UTF-8 methods */
3786 /************************************************************************/ 3781 /************************************************************************/
3787 3782
3788 static int 3783 static int
3789 detect_coding_utf8 (struct detection_state *st, const unsigned char *src, 3784 detect_coding_utf8 (struct detection_state *st, const Extbyte *src, size_t n)
3790 unsigned int n)
3791 { 3785 {
3792 while (n--) 3786 while (n--)
3793 { 3787 {
3794 unsigned char c = *src++; 3788 unsigned char c = *(unsigned char *)src++;
3795 switch (st->utf8.in_byte) 3789 switch (st->utf8.in_byte)
3796 { 3790 {
3797 case 0: 3791 case 0:
3798 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) 3792 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
3799 return 0; 3793 return 0;
3819 } 3813 }
3820 return CODING_CATEGORY_UTF8_MASK; 3814 return CODING_CATEGORY_UTF8_MASK;
3821 } 3815 }
3822 3816
3823 static void 3817 static void
3824 decode_coding_utf8 (Lstream *decoding, const unsigned char *src, 3818 decode_coding_utf8 (Lstream *decoding, const Extbyte *src,
3825 unsigned_char_dynarr *dst, unsigned int n) 3819 unsigned_char_dynarr *dst, size_t n)
3826 { 3820 {
3827 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3821 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3828 unsigned int flags = str->flags; 3822 unsigned int flags = str->flags;
3829 unsigned int ch = str->ch; 3823 unsigned int ch = str->ch;
3830 eol_type_t eol_type = str->eol_type; 3824 eol_type_t eol_type = str->eol_type;
3831 unsigned char counter = str->counter; 3825 unsigned char counter = str->counter;
3832 3826
3833 while (n--) 3827 while (n--)
3834 { 3828 {
3835 unsigned char c = *src++; 3829 unsigned char c = *(unsigned char *)src++;
3836 switch (counter) 3830 switch (counter)
3837 { 3831 {
3838 case 0: 3832 case 0:
3839 if ( c >= 0xfc ) 3833 if ( c >= 0xfc )
3840 { 3834 {
3933 Dynarr_add (dst, (code & 0x3f) | 0x80); 3927 Dynarr_add (dst, (code & 0x3f) | 0x80);
3934 } 3928 }
3935 } 3929 }
3936 3930
3937 static void 3931 static void
3938 encode_coding_utf8 (Lstream *encoding, const unsigned char *src, 3932 encode_coding_utf8 (Lstream *encoding, const Bufbyte *src,
3939 unsigned_char_dynarr *dst, unsigned int n) 3933 unsigned_char_dynarr *dst, size_t n)
3940 { 3934 {
3941 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3935 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3942 unsigned int flags = str->flags; 3936 unsigned int flags = str->flags;
3943 unsigned int ch = str->ch; 3937 unsigned int ch = str->ch;
3944 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); 3938 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
4652 iso->switched_dir_and_no_valid_charset_yet = 0; 4646 iso->switched_dir_and_no_valid_charset_yet = 0;
4653 return 1; 4647 return 1;
4654 } 4648 }
4655 4649
4656 static int 4650 static int
4657 detect_coding_iso2022 (struct detection_state *st, const unsigned char *src, 4651 detect_coding_iso2022 (struct detection_state *st, const Extbyte *src, size_t n)
4658 unsigned int n)
4659 { 4652 {
4660 int mask; 4653 int mask;
4661 4654
4662 /* #### There are serious deficiencies in the recognition mechanism 4655 /* #### There are serious deficiencies in the recognition mechanism
4663 here. This needs to be much smarter if it's going to cut it. 4656 here. This needs to be much smarter if it's going to cut it.
4683 4676
4684 mask = st->iso2022.mask; 4677 mask = st->iso2022.mask;
4685 4678
4686 while (n--) 4679 while (n--)
4687 { 4680 {
4688 int c = *src++; 4681 unsigned char c = *(unsigned char *)src++;
4689 if (c >= 0xA0) 4682 if (c >= 0xA0)
4690 { 4683 {
4691 mask &= ~CODING_CATEGORY_ISO_7_MASK; 4684 mask &= ~CODING_CATEGORY_ISO_7_MASK;
4692 st->iso2022.high_byte_count++; 4685 st->iso2022.high_byte_count++;
4693 } 4686 }
4843 } 4836 }
4844 4837
4845 /* Convert ISO2022-format data to internal format. */ 4838 /* Convert ISO2022-format data to internal format. */
4846 4839
4847 static void 4840 static void
4848 decode_coding_iso2022 (Lstream *decoding, const unsigned char *src, 4841 decode_coding_iso2022 (Lstream *decoding, const Extbyte *src,
4849 unsigned_char_dynarr *dst, unsigned int n) 4842 unsigned_char_dynarr *dst, size_t n)
4850 { 4843 {
4851 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 4844 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
4852 unsigned int flags = str->flags; 4845 unsigned int flags = str->flags;
4853 unsigned int ch = str->ch; 4846 unsigned int ch = str->ch;
4854 eol_type_t eol_type = str->eol_type; 4847 eol_type_t eol_type = str->eol_type;
4864 dst = str->iso2022.composite_chars; 4857 dst = str->iso2022.composite_chars;
4865 #endif /* ENABLE_COMPOSITE_CHARS */ 4858 #endif /* ENABLE_COMPOSITE_CHARS */
4866 4859
4867 while (n--) 4860 while (n--)
4868 { 4861 {
4869 unsigned char c = *src++; 4862 unsigned char c = *(unsigned char *)src++;
4870 if (flags & CODING_STATE_ESCAPE) 4863 if (flags & CODING_STATE_ESCAPE)
4871 { /* Within ESC sequence */ 4864 { /* Within ESC sequence */
4872 int retval = parse_iso2022_esc (coding_system, &str->iso2022, 4865 int retval = parse_iso2022_esc (coding_system, &str->iso2022,
4873 c, &flags, 1); 4866 c, &flags, 1);
4874 4867
5169 } 5162 }
5170 5163
5171 /* Convert internally-formatted data to ISO2022 format. */ 5164 /* Convert internally-formatted data to ISO2022 format. */
5172 5165
5173 static void 5166 static void
5174 encode_coding_iso2022 (Lstream *encoding, const unsigned char *src, 5167 encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src,
5175 unsigned_char_dynarr *dst, unsigned int n) 5168 unsigned_char_dynarr *dst, size_t n)
5176 { 5169 {
5177 unsigned char charmask, c; 5170 unsigned char charmask, c;
5178 unsigned char char_boundary; 5171 unsigned char char_boundary;
5179 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 5172 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
5180 unsigned int flags = str->flags; 5173 unsigned int flags = str->flags;
5478 5471
5479 /* This is used when reading in "binary" files -- i.e. files that may 5472 /* This is used when reading in "binary" files -- i.e. files that may
5480 contain all 256 possible byte values and that are not to be 5473 contain all 256 possible byte values and that are not to be
5481 interpreted as being in any particular decoding. */ 5474 interpreted as being in any particular decoding. */
5482 static void 5475 static void
5483 decode_coding_no_conversion (Lstream *decoding, const unsigned char *src, 5476 decode_coding_no_conversion (Lstream *decoding, const Extbyte *src,
5484 unsigned_char_dynarr *dst, unsigned int n) 5477 unsigned_char_dynarr *dst, size_t n)
5485 { 5478 {
5486 unsigned char c;
5487 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 5479 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
5488 unsigned int flags = str->flags; 5480 unsigned int flags = str->flags;
5489 unsigned int ch = str->ch; 5481 unsigned int ch = str->ch;
5490 eol_type_t eol_type = str->eol_type; 5482 eol_type_t eol_type = str->eol_type;
5491 5483
5492 while (n--) 5484 while (n--)
5493 { 5485 {
5494 c = *src++; 5486 unsigned char c = *(unsigned char *)src++;
5495 5487
5496 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); 5488 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
5497 DECODE_ADD_BINARY_CHAR (c, dst); 5489 DECODE_ADD_BINARY_CHAR (c, dst);
5498 label_continue_loop:; 5490 label_continue_loop:;
5499 } 5491 }
5503 str->flags = flags; 5495 str->flags = flags;
5504 str->ch = ch; 5496 str->ch = ch;
5505 } 5497 }
5506 5498
5507 static void 5499 static void
5508 encode_coding_no_conversion (Lstream *encoding, const unsigned char *src, 5500 encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src,
5509 unsigned_char_dynarr *dst, unsigned int n) 5501 unsigned_char_dynarr *dst, size_t n)
5510 { 5502 {
5511 unsigned char c; 5503 unsigned char c;
5512 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 5504 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
5513 unsigned int flags = str->flags; 5505 unsigned int flags = str->flags;
5514 unsigned int ch = str->ch; 5506 unsigned int ch = str->ch;