Mercurial > hg > xemacs-beta
comparison src/file-coding.c @ 444:576fb035e263 r21-2-37
Import from CVS: tag r21-2-37
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:36:19 +0200 |
parents | abe6d1db359e |
children | 3078fd1074e8 |
comparison
equal
deleted
inserted
replaced
443:a8296e22da4e | 444:576fb035e263 |
---|---|
174 #endif /* MULE */ | 174 #endif /* MULE */ |
175 EXFUN (Fcopy_coding_system, 2); | 175 EXFUN (Fcopy_coding_system, 2); |
176 #ifdef MULE | 176 #ifdef MULE |
177 struct detection_state; | 177 struct detection_state; |
178 static int detect_coding_sjis (struct detection_state *st, | 178 static int detect_coding_sjis (struct detection_state *st, |
179 const unsigned char *src, | 179 const Extbyte *src, size_t n); |
180 unsigned int n); | 180 static void decode_coding_sjis (Lstream *decoding, const Extbyte *src, |
181 static void decode_coding_sjis (Lstream *decoding, | 181 unsigned_char_dynarr *dst, size_t n); |
182 const unsigned char *src, | 182 static void encode_coding_sjis (Lstream *encoding, const Bufbyte *src, |
183 unsigned_char_dynarr *dst, | 183 unsigned_char_dynarr *dst, size_t n); |
184 unsigned int n); | |
185 static void encode_coding_sjis (Lstream *encoding, | |
186 const unsigned char *src, | |
187 unsigned_char_dynarr *dst, | |
188 unsigned int n); | |
189 static int detect_coding_big5 (struct detection_state *st, | 184 static int detect_coding_big5 (struct detection_state *st, |
190 const unsigned char *src, | 185 const Extbyte *src, size_t n); |
191 unsigned int n); | 186 static void decode_coding_big5 (Lstream *decoding, const Extbyte *src, |
192 static void decode_coding_big5 (Lstream *decoding, | 187 unsigned_char_dynarr *dst, size_t n); |
193 const unsigned char *src, | 188 static void encode_coding_big5 (Lstream *encoding, const Bufbyte *src, |
194 unsigned_char_dynarr *dst, unsigned int n); | 189 unsigned_char_dynarr *dst, size_t n); |
195 static void encode_coding_big5 (Lstream *encoding, | |
196 const unsigned char *src, | |
197 unsigned_char_dynarr *dst, unsigned int n); | |
198 static int detect_coding_ucs4 (struct detection_state *st, | 190 static int detect_coding_ucs4 (struct detection_state *st, |
199 const unsigned char *src, | 191 const Extbyte *src, size_t n); |
200 unsigned int n); | 192 static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src, |
201 static void decode_coding_ucs4 (Lstream *decoding, | 193 unsigned_char_dynarr *dst, size_t n); |
202 const unsigned char *src, | 194 static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, |
203 unsigned_char_dynarr *dst, unsigned int n); | 195 unsigned_char_dynarr *dst, size_t n); |
204 static void encode_coding_ucs4 (Lstream *encoding, | |
205 const unsigned char *src, | |
206 unsigned_char_dynarr *dst, unsigned int n); | |
207 static int detect_coding_utf8 (struct detection_state *st, | 196 static int detect_coding_utf8 (struct detection_state *st, |
208 const unsigned char *src, | 197 const Extbyte *src, size_t n); |
209 unsigned int n); | 198 static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src, |
210 static void decode_coding_utf8 (Lstream *decoding, | 199 unsigned_char_dynarr *dst, size_t n); |
211 const unsigned char *src, | 200 static void encode_coding_utf8 (Lstream *encoding, const Bufbyte *src, |
212 unsigned_char_dynarr *dst, unsigned int n); | 201 unsigned_char_dynarr *dst, size_t n); |
213 static void encode_coding_utf8 (Lstream *encoding, | |
214 const unsigned char *src, | |
215 unsigned_char_dynarr *dst, unsigned int n); | |
216 static int postprocess_iso2022_mask (int mask); | 202 static int postprocess_iso2022_mask (int mask); |
217 static void reset_iso2022 (Lisp_Object coding_system, | 203 static void reset_iso2022 (Lisp_Object coding_system, |
218 struct iso2022_decoder *iso); | 204 struct iso2022_decoder *iso); |
219 static int detect_coding_iso2022 (struct detection_state *st, | 205 static int detect_coding_iso2022 (struct detection_state *st, |
220 const unsigned char *src, | 206 const Extbyte *src, size_t n); |
221 unsigned int n); | 207 static void decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, |
222 static void decode_coding_iso2022 (Lstream *decoding, | 208 unsigned_char_dynarr *dst, size_t n); |
223 const unsigned char *src, | 209 static void encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src, |
224 unsigned_char_dynarr *dst, unsigned int n); | 210 unsigned_char_dynarr *dst, size_t n); |
225 static void encode_coding_iso2022 (Lstream *encoding, | |
226 const unsigned char *src, | |
227 unsigned_char_dynarr *dst, unsigned int n); | |
228 #endif /* MULE */ | 211 #endif /* MULE */ |
229 static void decode_coding_no_conversion (Lstream *decoding, | 212 static void decode_coding_no_conversion (Lstream *decoding, const Extbyte *src, |
230 const unsigned char *src, | 213 unsigned_char_dynarr *dst, size_t n); |
231 unsigned_char_dynarr *dst, | 214 static void encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src, |
232 unsigned int n); | 215 unsigned_char_dynarr *dst, size_t n); |
233 static void encode_coding_no_conversion (Lstream *encoding, | 216 static void mule_decode (Lstream *decoding, const Extbyte *src, |
234 const unsigned char *src, | 217 unsigned_char_dynarr *dst, size_t n); |
235 unsigned_char_dynarr *dst, | 218 static void mule_encode (Lstream *encoding, const Bufbyte *src, |
236 unsigned int n); | 219 unsigned_char_dynarr *dst, size_t n); |
237 static void mule_decode (Lstream *decoding, const unsigned char *src, | |
238 unsigned_char_dynarr *dst, unsigned int n); | |
239 static void mule_encode (Lstream *encoding, const unsigned char *src, | |
240 unsigned_char_dynarr *dst, unsigned int n); | |
241 | 220 |
242 typedef struct codesys_prop codesys_prop; | 221 typedef struct codesys_prop codesys_prop; |
243 struct codesys_prop | 222 struct codesys_prop |
244 { | 223 { |
245 Lisp_Object sym; | 224 Lisp_Object sym; |
787 converted to nil when stored internally, and | 766 converted to nil when stored internally, and |
788 `coding-system-property' will return nil.) | 767 `coding-system-property' will return nil.) |
789 | 768 |
790 'post-read-conversion | 769 'post-read-conversion |
791 Function called after a file has been read in, to perform the | 770 Function called after a file has been read in, to perform the |
792 decoding. Called with two arguments, BEG and END, denoting | 771 decoding. Called with two arguments, START and END, denoting |
793 a region of the current buffer to be decoded. | 772 a region of the current buffer to be decoded. |
794 | 773 |
795 'pre-write-conversion | 774 'pre-write-conversion |
796 Function called before a file is written out, to perform the | 775 Function called before a file is written out, to perform the |
797 encoding. Called with two arguments, BEG and END, denoting | 776 encoding. Called with two arguments, START and END, denoting |
798 a region of the current buffer to be encoded. | 777 a region of the current buffer to be encoded. |
799 | 778 |
800 | 779 |
801 The following additional properties are recognized if TYPE is 'iso2022: | 780 The following additional properties are recognized if TYPE is 'iso2022: |
802 | 781 |
981 else | 960 else |
982 signal_simple_error ("Unrecognized property", key); | 961 signal_simple_error ("Unrecognized property", key); |
983 } | 962 } |
984 else if (EQ (type, Qccl)) | 963 else if (EQ (type, Qccl)) |
985 { | 964 { |
965 Lisp_Object sym; | |
966 struct ccl_program test_ccl; | |
967 Extbyte *suffix; | |
968 | |
969 /* Check key first. */ | |
986 if (EQ (key, Qdecode)) | 970 if (EQ (key, Qdecode)) |
971 suffix = "-ccl-decode"; | |
972 else if (EQ (key, Qencode)) | |
973 suffix = "-ccl-encode"; | |
974 else | |
975 signal_simple_error ("Unrecognized property", key); | |
976 | |
977 /* If value is vector, register it as a ccl program | |
978 associated with a newly created symbol for | |
979 backward compatibility. */ | |
980 if (VECTORP (value)) | |
987 { | 981 { |
988 CHECK_VECTOR (value); | 982 sym = Fintern (concat2 (Fsymbol_name (name), |
989 CODING_SYSTEM_CCL_DECODE (codesys) = value; | 983 build_string (suffix)), |
990 } | 984 Qnil); |
991 else if (EQ (key, Qencode)) | 985 Fregister_ccl_program (sym, value); |
992 { | |
993 CHECK_VECTOR (value); | |
994 CODING_SYSTEM_CCL_ENCODE (codesys) = value; | |
995 } | 986 } |
996 else | 987 else |
997 signal_simple_error ("Unrecognized property", key); | 988 { |
989 CHECK_SYMBOL (value); | |
990 sym = value; | |
991 } | |
992 /* check if the given ccl programs are valid. */ | |
993 if (setup_ccl_program (&test_ccl, sym) < 0) | |
994 signal_simple_error ("Invalid CCL program", value); | |
995 | |
996 if (EQ (key, Qdecode)) | |
997 CODING_SYSTEM_CCL_DECODE (codesys) = sym; | |
998 else if (EQ (key, Qencode)) | |
999 CODING_SYSTEM_CCL_ENCODE (codesys) = sym; | |
1000 | |
998 } | 1001 } |
999 #endif /* MULE */ | 1002 #endif /* MULE */ |
1000 else | 1003 else |
1001 signal_simple_error ("Unrecognized property", key); | 1004 signal_simple_error ("Unrecognized property", key); |
1002 } | 1005 } |
1626 technical interviews */ | 1629 technical interviews */ |
1627 return (mask & (mask - 1)) == 0; | 1630 return (mask & (mask - 1)) == 0; |
1628 } | 1631 } |
1629 | 1632 |
1630 static eol_type_t | 1633 static eol_type_t |
1631 detect_eol_type (struct detection_state *st, const unsigned char *src, | 1634 detect_eol_type (struct detection_state *st, const Extbyte *src, |
1632 unsigned int n) | 1635 size_t n) |
1633 { | 1636 { |
1634 int c; | |
1635 | |
1636 while (n--) | 1637 while (n--) |
1637 { | 1638 { |
1638 c = *src++; | 1639 unsigned char c = *(unsigned char *)src++; |
1639 if (c == '\n') | 1640 if (c == '\n') |
1640 { | 1641 { |
1641 if (st->eol.just_saw_cr) | 1642 if (st->eol.just_saw_cr) |
1642 return EOL_CRLF; | 1643 return EOL_CRLF; |
1643 else if (st->eol.seen_anything) | 1644 else if (st->eol.seen_anything) |
1672 1 == definitive answers are here for both st->eol_type and st->mask | 1673 1 == definitive answers are here for both st->eol_type and st->mask |
1673 */ | 1674 */ |
1674 | 1675 |
1675 static int | 1676 static int |
1676 detect_coding_type (struct detection_state *st, const Extbyte *src, | 1677 detect_coding_type (struct detection_state *st, const Extbyte *src, |
1677 unsigned int n, int just_do_eol) | 1678 size_t n, int just_do_eol) |
1678 { | 1679 { |
1679 int c; | |
1680 | |
1681 if (st->eol_type == EOL_AUTODETECT) | 1680 if (st->eol_type == EOL_AUTODETECT) |
1682 st->eol_type = detect_eol_type (st, src, n); | 1681 st->eol_type = detect_eol_type (st, src, n); |
1683 | 1682 |
1684 if (just_do_eol) | 1683 if (just_do_eol) |
1685 return st->eol_type != EOL_AUTODETECT; | 1684 return st->eol_type != EOL_AUTODETECT; |
1686 | 1685 |
1687 if (!st->seen_non_ascii) | 1686 if (!st->seen_non_ascii) |
1688 { | 1687 { |
1689 for (; n; n--, src++) | 1688 for (; n; n--, src++) |
1690 { | 1689 { |
1691 c = *src; | 1690 unsigned char c = *(unsigned char *) src; |
1692 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) | 1691 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) |
1693 { | 1692 { |
1694 st->seen_non_ascii = 1; | 1693 st->seen_non_ascii = 1; |
1695 #ifdef MULE | 1694 #ifdef MULE |
1696 st->shift_jis.mask = ~0; | 1695 st->shift_jis.mask = ~0; |
1912 Lstream_rewind (stream); | 1911 Lstream_rewind (stream); |
1913 } | 1912 } |
1914 | 1913 |
1915 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | 1914 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* |
1916 Detect coding system of the text in the region between START and END. | 1915 Detect coding system of the text in the region between START and END. |
1917 Returned a list of possible coding systems ordered by priority. | 1916 Return a list of possible coding systems ordered by priority. |
1918 If only ASCII characters are found, it returns 'undecided or one of | 1917 If only ASCII characters are found, return 'undecided or one of |
1919 its subsidiary coding systems according to a detected end-of-line | 1918 its subsidiary coding systems according to a detected end-of-line |
1920 type. Optional arg BUFFER defaults to the current buffer. | 1919 type. Optional arg BUFFER defaults to the current buffer. |
1921 */ | 1920 */ |
1922 (start, end, buffer)) | 1921 (start, end, buffer)) |
1923 { | 1922 { |
1938 xzero (decst); | 1937 xzero (decst); |
1939 decst.eol_type = EOL_AUTODETECT; | 1938 decst.eol_type = EOL_AUTODETECT; |
1940 decst.mask = ~0; | 1939 decst.mask = ~0; |
1941 while (1) | 1940 while (1) |
1942 { | 1941 { |
1943 unsigned char random_buffer[4096]; | 1942 Extbyte random_buffer[4096]; |
1944 ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer)); | 1943 ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer)); |
1945 | 1944 |
1946 if (!nread) | 1945 if (!nread) |
1947 break; | 1946 break; |
1948 if (detect_coding_type (&decst, random_buffer, nread, 0)) | 1947 if (detect_coding_type (&decst, random_buffer, nread, 0)) |
2194 } | 2193 } |
2195 if (read_size == 0) | 2194 if (read_size == 0) |
2196 /* There might be some more end data produced in the translation. | 2195 /* There might be some more end data produced in the translation. |
2197 See the comment above. */ | 2196 See the comment above. */ |
2198 str->flags |= CODING_STATE_END; | 2197 str->flags |= CODING_STATE_END; |
2199 mule_decode (stream, data, str->runoff, read_size); | 2198 mule_decode (stream, (Extbyte *) data, str->runoff, read_size); |
2200 } | 2199 } |
2201 | 2200 |
2202 if (data - orig_data == 0) | 2201 if (data - orig_data == 0) |
2203 return error_occurred ? -1 : 0; | 2202 return error_occurred ? -1 : 0; |
2204 else | 2203 else |
2212 ssize_t retval; | 2211 ssize_t retval; |
2213 | 2212 |
2214 /* Decode all our data into the runoff, and then attempt to write | 2213 /* Decode all our data into the runoff, and then attempt to write |
2215 it all out to the other end. Remove whatever chunk we succeeded | 2214 it all out to the other end. Remove whatever chunk we succeeded |
2216 in writing. */ | 2215 in writing. */ |
2217 mule_decode (stream, data, str->runoff, size); | 2216 mule_decode (stream, (Extbyte *) data, str->runoff, size); |
2218 retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0), | 2217 retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0), |
2219 Dynarr_length (str->runoff)); | 2218 Dynarr_length (str->runoff)); |
2220 if (retval > 0) | 2219 if (retval > 0) |
2221 Dynarr_delete_many (str->runoff, 0, retval); | 2220 Dynarr_delete_many (str->runoff, 0, retval); |
2222 /* Do NOT return retval. The return value indicates how much | 2221 /* Do NOT return retval. The return value indicates how much |
2364 written to that stream; that is handled in decoding_reader() | 2363 written to that stream; that is handled in decoding_reader() |
2365 or decoding_writer(). This allows the same functions to | 2364 or decoding_writer(). This allows the same functions to |
2366 be used for both reading and writing. */ | 2365 be used for both reading and writing. */ |
2367 | 2366 |
2368 static void | 2367 static void |
2369 mule_decode (Lstream *decoding, const unsigned char *src, | 2368 mule_decode (Lstream *decoding, const Extbyte *src, |
2370 unsigned_char_dynarr *dst, unsigned int n) | 2369 unsigned_char_dynarr *dst, size_t n) |
2371 { | 2370 { |
2372 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 2371 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
2373 | 2372 |
2374 /* If necessary, do encoding-detection now. We do this when | 2373 /* If necessary, do encoding-detection now. We do this when |
2375 we're a writing stream or a non-seekable reading stream, | 2374 we're a writing stream or a non-seekable reading stream, |
2429 case CODESYS_UTF8: | 2428 case CODESYS_UTF8: |
2430 decode_coding_utf8 (decoding, src, dst, n); | 2429 decode_coding_utf8 (decoding, src, dst, n); |
2431 break; | 2430 break; |
2432 case CODESYS_CCL: | 2431 case CODESYS_CCL: |
2433 str->ccl.last_block = str->flags & CODING_STATE_END; | 2432 str->ccl.last_block = str->flags & CODING_STATE_END; |
2434 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING); | 2433 /* When applying a CCL program to a stream, src MUST NOT be |
2434 a NULL pointer. */ | |
2435 ccl_driver (&str->ccl, (src ? (unsigned char *)src : (unsigned char*)""), | |
2436 dst, n, 0, CCL_MODE_DECODING); | |
2435 break; | 2437 break; |
2436 case CODESYS_ISO2022: | 2438 case CODESYS_ISO2022: |
2437 decode_coding_iso2022 (decoding, src, dst, n); | 2439 decode_coding_iso2022 (decoding, src, dst, n); |
2438 break; | 2440 break; |
2439 #endif /* MULE */ | 2441 #endif /* MULE */ |
2808 /* Convert N bytes of internally-formatted data stored in SRC to an | 2810 /* Convert N bytes of internally-formatted data stored in SRC to an |
2809 external format, according to the encoding stream ENCODING. | 2811 external format, according to the encoding stream ENCODING. |
2810 Store the encoded data into DST. */ | 2812 Store the encoded data into DST. */ |
2811 | 2813 |
2812 static void | 2814 static void |
2813 mule_encode (Lstream *encoding, const unsigned char *src, | 2815 mule_encode (Lstream *encoding, const Bufbyte *src, |
2814 unsigned_char_dynarr *dst, unsigned int n) | 2816 unsigned_char_dynarr *dst, size_t n) |
2815 { | 2817 { |
2816 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 2818 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
2817 | 2819 |
2818 switch (CODING_SYSTEM_TYPE (str->codesys)) | 2820 switch (CODING_SYSTEM_TYPE (str->codesys)) |
2819 { | 2821 { |
2841 case CODESYS_UTF8: | 2843 case CODESYS_UTF8: |
2842 encode_coding_utf8 (encoding, src, dst, n); | 2844 encode_coding_utf8 (encoding, src, dst, n); |
2843 break; | 2845 break; |
2844 case CODESYS_CCL: | 2846 case CODESYS_CCL: |
2845 str->ccl.last_block = str->flags & CODING_STATE_END; | 2847 str->ccl.last_block = str->flags & CODING_STATE_END; |
2846 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING); | 2848 /* When applying a CCL program to a stream, src MUST NOT be |
2849 a NULL pointer. */ | |
2850 ccl_driver (&str->ccl, ((src) ? src : (unsigned char*)""), | |
2851 dst, n, 0, CCL_MODE_ENCODING); | |
2847 break; | 2852 break; |
2848 case CODESYS_ISO2022: | 2853 case CODESYS_ISO2022: |
2849 encode_coding_iso2022 (encoding, src, dst, n); | 2854 encode_coding_iso2022 (encoding, src, dst, n); |
2850 break; | 2855 break; |
2851 #endif /* MULE */ | 2856 #endif /* MULE */ |
2955 | 2960 |
2956 #define BYTE_SJIS_KATAKANA_P(c) \ | 2961 #define BYTE_SJIS_KATAKANA_P(c) \ |
2957 ((c) >= 0xA1 && (c) <= 0xDF) | 2962 ((c) >= 0xA1 && (c) <= 0xDF) |
2958 | 2963 |
2959 static int | 2964 static int |
2960 detect_coding_sjis (struct detection_state *st, const unsigned char *src, | 2965 detect_coding_sjis (struct detection_state *st, const Extbyte *src, size_t n) |
2961 unsigned int n) | 2966 { |
2962 { | |
2963 int c; | |
2964 | |
2965 while (n--) | 2967 while (n--) |
2966 { | 2968 { |
2967 c = *src++; | 2969 unsigned char c = *(unsigned char *)src++; |
2968 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 2970 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
2969 return 0; | 2971 return 0; |
2970 if (st->shift_jis.in_second_byte) | 2972 if (st->shift_jis.in_second_byte) |
2971 { | 2973 { |
2972 st->shift_jis.in_second_byte = 0; | 2974 st->shift_jis.in_second_byte = 0; |
2980 } | 2982 } |
2981 | 2983 |
2982 /* Convert Shift-JIS data to internal format. */ | 2984 /* Convert Shift-JIS data to internal format. */ |
2983 | 2985 |
2984 static void | 2986 static void |
2985 decode_coding_sjis (Lstream *decoding, const unsigned char *src, | 2987 decode_coding_sjis (Lstream *decoding, const Extbyte *src, |
2986 unsigned_char_dynarr *dst, unsigned int n) | 2988 unsigned_char_dynarr *dst, size_t n) |
2987 { | 2989 { |
2988 unsigned char c; | |
2989 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 2990 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
2990 unsigned int flags = str->flags; | 2991 unsigned int flags = str->flags; |
2991 unsigned int ch = str->ch; | 2992 unsigned int ch = str->ch; |
2992 eol_type_t eol_type = str->eol_type; | 2993 eol_type_t eol_type = str->eol_type; |
2993 | 2994 |
2994 while (n--) | 2995 while (n--) |
2995 { | 2996 { |
2996 c = *src++; | 2997 unsigned char c = *(unsigned char *)src++; |
2997 | 2998 |
2998 if (ch) | 2999 if (ch) |
2999 { | 3000 { |
3000 /* Previous character was first byte of Shift-JIS Kanji char. */ | 3001 /* Previous character was first byte of Shift-JIS Kanji char. */ |
3001 if (BYTE_SJIS_TWO_BYTE_2_P (c)) | 3002 if (BYTE_SJIS_TWO_BYTE_2_P (c)) |
3037 } | 3038 } |
3038 | 3039 |
3039 /* Convert internally-formatted data to Shift-JIS. */ | 3040 /* Convert internally-formatted data to Shift-JIS. */ |
3040 | 3041 |
3041 static void | 3042 static void |
3042 encode_coding_sjis (Lstream *encoding, const unsigned char *src, | 3043 encode_coding_sjis (Lstream *encoding, const Bufbyte *src, |
3043 unsigned_char_dynarr *dst, unsigned int n) | 3044 unsigned_char_dynarr *dst, size_t n) |
3044 { | 3045 { |
3045 unsigned char c; | |
3046 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 3046 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
3047 unsigned int flags = str->flags; | 3047 unsigned int flags = str->flags; |
3048 unsigned int ch = str->ch; | 3048 unsigned int ch = str->ch; |
3049 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); | 3049 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); |
3050 | 3050 |
3051 while (n--) | 3051 while (n--) |
3052 { | 3052 { |
3053 c = *src++; | 3053 Bufbyte c = *src++; |
3054 if (c == '\n') | 3054 if (c == '\n') |
3055 { | 3055 { |
3056 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) | 3056 if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) |
3057 Dynarr_add (dst, '\r'); | 3057 Dynarr_add (dst, '\r'); |
3058 if (eol_type != EOL_CR) | 3058 if (eol_type != EOL_CR) |
3117 else | 3117 else |
3118 return Qnil; | 3118 return Qnil; |
3119 } | 3119 } |
3120 | 3120 |
3121 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* | 3121 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* |
3122 Encode a JISX0208 character CHAR to SHIFT-JIS coding-system. | 3122 Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system. |
3123 Return the corresponding character code in SHIFT-JIS as a cons of two bytes. | 3123 Return the corresponding character code in SHIFT-JIS as a cons of two bytes. |
3124 */ | 3124 */ |
3125 (ch)) | 3125 (character)) |
3126 { | 3126 { |
3127 Lisp_Object charset; | 3127 Lisp_Object charset; |
3128 int c1, c2, s1, s2; | 3128 int c1, c2, s1, s2; |
3129 | 3129 |
3130 CHECK_CHAR_COERCE_INT (ch); | 3130 CHECK_CHAR_COERCE_INT (character); |
3131 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); | 3131 BREAKUP_CHAR (XCHAR (character), charset, c1, c2); |
3132 if (EQ (charset, Vcharset_japanese_jisx0208)) | 3132 if (EQ (charset, Vcharset_japanese_jisx0208)) |
3133 { | 3133 { |
3134 ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); | 3134 ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); |
3135 return Fcons (make_int (s1), make_int (s2)); | 3135 return Fcons (make_int (s1), make_int (s2)); |
3136 } | 3136 } |
3241 b2 = I % BIG5_SAME_ROW; \ | 3241 b2 = I % BIG5_SAME_ROW; \ |
3242 b2 += b2 < 0x3F ? 0x40 : 0x62; \ | 3242 b2 += b2 < 0x3F ? 0x40 : 0x62; \ |
3243 } while (0) | 3243 } while (0) |
3244 | 3244 |
3245 static int | 3245 static int |
3246 detect_coding_big5 (struct detection_state *st, const unsigned char *src, | 3246 detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n) |
3247 unsigned int n) | 3247 { |
3248 { | |
3249 int c; | |
3250 | |
3251 while (n--) | 3248 while (n--) |
3252 { | 3249 { |
3253 c = *src++; | 3250 unsigned char c = *(unsigned char *)src++; |
3254 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO || | 3251 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO || |
3255 (c >= 0x80 && c <= 0xA0)) | 3252 (c >= 0x80 && c <= 0xA0)) |
3256 return 0; | 3253 return 0; |
3257 if (st->big5.in_second_byte) | 3254 if (st->big5.in_second_byte) |
3258 { | 3255 { |
3267 } | 3264 } |
3268 | 3265 |
3269 /* Convert Big5 data to internal format. */ | 3266 /* Convert Big5 data to internal format. */ |
3270 | 3267 |
3271 static void | 3268 static void |
3272 decode_coding_big5 (Lstream *decoding, const unsigned char *src, | 3269 decode_coding_big5 (Lstream *decoding, const Extbyte *src, |
3273 unsigned_char_dynarr *dst, unsigned int n) | 3270 unsigned_char_dynarr *dst, size_t n) |
3274 { | 3271 { |
3275 unsigned char c; | |
3276 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 3272 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
3277 unsigned int flags = str->flags; | 3273 unsigned int flags = str->flags; |
3278 unsigned int ch = str->ch; | 3274 unsigned int ch = str->ch; |
3279 eol_type_t eol_type = str->eol_type; | 3275 eol_type_t eol_type = str->eol_type; |
3280 | 3276 |
3281 while (n--) | 3277 while (n--) |
3282 { | 3278 { |
3283 c = *src++; | 3279 unsigned char c = *(unsigned char *)src++; |
3284 if (ch) | 3280 if (ch) |
3285 { | 3281 { |
3286 /* Previous character was first byte of Big5 char. */ | 3282 /* Previous character was first byte of Big5 char. */ |
3287 if (BYTE_BIG5_TWO_BYTE_2_P (c)) | 3283 if (BYTE_BIG5_TWO_BYTE_2_P (c)) |
3288 { | 3284 { |
3317 } | 3313 } |
3318 | 3314 |
3319 /* Convert internally-formatted data to Big5. */ | 3315 /* Convert internally-formatted data to Big5. */ |
3320 | 3316 |
3321 static void | 3317 static void |
3322 encode_coding_big5 (Lstream *encoding, const unsigned char *src, | 3318 encode_coding_big5 (Lstream *encoding, const Bufbyte *src, |
3323 unsigned_char_dynarr *dst, unsigned int n) | 3319 unsigned_char_dynarr *dst, size_t n) |
3324 { | 3320 { |
3325 unsigned char c; | 3321 unsigned char c; |
3326 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 3322 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
3327 unsigned int flags = str->flags; | 3323 unsigned int flags = str->flags; |
3328 unsigned int ch = str->ch; | 3324 unsigned int ch = str->ch; |
3405 else | 3401 else |
3406 return Qnil; | 3402 return Qnil; |
3407 } | 3403 } |
3408 | 3404 |
3409 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* | 3405 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* |
3410 Encode the Big5 character CH to BIG5 coding-system. | 3406 Encode the Big5 character CHARACTER in the BIG5 coding-system. |
3411 Return the corresponding character code in Big5. | 3407 Return the corresponding character code in Big5. |
3412 */ | 3408 */ |
3413 (ch)) | 3409 (character)) |
3414 { | 3410 { |
3415 Lisp_Object charset; | 3411 Lisp_Object charset; |
3416 int c1, c2, b1, b2; | 3412 int c1, c2, b1, b2; |
3417 | 3413 |
3418 CHECK_CHAR_COERCE_INT (ch); | 3414 CHECK_CHAR_COERCE_INT (character); |
3419 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); | 3415 BREAKUP_CHAR (XCHAR (character), charset, c1, c2); |
3420 if (EQ (charset, Vcharset_chinese_big5_1) || | 3416 if (EQ (charset, Vcharset_chinese_big5_1) || |
3421 EQ (charset, Vcharset_chinese_big5_2)) | 3417 EQ (charset, Vcharset_chinese_big5_2)) |
3422 { | 3418 { |
3423 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80, | 3419 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80, |
3424 b1, b2); | 3420 b1, b2); |
3584 Dynarr_add (dst, (code >> 8) & 255); | 3580 Dynarr_add (dst, (code >> 8) & 255); |
3585 Dynarr_add (dst, code & 255); | 3581 Dynarr_add (dst, code & 255); |
3586 } | 3582 } |
3587 | 3583 |
3588 static int | 3584 static int |
3589 detect_coding_ucs4 (struct detection_state *st, const unsigned char *src, | 3585 detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, size_t n) |
3590 unsigned int n) | |
3591 { | 3586 { |
3592 while (n--) | 3587 while (n--) |
3593 { | 3588 { |
3594 int c = *src++; | 3589 unsigned char c = *(unsigned char *)src++; |
3595 switch (st->ucs4.in_byte) | 3590 switch (st->ucs4.in_byte) |
3596 { | 3591 { |
3597 case 0: | 3592 case 0: |
3598 if (c >= 128) | 3593 if (c >= 128) |
3599 return 0; | 3594 return 0; |
3609 } | 3604 } |
3610 return CODING_CATEGORY_UCS4_MASK; | 3605 return CODING_CATEGORY_UCS4_MASK; |
3611 } | 3606 } |
3612 | 3607 |
3613 static void | 3608 static void |
3614 decode_coding_ucs4 (Lstream *decoding, const unsigned char *src, | 3609 decode_coding_ucs4 (Lstream *decoding, const Extbyte *src, |
3615 unsigned_char_dynarr *dst, unsigned int n) | 3610 unsigned_char_dynarr *dst, size_t n) |
3616 { | 3611 { |
3617 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 3612 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
3618 unsigned int flags = str->flags; | 3613 unsigned int flags = str->flags; |
3619 unsigned int ch = str->ch; | 3614 unsigned int ch = str->ch; |
3620 unsigned char counter = str->counter; | 3615 unsigned char counter = str->counter; |
3621 | 3616 |
3622 while (n--) | 3617 while (n--) |
3623 { | 3618 { |
3624 unsigned char c = *src++; | 3619 unsigned char c = *(unsigned char *)src++; |
3625 switch (counter) | 3620 switch (counter) |
3626 { | 3621 { |
3627 case 0: | 3622 case 0: |
3628 ch = c; | 3623 ch = c; |
3629 counter = 3; | 3624 counter = 3; |
3645 str->ch = ch; | 3640 str->ch = ch; |
3646 str->counter = counter; | 3641 str->counter = counter; |
3647 } | 3642 } |
3648 | 3643 |
3649 static void | 3644 static void |
3650 encode_coding_ucs4 (Lstream *encoding, const unsigned char *src, | 3645 encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, |
3651 unsigned_char_dynarr *dst, unsigned int n) | 3646 unsigned_char_dynarr *dst, size_t n) |
3652 { | 3647 { |
3653 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 3648 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
3654 unsigned int flags = str->flags; | 3649 unsigned int flags = str->flags; |
3655 unsigned int ch = str->ch; | 3650 unsigned int ch = str->ch; |
3656 unsigned char char_boundary = str->iso2022.current_char_boundary; | 3651 unsigned char char_boundary = str->iso2022.current_char_boundary; |
3711 { | 3706 { |
3712 if (in_composite) | 3707 if (in_composite) |
3713 { | 3708 { |
3714 /* #### Bother! We don't know how to | 3709 /* #### Bother! We don't know how to |
3715 handle this yet. */ | 3710 handle this yet. */ |
3716 Dynarr_add (dst, 0); | 3711 Dynarr_add (dst, '\0'); |
3717 Dynarr_add (dst, 0); | 3712 Dynarr_add (dst, '\0'); |
3718 Dynarr_add (dst, 0); | 3713 Dynarr_add (dst, '\0'); |
3719 Dynarr_add (dst, '~'); | 3714 Dynarr_add (dst, '~'); |
3720 } | 3715 } |
3721 else | 3716 else |
3722 { | 3717 { |
3723 Emchar emch = MAKE_CHAR (Vcharset_composite, | 3718 Emchar emch = MAKE_CHAR (Vcharset_composite, |
3784 /************************************************************************/ | 3779 /************************************************************************/ |
3785 /* UTF-8 methods */ | 3780 /* UTF-8 methods */ |
3786 /************************************************************************/ | 3781 /************************************************************************/ |
3787 | 3782 |
3788 static int | 3783 static int |
3789 detect_coding_utf8 (struct detection_state *st, const unsigned char *src, | 3784 detect_coding_utf8 (struct detection_state *st, const Extbyte *src, size_t n) |
3790 unsigned int n) | |
3791 { | 3785 { |
3792 while (n--) | 3786 while (n--) |
3793 { | 3787 { |
3794 unsigned char c = *src++; | 3788 unsigned char c = *(unsigned char *)src++; |
3795 switch (st->utf8.in_byte) | 3789 switch (st->utf8.in_byte) |
3796 { | 3790 { |
3797 case 0: | 3791 case 0: |
3798 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | 3792 if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) |
3799 return 0; | 3793 return 0; |
3819 } | 3813 } |
3820 return CODING_CATEGORY_UTF8_MASK; | 3814 return CODING_CATEGORY_UTF8_MASK; |
3821 } | 3815 } |
3822 | 3816 |
3823 static void | 3817 static void |
3824 decode_coding_utf8 (Lstream *decoding, const unsigned char *src, | 3818 decode_coding_utf8 (Lstream *decoding, const Extbyte *src, |
3825 unsigned_char_dynarr *dst, unsigned int n) | 3819 unsigned_char_dynarr *dst, size_t n) |
3826 { | 3820 { |
3827 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 3821 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
3828 unsigned int flags = str->flags; | 3822 unsigned int flags = str->flags; |
3829 unsigned int ch = str->ch; | 3823 unsigned int ch = str->ch; |
3830 eol_type_t eol_type = str->eol_type; | 3824 eol_type_t eol_type = str->eol_type; |
3831 unsigned char counter = str->counter; | 3825 unsigned char counter = str->counter; |
3832 | 3826 |
3833 while (n--) | 3827 while (n--) |
3834 { | 3828 { |
3835 unsigned char c = *src++; | 3829 unsigned char c = *(unsigned char *)src++; |
3836 switch (counter) | 3830 switch (counter) |
3837 { | 3831 { |
3838 case 0: | 3832 case 0: |
3839 if ( c >= 0xfc ) | 3833 if ( c >= 0xfc ) |
3840 { | 3834 { |
3933 Dynarr_add (dst, (code & 0x3f) | 0x80); | 3927 Dynarr_add (dst, (code & 0x3f) | 0x80); |
3934 } | 3928 } |
3935 } | 3929 } |
3936 | 3930 |
3937 static void | 3931 static void |
3938 encode_coding_utf8 (Lstream *encoding, const unsigned char *src, | 3932 encode_coding_utf8 (Lstream *encoding, const Bufbyte *src, |
3939 unsigned_char_dynarr *dst, unsigned int n) | 3933 unsigned_char_dynarr *dst, size_t n) |
3940 { | 3934 { |
3941 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 3935 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
3942 unsigned int flags = str->flags; | 3936 unsigned int flags = str->flags; |
3943 unsigned int ch = str->ch; | 3937 unsigned int ch = str->ch; |
3944 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); | 3938 eol_type_t eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); |
4652 iso->switched_dir_and_no_valid_charset_yet = 0; | 4646 iso->switched_dir_and_no_valid_charset_yet = 0; |
4653 return 1; | 4647 return 1; |
4654 } | 4648 } |
4655 | 4649 |
4656 static int | 4650 static int |
4657 detect_coding_iso2022 (struct detection_state *st, const unsigned char *src, | 4651 detect_coding_iso2022 (struct detection_state *st, const Extbyte *src, size_t n) |
4658 unsigned int n) | |
4659 { | 4652 { |
4660 int mask; | 4653 int mask; |
4661 | 4654 |
4662 /* #### There are serious deficiencies in the recognition mechanism | 4655 /* #### There are serious deficiencies in the recognition mechanism |
4663 here. This needs to be much smarter if it's going to cut it. | 4656 here. This needs to be much smarter if it's going to cut it. |
4683 | 4676 |
4684 mask = st->iso2022.mask; | 4677 mask = st->iso2022.mask; |
4685 | 4678 |
4686 while (n--) | 4679 while (n--) |
4687 { | 4680 { |
4688 int c = *src++; | 4681 unsigned char c = *(unsigned char *)src++; |
4689 if (c >= 0xA0) | 4682 if (c >= 0xA0) |
4690 { | 4683 { |
4691 mask &= ~CODING_CATEGORY_ISO_7_MASK; | 4684 mask &= ~CODING_CATEGORY_ISO_7_MASK; |
4692 st->iso2022.high_byte_count++; | 4685 st->iso2022.high_byte_count++; |
4693 } | 4686 } |
4843 } | 4836 } |
4844 | 4837 |
4845 /* Convert ISO2022-format data to internal format. */ | 4838 /* Convert ISO2022-format data to internal format. */ |
4846 | 4839 |
4847 static void | 4840 static void |
4848 decode_coding_iso2022 (Lstream *decoding, const unsigned char *src, | 4841 decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, |
4849 unsigned_char_dynarr *dst, unsigned int n) | 4842 unsigned_char_dynarr *dst, size_t n) |
4850 { | 4843 { |
4851 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 4844 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
4852 unsigned int flags = str->flags; | 4845 unsigned int flags = str->flags; |
4853 unsigned int ch = str->ch; | 4846 unsigned int ch = str->ch; |
4854 eol_type_t eol_type = str->eol_type; | 4847 eol_type_t eol_type = str->eol_type; |
4864 dst = str->iso2022.composite_chars; | 4857 dst = str->iso2022.composite_chars; |
4865 #endif /* ENABLE_COMPOSITE_CHARS */ | 4858 #endif /* ENABLE_COMPOSITE_CHARS */ |
4866 | 4859 |
4867 while (n--) | 4860 while (n--) |
4868 { | 4861 { |
4869 unsigned char c = *src++; | 4862 unsigned char c = *(unsigned char *)src++; |
4870 if (flags & CODING_STATE_ESCAPE) | 4863 if (flags & CODING_STATE_ESCAPE) |
4871 { /* Within ESC sequence */ | 4864 { /* Within ESC sequence */ |
4872 int retval = parse_iso2022_esc (coding_system, &str->iso2022, | 4865 int retval = parse_iso2022_esc (coding_system, &str->iso2022, |
4873 c, &flags, 1); | 4866 c, &flags, 1); |
4874 | 4867 |
5169 } | 5162 } |
5170 | 5163 |
5171 /* Convert internally-formatted data to ISO2022 format. */ | 5164 /* Convert internally-formatted data to ISO2022 format. */ |
5172 | 5165 |
5173 static void | 5166 static void |
5174 encode_coding_iso2022 (Lstream *encoding, const unsigned char *src, | 5167 encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src, |
5175 unsigned_char_dynarr *dst, unsigned int n) | 5168 unsigned_char_dynarr *dst, size_t n) |
5176 { | 5169 { |
5177 unsigned char charmask, c; | 5170 unsigned char charmask, c; |
5178 unsigned char char_boundary; | 5171 unsigned char char_boundary; |
5179 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 5172 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
5180 unsigned int flags = str->flags; | 5173 unsigned int flags = str->flags; |
5478 | 5471 |
5479 /* This is used when reading in "binary" files -- i.e. files that may | 5472 /* This is used when reading in "binary" files -- i.e. files that may |
5480 contain all 256 possible byte values and that are not to be | 5473 contain all 256 possible byte values and that are not to be |
5481 interpreted as being in any particular decoding. */ | 5474 interpreted as being in any particular decoding. */ |
5482 static void | 5475 static void |
5483 decode_coding_no_conversion (Lstream *decoding, const unsigned char *src, | 5476 decode_coding_no_conversion (Lstream *decoding, const Extbyte *src, |
5484 unsigned_char_dynarr *dst, unsigned int n) | 5477 unsigned_char_dynarr *dst, size_t n) |
5485 { | 5478 { |
5486 unsigned char c; | |
5487 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 5479 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
5488 unsigned int flags = str->flags; | 5480 unsigned int flags = str->flags; |
5489 unsigned int ch = str->ch; | 5481 unsigned int ch = str->ch; |
5490 eol_type_t eol_type = str->eol_type; | 5482 eol_type_t eol_type = str->eol_type; |
5491 | 5483 |
5492 while (n--) | 5484 while (n--) |
5493 { | 5485 { |
5494 c = *src++; | 5486 unsigned char c = *(unsigned char *)src++; |
5495 | 5487 |
5496 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); | 5488 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); |
5497 DECODE_ADD_BINARY_CHAR (c, dst); | 5489 DECODE_ADD_BINARY_CHAR (c, dst); |
5498 label_continue_loop:; | 5490 label_continue_loop:; |
5499 } | 5491 } |
5503 str->flags = flags; | 5495 str->flags = flags; |
5504 str->ch = ch; | 5496 str->ch = ch; |
5505 } | 5497 } |
5506 | 5498 |
5507 static void | 5499 static void |
5508 encode_coding_no_conversion (Lstream *encoding, const unsigned char *src, | 5500 encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src, |
5509 unsigned_char_dynarr *dst, unsigned int n) | 5501 unsigned_char_dynarr *dst, size_t n) |
5510 { | 5502 { |
5511 unsigned char c; | 5503 unsigned char c; |
5512 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); | 5504 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); |
5513 unsigned int flags = str->flags; | 5505 unsigned int flags = str->flags; |
5514 unsigned int ch = str->ch; | 5506 unsigned int ch = str->ch; |