Mercurial > hg > xemacs-beta
diff src/file-coding.c @ 444:576fb035e263 r21-2-37
Import from CVS: tag r21-2-37
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:36:19 +0200 |
parents | abe6d1db359e |
children | 3078fd1074e8 |
line wrap: on
line diff
--- a/src/file-coding.c Mon Aug 13 11:35:05 2007 +0200 +++ b/src/file-coding.c Mon Aug 13 11:36:19 2007 +0200 @@ -176,68 +176,47 @@ #ifdef MULE struct detection_state; static int detect_coding_sjis (struct detection_state *st, - const unsigned char *src, - unsigned int n); -static void decode_coding_sjis (Lstream *decoding, - const unsigned char *src, - unsigned_char_dynarr *dst, - unsigned int n); -static void encode_coding_sjis (Lstream *encoding, - const unsigned char *src, - unsigned_char_dynarr *dst, - unsigned int n); + const Extbyte *src, size_t n); +static void decode_coding_sjis (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void encode_coding_sjis (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n); static int detect_coding_big5 (struct detection_state *st, - const unsigned char *src, - unsigned int n); -static void decode_coding_big5 (Lstream *decoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); -static void encode_coding_big5 (Lstream *encoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); + const Extbyte *src, size_t n); +static void decode_coding_big5 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void encode_coding_big5 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n); static int detect_coding_ucs4 (struct detection_state *st, - const unsigned char *src, - unsigned int n); -static void decode_coding_ucs4 (Lstream *decoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); -static void encode_coding_ucs4 (Lstream *encoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); + const Extbyte *src, size_t n); +static void decode_coding_ucs4 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n); static int detect_coding_utf8 (struct detection_state *st, - const unsigned char *src, - unsigned int n); -static void decode_coding_utf8 (Lstream *decoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); -static void encode_coding_utf8 (Lstream *encoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); + const Extbyte *src, size_t n); +static void decode_coding_utf8 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void encode_coding_utf8 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n); static int postprocess_iso2022_mask (int mask); static void reset_iso2022 (Lisp_Object coding_system, struct iso2022_decoder *iso); static int detect_coding_iso2022 (struct detection_state *st, - const unsigned char *src, - unsigned int n); -static void decode_coding_iso2022 (Lstream *decoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); -static void encode_coding_iso2022 (Lstream *encoding, - const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); + const Extbyte *src, size_t n); +static void decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n); #endif /* MULE */ -static void decode_coding_no_conversion (Lstream *decoding, - const unsigned char *src, - unsigned_char_dynarr *dst, - unsigned int n); -static void encode_coding_no_conversion (Lstream *encoding, - const unsigned char *src, - unsigned_char_dynarr *dst, - unsigned int n); -static void mule_decode (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); -static void mule_encode (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n); +static void decode_coding_no_conversion (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void mule_decode (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n); +static void mule_encode (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n); typedef struct codesys_prop codesys_prop; struct codesys_prop @@ -789,12 +768,12 @@ 'post-read-conversion Function called after a file has been read in, to perform the - decoding. Called with two arguments, BEG and END, denoting + decoding. Called with two arguments, START and END, denoting a region of the current buffer to be decoded. 'pre-write-conversion Function called before a file is written out, to perform the - encoding. Called with two arguments, BEG and END, denoting + encoding. Called with two arguments, START and END, denoting a region of the current buffer to be encoded. @@ -983,18 +962,42 @@ } else if (EQ (type, Qccl)) { + Lisp_Object sym; + struct ccl_program test_ccl; + Extbyte *suffix; + + /* Check key first. */ if (EQ (key, Qdecode)) + suffix = "-ccl-decode"; + else if (EQ (key, Qencode)) + suffix = "-ccl-encode"; + else + signal_simple_error ("Unrecognized property", key); + + /* If value is vector, register it as a ccl program + associated with an newly created symbol for + backward compatibility. */ + if (VECTORP (value)) { - CHECK_VECTOR (value); - CODING_SYSTEM_CCL_DECODE (codesys) = value; - } - else if (EQ (key, Qencode)) - { - CHECK_VECTOR (value); - CODING_SYSTEM_CCL_ENCODE (codesys) = value; + sym = Fintern (concat2 (Fsymbol_name (name), + build_string (suffix)), + Qnil); + Fregister_ccl_program (sym, value); } else - signal_simple_error ("Unrecognized property", key); + { + CHECK_SYMBOL (value); + sym = value; + } + /* check if the given ccl programs are valid. */ + if (setup_ccl_program (&test_ccl, sym) < 0) + signal_simple_error ("Invalid CCL program", value); + + if (EQ (key, Qdecode)) + CODING_SYSTEM_CCL_DECODE (codesys) = sym; + else if (EQ (key, Qencode)) + CODING_SYSTEM_CCL_ENCODE (codesys) = sym; + } #endif /* MULE */ else @@ -1628,14 +1631,12 @@ } static eol_type_t -detect_eol_type (struct detection_state *st, const unsigned char *src, - unsigned int n) +detect_eol_type (struct detection_state *st, const Extbyte *src, + size_t n) { - int c; - while (n--) { - c = *src++; + unsigned char c = *(unsigned char *)src++; if (c == '\n') { if (st->eol.just_saw_cr) @@ -1674,10 +1675,8 @@ static int detect_coding_type (struct detection_state *st, const Extbyte *src, - unsigned int n, int just_do_eol) + size_t n, int just_do_eol) { - int c; - if (st->eol_type == EOL_AUTODETECT) st->eol_type = detect_eol_type (st, src, n); @@ -1688,7 +1687,7 @@ { for (; n; n--, src++) { - c = *src; + unsigned char c = *(unsigned char *) src; if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) { st->seen_non_ascii = 1; @@ -1914,8 +1913,8 @@ DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* Detect coding system of the text in the region between START and END. -Returned a list of possible coding systems ordered by priority. -If only ASCII characters are found, it returns 'undecided or one of +Return a list of possible coding systems ordered by priority. +If only ASCII characters are found, return 'undecided or one of its subsidiary coding systems according to a detected end-of-line type. Optional arg BUFFER defaults to the current buffer. */ @@ -1940,7 +1939,7 @@ decst.mask = ~0; while (1) { - unsigned char random_buffer[4096]; + Extbyte random_buffer[4096]; ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer)); if (!nread) @@ -2196,7 +2195,7 @@ /* There might be some more end data produced in the translation. See the comment above. */ str->flags |= CODING_STATE_END; - mule_decode (stream, data, str->runoff, read_size); + mule_decode (stream, (Extbyte *) data, str->runoff, read_size); } if (data - orig_data == 0) @@ -2214,7 +2213,7 @@ /* Decode all our data into the runoff, and then attempt to write it all out to the other end. Remove whatever chunk we succeeded in writing. */ - mule_decode (stream, data, str->runoff, size); + mule_decode (stream, (Extbyte *) data, str->runoff, size); retval = Lstream_write (str->other_end, Dynarr_atp (str->runoff, 0), Dynarr_length (str->runoff)); if (retval > 0) @@ -2366,8 +2365,8 @@ be used for both reading and writing. */ static void -mule_decode (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +mule_decode (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n) { struct decoding_stream *str = DECODING_STREAM_DATA (decoding); @@ -2431,7 +2430,10 @@ break; case CODESYS_CCL: str->ccl.last_block = str->flags & CODING_STATE_END; - ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING); + /* When applying ccl program to stream, MUST NOT set NULL + pointer to src. */ + ccl_driver (&str->ccl, (src ? (unsigned char *)src : (unsigned char*)""), + dst, n, 0, CCL_MODE_DECODING); break; case CODESYS_ISO2022: decode_coding_iso2022 (decoding, src, dst, n); @@ -2810,8 +2812,8 @@ Store the encoded data into DST. */ static void -mule_encode (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +mule_encode (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n) { struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); @@ -2843,7 +2845,10 @@ break; case CODESYS_CCL: str->ccl.last_block = str->flags & CODING_STATE_END; - ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING); + /* When applying ccl program to stream, MUST NOT set NULL + pointer to src. */ + ccl_driver (&str->ccl, ((src) ? src : (unsigned char*)""), + dst, n, 0, CCL_MODE_ENCODING); break; case CODESYS_ISO2022: encode_coding_iso2022 (encoding, src, dst, n); @@ -2957,14 +2962,11 @@ ((c) >= 0xA1 && (c) <= 0xDF) static int -detect_coding_sjis (struct detection_state *st, const unsigned char *src, - unsigned int n) +detect_coding_sjis (struct detection_state *st, const Extbyte *src, size_t n) { - int c; - while (n--) { - c = *src++; + unsigned char c = *(unsigned char *)src++; if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) return 0; if (st->shift_jis.in_second_byte) @@ -2982,10 +2984,9 @@ /* Convert Shift-JIS data to internal format. */ static void -decode_coding_sjis (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +decode_coding_sjis (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n) { - unsigned char c; struct decoding_stream *str = DECODING_STREAM_DATA (decoding); unsigned int flags = str->flags; unsigned int ch = str->ch; @@ -2993,7 +2994,7 @@ while (n--) { - c = *src++; + unsigned char c = *(unsigned char *)src++; if (ch) { @@ -3039,10 +3040,9 @@ /* Convert internally-formatted data to Shift-JIS. */ static void -encode_coding_sjis (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +encode_coding_sjis (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n) { - unsigned char c; struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); unsigned int flags = str->flags; unsigned int ch = str->ch; @@ -3050,7 +3050,7 @@ while (n--) { - c = *src++; + Bufbyte c = *src++; if (c == '\n') { if (eol_type != EOL_LF && eol_type != EOL_AUTODETECT) @@ -3119,16 +3119,16 @@ } DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* -Encode a JISX0208 character CHAR to SHIFT-JIS coding-system. +Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system. Return the corresponding character code in SHIFT-JIS as a cons of two bytes. */ - (ch)) + (character)) { Lisp_Object charset; int c1, c2, s1, s2; - CHECK_CHAR_COERCE_INT (ch); - BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); + CHECK_CHAR_COERCE_INT (character); + BREAKUP_CHAR (XCHAR (character), charset, c1, c2); if (EQ (charset, Vcharset_japanese_jisx0208)) { ENCODE_SJIS (c1 | 0x80, c2 | 0x80, s1, s2); @@ -3243,14 +3243,11 @@ } while (0) static int -detect_coding_big5 (struct detection_state *st, const unsigned char *src, - unsigned int n) +detect_coding_big5 (struct detection_state *st, const Extbyte *src, size_t n) { - int c; - while (n--) { - c = *src++; + unsigned char c = *(unsigned char *)src++; if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO || (c >= 0x80 && c <= 0xA0)) return 0; @@ -3269,10 +3266,9 @@ /* Convert Big5 data to internal format. */ static void -decode_coding_big5 (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +decode_coding_big5 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n) { - unsigned char c; struct decoding_stream *str = DECODING_STREAM_DATA (decoding); unsigned int flags = str->flags; unsigned int ch = str->ch; @@ -3280,7 +3276,7 @@ while (n--) { - c = *src++; + unsigned char c = *(unsigned char *)src++; if (ch) { /* Previous character was first byte of Big5 char. */ @@ -3319,8 +3315,8 @@ /* Convert internally-formatted data to Big5. */ static void -encode_coding_big5 (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +encode_coding_big5 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n) { unsigned char c; struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); @@ -3407,16 +3403,16 @@ } DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* -Encode the Big5 character CH to BIG5 coding-system. +Encode the Big5 character CHARACTER in the BIG5 coding-system. Return the corresponding character code in Big5. */ - (ch)) + (character)) { Lisp_Object charset; int c1, c2, b1, b2; - CHECK_CHAR_COERCE_INT (ch); - BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); + CHECK_CHAR_COERCE_INT (character); + BREAKUP_CHAR (XCHAR (character), charset, c1, c2); if (EQ (charset, Vcharset_chinese_big5_1) || EQ (charset, Vcharset_chinese_big5_2)) { @@ -3586,12 +3582,11 @@ } static int -detect_coding_ucs4 (struct detection_state *st, const unsigned char *src, - unsigned int n) +detect_coding_ucs4 (struct detection_state *st, const Extbyte *src, size_t n) { while (n--) { - int c = *src++; + unsigned char c = *(unsigned char *)src++; switch (st->ucs4.in_byte) { case 0: @@ -3611,8 +3606,8 @@ } static void -decode_coding_ucs4 (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +decode_coding_ucs4 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n) { struct decoding_stream *str = DECODING_STREAM_DATA (decoding); unsigned int flags = str->flags; @@ -3621,7 +3616,7 @@ while (n--) { - unsigned char c = *src++; + unsigned char c = *(unsigned char *)src++; switch (counter) { case 0: @@ -3647,8 +3642,8 @@ } static void -encode_coding_ucs4 (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +encode_coding_ucs4 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n) { struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); unsigned int flags = str->flags; @@ -3713,9 +3708,9 @@ { /* #### Bother! We don't know how to handle this yet. */ - Dynarr_add (dst, 0); - Dynarr_add (dst, 0); - Dynarr_add (dst, 0); + Dynarr_add (dst, '\0'); + Dynarr_add (dst, '\0'); + Dynarr_add (dst, '\0'); Dynarr_add (dst, '~'); } else @@ -3786,12 +3781,11 @@ /************************************************************************/ static int -detect_coding_utf8 (struct detection_state *st, const unsigned char *src, - unsigned int n) +detect_coding_utf8 (struct detection_state *st, const Extbyte *src, size_t n) { while (n--) { - unsigned char c = *src++; + unsigned char c = *(unsigned char *)src++; switch (st->utf8.in_byte) { case 0: @@ -3821,8 +3815,8 @@ } static void -decode_coding_utf8 (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +decode_coding_utf8 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n) { struct decoding_stream *str = DECODING_STREAM_DATA (decoding); unsigned int flags = str->flags; @@ -3832,7 +3826,7 @@ while (n--) { - unsigned char c = *src++; + unsigned char c = *(unsigned char *)src++; switch (counter) { case 0: @@ -3935,8 +3929,8 @@ } static void -encode_coding_utf8 (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +encode_coding_utf8 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n) { struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); unsigned int flags = str->flags; @@ -4654,8 +4648,7 @@ } static int -detect_coding_iso2022 (struct detection_state *st, const unsigned char *src, - unsigned int n) +detect_coding_iso2022 (struct detection_state *st, const Extbyte *src, size_t n) { int mask; @@ -4685,7 +4678,7 @@ while (n--) { - int c = *src++; + unsigned char c = *(unsigned char *)src++; if (c >= 0xA0) { mask &= ~CODING_CATEGORY_ISO_7_MASK; @@ -4845,8 +4838,8 @@ /* Convert ISO2022-format data to internal format. */ static void -decode_coding_iso2022 (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +decode_coding_iso2022 (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n) { struct decoding_stream *str = DECODING_STREAM_DATA (decoding); unsigned int flags = str->flags; @@ -4866,7 +4859,7 @@ while (n--) { - unsigned char c = *src++; + unsigned char c = *(unsigned char *)src++; if (flags & CODING_STATE_ESCAPE) { /* Within ESC sequence */ int retval = parse_iso2022_esc (coding_system, &str->iso2022, @@ -5171,8 +5164,8 @@ /* Convert internally-formatted data to ISO2022 format. */ static void -encode_coding_iso2022 (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +encode_coding_iso2022 (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n) { unsigned char charmask, c; unsigned char char_boundary; @@ -5480,10 +5473,9 @@ contain all 256 possible byte values and that are not to be interpreted as being in any particular decoding. */ static void -decode_coding_no_conversion (Lstream *decoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +decode_coding_no_conversion (Lstream *decoding, const Extbyte *src, + unsigned_char_dynarr *dst, size_t n) { - unsigned char c; struct decoding_stream *str = DECODING_STREAM_DATA (decoding); unsigned int flags = str->flags; unsigned int ch = str->ch; @@ -5491,7 +5483,7 @@ while (n--) { - c = *src++; + unsigned char c = *(unsigned char *)src++; DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); DECODE_ADD_BINARY_CHAR (c, dst); @@ -5505,8 +5497,8 @@ } static void -encode_coding_no_conversion (Lstream *encoding, const unsigned char *src, - unsigned_char_dynarr *dst, unsigned int n) +encode_coding_no_conversion (Lstream *encoding, const Bufbyte *src, + unsigned_char_dynarr *dst, size_t n) { unsigned char c; struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);