comparison src/file-coding.c @ 371:cc15677e0335 r21-2b1

Import from CVS: tag r21-2b1
author cvs
date Mon, 13 Aug 2007 11:03:08 +0200
parents a4f53d9b3154
children 8626e4521993
comparison
equal deleted inserted replaced
370:bd866891f083 371:cc15677e0335
101 unsigned char esc_bytes[8]; 101 unsigned char esc_bytes[8];
102 102
103 /* Index for next byte to store in ISO escape sequence. */ 103 /* Index for next byte to store in ISO escape sequence. */
104 int esc_bytes_index; 104 int esc_bytes_index;
105 105
106 #ifdef ENABLE_COMPOSITE_CHARS
107 /* Stuff seen so far when composing a string. */ 106 /* Stuff seen so far when composing a string. */
108 unsigned_char_dynarr *composite_chars; 107 unsigned_char_dynarr *composite_chars;
109 #endif
110 108
111 /* If we saw an invalid designation sequence for a particular 109 /* If we saw an invalid designation sequence for a particular
112 register, we flag it here and switch to ASCII. The next time we 110 register, we flag it here and switch to ASCII. The next time we
113 see a valid designation for this register, we turn off the flag 111 see a valid designation for this register, we turn off the flag
114 and do the designation normally, but pretend the sequence was 112 and do the designation normally, but pretend the sequence was
617 'iso2022 615 'iso2022
618 Any ISO2022-compliant encoding. Among other things, this includes 616 Any ISO2022-compliant encoding. Among other things, this includes
619 JIS (the Japanese encoding commonly used for e-mail), EUC (the 617 JIS (the Japanese encoding commonly used for e-mail), EUC (the
620 standard Unix encoding for Japanese and other languages), and 618 standard Unix encoding for Japanese and other languages), and
621 Compound Text (the encoding used in X11). You can specify more 619 Compound Text (the encoding used in X11). You can specify more
622 specific information about the conversion with the PROPS argument. 620 specific information about the conversion with the FLAGS argument.
623 'big5 621 'big5
624 Big5 (the encoding commonly used for Taiwanese). 622 Big5 (the encoding commonly used for Taiwanese).
625 'ccl 623 'ccl
626 The conversion is performed using a user-written pseudo-code 624 The conversion is performed using a user-written pseudo-code
627 program. CCL (Code Conversion Language) is the name of this 625 program. CCL (Code Conversion Language) is the name of this
1853 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL) 1851 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL)
1854 { 1852 {
1855 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys)); 1853 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
1856 } 1854 }
1857 #endif /* MULE */ 1855 #endif /* MULE */
1858 if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT
1859 || CODING_SYSTEM_EOL_TYPE (str->codesys) == EOL_AUTODETECT)
1860 {
1861 xzero (str->decst);
1862 str->decst.eol_type = EOL_AUTODETECT;
1863 str->decst.mask = ~0;
1864 }
1865 str->flags = str->ch = 0; 1856 str->flags = str->ch = 0;
1866 } 1857 }
1867 1858
1868 static int 1859 static int
1869 decoding_rewinder (Lstream *stream) 1860 decoding_rewinder (Lstream *stream)
1897 str->flags |= CODING_STATE_END; 1888 str->flags |= CODING_STATE_END;
1898 decoding_writer (stream, 0, 0); 1889 decoding_writer (stream, 0, 0);
1899 } 1890 }
1900 Dynarr_free (str->runoff); 1891 Dynarr_free (str->runoff);
1901 #ifdef MULE 1892 #ifdef MULE
1902 #ifdef ENABLE_COMPOSITE_CHARS
1903 if (str->iso2022.composite_chars) 1893 if (str->iso2022.composite_chars)
1904 Dynarr_free (str->iso2022.composite_chars); 1894 Dynarr_free (str->iso2022.composite_chars);
1905 #endif
1906 #endif 1895 #endif
1907 return Lstream_close (str->other_end); 1896 return Lstream_close (str->other_end);
1908 } 1897 }
1909 1898
1910 Lisp_Object 1899 Lisp_Object
3204 iso->register_right = 1; 3193 iso->register_right = 1;
3205 iso->switched_dir_and_no_valid_charset_yet = 0; 3194 iso->switched_dir_and_no_valid_charset_yet = 0;
3206 iso->invalid_switch_dir = 0; 3195 iso->invalid_switch_dir = 0;
3207 iso->output_direction_sequence = 0; 3196 iso->output_direction_sequence = 0;
3208 iso->output_literally = 0; 3197 iso->output_literally = 0;
3209 #ifdef ENABLE_COMPOSITE_CHARS
3210 if (iso->composite_chars) 3198 if (iso->composite_chars)
3211 Dynarr_reset (iso->composite_chars); 3199 Dynarr_reset (iso->composite_chars);
3212 #endif
3213 } 3200 }
3214 3201
3215 static int 3202 static int
3216 fit_to_be_escape_quoted (unsigned char c) 3203 fit_to_be_escape_quoted (unsigned char c)
3217 { 3204 {
3337 reg = 3; half = 1; 3324 reg = 3; half = 1;
3338 goto locking_shift; 3325 goto locking_shift;
3339 3326
3340 /**** composite ****/ 3327 /**** composite ****/
3341 3328
3342 #ifdef ENABLE_COMPOSITE_CHARS
3343 case '0': 3329 case '0':
3344 iso->esc = ISO_ESC_START_COMPOSITE; 3330 iso->esc = ISO_ESC_START_COMPOSITE;
3345 *flags = (*flags & CODING_STATE_ISO2022_LOCK) | 3331 *flags = (*flags & CODING_STATE_ISO2022_LOCK) |
3346 CODING_STATE_COMPOSITE; 3332 CODING_STATE_COMPOSITE;
3347 return 1; 3333 return 1;
3349 case '1': 3335 case '1':
3350 iso->esc = ISO_ESC_END_COMPOSITE; 3336 iso->esc = ISO_ESC_END_COMPOSITE;
3351 *flags = (*flags & CODING_STATE_ISO2022_LOCK) & 3337 *flags = (*flags & CODING_STATE_ISO2022_LOCK) &
3352 ~CODING_STATE_COMPOSITE; 3338 ~CODING_STATE_COMPOSITE;
3353 return 1; 3339 return 1;
3354 #endif /* ENABLE_COMPOSITE_CHARS */
3355 3340
3356 /**** directionality ****/ 3341 /**** directionality ****/
3357 3342
3358 case '[': 3343 case '[':
3359 iso->esc = ISO_ESC_5_11; 3344 iso->esc = ISO_ESC_5_11;
3798 unsigned char c; 3783 unsigned char c;
3799 unsigned int flags, ch; 3784 unsigned int flags, ch;
3800 enum eol_type eol_type; 3785 enum eol_type eol_type;
3801 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3786 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3802 Lisp_Object coding_system; 3787 Lisp_Object coding_system;
3803 #ifdef ENABLE_COMPOSITE_CHARS
3804 unsigned_char_dynarr *real_dst = dst; 3788 unsigned_char_dynarr *real_dst = dst;
3805 #endif
3806 3789
3807 CODING_STREAM_DECOMPOSE (str, flags, ch); 3790 CODING_STREAM_DECOMPOSE (str, flags, ch);
3808 eol_type = str->eol_type; 3791 eol_type = str->eol_type;
3809 XSETCODING_SYSTEM (coding_system, str->codesys); 3792 XSETCODING_SYSTEM (coding_system, str->codesys);
3810 3793
3811 #ifdef ENABLE_COMPOSITE_CHARS
3812 if (flags & CODING_STATE_COMPOSITE) 3794 if (flags & CODING_STATE_COMPOSITE)
3813 dst = str->iso2022.composite_chars; 3795 dst = str->iso2022.composite_chars;
3814 #endif /* ENABLE_COMPOSITE_CHARS */
3815 3796
3816 while (n--) 3797 while (n--)
3817 { 3798 {
3818 c = *src++; 3799 c = *src++;
3819 if (flags & CODING_STATE_ESCAPE) 3800 if (flags & CODING_STATE_ESCAPE)
3823 3804
3824 if (retval) 3805 if (retval)
3825 { 3806 {
3826 switch (str->iso2022.esc) 3807 switch (str->iso2022.esc)
3827 { 3808 {
3828 #ifdef ENABLE_COMPOSITE_CHARS
3829 case ISO_ESC_START_COMPOSITE: 3809 case ISO_ESC_START_COMPOSITE:
3830 if (str->iso2022.composite_chars) 3810 if (str->iso2022.composite_chars)
3831 Dynarr_reset (str->iso2022.composite_chars); 3811 Dynarr_reset (str->iso2022.composite_chars);
3832 else 3812 else
3833 str->iso2022.composite_chars = Dynarr_new (unsigned_char); 3813 str->iso2022.composite_chars = Dynarr_new (unsigned_char);
3842 dst = real_dst; 3822 dst = real_dst;
3843 len = set_charptr_emchar (comstr, emch); 3823 len = set_charptr_emchar (comstr, emch);
3844 Dynarr_add_many (dst, comstr, len); 3824 Dynarr_add_many (dst, comstr, len);
3845 break; 3825 break;
3846 } 3826 }
3847 #endif /* ENABLE_COMPOSITE_CHARS */
3848 3827
3849 case ISO_ESC_LITERAL: 3828 case ISO_ESC_LITERAL:
3850 DECODE_ADD_BINARY_CHAR (c, dst); 3829 DECODE_ADD_BINARY_CHAR (c, dst);
3851 break; 3830 break;
3852 3831
3930 : !BYTE_ASCII_P (c) ? str->iso2022.register_right 3909 : !BYTE_ASCII_P (c) ? str->iso2022.register_right
3931 : str->iso2022.register_left); 3910 : str->iso2022.register_left);
3932 charset = str->iso2022.charset[reg]; 3911 charset = str->iso2022.charset[reg];
3933 3912
3934 /* Error checking: */ 3913 /* Error checking: */
3935 if (! CHARSETP (charset) 3914 if (NILP (charset) || str->iso2022.invalid_designated[reg]
3936 || str->iso2022.invalid_designated[reg]
3937 || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL) 3915 || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL)
3938 && XCHARSET_CHARS (charset) == 94)) 3916 && XCHARSET_CHARS (charset) == 94))
3939 /* Mrmph. We are trying to invoke a register that has no 3917 /* Mrmph. We are trying to invoke a register that has no
3940 or an invalid charset in it, or trying to add a character 3918 or an invalid charset in it, or trying to add a character
3941 outside the range of the charset. Insert that char literally 3919 outside the range of the charset. Insert that char literally
4129 struct Lisp_Coding_System *codesys = str->codesys; 4107 struct Lisp_Coding_System *codesys = str->codesys;
4130 int i; 4108 int i;
4131 Lisp_Object charset; 4109 Lisp_Object charset;
4132 int half; 4110 int half;
4133 4111
4134 #ifdef ENABLE_COMPOSITE_CHARS
4135 /* flags for handling composite chars. We do a little switcharoo 4112 /* flags for handling composite chars. We do a little switcharoo
4136 on the source while we're outputting the composite char. */ 4113 on the source while we're outputting the composite char. */
4137 unsigned int saved_n = 0; 4114 unsigned int saved_n = 0;
4138 CONST unsigned char *saved_src = NULL; 4115 CONST unsigned char *saved_src = NULL;
4139 int in_composite = 0; 4116 int in_composite = 0;
4140 #endif /* ENABLE_COMPOSITE_CHARS */
4141 4117
4142 CODING_STREAM_DECOMPOSE (str, flags, ch); 4118 CODING_STREAM_DECOMPOSE (str, flags, ch);
4143 eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys); 4119 eol_type = CODING_SYSTEM_EOL_TYPE (str->codesys);
4144 char_boundary = str->iso2022.current_char_boundary; 4120 char_boundary = str->iso2022.current_char_boundary;
4145 charset = str->iso2022.current_charset; 4121 charset = str->iso2022.current_charset;
4146 half = str->iso2022.current_half; 4122 half = str->iso2022.current_half;
4147 4123
4148 #ifdef ENABLE_COMPOSITE_CHARS
4149 back_to_square_n: 4124 back_to_square_n:
4150 #endif
4151 while (n--) 4125 while (n--)
4152 { 4126 {
4153 c = *src++; 4127 c = *src++;
4154 4128
4155 if (BYTE_ASCII_P (c)) 4129 if (BYTE_ASCII_P (c))
4204 ch = 0; 4178 ch = 0;
4205 charset = CHARSET_BY_LEADING_BYTE (c); 4179 charset = CHARSET_BY_LEADING_BYTE (c);
4206 if (LEADING_BYTE_PREFIX_P(c)) 4180 if (LEADING_BYTE_PREFIX_P(c))
4207 ch = c; 4181 ch = c;
4208 else if (!EQ (charset, Vcharset_control_1) 4182 else if (!EQ (charset, Vcharset_control_1)
4209 #ifdef ENABLE_COMPOSITE_CHARS 4183 && !EQ (charset, Vcharset_composite))
4210 && !EQ (charset, Vcharset_composite)
4211 #endif
4212 )
4213 { 4184 {
4214 int reg; 4185 int reg;
4215 4186
4216 ensure_correct_direction (XCHARSET_DIRECTION (charset), 4187 ensure_correct_direction (XCHARSET_DIRECTION (charset),
4217 codesys, dst, &flags, 0); 4188 codesys, dst, &flags, 0);
4327 Dynarr_add (dst, c & charmask); 4298 Dynarr_add (dst, c & charmask);
4328 ch = 0; 4299 ch = 0;
4329 } 4300 }
4330 else if (ch) 4301 else if (ch)
4331 { 4302 {
4332 #ifdef ENABLE_COMPOSITE_CHARS
4333 if (EQ (charset, Vcharset_composite)) 4303 if (EQ (charset, Vcharset_composite))
4334 { 4304 {
4335 if (in_composite) 4305 if (in_composite)
4336 { 4306 {
4337 /* #### Bother! We don't know how to 4307 /* #### Bother! We don't know how to
4351 Dynarr_add (dst, ISO_CODE_ESC); 4321 Dynarr_add (dst, ISO_CODE_ESC);
4352 Dynarr_add (dst, '0'); /* start composing */ 4322 Dynarr_add (dst, '0'); /* start composing */
4353 } 4323 }
4354 } 4324 }
4355 else 4325 else
4356 #endif /* ENABLE_COMPOSITE_CHARS */
4357 { 4326 {
4358 Dynarr_add (dst, ch & charmask); 4327 Dynarr_add (dst, ch & charmask);
4359 Dynarr_add (dst, c & charmask); 4328 Dynarr_add (dst, c & charmask);
4360 } 4329 }
4361 ch = 0; 4330 ch = 0;
4384 } 4353 }
4385 } 4354 }
4386 } 4355 }
4387 } 4356 }
4388 4357
4389 #ifdef ENABLE_COMPOSITE_CHARS
4390 if (in_composite) 4358 if (in_composite)
4391 { 4359 {
4392 n = saved_n; 4360 n = saved_n;
4393 src = saved_src; 4361 src = saved_src;
4394 in_composite = 0; 4362 in_composite = 0;
4395 Dynarr_add (dst, ISO_CODE_ESC); 4363 Dynarr_add (dst, ISO_CODE_ESC);
4396 Dynarr_add (dst, '1'); /* end composing */ 4364 Dynarr_add (dst, '1'); /* end composing */
4397 goto back_to_square_n; /* Wheeeeeeeee ..... */ 4365 goto back_to_square_n; /* Wheeeeeeeee ..... */
4398 } 4366 }
4399 #endif /* ENABLE_COMPOSITE_CHARS */
4400 4367
4401 if (char_boundary && flags & CODING_STATE_END) 4368 if (char_boundary && flags & CODING_STATE_END)
4402 { 4369 {
4403 restore_left_to_right_direction (codesys, dst, &flags, 0); 4370 restore_left_to_right_direction (codesys, dst, &flags, 0);
4404 ensure_normal_shift (str, dst); 4371 ensure_normal_shift (str, dst);
4520 #define FILE_NAME_CODING_SYSTEM \ 4487 #define FILE_NAME_CODING_SYSTEM \
4521 ((NILP (Vfile_name_coding_system) || \ 4488 ((NILP (Vfile_name_coding_system) || \
4522 (EQ ((Vfile_name_coding_system), Qbinary))) ? \ 4489 (EQ ((Vfile_name_coding_system), Qbinary))) ? \
4523 Qnil : Fget_coding_system (Vfile_name_coding_system)) 4490 Qnil : Fget_coding_system (Vfile_name_coding_system))
4524 4491
4492 /* #### not correct for all values of `fmt'! */
4493 #ifdef MULE
4494 #define FMT_CODING_SYSTEM(fmt) \
4495 (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM : \
4496 ((fmt) == FORMAT_CTEXT ) ? Fget_coding_system (Qctext) : \
4497 ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM : \
4498 Qnil)
4499 #else
4500 #define FMT_CODING_SYSTEM(fmt) \
4501 (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM : \
4502 ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM : \
4503 Qnil)
4504 #endif
4505
4525 Extbyte * 4506 Extbyte *
4526 convert_to_external_format (CONST Bufbyte *ptr, 4507 convert_to_external_format (CONST Bufbyte *ptr,
4527 Bytecount len, 4508 Bytecount len,
4528 Extcount *len_out, 4509 Extcount *len_out,
4529 enum external_data_format fmt) 4510 enum external_data_format fmt)
4530 { 4511 {
4531 Lisp_Object coding_system; 4512 Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
4532
4533 /* #### not correct for all values of `fmt'! */
4534 if (fmt == FORMAT_FILENAME || fmt == FORMAT_TERMINAL)
4535 coding_system = FILE_NAME_CODING_SYSTEM;
4536 #ifdef MULE
4537 else if (fmt == FORMAT_CTEXT)
4538 coding_system = Fget_coding_system (Qctext);
4539 #endif
4540 else
4541 coding_system = Qnil;
4542
4543 /* Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt); */
4544 4513
4545 if (!conversion_out_dynarr) 4514 if (!conversion_out_dynarr)
4546 conversion_out_dynarr = Dynarr_new (Extbyte); 4515 conversion_out_dynarr = Dynarr_new (Extbyte);
4547 else 4516 else
4548 Dynarr_reset (conversion_out_dynarr); 4517 Dynarr_reset (conversion_out_dynarr);
4606 convert_from_external_format (CONST Extbyte *ptr, 4575 convert_from_external_format (CONST Extbyte *ptr,
4607 Extcount len, 4576 Extcount len,
4608 Bytecount *len_out, 4577 Bytecount *len_out,
4609 enum external_data_format fmt) 4578 enum external_data_format fmt)
4610 { 4579 {
4611 Lisp_Object coding_system; 4580 Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt);
4612
4613 /* #### not correct for all values of `fmt'! */
4614 if (fmt == FORMAT_FILENAME || fmt == FORMAT_TERMINAL)
4615 coding_system = FILE_NAME_CODING_SYSTEM;
4616 #ifdef MULE
4617 else if (fmt == FORMAT_CTEXT)
4618 coding_system = Fget_coding_system (Qctext);
4619 #endif
4620 else
4621 coding_system = Qnil;
4622
4623 /* Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt); */
4624 4581
4625 if (!conversion_in_dynarr) 4582 if (!conversion_in_dynarr)
4626 conversion_in_dynarr = Dynarr_new (Bufbyte); 4583 conversion_in_dynarr = Dynarr_new (Bufbyte);
4627 else 4584 else
4628 Dynarr_reset (conversion_in_dynarr); 4585 Dynarr_reset (conversion_in_dynarr);
4675 /************************************************************************/ 4632 /************************************************************************/
4676 /* Initialization */ 4633 /* Initialization */
4677 /************************************************************************/ 4634 /************************************************************************/
4678 4635
4679 void 4636 void
4680 syms_of_file_coding (void) 4637 syms_of_mule_coding (void)
4681 { 4638 {
4682 defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system"); 4639 defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
4683 deferror (&Qcoding_system_error, "coding-system-error", 4640 deferror (&Qcoding_system_error, "coding-system-error",
4684 "Coding-system error", Qio_error); 4641 "Coding-system error", Qio_error);
4685 4642
4776 defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION], 4733 defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION],
4777 "no-conversion"); 4734 "no-conversion");
4778 } 4735 }
4779 4736
4780 void 4737 void
4781 lstream_type_create_file_coding (void) 4738 lstream_type_create_mule_coding (void)
4782 { 4739 {
4783 LSTREAM_HAS_METHOD (decoding, reader); 4740 LSTREAM_HAS_METHOD (decoding, reader);
4784 LSTREAM_HAS_METHOD (decoding, writer); 4741 LSTREAM_HAS_METHOD (decoding, writer);
4785 LSTREAM_HAS_METHOD (decoding, rewinder); 4742 LSTREAM_HAS_METHOD (decoding, rewinder);
4786 LSTREAM_HAS_METHOD (decoding, seekable_p); 4743 LSTREAM_HAS_METHOD (decoding, seekable_p);
4796 LSTREAM_HAS_METHOD (encoding, closer); 4753 LSTREAM_HAS_METHOD (encoding, closer);
4797 LSTREAM_HAS_METHOD (encoding, marker); 4754 LSTREAM_HAS_METHOD (encoding, marker);
4798 } 4755 }
4799 4756
4800 void 4757 void
4801 vars_of_file_coding (void) 4758 vars_of_mule_coding (void)
4802 { 4759 {
4803 int i; 4760 int i;
4804 4761
4805 /* Initialize to something reasonable ... */ 4762 /* Initialize to something reasonable ... */
4806 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 4763 for (i = 0; i <= CODING_CATEGORY_LAST; i++)
4858 */ ); 4815 */ );
4859 enable_multibyte_characters = 1; 4816 enable_multibyte_characters = 1;
4860 } 4817 }
4861 4818
4862 void 4819 void
4863 complex_vars_of_file_coding (void) 4820 complex_vars_of_mule_coding (void)
4864 { 4821 {
4865 staticpro (&Vcoding_system_hashtable); 4822 staticpro (&Vcoding_system_hashtable);
4866 Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK, 4823 Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
4867 HASHTABLE_EQ); 4824 HASHTABLE_EQ);
4868 4825