comparison src/file-coding.c @ 380:8626e4521993 r21-2-5

Import from CVS: tag r21-2-5
author cvs
date Mon, 13 Aug 2007 11:07:10 +0200
parents cc15677e0335
children aabb7f5b1c81
comparison
equal deleted inserted replaced
379:76b7d63099ad 380:8626e4521993
73 Lisp_Object Qctext, Qescape_quoted; 73 Lisp_Object Qctext, Qescape_quoted;
74 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; 74 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
75 #endif 75 #endif
76 Lisp_Object Qencode, Qdecode; 76 Lisp_Object Qencode, Qdecode;
77 77
78 Lisp_Object Vcoding_system_hashtable; 78 Lisp_Object Vcoding_system_hash_table;
79 79
80 int enable_multibyte_characters; 80 int enable_multibyte_characters;
81 81
82 #ifdef MULE 82 #ifdef MULE
83 /* Additional information used by the ISO2022 decoder and detector. */ 83 /* Additional information used by the ISO2022 decoder and detector. */
230 static Lisp_Object 230 static Lisp_Object
231 mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object)) 231 mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
232 { 232 {
233 struct Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); 233 struct Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
234 234
235 (markobj) (CODING_SYSTEM_NAME (codesys)); 235 markobj (CODING_SYSTEM_NAME (codesys));
236 (markobj) (CODING_SYSTEM_DOC_STRING (codesys)); 236 markobj (CODING_SYSTEM_DOC_STRING (codesys));
237 (markobj) (CODING_SYSTEM_MNEMONIC (codesys)); 237 markobj (CODING_SYSTEM_MNEMONIC (codesys));
238 (markobj) (CODING_SYSTEM_EOL_LF (codesys)); 238 markobj (CODING_SYSTEM_EOL_LF (codesys));
239 (markobj) (CODING_SYSTEM_EOL_CRLF (codesys)); 239 markobj (CODING_SYSTEM_EOL_CRLF (codesys));
240 (markobj) (CODING_SYSTEM_EOL_CR (codesys)); 240 markobj (CODING_SYSTEM_EOL_CR (codesys));
241 241
242 switch (CODING_SYSTEM_TYPE (codesys)) 242 switch (CODING_SYSTEM_TYPE (codesys))
243 { 243 {
244 #ifdef MULE 244 #ifdef MULE
245 int i; 245 int i;
246 case CODESYS_ISO2022: 246 case CODESYS_ISO2022:
247 for (i = 0; i < 4; i++) 247 for (i = 0; i < 4; i++)
248 (markobj) (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); 248 markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
249 if (codesys->iso2022.input_conv) 249 if (codesys->iso2022.input_conv)
250 { 250 {
251 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++) 251 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
252 { 252 {
253 struct charset_conversion_spec *ccs = 253 struct charset_conversion_spec *ccs =
254 Dynarr_atp (codesys->iso2022.input_conv, i); 254 Dynarr_atp (codesys->iso2022.input_conv, i);
255 (markobj) (ccs->from_charset); 255 markobj (ccs->from_charset);
256 (markobj) (ccs->to_charset); 256 markobj (ccs->to_charset);
257 } 257 }
258 } 258 }
259 if (codesys->iso2022.output_conv) 259 if (codesys->iso2022.output_conv)
260 { 260 {
261 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++) 261 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++)
262 { 262 {
263 struct charset_conversion_spec *ccs = 263 struct charset_conversion_spec *ccs =
264 Dynarr_atp (codesys->iso2022.output_conv, i); 264 Dynarr_atp (codesys->iso2022.output_conv, i);
265 (markobj) (ccs->from_charset); 265 markobj (ccs->from_charset);
266 (markobj) (ccs->to_charset); 266 markobj (ccs->to_charset);
267 } 267 }
268 } 268 }
269 break; 269 break;
270 270
271 case CODESYS_CCL: 271 case CODESYS_CCL:
272 (markobj) (CODING_SYSTEM_CCL_DECODE (codesys)); 272 markobj (CODING_SYSTEM_CCL_DECODE (codesys));
273 (markobj) (CODING_SYSTEM_CCL_ENCODE (codesys)); 273 markobj (CODING_SYSTEM_CCL_ENCODE (codesys));
274 break; 274 break;
275 #endif /* MULE */ 275 #endif /* MULE */
276 default: 276 default:
277 break; 277 break;
278 } 278 }
279 279
280 (markobj) (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys)); 280 markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
281 return CODING_SYSTEM_POST_READ_CONVERSION (codesys); 281 return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
282 } 282 }
283 283
284 static void 284 static void
285 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, 285 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
342 static Lisp_Object 342 static Lisp_Object
343 eol_type_to_symbol (enum eol_type type) 343 eol_type_to_symbol (enum eol_type type)
344 { 344 {
345 switch (type) 345 switch (type)
346 { 346 {
347 default: abort ();
347 case EOL_LF: return Qlf; 348 case EOL_LF: return Qlf;
348 case EOL_CRLF: return Qcrlf; 349 case EOL_CRLF: return Qcrlf;
349 case EOL_CR: return Qcr; 350 case EOL_CR: return Qcr;
350 case EOL_AUTODETECT: return Qnil; 351 case EOL_AUTODETECT: return Qnil;
351 default: abort (); return Qnil; /* not reached */
352 } 352 }
353 } 353 }
354 354
355 static void 355 static void
356 setup_eol_coding_systems (struct Lisp_Coding_System *codesys) 356 setup_eol_coding_systems (struct Lisp_Coding_System *codesys)
437 if (NILP (coding_system_or_name)) 437 if (NILP (coding_system_or_name))
438 coding_system_or_name = Qbinary; 438 coding_system_or_name = Qbinary;
439 else 439 else
440 CHECK_SYMBOL (coding_system_or_name); 440 CHECK_SYMBOL (coding_system_or_name);
441 441
442 return Fgethash (coding_system_or_name, Vcoding_system_hashtable, Qnil); 442 return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
443 } 443 }
444 444
445 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* 445 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
446 Retrieve the coding system of the given name. 446 Retrieve the coding system of the given name.
447 Same as `find-coding-system' except that if there is no such 447 Same as `find-coding-system' except that if there is no such
463 { 463 {
464 Lisp_Object *coding_system_list; 464 Lisp_Object *coding_system_list;
465 }; 465 };
466 466
467 static int 467 static int
468 add_coding_system_to_list_mapper (CONST void *hash_key, void *hash_contents, 468 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value,
469 void *coding_system_list_closure) 469 void *coding_system_list_closure)
470 { 470 {
471 /* This function can GC */ 471 /* This function can GC */
472 Lisp_Object key, contents;
473 Lisp_Object *coding_system_list;
474 struct coding_system_list_closure *cscl = 472 struct coding_system_list_closure *cscl =
475 (struct coding_system_list_closure *) coding_system_list_closure; 473 (struct coding_system_list_closure *) coding_system_list_closure;
476 CVOID_TO_LISP (key, hash_key); 474 Lisp_Object *coding_system_list = cscl->coding_system_list;
477 VOID_TO_LISP (contents, hash_contents); 475
478 coding_system_list = cscl->coding_system_list; 476 *coding_system_list = Fcons (XCODING_SYSTEM (value)->name,
479
480 *coding_system_list = Fcons (XCODING_SYSTEM (contents)->name,
481 *coding_system_list); 477 *coding_system_list);
482 return 0; 478 return 0;
483 } 479 }
484 480
485 DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /* 481 DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /*
491 struct gcpro gcpro1; 487 struct gcpro gcpro1;
492 struct coding_system_list_closure coding_system_list_closure; 488 struct coding_system_list_closure coding_system_list_closure;
493 489
494 GCPRO1 (coding_system_list); 490 GCPRO1 (coding_system_list);
495 coding_system_list_closure.coding_system_list = &coding_system_list; 491 coding_system_list_closure.coding_system_list = &coding_system_list;
496 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hashtable, 492 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table,
497 &coding_system_list_closure); 493 &coding_system_list_closure);
498 UNGCPRO; 494 UNGCPRO;
499 495
500 return coding_system_list; 496 return coding_system_list;
501 } 497 }
888 setup_eol_coding_systems (codesys); 884 setup_eol_coding_systems (codesys);
889 885
890 { 886 {
891 Lisp_Object codesys_obj; 887 Lisp_Object codesys_obj;
892 XSETCODING_SYSTEM (codesys_obj, codesys); 888 XSETCODING_SYSTEM (codesys_obj, codesys);
893 Fputhash (name, codesys_obj, Vcoding_system_hashtable); 889 Fputhash (name, codesys_obj, Vcoding_system_hash_table);
894 return codesys_obj; 890 return codesys_obj;
895 } 891 }
896 } 892 }
897 893
898 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /* 894 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /*
909 { 905 {
910 XSETCODING_SYSTEM (new_coding_system, 906 XSETCODING_SYSTEM (new_coding_system,
911 allocate_coding_system 907 allocate_coding_system
912 (XCODING_SYSTEM_TYPE (old_coding_system), 908 (XCODING_SYSTEM_TYPE (old_coding_system),
913 new_name)); 909 new_name));
914 Fputhash (new_name, new_coding_system, Vcoding_system_hashtable); 910 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table);
915 } 911 }
916 912
917 { 913 {
918 struct Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system); 914 struct Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
919 struct Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); 915 struct Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
976 */ 972 */
977 (coding_system)) 973 (coding_system))
978 { 974 {
979 switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system))) 975 switch (XCODING_SYSTEM_TYPE (Fget_coding_system (coding_system)))
980 { 976 {
977 default: abort ();
981 case CODESYS_AUTODETECT: return Qundecided; 978 case CODESYS_AUTODETECT: return Qundecided;
982 #ifdef MULE 979 #ifdef MULE
983 case CODESYS_SHIFT_JIS: return Qshift_jis; 980 case CODESYS_SHIFT_JIS: return Qshift_jis;
984 case CODESYS_ISO2022: return Qiso2022; 981 case CODESYS_ISO2022: return Qiso2022;
985 case CODESYS_BIG5: return Qbig5; 982 case CODESYS_BIG5: return Qbig5;
987 #endif 984 #endif
988 case CODESYS_NO_CONVERSION: return Qno_conversion; 985 case CODESYS_NO_CONVERSION: return Qno_conversion;
989 #ifdef DEBUG_XEMACS 986 #ifdef DEBUG_XEMACS
990 case CODESYS_INTERNAL: return Qinternal; 987 case CODESYS_INTERNAL: return Qinternal;
991 #endif 988 #endif
992 default: 989 }
993 abort ();
994 }
995
996 return Qnil; /* not reached */
997 } 990 }
998 991
999 #ifdef MULE 992 #ifdef MULE
1000 static 993 static
1001 Lisp_Object coding_system_charset (Lisp_Object coding_system, int gnum) 994 Lisp_Object coding_system_charset (Lisp_Object coding_system, int gnum)
1744 /* We do not need to mark the coding systems or charsets stored 1737 /* We do not need to mark the coding systems or charsets stored
1745 within the stream because they are stored in a global list 1738 within the stream because they are stored in a global list
1746 and automatically marked. */ 1739 and automatically marked. */
1747 1740
1748 XSETLSTREAM (str_obj, str); 1741 XSETLSTREAM (str_obj, str);
1749 (markobj) (str_obj); 1742 markobj (str_obj);
1750 if (str->imp->marker) 1743 if (str->imp->marker)
1751 return (str->imp->marker) (str_obj, markobj); 1744 return (str->imp->marker) (str_obj, markobj);
1752 else 1745 else
1753 return Qnil; 1746 return Qnil;
1754 } 1747 }
2190 /* We do not need to mark the coding systems or charsets stored 2183 /* We do not need to mark the coding systems or charsets stored
2191 within the stream because they are stored in a global list 2184 within the stream because they are stored in a global list
2192 and automatically marked. */ 2185 and automatically marked. */
2193 2186
2194 XSETLSTREAM (str_obj, str); 2187 XSETLSTREAM (str_obj, str);
2195 (markobj) (str_obj); 2188 markobj (str_obj);
2196 if (str->imp->marker) 2189 if (str->imp->marker)
2197 return (str->imp->marker) (str_obj, markobj); 2190 return (str->imp->marker) (str_obj, markobj);
2198 else 2191 else
2199 return Qnil; 2192 return Qnil;
2200 } 2193 }
2746 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE 2739 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE
2747 -------------------------- 2740 --------------------------
2748 2741
2749 Since the number of characters in Big5 is larger than maximum 2742 Since the number of characters in Big5 is larger than maximum
2750 characters in Emacs' charset (96x96), it can't be handled as one 2743 characters in Emacs' charset (96x96), it can't be handled as one
2751 charset. So, in Emacs, Big5 is devided into two: `charset-big5-1' 2744 charset. So, in Emacs, Big5 is divided into two: `charset-big5-1'
2752 and `charset-big5-2'. Both <type>s are TYPE94x94. The former 2745 and `charset-big5-2'. Both <type>s are TYPE94x94. The former
2753 contains frequently used characters and the latter contains less 2746 contains frequently used characters and the latter contains less
2754 frequently used characters. */ 2747 frequently used characters. */
2755 2748
2756 #define BYTE_BIG5_TWO_BYTE_1_P(c) \ 2749 #define BYTE_BIG5_TWO_BYTE_1_P(c) \
4482 static Extbyte_dynarr *conversion_out_dynarr; 4475 static Extbyte_dynarr *conversion_out_dynarr;
4483 static Bufbyte_dynarr *conversion_in_dynarr; 4476 static Bufbyte_dynarr *conversion_in_dynarr;
4484 4477
4485 /* Determine coding system from coding format */ 4478 /* Determine coding system from coding format */
4486 4479
4487 #define FILE_NAME_CODING_SYSTEM \
4488 ((NILP (Vfile_name_coding_system) || \
4489 (EQ ((Vfile_name_coding_system), Qbinary))) ? \
4490 Qnil : Fget_coding_system (Vfile_name_coding_system))
4491
4492 /* #### not correct for all values of `fmt'! */ 4480 /* #### not correct for all values of `fmt'! */
4481 static Lisp_Object
4482 external_data_format_to_coding_system (enum external_data_format fmt)
4483 {
4484 switch (fmt)
4485 {
4486 case FORMAT_FILENAME:
4487 case FORMAT_TERMINAL:
4488 if (EQ (Vfile_name_coding_system, Qnil) ||
4489 EQ (Vfile_name_coding_system, Qbinary))
4490 return Qnil;
4491 else
4492 return Fget_coding_system (Vfile_name_coding_system);
4493 #ifdef MULE 4493 #ifdef MULE
4494 #define FMT_CODING_SYSTEM(fmt) \ 4494 case FORMAT_CTEXT:
4495 (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM : \ 4495 return Fget_coding_system (Qctext);
4496 ((fmt) == FORMAT_CTEXT ) ? Fget_coding_system (Qctext) : \
4497 ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM : \
4498 Qnil)
4499 #else
4500 #define FMT_CODING_SYSTEM(fmt) \
4501 (((fmt) == FORMAT_FILENAME) ? FILE_NAME_CODING_SYSTEM : \
4502 ((fmt) == FORMAT_TERMINAL) ? FILE_NAME_CODING_SYSTEM : \
4503 Qnil)
4504 #endif 4496 #endif
4497 default:
4498 return Qnil;
4499 }
4500 }
4505 4501
4506 Extbyte * 4502 Extbyte *
4507 convert_to_external_format (CONST Bufbyte *ptr, 4503 convert_to_external_format (CONST Bufbyte *ptr,
4508 Bytecount len, 4504 Bytecount len,
4509 Extcount *len_out, 4505 Extcount *len_out,
4510 enum external_data_format fmt) 4506 enum external_data_format fmt)
4511 { 4507 {
4512 Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt); 4508 Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
4513 4509
4514 if (!conversion_out_dynarr) 4510 if (!conversion_out_dynarr)
4515 conversion_out_dynarr = Dynarr_new (Extbyte); 4511 conversion_out_dynarr = Dynarr_new (Extbyte);
4516 else 4512 else
4517 Dynarr_reset (conversion_out_dynarr); 4513 Dynarr_reset (conversion_out_dynarr);
4575 convert_from_external_format (CONST Extbyte *ptr, 4571 convert_from_external_format (CONST Extbyte *ptr,
4576 Extcount len, 4572 Extcount len,
4577 Bytecount *len_out, 4573 Bytecount *len_out,
4578 enum external_data_format fmt) 4574 enum external_data_format fmt)
4579 { 4575 {
4580 Lisp_Object coding_system = FMT_CODING_SYSTEM (fmt); 4576 Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
4581 4577
4582 if (!conversion_in_dynarr) 4578 if (!conversion_in_dynarr)
4583 conversion_in_dynarr = Dynarr_new (Bufbyte); 4579 conversion_in_dynarr = Dynarr_new (Bufbyte);
4584 else 4580 else
4585 Dynarr_reset (conversion_in_dynarr); 4581 Dynarr_reset (conversion_in_dynarr);
4817 } 4813 }
4818 4814
4819 void 4815 void
4820 complex_vars_of_mule_coding (void) 4816 complex_vars_of_mule_coding (void)
4821 { 4817 {
4822 staticpro (&Vcoding_system_hashtable); 4818 staticpro (&Vcoding_system_hash_table);
4823 Vcoding_system_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK, 4819 Vcoding_system_hash_table =
4824 HASHTABLE_EQ); 4820 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
4825 4821
4826 the_codesys_prop_dynarr = Dynarr_new (codesys_prop); 4822 the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
4827 4823
4828 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \ 4824 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \
4829 { \ 4825 { \