Mercurial > hg > xemacs-beta
comparison src/file-coding.c @ 5118:e0db3c197671 ben-lisp-object
merge up to latest default branch, doesn't compile yet
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Sat, 26 Dec 2009 21:18:49 -0600 |
parents | 3742ea8250b5 257b468bf2ca |
children | d1247f3cc363 |
comparison
equal
deleted
inserted
replaced
5117:3742ea8250b5 | 5118:e0db3c197671 |
---|---|
76 #include "elhash.h" | 76 #include "elhash.h" |
77 #include "insdel.h" | 77 #include "insdel.h" |
78 #include "lstream.h" | 78 #include "lstream.h" |
79 #include "opaque.h" | 79 #include "opaque.h" |
80 #include "file-coding.h" | 80 #include "file-coding.h" |
81 #include "extents.h" | |
82 #include "rangetab.h" | |
83 #include "chartab.h" | |
81 | 84 |
82 #ifdef HAVE_ZLIB | 85 #ifdef HAVE_ZLIB |
83 #include "zlib.h" | 86 #include "zlib.h" |
84 #endif | 87 #endif |
85 | 88 |
87 Lisp_Object Vterminal_coding_system; | 90 Lisp_Object Vterminal_coding_system; |
88 Lisp_Object Vcoding_system_for_read; | 91 Lisp_Object Vcoding_system_for_read; |
89 Lisp_Object Vcoding_system_for_write; | 92 Lisp_Object Vcoding_system_for_write; |
90 Lisp_Object Vfile_name_coding_system; | 93 Lisp_Object Vfile_name_coding_system; |
91 | 94 |
95 Lisp_Object Qaliases, Qcharset_skip_chars_string; | |
96 | |
92 #ifdef DEBUG_XEMACS | 97 #ifdef DEBUG_XEMACS |
93 Lisp_Object Vdebug_coding_detection; | 98 Lisp_Object Vdebug_coding_detection; |
99 #endif | |
100 | |
101 #ifdef MULE | |
102 extern Lisp_Object Vcharset_ascii, Vcharset_control_1, | |
103 Vcharset_latin_iso8859_1; | |
94 #endif | 104 #endif |
95 | 105 |
96 typedef struct coding_system_type_entry | 106 typedef struct coding_system_type_entry |
97 { | 107 { |
98 struct coding_system_methods *meths; | 108 struct coding_system_methods *meths; |
226 Lisp_Object Qdo_eol, Qdo_coding; | 236 Lisp_Object Qdo_eol, Qdo_coding; |
227 | 237 |
228 Lisp_Object Qcanonicalize_after_coding; | 238 Lisp_Object Qcanonicalize_after_coding; |
229 | 239 |
230 Lisp_Object QScoding_system_cookie; | 240 Lisp_Object QScoding_system_cookie; |
241 | |
242 Lisp_Object Qposix_charset_to_coding_system_hash; | |
231 | 243 |
232 /* This is used to convert autodetected coding systems into existing | 244 /* This is used to convert autodetected coding systems into existing |
233 systems. For example, the chain undecided->convert-eol-autodetect may | 245 systems. For example, the chain undecided->convert-eol-autodetect may |
234 have its separate parts detected as mswindows-multibyte and | 246 have its separate parts detected as mswindows-multibyte and |
235 convert-eol-crlf, and the result needs to be mapped to | 247 convert-eol-crlf, and the result needs to be mapped to |
303 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); | 315 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); |
304 print_coding_system_properties (cs, printcharfun); | 316 print_coding_system_properties (cs, printcharfun); |
305 write_c_string (printcharfun, "]"); | 317 write_c_string (printcharfun, "]"); |
306 } | 318 } |
307 | 319 |
320 #ifndef NEW_GC | |
308 static void | 321 static void |
309 finalize_coding_system (void *header, int for_disksave) | 322 finalize_coding_system (void *header, int for_disksave) |
310 { | 323 { |
311 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header); | 324 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header); |
312 /* Since coding systems never go away, this function is not | 325 /* Since coding systems never go away, this function is not |
313 necessary. But it would be necessary if we changed things | 326 necessary. But it would be necessary if we changed things |
314 so that coding systems could go away. */ | 327 so that coding systems could go away. */ |
315 if (!for_disksave) /* see comment in lstream.c */ | 328 if (!for_disksave) /* see comment in lstream.c */ |
316 MAYBE_XCODESYSMETH (cs, finalize, (cs)); | 329 MAYBE_XCODESYSMETH (cs, finalize, (cs)); |
317 } | 330 } |
331 #endif /* not NEW_GC */ | |
318 | 332 |
319 static Bytecount | 333 static Bytecount |
320 sizeof_coding_system (const void *header) | 334 sizeof_coding_system (const void *header) |
321 { | 335 { |
322 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header; | 336 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header; |
364 | 378 |
365 const struct sized_memory_description coding_system_empty_extra_description = { | 379 const struct sized_memory_description coding_system_empty_extra_description = { |
366 0, coding_system_empty_extra_description_1 | 380 0, coding_system_empty_extra_description_1 |
367 }; | 381 }; |
368 | 382 |
369 DEFINE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, | 383 #ifdef NEW_GC |
370 mark_coding_system, | 384 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, |
371 print_coding_system, | 385 mark_coding_system, |
372 finalize_coding_system, | 386 print_coding_system, |
373 0, 0, coding_system_description, | 387 0, 0, 0, coding_system_description, |
374 sizeof_coding_system, | 388 sizeof_coding_system, |
375 Lisp_Coding_System); | 389 Lisp_Coding_System); |
390 #else /* not NEW_GC */ | |
391 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, | |
392 mark_coding_system, | |
393 print_coding_system, | |
394 finalize_coding_system, | |
395 0, 0, coding_system_description, | |
396 sizeof_coding_system, | |
397 Lisp_Coding_System); | |
398 #endif /* not NEW_GC */ | |
376 | 399 |
377 /************************************************************************/ | 400 /************************************************************************/ |
378 /* Creating coding systems */ | 401 /* Creating coding systems */ |
379 /************************************************************************/ | 402 /************************************************************************/ |
380 | 403 |
399 static int | 422 static int |
400 valid_coding_system_type_p (Lisp_Object type) | 423 valid_coding_system_type_p (Lisp_Object type) |
401 { | 424 { |
402 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; | 425 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; |
403 } | 426 } |
427 | |
428 #ifdef MULE | |
429 static Lisp_Object Vdefault_query_coding_region_chartab_cache; | |
430 | |
431 /* Non-static because it's used in INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */ | |
432 Lisp_Object | |
433 default_query_method (Lisp_Object codesys, struct buffer *buf, | |
434 Charbpos end, int flags) | |
435 { | |
436 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; | |
437 Charbpos pos_byte = BYTE_BUF_PT (buf); | |
438 Lisp_Object safe_charsets = XCODING_SYSTEM_SAFE_CHARSETS (codesys); | |
439 Lisp_Object safe_chars = XCODING_SYSTEM_SAFE_CHARS (codesys), | |
440 result = Qnil; | |
441 enum query_coding_failure_reasons failed_reason, | |
442 previous_failed_reason = query_coding_succeeded; | |
443 | |
444 /* safe-charsets of t means the coding system can encode everything. */ | |
445 if (EQ (Qnil, safe_chars)) | |
446 { | |
447 if (EQ (Qt, safe_charsets)) | |
448 { | |
449 return Qnil; | |
450 } | |
451 | |
452 /* If we've no information on what characters the coding system can | |
453 encode, give up. */ | |
454 if (EQ (Qnil, safe_charsets) && EQ (Qnil, safe_chars)) | |
455 { | |
456 return Qunbound; | |
457 } | |
458 | |
459 safe_chars = Fgethash (safe_charsets, | |
460 Vdefault_query_coding_region_chartab_cache, | |
461 Qnil); | |
462 if (NILP (safe_chars)) | |
463 { | |
464 safe_chars = Fmake_char_table (Qgeneric); | |
465 { | |
466 EXTERNAL_LIST_LOOP_2 (safe_charset, safe_charsets) | |
467 Fput_char_table (safe_charset, Qt, safe_chars); | |
468 } | |
469 | |
470 Fputhash (safe_charsets, safe_chars, | |
471 Vdefault_query_coding_region_chartab_cache); | |
472 } | |
473 } | |
474 | |
475 if (flags & QUERY_METHOD_HIGHLIGHT && | |
476 /* If we're being called really early, live without highlights getting | |
477 cleared properly: */ | |
478 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) | |
479 { | |
480 /* It's okay to call Lisp here, the only non-stack object we may have | |
481 allocated up to this point is safe_chars, and that's | |
482 reachable from its entry in | |
483 Vdefault_query_coding_region_chartab_cache */ | |
484 call3 (Qquery_coding_clear_highlights, make_int (pos), make_int (end), | |
485 wrap_buffer (buf)); | |
486 } | |
487 | |
488 while (pos < end) | |
489 { | |
490 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); | |
491 if (!EQ (Qnil, get_char_table (ch, safe_chars))) | |
492 { | |
493 pos++; | |
494 INC_BYTEBPOS (buf, pos_byte); | |
495 } | |
496 else | |
497 { | |
498 fail_range_start = pos; | |
499 while ((pos < end) && | |
500 (EQ (Qnil, get_char_table (ch, safe_chars)) | |
501 && (failed_reason = query_coding_unencodable)) | |
502 && (previous_failed_reason == query_coding_succeeded | |
503 || previous_failed_reason == failed_reason)) | |
504 { | |
505 pos++; | |
506 INC_BYTEBPOS (buf, pos_byte); | |
507 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); | |
508 previous_failed_reason = failed_reason; | |
509 } | |
510 | |
511 if (fail_range_start == pos) | |
512 { | |
513 /* The character can actually be encoded; move on. */ | |
514 pos++; | |
515 INC_BYTEBPOS (buf, pos_byte); | |
516 } | |
517 else | |
518 { | |
519 assert (previous_failed_reason == query_coding_unencodable); | |
520 | |
521 if (flags & QUERY_METHOD_ERRORP) | |
522 { | |
523 DECLARE_EISTRING (error_details); | |
524 | |
525 eicpy_ascii (error_details, "Cannot encode "); | |
526 eicat_lstr (error_details, | |
527 make_string_from_buffer (buf, fail_range_start, | |
528 pos - | |
529 fail_range_start)); | |
530 eicat_ascii (error_details, " using coding system"); | |
531 | |
532 signal_error (Qtext_conversion_error, | |
533 (const CIbyte *)(eidata (error_details)), | |
534 XCODING_SYSTEM_NAME (codesys)); | |
535 } | |
536 | |
537 if (NILP (result)) | |
538 { | |
539 result = Fmake_range_table (Qstart_closed_end_open); | |
540 } | |
541 | |
542 fail_range_end = pos; | |
543 | |
544 Fput_range_table (make_int (fail_range_start), | |
545 make_int (fail_range_end), | |
546 Qunencodable, | |
547 result); | |
548 previous_failed_reason = query_coding_succeeded; | |
549 | |
550 if (flags & QUERY_METHOD_HIGHLIGHT) | |
551 { | |
552 Lisp_Object extent | |
553 = Fmake_extent (make_int (fail_range_start), | |
554 make_int (fail_range_end), | |
555 wrap_buffer (buf)); | |
556 | |
557 Fset_extent_priority | |
558 (extent, make_int (2 + mouse_highlight_priority)); | |
559 Fset_extent_face (extent, Qquery_coding_warning_face); | |
560 } | |
561 } | |
562 } | |
563 } | |
564 | |
565 return result; | |
566 } | |
567 #else | |
568 Lisp_Object | |
569 default_query_method (Lisp_Object UNUSED (codesys), | |
570 struct buffer * UNUSED (buf), | |
571 Charbpos UNUSED (end), int UNUSED (flags)) | |
572 { | |
573 return Qnil; | |
574 } | |
575 #endif /* defined MULE */ | |
404 | 576 |
405 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* | 577 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* |
406 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | 578 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. |
407 Valid types depend on how XEmacs was compiled but may include | 579 Valid types depend on how XEmacs was compiled but may include |
408 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', | 580 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
454 (object)) | 626 (object)) |
455 { | 627 { |
456 return CODING_SYSTEMP (object) ? Qt : Qnil; | 628 return CODING_SYSTEMP (object) ? Qt : Qnil; |
457 } | 629 } |
458 | 630 |
631 static Lisp_Object | |
632 find_coding_system (Lisp_Object coding_system_or_name, | |
633 int do_autoloads) | |
634 { | |
635 Lisp_Object lookup; | |
636 | |
637 if (NILP (coding_system_or_name)) | |
638 coding_system_or_name = Qbinary; | |
639 else if (CODING_SYSTEMP (coding_system_or_name)) | |
640 return coding_system_or_name; | |
641 else | |
642 CHECK_SYMBOL (coding_system_or_name); | |
643 | |
644 while (1) | |
645 { | |
646 lookup = | |
647 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
648 | |
649 if (CONSP (lookup) && do_autoloads) | |
650 { | |
651 struct gcpro gcpro1; | |
652 int length; | |
653 DECLARE_EISTRING (desired_base); | |
654 DECLARE_EISTRING (warning_info); | |
655 | |
656 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name)); | |
657 | |
658 /* Work out the name of the base coding system. */ | |
659 length = eilen (desired_base); | |
660 if (length > (int)(sizeof ("-unix") - 1)) | |
661 { | |
662 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base)) | |
663 + (length - (sizeof ("-unix") - 1)))) | |
664 { | |
665 eidel (desired_base, length - (sizeof ("-unix") - 1), | |
666 -1, 5, 5); | |
667 } | |
668 } | |
669 else if (length > (int)(sizeof ("-dos") - 1)) | |
670 { | |
671 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base)) | |
672 + (length - (sizeof ("-dos") - 1)))) || | |
673 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base)) | |
674 + (length - (sizeof ("-mac") - 1))))) | |
675 { | |
676 eidel (desired_base, length - (sizeof ("-dos") - 1), -1, | |
677 4, 4); | |
678 } | |
679 } | |
680 | |
681 coding_system_or_name = intern_int (eidata (desired_base)); | |
682 | |
683 /* Remove this coding system and its subsidiary coding | |
684 systems from the hash, to avoid calling this code recursively. */ | |
685 Fremhash (coding_system_or_name, Vcoding_system_hash_table); | |
686 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"), | |
687 Vcoding_system_hash_table); | |
688 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"), | |
689 Vcoding_system_hash_table); | |
690 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"), | |
691 Vcoding_system_hash_table); | |
692 | |
693 eicpy_ascii (warning_info, "Error autoloading coding system "); | |
694 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name)); | |
695 | |
696 /* Keep around the form so it doesn't disappear from under | |
697 #'eval's feet. */ | |
698 GCPRO1 (lookup); | |
699 call1_trapping_problems ((const CIbyte *)eidata (warning_info), | |
700 Qeval, lookup, 0); | |
701 UNGCPRO; | |
702 | |
703 lookup = | |
704 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
705 } | |
706 | |
707 if (CODING_SYSTEMP (lookup) || NILP (lookup)) | |
708 return lookup; | |
709 | |
710 coding_system_or_name = lookup; | |
711 } | |
712 } | |
713 | |
459 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* | 714 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* |
460 Retrieve the coding system of the given name. | 715 Retrieve the coding system of the given name. |
461 | 716 |
462 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply | 717 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply |
463 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. | 718 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. |
464 If there is no such coding system, nil is returned. Otherwise the | 719 If there is no such coding system, nil is returned. Otherwise the |
465 associated coding system object is returned. | 720 associated coding system object is returned. |
466 */ | 721 */ |
467 (coding_system_or_name)) | 722 (coding_system_or_name)) |
468 { | 723 { |
469 if (NILP (coding_system_or_name)) | 724 return find_coding_system(coding_system_or_name, 1); |
470 coding_system_or_name = Qbinary; | 725 } |
471 else if (CODING_SYSTEMP (coding_system_or_name)) | 726 |
472 return coding_system_or_name; | 727 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /* |
473 else | 728 Define SYMBOL as a coding-system that is loaded on demand. |
474 CHECK_SYMBOL (coding_system_or_name); | 729 |
475 | 730 FORM is a form to evaluate to define the coding-system. |
476 while (1) | 731 */ |
477 { | 732 (symbol, form)) |
478 coding_system_or_name = | 733 { |
479 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | 734 Lisp_Object lookup; |
480 | 735 |
481 if (CODING_SYSTEMP (coding_system_or_name) | 736 CHECK_SYMBOL (symbol); |
482 || NILP (coding_system_or_name)) | 737 CHECK_CONS (form); |
483 return coding_system_or_name; | 738 |
484 } | 739 lookup = find_coding_system (symbol, 0); |
740 | |
741 if (!NILP (lookup) && | |
742 /* Allow autoloads to be redefined. */ | |
743 !CONSP (lookup)) | |
744 { | |
745 invalid_operation ("Cannot redefine existing coding system", | |
746 symbol); | |
747 } | |
748 | |
749 Fputhash (symbol, form, Vcoding_system_hash_table); | |
750 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form, | |
751 Vcoding_system_hash_table); | |
752 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form, | |
753 Vcoding_system_hash_table); | |
754 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form, | |
755 Vcoding_system_hash_table); | |
756 | |
757 /* Tell the POSIX locale infrastructure about this coding system (though | |
758 unfortunately it'll be too late for the startup locale sniffing). */ | |
759 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash)) | |
760 { | |
761 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash); | |
762 DECLARE_EISTRING (minimal_name); | |
763 Ibyte *full_name; | |
764 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i; | |
765 | |
766 if (!NILP (val)) | |
767 { | |
768 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol)); | |
769 for (i = 0; i < len; ++i) | |
770 { | |
771 if (full_name[i] >= '0' && full_name[i] <= '9') | |
772 { | |
773 eicat_ch (minimal_name, full_name[i]); | |
774 } | |
775 else if (full_name[i] >= 'a' && full_name[i] <= 'z') | |
776 { | |
777 eicat_ch (minimal_name, full_name[i]); | |
778 } | |
779 else if (full_name[i] >= 'A' && full_name[i] <= 'Z') | |
780 { | |
781 eicat_ch (minimal_name, full_name[i] + | |
782 ('a' - 'A')); | |
783 } | |
784 } | |
785 | |
786 if (eilen (minimal_name)) | |
787 { | |
788 CHECK_HASH_TABLE (val); | |
789 Fputhash (eimake_string(minimal_name), symbol, val); | |
790 } | |
791 } | |
792 } | |
793 | |
794 return Qt; | |
485 } | 795 } |
486 | 796 |
487 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* | 797 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* |
488 Retrieve the coding system of the given name. | 798 Retrieve the coding system of the given name. |
489 Same as `find-coding-system' except that if there is no such | 799 Same as `find-coding-system' except that if there is no such |
636 int normal; | 946 int normal; |
637 int internal; | 947 int internal; |
638 }; | 948 }; |
639 | 949 |
640 static int | 950 static int |
641 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object UNUSED (value), | 951 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, |
642 void *coding_system_list_closure) | 952 void *coding_system_list_closure) |
643 { | 953 { |
644 /* This function can GC */ | 954 /* This function can GC */ |
645 struct coding_system_list_closure *cscl = | 955 struct coding_system_list_closure *cscl = |
646 (struct coding_system_list_closure *) coding_system_list_closure; | 956 (struct coding_system_list_closure *) coding_system_list_closure; |
647 Lisp_Object *coding_system_list = cscl->coding_system_list; | 957 Lisp_Object *coding_system_list = cscl->coding_system_list; |
648 | 958 |
649 /* We can't just use VALUE because KEY might be an alias, and we need | 959 /* We can't just use VALUE because KEY might be an alias, and we need |
650 the real coding system object. */ | 960 the real coding system object. |
651 if (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | 961 |
652 cscl->internal : cscl->normal) | 962 Autoloaded coding systems have conses for their values, and can't be |
963 internal coding systems, or coding system aliases. */ | |
964 if (CONSP (value) || | |
965 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | |
966 cscl->internal : cscl->normal)) | |
653 *coding_system_list = Fcons (key, *coding_system_list); | 967 *coding_system_list = Fcons (key, *coding_system_list); |
654 return 0; | 968 return 0; |
655 } | 969 } |
656 | 970 |
657 /* #### should we specify a convention for "all coding systems"? */ | 971 /* #### should we specify a convention for "all coding systems"? */ |
737 } | 1051 } |
738 } | 1052 } |
739 | 1053 |
740 struct subsidiary_type | 1054 struct subsidiary_type |
741 { | 1055 { |
742 Ascbyte *extension; | 1056 const Ascbyte *extension; |
743 Ascbyte *mnemonic_ext; | 1057 const Ascbyte *mnemonic_ext; |
744 enum eol_type eol; | 1058 enum eol_type eol; |
745 }; | 1059 }; |
746 | 1060 |
747 static struct subsidiary_type coding_subsidiary_list[] = | 1061 static struct subsidiary_type coding_subsidiary_list[] = |
748 { { "-unix", "", EOL_LF }, | 1062 { { "-unix", "", EOL_LF }, |
784 (decodes byte->char), we need to coerce it to one by the appropriate | 1098 (decodes byte->char), we need to coerce it to one by the appropriate |
785 wrapping in CANONICAL. */ | 1099 wrapping in CANONICAL. */ |
786 | 1100 |
787 for (i = 0; i < countof (coding_subsidiary_list); i++) | 1101 for (i = 0; i < countof (coding_subsidiary_list); i++) |
788 { | 1102 { |
789 Ascbyte *extension = coding_subsidiary_list[i].extension; | 1103 const Ascbyte *extension = coding_subsidiary_list[i].extension; |
790 Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; | 1104 const Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; |
791 enum eol_type eol = coding_subsidiary_list[i].eol; | 1105 enum eol_type eol = coding_subsidiary_list[i].eol; |
792 | 1106 |
793 qxestrcpy_ascii (codesys_name + len, extension); | 1107 qxestrcpy_ascii (codesys_name + len, extension); |
794 codesys_name_sym = intern_int (codesys_name); | 1108 codesys_name_sym = intern_int (codesys_name); |
795 if (mlen != -1) | 1109 if (mlen != -1) |
822 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; | 1136 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; |
823 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; | 1137 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; |
824 } | 1138 } |
825 } | 1139 } |
826 | 1140 |
1141 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, | |
1142 1, 1, 0, /* | |
1143 Return t if OBJECT names a coding system, and is not a coding system alias. | |
1144 */ | |
1145 (object)) | |
1146 { | |
1147 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) | |
1148 ? Qt : Qnil; | |
1149 } | |
1150 | |
827 /* Basic function to create new coding systems. For `make-coding-system', | 1151 /* Basic function to create new coding systems. For `make-coding-system', |
828 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, | 1152 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, |
829 DESCRIPTION, and PROPS are the same. All created coding systems are put | 1153 DESCRIPTION, and PROPS are the same. All created coding systems are put |
830 in a hash table indexed by NAME. | 1154 in a hash table indexed by NAME. |
831 | 1155 |
861 crazy crap is based on existing behavior in other Mule versions, | 1185 crazy crap is based on existing behavior in other Mule versions, |
862 including FSF Emacs.) | 1186 including FSF Emacs.) |
863 */ | 1187 */ |
864 | 1188 |
865 static Lisp_Object | 1189 static Lisp_Object |
866 make_coding_system_1 (Lisp_Object name_or_existing, Ascbyte *prefix, | 1190 make_coding_system_1 (Lisp_Object name_or_existing, const Ascbyte *prefix, |
867 Lisp_Object type, Lisp_Object description, | 1191 Lisp_Object type, Lisp_Object description, |
868 Lisp_Object props) | 1192 Lisp_Object props) |
869 { | 1193 { |
870 Lisp_Coding_System *cs; | 1194 Lisp_Coding_System *cs; |
871 int need_to_setup_eol_systems = 1; | 1195 int need_to_setup_eol_systems = 1; |
872 enum eol_type eol_wrapper = EOL_AUTODETECT; | 1196 enum eol_type eol_wrapper = EOL_AUTODETECT; |
873 struct coding_system_methods *meths; | 1197 struct coding_system_methods *meths; |
874 Lisp_Object csobj; | 1198 Lisp_Object csobj; |
875 Lisp_Object defmnem = Qnil; | 1199 Lisp_Object defmnem = Qnil, aliases = Qnil; |
876 | 1200 |
877 if (NILP (type)) | 1201 if (NILP (type)) |
878 type = Qundecided; | 1202 type = Qundecided; |
879 meths = decode_coding_system_type (type, ERROR_ME); | 1203 meths = decode_coding_system_type (type, ERROR_ME); |
880 | 1204 |
905 xfree (newname, Ibyte *); | 1229 xfree (newname, Ibyte *); |
906 } | 1230 } |
907 else | 1231 else |
908 CHECK_SYMBOL (name_or_existing); | 1232 CHECK_SYMBOL (name_or_existing); |
909 | 1233 |
910 if (!NILP (Ffind_coding_system (name_or_existing))) | 1234 /* See if there is an entry for name_or_existing in the defined coding system |
1235 hash table. */ | |
1236 csobj = find_coding_system (name_or_existing, 0); | |
1237 /* Error if it's there and not an autoload form. */ | |
1238 if (!NILP (csobj) && !CONSP (csobj)) | |
911 invalid_operation ("Cannot redefine existing coding system", | 1239 invalid_operation ("Cannot redefine existing coding system", |
912 name_or_existing); | 1240 name_or_existing); |
913 | 1241 |
914 cs = allocate_coding_system (meths, meths->extra_data_size, | 1242 cs = allocate_coding_system (meths, meths->extra_data_size, |
915 name_or_existing); | 1243 name_or_existing); |
916 csobj = wrap_coding_system (cs); | 1244 csobj = wrap_coding_system (cs); |
917 | 1245 |
955 | 1283 |
956 else if (EQ (key, Qpost_read_conversion)) | 1284 else if (EQ (key, Qpost_read_conversion)) |
957 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; | 1285 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; |
958 else if (EQ (key, Qpre_write_conversion)) | 1286 else if (EQ (key, Qpre_write_conversion)) |
959 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; | 1287 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; |
1288 else if (EQ (key, Qaliases)) | |
1289 { | |
1290 EXTERNAL_LIST_LOOP_2 (alias, value) | |
1291 { | |
1292 CHECK_SYMBOL (alias); | |
1293 | |
1294 if (!NILP (Fcoding_system_canonical_name_p (alias))) | |
1295 { | |
1296 invalid_change ("Symbol is the canonical name of a " | |
1297 "coding system and cannot be redefined", | |
1298 alias); | |
1299 } | |
1300 } | |
1301 aliases = value; | |
1302 } | |
960 /* FSF compatibility */ | 1303 /* FSF compatibility */ |
961 else if (EQ (key, Qtranslation_table_for_decode)) | 1304 else if (EQ (key, Qtranslation_table_for_decode)) |
962 ; | 1305 ; |
963 else if (EQ (key, Qtranslation_table_for_encode)) | 1306 else if (EQ (key, Qtranslation_table_for_encode)) |
964 ; | 1307 ; |
965 else if (EQ (key, Qsafe_chars)) | 1308 else if (EQ (key, Qsafe_chars)) |
966 ; | 1309 { |
1310 CHECK_CHAR_TABLE (value); | |
1311 CODING_SYSTEM_SAFE_CHARS (cs) = value; | |
1312 } | |
967 else if (EQ (key, Qsafe_charsets)) | 1313 else if (EQ (key, Qsafe_charsets)) |
968 ; | 1314 { |
1315 if (!EQ (Qt, value) | |
1316 /* Would be nice to actually do this check, but there are | |
1317 some order conflicts with japanese.el and | |
1318 mule-coding.el */ | |
1319 && 0) | |
1320 { | |
1321 #ifdef MULE | |
1322 EXTERNAL_LIST_LOOP_2 (safe_charset, value) | |
1323 CHECK_CHARSET (Ffind_charset (safe_charset)); | |
1324 #endif | |
1325 } | |
1326 | |
1327 CODING_SYSTEM_SAFE_CHARSETS (cs) = value; | |
1328 } | |
1329 else if (EQ (key, Qcategory)) | |
1330 { | |
1331 Fput (name_or_existing, intern ("coding-system-property"), | |
1332 Fplist_put (Fget (name_or_existing, | |
1333 intern ("coding-system-property"), | |
1334 Qnil), | |
1335 Qcategory, value)); | |
1336 } | |
969 else if (EQ (key, Qmime_charset)) | 1337 else if (EQ (key, Qmime_charset)) |
970 ; | 1338 ; |
971 else if (EQ (key, Qvalid_codes)) | 1339 else if (EQ (key, Qvalid_codes)) |
972 ; | 1340 ; |
973 else | 1341 else |
982 { | 1350 { |
983 XCODING_SYSTEM_CANONICAL (csobj) = | 1351 XCODING_SYSTEM_CANONICAL (csobj) = |
984 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); | 1352 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); |
985 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system | 1353 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system |
986 below */ | 1354 below */ |
1355 | |
1356 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); | |
987 | 1357 |
988 if (need_to_setup_eol_systems && !cs->internal_p) | 1358 if (need_to_setup_eol_systems && !cs->internal_p) |
989 setup_eol_coding_systems (csobj); | 1359 setup_eol_coding_systems (csobj); |
990 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) | 1360 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) |
991 { | 1361 { |
1020 Qconvert_eol_crlf), | 1390 Qconvert_eol_crlf), |
1021 Qcanonicalize_after_coding, | 1391 Qcanonicalize_after_coding, |
1022 csobj)); | 1392 csobj)); |
1023 } | 1393 } |
1024 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; | 1394 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; |
1395 | |
1396 { | |
1397 EXTERNAL_LIST_LOOP_2 (alias, aliases) | |
1398 Fdefine_coding_system_alias (alias, csobj); | |
1399 } | |
1025 } | 1400 } |
1026 | |
1027 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); | |
1028 | 1401 |
1029 return csobj; | 1402 return csobj; |
1030 } | 1403 } |
1031 | 1404 |
1032 Lisp_Object | 1405 Lisp_Object |
1033 make_internal_coding_system (Lisp_Object existing, Ascbyte *prefix, | 1406 make_internal_coding_system (Lisp_Object existing, const Ascbyte *prefix, |
1034 Lisp_Object type, Lisp_Object description, | 1407 Lisp_Object type, Lisp_Object description, |
1035 Lisp_Object props) | 1408 Lisp_Object props) |
1036 { | 1409 { |
1037 return make_coding_system_1 (existing, prefix, type, description, props); | 1410 return make_coding_system_1 (existing, prefix, type, description, props); |
1038 } | 1411 } |
1039 | 1412 |
1040 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* | 1413 DEFUN ("make-coding-system-internal", Fmake_coding_system_internal, 2, 4, 0, /* |
1041 Register symbol NAME as a coding system. | 1414 See `make-coding-system'. This does much of the work of that function. |
1042 | 1415 |
1043 TYPE describes the conversion method used and should be one of | 1416 Without Mule support, it does all the work of that function, and an alias |
1044 | 1417 exists, mapping `make-coding-system' to |
1045 nil or `undecided' | 1418 `make-coding-system-internal'. You'll need a non-Mule XEmacs to read the |
1046 Automatic conversion. XEmacs attempts to detect the coding system | 1419 complete docstring. Or you can just read it in make-coding-system.el; |
1047 used in the file. | 1420 something like the following should work: |
1048 `chain' | 1421 |
1049 Chain two or more coding systems together to make a combination coding | 1422 \\[find-function-other-window] find-file RET \\[find-file] mule/make-coding-system.el RET |
1050 system. | |
1051 `no-conversion' | |
1052 No conversion. Use this for binary files and such. On output, | |
1053 graphic characters that are not in ASCII or Latin-1 will be | |
1054 replaced by a ?. (For a no-conversion-encoded buffer, these | |
1055 characters will only be present if you explicitly insert them.) | |
1056 `convert-eol' | |
1057 Convert CRLF sequences or CR to LF. | |
1058 `shift-jis' | |
1059 Shift-JIS (a Japanese encoding commonly used in PC operating systems). | |
1060 `unicode' | |
1061 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.). | |
1062 `mswindows-unicode-to-multibyte' | |
1063 (MS Windows only) Converts from Windows Unicode to Windows Multibyte | |
1064 (any code page encoding) upon encoding, and the other way upon decoding. | |
1065 `mswindows-multibyte' | |
1066 Converts to or from Windows Multibyte (any code page encoding). | |
1067 This is resolved into a chain of `mswindows-unicode' and | |
1068 `mswindows-unicode-to-multibyte'. | |
1069 `iso2022' | |
1070 Any ISO2022-compliant encoding. Among other things, this includes | |
1071 JIS (the Japanese encoding commonly used for e-mail), EUC (the | |
1072 standard Unix encoding for Japanese and other languages), and | |
1073 Compound Text (the encoding used in X11). You can specify more | |
1074 specific information about the conversion with the PROPS argument. | |
1075 `big5' | |
1076 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan). | |
1077 `ccl' | |
1078 The conversion is performed using a user-written pseudo-code | |
1079 program. CCL (Code Conversion Language) is the name of this | |
1080 pseudo-code. | |
1081 `gzip' | |
1082 GZIP compression format. | |
1083 `internal' | |
1084 Write out or read in the raw contents of the memory representing | |
1085 the buffer's text. This is primarily useful for debugging | |
1086 purposes, and is only enabled when XEmacs has been compiled with | |
1087 DEBUG_XEMACS defined (via the --debug configure option). | |
1088 WARNING: Reading in a file using `internal' conversion can result | |
1089 in an internal inconsistency in the memory representing a | |
1090 buffer's text, which will produce unpredictable results and may | |
1091 cause XEmacs to crash. Under normal circumstances you should | |
1092 never use `internal' conversion. | |
1093 | |
1094 DESCRIPTION is a short English phrase describing the coding system, | |
1095 suitable for use as a menu item. (See also the `documentation' property | |
1096 below.) | |
1097 | |
1098 PROPS is a property list, describing the specific nature of the | |
1099 character set. Recognized properties are: | |
1100 | |
1101 `mnemonic' | |
1102 String to be displayed in the modeline when this coding system is | |
1103 active. | |
1104 | |
1105 `documentation' | |
1106 Detailed documentation on the coding system. | |
1107 | |
1108 `eol-type' | |
1109 End-of-line conversion to be used. It should be one of | |
1110 | |
1111 nil | |
1112 Automatically detect the end-of-line type (LF, CRLF, | |
1113 or CR). Also generate subsidiary coding systems named | |
1114 `NAME-unix', `NAME-dos', and `NAME-mac', that are | |
1115 identical to this coding system but have an EOL-TYPE | |
1116 value of `lf', `crlf', and `cr', respectively. | |
1117 `lf' | |
1118 The end of a line is marked externally using ASCII LF. | |
1119 Since this is also the way that XEmacs represents an | |
1120 end-of-line internally, specifying this option results | |
1121 in no end-of-line conversion. This is the standard | |
1122 format for Unix text files. | |
1123 `crlf' | |
1124 The end of a line is marked externally using ASCII | |
1125 CRLF. This is the standard format for MS-DOS text | |
1126 files. | |
1127 `cr' | |
1128 The end of a line is marked externally using ASCII CR. | |
1129 This is the standard format for Macintosh text files. | |
1130 t | |
1131 Automatically detect the end-of-line type but do not | |
1132 generate subsidiary coding systems. (This value is | |
1133 converted to nil when stored internally, and | |
1134 `coding-system-property' will return nil.) | |
1135 | |
1136 `post-read-conversion' | |
1137 The value is a function to call after some text is inserted and | |
1138 decoded by the coding system itself and before any functions in | |
1139 `after-change-functions' are called. (#### Not actually true in | |
1140 XEmacs. `after-change-functions' will be called twice if | |
1141 `post-read-conversion' changes something.) The argument of this | |
1142 function is the same as for a function in | |
1143 `after-insert-file-functions', i.e. LENGTH of the text inserted, | |
1144 with point at the head of the text to be decoded. | |
1145 | |
1146 `pre-write-conversion' | |
1147 The value is a function to call after all functions in | |
1148 `write-region-annotate-functions' and `buffer-file-format' are | |
1149 called, and before the text is encoded by the coding system itself. | |
1150 The arguments to this function are the same as those of a function | |
1151 in `write-region-annotate-functions', i.e. FROM and TO, specifying | |
1152 a region of text. | |
1153 | |
1154 | |
1155 | |
1156 The following properties are allowed for FSF compatibility but currently | |
1157 ignored: | |
1158 | |
1159 `translation-table-for-decode' | |
1160 The value is a translation table to be applied on decoding. See | |
1161 the function `make-translation-table' for the format of translation | |
1162 table. This is not applicable to CCL-based coding systems. | |
1163 | |
1164 `translation-table-for-encode' | |
1165 The value is a translation table to be applied on encoding. This is | |
1166 not applicable to CCL-based coding systems. | |
1167 | |
1168 `safe-chars' | |
1169 The value is a char table. If a character has non-nil value in it, | |
1170 the character is safely supported by the coding system. This | |
1171 overrides the specification of safe-charsets. | |
1172 | |
1173 `safe-charsets' | |
1174 The value is a list of charsets safely supported by the coding | |
1175 system. The value t means that all charsets Emacs handles are | |
1176 supported. Even if some charset is not in this list, it doesn't | |
1177 mean that the charset can't be encoded in the coding system; | |
1178 it just means that some other receiver of text encoded | |
1179 in the coding system won't be able to handle that charset. | |
1180 | |
1181 `mime-charset' | |
1182 The value is a symbol whose name is the `MIME-charset' parameter of | |
1183 the coding system. | |
1184 | |
1185 `valid-codes' (meaningful only for a coding system based on CCL) | |
1186 The value is a list to indicate valid byte ranges of the encoded | |
1187 file. Each element of the list is an integer or a cons of integers. | |
1188 In the former case, the integer value is a valid byte code. In the | |
1189 latter case, the integers specify the range of valid byte codes. | |
1190 | |
1191 | |
1192 | |
1193 The following additional property is recognized if TYPE is `convert-eol': | |
1194 | |
1195 `subtype' | |
1196 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding, | |
1197 the corresponding sequence will be converted to LF. When encoding, | |
1198 the opposite happens. This coding system converts characters to | |
1199 characters. | |
1200 | |
1201 | |
1202 | |
1203 The following additional properties are recognized if TYPE is `iso2022': | |
1204 | |
1205 `charset-g0' | |
1206 `charset-g1' | |
1207 `charset-g2' | |
1208 `charset-g3' | |
1209 The character set initially designated to the G0 - G3 registers. | |
1210 The value should be one of | |
1211 | |
1212 -- A charset object (designate that character set) | |
1213 -- nil (do not ever use this register) | |
1214 -- t (no character set is initially designated to | |
1215 the register, but may be later on; this automatically | |
1216 sets the corresponding `force-g*-on-output' property) | |
1217 | |
1218 `force-g0-on-output' | |
1219 `force-g1-on-output' | |
1220 `force-g2-on-output' | |
1221 `force-g3-on-output' | |
1222 If non-nil, send an explicit designation sequence on output before | |
1223 using the specified register. | |
1224 | |
1225 `short' | |
1226 If non-nil, use the short forms "ESC $ @", "ESC $ A", and | |
1227 "ESC $ B" on output in place of the full designation sequences | |
1228 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B". | |
1229 | |
1230 `no-ascii-eol' | |
1231 If non-nil, don't designate ASCII to G0 at each end of line on output. | |
1232 Setting this to non-nil also suppresses other state-resetting that | |
1233 normally happens at the end of a line. | |
1234 | |
1235 `no-ascii-cntl' | |
1236 If non-nil, don't designate ASCII to G0 before control chars on output. | |
1237 | |
1238 `seven' | |
1239 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit | |
1240 environment. | |
1241 | |
1242 `lock-shift' | |
1243 If non-nil, use locking-shift (SO/SI) instead of single-shift | |
1244 or designation by escape sequence. | |
1245 | |
1246 `no-iso6429' | |
1247 If non-nil, don't use ISO6429's direction specification. | |
1248 | |
1249 `escape-quoted' | |
1250 If non-nil, literal control characters that are the same as | |
1251 the beginning of a recognized ISO2022 or ISO6429 escape sequence | |
1252 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E), | |
1253 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character | |
1254 so that they can be properly distinguished from an escape sequence. | |
1255 (Note that doing this results in a non-portable encoding.) This | |
1256 encoding flag is used for byte-compiled files. Note that ESC | |
1257 is a good choice for a quoting character because there are no | |
1258 escape sequences whose second byte is a character from the Control-0 | |
1259 or Control-1 character sets; this is explicitly disallowed by the | |
1260 ISO2022 standard. | |
1261 | |
1262 `input-charset-conversion' | |
1263 A list of conversion specifications, specifying conversion of | |
1264 characters in one charset to another when decoding is performed. | |
1265 Each specification is a list of two elements: the source charset, | |
1266 and the destination charset. | |
1267 | |
1268 `output-charset-conversion' | |
1269 A list of conversion specifications, specifying conversion of | |
1270 characters in one charset to another when encoding is performed. | |
1271 The form of each specification is the same as for | |
1272 `input-charset-conversion'. | |
1273 | |
1274 | |
1275 | |
1276 The following additional properties are recognized (and required) | |
1277 if TYPE is `ccl': | |
1278 | |
1279 `decode' | |
1280 CCL program used for decoding (converting to internal format). | |
1281 | |
1282 `encode' | |
1283 CCL program used for encoding (converting to external format). | |
1284 | |
1285 | |
1286 The following additional properties are recognized if TYPE is `chain': | |
1287 | |
1288 `chain' | |
1289 List of coding systems to be chained together, in decoding order. | |
1290 | |
1291 `canonicalize-after-coding' | |
1292 Coding system to be returned by the detector routines in place of | |
1293 this coding system. | |
1294 | |
1295 | |
1296 | |
1297 The following additional properties are recognized if TYPE is `unicode': | |
1298 | |
1299 `type' | |
1300 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not | |
1301 yet implemented). `utf-16' is the basic two-byte encoding; | |
1302 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible | |
1303 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using | |
1304 only characters that will safely pass through all mail gateways. | |
1305 [[ This should be \"transformation format\". There should also be | |
1306 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]] | |
1307 | |
1308 `little-endian' | |
1309 If non-nil, `utf-16' and `ucs-4' will write out the groups of two | |
1310 or four bytes little-endian instead of big-endian. This is required, | |
1311 for example, under Windows. | |
1312 | |
1313 `need-bom' | |
1314 If non-nil, a byte order mark (BOM, or Unicode U+FEFF) should be | |
1315 written out at the beginning of the data. This serves both to | |
1316 identify the endianness of the following data and to mark the | |
1317 data as Unicode (at least, this is how Windows uses it). | |
1318 [[ The correct term is \"signature\", since this technique may also | |
1319 be used with UTF-8. That is the term used in the standard. ]] | |
1320 | |
1321 | |
1322 The following additional properties are recognized if TYPE is | |
1323 `mswindows-multibyte': | |
1324 | |
1325 `code-page' | |
1326 Either a number (specifying a particular code page) or one of the | |
1327 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI, | |
1328 OEM, Macintosh, or EBCDIC code page associated with a particular | |
1329 locale (given by the `locale' property). NOTE: EBCDIC code pages | |
1330 only exist in Windows 2000 and later. | |
1331 | |
1332 `locale' | |
1333 If `code-page' is a symbol, this specifies the locale whose code | |
1334 page of the corresponding type should be used. This should be | |
1335 one of the following: A cons of two strings, (LANGUAGE | |
1336 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a | |
1337 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is | |
1338 used); or one of the symbols `current', `user-default', or | |
1339 `system-default', corresponding to the values of | |
1340 `mswindows-current-locale', `mswindows-user-default-locale', or | |
1341 `mswindows-system-default-locale', respectively. | |
1342 | |
1343 | |
1344 | |
1345 The following additional properties are recognized if TYPE is `undecided': | |
1346 [[ Doesn't GNU use \"detect-*\" for the following two? ]] | |
1347 | |
1348 `do-eol' | |
1349 Do EOL detection. | |
1350 | |
1351 `do-coding' | |
1352 Do encoding detection. | |
1353 | |
1354 `coding-system' | |
1355 If encoding detection is not done, use the specified coding system | |
1356 to do decoding. This is used internally when implementing coding | |
1357 systems with an EOL type that specifies autodetection (the default), | |
1358 so that the detector routines return the proper subsidiary. | |
1359 | |
1360 | |
1361 | |
1362 The following additional property is recognized if TYPE is `gzip': | |
1363 | |
1364 `level' | |
1365 Compression level: 0 through 9, or `default' (currently 6). | |
1366 | 1423 |
1367 */ | 1424 */ |
1368 (name, type, description, props)) | 1425 (name, type, description, props)) |
1369 { | 1426 { |
1370 return make_coding_system_1 (name, 0, type, description, props); | 1427 return make_coding_system_1 (name, 0, type, description, props); |
1380 (old_coding_system, new_name)) | 1437 (old_coding_system, new_name)) |
1381 { | 1438 { |
1382 Lisp_Object new_coding_system; | 1439 Lisp_Object new_coding_system; |
1383 old_coding_system = Fget_coding_system (old_coding_system); | 1440 old_coding_system = Fget_coding_system (old_coding_system); |
1384 new_coding_system = | 1441 new_coding_system = |
1385 UNBOUNDP (new_name) ? Qnil : Ffind_coding_system (new_name); | 1442 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0); |
1386 if (NILP (new_coding_system)) | 1443 if (NILP (new_coding_system)) |
1387 { | 1444 { |
1388 new_coding_system = | 1445 new_coding_system = |
1389 wrap_coding_system | 1446 wrap_coding_system |
1390 (allocate_coding_system | 1447 (allocate_coding_system |
1404 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); | 1461 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); |
1405 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from)); | 1462 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from)); |
1406 to->name = new_name; | 1463 to->name = new_name; |
1407 } | 1464 } |
1408 return new_coding_system; | 1465 return new_coding_system; |
1409 } | |
1410 | |
1411 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, | |
1412 1, 1, 0, /* | |
1413 Return t if OBJECT names a coding system, and is not a coding system alias. | |
1414 */ | |
1415 (object)) | |
1416 { | |
1417 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) | |
1418 ? Qt : Qnil; | |
1419 } | 1466 } |
1420 | 1467 |
1421 /* #### Shouldn't this really be a find/get pair? */ | 1468 /* #### Shouldn't this really be a find/get pair? */ |
1422 | 1469 |
1423 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* | 1470 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* |
1698 return XCODING_SYSTEM_EOL_CR (coding_system); | 1745 return XCODING_SYSTEM_EOL_CR (coding_system); |
1699 else if (EQ (prop, Qpost_read_conversion)) | 1746 else if (EQ (prop, Qpost_read_conversion)) |
1700 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); | 1747 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); |
1701 else if (EQ (prop, Qpre_write_conversion)) | 1748 else if (EQ (prop, Qpre_write_conversion)) |
1702 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); | 1749 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); |
1750 else if (EQ (prop, Qsafe_charsets)) | |
1751 return XCODING_SYSTEM_SAFE_CHARSETS (coding_system); | |
1752 else if (EQ (prop, Qsafe_chars)) | |
1753 return XCODING_SYSTEM_SAFE_CHARS (coding_system); | |
1703 else | 1754 else |
1704 { | 1755 { |
1705 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), | 1756 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), |
1706 getprop, | 1757 getprop, |
1707 (coding_system, prop), | 1758 (coding_system, prop), |
2272 Lstream_delete (XLSTREAM (lb_outstream)); | 2323 Lstream_delete (XLSTREAM (lb_outstream)); |
2273 return make_int (retlen); | 2324 return make_int (retlen); |
2274 } | 2325 } |
2275 } | 2326 } |
2276 | 2327 |
2277 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, 0, /* | 2328 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, |
2329 "*r\nzDecode from coding system: \ni", /* | |
2278 Decode the text between START and END which is encoded in CODING-SYSTEM. | 2330 Decode the text between START and END which is encoded in CODING-SYSTEM. |
2279 This is useful if you've read in encoded text from a file without decoding | 2331 This is useful if you've read in encoded text from a file without decoding |
2280 it (e.g. you read in a JIS-formatted file but used the `binary' or | 2332 it (e.g. you read in a JIS-formatted file but used the `binary' or |
2281 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). | 2333 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). |
2282 Return length of decoded text. | 2334 Return length of decoded text. |
2283 BUFFER defaults to the current buffer if unspecified. | 2335 BUFFER defaults to the current buffer if unspecified, and when interactive. |
2284 */ | 2336 */ |
2285 (start, end, coding_system, buffer)) | 2337 (start, end, coding_system, buffer)) |
2286 { | 2338 { |
2287 return encode_decode_coding_region (start, end, coding_system, buffer, | 2339 return encode_decode_coding_region (start, end, coding_system, buffer, |
2288 CODING_DECODE); | 2340 CODING_DECODE); |
2289 } | 2341 } |
2290 | 2342 |
2291 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, 0, /* | 2343 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, |
2344 "*r\nzEncode to coding system: \ni", /* | |
2292 Encode the text between START and END using CODING-SYSTEM. | 2345 Encode the text between START and END using CODING-SYSTEM. |
2293 This will, for example, convert Japanese characters into stuff such as | 2346 This will, for example, convert Japanese characters into stuff such as |
2294 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded | 2347 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text. |
2295 text. BUFFER defaults to the current buffer if unspecified. | 2348 BUFFER defaults to the current buffer if unspecified, and when interactive. |
2296 */ | 2349 */ |
2297 (start, end, coding_system, buffer)) | 2350 (start, end, coding_system, buffer)) |
2298 { | 2351 { |
2299 return encode_decode_coding_region (start, end, coding_system, buffer, | 2352 return encode_decode_coding_region (start, end, coding_system, buffer, |
2300 CODING_ENCODE); | 2353 CODING_ENCODE); |
2301 } | 2354 } |
2355 | |
2356 DEFUN ("query-coding-region", Fquery_coding_region, 3, 7, 0, /* | |
2357 Work out whether CODING-SYSTEM can losslessly encode a region. | |
2358 | |
2359 START and END are the beginning and end of the region to check. | |
2360 CODING-SYSTEM is the coding system to try. | |
2361 | |
2362 Optional argument BUFFER is the buffer to check, and defaults to the current | |
2363 buffer. | |
2364 | |
2365 IGNORE-INVALID-SEQUENCESP, also an optional argument, says to treat XEmacs | |
2366 characters which have an unambiguous encoded representation, despite being | |
2367 undefined in what they represent, as encodable. These chiefly arise with | |
2368 variable-length encodings like UTF-8 and UTF-16, where an invalid sequence | |
2369 is passed through to XEmacs as a sequence of characters with a defined | |
2370 correspondence to the octets on disk, but no non-error semantics; see the | |
2371 `invalid-sequence-coding-system' argument to `set-language-info'. | |
2372 | |
2373 They can also arise with fixed-length encodings like ISO 8859-7, where | |
2374 certain octets on disk have undefined values, and treating them as | |
2375 corresponding to the ISO 8859-1 characters with the same numerical values | |
2376 may lead to data that is not understood by other applications. | |
2377 | |
2378 Optional argument ERRORP says to signal a `text-conversion-error' if some | |
2379 character in the region cannot be encoded, and defaults to nil. | |
2380 | |
2381 Optional argument HIGHLIGHT says to display unencodable characters in the | |
2382 region using `query-coding-warning-face'. It defaults to nil. | |
2383 | |
2384 This function can return multiple values; the intention is that callers use | |
2385 `multiple-value-bind' or the related CL multiple value functions to deal | |
2386 with it. The first result is `t' if the region can be encoded using | |
2387 CODING-SYSTEM, or `nil' if not. If the region cannot be encoded using | |
2388 CODING-SYSTEM, the second result is a range table describing the positions | |
2389 of the unencodable characters. | |
2390 | |
2391 Ranges that describe characters that would be ignored were | |
2392 IGNORE-INVALID-SEQUENCESP non-nil map to the symbol `invalid-sequence'; | |
2393 other ranges map to the symbol `unencodable'. If IGNORE-INVALID-SEQUENCESP | |
2394 is non-nil, all ranges will map to the symbol `unencodable'. See | |
2395 `make-range-table' for more details of range tables. | |
2396 */ | |
2397 (start, end, coding_system, buffer, ignore_invalid_sequencesp, | |
2398 errorp, highlight)) | |
2399 { | |
2400 Charbpos b, e; | |
2401 struct buffer *buf = decode_buffer (buffer, 1); | |
2402 Lisp_Object result; | |
2403 int flags = 0, speccount = specpdl_depth (); | |
2404 | |
2405 coding_system = Fget_coding_system (coding_system); | |
2406 | |
2407 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
2408 | |
2409 if (buf != current_buffer) | |
2410 { | |
2411 record_unwind_protect (save_current_buffer_restore, Fcurrent_buffer ()); | |
2412 set_buffer_internal (buf); | |
2413 } | |
2414 | |
2415 record_unwind_protect (save_excursion_restore, save_excursion_save ()); | |
2416 | |
2417 BUF_SET_PT (buf, b); | |
2418 | |
2419 if (!NILP (ignore_invalid_sequencesp)) | |
2420 { | |
2421 flags |= QUERY_METHOD_IGNORE_INVALID_SEQUENCES; | |
2422 } | |
2423 | |
2424 if (!NILP (errorp)) | |
2425 { | |
2426 flags |= QUERY_METHOD_ERRORP; | |
2427 } | |
2428 | |
2429 if (!NILP (highlight)) | |
2430 { | |
2431 flags |= QUERY_METHOD_HIGHLIGHT; | |
2432 } | |
2433 | |
2434 result = XCODESYSMETH_OR_GIVEN (coding_system, query, | |
2435 (coding_system, buf, e, flags), Qunbound); | |
2436 | |
2437 if (UNBOUNDP (result)) | |
2438 { | |
2439 signal_error (Qtext_conversion_error, | |
2440 "Coding system doesn't say what it can encode", | |
2441 XCODING_SYSTEM_NAME (coding_system)); | |
2442 } | |
2443 | |
2444 result = (NILP (result)) ? Qt : values2 (Qnil, result); | |
2445 | |
2446 return unbind_to_1 (speccount, result); | |
2447 } | |
2448 | |
2302 | 2449 |
2303 | 2450 |
2304 /************************************************************************/ | 2451 /************************************************************************/ |
2305 /* Chain methods */ | 2452 /* Chain methods */ |
2306 /************************************************************************/ | 2453 /************************************************************************/ |
3349 } | 3496 } |
3350 | 3497 |
3351 static void | 3498 static void |
3352 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) | 3499 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) |
3353 { | 3500 { |
3354 UExtbyte *ascii = alloca_array (UExtbyte, n + 1); | 3501 Extbyte *ascii = alloca_array (Extbyte, n + 1); |
3355 UExtbyte *hex = alloca_array (UExtbyte, 3 * n + 1); | 3502 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1); |
3356 int i; | 3503 int i; |
3504 DECLARE_EISTRING (eistr_ascii); | |
3505 DECLARE_EISTRING (eistr_hex); | |
3357 | 3506 |
3358 for (i = 0; i < n; i++) | 3507 for (i = 0; i < n; i++) |
3359 { | 3508 { |
3360 UExtbyte c = src[i]; | 3509 Extbyte c = src[i]; |
3361 if (c < 0x20) | 3510 if (c < 0x20) |
3362 ascii[i] = '.'; | 3511 ascii[i] = '.'; |
3363 else | 3512 else |
3364 ascii[i] = c; | 3513 ascii[i] = c; |
3365 hex[3 * i] = hex_digit_to_char (c >> 4); | 3514 hex[3 * i] = hex_digit_to_char (c >> 4); |
3366 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); | 3515 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); |
3367 hex[3 * i + 2] = ' '; | 3516 hex[3 * i + 2] = ' '; |
3368 } | 3517 } |
3369 ascii[i] = '\0'; | 3518 ascii[i] = '\0'; |
3370 hex[3 * i - 1] = '\0'; | 3519 hex[3 * i - 1] = '\0'; |
3371 stderr_out ("%s %s", ascii, hex); | 3520 |
3521 eicpy_ext(eistr_hex, hex, Qbinary); | |
3522 eicpy_ext(eistr_ascii, ascii, Qbinary); | |
3523 | |
3524 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex)); | |
3372 } | 3525 } |
3373 | 3526 |
3374 #endif /* DEBUG_XEMACS */ | 3527 #endif /* DEBUG_XEMACS */ |
3375 | 3528 |
3376 /* Attempt to determine the encoding of the given text. Before calling | 3529 /* Attempt to determine the encoding of the given text. Before calling |
3496 "Invalid `default-buffer-file-coding-system', set to nil"); | 3649 "Invalid `default-buffer-file-coding-system', set to nil"); |
3497 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; | 3650 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; |
3498 } | 3651 } |
3499 } | 3652 } |
3500 if (NILP (retval)) | 3653 if (NILP (retval)) |
3501 retval = Fget_coding_system (Qraw_text); | 3654 retval = Fget_coding_system (Qbinary); |
3502 return retval; | 3655 return retval; |
3503 } | 3656 } |
3504 else | 3657 else |
3505 { | 3658 { |
3506 int likelihood; | 3659 int likelihood; |
3845 /* #### This is cheesy. What we really ought to do is buffer | 3998 /* #### This is cheesy. What we really ought to do is buffer |
3846 up a certain minimum amount of data so as to get a less | 3999 up a certain minimum amount of data so as to get a less |
3847 random result when doing subprocess detection. */ | 4000 random result when doing subprocess detection. */ |
3848 detect_coding_type (data->st, src, n); | 4001 detect_coding_type (data->st, src, n); |
3849 data->actual = detected_coding_system (data->st); | 4002 data->actual = detected_coding_system (data->st); |
4003 /* kludge to prevent infinite recursion */ | |
4004 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system) | |
4005 data->actual = Fget_coding_system (Qbinary); | |
3850 } | 4006 } |
3851 } | 4007 } |
3852 /* We need to set the detected coding system if we actually have | 4008 /* We need to set the detected coding system if we actually have |
3853 such a coding system but didn't before. That is the case | 4009 such a coding system but didn't before. That is the case |
3854 either when we just detected it in the previous code or when | 4010 either when we just detected it in the previous code or when |
3892 | 4048 |
3893 if (str->direction == CODING_ENCODE) | 4049 if (str->direction == CODING_ENCODE) |
3894 return str->codesys; | 4050 return str->codesys; |
3895 | 4051 |
3896 if (!data->c.initted) | 4052 if (!data->c.initted) |
3897 return Fget_coding_system (Qundecided); | 4053 return str->codesys; |
3898 | 4054 |
3899 ret = coding_stream_canonicalize_after_coding | 4055 ret = coding_stream_canonicalize_after_coding |
3900 (XLSTREAM (data->c.lstreams[0])); | 4056 (XLSTREAM (data->c.lstreams[0])); |
3901 if (NILP (ret)) | 4057 if (NILP (ret)) |
3902 ret = Fget_coding_system (Qundecided); | 4058 ret = str->codesys; |
3903 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) | 4059 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) |
3904 return ret; | 4060 return ret; |
3905 eolret = coding_stream_canonicalize_after_coding | 4061 eolret = coding_stream_canonicalize_after_coding |
3906 (XLSTREAM (data->c.lstreams[1])); | 4062 (XLSTREAM (data->c.lstreams[1])); |
3907 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) | 4063 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) |
4359 INIT_LISP_OBJECT (coding_system); | 4515 INIT_LISP_OBJECT (coding_system); |
4360 | 4516 |
4361 DEFSUBR (Fvalid_coding_system_type_p); | 4517 DEFSUBR (Fvalid_coding_system_type_p); |
4362 DEFSUBR (Fcoding_system_type_list); | 4518 DEFSUBR (Fcoding_system_type_list); |
4363 DEFSUBR (Fcoding_system_p); | 4519 DEFSUBR (Fcoding_system_p); |
4520 DEFSUBR (Fautoload_coding_system); | |
4364 DEFSUBR (Ffind_coding_system); | 4521 DEFSUBR (Ffind_coding_system); |
4365 DEFSUBR (Fget_coding_system); | 4522 DEFSUBR (Fget_coding_system); |
4366 DEFSUBR (Fcoding_system_list); | 4523 DEFSUBR (Fcoding_system_list); |
4367 DEFSUBR (Fcoding_system_name); | 4524 DEFSUBR (Fcoding_system_name); |
4368 DEFSUBR (Fmake_coding_system); | 4525 DEFSUBR (Fmake_coding_system_internal); |
4369 DEFSUBR (Fcopy_coding_system); | 4526 DEFSUBR (Fcopy_coding_system); |
4370 DEFSUBR (Fcoding_system_canonical_name_p); | 4527 DEFSUBR (Fcoding_system_canonical_name_p); |
4371 DEFSUBR (Fcoding_system_alias_p); | 4528 DEFSUBR (Fcoding_system_alias_p); |
4372 DEFSUBR (Fcoding_system_aliasee); | 4529 DEFSUBR (Fcoding_system_aliasee); |
4373 DEFSUBR (Fdefine_coding_system_alias); | 4530 DEFSUBR (Fdefine_coding_system_alias); |
4386 DEFSUBR (Fcoding_category_system); | 4543 DEFSUBR (Fcoding_category_system); |
4387 | 4544 |
4388 DEFSUBR (Fdetect_coding_region); | 4545 DEFSUBR (Fdetect_coding_region); |
4389 DEFSUBR (Fdecode_coding_region); | 4546 DEFSUBR (Fdecode_coding_region); |
4390 DEFSUBR (Fencode_coding_region); | 4547 DEFSUBR (Fencode_coding_region); |
4548 DEFSUBR (Fquery_coding_region); | |
4391 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); | 4549 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
4392 DEFSYMBOL (Qno_conversion); | 4550 DEFSYMBOL (Qno_conversion); |
4393 DEFSYMBOL (Qconvert_eol); | 4551 DEFSYMBOL (Qconvert_eol); |
4394 DEFSYMBOL (Qconvert_eol_autodetect); | 4552 DEFSYMBOL (Qconvert_eol_autodetect); |
4395 DEFSYMBOL (Qconvert_eol_lf); | 4553 DEFSYMBOL (Qconvert_eol_lf); |
4430 DEFSYMBOL (Qdo_eol); | 4588 DEFSYMBOL (Qdo_eol); |
4431 DEFSYMBOL (Qdo_coding); | 4589 DEFSYMBOL (Qdo_coding); |
4432 | 4590 |
4433 DEFSYMBOL (Qcanonicalize_after_coding); | 4591 DEFSYMBOL (Qcanonicalize_after_coding); |
4434 | 4592 |
4593 DEFSYMBOL (Qposix_charset_to_coding_system_hash); | |
4594 | |
4435 DEFSYMBOL (Qescape_quoted); | 4595 DEFSYMBOL (Qescape_quoted); |
4596 | |
4597 DEFSYMBOL (Qquery_coding_warning_face); | |
4598 DEFSYMBOL (Qaliases); | |
4599 DEFSYMBOL (Qcharset_skip_chars_string); | |
4436 | 4600 |
4437 #ifdef HAVE_ZLIB | 4601 #ifdef HAVE_ZLIB |
4438 DEFSYMBOL (Qgzip); | 4602 DEFSYMBOL (Qgzip); |
4439 #endif | 4603 #endif |
4440 | 4604 |
4596 | 4760 |
4597 Fprovide (intern ("unix-default-eol-detection")); | 4761 Fprovide (intern ("unix-default-eol-detection")); |
4598 #endif | 4762 #endif |
4599 | 4763 |
4600 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* | 4764 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
4601 Coding system used for TTY keyboard input. | 4765 Default coding system used for TTY and X11 keyboard input. |
4602 Not used under a windowing system. | 4766 Under X11, used only to interpret the character for a key event when that |
4767 event has a KeySym of NoSymbol but does have an associated string keysym, | |
4768 something that's seen with input methods. | |
4769 | |
4770 If you need to set these things to different coding systems, call the | |
4771 function `set-console-tty-coding-system' for the TTY and use this variable | |
4772 for X11. | |
4603 */ ); | 4773 */ ); |
4604 Vkeyboard_coding_system = Qnil; | 4774 Vkeyboard_coding_system = Qnil; |
4605 | 4775 |
4606 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* | 4776 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* |
4607 Coding system used for TTY display output. | 4777 Coding system used for TTY display output. |
4649 If non-nil, display debug information about detection operations in progress. | 4819 If non-nil, display debug information about detection operations in progress. |
4650 Information is displayed on stderr. | 4820 Information is displayed on stderr. |
4651 */ ); | 4821 */ ); |
4652 Vdebug_coding_detection = Qnil; | 4822 Vdebug_coding_detection = Qnil; |
4653 #endif | 4823 #endif |
4824 | |
4825 #ifdef MULE | |
4826 Vdefault_query_coding_region_chartab_cache | |
4827 = make_lisp_hash_table (25, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); | |
4828 staticpro (&Vdefault_query_coding_region_chartab_cache); | |
4829 #endif | |
4654 } | 4830 } |
4655 | 4831 |
4656 /* #### reformat this for consistent appearance? */ | 4832 /* #### reformat this for consistent appearance? */ |
4657 | 4833 |
4658 void | 4834 void |
4659 complex_vars_of_file_coding (void) | 4835 complex_vars_of_file_coding (void) |
4660 { | 4836 { |
4661 Fmake_coding_system | 4837 Fmake_coding_system_internal |
4662 (Qconvert_eol_cr, Qconvert_eol, | 4838 (Qconvert_eol_cr, Qconvert_eol, |
4663 build_msg_string ("Convert CR to LF"), | 4839 build_msg_string ("Convert CR to LF"), |
4664 nconc2 (list6 (Qdocumentation, | 4840 nconc2 (list6 (Qdocumentation, |
4665 build_msg_string ( | 4841 build_msg_string ( |
4666 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" | 4842 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" |
4668 Qmnemonic, build_string ("CR->LF"), | 4844 Qmnemonic, build_string ("CR->LF"), |
4669 Qsubtype, Qcr), | 4845 Qsubtype, Qcr), |
4670 /* VERY IMPORTANT! Tell make-coding-system not to generate | 4846 /* VERY IMPORTANT! Tell make-coding-system not to generate |
4671 subsidiaries -- it needs the coding systems we're creating | 4847 subsidiaries -- it needs the coding systems we're creating |
4672 to do so! */ | 4848 to do so! */ |
4673 list2 (Qeol_type, Qlf))); | 4849 list4 (Qeol_type, Qlf, |
4674 | 4850 Qsafe_charsets, Qt))); |
4675 Fmake_coding_system | 4851 |
4852 Fmake_coding_system_internal | |
4676 (Qconvert_eol_lf, Qconvert_eol, | 4853 (Qconvert_eol_lf, Qconvert_eol, |
4677 build_msg_string ("Convert LF to LF (do nothing)"), | 4854 build_msg_string ("Convert LF to LF (do nothing)"), |
4678 nconc2 (list6 (Qdocumentation, | 4855 nconc2 (list6 (Qdocumentation, |
4679 build_msg_string ( | 4856 build_msg_string ( |
4680 "Do nothing."), | 4857 "Do nothing."), |
4681 Qmnemonic, build_string ("LF->LF"), | 4858 Qmnemonic, build_string ("LF->LF"), |
4682 Qsubtype, Qlf), | 4859 Qsubtype, Qlf), |
4683 /* VERY IMPORTANT! Tell make-coding-system not to generate | 4860 /* VERY IMPORTANT! Tell make-coding-system not to generate |
4684 subsidiaries -- it needs the coding systems we're creating | 4861 subsidiaries -- it needs the coding systems we're creating |
4685 to do so! */ | 4862 to do so! */ |
4686 list2 (Qeol_type, Qlf))); | 4863 list4 (Qeol_type, Qlf, |
4687 | 4864 Qsafe_charsets, Qt))); |
4688 Fmake_coding_system | 4865 |
4866 Fmake_coding_system_internal | |
4689 (Qconvert_eol_crlf, Qconvert_eol, | 4867 (Qconvert_eol_crlf, Qconvert_eol, |
4690 build_msg_string ("Convert CRLF to LF"), | 4868 build_msg_string ("Convert CRLF to LF"), |
4691 nconc2 (list6 (Qdocumentation, | 4869 nconc2 (list6 (Qdocumentation, |
4692 build_msg_string ( | 4870 build_msg_string ( |
4693 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" | 4871 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" |
4694 "(used internally and under Unix to mark the end of a line)."), | 4872 "(used internally and under Unix to mark the end of a line)."), |
4695 Qmnemonic, build_string ("CRLF->LF"), | 4873 Qmnemonic, build_string ("CRLF->LF"), |
4696 Qsubtype, Qcrlf), | 4874 Qsubtype, Qcrlf), |
4875 | |
4697 /* VERY IMPORTANT! Tell make-coding-system not to generate | 4876 /* VERY IMPORTANT! Tell make-coding-system not to generate |
4698 subsidiaries -- it needs the coding systems we're creating | 4877 subsidiaries -- it needs the coding systems we're creating |
4699 to do so! */ | 4878 to do so! */ |
4700 list2 (Qeol_type, Qlf))); | 4879 list4 (Qeol_type, Qlf, |
4701 | 4880 Qsafe_charsets, Qt))); |
4702 Fmake_coding_system | 4881 |
4882 Fmake_coding_system_internal | |
4703 (Qconvert_eol_autodetect, Qconvert_eol, | 4883 (Qconvert_eol_autodetect, Qconvert_eol, |
4704 build_msg_string ("Autodetect EOL type"), | 4884 build_msg_string ("Autodetect EOL type"), |
4705 nconc2 (list6 (Qdocumentation, | 4885 nconc2 (list6 (Qdocumentation, |
4706 build_msg_string ( | 4886 build_msg_string ( |
4707 "Autodetect the end-of-line type."), | 4887 "Autodetect the end-of-line type."), |
4708 Qmnemonic, build_string ("Auto-EOL"), | 4888 Qmnemonic, build_string ("Auto-EOL"), |
4709 Qsubtype, Qnil), | 4889 Qsubtype, Qnil), |
4710 /* VERY IMPORTANT! Tell make-coding-system not to generate | 4890 /* VERY IMPORTANT! Tell make-coding-system not to generate |
4711 subsidiaries -- it needs the coding systems we're creating | 4891 subsidiaries -- it needs the coding systems we're creating |
4712 to do so! */ | 4892 to do so! */ |
4713 list2 (Qeol_type, Qlf))); | 4893 list4 (Qeol_type, Qlf, |
4714 | 4894 Qsafe_charsets, Qt))); |
4715 Fmake_coding_system | 4895 |
4896 Fmake_coding_system_internal | |
4716 (Qundecided, Qundecided, | 4897 (Qundecided, Qundecided, |
4717 build_msg_string ("Undecided (auto-detect)"), | 4898 build_msg_string ("Undecided (auto-detect)"), |
4718 nconc2 (list4 (Qdocumentation, | 4899 nconc2 (list4 (Qdocumentation, |
4719 build_msg_string | 4900 build_msg_string |
4720 ("Automatically detects the correct encoding."), | 4901 ("Automatically detects the correct encoding."), |
4723 /* We do EOL detection ourselves so we don't need to be | 4904 /* We do EOL detection ourselves so we don't need to be |
4724 wrapped in an EOL detector. (It doesn't actually hurt, | 4905 wrapped in an EOL detector. (It doesn't actually hurt, |
4725 though, I don't think.) */ | 4906 though, I don't think.) */ |
4726 Qeol_type, Qlf))); | 4907 Qeol_type, Qlf))); |
4727 | 4908 |
4728 Fmake_coding_system | 4909 Fmake_coding_system_internal |
4729 (intern ("undecided-dos"), Qundecided, | 4910 (intern ("undecided-dos"), Qundecided, |
4730 build_msg_string ("Undecided (auto-detect) (CRLF)"), | 4911 build_msg_string ("Undecided (auto-detect) (CRLF)"), |
4731 nconc2 (list4 (Qdocumentation, | 4912 nconc2 (list4 (Qdocumentation, |
4732 build_msg_string | 4913 build_msg_string |
4733 ("Automatically detects the correct encoding; EOL type of CRLF forced."), | 4914 ("Automatically detects the correct encoding; EOL type of CRLF forced."), |
4734 Qmnemonic, build_string ("Auto")), | 4915 Qmnemonic, build_string ("Auto")), |
4735 list4 (Qdo_coding, Qt, | 4916 list4 (Qdo_coding, Qt, |
4736 Qeol_type, Qcrlf))); | 4917 Qeol_type, Qcrlf))); |
4737 | 4918 |
4738 Fmake_coding_system | 4919 Fmake_coding_system_internal |
4739 (intern ("undecided-unix"), Qundecided, | 4920 (intern ("undecided-unix"), Qundecided, |
4740 build_msg_string ("Undecided (auto-detect) (LF)"), | 4921 build_msg_string ("Undecided (auto-detect) (LF)"), |
4741 nconc2 (list4 (Qdocumentation, | 4922 nconc2 (list4 (Qdocumentation, |
4742 build_msg_string | 4923 build_msg_string |
4743 ("Automatically detects the correct encoding; EOL type of LF forced."), | 4924 ("Automatically detects the correct encoding; EOL type of LF forced."), |
4744 Qmnemonic, build_string ("Auto")), | 4925 Qmnemonic, build_string ("Auto")), |
4745 list4 (Qdo_coding, Qt, | 4926 list4 (Qdo_coding, Qt, |
4746 Qeol_type, Qlf))); | 4927 Qeol_type, Qlf))); |
4747 | 4928 |
4748 Fmake_coding_system | 4929 Fmake_coding_system_internal |
4749 (intern ("undecided-mac"), Qundecided, | 4930 (intern ("undecided-mac"), Qundecided, |
4750 build_msg_string ("Undecided (auto-detect) (CR)"), | 4931 build_msg_string ("Undecided (auto-detect) (CR)"), |
4751 nconc2 (list4 (Qdocumentation, | 4932 nconc2 (list4 (Qdocumentation, |
4752 build_msg_string | 4933 build_msg_string |
4753 ("Automatically detects the correct encoding; EOL type of CR forced."), | 4934 ("Automatically detects the correct encoding; EOL type of CR forced."), |
4754 Qmnemonic, build_string ("Auto")), | 4935 Qmnemonic, build_string ("Auto")), |
4755 list4 (Qdo_coding, Qt, | 4936 list4 (Qdo_coding, Qt, |
4756 Qeol_type, Qcr))); | 4937 Qeol_type, Qcr))); |
4757 | 4938 |
4758 /* Need to create this here or we're really screwed. */ | 4939 /* Need to create this here or we're really screwed. */ |
4759 Fmake_coding_system | 4940 Fmake_coding_system_internal |
4760 (Qraw_text, Qno_conversion, | 4941 (Qraw_text, Qno_conversion, |
4761 build_msg_string ("Raw Text"), | 4942 build_msg_string ("Raw Text"), |
4762 list4 (Qdocumentation, | 4943 nconc2 (list4 (Qdocumentation, |
4763 build_msg_string ("Raw text converts only line-break codes, and acts otherwise like `binary'."), | 4944 build_msg_string ("Raw text converts only line-break " |
4764 Qmnemonic, build_string ("Raw"))); | 4945 "codes, and acts otherwise like " |
4765 | 4946 "`binary'."), |
4766 Fmake_coding_system | 4947 Qmnemonic, build_string ("Raw")), |
4948 #ifdef MULE | |
4949 list2 (Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, | |
4950 Vcharset_latin_iso8859_1)))); | |
4951 | |
4952 #else | |
4953 Qnil)); | |
4954 #endif | |
4955 | |
4956 Fmake_coding_system_internal | |
4767 (Qbinary, Qno_conversion, | 4957 (Qbinary, Qno_conversion, |
4768 build_msg_string ("Binary"), | 4958 build_msg_string ("Binary"), |
4769 list6 (Qdocumentation, | 4959 nconc2 (list6 (Qdocumentation, |
4770 build_msg_string ( | 4960 build_msg_string ( |
4771 "This coding system is as close as it comes to doing no conversion.\n" | 4961 "This coding system is as close as it comes to doing no conversion.\n" |
4772 "On input, each byte is converted directly into the character\n" | 4962 "On input, each byte is converted directly into the character\n" |
4773 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" | 4963 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" |
4774 "or `latin-1' character sets. On output, these characters are\n" | 4964 "or `latin-1' character sets. On output, these characters are\n" |
4775 "converted back to the corresponding bytes, and other characters\n" | 4965 "converted back to the corresponding bytes, and other characters\n" |
4776 "are converted to the default character, i.e. `~'."), | 4966 "are converted to the default character, i.e. `~'."), |
4777 Qeol_type, Qlf, | 4967 Qeol_type, Qlf, |
4778 Qmnemonic, build_string ("Binary"))); | 4968 Qmnemonic, build_string ("Binary")), |
4969 #ifdef MULE | |
4970 list2 (Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, | |
4971 Vcharset_latin_iso8859_1)))); | |
4972 | |
4973 #else | |
4974 Qnil)); | |
4975 #endif | |
4779 | 4976 |
4780 /* Formerly aliased to raw-text! Completely bogus and not even the same | 4977 /* Formerly aliased to raw-text! Completely bogus and not even the same |
4781 as FSF Emacs. */ | 4978 as FSF Emacs. */ |
4782 Fdefine_coding_system_alias (Qno_conversion, Qbinary); | 4979 Fdefine_coding_system_alias (Qno_conversion, Qbinary); |
4783 Fdefine_coding_system_alias (intern ("no-conversion-unix"), | 4980 Fdefine_coding_system_alias (intern ("no-conversion-unix"), |