comparison src/file-coding.c @ 5118:e0db3c197671 ben-lisp-object

merge up to latest default branch, doesn't compile yet
author Ben Wing <ben@xemacs.org>
date Sat, 26 Dec 2009 21:18:49 -0600
parents 3742ea8250b5 257b468bf2ca
children d1247f3cc363
comparison
equal deleted inserted replaced
5117:3742ea8250b5 5118:e0db3c197671
76 #include "elhash.h" 76 #include "elhash.h"
77 #include "insdel.h" 77 #include "insdel.h"
78 #include "lstream.h" 78 #include "lstream.h"
79 #include "opaque.h" 79 #include "opaque.h"
80 #include "file-coding.h" 80 #include "file-coding.h"
81 #include "extents.h"
82 #include "rangetab.h"
83 #include "chartab.h"
81 84
82 #ifdef HAVE_ZLIB 85 #ifdef HAVE_ZLIB
83 #include "zlib.h" 86 #include "zlib.h"
84 #endif 87 #endif
85 88
87 Lisp_Object Vterminal_coding_system; 90 Lisp_Object Vterminal_coding_system;
88 Lisp_Object Vcoding_system_for_read; 91 Lisp_Object Vcoding_system_for_read;
89 Lisp_Object Vcoding_system_for_write; 92 Lisp_Object Vcoding_system_for_write;
90 Lisp_Object Vfile_name_coding_system; 93 Lisp_Object Vfile_name_coding_system;
91 94
95 Lisp_Object Qaliases, Qcharset_skip_chars_string;
96
92 #ifdef DEBUG_XEMACS 97 #ifdef DEBUG_XEMACS
93 Lisp_Object Vdebug_coding_detection; 98 Lisp_Object Vdebug_coding_detection;
99 #endif
100
101 #ifdef MULE
102 extern Lisp_Object Vcharset_ascii, Vcharset_control_1,
103 Vcharset_latin_iso8859_1;
94 #endif 104 #endif
95 105
96 typedef struct coding_system_type_entry 106 typedef struct coding_system_type_entry
97 { 107 {
98 struct coding_system_methods *meths; 108 struct coding_system_methods *meths;
226 Lisp_Object Qdo_eol, Qdo_coding; 236 Lisp_Object Qdo_eol, Qdo_coding;
227 237
228 Lisp_Object Qcanonicalize_after_coding; 238 Lisp_Object Qcanonicalize_after_coding;
229 239
230 Lisp_Object QScoding_system_cookie; 240 Lisp_Object QScoding_system_cookie;
241
242 Lisp_Object Qposix_charset_to_coding_system_hash;
231 243
232 /* This is used to convert autodetected coding systems into existing 244 /* This is used to convert autodetected coding systems into existing
233 systems. For example, the chain undecided->convert-eol-autodetect may 245 systems. For example, the chain undecided->convert-eol-autodetect may
234 have its separate parts detected as mswindows-multibyte and 246 have its separate parts detected as mswindows-multibyte and
235 convert-eol-crlf, and the result needs to be mapped to 247 convert-eol-crlf, and the result needs to be mapped to
303 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); 315 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs));
304 print_coding_system_properties (cs, printcharfun); 316 print_coding_system_properties (cs, printcharfun);
305 write_c_string (printcharfun, "]"); 317 write_c_string (printcharfun, "]");
306 } 318 }
307 319
320 #ifndef NEW_GC
308 static void 321 static void
309 finalize_coding_system (void *header, int for_disksave) 322 finalize_coding_system (void *header, int for_disksave)
310 { 323 {
311 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header); 324 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header);
312 /* Since coding systems never go away, this function is not 325 /* Since coding systems never go away, this function is not
313 necessary. But it would be necessary if we changed things 326 necessary. But it would be necessary if we changed things
314 so that coding systems could go away. */ 327 so that coding systems could go away. */
315 if (!for_disksave) /* see comment in lstream.c */ 328 if (!for_disksave) /* see comment in lstream.c */
316 MAYBE_XCODESYSMETH (cs, finalize, (cs)); 329 MAYBE_XCODESYSMETH (cs, finalize, (cs));
317 } 330 }
331 #endif /* not NEW_GC */
318 332
319 static Bytecount 333 static Bytecount
320 sizeof_coding_system (const void *header) 334 sizeof_coding_system (const void *header)
321 { 335 {
322 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header; 336 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header;
364 378
365 const struct sized_memory_description coding_system_empty_extra_description = { 379 const struct sized_memory_description coding_system_empty_extra_description = {
366 0, coding_system_empty_extra_description_1 380 0, coding_system_empty_extra_description_1
367 }; 381 };
368 382
369 DEFINE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, 383 #ifdef NEW_GC
370 mark_coding_system, 384 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system,
371 print_coding_system, 385 mark_coding_system,
372 finalize_coding_system, 386 print_coding_system,
373 0, 0, coding_system_description, 387 0, 0, 0, coding_system_description,
374 sizeof_coding_system, 388 sizeof_coding_system,
375 Lisp_Coding_System); 389 Lisp_Coding_System);
390 #else /* not NEW_GC */
391 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system,
392 mark_coding_system,
393 print_coding_system,
394 finalize_coding_system,
395 0, 0, coding_system_description,
396 sizeof_coding_system,
397 Lisp_Coding_System);
398 #endif /* not NEW_GC */
376 399
377 /************************************************************************/ 400 /************************************************************************/
378 /* Creating coding systems */ 401 /* Creating coding systems */
379 /************************************************************************/ 402 /************************************************************************/
380 403
399 static int 422 static int
400 valid_coding_system_type_p (Lisp_Object type) 423 valid_coding_system_type_p (Lisp_Object type)
401 { 424 {
402 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; 425 return decode_coding_system_type (type, ERROR_ME_NOT) != 0;
403 } 426 }
427
428 #ifdef MULE
429 static Lisp_Object Vdefault_query_coding_region_chartab_cache;
430
431 /* Non-static because it's used in INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */
432 Lisp_Object
433 default_query_method (Lisp_Object codesys, struct buffer *buf,
434 Charbpos end, int flags)
435 {
436 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end;
437 Charbpos pos_byte = BYTE_BUF_PT (buf);
438 Lisp_Object safe_charsets = XCODING_SYSTEM_SAFE_CHARSETS (codesys);
439 Lisp_Object safe_chars = XCODING_SYSTEM_SAFE_CHARS (codesys),
440 result = Qnil;
441 enum query_coding_failure_reasons failed_reason,
442 previous_failed_reason = query_coding_succeeded;
443
444 /* safe-charsets of t means the coding system can encode everything. */
445 if (EQ (Qnil, safe_chars))
446 {
447 if (EQ (Qt, safe_charsets))
448 {
449 return Qnil;
450 }
451
452 /* If we've no information on what characters the coding system can
453 encode, give up. */
454 if (EQ (Qnil, safe_charsets) && EQ (Qnil, safe_chars))
455 {
456 return Qunbound;
457 }
458
459 safe_chars = Fgethash (safe_charsets,
460 Vdefault_query_coding_region_chartab_cache,
461 Qnil);
462 if (NILP (safe_chars))
463 {
464 safe_chars = Fmake_char_table (Qgeneric);
465 {
466 EXTERNAL_LIST_LOOP_2 (safe_charset, safe_charsets)
467 Fput_char_table (safe_charset, Qt, safe_chars);
468 }
469
470 Fputhash (safe_charsets, safe_chars,
471 Vdefault_query_coding_region_chartab_cache);
472 }
473 }
474
475 if (flags & QUERY_METHOD_HIGHLIGHT &&
476 /* If we're being called really early, live without highlights getting
477 cleared properly: */
478 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function)))
479 {
480 /* It's okay to call Lisp here; the only non-stack object we may have
481 allocated up to this point is safe_chars, and that's
482 reachable from its entry in
483 Vdefault_query_coding_region_chartab_cache. */
484 call3 (Qquery_coding_clear_highlights, make_int (pos), make_int (end),
485 wrap_buffer (buf));
486 }
487
488 while (pos < end)
489 {
490 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte);
491 if (!EQ (Qnil, get_char_table (ch, safe_chars)))
492 {
493 pos++;
494 INC_BYTEBPOS (buf, pos_byte);
495 }
496 else
497 {
498 fail_range_start = pos;
499 while ((pos < end) &&
500 (EQ (Qnil, get_char_table (ch, safe_chars))
501 && (failed_reason = query_coding_unencodable))
502 && (previous_failed_reason == query_coding_succeeded
503 || previous_failed_reason == failed_reason))
504 {
505 pos++;
506 INC_BYTEBPOS (buf, pos_byte);
507 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte);
508 previous_failed_reason = failed_reason;
509 }
510
511 if (fail_range_start == pos)
512 {
513 /* The character can actually be encoded; move on. */
514 pos++;
515 INC_BYTEBPOS (buf, pos_byte);
516 }
517 else
518 {
519 assert (previous_failed_reason == query_coding_unencodable);
520
521 if (flags & QUERY_METHOD_ERRORP)
522 {
523 DECLARE_EISTRING (error_details);
524
525 eicpy_ascii (error_details, "Cannot encode ");
526 eicat_lstr (error_details,
527 make_string_from_buffer (buf, fail_range_start,
528 pos -
529 fail_range_start));
530 eicat_ascii (error_details, " using coding system");
531
532 signal_error (Qtext_conversion_error,
533 (const CIbyte *)(eidata (error_details)),
534 XCODING_SYSTEM_NAME (codesys));
535 }
536
537 if (NILP (result))
538 {
539 result = Fmake_range_table (Qstart_closed_end_open);
540 }
541
542 fail_range_end = pos;
543
544 Fput_range_table (make_int (fail_range_start),
545 make_int (fail_range_end),
546 Qunencodable,
547 result);
548 previous_failed_reason = query_coding_succeeded;
549
550 if (flags & QUERY_METHOD_HIGHLIGHT)
551 {
552 Lisp_Object extent
553 = Fmake_extent (make_int (fail_range_start),
554 make_int (fail_range_end),
555 wrap_buffer (buf));
556
557 Fset_extent_priority
558 (extent, make_int (2 + mouse_highlight_priority));
559 Fset_extent_face (extent, Qquery_coding_warning_face);
560 }
561 }
562 }
563 }
564
565 return result;
566 }
567 #else
568 Lisp_Object
569 default_query_method (Lisp_Object UNUSED (codesys),
570 struct buffer * UNUSED (buf),
571 Charbpos UNUSED (end), int UNUSED (flags))
572 {
573 return Qnil;
574 }
575 #endif /* defined MULE */
404 576
405 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* 577 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /*
406 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. 578 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid.
407 Valid types depend on how XEmacs was compiled but may include 579 Valid types depend on how XEmacs was compiled but may include
408 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', 580 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis',
454 (object)) 626 (object))
455 { 627 {
456 return CODING_SYSTEMP (object) ? Qt : Qnil; 628 return CODING_SYSTEMP (object) ? Qt : Qnil;
457 } 629 }
458 630
631 static Lisp_Object
632 find_coding_system (Lisp_Object coding_system_or_name,
633 int do_autoloads)
634 {
635 Lisp_Object lookup;
636
637 if (NILP (coding_system_or_name))
638 coding_system_or_name = Qbinary;
639 else if (CODING_SYSTEMP (coding_system_or_name))
640 return coding_system_or_name;
641 else
642 CHECK_SYMBOL (coding_system_or_name);
643
644 while (1)
645 {
646 lookup =
647 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
648
649 if (CONSP (lookup) && do_autoloads)
650 {
651 struct gcpro gcpro1;
652 int length;
653 DECLARE_EISTRING (desired_base);
654 DECLARE_EISTRING (warning_info);
655
656 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name));
657
658 /* Work out the name of the base coding system. */
659 length = eilen (desired_base);
660 if (length > (int)(sizeof ("-unix") - 1))
661 {
662 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base))
663 + (length - (sizeof ("-unix") - 1))))
664 {
665 eidel (desired_base, length - (sizeof ("-unix") - 1),
666 -1, 5, 5);
667 }
668 }
669 else if (length > (int)(sizeof ("-dos") - 1))
670 {
671 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base))
672 + (length - (sizeof ("-dos") - 1)))) ||
673 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base))
674 + (length - (sizeof ("-mac") - 1)))))
675 {
676 eidel (desired_base, length - (sizeof ("-dos") - 1), -1,
677 4, 4);
678 }
679 }
680
681 coding_system_or_name = intern_int (eidata (desired_base));
682
683 /* Remove this coding system and its subsidiary coding
684 systems from the hash, to avoid calling this code recursively. */
685 Fremhash (coding_system_or_name, Vcoding_system_hash_table);
686 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"),
687 Vcoding_system_hash_table);
688 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"),
689 Vcoding_system_hash_table);
690 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"),
691 Vcoding_system_hash_table);
692
693 eicpy_ascii (warning_info, "Error autoloading coding system ");
694 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name));
695
696 /* Keep around the form so it doesn't disappear from under
697 #'eval's feet. */
698 GCPRO1 (lookup);
699 call1_trapping_problems ((const CIbyte *)eidata (warning_info),
700 Qeval, lookup, 0);
701 UNGCPRO;
702
703 lookup =
704 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
705 }
706
707 if (CODING_SYSTEMP (lookup) || NILP (lookup))
708 return lookup;
709
710 coding_system_or_name = lookup;
711 }
712 }
713
459 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* 714 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /*
460 Retrieve the coding system of the given name. 715 Retrieve the coding system of the given name.
461 716
462 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply 717 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply
463 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. 718 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol.
464 If there is no such coding system, nil is returned. Otherwise the 719 If there is no such coding system, nil is returned. Otherwise the
465 associated coding system object is returned. 720 associated coding system object is returned.
466 */ 721 */
467 (coding_system_or_name)) 722 (coding_system_or_name))
468 { 723 {
469 if (NILP (coding_system_or_name)) 724 return find_coding_system(coding_system_or_name, 1);
470 coding_system_or_name = Qbinary; 725 }
471 else if (CODING_SYSTEMP (coding_system_or_name)) 726
472 return coding_system_or_name; 727 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /*
473 else 728 Define SYMBOL as a coding-system that is loaded on demand.
474 CHECK_SYMBOL (coding_system_or_name); 729
475 730 FORM is a form to evaluate to define the coding-system.
476 while (1) 731 */
477 { 732 (symbol, form))
478 coding_system_or_name = 733 {
479 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); 734 Lisp_Object lookup;
480 735
481 if (CODING_SYSTEMP (coding_system_or_name) 736 CHECK_SYMBOL (symbol);
482 || NILP (coding_system_or_name)) 737 CHECK_CONS (form);
483 return coding_system_or_name; 738
484 } 739 lookup = find_coding_system (symbol, 0);
740
741 if (!NILP (lookup) &&
742 /* Allow autoloads to be redefined. */
743 !CONSP (lookup))
744 {
745 invalid_operation ("Cannot redefine existing coding system",
746 symbol);
747 }
748
749 Fputhash (symbol, form, Vcoding_system_hash_table);
750 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form,
751 Vcoding_system_hash_table);
752 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form,
753 Vcoding_system_hash_table);
754 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form,
755 Vcoding_system_hash_table);
756
757 /* Tell the POSIX locale infrastructure about this coding system (though
758 unfortunately it'll be too late for the startup locale sniffing). */
759 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash))
760 {
761 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash);
762 DECLARE_EISTRING (minimal_name);
763 Ibyte *full_name;
764 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i;
765
766 if (!NILP (val))
767 {
768 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol));
769 for (i = 0; i < len; ++i)
770 {
771 if (full_name[i] >= '0' && full_name[i] <= '9')
772 {
773 eicat_ch (minimal_name, full_name[i]);
774 }
775 else if (full_name[i] >= 'a' && full_name[i] <= 'z')
776 {
777 eicat_ch (minimal_name, full_name[i]);
778 }
779 else if (full_name[i] >= 'A' && full_name[i] <= 'Z')
780 {
781 eicat_ch (minimal_name, full_name[i] +
782 ('a' - 'A'));
783 }
784 }
785
786 if (eilen (minimal_name))
787 {
788 CHECK_HASH_TABLE (val);
789 Fputhash (eimake_string(minimal_name), symbol, val);
790 }
791 }
792 }
793
794 return Qt;
485 } 795 }
486 796
487 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* 797 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
488 Retrieve the coding system of the given name. 798 Retrieve the coding system of the given name.
489 Same as `find-coding-system' except that if there is no such 799 Same as `find-coding-system' except that if there is no such
636 int normal; 946 int normal;
637 int internal; 947 int internal;
638 }; 948 };
639 949
640 static int 950 static int
641 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object UNUSED (value), 951 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value,
642 void *coding_system_list_closure) 952 void *coding_system_list_closure)
643 { 953 {
644 /* This function can GC */ 954 /* This function can GC */
645 struct coding_system_list_closure *cscl = 955 struct coding_system_list_closure *cscl =
646 (struct coding_system_list_closure *) coding_system_list_closure; 956 (struct coding_system_list_closure *) coding_system_list_closure;
647 Lisp_Object *coding_system_list = cscl->coding_system_list; 957 Lisp_Object *coding_system_list = cscl->coding_system_list;
648 958
649 /* We can't just use VALUE because KEY might be an alias, and we need 959 /* We can't just use VALUE because KEY might be an alias, and we need
650 the real coding system object. */ 960 the real coding system object.
651 if (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? 961
652 cscl->internal : cscl->normal) 962 Autoloaded coding systems have conses for their values, and can't be
963 internal coding systems, or coding system aliases. */
964 if (CONSP (value) ||
965 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ?
966 cscl->internal : cscl->normal))
653 *coding_system_list = Fcons (key, *coding_system_list); 967 *coding_system_list = Fcons (key, *coding_system_list);
654 return 0; 968 return 0;
655 } 969 }
656 970
657 /* #### should we specify a convention for "all coding systems"? */ 971 /* #### should we specify a convention for "all coding systems"? */
737 } 1051 }
738 } 1052 }
739 1053
740 struct subsidiary_type 1054 struct subsidiary_type
741 { 1055 {
742 Ascbyte *extension; 1056 const Ascbyte *extension;
743 Ascbyte *mnemonic_ext; 1057 const Ascbyte *mnemonic_ext;
744 enum eol_type eol; 1058 enum eol_type eol;
745 }; 1059 };
746 1060
747 static struct subsidiary_type coding_subsidiary_list[] = 1061 static struct subsidiary_type coding_subsidiary_list[] =
748 { { "-unix", "", EOL_LF }, 1062 { { "-unix", "", EOL_LF },
784 (decodes byte->char), we need to coerce it to one by the appropriate 1098 (decodes byte->char), we need to coerce it to one by the appropriate
785 wrapping in CANONICAL. */ 1099 wrapping in CANONICAL. */
786 1100
787 for (i = 0; i < countof (coding_subsidiary_list); i++) 1101 for (i = 0; i < countof (coding_subsidiary_list); i++)
788 { 1102 {
789 Ascbyte *extension = coding_subsidiary_list[i].extension; 1103 const Ascbyte *extension = coding_subsidiary_list[i].extension;
790 Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; 1104 const Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext;
791 enum eol_type eol = coding_subsidiary_list[i].eol; 1105 enum eol_type eol = coding_subsidiary_list[i].eol;
792 1106
793 qxestrcpy_ascii (codesys_name + len, extension); 1107 qxestrcpy_ascii (codesys_name + len, extension);
794 codesys_name_sym = intern_int (codesys_name); 1108 codesys_name_sym = intern_int (codesys_name);
795 if (mlen != -1) 1109 if (mlen != -1)
822 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; 1136 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys;
823 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; 1137 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys;
824 } 1138 }
825 } 1139 }
826 1140
1141 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p,
1142 1, 1, 0, /*
1143 Return t if OBJECT names a coding system, and is not a coding system alias.
1144 */
1145 (object))
1146 {
1147 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil))
1148 ? Qt : Qnil;
1149 }
1150
827 /* Basic function to create new coding systems. For `make-coding-system', 1151 /* Basic function to create new coding systems. For `make-coding-system',
828 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, 1152 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE,
829 DESCRIPTION, and PROPS are the same. All created coding systems are put 1153 DESCRIPTION, and PROPS are the same. All created coding systems are put
830 in a hash table indexed by NAME. 1154 in a hash table indexed by NAME.
831 1155
861 crazy crap is based on existing behavior in other Mule versions, 1185 crazy crap is based on existing behavior in other Mule versions,
862 including FSF Emacs.) 1186 including FSF Emacs.)
863 */ 1187 */
864 1188
865 static Lisp_Object 1189 static Lisp_Object
866 make_coding_system_1 (Lisp_Object name_or_existing, Ascbyte *prefix, 1190 make_coding_system_1 (Lisp_Object name_or_existing, const Ascbyte *prefix,
867 Lisp_Object type, Lisp_Object description, 1191 Lisp_Object type, Lisp_Object description,
868 Lisp_Object props) 1192 Lisp_Object props)
869 { 1193 {
870 Lisp_Coding_System *cs; 1194 Lisp_Coding_System *cs;
871 int need_to_setup_eol_systems = 1; 1195 int need_to_setup_eol_systems = 1;
872 enum eol_type eol_wrapper = EOL_AUTODETECT; 1196 enum eol_type eol_wrapper = EOL_AUTODETECT;
873 struct coding_system_methods *meths; 1197 struct coding_system_methods *meths;
874 Lisp_Object csobj; 1198 Lisp_Object csobj;
875 Lisp_Object defmnem = Qnil; 1199 Lisp_Object defmnem = Qnil, aliases = Qnil;
876 1200
877 if (NILP (type)) 1201 if (NILP (type))
878 type = Qundecided; 1202 type = Qundecided;
879 meths = decode_coding_system_type (type, ERROR_ME); 1203 meths = decode_coding_system_type (type, ERROR_ME);
880 1204
905 xfree (newname, Ibyte *); 1229 xfree (newname, Ibyte *);
906 } 1230 }
907 else 1231 else
908 CHECK_SYMBOL (name_or_existing); 1232 CHECK_SYMBOL (name_or_existing);
909 1233
910 if (!NILP (Ffind_coding_system (name_or_existing))) 1234 /* See if there is an entry for name_or_existing in the defined coding system
1235 hash table. */
1236 csobj = find_coding_system (name_or_existing, 0);
1237 /* Error if it's there and not an autoload form. */
1238 if (!NILP (csobj) && !CONSP (csobj))
911 invalid_operation ("Cannot redefine existing coding system", 1239 invalid_operation ("Cannot redefine existing coding system",
912 name_or_existing); 1240 name_or_existing);
913 1241
914 cs = allocate_coding_system (meths, meths->extra_data_size, 1242 cs = allocate_coding_system (meths, meths->extra_data_size,
915 name_or_existing); 1243 name_or_existing);
916 csobj = wrap_coding_system (cs); 1244 csobj = wrap_coding_system (cs);
917 1245
955 1283
956 else if (EQ (key, Qpost_read_conversion)) 1284 else if (EQ (key, Qpost_read_conversion))
957 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; 1285 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value;
958 else if (EQ (key, Qpre_write_conversion)) 1286 else if (EQ (key, Qpre_write_conversion))
959 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; 1287 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value;
1288 else if (EQ (key, Qaliases))
1289 {
1290 EXTERNAL_LIST_LOOP_2 (alias, value)
1291 {
1292 CHECK_SYMBOL (alias);
1293
1294 if (!NILP (Fcoding_system_canonical_name_p (alias)))
1295 {
1296 invalid_change ("Symbol is the canonical name of a "
1297 "coding system and cannot be redefined",
1298 alias);
1299 }
1300 }
1301 aliases = value;
1302 }
960 /* FSF compatibility */ 1303 /* FSF compatibility */
961 else if (EQ (key, Qtranslation_table_for_decode)) 1304 else if (EQ (key, Qtranslation_table_for_decode))
962 ; 1305 ;
963 else if (EQ (key, Qtranslation_table_for_encode)) 1306 else if (EQ (key, Qtranslation_table_for_encode))
964 ; 1307 ;
965 else if (EQ (key, Qsafe_chars)) 1308 else if (EQ (key, Qsafe_chars))
966 ; 1309 {
1310 CHECK_CHAR_TABLE (value);
1311 CODING_SYSTEM_SAFE_CHARS (cs) = value;
1312 }
967 else if (EQ (key, Qsafe_charsets)) 1313 else if (EQ (key, Qsafe_charsets))
968 ; 1314 {
1315 if (!EQ (Qt, value)
1316 /* Would be nice to actually do this check, but there are
1317 some order conflicts with japanese.el and
1318 mule-coding.el */
1319 && 0)
1320 {
1321 #ifdef MULE
1322 EXTERNAL_LIST_LOOP_2 (safe_charset, value)
1323 CHECK_CHARSET (Ffind_charset (safe_charset));
1324 #endif
1325 }
1326
1327 CODING_SYSTEM_SAFE_CHARSETS (cs) = value;
1328 }
1329 else if (EQ (key, Qcategory))
1330 {
1331 Fput (name_or_existing, intern ("coding-system-property"),
1332 Fplist_put (Fget (name_or_existing,
1333 intern ("coding-system-property"),
1334 Qnil),
1335 Qcategory, value));
1336 }
969 else if (EQ (key, Qmime_charset)) 1337 else if (EQ (key, Qmime_charset))
970 ; 1338 ;
971 else if (EQ (key, Qvalid_codes)) 1339 else if (EQ (key, Qvalid_codes))
972 ; 1340 ;
973 else 1341 else
982 { 1350 {
983 XCODING_SYSTEM_CANONICAL (csobj) = 1351 XCODING_SYSTEM_CANONICAL (csobj) =
984 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); 1352 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil);
985 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system 1353 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system
986 below */ 1354 below */
1355
1356 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table);
987 1357
988 if (need_to_setup_eol_systems && !cs->internal_p) 1358 if (need_to_setup_eol_systems && !cs->internal_p)
989 setup_eol_coding_systems (csobj); 1359 setup_eol_coding_systems (csobj);
990 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) 1360 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF)
991 { 1361 {
1020 Qconvert_eol_crlf), 1390 Qconvert_eol_crlf),
1021 Qcanonicalize_after_coding, 1391 Qcanonicalize_after_coding,
1022 csobj)); 1392 csobj));
1023 } 1393 }
1024 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; 1394 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper;
1395
1396 {
1397 EXTERNAL_LIST_LOOP_2 (alias, aliases)
1398 Fdefine_coding_system_alias (alias, csobj);
1399 }
1025 } 1400 }
1026
1027 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table);
1028 1401
1029 return csobj; 1402 return csobj;
1030 } 1403 }
1031 1404
1032 Lisp_Object 1405 Lisp_Object
1033 make_internal_coding_system (Lisp_Object existing, Ascbyte *prefix, 1406 make_internal_coding_system (Lisp_Object existing, const Ascbyte *prefix,
1034 Lisp_Object type, Lisp_Object description, 1407 Lisp_Object type, Lisp_Object description,
1035 Lisp_Object props) 1408 Lisp_Object props)
1036 { 1409 {
1037 return make_coding_system_1 (existing, prefix, type, description, props); 1410 return make_coding_system_1 (existing, prefix, type, description, props);
1038 } 1411 }
1039 1412
1040 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* 1413 DEFUN ("make-coding-system-internal", Fmake_coding_system_internal, 2, 4, 0, /*
1041 Register symbol NAME as a coding system. 1414 See `make-coding-system'. This does much of the work of that function.
1042 1415
1043 TYPE describes the conversion method used and should be one of 1416 Without Mule support, it does all the work of that function, and an alias
1044 1417 exists, mapping `make-coding-system' to
1045 nil or `undecided' 1418 `make-coding-system-internal'. You'll need a non-Mule XEmacs to read the
1046 Automatic conversion. XEmacs attempts to detect the coding system 1419 complete docstring. Or you can just read it in make-coding-system.el;
1047 used in the file. 1420 something like the following should work:
1048 `chain' 1421
1049 Chain two or more coding systems together to make a combination coding 1422 \\[find-function-other-window] find-file RET \\[find-file] mule/make-coding-system.el RET
1050 system.
1051 `no-conversion'
1052 No conversion. Use this for binary files and such. On output,
1053 graphic characters that are not in ASCII or Latin-1 will be
1054 replaced by a ?. (For a no-conversion-encoded buffer, these
1055 characters will only be present if you explicitly insert them.)
1056 `convert-eol'
1057 Convert CRLF sequences or CR to LF.
1058 `shift-jis'
1059 Shift-JIS (a Japanese encoding commonly used in PC operating systems).
1060 `unicode'
1061 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.).
1062 `mswindows-unicode-to-multibyte'
1063 (MS Windows only) Converts from Windows Unicode to Windows Multibyte
1064 (any code page encoding) upon encoding, and the other way upon decoding.
1065 `mswindows-multibyte'
1066 Converts to or from Windows Multibyte (any code page encoding).
1067 This is resolved into a chain of `mswindows-unicode' and
1068 `mswindows-unicode-to-multibyte'.
1069 `iso2022'
1070 Any ISO2022-compliant encoding. Among other things, this includes
1071 JIS (the Japanese encoding commonly used for e-mail), EUC (the
1072 standard Unix encoding for Japanese and other languages), and
1073 Compound Text (the encoding used in X11). You can specify more
1074 specific information about the conversion with the PROPS argument.
1075 `big5'
1076 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan).
1077 `ccl'
1078 The conversion is performed using a user-written pseudo-code
1079 program. CCL (Code Conversion Language) is the name of this
1080 pseudo-code.
1081 `gzip'
1082 GZIP compression format.
1083 `internal'
1084 Write out or read in the raw contents of the memory representing
1085 the buffer's text. This is primarily useful for debugging
1086 purposes, and is only enabled when XEmacs has been compiled with
1087 DEBUG_XEMACS defined (via the --debug configure option).
1088 WARNING: Reading in a file using `internal' conversion can result
1089 in an internal inconsistency in the memory representing a
1090 buffer's text, which will produce unpredictable results and may
1091 cause XEmacs to crash. Under normal circumstances you should
1092 never use `internal' conversion.
1093
1094 DESCRIPTION is a short English phrase describing the coding system,
1095 suitable for use as a menu item. (See also the `documentation' property
1096 below.)
1097
1098 PROPS is a property list, describing the specific nature of the
1099 character set. Recognized properties are:
1100
1101 `mnemonic'
1102 String to be displayed in the modeline when this coding system is
1103 active.
1104
1105 `documentation'
1106 Detailed documentation on the coding system.
1107
1108 `eol-type'
1109 End-of-line conversion to be used. It should be one of
1110
1111 nil
1112 Automatically detect the end-of-line type (LF, CRLF,
1113 or CR). Also generate subsidiary coding systems named
1114 `NAME-unix', `NAME-dos', and `NAME-mac', that are
1115 identical to this coding system but have an EOL-TYPE
1116 value of `lf', `crlf', and `cr', respectively.
1117 `lf'
1118 The end of a line is marked externally using ASCII LF.
1119 Since this is also the way that XEmacs represents an
1120 end-of-line internally, specifying this option results
1121 in no end-of-line conversion. This is the standard
1122 format for Unix text files.
1123 `crlf'
1124 The end of a line is marked externally using ASCII
1125 CRLF. This is the standard format for MS-DOS text
1126 files.
1127 `cr'
1128 The end of a line is marked externally using ASCII CR.
1129 This is the standard format for Macintosh text files.
1130 t
1131 Automatically detect the end-of-line type but do not
1132 generate subsidiary coding systems. (This value is
1133 converted to nil when stored internally, and
1134 `coding-system-property' will return nil.)
1135
1136 `post-read-conversion'
1137 The value is a function to call after some text is inserted and
1138 decoded by the coding system itself and before any functions in
1139 `after-change-functions' are called. (#### Not actually true in
1140 XEmacs. `after-change-functions' will be called twice if
1141 `post-read-conversion' changes something.) The argument of this
1142 function is the same as for a function in
1143 `after-insert-file-functions', i.e. LENGTH of the text inserted,
1144 with point at the head of the text to be decoded.
1145
1146 `pre-write-conversion'
1147 The value is a function to call after all functions in
1148 `write-region-annotate-functions' and `buffer-file-format' are
1149 called, and before the text is encoded by the coding system itself.
1150 The arguments to this function are the same as those of a function
1151 in `write-region-annotate-functions', i.e. FROM and TO, specifying
1152 a region of text.
1153
1154
1155
1156 The following properties are allowed for FSF compatibility but currently
1157 ignored:
1158
1159 `translation-table-for-decode'
1160 The value is a translation table to be applied on decoding. See
1161 the function `make-translation-table' for the format of translation
1162 table. This is not applicable to CCL-based coding systems.
1163
1164 `translation-table-for-encode'
1165 The value is a translation table to be applied on encoding. This is
1166 not applicable to CCL-based coding systems.
1167
1168 `safe-chars'
1169 The value is a char table. If a character has non-nil value in it,
1170 the character is safely supported by the coding system. This
1171 overrides the specification of safe-charsets.
1172
1173 `safe-charsets'
1174 The value is a list of charsets safely supported by the coding
1175 system. The value t means that all charsets Emacs handles are
1176 supported. Even if some charset is not in this list, it doesn't
1177 mean that the charset can't be encoded in the coding system;
1178 it just means that some other receiver of text encoded
1179 in the coding system won't be able to handle that charset.
1180
1181 `mime-charset'
1182 The value is a symbol whose name is the `MIME-charset' parameter of
1183 the coding system.
1184
1185 `valid-codes' (meaningful only for a coding system based on CCL)
1186 The value is a list to indicate valid byte ranges of the encoded
1187 file. Each element of the list is an integer or a cons of integers.
1188 In the former case, the integer value is a valid byte code. In the
1189 latter case, the integers specify the range of valid byte codes.
1190
1191
1192
1193 The following additional property is recognized if TYPE is `convert-eol':
1194
1195 `subtype'
1196 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding,
1197 the corresponding sequence will be converted to LF. When encoding,
1198 the opposite happens. This coding system converts characters to
1199 characters.
1200
1201
1202
1203 The following additional properties are recognized if TYPE is `iso2022':
1204
1205 `charset-g0'
1206 `charset-g1'
1207 `charset-g2'
1208 `charset-g3'
1209 The character set initially designated to the G0 - G3 registers.
1210 The value should be one of
1211
1212 -- A charset object (designate that character set)
1213 -- nil (do not ever use this register)
1214 -- t (no character set is initially designated to
1215 the register, but may be later on; this automatically
1216 sets the corresponding `force-g*-on-output' property)
1217
1218 `force-g0-on-output'
1219 `force-g1-on-output'
1220 `force-g2-on-output'
1221 `force-g3-on-output'
1222 If non-nil, send an explicit designation sequence on output before
1223 using the specified register.
1224
1225 `short'
1226 If non-nil, use the short forms "ESC $ @", "ESC $ A", and
1227 "ESC $ B" on output in place of the full designation sequences
1228 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B".
1229
1230 `no-ascii-eol'
1231 If non-nil, don't designate ASCII to G0 at each end of line on output.
1232 Setting this to non-nil also suppresses other state-resetting that
1233 normally happens at the end of a line.
1234
1235 `no-ascii-cntl'
1236 If non-nil, don't designate ASCII to G0 before control chars on output.
1237
1238 `seven'
1239 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit
1240 environment.
1241
1242 `lock-shift'
1243 If non-nil, use locking-shift (SO/SI) instead of single-shift
1244 or designation by escape sequence.
1245
1246 `no-iso6429'
1247 If non-nil, don't use ISO6429's direction specification.
1248
1249 `escape-quoted'
1250 If non-nil, literal control characters that are the same as
1251 the beginning of a recognized ISO2022 or ISO6429 escape sequence
1252 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
1253 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character
1254 so that they can be properly distinguished from an escape sequence.
1255 (Note that doing this results in a non-portable encoding.) This
1256 encoding flag is used for byte-compiled files. Note that ESC
1257 is a good choice for a quoting character because there are no
1258 escape sequences whose second byte is a character from the Control-0
1259 or Control-1 character sets; this is explicitly disallowed by the
1260 ISO2022 standard.
1261
1262 `input-charset-conversion'
1263 A list of conversion specifications, specifying conversion of
1264 characters in one charset to another when decoding is performed.
1265 Each specification is a list of two elements: the source charset,
1266 and the destination charset.
1267
1268 `output-charset-conversion'
1269 A list of conversion specifications, specifying conversion of
1270 characters in one charset to another when encoding is performed.
1271 The form of each specification is the same as for
1272 `input-charset-conversion'.
1273
1274
1275
1276 The following additional properties are recognized (and required)
1277 if TYPE is `ccl':
1278
1279 `decode'
1280 CCL program used for decoding (converting to internal format).
1281
1282 `encode'
1283 CCL program used for encoding (converting to external format).
1284
1285
1286 The following additional properties are recognized if TYPE is `chain':
1287
1288 `chain'
1289 List of coding systems to be chained together, in decoding order.
1290
1291 `canonicalize-after-coding'
1292 Coding system to be returned by the detector routines in place of
1293 this coding system.
1294
1295
1296
1297 The following additional properties are recognized if TYPE is `unicode':
1298
1299 `type'
1300 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not
1301 yet implemented). `utf-16' is the basic two-byte encoding;
1302 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible
1303 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using
1304 only characters that will safely pass through all mail gateways.
1305 [[ This should be \"transformation format\". There should also be
1306 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]]
1307
1308 `little-endian'
1309 If non-nil, `utf-16' and `ucs-4' will write out the groups of two
1310 or four bytes little-endian instead of big-endian. This is required,
1311 for example, under Windows.
1312
1313 `need-bom'
1314 If non-nil, a byte order mark (BOM, or Unicode FFFE) should be
1315 written out at the beginning of the data. This serves both to
1316 identify the endianness of the following data and to mark the
1317 data as Unicode (at least, this is how Windows uses it).
1318 [[ The correct term is \"signature\", since this technique may also
1319 be used with UTF-8. That is the term used in the standard. ]]
1320
1321
1322 The following additional properties are recognized if TYPE is
1323 `mswindows-multibyte':
1324
1325 `code-page'
1326 Either a number (specifying a particular code page) or one of the
1327 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI,
1328 OEM, Macintosh, or EBCDIC code page associated with a particular
1329 locale (given by the `locale' property). NOTE: EBCDIC code pages
1330 only exist in Windows 2000 and later.
1331
1332 `locale'
1333 If `code-page' is a symbol, this specifies the locale whose code
1334 page of the corresponding type should be used. This should be
1335 one of the following: A cons of two strings, (LANGUAGE
1336 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a
1337 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is
1338 used); or one of the symbols `current', `user-default', or
1339 `system-default', corresponding to the values of
1340 `mswindows-current-locale', `mswindows-user-default-locale', or
1341 `mswindows-system-default-locale', respectively.
1342
1343
1344
1345 The following additional properties are recognized if TYPE is `undecided':
1346 [[ Doesn't GNU use \"detect-*\" for the following two? ]]
1347
1348 `do-eol'
1349 Do EOL detection.
1350
1351 `do-coding'
1352 Do encoding detection.
1353
1354 `coding-system'
1355 If encoding detection is not done, use the specified coding system
1356 to do decoding. This is used internally when implementing coding
1357 systems with an EOL type that specifies autodetection (the default),
1358 so that the detector routines return the proper subsidiary.
1359
1360
1361
1362 The following additional property is recognized if TYPE is `gzip':
1363
1364 `level'
1365 Compression level: 0 through 9, or `default' (currently 6).
1366 1423
1367 */ 1424 */
1368 (name, type, description, props)) 1425 (name, type, description, props))
1369 { 1426 {
1370 return make_coding_system_1 (name, 0, type, description, props); 1427 return make_coding_system_1 (name, 0, type, description, props);
1380 (old_coding_system, new_name)) 1437 (old_coding_system, new_name))
1381 { 1438 {
1382 Lisp_Object new_coding_system; 1439 Lisp_Object new_coding_system;
1383 old_coding_system = Fget_coding_system (old_coding_system); 1440 old_coding_system = Fget_coding_system (old_coding_system);
1384 new_coding_system = 1441 new_coding_system =
1385 UNBOUNDP (new_name) ? Qnil : Ffind_coding_system (new_name); 1442 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0);
1386 if (NILP (new_coding_system)) 1443 if (NILP (new_coding_system))
1387 { 1444 {
1388 new_coding_system = 1445 new_coding_system =
1389 wrap_coding_system 1446 wrap_coding_system
1390 (allocate_coding_system 1447 (allocate_coding_system
1404 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); 1461 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
1405 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from)); 1462 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from));
1406 to->name = new_name; 1463 to->name = new_name;
1407 } 1464 }
1408 return new_coding_system; 1465 return new_coding_system;
1409 }
1410
1411 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p,
1412 1, 1, 0, /*
1413 Return t if OBJECT names a coding system, and is not a coding system alias.
1414 */
1415 (object))
1416 {
1417 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil))
1418 ? Qt : Qnil;
1419 } 1466 }
1420 1467
1421 /* #### Shouldn't this really be a find/get pair? */ 1468 /* #### Shouldn't this really be a find/get pair? */
1422 1469
1423 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* 1470 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /*
1698 return XCODING_SYSTEM_EOL_CR (coding_system); 1745 return XCODING_SYSTEM_EOL_CR (coding_system);
1699 else if (EQ (prop, Qpost_read_conversion)) 1746 else if (EQ (prop, Qpost_read_conversion))
1700 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); 1747 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system);
1701 else if (EQ (prop, Qpre_write_conversion)) 1748 else if (EQ (prop, Qpre_write_conversion))
1702 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); 1749 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system);
1750 else if (EQ (prop, Qsafe_charsets))
1751 return XCODING_SYSTEM_SAFE_CHARSETS (coding_system);
1752 else if (EQ (prop, Qsafe_chars))
1753 return XCODING_SYSTEM_SAFE_CHARS (coding_system);
1703 else 1754 else
1704 { 1755 {
1705 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), 1756 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system),
1706 getprop, 1757 getprop,
1707 (coding_system, prop), 1758 (coding_system, prop),
2272 Lstream_delete (XLSTREAM (lb_outstream)); 2323 Lstream_delete (XLSTREAM (lb_outstream));
2273 return make_int (retlen); 2324 return make_int (retlen);
2274 } 2325 }
2275 } 2326 }
2276 2327
/* Lisp primitive `decode-coding-region'.  Thin wrapper: START, END,
   CODING-SYSTEM and BUFFER are handed unchanged to
   encode_decode_coding_region, with CODING_DECODE selecting the direction;
   whatever that call returns is returned to the caller. */
2277 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, 0, /* 2328 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4,
2329 "*r\nzDecode from coding system: \ni", /*
2278 Decode the text between START and END which is encoded in CODING-SYSTEM. 2330 Decode the text between START and END which is encoded in CODING-SYSTEM.
2279 This is useful if you've read in encoded text from a file without decoding 2331 This is useful if you've read in encoded text from a file without decoding
2280 it (e.g. you read in a JIS-formatted file but used the `binary' or 2332 it (e.g. you read in a JIS-formatted file but used the `binary' or
2281 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). 2333 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B").
2282 Return length of decoded text. 2334 Return length of decoded text.
2283 BUFFER defaults to the current buffer if unspecified. 2335 BUFFER defaults to the current buffer if unspecified, and when interactive.
2284 */ 2336 */
2285 (start, end, coding_system, buffer)) 2337 (start, end, coding_system, buffer))
2286 { 2338 {
2287 return encode_decode_coding_region (start, end, coding_system, buffer, 2339 return encode_decode_coding_region (start, end, coding_system, buffer,
2288 CODING_DECODE); 2340 CODING_DECODE);
2289 } 2341 }
2290 2342
/* Lisp primitive `encode-coding-region'.  Thin wrapper: START, END,
   CODING-SYSTEM and BUFFER are handed unchanged to
   encode_decode_coding_region, with CODING_ENCODE selecting the direction;
   whatever that call returns is returned to the caller. */
2291 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, 0, /* 2343 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4,
2344 "*r\nzEncode to coding system: \ni", /*
2292 Encode the text between START and END using CODING-SYSTEM. 2345 Encode the text between START and END using CODING-SYSTEM.
2293 This will, for example, convert Japanese characters into stuff such as 2346 This will, for example, convert Japanese characters into stuff such as
2294 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded 2347 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text.
2295 text. BUFFER defaults to the current buffer if unspecified. 2348 BUFFER defaults to the current buffer if unspecified, and when interactive.
2296 */ 2349 */
2297 (start, end, coding_system, buffer)) 2350 (start, end, coding_system, buffer))
2298 { 2351 {
2299 return encode_decode_coding_region (start, end, coding_system, buffer, 2352 return encode_decode_coding_region (start, end, coding_system, buffer,
2300 CODING_ENCODE); 2353 CODING_ENCODE);
2301 } 2354 }
2355
2356 DEFUN ("query-coding-region", Fquery_coding_region, 3, 7, 0, /*
2357 Work out whether CODING-SYSTEM can losslessly encode a region.
2358
2359 START and END are the beginning and end of the region to check.
2360 CODING-SYSTEM is the coding system to try.
2361
2362 Optional argument BUFFER is the buffer to check, and defaults to the current
2363 buffer.
2364
2365 IGNORE-INVALID-SEQUENCESP, also an optional argument, says to treat XEmacs
2366 characters which have an unambiguous encoded representation, despite being
2367 undefined in what they represent, as encodable. These chiefly arise with
2368 variable-length encodings like UTF-8 and UTF-16, where an invalid sequence
2369 is passed through to XEmacs as a sequence of characters with a defined
2370 correspondence to the octets on disk, but no non-error semantics; see the
2371 `invalid-sequence-coding-system' argument to `set-language-info'.
2372
2373 They can also arise with fixed-length encodings like ISO 8859-7, where
2374 certain octets on disk have undefined values, and treating them as
2375 corresponding to the ISO 8859-1 characters with the same numerical values
2376 may lead to data that is not understood by other applications.
2377
2378 Optional argument ERRORP says to signal a `text-conversion-error' if some
2379 character in the region cannot be encoded, and defaults to nil.
2380
2381 Optional argument HIGHLIGHT says to display unencodable characters in the
2382 region using `query-coding-warning-face'. It defaults to nil.
2383
2384 This function can return multiple values; the intention is that callers use
2385 `multiple-value-bind' or the related CL multiple value functions to deal
2386 with it. The first result is `t' if the region can be encoded using
2387 CODING-SYSTEM, or `nil' if not. If the region cannot be encoded using
2388 CODING-SYSTEM, the second result is a range table describing the positions
2389 of the unencodable characters.
2390
2391 Ranges that describe characters that would be ignored were
2392 IGNORE-INVALID-SEQUENCESP non-nil map to the symbol `invalid-sequence';
2393 other ranges map to the symbol `unencodable'. If IGNORE-INVALID-SEQUENCESP
2394 is non-nil, all ranges will map to the symbol `unencodable'. See
2395 `make-range-table' for more details of range tables.
2396 */
2397 (start, end, coding_system, buffer, ignore_invalid_sequencesp,
2398 errorp, highlight))
2399 {
2400 Charbpos b, e;
2401 struct buffer *buf = decode_buffer (buffer, 1);
2402 Lisp_Object result;
2403 int flags = 0, speccount = specpdl_depth ();
2404
2405 coding_system = Fget_coding_system (coding_system);
2406
2407 get_buffer_range_char (buf, start, end, &b, &e, 0);
2408
2409 if (buf != current_buffer)
2410 {
2411 record_unwind_protect (save_current_buffer_restore, Fcurrent_buffer ());
2412 set_buffer_internal (buf);
2413 }
2414
2415 record_unwind_protect (save_excursion_restore, save_excursion_save ());
2416
2417 BUF_SET_PT (buf, b);
2418
2419 if (!NILP (ignore_invalid_sequencesp))
2420 {
2421 flags |= QUERY_METHOD_IGNORE_INVALID_SEQUENCES;
2422 }
2423
2424 if (!NILP (errorp))
2425 {
2426 flags |= QUERY_METHOD_ERRORP;
2427 }
2428
2429 if (!NILP (highlight))
2430 {
2431 flags |= QUERY_METHOD_HIGHLIGHT;
2432 }
2433
2434 result = XCODESYSMETH_OR_GIVEN (coding_system, query,
2435 (coding_system, buf, e, flags), Qunbound);
2436
2437 if (UNBOUNDP (result))
2438 {
2439 signal_error (Qtext_conversion_error,
2440 "Coding system doesn't say what it can encode",
2441 XCODING_SYSTEM_NAME (coding_system));
2442 }
2443
2444 result = (NILP (result)) ? Qt : values2 (Qnil, result);
2445
2446 return unbind_to_1 (speccount, result);
2447 }
2448
2302 2449
2303 2450
2304 /************************************************************************/ 2451 /************************************************************************/
2305 /* Chain methods */ 2452 /* Chain methods */
2306 /************************************************************************/ 2453 /************************************************************************/
3349 } 3496 }
3350 3497
3351 static void 3498 static void
3352 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) 3499 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n)
3353 { 3500 {
3354 UExtbyte *ascii = alloca_array (UExtbyte, n + 1); 3501 Extbyte *ascii = alloca_array (Extbyte, n + 1);
3355 UExtbyte *hex = alloca_array (UExtbyte, 3 * n + 1); 3502 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1);
3356 int i; 3503 int i;
3504 DECLARE_EISTRING (eistr_ascii);
3505 DECLARE_EISTRING (eistr_hex);
3357 3506
3358 for (i = 0; i < n; i++) 3507 for (i = 0; i < n; i++)
3359 { 3508 {
3360 UExtbyte c = src[i]; 3509 Extbyte c = src[i];
3361 if (c < 0x20) 3510 if (c < 0x20)
3362 ascii[i] = '.'; 3511 ascii[i] = '.';
3363 else 3512 else
3364 ascii[i] = c; 3513 ascii[i] = c;
3365 hex[3 * i] = hex_digit_to_char (c >> 4); 3514 hex[3 * i] = hex_digit_to_char (c >> 4);
3366 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); 3515 hex[3 * i + 1] = hex_digit_to_char (c & 0xF);
3367 hex[3 * i + 2] = ' '; 3516 hex[3 * i + 2] = ' ';
3368 } 3517 }
3369 ascii[i] = '\0'; 3518 ascii[i] = '\0';
3370 hex[3 * i - 1] = '\0'; 3519 hex[3 * i - 1] = '\0';
3371 stderr_out ("%s %s", ascii, hex); 3520
3521 eicpy_ext(eistr_hex, hex, Qbinary);
3522 eicpy_ext(eistr_ascii, ascii, Qbinary);
3523
3524 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex));
3372 } 3525 }
3373 3526
3374 #endif /* DEBUG_XEMACS */ 3527 #endif /* DEBUG_XEMACS */
3375 3528
3376 /* Attempt to determine the encoding of the given text. Before calling 3529 /* Attempt to determine the encoding of the given text. Before calling
3496 "Invalid `default-buffer-file-coding-system', set to nil"); 3649 "Invalid `default-buffer-file-coding-system', set to nil");
3497 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; 3650 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil;
3498 } 3651 }
3499 } 3652 }
3500 if (NILP (retval)) 3653 if (NILP (retval))
3501 retval = Fget_coding_system (Qraw_text); 3654 retval = Fget_coding_system (Qbinary);
3502 return retval; 3655 return retval;
3503 } 3656 }
3504 else 3657 else
3505 { 3658 {
3506 int likelihood; 3659 int likelihood;
3845 /* #### This is cheesy. What we really ought to do is buffer 3998 /* #### This is cheesy. What we really ought to do is buffer
3846 up a certain minimum amount of data so as to get a less 3999 up a certain minimum amount of data so as to get a less
3847 random result when doing subprocess detection. */ 4000 random result when doing subprocess detection. */
3848 detect_coding_type (data->st, src, n); 4001 detect_coding_type (data->st, src, n);
3849 data->actual = detected_coding_system (data->st); 4002 data->actual = detected_coding_system (data->st);
4003 /* kludge to prevent infinite recursion */
4004 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system)
4005 data->actual = Fget_coding_system (Qbinary);
3850 } 4006 }
3851 } 4007 }
3852 /* We need to set the detected coding system if we actually have 4008 /* We need to set the detected coding system if we actually have
3853 such a coding system but didn't before. That is the case 4009 such a coding system but didn't before. That is the case
3854 either when we just detected it in the previous code or when 4010 either when we just detected it in the previous code or when
3892 4048
3893 if (str->direction == CODING_ENCODE) 4049 if (str->direction == CODING_ENCODE)
3894 return str->codesys; 4050 return str->codesys;
3895 4051
3896 if (!data->c.initted) 4052 if (!data->c.initted)
3897 return Fget_coding_system (Qundecided); 4053 return str->codesys;
3898 4054
3899 ret = coding_stream_canonicalize_after_coding 4055 ret = coding_stream_canonicalize_after_coding
3900 (XLSTREAM (data->c.lstreams[0])); 4056 (XLSTREAM (data->c.lstreams[0]));
3901 if (NILP (ret)) 4057 if (NILP (ret))
3902 ret = Fget_coding_system (Qundecided); 4058 ret = str->codesys;
3903 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) 4059 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT)
3904 return ret; 4060 return ret;
3905 eolret = coding_stream_canonicalize_after_coding 4061 eolret = coding_stream_canonicalize_after_coding
3906 (XLSTREAM (data->c.lstreams[1])); 4062 (XLSTREAM (data->c.lstreams[1]));
3907 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) 4063 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol))
4359 INIT_LISP_OBJECT (coding_system); 4515 INIT_LISP_OBJECT (coding_system);
4360 4516
4361 DEFSUBR (Fvalid_coding_system_type_p); 4517 DEFSUBR (Fvalid_coding_system_type_p);
4362 DEFSUBR (Fcoding_system_type_list); 4518 DEFSUBR (Fcoding_system_type_list);
4363 DEFSUBR (Fcoding_system_p); 4519 DEFSUBR (Fcoding_system_p);
4520 DEFSUBR (Fautoload_coding_system);
4364 DEFSUBR (Ffind_coding_system); 4521 DEFSUBR (Ffind_coding_system);
4365 DEFSUBR (Fget_coding_system); 4522 DEFSUBR (Fget_coding_system);
4366 DEFSUBR (Fcoding_system_list); 4523 DEFSUBR (Fcoding_system_list);
4367 DEFSUBR (Fcoding_system_name); 4524 DEFSUBR (Fcoding_system_name);
4368 DEFSUBR (Fmake_coding_system); 4525 DEFSUBR (Fmake_coding_system_internal);
4369 DEFSUBR (Fcopy_coding_system); 4526 DEFSUBR (Fcopy_coding_system);
4370 DEFSUBR (Fcoding_system_canonical_name_p); 4527 DEFSUBR (Fcoding_system_canonical_name_p);
4371 DEFSUBR (Fcoding_system_alias_p); 4528 DEFSUBR (Fcoding_system_alias_p);
4372 DEFSUBR (Fcoding_system_aliasee); 4529 DEFSUBR (Fcoding_system_aliasee);
4373 DEFSUBR (Fdefine_coding_system_alias); 4530 DEFSUBR (Fdefine_coding_system_alias);
4386 DEFSUBR (Fcoding_category_system); 4543 DEFSUBR (Fcoding_category_system);
4387 4544
4388 DEFSUBR (Fdetect_coding_region); 4545 DEFSUBR (Fdetect_coding_region);
4389 DEFSUBR (Fdecode_coding_region); 4546 DEFSUBR (Fdecode_coding_region);
4390 DEFSUBR (Fencode_coding_region); 4547 DEFSUBR (Fencode_coding_region);
4548 DEFSUBR (Fquery_coding_region);
4391 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); 4549 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp);
4392 DEFSYMBOL (Qno_conversion); 4550 DEFSYMBOL (Qno_conversion);
4393 DEFSYMBOL (Qconvert_eol); 4551 DEFSYMBOL (Qconvert_eol);
4394 DEFSYMBOL (Qconvert_eol_autodetect); 4552 DEFSYMBOL (Qconvert_eol_autodetect);
4395 DEFSYMBOL (Qconvert_eol_lf); 4553 DEFSYMBOL (Qconvert_eol_lf);
4430 DEFSYMBOL (Qdo_eol); 4588 DEFSYMBOL (Qdo_eol);
4431 DEFSYMBOL (Qdo_coding); 4589 DEFSYMBOL (Qdo_coding);
4432 4590
4433 DEFSYMBOL (Qcanonicalize_after_coding); 4591 DEFSYMBOL (Qcanonicalize_after_coding);
4434 4592
4593 DEFSYMBOL (Qposix_charset_to_coding_system_hash);
4594
4435 DEFSYMBOL (Qescape_quoted); 4595 DEFSYMBOL (Qescape_quoted);
4596
4597 DEFSYMBOL (Qquery_coding_warning_face);
4598 DEFSYMBOL (Qaliases);
4599 DEFSYMBOL (Qcharset_skip_chars_string);
4436 4600
4437 #ifdef HAVE_ZLIB 4601 #ifdef HAVE_ZLIB
4438 DEFSYMBOL (Qgzip); 4602 DEFSYMBOL (Qgzip);
4439 #endif 4603 #endif
4440 4604
4596 4760
4597 Fprovide (intern ("unix-default-eol-detection")); 4761 Fprovide (intern ("unix-default-eol-detection"));
4598 #endif 4762 #endif
4599 4763
4600 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* 4764 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
4601 Coding system used for TTY keyboard input. 4765 Default coding system used for TTY and X11 keyboard input.
4602 Not used under a windowing system. 4766 Under X11, used only to interpret the character for a key event when that
4767 event has a KeySym of NoSymbol but does have an associated string keysym,
4768 something that's seen with input methods.
4769
4770 If you need to set these things to different coding systems, call the
4771 function `set-console-tty-coding-system' for the TTY and use this variable
4772 for X11.
4603 */ ); 4773 */ );
4604 Vkeyboard_coding_system = Qnil; 4774 Vkeyboard_coding_system = Qnil;
4605 4775
4606 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* 4776 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /*
4607 Coding system used for TTY display output. 4777 Coding system used for TTY display output.
4649 If non-nil, display debug information about detection operations in progress. 4819 If non-nil, display debug information about detection operations in progress.
4650 Information is displayed on stderr. 4820 Information is displayed on stderr.
4651 */ ); 4821 */ );
4652 Vdebug_coding_detection = Qnil; 4822 Vdebug_coding_detection = Qnil;
4653 #endif 4823 #endif
4824
4825 #ifdef MULE
4826 Vdefault_query_coding_region_chartab_cache
4827 = make_lisp_hash_table (25, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
4828 staticpro (&Vdefault_query_coding_region_chartab_cache);
4829 #endif
4654 } 4830 }
4655 4831
4656 /* #### reformat this for consistent appearance? */ 4832 /* #### reformat this for consistent appearance? */
4657 4833
4658 void 4834 void
4659 complex_vars_of_file_coding (void) 4835 complex_vars_of_file_coding (void)
4660 { 4836 {
4661 Fmake_coding_system 4837 Fmake_coding_system_internal
4662 (Qconvert_eol_cr, Qconvert_eol, 4838 (Qconvert_eol_cr, Qconvert_eol,
4663 build_msg_string ("Convert CR to LF"), 4839 build_msg_string ("Convert CR to LF"),
4664 nconc2 (list6 (Qdocumentation, 4840 nconc2 (list6 (Qdocumentation,
4665 build_msg_string ( 4841 build_msg_string (
4666 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" 4842 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n"
4668 Qmnemonic, build_string ("CR->LF"), 4844 Qmnemonic, build_string ("CR->LF"),
4669 Qsubtype, Qcr), 4845 Qsubtype, Qcr),
4670 /* VERY IMPORTANT! Tell make-coding-system not to generate 4846 /* VERY IMPORTANT! Tell make-coding-system not to generate
4671 subsidiaries -- it needs the coding systems we're creating 4847 subsidiaries -- it needs the coding systems we're creating
4672 to do so! */ 4848 to do so! */
4673 list2 (Qeol_type, Qlf))); 4849 list4 (Qeol_type, Qlf,
4674 4850 Qsafe_charsets, Qt)));
4675 Fmake_coding_system 4851
4852 Fmake_coding_system_internal
4676 (Qconvert_eol_lf, Qconvert_eol, 4853 (Qconvert_eol_lf, Qconvert_eol,
4677 build_msg_string ("Convert LF to LF (do nothing)"), 4854 build_msg_string ("Convert LF to LF (do nothing)"),
4678 nconc2 (list6 (Qdocumentation, 4855 nconc2 (list6 (Qdocumentation,
4679 build_msg_string ( 4856 build_msg_string (
4680 "Do nothing."), 4857 "Do nothing."),
4681 Qmnemonic, build_string ("LF->LF"), 4858 Qmnemonic, build_string ("LF->LF"),
4682 Qsubtype, Qlf), 4859 Qsubtype, Qlf),
4683 /* VERY IMPORTANT! Tell make-coding-system not to generate 4860 /* VERY IMPORTANT! Tell make-coding-system not to generate
4684 subsidiaries -- it needs the coding systems we're creating 4861 subsidiaries -- it needs the coding systems we're creating
4685 to do so! */ 4862 to do so! */
4686 list2 (Qeol_type, Qlf))); 4863 list4 (Qeol_type, Qlf,
4687 4864 Qsafe_charsets, Qt)));
4688 Fmake_coding_system 4865
4866 Fmake_coding_system_internal
4689 (Qconvert_eol_crlf, Qconvert_eol, 4867 (Qconvert_eol_crlf, Qconvert_eol,
4690 build_msg_string ("Convert CRLF to LF"), 4868 build_msg_string ("Convert CRLF to LF"),
4691 nconc2 (list6 (Qdocumentation, 4869 nconc2 (list6 (Qdocumentation,
4692 build_msg_string ( 4870 build_msg_string (
4693 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" 4871 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n"
4694 "(used internally and under Unix to mark the end of a line)."), 4872 "(used internally and under Unix to mark the end of a line)."),
4695 Qmnemonic, build_string ("CRLF->LF"), 4873 Qmnemonic, build_string ("CRLF->LF"),
4696 Qsubtype, Qcrlf), 4874 Qsubtype, Qcrlf),
4875
4697 /* VERY IMPORTANT! Tell make-coding-system not to generate 4876 /* VERY IMPORTANT! Tell make-coding-system not to generate
4698 subsidiaries -- it needs the coding systems we're creating 4877 subsidiaries -- it needs the coding systems we're creating
4699 to do so! */ 4878 to do so! */
4700 list2 (Qeol_type, Qlf))); 4879 list4 (Qeol_type, Qlf,
4701 4880 Qsafe_charsets, Qt)));
4702 Fmake_coding_system 4881
4882 Fmake_coding_system_internal
4703 (Qconvert_eol_autodetect, Qconvert_eol, 4883 (Qconvert_eol_autodetect, Qconvert_eol,
4704 build_msg_string ("Autodetect EOL type"), 4884 build_msg_string ("Autodetect EOL type"),
4705 nconc2 (list6 (Qdocumentation, 4885 nconc2 (list6 (Qdocumentation,
4706 build_msg_string ( 4886 build_msg_string (
4707 "Autodetect the end-of-line type."), 4887 "Autodetect the end-of-line type."),
4708 Qmnemonic, build_string ("Auto-EOL"), 4888 Qmnemonic, build_string ("Auto-EOL"),
4709 Qsubtype, Qnil), 4889 Qsubtype, Qnil),
4710 /* VERY IMPORTANT! Tell make-coding-system not to generate 4890 /* VERY IMPORTANT! Tell make-coding-system not to generate
4711 subsidiaries -- it needs the coding systems we're creating 4891 subsidiaries -- it needs the coding systems we're creating
4712 to do so! */ 4892 to do so! */
4713 list2 (Qeol_type, Qlf))); 4893 list4 (Qeol_type, Qlf,
4714 4894 Qsafe_charsets, Qt)));
4715 Fmake_coding_system 4895
4896 Fmake_coding_system_internal
4716 (Qundecided, Qundecided, 4897 (Qundecided, Qundecided,
4717 build_msg_string ("Undecided (auto-detect)"), 4898 build_msg_string ("Undecided (auto-detect)"),
4718 nconc2 (list4 (Qdocumentation, 4899 nconc2 (list4 (Qdocumentation,
4719 build_msg_string 4900 build_msg_string
4720 ("Automatically detects the correct encoding."), 4901 ("Automatically detects the correct encoding."),
4723 /* We do EOL detection ourselves so we don't need to be 4904 /* We do EOL detection ourselves so we don't need to be
4724 wrapped in an EOL detector. (It doesn't actually hurt, 4905 wrapped in an EOL detector. (It doesn't actually hurt,
4725 though, I don't think.) */ 4906 though, I don't think.) */
4726 Qeol_type, Qlf))); 4907 Qeol_type, Qlf)));
4727 4908
4728 Fmake_coding_system 4909 Fmake_coding_system_internal
4729 (intern ("undecided-dos"), Qundecided, 4910 (intern ("undecided-dos"), Qundecided,
4730 build_msg_string ("Undecided (auto-detect) (CRLF)"), 4911 build_msg_string ("Undecided (auto-detect) (CRLF)"),
4731 nconc2 (list4 (Qdocumentation, 4912 nconc2 (list4 (Qdocumentation,
4732 build_msg_string 4913 build_msg_string
4733 ("Automatically detects the correct encoding; EOL type of CRLF forced."), 4914 ("Automatically detects the correct encoding; EOL type of CRLF forced."),
4734 Qmnemonic, build_string ("Auto")), 4915 Qmnemonic, build_string ("Auto")),
4735 list4 (Qdo_coding, Qt, 4916 list4 (Qdo_coding, Qt,
4736 Qeol_type, Qcrlf))); 4917 Qeol_type, Qcrlf)));
4737 4918
4738 Fmake_coding_system 4919 Fmake_coding_system_internal
4739 (intern ("undecided-unix"), Qundecided, 4920 (intern ("undecided-unix"), Qundecided,
4740 build_msg_string ("Undecided (auto-detect) (LF)"), 4921 build_msg_string ("Undecided (auto-detect) (LF)"),
4741 nconc2 (list4 (Qdocumentation, 4922 nconc2 (list4 (Qdocumentation,
4742 build_msg_string 4923 build_msg_string
4743 ("Automatically detects the correct encoding; EOL type of LF forced."), 4924 ("Automatically detects the correct encoding; EOL type of LF forced."),
4744 Qmnemonic, build_string ("Auto")), 4925 Qmnemonic, build_string ("Auto")),
4745 list4 (Qdo_coding, Qt, 4926 list4 (Qdo_coding, Qt,
4746 Qeol_type, Qlf))); 4927 Qeol_type, Qlf)));
4747 4928
4748 Fmake_coding_system 4929 Fmake_coding_system_internal
4749 (intern ("undecided-mac"), Qundecided, 4930 (intern ("undecided-mac"), Qundecided,
4750 build_msg_string ("Undecided (auto-detect) (CR)"), 4931 build_msg_string ("Undecided (auto-detect) (CR)"),
4751 nconc2 (list4 (Qdocumentation, 4932 nconc2 (list4 (Qdocumentation,
4752 build_msg_string 4933 build_msg_string
4753 ("Automatically detects the correct encoding; EOL type of CR forced."), 4934 ("Automatically detects the correct encoding; EOL type of CR forced."),
4754 Qmnemonic, build_string ("Auto")), 4935 Qmnemonic, build_string ("Auto")),
4755 list4 (Qdo_coding, Qt, 4936 list4 (Qdo_coding, Qt,
4756 Qeol_type, Qcr))); 4937 Qeol_type, Qcr)));
4757 4938
4758 /* Need to create this here or we're really screwed. */ 4939 /* Need to create this here or we're really screwed. */
4759 Fmake_coding_system 4940 Fmake_coding_system_internal
4760 (Qraw_text, Qno_conversion, 4941 (Qraw_text, Qno_conversion,
4761 build_msg_string ("Raw Text"), 4942 build_msg_string ("Raw Text"),
4762 list4 (Qdocumentation, 4943 nconc2 (list4 (Qdocumentation,
4763 build_msg_string ("Raw text converts only line-break codes, and acts otherwise like `binary'."), 4944 build_msg_string ("Raw text converts only line-break "
4764 Qmnemonic, build_string ("Raw"))); 4945 "codes, and acts otherwise like "
4765 4946 "`binary'."),
4766 Fmake_coding_system 4947 Qmnemonic, build_string ("Raw")),
4948 #ifdef MULE
4949 list2 (Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1,
4950 Vcharset_latin_iso8859_1))));
4951
4952 #else
4953 Qnil));
4954 #endif
4955
4956 Fmake_coding_system_internal
4767 (Qbinary, Qno_conversion, 4957 (Qbinary, Qno_conversion,
4768 build_msg_string ("Binary"), 4958 build_msg_string ("Binary"),
4769 list6 (Qdocumentation, 4959 nconc2 (list6 (Qdocumentation,
4770 build_msg_string ( 4960 build_msg_string (
4771 "This coding system is as close as it comes to doing no conversion.\n" 4961 "This coding system is as close as it comes to doing no conversion.\n"
4772 "On input, each byte is converted directly into the character\n" 4962 "On input, each byte is converted directly into the character\n"
4773 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" 4963 "with the corresponding code -- i.e. from the `ascii', `control-1',\n"
4774 "or `latin-1' character sets. On output, these characters are\n" 4964 "or `latin-1' character sets. On output, these characters are\n"
4775 "converted back to the corresponding bytes, and other characters\n" 4965 "converted back to the corresponding bytes, and other characters\n"
4776 "are converted to the default character, i.e. `~'."), 4966 "are converted to the default character, i.e. `~'."),
4777 Qeol_type, Qlf, 4967 Qeol_type, Qlf,
4778 Qmnemonic, build_string ("Binary"))); 4968 Qmnemonic, build_string ("Binary")),
4969 #ifdef MULE
4970 list2 (Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1,
4971 Vcharset_latin_iso8859_1))));
4972
4973 #else
4974 Qnil));
4975 #endif
4779 4976
4780 /* Formerly aliased to raw-text! Completely bogus and not even the same 4977 /* Formerly aliased to raw-text! Completely bogus and not even the same
4781 as FSF Emacs. */ 4978 as FSF Emacs. */
4782 Fdefine_coding_system_alias (Qno_conversion, Qbinary); 4979 Fdefine_coding_system_alias (Qno_conversion, Qbinary);
4783 Fdefine_coding_system_alias (intern ("no-conversion-unix"), 4980 Fdefine_coding_system_alias (intern ("no-conversion-unix"),