Mercurial > hg > xemacs-beta
comparison src/file-coding.c @ 5682:dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
src/ChangeLog addition:
2012-09-07 Aidan Kehoe <kehoea@parhasard.net>
* file-coding.c:
* file-coding.c (snarf_coding_system):
Take a new parameter, FIND_CODING_SYSTEM_P, which indicates that
find_coding_system() should be called.
* file-coding.c (look_for_coding_system_magic_cookie):
* file-coding.c (determine_real_coding_system):
* file-coding.c (undecided_convert):
Use this parameter.
* file-coding.c (Ffind_coding_system_magic_cookie_in_file):
New, moved from files.el, so we can use
look_for_coding_system_magic_cookie's implementation.
* file-coding.c (syms_of_file_coding):
Make Ffind_coding_system_magic_cookie_in_file available.
lisp/ChangeLog addition:
2012-09-07 Aidan Kehoe <kehoea@parhasard.net>
* files.el:
* files.el (find-coding-system-magic-cookie-in-file):
Removed. Move this to C, so we can use
look_for_coding_system_magic_cookie().
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Fri, 07 Sep 2012 22:06:01 +0100 |
parents | 56144c8593a8 |
children | 7a538e1a4676 |
comparison
equal
deleted
inserted
replaced
5681:4af5a3435c94 | 5682:dae33b5feffe |
---|---|
77 #include "opaque.h" | 77 #include "opaque.h" |
78 #include "file-coding.h" | 78 #include "file-coding.h" |
79 #include "extents.h" | 79 #include "extents.h" |
80 #include "rangetab.h" | 80 #include "rangetab.h" |
81 #include "chartab.h" | 81 #include "chartab.h" |
82 #include "sysfile.h" | |
82 | 83 |
83 #ifdef HAVE_ZLIB | 84 #ifdef HAVE_ZLIB |
84 #include "zlib.h" | 85 #include "zlib.h" |
85 #endif | 86 #endif |
86 | 87 |
3672 | 3673 |
3673 /* Look for a coding system in the string (skipping over leading | 3674 /* Look for a coding system in the string (skipping over leading |
3674 blanks). If found, return it, otherwise nil. */ | 3675 blanks). If found, return it, otherwise nil. */ |
3675 | 3676 |
3676 static Lisp_Object | 3677 static Lisp_Object |
3677 snarf_coding_system (const UExtbyte *p, Bytecount len) | 3678 snarf_coding_system (const UExtbyte *p, Bytecount len, |
3679 Boolint find_coding_system_p) | |
3678 { | 3680 { |
3679 Bytecount n; | 3681 Bytecount n; |
3680 UExtbyte *name; | 3682 UExtbyte *name; |
3681 | 3683 |
3682 while (*p == ' ' || *p == '\t') p++, len--; | 3684 while (*p == ' ' || *p == '\t') p++, len--; |
3696 if (n > 0) | 3698 if (n > 0) |
3697 { | 3699 { |
3698 name[n] = '\0'; | 3700 name[n] = '\0'; |
3699 /* This call to intern_istring() is OK because we already verified that | 3701 /* This call to intern_istring() is OK because we already verified that |
3700 there are only ASCII characters in the string */ | 3702 there are only ASCII characters in the string */ |
3701 return find_coding_system_for_text_file (intern_istring ((Ibyte *) name), 0); | 3703 if (find_coding_system_p) |
3704 { | |
3705 return | |
3706 find_coding_system_for_text_file (intern_istring ((Ibyte *) name), | |
3707 0); | |
3708 } | |
3709 else | |
3710 { | |
3711 return build_ascstring ((const Ascbyte *) name); | |
3712 } | |
3702 } | 3713 } |
3703 | 3714 |
3704 return Qnil; | 3715 return Qnil; |
3705 } | 3716 } |
3706 | 3717 |
3723 free_detection_state (st); | 3734 free_detection_state (st); |
3724 free_opaque_ptr (opaque); | 3735 free_opaque_ptr (opaque); |
3725 return Qnil; | 3736 return Qnil; |
3726 } | 3737 } |
3727 | 3738 |
3728 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file' | |
3729 in files.el. Look into combining them. */ | |
3730 | |
3731 static Lisp_Object | 3739 static Lisp_Object |
3732 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len) | 3740 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len, |
3741 Boolint find_coding_system_p) | |
3733 { | 3742 { |
3734 const UExtbyte *p; | 3743 const UExtbyte *p; |
3735 const UExtbyte *scan_end; | 3744 const UExtbyte *scan_end; |
3736 Bytecount cookie_len; | 3745 Bytecount cookie_len; |
3737 | 3746 |
3765 || (*(p-1) == ' ' || | 3774 || (*(p-1) == ' ' || |
3766 *(p-1) == '\t' || | 3775 *(p-1) == '\t' || |
3767 *(p-1) == ';'))) | 3776 *(p-1) == ';'))) |
3768 { | 3777 { |
3769 p += LENGTH ("coding:"); | 3778 p += LENGTH ("coding:"); |
3770 return snarf_coding_system (p, suffix - p); | 3779 return snarf_coding_system (p, suffix - p, |
3780 find_coding_system_p); | |
3771 break; | 3781 break; |
3772 } | 3782 } |
3773 break; | 3783 break; |
3774 } | 3784 } |
3775 break; | 3785 break; |
3790 | 3800 |
3791 p += cookie_len; | 3801 p += cookie_len; |
3792 suffix = p; | 3802 suffix = p; |
3793 while (suffix < scan_end && !isspace (*suffix)) | 3803 while (suffix < scan_end && !isspace (*suffix)) |
3794 suffix++; | 3804 suffix++; |
3795 return snarf_coding_system (p, suffix - p); | 3805 return snarf_coding_system (p, suffix - p, find_coding_system_p); |
3796 } | 3806 } |
3797 } | 3807 } |
3798 | 3808 |
3799 return Qnil; | 3809 return Qnil; |
3800 } | 3810 } |
3805 struct detection_state *st = allocate_detection_state (); | 3815 struct detection_state *st = allocate_detection_state (); |
3806 int depth = record_unwind_protect (unwind_free_detection_state, | 3816 int depth = record_unwind_protect (unwind_free_detection_state, |
3807 make_opaque_ptr (st)); | 3817 make_opaque_ptr (st)); |
3808 UExtbyte buf[4096]; | 3818 UExtbyte buf[4096]; |
3809 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); | 3819 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); |
3810 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread); | 3820 Lisp_Object coding_system |
3821 = look_for_coding_system_magic_cookie (buf, nread, 1); | |
3811 | 3822 |
3812 if (NILP (coding_system)) | 3823 if (NILP (coding_system)) |
3813 { | 3824 { |
3814 while (1) | 3825 while (1) |
3815 { | 3826 { |
3969 data->st = allocate_detection_state (); | 3980 data->st = allocate_detection_state (); |
3970 if (first_time) | 3981 if (first_time) |
3971 /* #### This is cheesy. What we really ought to do is buffer | 3982 /* #### This is cheesy. What we really ought to do is buffer |
3972 up a certain minimum amount of data to get a better result. | 3983 up a certain minimum amount of data to get a better result. |
3973 */ | 3984 */ |
3974 data->actual = look_for_coding_system_magic_cookie (src, n); | 3985 data->actual = look_for_coding_system_magic_cookie (src, n, 1); |
3975 if (NILP (data->actual)) | 3986 if (NILP (data->actual)) |
3976 { | 3987 { |
3977 /* #### This is cheesy. What we really ought to do is buffer | 3988 /* #### This is cheesy. What we really ought to do is buffer |
3978 up a certain minimum amount of data so as to get a less | 3989 up a certain minimum amount of data so as to get a less |
3979 random result when doing subprocess detection. */ | 3990 random result when doing subprocess detection. */ |
4214 val = detect_coding_stream (lb_instream); | 4225 val = detect_coding_stream (lb_instream); |
4215 Lstream_delete (XLSTREAM (lb_instream)); | 4226 Lstream_delete (XLSTREAM (lb_instream)); |
4216 return val; | 4227 return val; |
4217 } | 4228 } |
4218 | 4229 |
4230 DEFUN ("find-coding-system-magic-cookie-in-file", | |
4231 Ffind_coding_system_magic_cookie_in_file, 1, 1, 0, /* | |
4232 Look for the coding-system magic cookie in FILENAME. | |
4233 The coding-system magic cookie is either the local variable specification | |
4234 -*- ... coding: ... -*- on the first line, or the exact string | |
4235 \";;;###coding system: \" somewhere within the first 3000 characters | |
4236 of the file. If found, the coding system name (as a string) is returned; | |
4237 otherwise nil is returned. Note that it is extremely unlikely that | |
4238 either such string would occur coincidentally as the result of encoding | |
4239 some characters in a non-ASCII charset, and that the spaces make it | |
4240 even less likely since the space character is not a valid octet in any | |
4241 ISO 2022 encoding of most non-ASCII charsets. | |
4242 */ | |
4243 (filename)) | |
4244 { | |
4245 Lisp_Object lstream; | |
/* Scratch area for the leading bytes of the file; only one read of at
   most sizeof (buf) bytes is issued below.
   NOTE(review): the docstring speaks of the first 3000 characters while
   this buffer holds 4096 bytes -- confirm which limit is intended. */
4246 UExtbyte buf[4096]; | |
4247 Bytecount nread; | |
/* fd starts out negative, so the fd < 0 test further down is always true
   the first time it is reached. */
4248 int fd = -1; | |
/* Only used as the output argument of qxe_stat; its contents are never
   read in this function. */
4249 struct stat st; | |
4250 | |
/* From here on FILENAME is whatever Fexpand_file_name returned. */
4251 filename = Fexpand_file_name (filename, Qnil); | |
4252 | |
/* A failing stat and a failing open (via goto badopen) share the same
   error report.
   NOTE(review): this layout presumably relies on report_file_error not
   returning; if it did return, control would fall through to the open
   attempt below -- confirm. */
4253 if (qxe_stat (XSTRING_DATA (filename), &st) < 0) | |
4254 { | |
4255 badopen: | |
4256 report_file_error ("Opening input file", filename); | |
4257 } | |
4258 | |
4259 if (fd < 0) | |
4260 { | |
/* Open read-only with OPEN_BINARY; a negative result is treated as
   failure and reported through badopen. */
4261 if ((fd = qxe_interruptible_open (XSTRING_DATA (filename), | |
4262 O_RDONLY | OPEN_BINARY, 0)) < 0) | |
4263 goto badopen; | |
4264 } | |
4265 | |
/* Wrap the descriptor in an lstream, set its buffering mode to
   LSTREAM_UNBUFFERED, and issue a single read into buf. */
4266 lstream = make_filedesc_input_stream (fd, 0, -1, 0); | |
4267 Lstream_set_buffering (XLSTREAM (lstream), LSTREAM_UNBUFFERED, 0); | |
4268 nread = Lstream_read (XLSTREAM (lstream), buf, sizeof (buf)); | |
/* The stream is deleted and then the descriptor is closed explicitly.
   NOTE(review): presumably deleting the stream does not close fd itself,
   and nothing here releases fd if the read exits non-locally -- confirm. */
4269 Lstream_delete (XLSTREAM (lstream)); | |
4270 retry_close (fd); | |
4271 | |
/* nread is handed on without being checked for an error value.
   The final argument 0 is find_coding_system_p: with it false,
   snarf_coding_system returns the cookie's name via build_ascstring
   instead of calling find_coding_system_for_text_file. */
4272 return look_for_coding_system_magic_cookie (buf, nread, 0); | |
4273 } | |
4219 | 4274 |
4220 | 4275 |
4221 #ifdef DEBUG_XEMACS | 4276 #ifdef DEBUG_XEMACS |
4222 | 4277 |
4223 /************************************************************************/ | 4278 /************************************************************************/ |
4522 | 4577 |
4523 DEFSUBR (Fdetect_coding_region); | 4578 DEFSUBR (Fdetect_coding_region); |
4524 DEFSUBR (Fdecode_coding_region); | 4579 DEFSUBR (Fdecode_coding_region); |
4525 DEFSUBR (Fencode_coding_region); | 4580 DEFSUBR (Fencode_coding_region); |
4526 DEFSUBR (Fquery_coding_region); | 4581 DEFSUBR (Fquery_coding_region); |
4582 DEFSUBR (Ffind_coding_system_magic_cookie_in_file); | |
4527 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); | 4583 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
4528 DEFSYMBOL (Qno_conversion); | 4584 DEFSYMBOL (Qno_conversion); |
4529 DEFSYMBOL (Qconvert_eol); | 4585 DEFSYMBOL (Qconvert_eol); |
4530 DEFSYMBOL (Qconvert_eol_autodetect); | 4586 DEFSYMBOL (Qconvert_eol_autodetect); |
4531 DEFSYMBOL (Qconvert_eol_lf); | 4587 DEFSYMBOL (Qconvert_eol_lf); |