comparison src/file-coding.c @ 5682:dae33b5feffe

Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie() src/ChangeLog addition: 2012-09-07 Aidan Kehoe <kehoea@parhasard.net> * file-coding.c: * file-coding.c (snarf_coding_system): Take a new parameter, FIND_CODING_SYSTEM_P, which indicates that find_coding_system() should be called. * file-coding.c (look_for_coding_system_magic_cookie): * file-coding.c (determine_real_coding_system): * file-coding.c (undecided_convert): Use this parameter. * file-coding.c (Ffind_coding_system_magic_cookie_in_file): New, moved from files.el, so we can use look_for_coding_system_magic_cookie's implementation. * file-coding.c (syms_of_file_coding): Make Ffind_coding_system_magic_cookie_in_file available. lisp/ChangeLog addition: 2012-09-07 Aidan Kehoe <kehoea@parhasard.net> * files.el: * files.el (find-coding-system-magic-cookie-in-file): Removed. Move this to C, so we can use look_for_coding_system_magic_cookie().
author Aidan Kehoe <kehoea@parhasard.net>
date Fri, 07 Sep 2012 22:06:01 +0100
parents 56144c8593a8
children 7a538e1a4676
comparison
equal deleted inserted replaced
5681:4af5a3435c94 5682:dae33b5feffe
77 #include "opaque.h" 77 #include "opaque.h"
78 #include "file-coding.h" 78 #include "file-coding.h"
79 #include "extents.h" 79 #include "extents.h"
80 #include "rangetab.h" 80 #include "rangetab.h"
81 #include "chartab.h" 81 #include "chartab.h"
82 #include "sysfile.h"
82 83
83 #ifdef HAVE_ZLIB 84 #ifdef HAVE_ZLIB
84 #include "zlib.h" 85 #include "zlib.h"
85 #endif 86 #endif
86 87
3672 3673
3673 /* Look for a coding system in the string (skipping over leading 3674 /* Look for a coding system in the string (skipping over leading
3674 blanks). If found, return it, otherwise nil. */ 3675 blanks). If found, return it, otherwise nil. */
3675 3676
3676 static Lisp_Object 3677 static Lisp_Object
3677 snarf_coding_system (const UExtbyte *p, Bytecount len) 3678 snarf_coding_system (const UExtbyte *p, Bytecount len,
3679 Boolint find_coding_system_p)
3678 { 3680 {
3679 Bytecount n; 3681 Bytecount n;
3680 UExtbyte *name; 3682 UExtbyte *name;
3681 3683
3682 while (*p == ' ' || *p == '\t') p++, len--; 3684 while (*p == ' ' || *p == '\t') p++, len--;
3696 if (n > 0) 3698 if (n > 0)
3697 { 3699 {
3698 name[n] = '\0'; 3700 name[n] = '\0';
3699 /* This call to intern_istring() is OK because we already verified that 3701 /* This call to intern_istring() is OK because we already verified that
3700 there are only ASCII characters in the string */ 3702 there are only ASCII characters in the string */
3701 return find_coding_system_for_text_file (intern_istring ((Ibyte *) name), 0); 3703 if (find_coding_system_p)
3704 {
3705 return
3706 find_coding_system_for_text_file (intern_istring ((Ibyte *) name),
3707 0);
3708 }
3709 else
3710 {
3711 return build_ascstring ((const Ascbyte *) name);
3712 }
3702 } 3713 }
3703 3714
3704 return Qnil; 3715 return Qnil;
3705 } 3716 }
3706 3717
3723 free_detection_state (st); 3734 free_detection_state (st);
3724 free_opaque_ptr (opaque); 3735 free_opaque_ptr (opaque);
3725 return Qnil; 3736 return Qnil;
3726 } 3737 }
3727 3738
3728 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file'
3729 in files.el. Look into combining them. */
3730
3731 static Lisp_Object 3739 static Lisp_Object
3732 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len) 3740 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len,
3741 Boolint find_coding_system_p)
3733 { 3742 {
3734 const UExtbyte *p; 3743 const UExtbyte *p;
3735 const UExtbyte *scan_end; 3744 const UExtbyte *scan_end;
3736 Bytecount cookie_len; 3745 Bytecount cookie_len;
3737 3746
3765 || (*(p-1) == ' ' || 3774 || (*(p-1) == ' ' ||
3766 *(p-1) == '\t' || 3775 *(p-1) == '\t' ||
3767 *(p-1) == ';'))) 3776 *(p-1) == ';')))
3768 { 3777 {
3769 p += LENGTH ("coding:"); 3778 p += LENGTH ("coding:");
3770 return snarf_coding_system (p, suffix - p); 3779 return snarf_coding_system (p, suffix - p,
3780 find_coding_system_p);
3771 break; 3781 break;
3772 } 3782 }
3773 break; 3783 break;
3774 } 3784 }
3775 break; 3785 break;
3790 3800
3791 p += cookie_len; 3801 p += cookie_len;
3792 suffix = p; 3802 suffix = p;
3793 while (suffix < scan_end && !isspace (*suffix)) 3803 while (suffix < scan_end && !isspace (*suffix))
3794 suffix++; 3804 suffix++;
3795 return snarf_coding_system (p, suffix - p); 3805 return snarf_coding_system (p, suffix - p, find_coding_system_p);
3796 } 3806 }
3797 } 3807 }
3798 3808
3799 return Qnil; 3809 return Qnil;
3800 } 3810 }
3805 struct detection_state *st = allocate_detection_state (); 3815 struct detection_state *st = allocate_detection_state ();
3806 int depth = record_unwind_protect (unwind_free_detection_state, 3816 int depth = record_unwind_protect (unwind_free_detection_state,
3807 make_opaque_ptr (st)); 3817 make_opaque_ptr (st));
3808 UExtbyte buf[4096]; 3818 UExtbyte buf[4096];
3809 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); 3819 Bytecount nread = Lstream_read (stream, buf, sizeof (buf));
3810 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread); 3820 Lisp_Object coding_system
3821 = look_for_coding_system_magic_cookie (buf, nread, 1);
3811 3822
3812 if (NILP (coding_system)) 3823 if (NILP (coding_system))
3813 { 3824 {
3814 while (1) 3825 while (1)
3815 { 3826 {
3969 data->st = allocate_detection_state (); 3980 data->st = allocate_detection_state ();
3970 if (first_time) 3981 if (first_time)
3971 /* #### This is cheesy. What we really ought to do is buffer 3982 /* #### This is cheesy. What we really ought to do is buffer
3972 up a certain minimum amount of data to get a better result. 3983 up a certain minimum amount of data to get a better result.
3973 */ 3984 */
3974 data->actual = look_for_coding_system_magic_cookie (src, n); 3985 data->actual = look_for_coding_system_magic_cookie (src, n, 1);
3975 if (NILP (data->actual)) 3986 if (NILP (data->actual))
3976 { 3987 {
3977 /* #### This is cheesy. What we really ought to do is buffer 3988 /* #### This is cheesy. What we really ought to do is buffer
3978 up a certain minimum amount of data so as to get a less 3989 up a certain minimum amount of data so as to get a less
3979 random result when doing subprocess detection. */ 3990 random result when doing subprocess detection. */
4214 val = detect_coding_stream (lb_instream); 4225 val = detect_coding_stream (lb_instream);
4215 Lstream_delete (XLSTREAM (lb_instream)); 4226 Lstream_delete (XLSTREAM (lb_instream));
4216 return val; 4227 return val;
4217 } 4228 }
4218 4229
4230 DEFUN ("find-coding-system-magic-cookie-in-file",
4231 Ffind_coding_system_magic_cookie_in_file, 1, 1, 0, /*
4232 Look for the coding-system magic cookie in FILENAME.
4233 The coding-system magic cookie is either the local variable specification
4234 -*- ... coding: ... -*- on the first line, or the exact string
4235 \";;;###coding system: \" somewhere within the first 4096 bytes
4236 of the file. If found, the coding system name (as a string) is returned;
4237 otherwise nil is returned. Note that it is extremely unlikely that
4238 either of these strings would occur coincidentally as the result of encoding
4239 some characters in a non-ASCII charset, and that the spaces make it
4240 even less likely since the space character is not a valid octet in any
4241 ISO 2022 encoding of most non-ASCII charsets.
4242 */
4243 (filename))
4244 {
4245 Lisp_Object lstream;
4246 UExtbyte buf[4096];
4247 Bytecount nread;
4248 int fd = -1;
4249 struct stat st;
4250
4251 filename = Fexpand_file_name (filename, Qnil);
4252
4253 if (qxe_stat (XSTRING_DATA (filename), &st) < 0)
4254 {
4255 badopen:
4256 report_file_error ("Opening input file", filename);
4257 }
4258
4259 if (fd < 0)
4260 {
4261 if ((fd = qxe_interruptible_open (XSTRING_DATA (filename),
4262 O_RDONLY | OPEN_BINARY, 0)) < 0)
4263 goto badopen;
4264 }
4265
4266 lstream = make_filedesc_input_stream (fd, 0, -1, 0);
4267 Lstream_set_buffering (XLSTREAM (lstream), LSTREAM_UNBUFFERED, 0);
4268 nread = Lstream_read (XLSTREAM (lstream), buf, sizeof (buf));
4269 Lstream_delete (XLSTREAM (lstream));
4270 retry_close (fd);
4271
4272 return look_for_coding_system_magic_cookie (buf, nread, 0);
4273 }
4219 4274
4220 4275
4221 #ifdef DEBUG_XEMACS 4276 #ifdef DEBUG_XEMACS
4222 4277
4223 /************************************************************************/ 4278 /************************************************************************/
4522 4577
4523 DEFSUBR (Fdetect_coding_region); 4578 DEFSUBR (Fdetect_coding_region);
4524 DEFSUBR (Fdecode_coding_region); 4579 DEFSUBR (Fdecode_coding_region);
4525 DEFSUBR (Fencode_coding_region); 4580 DEFSUBR (Fencode_coding_region);
4526 DEFSUBR (Fquery_coding_region); 4581 DEFSUBR (Fquery_coding_region);
4582 DEFSUBR (Ffind_coding_system_magic_cookie_in_file);
4527 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); 4583 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp);
4528 DEFSYMBOL (Qno_conversion); 4584 DEFSYMBOL (Qno_conversion);
4529 DEFSYMBOL (Qconvert_eol); 4585 DEFSYMBOL (Qconvert_eol);
4530 DEFSYMBOL (Qconvert_eol_autodetect); 4586 DEFSYMBOL (Qconvert_eol_autodetect);
4531 DEFSYMBOL (Qconvert_eol_lf); 4587 DEFSYMBOL (Qconvert_eol_lf);