Mercurial > hg > xemacs-beta
annotate src/file-coding.c @ 5124:623d57b7fbe8 ben-lisp-object
separate regular and disksave finalization, print method fixes.
Create separate disksave method and make the finalize method only be for
actual object finalization, not disksave finalization.
Fix places where 0 was given in place of a printer -- print methods are
mandatory, and internal objects formerly without a print method now must
explicitly specify internal_object_printer().
Change the defn of CONSOLE_LIVE_P to avoid problems in some weird situations.
-------------------- ChangeLog entries follow: --------------------
src/ChangeLog addition:
2010-01-20 Ben Wing <ben@xemacs.org>
* alloc.c:
* alloc.c (very_old_free_lcrecord):
* alloc.c (disksave_object_finalization_1):
* alloc.c (make_lcrecord_list):
* alloc.c (alloc_managed_lcrecord):
* alloc.c (free_managed_lcrecord):
* alloc.c (sweep_lcrecords_1):
* buffer.c:
* bytecode.c:
* bytecode.c (Fcompiled_function_p):
* chartab.c:
* console-impl.h:
* console-impl.h (CONSOLE_TYPE_P):
* console.c:
* console.c (set_quit_events):
* data.c:
* data.c (Fmake_ephemeron):
* database.c:
* database.c (finalize_database):
* database.c (Fclose_database):
* device-msw.c:
* device-msw.c (finalize_devmode):
* device-msw.c (allocate_devmode):
* device.c:
* elhash.c:
* elhash.c (finalize_hash_table):
* eval.c:
* eval.c (bind_multiple_value_limits):
* event-stream.c:
* event-stream.c (finalize_command_builder):
* events.c:
* events.c (mark_event):
* extents.c:
* extents.c (finalize_extent_info):
* extents.c (uninit_buffer_extents):
* faces.c:
* file-coding.c:
* file-coding.c (finalize_coding_system):
* file-coding.h:
* file-coding.h (struct coding_system_methods):
* file-coding.h (struct detector):
* floatfns.c:
* floatfns.c (extract_float):
* fns.c:
* fns.c (Fidentity):
* font-mgr.c (finalize_fc_pattern):
* font-mgr.c (finalize_fc_config):
* frame.c:
* glyphs.c:
* glyphs.c (finalize_image_instance):
* glyphs.c (unmap_subwindow_instance_cache_mapper):
* gui.c:
* gui.c (gui_error):
* keymap.c:
* lisp.h (struct Lisp_Symbol):
* lrecord.h:
* lrecord.h (struct lrecord_implementation):
* lrecord.h (MC_ALLOC_CALL_FINALIZER):
* lrecord.h (MC_ALLOC_CALL_FINALIZER_FOR_DISKSAVE):
* lrecord.h (DEFINE_DUMPABLE_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_SIZABLE_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_FROB_BLOCK_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_FROB_BLOCK_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_FROB_BLOCK_SIZABLE_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_INTERNAL_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_SIZABLE_INTERNAL_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_SIZABLE_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_SIZABLE_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_FROB_BLOCK_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_FROB_BLOCK_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_FROB_BLOCK_SIZABLE_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_INTERNAL_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_SIZABLE_INTERNAL_LISP_OBJECT):
* lrecord.h (MAKE_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_MODULE_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_MODULE_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_MODULE_SIZABLE_LISP_OBJECT):
* lrecord.h (DEFINE_DUMPABLE_MODULE_SIZABLE_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_MODULE_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_MODULE_GENERAL_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_MODULE_SIZABLE_LISP_OBJECT):
* lrecord.h (DEFINE_NODUMP_MODULE_SIZABLE_GENERAL_LISP_OBJECT):
* lrecord.h (MAKE_MODULE_LISP_OBJECT):
* lstream.c:
* lstream.c (finalize_lstream):
* lstream.c (disksave_lstream):
* marker.c:
* marker.c (finalize_marker):
* mule-charset.c (make_charset):
* number.c:
* objects.c:
* objects.c (finalize_color_instance):
* objects.c (finalize_font_instance):
* opaque.c:
* opaque.c (make_opaque_ptr):
* process-nt.c:
* process-nt.c (nt_finalize_process_data):
* process-nt.c (nt_deactivate_process):
* process.c:
* process.c (finalize_process):
* procimpl.h (struct process_methods):
* scrollbar.c:
* scrollbar.c (free_scrollbar_instance):
* specifier.c (finalize_specifier):
* symbols.c:
* toolbar.c:
* toolbar.c (Ftoolbar_button_p):
* tooltalk.c:
* ui-gtk.c:
* ui-gtk.c (emacs_gtk_object_finalizer):
* ui-gtk.c (allocate_emacs_gtk_boxed_data):
* window.c:
* window.c (finalize_window):
* window.c (mark_window_as_deleted):
Separate out regular and disksave finalization. Instead of a
FOR_DISKSAVE argument to the finalizer, create a separate object
method `disksaver'. Make `finalizer' have only one argument.
Go through and separate out all finalize methods into finalize
and disksave. Delete lots of thereby redundant disksave checking.
Delete places that signal an error if we attempt to disksave --
all of these objects are non-dumpable and we will get an error
from pdump anyway if we attempt to dump them. After this is done,
only one object remains that has a disksave method -- lstream.
Change DEFINE_*_LISP_OBJECT_WITH_PROPS to DEFINE_*_GENERAL_LISP_OBJECT,
which is used for specifying either property methods or disksave
methods (or in the future, any other less-used methods).
Remove the for_disksave argument to finalize_process_data. Don't
provide a disksaver for processes because no one currently needs
it.
Clean up various places where objects didn't provide a print method.
It was made mandatory in previous changes, and all methods now
either provide their own print method or use internal_object_printer
or external_object_printer.
Change the definition of CONSOLE_LIVE_P to use the contype enum
rather than looking into the conmeths structure -- in some weird
situations with dead objects, the conmeths structure is NULL,
and printing such objects from debug_print() will crash if we try
to look into the conmeths structure.
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Wed, 20 Jan 2010 07:05:57 -0600 |
parents | d1247f3cc363 |
children | b5df3737028a |
rev | line source |
---|---|
771 | 1 /* Text encoding conversion functions; coding-system object. |
2 #### rename me to coding-system.c or coding.c | |
428 | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
4 Copyright (C) 1995 Sun Microsystems, Inc. | |
3025 | 5 Copyright (C) 2000, 2001, 2002, 2003, 2005 Ben Wing. |
428 | 6 |
7 This file is part of XEmacs. | |
8 | |
9 XEmacs is free software; you can redistribute it and/or modify it | |
10 under the terms of the GNU General Public License as published by the | |
11 Free Software Foundation; either version 2, or (at your option) any | |
12 later version. | |
13 | |
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with XEmacs; see the file COPYING. If not, write to | |
21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
22 Boston, MA 02111-1307, USA. */ | |
23 | |
771 | 24 /* Synched up with: Not in FSF. */ |
25 | |
26 /* Authorship: | |
27 | |
28 Current primary author: Ben Wing <ben@xemacs.org> | |
29 | |
30 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c | |
31 from Mule 2.? but probably does not share one line of code with that | |
32 original source. Rewriting work started around Dec. 1994. or Jan. 1995. | |
33 Proceeded in earnest till Nov. 1995. | |
34 | |
35 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to | |
36 file-coding.c, with the intention of using it to do end-of-line conversion | |
37 on non-MULE machines (specifically, on Windows machines). He separated | |
38 out the MULE stuff from non-MULE using ifdef's, and searched throughout | |
39 the rest of the source tree looking for coding-system-related code that | |
40 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS. | |
41 | |
42 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system | |
43 types, providing a primitive means of decoding and encoding externally- | |
44 formatted Unicode/UCS_4 and Unicode/UTF_8 data. | |
45 | |
46 January 25, 2000, Martin Buchholz redid and fleshed out the coding | |
47 system alias handling that was first added in prototype form by | |
48 Hrjove Niksic, April 15, 1999. | |
49 | |
50 April to May 2000, Ben Wing: More major reorganization. Adding features | |
51 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the | |
52 "chain" coding system for chaining two together, and doing a lot of | |
53 reorganization in preparation for properly abstracting out the different | |
54 coding system types. | |
55 | |
56 June 2001, Ben Wing: Added Unicode support. Eliminated previous | |
57 junky Unicode translation support. | |
58 | |
59 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished | |
60 abstracting everything except detection, which is hard to abstract (see | |
61 just below). | |
62 | |
63 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code | |
64 to intl-win32.c. Lots more rewriting; very little code is untouched | |
65 from before April 2000. Abstracted the detection code, added multiple | |
66 levels of likelihood to increase the reliability of the algorithm. | |
67 | |
68 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined. | |
69 Removed the conditionals. | |
70 */ | |
71 | |
428 | 72 #include <config.h> |
73 #include "lisp.h" | |
74 | |
75 #include "buffer.h" | |
76 #include "elhash.h" | |
77 #include "insdel.h" | |
78 #include "lstream.h" | |
440 | 79 #include "opaque.h" |
771 | 80 #include "file-coding.h" |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
81 #include "extents.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
82 #include "rangetab.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
83 #include "chartab.h" |
771 | 84 |
85 #ifdef HAVE_ZLIB | |
86 #include "zlib.h" | |
428 | 87 #endif |
88 | |
89 Lisp_Object Vkeyboard_coding_system; | |
90 Lisp_Object Vterminal_coding_system; | |
91 Lisp_Object Vcoding_system_for_read; | |
92 Lisp_Object Vcoding_system_for_write; | |
93 Lisp_Object Vfile_name_coding_system; | |
94 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
95 Lisp_Object Qaliases, Qcharset_skip_chars_string; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
96 |
771 | 97 #ifdef DEBUG_XEMACS |
98 Lisp_Object Vdebug_coding_detection; | |
440 | 99 #endif |
771 | 100 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
101 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
102 extern Lisp_Object Vcharset_ascii, Vcharset_control_1, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
103 Vcharset_latin_iso8859_1; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
104 #endif |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
105 |
771 | 106 typedef struct coding_system_type_entry |
107 { | |
108 struct coding_system_methods *meths; | |
109 } coding_system_type_entry; | |
110 | |
111 typedef struct | |
112 { | |
113 Dynarr_declare (coding_system_type_entry); | |
114 } coding_system_type_entry_dynarr; | |
115 | |
116 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr; | |
117 | |
1204 | 118 static const struct memory_description cste_description_1[] = { |
2551 | 119 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1, |
120 { &coding_system_methods_description } }, | |
771 | 121 { XD_END } |
122 }; | |
123 | |
1204 | 124 static const struct sized_memory_description cste_description = { |
771 | 125 sizeof (coding_system_type_entry), |
126 cste_description_1 | |
127 }; | |
128 | |
1204 | 129 static const struct memory_description csted_description_1[] = { |
771 | 130 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description), |
428 | 131 { XD_END } |
132 }; | |
133 | |
1204 | 134 static const struct sized_memory_description csted_description = { |
771 | 135 sizeof (coding_system_type_entry_dynarr), |
136 csted_description_1 | |
137 }; | |
138 | |
139 static Lisp_Object Vcoding_system_type_list; | |
140 | |
141 /* Coding system currently associated with each coding category. */ | |
142 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES]; | |
143 | |
144 /* Table of all coding categories in decreasing order of priority. | |
145 This describes a permutation of the possible coding categories. */ | |
146 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES]; | |
147 | |
148 /* Value used with to give a unique name to nameless coding systems */ | |
149 int coding_system_tick; | |
150 | |
151 int coding_detector_count; | |
152 int coding_detector_category_count; | |
153 | |
154 detector_dynarr *all_coding_detectors; | |
155 | |
1204 | 156 static const struct memory_description struct_detector_category_description_1[] |
771 | 157 = |
158 { | |
159 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) }, | |
160 { XD_END } | |
161 }; | |
162 | |
1204 | 163 static const struct sized_memory_description struct_detector_category_description = |
771 | 164 { |
165 sizeof (struct detector_category), | |
166 struct_detector_category_description_1 | |
428 | 167 }; |
168 | |
1204 | 169 static const struct memory_description detector_category_dynarr_description_1[] = |
771 | 170 { |
171 XD_DYNARR_DESC (detector_category_dynarr, | |
172 &struct_detector_category_description), | |
173 { XD_END } | |
174 }; | |
175 | |
1204 | 176 static const struct sized_memory_description detector_category_dynarr_description = { |
771 | 177 sizeof (detector_category_dynarr), |
178 detector_category_dynarr_description_1 | |
179 }; | |
180 | |
1204 | 181 static const struct memory_description struct_detector_description_1[] |
771 | 182 = |
183 { | |
2367 | 184 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1, |
2551 | 185 { &detector_category_dynarr_description } }, |
771 | 186 { XD_END } |
187 }; | |
188 | |
1204 | 189 static const struct sized_memory_description struct_detector_description = |
771 | 190 { |
191 sizeof (struct detector), | |
192 struct_detector_description_1 | |
193 }; | |
194 | |
1204 | 195 static const struct memory_description detector_dynarr_description_1[] = |
771 | 196 { |
197 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description), | |
198 { XD_END } | |
199 }; | |
200 | |
1204 | 201 static const struct sized_memory_description detector_dynarr_description = { |
771 | 202 sizeof (detector_dynarr), |
203 detector_dynarr_description_1 | |
204 }; | |
428 | 205 |
206 Lisp_Object Qcoding_systemp; | |
207 | |
771 | 208 Lisp_Object Qraw_text; |
428 | 209 |
210 Lisp_Object Qmnemonic, Qeol_type; | |
211 Lisp_Object Qcr, Qcrlf, Qlf; | |
212 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; | |
213 Lisp_Object Qpost_read_conversion; | |
214 Lisp_Object Qpre_write_conversion; | |
215 | |
771 | 216 Lisp_Object Qtranslation_table_for_decode; |
217 Lisp_Object Qtranslation_table_for_encode; | |
218 Lisp_Object Qsafe_chars; | |
219 Lisp_Object Qsafe_charsets; | |
220 Lisp_Object Qmime_charset; | |
221 Lisp_Object Qvalid_codes; | |
222 | |
223 Lisp_Object Qno_conversion; | |
224 Lisp_Object Qconvert_eol; | |
440 | 225 Lisp_Object Qescape_quoted; |
771 | 226 Lisp_Object Qencode, Qdecode; |
227 | |
228 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf; | |
229 Lisp_Object Qconvert_eol_autodetect; | |
230 | |
231 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely; | |
1494 | 232 Lisp_Object Qslightly_likely; |
771 | 233 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable; |
234 Lisp_Object Qnearly_impossible; | |
235 | |
236 Lisp_Object Qdo_eol, Qdo_coding; | |
237 | |
238 Lisp_Object Qcanonicalize_after_coding; | |
239 | |
1347 | 240 Lisp_Object QScoding_system_cookie; |
241 | |
4303 | 242 Lisp_Object Qposix_charset_to_coding_system_hash; |
243 | |
771 | 244 /* This is used to convert autodetected coding systems into existing |
245 systems. For example, the chain undecided->convert-eol-autodetect may | |
246 have its separate parts detected as mswindows-multibyte and | |
247 convert-eol-crlf, and the result needs to be mapped to | |
248 mswindows-multibyte-dos. */ | |
249 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
250 any more. */ | |
251 static Lisp_Object Vchain_canonicalize_hash_table; | |
252 | |
253 #ifdef HAVE_ZLIB | |
254 Lisp_Object Qgzip; | |
428 | 255 #endif |
771 | 256 |
2297 | 257 /* Maps symbols (coding system names) to either coding system objects or |
258 (for aliases) other names. */ | |
771 | 259 static Lisp_Object Vcoding_system_hash_table; |
428 | 260 |
261 int enable_multibyte_characters; | |
262 | |
263 EXFUN (Fcopy_coding_system, 2); | |
264 | |
265 | |
266 /************************************************************************/ | |
771 | 267 /* Coding system object methods */ |
428 | 268 /************************************************************************/ |
269 | |
270 static Lisp_Object | |
271 mark_coding_system (Lisp_Object obj) | |
272 { | |
273 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); | |
274 | |
1204 | 275 #define MARKED_SLOT(x) mark_object (codesys->x); |
276 #include "coding-system-slots.h" | |
771 | 277 |
278 MAYBE_CODESYSMETH (codesys, mark, (obj)); | |
428 | 279 |
1204 | 280 return Qnil; |
428 | 281 } |
282 | |
283 static void | |
771 | 284 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun) |
285 { | |
286 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
287 print_internal (c->methods->type, printcharfun, 1); | |
288 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1)); | |
289 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT) | |
290 write_fmt_string_lisp (printcharfun, " eol-type=%s", | |
291 1, Fcoding_system_property (obj, Qeol_type)); | |
292 } | |
293 | |
294 static void | |
428 | 295 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, |
2286 | 296 int UNUSED (escapeflag)) |
428 | 297 { |
298 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
299 if (print_readably) | |
771 | 300 printing_unreadable_object |
301 ("printing unreadable object #<coding-system 0x%x>", c->header.uid); | |
302 | |
303 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name); | |
304 print_coding_system_properties (obj, printcharfun); | |
826 | 305 write_c_string (printcharfun, ">"); |
428 | 306 } |
307 | |
771 | 308 /* Print an abbreviated version of a coding system (but still containing |
309 all the information), for use within a coding system print method. */ | |
310 | |
311 static void | |
312 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun, | |
2286 | 313 int UNUSED (escapeflag)) |
771 | 314 { |
800 | 315 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); |
771 | 316 print_coding_system_properties (cs, printcharfun); |
826 | 317 write_c_string (printcharfun, "]"); |
771 | 318 } |
319 | |
3263 | 320 #ifndef NEW_GC |
428 | 321 static void |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
322 finalize_coding_system (void *header) |
428 | 323 { |
771 | 324 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header); |
428 | 325 /* Since coding systems never go away, this function is not |
326 necessary. But it would be necessary if we changed things | |
327 so that coding systems could go away. */ | |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
328 MAYBE_XCODESYSMETH (cs, finalize, (cs)); |
771 | 329 } |
3263 | 330 #endif /* not NEW_GC */ |
771 | 331 |
332 static Bytecount | |
333 sizeof_coding_system (const void *header) | |
334 { | |
335 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header; | |
336 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size; | |
428 | 337 } |
338 | |
1204 | 339 static const struct memory_description coding_system_methods_description_1[] |
771 | 340 = { |
341 { XD_LISP_OBJECT, | |
342 offsetof (struct coding_system_methods, type) }, | |
343 { XD_LISP_OBJECT, | |
344 offsetof (struct coding_system_methods, predicate_symbol) }, | |
345 { XD_END } | |
346 }; | |
347 | |
1204 | 348 const struct sized_memory_description coding_system_methods_description = { |
771 | 349 sizeof (struct coding_system_methods), |
350 coding_system_methods_description_1 | |
351 }; | |
352 | |
1204 | 353 static const struct sized_memory_description coding_system_extra_description_map[] = |
354 { | |
355 { offsetof (Lisp_Coding_System, methods) }, | |
356 { offsetof (struct coding_system_methods, extra_description) }, | |
357 { -1 }, | |
771 | 358 }; |
359 | |
1204 | 360 static const struct memory_description coding_system_description[] = |
428 | 361 { |
2367 | 362 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1, |
2551 | 363 { &coding_system_methods_description } }, |
1204 | 364 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) }, |
365 #define MARKED_SLOT_ARRAY(slot, size) \ | |
366 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size }, | |
367 #include "coding-system-slots.h" | |
2367 | 368 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1, |
2551 | 369 { coding_system_extra_description_map } }, |
1204 | 370 { XD_END } |
771 | 371 }; |
372 | |
1204 | 373 static const struct memory_description coding_system_empty_extra_description_1[] = |
374 { | |
375 { XD_END } | |
376 }; | |
377 | |
378 const struct sized_memory_description coding_system_empty_extra_description = { | |
379 0, coding_system_empty_extra_description_1 | |
380 }; | |
381 | |
3263 | 382 #ifdef NEW_GC |
5118
e0db3c197671
merge up to latest default branch, doesn't compile yet
Ben Wing <ben@xemacs.org>
diff
changeset
|
383 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
384 mark_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
385 print_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
386 0, 0, 0, coding_system_description, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
387 sizeof_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
388 Lisp_Coding_System); |
3263 | 389 #else /* not NEW_GC */ |
5118
e0db3c197671
merge up to latest default branch, doesn't compile yet
Ben Wing <ben@xemacs.org>
diff
changeset
|
390 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
391 mark_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
392 print_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
393 finalize_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
394 0, 0, coding_system_description, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
395 sizeof_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
396 Lisp_Coding_System); |
3263 | 397 #endif /* not NEW_GC */ |
771 | 398 |
399 /************************************************************************/ | |
400 /* Creating coding systems */ | |
401 /************************************************************************/ | |
402 | |
403 static struct coding_system_methods * | |
404 decode_coding_system_type (Lisp_Object type, Error_Behavior errb) | |
428 | 405 { |
771 | 406 int i; |
407 | |
408 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++) | |
428 | 409 { |
771 | 410 if (EQ (type, |
411 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type)) | |
412 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths; | |
428 | 413 } |
771 | 414 |
415 maybe_invalid_constant ("Invalid coding system type", type, | |
416 Qcoding_system, errb); | |
417 | |
418 return 0; | |
428 | 419 } |
420 | |
771 | 421 static int |
422 valid_coding_system_type_p (Lisp_Object type) | |
428 | 423 { |
771 | 424 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; |
425 } | |
426 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
427 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
428 static Lisp_Object Vdefault_query_coding_region_chartab_cache; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
429 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
430 /* Non-static because it's used in INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
431 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
432 default_query_method (Lisp_Object codesys, struct buffer *buf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
433 Charbpos end, int flags) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
434 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
435 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
436 Charbpos pos_byte = BYTE_BUF_PT (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
437 Lisp_Object safe_charsets = XCODING_SYSTEM_SAFE_CHARSETS (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
438 Lisp_Object safe_chars = XCODING_SYSTEM_SAFE_CHARS (codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
439 result = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
440 enum query_coding_failure_reasons failed_reason, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
441 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
442 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
443 /* safe-charsets of t means the coding system can encode everything. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
444 if (EQ (Qnil, safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
445 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
446 if (EQ (Qt, safe_charsets)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
447 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
448 return Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
449 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
450 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
451 /* If we've no information on what characters the coding system can |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
452 encode, give up. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
453 if (EQ (Qnil, safe_charsets) && EQ (Qnil, safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
454 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
455 return Qunbound; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
456 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
457 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
458 safe_chars = Fgethash (safe_charsets, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
459 Vdefault_query_coding_region_chartab_cache, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
460 Qnil); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
461 if (NILP (safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
462 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
463 safe_chars = Fmake_char_table (Qgeneric); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
464 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
465 EXTERNAL_LIST_LOOP_2 (safe_charset, safe_charsets) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
466 Fput_char_table (safe_charset, Qt, safe_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
467 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
468 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
469 Fputhash (safe_charsets, safe_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
470 Vdefault_query_coding_region_chartab_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
471 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
472 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
473 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
474 if (flags & QUERY_METHOD_HIGHLIGHT && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
475 /* If we're being called really early, live without highlights getting |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
476 cleared properly: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
477 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
478 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
479 /* It's okay to call Lisp here, the only non-stack object we may have |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
480 allocated up to this point is safe_chars, and that's |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
481 reachable from its entry in |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
482 Vdefault_query_coding_region_chartab_cache */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
483 call3 (Qquery_coding_clear_highlights, make_int (pos), make_int (end), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
484 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
485 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
486 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
487 while (pos < end) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
488 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
489 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
490 if (!EQ (Qnil, get_char_table (ch, safe_chars))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
491 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
492 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
493 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
494 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
495 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
496 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
497 fail_range_start = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
498 while ((pos < end) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
499 (EQ (Qnil, get_char_table (ch, safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
500 && (failed_reason = query_coding_unencodable)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
501 && (previous_failed_reason == query_coding_succeeded |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
502 || previous_failed_reason == failed_reason)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
503 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
504 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
505 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
506 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
507 previous_failed_reason = failed_reason; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
508 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
509 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
510 if (fail_range_start == pos) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
511 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
512 /* The character can actually be encoded; move on. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
513 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
514 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
515 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
516 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
517 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
518 assert (previous_failed_reason == query_coding_unencodable); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
519 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
520 if (flags & QUERY_METHOD_ERRORP) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
521 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
522 DECLARE_EISTRING (error_details); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
523 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
524 eicpy_ascii (error_details, "Cannot encode "); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
525 eicat_lstr (error_details, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
526 make_string_from_buffer (buf, fail_range_start, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
527 pos - |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
528 fail_range_start)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
529 eicat_ascii (error_details, " using coding system"); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
530 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
531 signal_error (Qtext_conversion_error, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
532 (const CIbyte *)(eidata (error_details)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
533 XCODING_SYSTEM_NAME (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
534 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
535 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
536 if (NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
537 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
538 result = Fmake_range_table (Qstart_closed_end_open); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
539 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
540 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
541 fail_range_end = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
542 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
543 Fput_range_table (make_int (fail_range_start), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
544 make_int (fail_range_end), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
545 Qunencodable, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
546 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
547 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
548 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
549 if (flags & QUERY_METHOD_HIGHLIGHT) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
550 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
551 Lisp_Object extent |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
552 = Fmake_extent (make_int (fail_range_start), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
553 make_int (fail_range_end), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
554 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
555 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
556 Fset_extent_priority |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
557 (extent, make_int (2 + mouse_highlight_priority)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
558 Fset_extent_face (extent, Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
559 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
560 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
561 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
562 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
563 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
564 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
565 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
566 #else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
567 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
568 default_query_method (Lisp_Object UNUSED (codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
569 struct buffer * UNUSED (buf), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
570 Charbpos UNUSED (end), int UNUSED (flags)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
571 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
572 return Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
573 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
574 #endif /* defined MULE */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
575 |
771 | 576 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* |
577 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | |
578 Valid types depend on how XEmacs was compiled but may include | |
3025 | 579 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
580 `utf-16', `ucs-4', `utf-8', etc. | |
771 | 581 */ |
582 (coding_system_type)) | |
583 { | |
584 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; | |
585 } | |
586 | |
587 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /* | |
588 Return a list of valid coding system types. | |
589 */ | |
590 ()) | |
591 { | |
592 return Fcopy_sequence (Vcoding_system_type_list); | |
593 } | |
594 | |
595 void | |
596 add_entry_to_coding_system_type_list (struct coding_system_methods *meths) | |
597 { | |
598 struct coding_system_type_entry entry; | |
599 | |
600 entry.meths = meths; | |
601 Dynarr_add (the_coding_system_type_entry_dynarr, entry); | |
602 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list); | |
428 | 603 } |
604 | |
605 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /* | |
606 Return t if OBJECT is a coding system. | |
607 A coding system is an object that defines how text containing multiple | |
608 character sets is encoded into a stream of (typically 8-bit) bytes. | |
609 The coding system is used to decode the stream into a series of | |
610 characters (which may be from multiple charsets) when the text is read | |
611 from a file or process, and is used to encode the text back into the | |
612 same format when it is written out to a file or process. | |
613 | |
614 For example, many ISO2022-compliant coding systems (such as Compound | |
615 Text, which is used for inter-client data under the X Window System) | |
616 use escape sequences to switch between different charsets -- Japanese | |
617 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked | |
618 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See | |
619 `make-coding-system' for more information. | |
620 | |
621 Coding systems are normally identified using a symbol, and the | |
622 symbol is accepted in place of the actual coding system object whenever | |
623 a coding system is called for. (This is similar to how faces work.) | |
624 */ | |
625 (object)) | |
626 { | |
627 return CODING_SYSTEMP (object) ? Qt : Qnil; | |
628 } | |
629 | |
4303 | 630 static Lisp_Object |
631 find_coding_system (Lisp_Object coding_system_or_name, | |
632 int do_autoloads) | |
633 { | |
634 Lisp_Object lookup; | |
635 | |
636 if (NILP (coding_system_or_name)) | |
637 coding_system_or_name = Qbinary; | |
638 else if (CODING_SYSTEMP (coding_system_or_name)) | |
639 return coding_system_or_name; | |
640 else | |
641 CHECK_SYMBOL (coding_system_or_name); | |
642 | |
643 while (1) | |
644 { | |
645 lookup = | |
646 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
647 | |
648 if (CONSP (lookup) && do_autoloads) | |
649 { | |
650 struct gcpro gcpro1; | |
651 int length; | |
652 DECLARE_EISTRING (desired_base); | |
653 DECLARE_EISTRING (warning_info); | |
654 | |
655 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name)); | |
656 | |
657 /* Work out the name of the base coding system. */ | |
658 length = eilen (desired_base); | |
659 if (length > (int)(sizeof ("-unix") - 1)) | |
660 { | |
661 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base)) | |
662 + (length - (sizeof ("-unix") - 1)))) | |
663 { | |
664 eidel (desired_base, length - (sizeof ("-unix") - 1), | |
665 -1, 5, 5); | |
666 } | |
667 } | |
668 else if (length > (int)(sizeof ("-dos") - 1)) | |
669 { | |
670 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base)) | |
671 + (length - (sizeof ("-dos") - 1)))) || | |
672 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base)) | |
673 + (length - (sizeof ("-mac") - 1))))) | |
674 { | |
675 eidel (desired_base, length - (sizeof ("-dos") - 1), -1, | |
676 4, 4); | |
677 } | |
678 } | |
679 | |
680 coding_system_or_name = intern_int (eidata (desired_base)); | |
681 | |
682 /* Remove this coding system and its subsidiary coding | |
683 systems from the hash, to avoid calling this code recursively. */ | |
684 Fremhash (coding_system_or_name, Vcoding_system_hash_table); | |
685 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"), | |
686 Vcoding_system_hash_table); | |
687 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"), | |
688 Vcoding_system_hash_table); | |
689 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"), | |
690 Vcoding_system_hash_table); | |
691 | |
692 eicpy_ascii (warning_info, "Error autoloading coding system "); | |
693 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name)); | |
694 | |
695 /* Keep around the form so it doesn't disappear from under | |
696 #'eval's feet. */ | |
697 GCPRO1 (lookup); | |
698 call1_trapping_problems ((const CIbyte *)eidata (warning_info), | |
699 Qeval, lookup, 0); | |
700 UNGCPRO; | |
701 | |
702 lookup = | |
703 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
704 } | |
705 | |
706 if (CODING_SYSTEMP (lookup) || NILP (lookup)) | |
707 return lookup; | |
708 | |
709 coding_system_or_name = lookup; | |
710 } | |
711 } | |
712 | |
428 | 713 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* |
714 Retrieve the coding system of the given name. | |
715 | |
716 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply | |
717 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. | |
718 If there is no such coding system, nil is returned. Otherwise the | |
719 associated coding system object is returned. | |
720 */ | |
721 (coding_system_or_name)) | |
722 { | |
4303 | 723 return find_coding_system(coding_system_or_name, 1); |
724 } | |
725 | |
726 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /* | |
727 Define SYMBOL as a coding-system that is loaded on demand. | |
728 | |
729 FORM is a form to evaluate to define the coding-system. | |
730 */ | |
731 (symbol, form)) | |
732 { | |
733 Lisp_Object lookup; | |
734 | |
735 CHECK_SYMBOL (symbol); | |
736 CHECK_CONS (form); | |
737 | |
738 lookup = find_coding_system (symbol, 0); | |
739 | |
740 if (!NILP (lookup) && | |
741 /* Allow autoloads to be redefined. */ | |
742 !CONSP (lookup)) | |
440 | 743 { |
4303 | 744 invalid_operation ("Cannot redefine existing coding system", |
745 symbol); | |
440 | 746 } |
4303 | 747 |
748 Fputhash (symbol, form, Vcoding_system_hash_table); | |
749 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form, | |
750 Vcoding_system_hash_table); | |
751 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form, | |
752 Vcoding_system_hash_table); | |
753 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form, | |
754 Vcoding_system_hash_table); | |
755 | |
756 /* Tell the POSIX locale infrastructure about this coding system (though | |
757 unfortunately it'll be too late for the startup locale sniffing. */ | |
758 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash)) | |
759 { | |
760 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash); | |
761 DECLARE_EISTRING (minimal_name); | |
762 Ibyte *full_name; | |
763 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i; | |
764 | |
765 if (!NILP (val)) | |
766 { | |
767 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol)); | |
768 for (i = 0; i < len; ++i) | |
769 { | |
770 if (full_name[i] >= '0' && full_name[i] <= '9') | |
771 { | |
772 eicat_ch (minimal_name, full_name[i]); | |
773 } | |
774 else if (full_name[i] >= 'a' && full_name[i] <= 'z') | |
775 { | |
776 eicat_ch (minimal_name, full_name[i]); | |
777 } | |
778 else if (full_name[i] >= 'A' && full_name[i] <= 'Z') | |
779 { | |
780 eicat_ch (minimal_name, full_name[i] + | |
781 ('a' - 'A')); | |
782 } | |
783 } | |
784 | |
785 if (eilen (minimal_name)) | |
786 { | |
787 CHECK_HASH_TABLE (val); | |
788 Fputhash (eimake_string(minimal_name), symbol, val); | |
789 } | |
790 } | |
791 } | |
792 | |
793 return Qt; | |
428 | 794 } |
795 | |
796 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* | |
797 Retrieve the coding system of the given name. | |
798 Same as `find-coding-system' except that if there is no such | |
799 coding system, an error is signaled instead of returning nil. | |
800 */ | |
801 (name)) | |
802 { | |
803 Lisp_Object coding_system = Ffind_coding_system (name); | |
804 | |
805 if (NILP (coding_system)) | |
563 | 806 invalid_argument ("No such coding system", name); |
428 | 807 return coding_system; |
808 } | |
809 | |
771 | 810 int |
811 coding_system_is_binary (Lisp_Object coding_system) | |
812 { | |
813 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
814 return | |
815 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) && | |
816 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF && | |
817 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) && | |
818 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil)); | |
819 } | |
820 | |
821 static Lisp_Object | |
822 coding_system_real_canonical (Lisp_Object cs) | |
823 { | |
824 if (!NILP (XCODING_SYSTEM_CANONICAL (cs))) | |
825 return XCODING_SYSTEM_CANONICAL (cs); | |
826 return cs; | |
827 } | |
828 | |
829 /* Return true if coding system is of the "standard" type that decodes | |
830 bytes into characters (suitable for decoding a text file). */ | |
831 int | |
832 coding_system_is_for_text_file (Lisp_Object coding_system) | |
833 { | |
834 return (XCODESYSMETH_OR_GIVEN | |
835 (coding_system, conversion_end_type, | |
836 (coding_system_real_canonical (coding_system)), | |
837 DECODES_BYTE_TO_CHARACTER) == | |
838 DECODES_BYTE_TO_CHARACTER); | |
839 } | |
840 | |
841 static int | |
842 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
843 { | |
844 enum source_sink_type type = | |
845 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type, | |
846 (coding_system_real_canonical (cs)), | |
847 DECODES_BYTE_TO_CHARACTER); | |
848 if (sex == CODING_SOURCE) | |
849 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
850 type == DECODES_CHARACTER_TO_BYTE); | |
851 else | |
852 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
853 type == DECODES_BYTE_TO_CHARACTER); | |
854 } | |
855 | |
856 static int | |
857 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
858 { | |
859 return decoding_source_sink_type_is_char (cs, | |
860 /* Sex change */ | |
861 sex == CODING_SOURCE ? | |
862 CODING_SINK : CODING_SOURCE); | |
863 } | |
864 | |
865 /* Like Ffind_coding_system() but check that the coding system is of the | |
866 "standard" type that decodes bytes into characters (suitable for | |
867 decoding a text file), and if not, returns an appropriate wrapper that | |
868 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
869 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
870 system to do this. */ | |
871 | |
872 Lisp_Object | |
873 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
874 { | |
875 Lisp_Object coding_system = Ffind_coding_system (name); | |
876 Lisp_Object wrapper = coding_system; | |
877 | |
878 if (NILP (coding_system)) | |
879 return Qnil; | |
880 if (!coding_system_is_for_text_file (coding_system)) | |
881 { | |
882 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system); | |
883 if (NILP (wrapper)) | |
884 { | |
885 Lisp_Object chain; | |
886 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK)) | |
887 chain = list2 (coding_system, Qbinary); | |
888 else | |
889 chain = list1 (coding_system); | |
890 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE)) | |
891 chain = Fcons (Qbinary, chain); | |
892 wrapper = | |
893 make_internal_coding_system | |
894 (coding_system, | |
895 "internal-text-file-wrapper", | |
896 Qchain, | |
897 Qunbound, list4 (Qchain, chain, | |
898 Qcanonicalize_after_coding, coding_system)); | |
899 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper; | |
900 } | |
901 } | |
902 | |
903 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT) | |
904 return wrapper; | |
905 | |
906 coding_system = wrapper; | |
907 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system); | |
908 if (!NILP (wrapper)) | |
909 return wrapper; | |
910 wrapper = | |
911 make_internal_coding_system | |
912 (coding_system, | |
913 "internal-auto-eol-wrapper", | |
914 Qundecided, Qunbound, | |
915 list4 (Qcoding_system, coding_system, | |
916 Qdo_eol, Qt)); | |
917 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper; | |
918 return wrapper; | |
919 } | |
920 | |
921 /* Like Fget_coding_system() but verify that the coding system is of the | |
922 "standard" type that decodes bytes into characters (suitable for | |
923 decoding a text file), and if not, returns an appropriate wrapper that | |
924 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
925 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
926 system to do this. */ | |
927 | |
928 Lisp_Object | |
929 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
930 { | |
931 Lisp_Object coding_system = find_coding_system_for_text_file (name, | |
932 eol_wrap); | |
933 if (NILP (coding_system)) | |
934 invalid_argument ("No such coding system", name); | |
935 return coding_system; | |
936 } | |
937 | |
938 /* We store the coding systems in hash tables with the names as the | |
939 key and the actual coding system object as the value. Occasionally | |
940 we need to use them in a list format. These routines provide us | |
941 with that. */ | |
428 | 942 struct coding_system_list_closure |
943 { | |
944 Lisp_Object *coding_system_list; | |
771 | 945 int normal; |
946 int internal; | |
428 | 947 }; |
948 | |
949 static int | |
4303 | 950 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, |
428 | 951 void *coding_system_list_closure) |
952 { | |
953 /* This function can GC */ | |
954 struct coding_system_list_closure *cscl = | |
955 (struct coding_system_list_closure *) coding_system_list_closure; | |
956 Lisp_Object *coding_system_list = cscl->coding_system_list; | |
957 | |
771 | 958 /* We can't just use VALUE because KEY might be an alias, and we need |
4303 | 959 the real coding system object. |
960 | |
961 Autoloaded coding systems have conses for their values, and can't be | |
962 internal coding systems, or coding system aliases. */ | |
963 if (CONSP (value) || | |
964 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | |
965 cscl->internal : cscl->normal)) | |
771 | 966 *coding_system_list = Fcons (key, *coding_system_list); |
428 | 967 return 0; |
968 } | |
969 | |
2297 | 970 /* #### should we specify a conventional for "all coding systems"? */ |
771 | 971 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /* |
428 | 972 Return a list of the names of all defined coding systems. |
771 | 973 If INTERNAL is nil, only the normal (non-internal) coding systems are |
974 included. (Internal coding systems are created for various internal | |
975 purposes, such as implementing EOL types of CRLF and CR; generally, you do | |
976 not want to see these.) If it is t, only the internal coding systems are | |
977 included. If it is any other non-nil value both normal and internal are | |
978 included. | |
428 | 979 */ |
771 | 980 (internal)) |
428 | 981 { |
982 Lisp_Object coding_system_list = Qnil; | |
983 struct gcpro gcpro1; | |
984 struct coding_system_list_closure coding_system_list_closure; | |
985 | |
986 GCPRO1 (coding_system_list); | |
987 coding_system_list_closure.coding_system_list = &coding_system_list; | |
771 | 988 coding_system_list_closure.normal = !EQ (internal, Qt); |
989 coding_system_list_closure.internal = !NILP (internal); | |
428 | 990 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table, |
991 &coding_system_list_closure); | |
992 UNGCPRO; | |
993 | |
994 return coding_system_list; | |
995 } | |
996 | |
997 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /* | |
998 Return the name of the given coding system. | |
999 */ | |
1000 (coding_system)) | |
1001 { | |
1002 coding_system = Fget_coding_system (coding_system); | |
1003 return XCODING_SYSTEM_NAME (coding_system); | |
1004 } | |
1005 | |
1006 static Lisp_Coding_System * | |
771 | 1007 allocate_coding_system (struct coding_system_methods *codesys_meths, |
1008 Bytecount data_size, | |
1009 Lisp_Object name) | |
428 | 1010 { |
771 | 1011 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size; |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
1012 Lisp_Object obj = ALLOC_SIZED_LISP_OBJECT (total_size, coding_system); |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
1013 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); |
1204 | 1014 |
771 | 1015 codesys->methods = codesys_meths; |
1204 | 1016 #define MARKED_SLOT(x) codesys->x = Qnil; |
1017 #include "coding-system-slots.h" | |
1018 | |
771 | 1019 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF; |
1020 CODING_SYSTEM_NAME (codesys) = name; | |
1021 | |
1022 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys))); | |
428 | 1023 |
1024 return codesys; | |
1025 } | |
1026 | |
771 | 1027 static enum eol_type |
1028 symbol_to_eol_type (Lisp_Object symbol) | |
1029 { | |
1030 CHECK_SYMBOL (symbol); | |
1031 if (NILP (symbol)) return EOL_AUTODETECT; | |
1032 if (EQ (symbol, Qlf)) return EOL_LF; | |
1033 if (EQ (symbol, Qcrlf)) return EOL_CRLF; | |
1034 if (EQ (symbol, Qcr)) return EOL_CR; | |
1035 | |
1036 invalid_constant ("Unrecognized eol type", symbol); | |
1204 | 1037 RETURN_NOT_REACHED (EOL_AUTODETECT); |
771 | 1038 } |
1039 | |
1040 static Lisp_Object | |
1041 eol_type_to_symbol (enum eol_type type) | |
1042 { | |
1043 switch (type) | |
1044 { | |
2500 | 1045 default: ABORT (); |
771 | 1046 case EOL_LF: return Qlf; |
1047 case EOL_CRLF: return Qcrlf; | |
1048 case EOL_CR: return Qcr; | |
1049 case EOL_AUTODETECT: return Qnil; | |
1050 } | |
1051 } | |
1052 | |
1053 struct subsidiary_type | |
1054 { | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1055 const Ascbyte *extension; |
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1056 const Ascbyte *mnemonic_ext; |
771 | 1057 enum eol_type eol; |
1058 }; | |
1059 | |
1060 static struct subsidiary_type coding_subsidiary_list[] = | |
1061 { { "-unix", "", EOL_LF }, | |
1062 { "-dos", ":T", EOL_CRLF }, | |
1063 { "-mac", ":t", EOL_CR } }; | |
1064 | |
1065 /* kludge */ | |
428 | 1066 static void |
771 | 1067 setup_eol_coding_systems (Lisp_Object codesys) |
428 | 1068 { |
793 | 1069 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name); |
2367 | 1070 Ibyte *codesys_name = alloca_ibytes (len + 7); |
771 | 1071 int mlen = -1; |
867 | 1072 Ibyte *codesys_mnemonic = 0; |
771 | 1073 Lisp_Object codesys_name_sym, sub_codesys; |
1074 int i; | |
1075 | |
1076 memcpy (codesys_name, | |
793 | 1077 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len); |
771 | 1078 |
1079 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys))) | |
428 | 1080 { |
771 | 1081 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys)); |
2367 | 1082 codesys_mnemonic = alloca_ibytes (mlen + 7); |
771 | 1083 memcpy (codesys_mnemonic, |
1084 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen); | |
1085 } | |
1086 | |
1087 /* Create three "subsidiary" coding systems, decoding data encoded using | |
1088 each of the three EOL types. We do this for each subsidiary by | |
1089 copying the original coding system, setting the EOL type | |
1090 appropriately, and setting the CANONICAL member of the new coding | |
1091 system to be a chain consisting of the original coding system followed | |
1092 by a convert-eol coding system to do the EOL decoding. For EOL type | |
1093 LF, however, we don't need any decoding, so we skip creating a | |
1094 CANONICAL. | |
1095 | |
1096 If the original coding system is not a text-type coding system | |
1097 (decodes byte->char), we need to coerce it to one by the appropriate | |
1098 wrapping in CANONICAL. */ | |
1099 | |
1100 for (i = 0; i < countof (coding_subsidiary_list); i++) | |
1101 { | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1102 const Ascbyte *extension = coding_subsidiary_list[i].extension; |
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1103 const Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; |
771 | 1104 enum eol_type eol = coding_subsidiary_list[i].eol; |
1105 | |
2367 | 1106 qxestrcpy_ascii (codesys_name + len, extension); |
771 | 1107 codesys_name_sym = intern_int (codesys_name); |
1108 if (mlen != -1) | |
2367 | 1109 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext); |
771 | 1110 |
1111 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym); | |
1112 if (mlen != -1) | |
1113 XCODING_SYSTEM_MNEMONIC (sub_codesys) = | |
1114 build_intstring (codesys_mnemonic); | |
1115 | |
1116 if (eol != EOL_LF) | |
1117 { | |
1118 Lisp_Object chain = list2 (get_coding_system_for_text_file | |
1119 (codesys, 0), | |
1120 eol == EOL_CR ? Qconvert_eol_cr : | |
1121 Qconvert_eol_crlf); | |
1122 Lisp_Object canon = | |
1123 make_internal_coding_system | |
1124 (sub_codesys, "internal-subsidiary-eol-wrapper", | |
1125 Qchain, Qunbound, | |
1126 mlen != -1 ? | |
1127 list6 (Qmnemonic, build_intstring (codesys_mnemonic), | |
1128 Qchain, chain, | |
1129 Qcanonicalize_after_coding, sub_codesys) : | |
1130 list4 (Qchain, chain, | |
1131 Qcanonicalize_after_coding, sub_codesys)); | |
1132 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon; | |
1133 } | |
1134 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol; | |
1135 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; | |
1136 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; | |
428 | 1137 } |
1138 } | |
1139 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1140 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1141 1, 1, 0, /* |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1142 Return t if OBJECT names a coding system, and is not a coding system alias. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1143 */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1144 (object)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1145 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1146 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1147 ? Qt : Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1148 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1149 |
771 | 1150 /* Basic function to create new coding systems. For `make-coding-system', |
1151 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, | |
1152 DESCRIPTION, and PROPS are the same. All created coding systems are put | |
1153 in a hash table indexed by NAME. | |
1154 | |
1155 If PREFIX is a string, NAME-OR-EXISTING should specify an existing | |
1156 coding system (or nil), and an internal coding system will be created. | |
1157 The name of the coding system will be constructed by combining PREFIX | |
1158 with the name of the existing coding system (if given), and a number | |
1159 will be appended to insure uniqueness. In such a case, if Qunbound is | |
1160 given for DESCRIPTION, the description gets created based on the | |
1161 generated name. Also, if no mnemonic is given in the properties list, a | |
1162 mnemonic is created based on the generated name. | |
1163 | |
1164 For internal coding systems, the coding system is marked as internal | |
1165 (see `coding-system-list'), and no subsidiaries will be created or | |
1166 eol-wrapping will happen. Otherwise: | |
1167 | |
1168 -- if the eol-type property is `lf' or t, the coding system is merely | |
1169 created and returned. (For t, the coding system will be wrapped with | |
1170 an EOL autodetector when it's used to read a file.) | |
1171 | |
1172 -- if eol-type is `crlf' or `cr', after the coding system object is | |
1173 created, it will be wrapped in a chain with the appropriate | |
1174 convert-eol coding system (either `convert-eol-crlf' or | |
1175 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at | |
1176 decoding time, and the opposite at encoding time. The resulting | |
1177 chain becomes the CANONICAL field of the coding system object. | |
1178 | |
1179 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three | |
1180 coding systems where the original coding system (before wrapping with | |
1181 convert-eol-autodetect) is either unwrapped or wrapped with | |
1182 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems | |
1183 to handle LF, CRLF, and CR end-of-line indicators are created. (This | |
1184 crazy crap is based on existing behavior in other Mule versions, | |
1185 including FSF Emacs.) | |
1186 */ | |
428 | 1187 |
1188 static Lisp_Object | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1189 make_coding_system_1 (Lisp_Object name_or_existing, const Ascbyte *prefix, |
771 | 1190 Lisp_Object type, Lisp_Object description, |
1191 Lisp_Object props) | |
428 | 1192 { |
771 | 1193 Lisp_Coding_System *cs; |
1194 int need_to_setup_eol_systems = 1; | |
1195 enum eol_type eol_wrapper = EOL_AUTODETECT; | |
1196 struct coding_system_methods *meths; | |
1197 Lisp_Object csobj; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1198 Lisp_Object defmnem = Qnil, aliases = Qnil; |
771 | 1199 |
1200 if (NILP (type)) | |
1201 type = Qundecided; | |
1202 meths = decode_coding_system_type (type, ERROR_ME); | |
1203 | |
1204 if (prefix) | |
428 | 1205 { |
867 | 1206 Ibyte *newname = |
771 | 1207 emacs_sprintf_malloc (NULL, "%s-%s-%d", |
1208 prefix, | |
867 | 1209 NILP (name_or_existing) ? (Ibyte *) "nil" : |
771 | 1210 XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME |
1211 (name_or_existing))), | |
1212 ++coding_system_tick); | |
1213 name_or_existing = intern_int (newname); | |
1726 | 1214 xfree (newname, Ibyte *); |
771 | 1215 |
1216 if (UNBOUNDP (description)) | |
1217 { | |
1218 newname = | |
1219 emacs_sprintf_malloc | |
1220 (NULL, "For Internal Use (%s)", | |
1221 XSTRING_DATA (Fsymbol_name (name_or_existing))); | |
1222 description = build_intstring (newname); | |
1726 | 1223 xfree (newname, Ibyte *); |
771 | 1224 } |
1225 | |
1226 newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick); | |
1227 defmnem = build_intstring (newname); | |
1726 | 1228 xfree (newname, Ibyte *); |
428 | 1229 } |
771 | 1230 else |
1231 CHECK_SYMBOL (name_or_existing); | |
1232 | |
4303 | 1233 /* See is there an entry for name_or_existing in the defined coding system |
1234 hash table. */ | |
1235 csobj = find_coding_system (name_or_existing, 0); | |
1236 /* Error if it's there and not an autoload form. */ | |
1237 if (!NILP (csobj) && !CONSP (csobj)) | |
771 | 1238 invalid_operation ("Cannot redefine existing coding system", |
4303 | 1239 name_or_existing); |
771 | 1240 |
1241 cs = allocate_coding_system (meths, meths->extra_data_size, | |
1242 name_or_existing); | |
793 | 1243 csobj = wrap_coding_system (cs); |
771 | 1244 |
1245 cs->internal_p = !!prefix; | |
1246 | |
1247 if (NILP (description)) | |
1248 description = build_string (""); | |
1249 else | |
1250 CHECK_STRING (description); | |
1251 CODING_SYSTEM_DESCRIPTION (cs) = description; | |
1252 | |
1253 if (!NILP (defmnem)) | |
1254 CODING_SYSTEM_MNEMONIC (cs) = defmnem; | |
1255 | |
1256 { | |
1257 EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) | |
1258 { | |
1259 int recognized = 1; | |
1260 | |
1261 if (EQ (key, Qmnemonic)) | |
1262 { | |
1263 if (!NILP (value)) | |
1264 CHECK_STRING (value); | |
1265 CODING_SYSTEM_MNEMONIC (cs) = value; | |
1266 } | |
1267 | |
1268 else if (EQ (key, Qdocumentation)) | |
1269 { | |
1270 if (!NILP (value)) | |
1271 CHECK_STRING (value); | |
1272 CODING_SYSTEM_DOCUMENTATION (cs) = value; | |
1273 } | |
1274 | |
1275 else if (EQ (key, Qeol_type)) | |
1276 { | |
1277 need_to_setup_eol_systems = NILP (value); | |
1278 if (EQ (value, Qt)) | |
1279 value = Qnil; | |
1280 eol_wrapper = symbol_to_eol_type (value); | |
1281 } | |
1282 | |
1283 else if (EQ (key, Qpost_read_conversion)) | |
1284 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; | |
1285 else if (EQ (key, Qpre_write_conversion)) | |
1286 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1287 else if (EQ (key, Qaliases)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1288 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1289 EXTERNAL_LIST_LOOP_2 (alias, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1290 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1291 CHECK_SYMBOL (alias); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1292 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1293 if (!NILP (Fcoding_system_canonical_name_p (alias))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1294 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1295 invalid_change ("Symbol is the canonical name of a " |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1296 "coding system and cannot be redefined", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1297 alias); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1298 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1299 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1300 aliases = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1301 } |
771 | 1302 /* FSF compatibility */ |
1303 else if (EQ (key, Qtranslation_table_for_decode)) | |
1304 ; | |
1305 else if (EQ (key, Qtranslation_table_for_encode)) | |
1306 ; | |
1307 else if (EQ (key, Qsafe_chars)) | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1308 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1309 CHECK_CHAR_TABLE (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1310 CODING_SYSTEM_SAFE_CHARS (cs) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1311 } |
771 | 1312 else if (EQ (key, Qsafe_charsets)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1313 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1314 if (!EQ (Qt, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1315 /* Would be nice to actually do this check, but there are |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1316 some order conflicts with japanese.el and |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1317 mule-coding.el */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1318 && 0) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1319 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1320 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1321 EXTERNAL_LIST_LOOP_2 (safe_charset, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1322 CHECK_CHARSET (Ffind_charset (safe_charset)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1323 #endif |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1324 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1325 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1326 CODING_SYSTEM_SAFE_CHARSETS (cs) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1327 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1328 else if (EQ (key, Qcategory)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1329 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1330 Fput (name_or_existing, intern ("coding-system-property"), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1331 Fplist_put (Fget (name_or_existing, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1332 intern ("coding-system-property"), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1333 Qnil), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1334 Qcategory, value)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1335 } |
771 | 1336 else if (EQ (key, Qmime_charset)) |
1337 ; | |
1338 else if (EQ (key, Qvalid_codes)) | |
1339 ; | |
1340 else | |
1341 recognized = CODESYSMETH_OR_GIVEN (cs, putprop, | |
1342 (csobj, key, value), 0); | |
1343 | |
1344 if (!recognized) | |
1345 invalid_constant ("Unrecognized property", key); | |
1346 } | |
1347 } | |
1348 | |
1349 { | |
1350 XCODING_SYSTEM_CANONICAL (csobj) = | |
1351 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); | |
1352 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system | |
1353 below */ | |
1354 | |
4303 | 1355 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); |
1356 | |
771 | 1357 if (need_to_setup_eol_systems && !cs->internal_p) |
1358 setup_eol_coding_systems (csobj); | |
1359 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) | |
1360 { | |
1361 /* If a specific eol-type (other than LF) was specified, we handle | |
1362 this by converting the coding system into a chain that wraps the | |
1363 coding system along with a convert-eol system after it, in | |
1364 exactly that same switcheroo fashion that the normal | |
1365 canonicalize method works -- BUT we will run into a problem if | |
1366 we do it the obvious way, because when `chain' creates its | |
1367 substreams, the substream containing the coding system we're | |
1368 creating will have canonicalization expansion done on it, | |
1369 leading to infinite recursion. So we have to generate a new, | |
1370 internal coding system with the previous value of CANONICAL. */ | |
867 | 1371 Ibyte *newname = |
771 | 1372 emacs_sprintf_malloc |
1373 (NULL, "internal-eol-copy-%s-%d", | |
1374 XSTRING_DATA (Fsymbol_name (name_or_existing)), | |
1375 ++coding_system_tick); | |
1376 Lisp_Object newnamesym = intern_int (newname); | |
1377 Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym); | |
1726 | 1378 xfree (newname, Ibyte *); |
771 | 1379 |
1380 XCODING_SYSTEM_CANONICAL (csobj) = | |
1381 make_internal_coding_system | |
1382 (csobj, | |
1383 "internal-eol-wrapper", | |
1384 Qchain, Qunbound, | |
1385 list4 (Qchain, | |
1386 list2 (copied, | |
1387 eol_wrapper == EOL_CR ? | |
1388 Qconvert_eol_cr : | |
1389 Qconvert_eol_crlf), | |
1390 Qcanonicalize_after_coding, | |
1391 csobj)); | |
1392 } | |
1393 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1394 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1395 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1396 EXTERNAL_LIST_LOOP_2 (alias, aliases) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1397 Fdefine_coding_system_alias (alias, csobj); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1398 } |
771 | 1399 } |
1400 | |
1401 return csobj; | |
428 | 1402 } |
1403 | |
771 | 1404 Lisp_Object |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1405 make_internal_coding_system (Lisp_Object existing, const Ascbyte *prefix, |
771 | 1406 Lisp_Object type, Lisp_Object description, |
1407 Lisp_Object props) | |
1408 { | |
1409 return make_coding_system_1 (existing, prefix, type, description, props); | |
1410 } | |
428 | 1411 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1412 DEFUN ("make-coding-system-internal", Fmake_coding_system_internal, 2, 4, 0, /* |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1413 See `make-coding-system'. This does much of the work of that function. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1414 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1415 Without Mule support, it does all the work of that function, and an alias |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1416 exists, mapping `make-coding-system' to |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1417 `make-coding-system-internal'. You'll need a non-Mule XEmacs to read the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1418 complete docstring. Or you can just read it in make-coding-system.el; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1419 something like the following should work: |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1420 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1421 \\[find-function-other-window] find-file RET \\[find-file] mule/make-coding-system.el RET |
771 | 1422 |
428 | 1423 */ |
771 | 1424 (name, type, description, props)) |
428 | 1425 { |
771 | 1426 return make_coding_system_1 (name, 0, type, description, props); |
428 | 1427 } |
1428 | |
1429 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /* | |
1430 Copy OLD-CODING-SYSTEM to NEW-NAME. | |
1431 If NEW-NAME does not name an existing coding system, a new one will | |
1432 be created. | |
771 | 1433 If you are using this function to create an alias, think again: |
1434 Use `define-coding-system-alias' instead. | |
428 | 1435 */ |
1436 (old_coding_system, new_name)) | |
1437 { | |
1438 Lisp_Object new_coding_system; | |
1439 old_coding_system = Fget_coding_system (old_coding_system); | |
771 | 1440 new_coding_system = |
4303 | 1441 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0); |
428 | 1442 if (NILP (new_coding_system)) |
1443 { | |
793 | 1444 new_coding_system = |
1445 wrap_coding_system | |
1446 (allocate_coding_system | |
1447 (XCODING_SYSTEM (old_coding_system)->methods, | |
1448 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size, | |
1449 new_name)); | |
771 | 1450 if (!UNBOUNDP (new_name)) |
1451 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table); | |
428 | 1452 } |
771 | 1453 else if (XCODING_SYSTEM (old_coding_system)->methods != |
1454 XCODING_SYSTEM (new_coding_system)->methods) | |
1455 invalid_operation_2 ("Coding systems not same type", | |
1456 old_coding_system, new_coding_system); | |
428 | 1457 |
1458 { | |
1459 Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system); | |
1460 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); | |
5120
d1247f3cc363
latest work on lisp-object workspace;
Ben Wing <ben@xemacs.org>
parents:
5118
diff
changeset
|
1461 COPY_SIZED_LISP_OBJECT (to, from, sizeof_coding_system (from)); |
428 | 1462 to->name = new_name; |
1463 } | |
1464 return new_coding_system; | |
1465 } | |
1466 | |
2297 | 1467 /* #### Shouldn't this really be a find/get pair? */ |
1468 | |
440 | 1469 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* |
1470 Return t if OBJECT is a coding system alias. | |
1471 All coding system aliases are created by `define-coding-system-alias'. | |
1472 */ | |
1473 (object)) | |
428 | 1474 { |
440 | 1475 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero)) |
1476 ? Qt : Qnil; | |
1477 } | |
1478 | |
1479 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /* | |
1480 Return the coding-system symbol for which symbol ALIAS is an alias. | |
1481 */ | |
1482 (alias)) | |
1483 { | |
1484 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil); | |
1485 if (SYMBOLP (aliasee)) | |
1486 return aliasee; | |
1487 else | |
563 | 1488 invalid_argument ("Symbol is not a coding system alias", alias); |
1204 | 1489 RETURN_NOT_REACHED (Qnil); |
440 | 1490 } |
1491 | |
1492 /* A maphash function, for removing dangling coding system aliases. */ | |
1493 static int | |
2286 | 1494 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias), |
440 | 1495 Lisp_Object aliasee, |
1496 void *dangling_aliases) | |
1497 { | |
1498 if (SYMBOLP (aliasee) | |
1499 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil))) | |
428 | 1500 { |
440 | 1501 (*(int *) dangling_aliases)++; |
1502 return 1; | |
428 | 1503 } |
440 | 1504 else |
1505 return 0; | |
1506 } | |
1507 | |
1508 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /* | |
1509 Define symbol ALIAS as an alias for coding system ALIASEE. | |
1510 | |
1511 You can use this function to redefine an alias that has already been defined, | |
1512 but you cannot redefine a name which is the canonical name for a coding system. | |
1513 \(a canonical name of a coding system is what is returned when you call | |
1514 `coding-system-name' on a coding system). | |
1515 | |
1516 ALIASEE itself can be an alias, which allows you to define nested aliases. | |
1517 | |
1518 You are forbidden, however, from creating alias loops or `dangling' aliases. | |
1519 These will be detected, and an error will be signaled if you attempt to do so. | |
1520 | |
1521 If ALIASEE is nil, then ALIAS will simply be undefined. | |
1522 | |
1523 See also `coding-system-alias-p', `coding-system-aliasee', | |
1524 and `coding-system-canonical-name-p'. | |
1525 */ | |
1526 (alias, aliasee)) | |
1527 { | |
2286 | 1528 Lisp_Object probe; |
440 | 1529 |
1530 CHECK_SYMBOL (alias); | |
1531 | |
1532 if (!NILP (Fcoding_system_canonical_name_p (alias))) | |
563 | 1533 invalid_change |
440 | 1534 ("Symbol is the canonical name of a coding system and cannot be redefined", |
1535 alias); | |
1536 | |
1537 if (NILP (aliasee)) | |
1538 { | |
771 | 1539 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix"); |
1540 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos"); | |
1541 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac"); | |
440 | 1542 |
1543 Fremhash (alias, Vcoding_system_hash_table); | |
1544 | |
1545 /* Undefine subsidiary aliases, | |
1546 presumably created by a previous call to this function */ | |
1547 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) && | |
1548 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) && | |
1549 ! NILP (Fcoding_system_alias_p (subsidiary_mac))) | |
1550 { | |
1551 Fdefine_coding_system_alias (subsidiary_unix, Qnil); | |
1552 Fdefine_coding_system_alias (subsidiary_dos, Qnil); | |
1553 Fdefine_coding_system_alias (subsidiary_mac, Qnil); | |
1554 } | |
1555 | |
1556 /* Undefine dangling coding system aliases. */ | |
1557 { | |
1558 int dangling_aliases; | |
1559 | |
1560 do { | |
1561 dangling_aliases = 0; | |
1562 elisp_map_remhash (dangling_coding_system_alias_p, | |
1563 Vcoding_system_hash_table, | |
1564 &dangling_aliases); | |
1565 } while (dangling_aliases > 0); | |
1566 } | |
1567 | |
1568 return Qnil; | |
1569 } | |
1570 | |
1571 if (CODING_SYSTEMP (aliasee)) | |
1572 aliasee = XCODING_SYSTEM_NAME (aliasee); | |
1573 | |
1574 /* Checks that aliasee names a coding-system */ | |
2286 | 1575 (void) Fget_coding_system (aliasee); |
440 | 1576 |
1577 /* Check for coding system alias loops */ | |
1578 if (EQ (alias, aliasee)) | |
563 | 1579 alias_loop: invalid_operation_2 |
440 | 1580 ("Attempt to create a coding system alias loop", alias, aliasee); |
1581 | |
1582 for (probe = aliasee; | |
1583 SYMBOLP (probe); | |
1584 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero)) | |
1585 { | |
1586 if (EQ (probe, alias)) | |
1587 goto alias_loop; | |
1588 } | |
1589 | |
1590 Fputhash (alias, aliasee, Vcoding_system_hash_table); | |
1591 | |
1592 /* Set up aliases for subsidiaries. | |
2297 | 1593 #### There must be a better way to handle subsidiary coding systems. |
1594 Inquiring Minds Want To Know: shouldn't they always be chains? */ | |
440 | 1595 { |
1596 static const char *suffixes[] = { "-unix", "-dos", "-mac" }; | |
1597 int i; | |
1598 for (i = 0; i < countof (suffixes); i++) | |
1599 { | |
1600 Lisp_Object alias_subsidiary = | |
771 | 1601 add_suffix_to_symbol (alias, suffixes[i]); |
440 | 1602 Lisp_Object aliasee_subsidiary = |
771 | 1603 add_suffix_to_symbol (aliasee, suffixes[i]); |
440 | 1604 |
1605 if (! NILP (Ffind_coding_system (aliasee_subsidiary))) | |
1606 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary); | |
1607 } | |
1608 } | |
428 | 1609 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac], |
1610 but it doesn't look intentional, so I'd rather return something | |
1611 meaningful or nothing at all. */ | |
1612 return Qnil; | |
1613 } | |
1614 | |
1615 static Lisp_Object | |
771 | 1616 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) |
428 | 1617 { |
1618 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
1619 Lisp_Object new_coding_system; | |
1620 | |
1621 switch (type) | |
1622 { | |
1623 case EOL_AUTODETECT: return coding_system; | |
1624 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; | |
1625 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; | |
1626 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; | |
2500 | 1627 default: ABORT (); return Qnil; |
428 | 1628 } |
1629 | |
1630 return NILP (new_coding_system) ? coding_system : new_coding_system; | |
1631 } | |
1632 | |
1633 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /* | |
1634 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE. | |
771 | 1635 The logically opposite operation is `coding-system-base'. |
428 | 1636 */ |
1637 (coding_system, eol_type)) | |
1638 { | |
771 | 1639 coding_system = get_coding_system_for_text_file (coding_system, 0); |
428 | 1640 |
1641 return subsidiary_coding_system (coding_system, | |
1642 symbol_to_eol_type (eol_type)); | |
1643 } | |
1644 | |
771 | 1645 DEFUN ("coding-system-base", Fcoding_system_base, |
1646 1, 1, 0, /* | |
1647 Return the base coding system of CODING-SYSTEM. | |
1648 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it | |
1649 returns CODING-SYSTEM. | |
1650 The logically opposite operation is `subsidiary-coding-system'. | |
1651 */ | |
1652 (coding_system)) | |
1653 { | |
1654 Lisp_Object base; | |
1655 | |
1656 coding_system = Fget_coding_system (coding_system); | |
1657 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary)) | |
1658 return Fget_coding_system (Qraw_text); /* hack! */ | |
1659 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system); | |
1660 if (!NILP (base)) | |
1661 return base; | |
1662 return coding_system; | |
1663 } | |
1664 | |
1665 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io, | |
1666 1, 1, 0, /* | |
1667 Return the coding system actually used for I/O. | |
1668 In some cases (e.g. when a particular EOL type is specified) this won't be | |
2297 | 1669 the coding system itself. This can be useful when trying to determine |
1670 precisely how data was decoded. | |
771 | 1671 */ |
1672 (coding_system)) | |
1673 { | |
1674 Lisp_Object canon; | |
1675 | |
1676 coding_system = Fget_coding_system (coding_system); | |
1677 canon = XCODING_SYSTEM_CANONICAL (coding_system); | |
1678 if (!NILP (canon)) | |
1679 return canon; | |
1680 return coding_system; | |
1681 } | |
1682 | |
428 | 1683 |
1684 /************************************************************************/ | |
1685 /* Coding system accessors */ | |
1686 /************************************************************************/ | |
1687 | |
771 | 1688 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /* |
1689 Return the description for CODING-SYSTEM. | |
1690 The `description' of a coding system is a short English phrase giving the | |
1691 name rendered according to English punctuation rules, plus possibly some | |
1692 explanatory text (typically in the form of a parenthetical phrase). The | |
1693 description is intended to be short enough that it can appear as a menu item, | |
1694 and clear enough to be recognizable even to someone who is assumed to have | |
1695 some basic familiarity with different encodings but may not know all the | |
1696 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and | |
1697 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of | |
1698 the encoding is given, followed by a note that this is a Chinese encoding, | |
1699 because the great majority of people encountering this would have no idea | |
1700 what it is, and giving the language indicates whether the encoding should | |
1701 just be ignored or (conceivably) investigated more thoroughly. | |
428 | 1702 */ |
1703 (coding_system)) | |
1704 { | |
1705 coding_system = Fget_coding_system (coding_system); | |
771 | 1706 return XCODING_SYSTEM_DESCRIPTION (coding_system); |
428 | 1707 } |
1708 | |
1709 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /* | |
1710 Return the type of CODING-SYSTEM. | |
1711 */ | |
1712 (coding_system)) | |
1713 { | |
771 | 1714 coding_system = Fget_coding_system (coding_system); |
1715 return XCODING_SYSTEM_TYPE (coding_system); | |
428 | 1716 } |
1717 | |
1718 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /* | |
1719 Return the PROP property of CODING-SYSTEM. | |
1720 */ | |
1721 (coding_system, prop)) | |
1722 { | |
1723 coding_system = Fget_coding_system (coding_system); | |
1724 CHECK_SYMBOL (prop); | |
1725 | |
1726 if (EQ (prop, Qname)) | |
1727 return XCODING_SYSTEM_NAME (coding_system); | |
1728 else if (EQ (prop, Qtype)) | |
1729 return Fcoding_system_type (coding_system); | |
771 | 1730 else if (EQ (prop, Qdescription)) |
1731 return XCODING_SYSTEM_DESCRIPTION (coding_system); | |
428 | 1732 else if (EQ (prop, Qmnemonic)) |
1733 return XCODING_SYSTEM_MNEMONIC (coding_system); | |
771 | 1734 else if (EQ (prop, Qdocumentation)) |
1735 return XCODING_SYSTEM_DOCUMENTATION (coding_system); | |
428 | 1736 else if (EQ (prop, Qeol_type)) |
771 | 1737 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE |
1738 (coding_system)); | |
428 | 1739 else if (EQ (prop, Qeol_lf)) |
1740 return XCODING_SYSTEM_EOL_LF (coding_system); | |
1741 else if (EQ (prop, Qeol_crlf)) | |
1742 return XCODING_SYSTEM_EOL_CRLF (coding_system); | |
1743 else if (EQ (prop, Qeol_cr)) | |
1744 return XCODING_SYSTEM_EOL_CR (coding_system); | |
1745 else if (EQ (prop, Qpost_read_conversion)) | |
1746 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); | |
1747 else if (EQ (prop, Qpre_write_conversion)) | |
1748 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1749 else if (EQ (prop, Qsafe_charsets)) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1750 return XCODING_SYSTEM_SAFE_CHARSETS (coding_system); |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1751 else if (EQ (prop, Qsafe_chars)) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1752 return XCODING_SYSTEM_SAFE_CHARS (coding_system); |
771 | 1753 else |
1754 { | |
1755 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), | |
1756 getprop, | |
1757 (coding_system, prop), | |
1758 Qunbound); | |
1759 if (UNBOUNDP (value)) | |
1760 invalid_constant ("Unrecognized property", prop); | |
1761 return value; | |
1762 } | |
1763 } | |
1764 | |
1765 | |
1766 /************************************************************************/ | |
1767 /* Coding stream functions */ | |
1768 /************************************************************************/ | |
1769 | |
1770 /* A coding stream is a stream used for encoding or decoding text. The | |
1771 coding-stream object keeps track of the actual coding system, the stream | |
1772 that is at the other end, and data that needs to be persistent across | |
1773 the lifetime of the stream. */ | |
1774 | |
1204 | 1775 extern const struct sized_memory_description chain_coding_stream_description; |
1776 extern const struct sized_memory_description undecided_coding_stream_description; | |
1777 | |
1778 static const struct memory_description coding_stream_data_description_1 []= { | |
2551 | 1779 { XD_BLOCK_PTR, chain_coding_system, 1, |
1780 { &chain_coding_stream_description } }, | |
1781 { XD_BLOCK_PTR, undecided_coding_system, 1, | |
1782 { &undecided_coding_stream_description } }, | |
1204 | 1783 { XD_END } |
1784 }; | |
1785 | |
1786 static const struct sized_memory_description coding_stream_data_description = { | |
1787 sizeof (void *), coding_stream_data_description_1 | |
1788 }; | |
1789 | |
1790 static const struct memory_description coding_lstream_description[] = { | |
1791 { XD_INT, offsetof (struct coding_stream, type) }, | |
1792 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) }, | |
1793 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) }, | |
1794 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) }, | |
1795 { XD_UNION, offsetof (struct coding_stream, data), | |
2551 | 1796 XD_INDIRECT (0, 0), { &coding_stream_data_description } }, |
1204 | 1797 { XD_END } |
1798 }; | |
1799 | |
1800 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding); | |
771 | 1801 |
1802 /* Encoding and decoding are parallel operations, so we create just one | |
1803 stream for both. "Decoding" may involve the extra step of autodetection | |
1804 of the data format, but that's only because of the conventional | |
1805 definition of decoding as converting from external- to | |
1806 internal-formatted data. | |
1807 | |
2297 | 1808 [[ REWRITE ME! ]] |
1809 | |
771 | 1810 #### We really need to abstract out the concept of "data formats" and |
1811 define "converters" that convert from and to specified formats, | |
1812 eliminating the idea of decoding and encoding. When specifying a | |
1813 conversion process, we need to give the data formats themselves, not the | |
1814 conversion processes -- e.g. a coding system called "Unicode->multibyte" | |
1815 converts in both directions, and we could auto-detect the format of data | |
1816 at either end. */ | |
1817 | |
1818 static Bytecount | |
1819 coding_reader (Lstream *stream, unsigned char *data, Bytecount size) | |
1820 { | |
1821 unsigned char *orig_data = data; | |
1822 Bytecount read_size; | |
1823 int error_occurred = 0; | |
1824 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1825 | |
1826 /* We need to interface to coding_{de,en}code_1(), which expects to take | |
1827 some amount of data and store the result into a Dynarr. We have | |
1828 coding_{de,en}code_1() store into c->runoff, and take data from there | |
1829 as necessary. */ | |
1830 | |
1831 /* We loop until we have enough data, reading chunks from the other | |
1832 end and converting it. */ | |
1833 while (1) | |
1834 { | |
1835 /* Take data from convert_to if we can. Make sure to take at | |
1836 most SIZE bytes, and delete the data from convert_to. */ | |
1837 if (Dynarr_length (str->convert_to) > 0) | |
1838 { | |
1839 Bytecount chunk = | |
1840 min (size, (Bytecount) Dynarr_length (str->convert_to)); | |
1841 memcpy (data, Dynarr_atp (str->convert_to, 0), chunk); | |
1842 Dynarr_delete_many (str->convert_to, 0, chunk); | |
1843 data += chunk; | |
1844 size -= chunk; | |
1845 } | |
1846 | |
1847 if (size == 0) | |
1848 break; /* No more room for data */ | |
1849 | |
1850 if (str->eof) | |
1851 break; | |
1852 | |
1853 { | |
1854 /* Exhausted convert_to, so get some more. Read into convert_from, | |
1855 after existing "rejected" data from the last conversion. */ | |
1856 Bytecount rejected = Dynarr_length (str->convert_from); | |
1857 /* #### 1024 is arbitrary; we really need to separate 0 from EOF, | |
1858 and when we get 0, keep taking more data until we don't get 0 -- | |
1859 we don't know how much data the conversion routine might need | |
2297 | 1860 before it can generate any data of its own (eg, bzip2). */ |
814 | 1861 Bytecount readmore = |
1862 str->one_byte_at_a_time ? (Bytecount) 1 : | |
1863 max (size, (Bytecount) 1024); | |
771 | 1864 |
1865 Dynarr_add_many (str->convert_from, 0, readmore); | |
1866 read_size = Lstream_read (str->other_end, | |
1867 Dynarr_atp (str->convert_from, rejected), | |
1868 readmore); | |
1869 /* Trim size down to how much we actually got */ | |
1870 Dynarr_set_size (str->convert_from, rejected + max (0, read_size)); | |
1871 } | |
1872 | |
1873 if (read_size < 0) /* LSTREAM_ERROR */ | |
1874 { | |
1875 error_occurred = 1; | |
1876 break; | |
1877 } | |
1878 if (read_size == 0) /* LSTREAM_EOF */ | |
1879 /* There might be some more end data produced in the translation, | |
1880 so we set a flag and call the conversion method once more to | |
1881 output any final stuff it may be holding, any "go back to a sane | |
1882 state" escape sequences, etc. The conversion method is free to | |
1883 look at this flag, and we use it above to stop looping. */ | |
1884 str->eof = 1; | |
1885 { | |
1886 Bytecount processed; | |
1887 Bytecount to_process = Dynarr_length (str->convert_from); | |
1888 | |
1889 /* Convert the data, and save any rejected data in convert_from */ | |
1890 processed = | |
1891 XCODESYSMETH (str->codesys, convert, | |
1892 (str, Dynarr_atp (str->convert_from, 0), | |
1893 str->convert_to, to_process)); | |
1894 if (processed < 0) | |
1895 { | |
1896 error_occurred = 1; | |
1897 break; | |
1898 } | |
1899 assert (processed <= to_process); | |
1900 if (processed < to_process) | |
1901 memmove (Dynarr_atp (str->convert_from, 0), | |
1902 Dynarr_atp (str->convert_from, processed), | |
1903 to_process - processed); | |
1904 Dynarr_set_size (str->convert_from, to_process - processed); | |
1905 } | |
1906 } | |
1907 | |
1908 if (data - orig_data == 0) | |
1909 return error_occurred ? -1 : 0; | |
1910 else | |
1911 return data - orig_data; | |
1912 } | |
1913 | |
1914 static Bytecount | |
1915 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size) | |
1916 { | |
1917 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1918 | |
1919 /* Convert all our data into convert_to, and then attempt to write | |
1920 it all out to the other end. */ | |
1921 Dynarr_reset (str->convert_to); | |
1922 size = XCODESYSMETH (str->codesys, convert, | |
1923 (str, data, str->convert_to, size)); | |
1924 if (Lstream_write (str->other_end, Dynarr_atp (str->convert_to, 0), | |
1925 Dynarr_length (str->convert_to)) < 0) | |
1926 return -1; | |
1927 else | |
1928 /* The return value indicates how much of the incoming data was | |
1929 processed, not how many bytes were written. */ | |
1930 return size; | |
1931 } | |
1932 | |
1933 static int | |
1934 encode_decode_source_sink_type_is_char (Lisp_Object cs, | |
1935 enum source_or_sink sex, | |
1936 enum encode_decode direction) | |
1937 { | |
1938 return (direction == CODING_DECODE ? | |
1939 decoding_source_sink_type_is_char (cs, sex) : | |
1940 encoding_source_sink_type_is_char (cs, sex)); | |
1941 } | |
1942 | |
1943 /* Ensure that the convert methods only get full characters sent to them to | |
1944 convert if the source of that conversion is characters; and that no such | |
1945 full-character checking happens when the source is bytes. Keep in mind | |
1946 that (1) the conversion_end_type return values take the perspective of | |
1947 encoding; (2) the source for decoding is the same as the sink for | |
1948 encoding; (3) when writing, the data is given to us, and we set our own | |
1949 stream to be character mode or not; (4) when reading, the data comes | |
1950 from the other_end stream, and we set that one to be character mode or | |
1951 not. This is consistent with the comment above the prototype for | |
1952 Lstream_set_character_mode(), which lays out rules for who is allowed to | |
1953 modify the character type mode on a stream. | |
1954 | |
814 | 1955 If we're a read stream, we're always setting character mode on the |
1956 source, but we also set it on ourselves consistent with the flag that | |
1957 can disable this (see again the comment above | |
1958 Lstream_set_character_mode()). | |
1959 */ | |
771 | 1960 |
1961 static void | |
1962 set_coding_character_mode (Lstream *stream) | |
1963 { | |
1964 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1965 Lstream *stream_to_set = | |
1966 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end; | |
1967 if (encode_decode_source_sink_type_is_char | |
1968 (str->codesys, CODING_SOURCE, str->direction)) | |
1969 Lstream_set_character_mode (stream_to_set); | |
1970 else | |
1971 Lstream_unset_character_mode (stream_to_set); | |
814 | 1972 if (str->set_char_mode_on_us_when_reading && |
1973 (stream->flags & LSTREAM_FL_READ)) | |
1974 { | |
1975 if (encode_decode_source_sink_type_is_char | |
1976 (str->codesys, CODING_SINK, str->direction)) | |
1977 Lstream_set_character_mode (stream); | |
1978 else | |
1979 Lstream_unset_character_mode (stream); | |
1980 } | |
771 | 1981 } |
1982 | |
1983 static Lisp_Object | |
1984 coding_marker (Lisp_Object stream) | |
1985 { | |
1986 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream)); | |
1987 | |
1988 mark_object (str->orig_codesys); | |
1989 mark_object (str->codesys); | |
1990 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str)); | |
1991 return wrap_lstream (str->other_end); | |
1992 } | |
1993 | |
1994 static int | |
1995 coding_rewinder (Lstream *stream) | |
1996 { | |
1997 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1998 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str)); | |
1999 | |
2000 str->ch = 0; | |
2001 Dynarr_reset (str->convert_to); | |
2002 Dynarr_reset (str->convert_from); | |
2003 return Lstream_rewind (str->other_end); | |
2004 } | |
2005 | |
2006 static int | |
2007 coding_seekable_p (Lstream *stream) | |
2008 { | |
2009 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2010 return Lstream_seekable_p (str->other_end); | |
2011 } | |
2012 | |
2013 static int | |
2014 coding_flusher (Lstream *stream) | |
2015 { | |
2016 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2017 return Lstream_flush (str->other_end); | |
2018 } | |
2019 | |
2020 static int | |
2021 coding_closer (Lstream *stream) | |
2022 { | |
2023 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2024 if (stream->flags & LSTREAM_FL_WRITE) | |
2025 { | |
2026 str->eof = 1; | |
2027 coding_writer (stream, 0, 0); | |
2028 str->eof = 0; | |
2029 } | |
2030 /* It's safe to free the runoff dynarrs now because they are used only | |
2031 during conversion. We need to keep the type-specific data around, | |
2032 though, because of canonicalize_after_coding. */ | |
2033 if (str->convert_to) | |
2034 { | |
2035 Dynarr_free (str->convert_to); | |
2036 str->convert_to = 0; | |
2037 } | |
2038 if (str->convert_from) | |
428 | 2039 { |
771 | 2040 Dynarr_free (str->convert_from); |
2041 str->convert_from = 0; | |
2042 } | |
2043 | |
800 | 2044 if (str->no_close_other) |
2045 return Lstream_flush (str->other_end); | |
2046 else | |
2047 return Lstream_close (str->other_end); | |
771 | 2048 } |
2049 | |
2050 static void | |
2051 coding_finalizer (Lstream *stream) | |
2052 { | |
2053 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2054 | |
2055 assert (!str->finalized); | |
2056 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2057 if (str->data) | |
2058 { | |
1726 | 2059 xfree (str->data, void *); |
771 | 2060 str->data = 0; |
2061 } | |
2062 str->finalized = 1; | |
2063 } | |
2064 | |
2065 static Lisp_Object | |
2066 coding_stream_canonicalize_after_coding (Lstream *stream) | |
2067 { | |
2068 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2069 | |
2070 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding, | |
2071 (str), str->codesys); | |
2072 } | |
2073 | |
2074 Lisp_Object | |
2075 coding_stream_detected_coding_system (Lstream *stream) | |
2076 { | |
2077 Lisp_Object codesys = | |
2078 coding_stream_canonicalize_after_coding (stream); | |
2079 if (NILP (codesys)) | |
2080 return Fget_coding_system (Qidentity); | |
2081 return codesys; | |
2082 } | |
2083 | |
2084 Lisp_Object | |
2085 coding_stream_coding_system (Lstream *stream) | |
2086 { | |
2087 return CODING_STREAM_DATA (stream)->codesys; | |
2088 } | |
2089 | |
2090 /* Change the coding system associated with a stream. */ | |
2091 | |
2092 void | |
2093 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys) | |
2094 { | |
2095 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2096 if (EQ (str->orig_codesys, codesys)) | |
2097 return; | |
2098 /* We do the equivalent of closing the stream, destroying it, and | |
2099 reinitializing it. This includes flushing out the data and signalling | |
2100 EOF, if we're a writing stream; we also replace the type-specific data | |
2101 with the data appropriate for the new coding system. */ | |
2102 if (!NILP (str->codesys)) | |
2103 { | |
2104 if (lstr->flags & LSTREAM_FL_WRITE) | |
2105 { | |
2106 Lstream_flush (lstr); | |
2107 str->eof = 1; | |
2108 coding_writer (lstr, 0, 0); | |
2109 str->eof = 0; | |
2110 } | |
2111 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2112 } | |
2113 str->orig_codesys = codesys; | |
2114 str->codesys = coding_system_real_canonical (codesys); | |
2115 | |
2116 if (str->data) | |
2117 { | |
1726 | 2118 xfree (str->data, void *); |
771 | 2119 str->data = 0; |
428 | 2120 } |
771 | 2121 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size) |
1204 | 2122 { |
2123 str->data = | |
2124 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)-> | |
2125 coding_data_size); | |
2126 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype; | |
2127 } | |
771 | 2128 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str)); |
2129 /* The new coding system may have different ideas regarding whether its | |
2130 ends are characters or bytes. */ | |
2131 set_coding_character_mode (lstr); | |
2132 } | |
2133 | |
2134 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding | |
2135 stream for writing, no automatic code detection will be performed. | |
2136 The reason for this is that automatic code detection requires a | |
2137 seekable input. Things will also fail if you open a coding | |
2138 stream for reading using a non-fully-specified coding system and | |
2139 a non-seekable input stream. */ | |
2140 | |
2141 static Lisp_Object | |
2142 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys, | |
800 | 2143 const char *mode, enum encode_decode direction, |
802 | 2144 int flags) |
771 | 2145 { |
2146 Lstream *lstr = Lstream_new (lstream_coding, mode); | |
2147 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2148 | |
2149 codesys = Fget_coding_system (codesys); | |
2150 xzero (*str); | |
2151 str->codesys = Qnil; | |
2152 str->orig_codesys = Qnil; | |
2153 str->us = lstr; | |
2154 str->other_end = stream; | |
2155 str->convert_to = Dynarr_new (unsigned_char); | |
2156 str->convert_from = Dynarr_new (unsigned_char); | |
2157 str->direction = direction; | |
814 | 2158 if (flags & LSTREAM_FL_NO_CLOSE_OTHER) |
802 | 2159 str->no_close_other = 1; |
814 | 2160 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME) |
802 | 2161 str->one_byte_at_a_time = 1; |
814 | 2162 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING)) |
2163 str->set_char_mode_on_us_when_reading = 1; | |
802 | 2164 |
771 | 2165 set_coding_stream_coding_system (lstr, codesys); |
793 | 2166 return wrap_lstream (lstr); |
771 | 2167 } |
2168 | |
814 | 2169 /* FLAGS: |
2170 | |
2171 LSTREAM_FL_NO_CLOSE_OTHER | |
2172 Don't close STREAM (the stream at the other end) when this stream is | |
2173 closed. | |
2174 | |
2175 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME | |
2176 When reading from STREAM, read and process one byte at a time rather | |
2177 than in large chunks. This is for reading from TTY's, so we don't | |
2178 block. #### We should instead create a non-blocking filedesc stream | |
2179 that emulates the behavior as necessary using select(), when the | |
2180 fcntls don't work. (As seems to be the case on Cygwin.) | |
2181 | |
2182 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING | |
2183 When reading from STREAM, read and process one byte at a time rather | |
2184 than in large chunks. This is for reading from TTY's, so we don't | |
2185 block. #### We should instead create a non-blocking filedesc stream | |
2186 that emulates the behavior as necessary using select(), when the | |
2187 fcntls don't work. (As seems to be the case on Cygwin.) | |
2188 */ | |
771 | 2189 Lisp_Object |
2190 make_coding_input_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2191 enum encode_decode direction, int flags) |
771 | 2192 { |
800 | 2193 return make_coding_stream_1 (stream, codesys, "r", direction, |
802 | 2194 flags); |
771 | 2195 } |
2196 | |
814 | 2197 /* FLAGS: |
2198 | |
2199 LSTREAM_FL_NO_CLOSE_OTHER | |
2200 Don't close STREAM (the stream at the other end) when this stream is | |
2201 closed. | |
2202 */ | |
771 | 2203 Lisp_Object |
2204 make_coding_output_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2205 enum encode_decode direction, int flags) |
771 | 2206 { |
800 | 2207 return make_coding_stream_1 (stream, codesys, "w", direction, |
802 | 2208 flags); |
771 | 2209 } |
2210 | |
2211 static Lisp_Object | |
2212 encode_decode_coding_region (Lisp_Object start, Lisp_Object end, | |
2213 Lisp_Object coding_system, Lisp_Object buffer, | |
2214 enum encode_decode direction) | |
2215 { | |
2216 Charbpos b, e; | |
2217 struct buffer *buf = decode_buffer (buffer, 0); | |
2218 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil; | |
2219 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil; | |
2220 Lisp_Object lb_outstream = Qnil; | |
2221 Lisp_Object next; | |
2222 Lstream *istr, *ostr; | |
2223 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; | |
2224 struct gcpro ngcpro1; | |
2225 int source_char, sink_char; | |
2226 | |
2227 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
2228 barf_if_buffer_read_only (buf, b, e); | |
2229 | |
2230 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream); | |
2231 NGCPRO1 (auto_outstream); | |
2232 | |
2233 coding_system = Fget_coding_system (coding_system); | |
2234 source_char = encode_decode_source_sink_type_is_char (coding_system, | |
2235 CODING_SOURCE, | |
2236 direction); | |
2237 sink_char = encode_decode_source_sink_type_is_char (coding_system, | |
2238 CODING_SINK, | |
2239 direction); | |
2240 | |
2241 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */ | |
2242 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
2243 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0); | |
2244 | |
2245 if (direction == CODING_DECODE && | |
2246 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT) | |
2247 next = auto_outstream = | |
2248 make_coding_output_stream | |
800 | 2249 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect), |
2250 CODING_DECODE, 0); | |
771 | 2251 |
2252 if (!sink_char) | |
2253 next = from_outstream = | |
800 | 2254 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0); |
771 | 2255 outstream = make_coding_output_stream (XLSTREAM (next), coding_system, |
800 | 2256 direction, 0); |
771 | 2257 if (!source_char) |
428 | 2258 { |
771 | 2259 to_outstream = |
2260 make_coding_output_stream (XLSTREAM (outstream), | |
800 | 2261 Qbinary, CODING_ENCODE, 0); |
771 | 2262 ostr = XLSTREAM (to_outstream); |
2263 } | |
2264 else | |
2265 ostr = XLSTREAM (outstream); | |
2266 istr = XLSTREAM (instream); | |
2267 | |
2268 /* The chain of streams looks like this: | |
2269 | |
2297 | 2270 [BUFFER] <----- (( read from/send to loop )) |
771 | 2271 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is |
2272 in bytes] | |
2273 ------> [ENCODE/DECODE AS SPECIFIED] | |
2274 ------> [BYTE->CHAR i.e. DECODE AS BINARY | |
2275 if sink is in bytes] | |
2276 ------> [AUTODETECT EOL if | |
2277 we're decoding and | |
2278 coding system calls | |
2279 for this] | |
2280 ------> [BUFFER] | |
2281 */ | |
2367 | 2282 |
2283 /* #### See comment | |
2284 | |
2285 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS | |
2286 | |
2287 in text.c. | |
2288 */ | |
2289 | |
771 | 2290 while (1) |
2291 { | |
2292 char tempbuf[1024]; /* some random amount */ | |
2293 Charbpos newpos, even_newer_pos; | |
2294 Charbpos oldpos = lisp_buffer_stream_startpos (istr); | |
2295 Bytecount size_in_bytes = | |
2296 Lstream_read (istr, tempbuf, sizeof (tempbuf)); | |
2297 | |
2298 if (!size_in_bytes) | |
2299 break; | |
2300 newpos = lisp_buffer_stream_startpos (istr); | |
2301 Lstream_write (ostr, tempbuf, size_in_bytes); | |
2302 even_newer_pos = lisp_buffer_stream_startpos (istr); | |
2303 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos), | |
2304 even_newer_pos, 0); | |
428 | 2305 } |
771 | 2306 |
2307 { | |
2308 Charcount retlen = | |
2309 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b; | |
2310 Lstream_close (istr); | |
2311 Lstream_close (ostr); | |
2312 NUNGCPRO; | |
2313 UNGCPRO; | |
2314 Lstream_delete (istr); | |
2315 if (!NILP (from_outstream)) | |
2316 Lstream_delete (XLSTREAM (from_outstream)); | |
2317 Lstream_delete (XLSTREAM (outstream)); | |
2318 if (!NILP (to_outstream)) | |
2319 Lstream_delete (XLSTREAM (to_outstream)); | |
2320 if (!NILP (auto_outstream)) | |
2321 Lstream_delete (XLSTREAM (auto_outstream)); | |
2322 Lstream_delete (XLSTREAM (lb_outstream)); | |
2323 return make_int (retlen); | |
2324 } | |
2325 } | |
2326 | |
3302 | 2327 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, |
2328 "*r\nzDecode from coding system: \ni", /* | |
771 | 2329 Decode the text between START and END which is encoded in CODING-SYSTEM. |
2330 This is useful if you've read in encoded text from a file without decoding | |
2331 it (e.g. you read in a JIS-formatted file but used the `binary' or | |
2332 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). | |
2333 Return length of decoded text. | |
3302 | 2334 BUFFER defaults to the current buffer if unspecified, and when interactive. |
771 | 2335 */ |
2336 (start, end, coding_system, buffer)) | |
2337 { | |
2338 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2339 CODING_DECODE); | |
2340 } | |
2341 | |
3302 | 2342 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, |
2343 "*r\nzEncode to coding system: \ni", /* | |
771 | 2344 Encode the text between START and END using CODING-SYSTEM. |
2345 This will, for example, convert Japanese characters into stuff such as | |
3302 | 2346 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text. |
2347 BUFFER defaults to the current buffer if unspecified, and when interactive. | |
771 | 2348 */ |
2349 (start, end, coding_system, buffer)) | |
2350 { | |
2351 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2352 CODING_ENCODE); | |
428 | 2353 } |
2354 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2355 DEFUN ("query-coding-region", Fquery_coding_region, 3, 7, 0, /* |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2356 Work out whether CODING-SYSTEM can losslessly encode a region. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2357 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2358 START and END are the beginning and end of the region to check. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2359 CODING-SYSTEM is the coding system to try. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2360 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2361 Optional argument BUFFER is the buffer to check, and defaults to the current |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2362 buffer. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2363 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2364 IGNORE-INVALID-SEQUENCESP, also an optional argument, says to treat XEmacs |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2365 characters which have an unambiguous encoded representation, despite being |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2366 undefined in what they represent, as encodable. These chiefly arise with |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2367 variable-length encodings like UTF-8 and UTF-16, where an invalid sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2368 is passed through to XEmacs as a sequence of characters with a defined |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2369 correspondence to the octets on disk, but no non-error semantics; see the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2370 `invalid-sequence-coding-system' argument to `set-language-info'. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2371 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2372 They can also arise with fixed-length encodings like ISO 8859-7, where |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2373 certain octets on disk have undefined values, and treating them as |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2374 corresponding to the ISO 8859-1 characters with the same numerical values |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2375 may lead to data that is not understood by other applications. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2376 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2377 Optional argument ERRORP says to signal a `text-conversion-error' if some |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2378 character in the region cannot be encoded, and defaults to nil. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2379 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2380 Optional argument HIGHLIGHT says to display unencodable characters in the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2381 region using `query-coding-warning-face'. It defaults to nil. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2382 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2383 This function can return multiple values; the intention is that callers use |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2384 `multiple-value-bind' or the related CL multiple value functions to deal |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2385 with it. The first result is `t' if the region can be encoded using |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2386 CODING-SYSTEM, or `nil' if not. If the region cannot be encoded using |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2387 CODING-SYSTEM, the second result is a range table describing the positions |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2388 of the unencodable characters. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2389 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2390 Ranges that describe characters that would be ignored were |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2391 IGNORE-INVALID-SEQUENCESP non-nil map to the symbol `invalid-sequence'; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2392 other ranges map to the symbol `unencodable'. If IGNORE-INVALID-SEQUENCESP |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2393 is non-nil, all ranges will map to the symbol `unencodable'. See |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2394 `make-range-table' for more details of range tables. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2395 */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2396 (start, end, coding_system, buffer, ignore_invalid_sequencesp, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2397 errorp, highlight)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2398 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2399 Charbpos b, e; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2400 struct buffer *buf = decode_buffer (buffer, 1); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2401 Lisp_Object result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2402 int flags = 0, speccount = specpdl_depth (); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2403 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2404 coding_system = Fget_coding_system (coding_system); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2405 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2406 get_buffer_range_char (buf, start, end, &b, &e, 0); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2407 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2408 if (buf != current_buffer) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2409 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2410 record_unwind_protect (save_current_buffer_restore, Fcurrent_buffer ()); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2411 set_buffer_internal (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2412 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2413 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2414 record_unwind_protect (save_excursion_restore, save_excursion_save ()); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2415 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2416 BUF_SET_PT (buf, b); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2417 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2418 if (!NILP (ignore_invalid_sequencesp)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2419 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2420 flags |= QUERY_METHOD_IGNORE_INVALID_SEQUENCES; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2421 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2422 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2423 if (!NILP (errorp)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2424 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2425 flags |= QUERY_METHOD_ERRORP; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2426 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2427 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2428 if (!NILP (highlight)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2429 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2430 flags |= QUERY_METHOD_HIGHLIGHT; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2431 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2432 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2433 result = XCODESYSMETH_OR_GIVEN (coding_system, query, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2434 (coding_system, buf, e, flags), Qunbound); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2435 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2436 if (UNBOUNDP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2437 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2438 signal_error (Qtext_conversion_error, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2439 "Coding system doesn't say what it can encode", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2440 XCODING_SYSTEM_NAME (coding_system)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2441 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2442 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2443 result = (NILP (result)) ? Qt : values2 (Qnil, result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2444 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2445 return unbind_to_1 (speccount, result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2446 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2447 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2448 |
428 | 2449 |
2450 /************************************************************************/ | |
771 | 2451 /* Chain methods */ |
428 | 2452 /************************************************************************/ |
2453 | |
771 | 2454 /* #### Need a way to create "opposite-direction" coding systems. */ |
2455 | |
2456 /* Chain two or more coding systems together to make a combination coding | |
2457 system. */ | |
2458 | |
2459 struct chain_coding_system | |
2460 { | |
2461 /* List of coding systems, in decode order */ | |
2462 Lisp_Object *chain; | |
2463 /* Number of coding systems in list */ | |
2464 int count; | |
2465 /* Coding system to return as a result of canonicalize-after-coding */ | |
2466 Lisp_Object canonicalize_after_coding; | |
2467 }; | |
2468 | |
2469 struct chain_coding_stream | |
2470 { | |
2471 int initted; | |
2472 /* Lstreams for chain coding system */ | |
2473 Lisp_Object *lstreams; | |
2474 int lstream_count; | |
2475 }; | |
2476 | |
1204 | 2477 static const struct memory_description chain_coding_system_description[] = { |
2478 { XD_INT, offsetof (struct chain_coding_system, count) }, | |
2367 | 2479 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain), |
2551 | 2480 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
1204 | 2481 { XD_LISP_OBJECT, offsetof (struct chain_coding_system, |
2482 canonicalize_after_coding) }, | |
771 | 2483 { XD_END } |
2484 }; | |
2485 | |
1204 | 2486 static const struct memory_description chain_coding_stream_description_1 [] = { |
2487 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) }, | |
2367 | 2488 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams), |
2551 | 2489 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
771 | 2490 { XD_END } |
2491 }; | |
2492 | |
1204 | 2493 const struct sized_memory_description chain_coding_stream_description = { |
2494 sizeof (struct chain_coding_stream), chain_coding_stream_description_1 | |
2495 }; | |
2496 | |
2497 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain); | |
2498 | |
771 | 2499 static Lisp_Object |
2500 chain_canonicalize (Lisp_Object codesys) | |
2501 { | |
2502 /* We make use of the fact that this method is called at init time, after | |
2503 properties have been parsed. init_method is called too early. */ | |
2504 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2505 any more. */ | |
2506 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys), | |
2507 XCODING_SYSTEM_CHAIN_CHAIN (codesys)); | |
2508 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys), | |
2509 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys), | |
2510 chain)); | |
2511 Fputhash (chain, codesys, Vchain_canonicalize_hash_table); | |
2512 return codesys; | |
2513 } | |
2514 | |
2515 static Lisp_Object | |
2516 chain_canonicalize_after_coding (struct coding_stream *str) | |
2517 { | |
2518 Lisp_Object cac = | |
2519 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys); | |
2520 if (!NILP (cac)) | |
2521 return cac; | |
2522 return str->codesys; | |
2523 #if 0 | |
2524 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2525 Lisp_Object us = str->codesys, codesys; | |
2526 int i; | |
2527 Lisp_Object chain; | |
2528 Lisp_Object tail; | |
2529 int changed = 0; | |
2530 | |
2531 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2532 any more. */ | |
2533 if (str->direction == CODING_ENCODE || !data->initted) | |
2534 return us; | |
2535 | |
2536 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us), | |
2537 XCODING_SYSTEM_CHAIN_CHAIN (us)); | |
2538 | |
2539 tail = chain; | |
2540 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++) | |
2541 { | |
2542 codesys = (coding_stream_canonicalize_after_coding | |
2543 (XLSTREAM (data->lstreams[i]))); | |
2544 if (!EQ (codesys, XCAR (tail))) | |
2545 changed = 1; | |
2546 XCAR (tail) = codesys; | |
2547 tail = XCDR (tail); | |
2548 } | |
2549 | |
2550 if (!changed) | |
2551 return us; | |
2552 | |
2553 chain = delq_no_quit (Qnil, chain); | |
2554 | |
2555 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) && | |
2556 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us))) | |
2557 { | |
2558 if (NILP (chain)) | |
2559 return Qnil; | |
2560 if (NILP (XCDR (chain))) | |
2561 return XCAR (chain); | |
2562 } | |
2563 | |
2564 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us), | |
2565 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us), | |
2566 chain)), Vchain_canonicalize_hash_table, | |
2567 Qnil); | |
2568 if (!NILP (codesys)) | |
2569 return codesys; | |
2570 return make_internal_coding_system | |
2571 (us, "internal-chain-canonicalizer-wrapper", | |
2572 Qchain, Qunbound, list2 (Qchain, chain)); | |
2573 #endif /* 0 */ | |
2574 } | |
2575 | |
2576 static void | |
2577 chain_init (Lisp_Object codesys) | |
2578 { | |
2579 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil; | |
2580 } | |
2581 | |
2582 static void | |
2583 chain_mark (Lisp_Object codesys) | |
2584 { | |
2585 int i; | |
2586 | |
2587 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++) | |
2588 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]); | |
2589 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys)); | |
2590 } | |
2591 | |
2592 static void | |
2593 chain_mark_coding_stream_1 (struct chain_coding_stream *data) | |
2594 { | |
2595 int i; | |
2596 | |
2597 for (i = 0; i < data->lstream_count; i++) | |
2598 mark_object (data->lstreams[i]); | |
2599 } | |
2600 | |
2601 static void | |
2602 chain_mark_coding_stream (struct coding_stream *str) | |
2603 { | |
2604 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2605 } | |
2606 | |
2607 static void | |
2608 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
2609 { | |
2610 int i; | |
2611 | |
826 | 2612 write_c_string (printcharfun, "("); |
771 | 2613 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++) |
2614 { | |
826 | 2615 write_c_string (printcharfun, i == 0 ? "" : "->"); |
771 | 2616 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i], |
2617 printcharfun, escapeflag); | |
2618 } | |
2619 { | |
2620 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs); | |
2621 if (!NILP (cac)) | |
2622 { | |
2623 if (i > 0) | |
826 | 2624 write_c_string (printcharfun, " "); |
2625 write_c_string (printcharfun, "canonicalize-after-coding="); | |
771 | 2626 print_coding_system_in_print_method (cac, printcharfun, escapeflag); |
2627 } | |
2628 } | |
2629 | |
826 | 2630 write_c_string (printcharfun, ")"); |
771 | 2631 } |
2632 | |
2633 static void | |
2634 chain_rewind_coding_stream_1 (struct chain_coding_stream *data) | |
2635 { | |
2636 /* Each will rewind the next; there is always at least one stream (the | |
2637 dynarr stream at the end) if we're initted */ | |
2638 if (data->initted) | |
2639 Lstream_rewind (XLSTREAM (data->lstreams[0])); | |
2640 } | |
2641 | |
2642 static void | |
2643 chain_rewind_coding_stream (struct coding_stream *str) | |
2644 { | |
2645 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2646 } | |
2647 | |
2648 static void | |
2649 chain_init_coding_streams_1 (struct chain_coding_stream *data, | |
2650 unsigned_char_dynarr *dst, | |
2651 int ncodesys, Lisp_Object *codesys, | |
2652 enum encode_decode direction) | |
2653 { | |
2654 int i; | |
2655 Lisp_Object lstream_out; | |
2656 | |
2657 data->lstream_count = ncodesys + 1; | |
2658 data->lstreams = xnew_array (Lisp_Object, data->lstream_count); | |
2659 | |
2660 lstream_out = make_dynarr_output_stream (dst); | |
2661 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0); | |
2662 data->lstreams[data->lstream_count - 1] = lstream_out; | |
2663 | |
2664 for (i = ncodesys - 1; i >= 0; i--) | |
2665 { | |
2666 data->lstreams[i] = | |
2667 make_coding_output_stream | |
2668 (XLSTREAM (lstream_out), | |
2669 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i], | |
800 | 2670 direction, 0); |
771 | 2671 lstream_out = data->lstreams[i]; |
2672 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, | |
2673 0); | |
2674 } | |
2675 data->initted = 1; | |
2676 } | |
2677 | |
2678 static Bytecount | |
2679 chain_convert (struct coding_stream *str, const UExtbyte *src, | |
2680 unsigned_char_dynarr *dst, Bytecount n) | |
2681 { | |
2682 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2683 | |
2684 if (str->eof) | |
2685 { | |
2686 /* Each will close the next; there is always at least one stream (the | |
2687 dynarr stream at the end) if we're initted. We need to close now | |
2688 because more data may be generated. */ | |
2689 if (data->initted) | |
2690 Lstream_close (XLSTREAM (data->lstreams[0])); | |
2691 return n; | |
2692 } | |
2693 | |
2694 if (!data->initted) | |
2695 chain_init_coding_streams_1 | |
2696 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys), | |
2697 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction); | |
2698 | |
2699 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0) | |
2700 return -1; | |
2701 return n; | |
2702 } | |
2703 | |
2704 static void | |
2705 chain_finalize_coding_stream_1 (struct chain_coding_stream *data) | |
2706 { | |
2707 if (data->lstreams) | |
2708 { | |
2297 | 2709 /* During GC, these objects are unmarked, and are about to be freed. |
2710 We do NOT want them on the free list, and that will cause lots of | |
2711 nastiness including crashes. Just let them be freed normally. */ | |
771 | 2712 if (!gc_in_progress) |
2713 { | |
2714 int i; | |
2297 | 2715 /* Order of deletion is important here! Delete from the head of |
2716 the chain and work your way towards the tail. In general, | |
2717 when you delete an object, there should be *NO* pointers to it | |
2718 anywhere. Deleting back-to-front would be a problem because | |
2719 there are pointers going forward. If there were pointers in | |
2720 both directions, you'd have to disconnect the pointers to a | |
2721 particular object before deleting it. */ | |
771 | 2722 for (i = 0; i < data->lstream_count; i++) |
2723 Lstream_delete (XLSTREAM ((data->lstreams)[i])); | |
2724 } | |
1726 | 2725 xfree (data->lstreams, Lisp_Object *); |
771 | 2726 } |
2727 } | |
2728 | |
2729 static void | |
2730 chain_finalize_coding_stream (struct coding_stream *str) | |
2731 { | |
2732 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2733 } | |
2734 | |
2735 static void | |
2736 chain_finalize (Lisp_Object c) | |
2737 { | |
2738 if (XCODING_SYSTEM_CHAIN_CHAIN (c)) | |
1726 | 2739 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c), Lisp_Object *); |
771 | 2740 } |
2741 | |
428 | 2742 static int |
771 | 2743 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
2744 { | |
2745 if (EQ (key, Qchain)) | |
2746 { | |
2747 Lisp_Object *cslist; | |
2748 int count = 0; | |
2749 int i; | |
2750 | |
2367 | 2751 { |
2752 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2753 { | |
2754 Fget_coding_system (elt); | |
2755 count++; | |
2756 } | |
2757 } | |
771 | 2758 |
2759 cslist = xnew_array (Lisp_Object, count); | |
2760 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist; | |
2761 | |
2762 count = 0; | |
2367 | 2763 { |
2764 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2765 { | |
2766 cslist[count] = Fget_coding_system (elt); | |
2767 count++; | |
2768 } | |
2769 } | |
771 | 2770 |
2771 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count; | |
2772 | |
2773 for (i = 0; i < count - 1; i++) | |
2774 { | |
2775 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) != | |
2776 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE)) | |
2777 invalid_argument_2 ("Sink of first must match source of second", | |
2778 cslist[i], cslist[i + 1]); | |
2779 } | |
2780 } | |
2781 else if (EQ (key, Qcanonicalize_after_coding)) | |
2782 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = | |
2783 Fget_coding_system (value); | |
2784 else | |
2785 return 0; | |
2786 return 1; | |
2787 } | |
2788 | |
2789 static Lisp_Object | |
2790 chain_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2791 { | |
2792 if (EQ (prop, Qchain)) | |
2793 { | |
2794 Lisp_Object result = Qnil; | |
2795 int i; | |
2796 | |
2797 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++) | |
2798 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i], | |
2799 result); | |
2800 | |
2801 return Fnreverse (result); | |
2802 } | |
2803 else if (EQ (prop, Qcanonicalize_after_coding)) | |
2804 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system); | |
2805 else | |
2806 return Qunbound; | |
2807 } | |
2808 | |
2809 static enum source_sink_type | |
2810 chain_conversion_end_type (Lisp_Object codesys) | |
2811 { | |
2812 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys); | |
2813 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys); | |
2814 int charp_source, charp_sink; | |
2815 | |
2816 if (n == 0) | |
2817 return DECODES_BYTE_TO_BYTE; /* arbitrary */ | |
2818 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE); | |
2819 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK); | |
2820 | |
2821 switch (charp_source * 2 + charp_sink) | |
2822 { | |
2823 case 0: return DECODES_BYTE_TO_BYTE; | |
2824 case 1: return DECODES_BYTE_TO_CHARACTER; | |
2825 case 2: return DECODES_CHARACTER_TO_BYTE; | |
2826 case 3: return DECODES_CHARACTER_TO_CHARACTER; | |
2827 } | |
2828 | |
2500 | 2829 ABORT (); |
771 | 2830 return DECODES_BYTE_TO_BYTE; |
2831 } | |
2832 | |
2833 | |
2834 /************************************************************************/ | |
2835 /* No-conversion methods */ | |
2836 /************************************************************************/ | |
2837 | |
2838 /* "No conversion"; used for binary files. We use quotes because there | |
2839 really is some conversion being applied (it does byte<->char | |
2840 conversion), but it appears to the user as if the text is read in | |
2297 | 2841 without conversion. |
2842 | |
2843 #### Shouldn't we _call_ it that, then? And while we're at it, | |
2844 separate it into "to_internal" and "to_external"? */ | |
771 | 2845 DEFINE_CODING_SYSTEM_TYPE (no_conversion); |
2846 | |
2847 /* This is used when reading in "binary" files -- i.e. files that may | |
2848 contain all 256 possible byte values and that are not to be | |
2849 interpreted as being in any particular encoding. */ | |
2850 static Bytecount | |
2851 no_conversion_convert (struct coding_stream *str, | |
2852 const UExtbyte *src, | |
2853 unsigned_char_dynarr *dst, Bytecount n) | |
2854 { | |
2855 UExtbyte c; | |
2856 unsigned int ch = str->ch; | |
2857 Bytecount orign = n; | |
2858 | |
2859 if (str->direction == CODING_DECODE) | |
2860 { | |
2861 while (n--) | |
2862 { | |
2863 c = *src++; | |
2864 | |
2865 DECODE_ADD_BINARY_CHAR (c, dst); | |
2866 } | |
2867 | |
2868 if (str->eof) | |
2869 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2870 } | |
2871 else | |
2872 { | |
2873 | |
2874 while (n--) | |
2875 { | |
2876 c = *src++; | |
826 | 2877 if (byte_ascii_p (c)) |
771 | 2878 { |
2879 assert (ch == 0); | |
2880 Dynarr_add (dst, c); | |
2881 } | |
2882 #ifdef MULE | |
867 | 2883 else if (ibyte_leading_byte_p (c)) |
771 | 2884 { |
2885 assert (ch == 0); | |
2886 if (c == LEADING_BYTE_LATIN_ISO8859_1 || | |
2887 c == LEADING_BYTE_CONTROL_1) | |
2888 ch = c; | |
2889 else | |
2297 | 2890 /* #### This is just plain unacceptable. */ |
771 | 2891 Dynarr_add (dst, '~'); /* untranslatable character */ |
2892 } | |
2893 else | |
2894 { | |
2895 if (ch == LEADING_BYTE_LATIN_ISO8859_1) | |
2896 Dynarr_add (dst, c); | |
2897 else if (ch == LEADING_BYTE_CONTROL_1) | |
2898 { | |
2899 assert (c < 0xC0); | |
2900 Dynarr_add (dst, c - 0x20); | |
2901 } | |
2902 /* else it should be the second or third byte of an | |
2903 untranslatable character, so ignore it */ | |
2904 ch = 0; | |
2905 } | |
2906 #endif /* MULE */ | |
2907 | |
2908 } | |
2909 } | |
2910 | |
2911 str->ch = ch; | |
2912 return orign; | |
2913 } | |
2914 | |
2915 DEFINE_DETECTOR (no_conversion); | |
2916 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
2917 | |
2918 struct no_conversion_detector | |
2919 { | |
2920 int dummy; | |
2921 }; | |
2922 | |
2923 static void | |
2286 | 2924 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src), |
2925 Bytecount UNUSED (n)) | |
771 | 2926 { |
2927 /* Hack until we get better handling of this stuff! */ | |
2928 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY; | |
2929 } | |
2930 | |
2931 | |
2932 /************************************************************************/ | |
2933 /* Convert-eol methods */ | |
2934 /************************************************************************/ | |
2935 | |
2936 /* This is used to handle end-of-line (EOL) differences. It is | |
2819 | 2937 character-to-character, and works (when encoding) *BEFORE* sending data to |
2938 the main encoding routine -- thus, that routine must handle different EOL | |
2939 types itself if it does line-oriented type processing. This is unavoidable | |
2940 because we don't know whether the output of the main encoding routine is | |
2941 ASCII compatible (UTF-16 is definitely not, for example). [[ sjt sez this | |
2942 is bogus. There should be _no_ EOL processing (or processing of any kind) | |
2943 after conversion to external. ]] | |
771 | 2944 |
793 | 2945 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil. |
771 | 2946 */ |
2947 | |
2948 struct convert_eol_coding_system | |
2949 { | |
2950 enum eol_type subtype; | |
2132 | 2951 int dummy; /* On some architectures (eg ia64) the portable dumper can |
2952 produce unaligned access errors without this field. Probably | |
2953 because the combined structure of this structure and | |
2954 Lisp_Coding_System is not properly aligned. */ | |
771 | 2955 }; |
2956 | |
2957 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2958 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2959 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2960 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2961 | |
2962 struct convert_eol_coding_stream | |
2963 { | |
2964 enum eol_type actual; | |
2965 }; | |
2966 | |
1204 | 2967 static const struct memory_description |
771 | 2968 convert_eol_coding_system_description[] = { |
2969 { XD_END } | |
2970 }; | |
2971 | |
1204 | 2972 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol); |
2973 | |
771 | 2974 static void |
2286 | 2975 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun, |
2976 int UNUSED (escapeflag)) | |
771 | 2977 { |
2978 struct convert_eol_coding_system *data = | |
2979 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol); | |
2980 | |
2981 write_fmt_string (printcharfun, "(%s)", | |
2982 data->subtype == EOL_LF ? "lf" : | |
2983 data->subtype == EOL_CRLF ? "crlf" : | |
2984 data->subtype == EOL_CR ? "cr" : | |
793 | 2985 data->subtype == EOL_AUTODETECT ? "nil" : |
2500 | 2986 (ABORT(), "")); |
771 | 2987 } |
2988 | |
2989 static enum source_sink_type | |
2286 | 2990 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys)) |
771 | 2991 { |
2992 return DECODES_CHARACTER_TO_CHARACTER; | |
2993 } | |
2994 | |
2995 static int | |
2996 convert_eol_putprop (Lisp_Object codesys, | |
2997 Lisp_Object key, | |
2998 Lisp_Object value) | |
2999 { | |
3000 struct convert_eol_coding_system *data = | |
3001 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol); | |
3002 | |
3003 if (EQ (key, Qsubtype)) | |
3004 { | |
3005 if (EQ (value, Qlf) /* || EQ (value, Qunix) */) | |
3006 data->subtype = EOL_LF; | |
3007 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */) | |
3008 data->subtype = EOL_CRLF; | |
3009 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */) | |
3010 data->subtype = EOL_CR; | |
793 | 3011 else if (EQ (value, Qnil)) |
771 | 3012 data->subtype = EOL_AUTODETECT; |
3013 else invalid_constant ("Unrecognized eol type", value); | |
3014 } | |
3015 else | |
3016 return 0; | |
3017 return 1; | |
3018 } | |
3019 | |
3020 static Lisp_Object | |
3021 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
3022 { | |
3023 struct convert_eol_coding_system *data = | |
3024 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol); | |
3025 | |
3026 if (EQ (prop, Qsubtype)) | |
3027 { | |
3028 switch (data->subtype) | |
3029 { | |
3030 case EOL_LF: return Qlf; | |
3031 case EOL_CRLF: return Qcrlf; | |
3032 case EOL_CR: return Qcr; | |
793 | 3033 case EOL_AUTODETECT: return Qnil; |
2500 | 3034 default: ABORT (); |
771 | 3035 } |
3036 } | |
3037 | |
3038 return Qunbound; | |
3039 } | |
3040 | |
3041 static void | |
3042 convert_eol_init_coding_stream (struct coding_stream *str) | |
3043 { | |
3044 struct convert_eol_coding_stream *data = | |
3045 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3046 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
3047 } | |
3048 | |
3049 static Bytecount | |
867 | 3050 convert_eol_convert (struct coding_stream *str, const Ibyte *src, |
771 | 3051 unsigned_char_dynarr *dst, Bytecount n) |
3052 { | |
3053 if (str->direction == CODING_DECODE) | |
3054 { | |
3055 struct convert_eol_coding_stream *data = | |
3056 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3057 | |
3058 if (data->actual == EOL_AUTODETECT) | |
3059 { | |
3060 Bytecount n2 = n; | |
867 | 3061 const Ibyte *src2 = src; |
771 | 3062 |
3063 for (; n2; n2--) | |
3064 { | |
867 | 3065 Ibyte c = *src2++; |
771 | 3066 if (c == '\n') |
3067 { | |
3068 data->actual = EOL_LF; | |
3069 break; | |
3070 } | |
3071 else if (c == '\r') | |
3072 { | |
3073 if (n2 == 1) | |
3074 { | |
3075 /* If we're seeing a '\r' at the end of the data, then | |
3076 reject the '\r' right now so it doesn't become an | |
3077 issue in the code below -- unless we're at the end of | |
3078 the stream, in which case we can't do that (because | |
3079 then the '\r' will never get written out), and in any | |
3080 case we should be recognizing it at EOL_CR format. */ | |
3081 if (str->eof) | |
3082 data->actual = EOL_CR; | |
3083 else | |
3084 n--; | |
3085 break; | |
3086 } | |
3087 else if (*src2 == '\n') | |
3088 data->actual = EOL_CRLF; | |
3089 else | |
3090 data->actual = EOL_CR; | |
3091 break; | |
3092 } | |
3093 } | |
3094 } | |
3095 | |
3096 /* str->eof is set, the caller reached EOF on the other end and has | |
3097 no new data to give us. The only data we get is the data we | |
3098 rejected from last time. */ | |
3099 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT || | |
3100 (str->eof)) | |
3101 Dynarr_add_many (dst, src, n); | |
3102 else | |
3103 { | |
867 | 3104 const Ibyte *end = src + n; |
771 | 3105 while (1) |
3106 { | |
3107 /* Find the next section with no \r and add it. */ | |
867 | 3108 const Ibyte *runstart = src; |
3109 src = (Ibyte *) memchr (src, '\r', end - src); | |
771 | 3110 if (!src) |
3111 src = end; | |
3112 Dynarr_add_many (dst, runstart, src - runstart); | |
3113 /* Stop if at end ... */ | |
3114 if (src == end) | |
3115 break; | |
3116 /* ... else, translate as necessary. */ | |
3117 src++; | |
3118 if (data->actual == EOL_CR) | |
3119 Dynarr_add (dst, '\n'); | |
3120 /* We need to be careful here with CRLF. If we see a CR at the | |
3121 end of the data, we don't know if it's part of a CRLF, so we | |
3122 reject it. Otherwise: If it's part of a CRLF, eat it and | |
3123 loop; the following LF gets added next time around. If it's | |
3124 not part of a CRLF, add the CR and loop. The following | |
3125 character will be processed in the next loop iteration. This | |
3126 correctly handles a sequence like CR+CR+LF. */ | |
3127 else if (src == end) | |
3128 return n - 1; /* reject the CR at the end; we'll get it again | |
3129 next time the convert method is called */ | |
3130 else if (*src != '\n') | |
3131 Dynarr_add (dst, '\r'); | |
3132 } | |
3133 } | |
3134 | |
3135 return n; | |
3136 } | |
3137 else | |
3138 { | |
3139 enum eol_type subtype = | |
3140 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
867 | 3141 const Ibyte *end = src + n; |
771 | 3142 |
3143 /* We try to be relatively efficient here. */ | |
3144 if (subtype == EOL_LF) | |
3145 Dynarr_add_many (dst, src, n); | |
3146 else | |
3147 { | |
3148 while (1) | |
3149 { | |
3150 /* Find the next section with no \n and add it. */ | |
867 | 3151 const Ibyte *runstart = src; |
3152 src = (Ibyte *) memchr (src, '\n', end - src); | |
771 | 3153 if (!src) |
3154 src = end; | |
3155 Dynarr_add_many (dst, runstart, src - runstart); | |
3156 /* Stop if at end ... */ | |
3157 if (src == end) | |
3158 break; | |
3159 /* ... else, skip over \n and add its translation. */ | |
3160 src++; | |
3161 Dynarr_add (dst, '\r'); | |
3162 if (subtype == EOL_CRLF) | |
3163 Dynarr_add (dst, '\n'); | |
3164 } | |
3165 } | |
3166 | |
3167 return n; | |
3168 } | |
3169 } | |
3170 | |
3171 static Lisp_Object | |
3172 convert_eol_canonicalize_after_coding (struct coding_stream *str) | |
3173 { | |
3174 struct convert_eol_coding_stream *data = | |
3175 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3176 | |
3177 if (str->direction == CODING_ENCODE) | |
3178 return str->codesys; | |
3179 | |
3180 switch (data->actual) | |
3181 { | |
3182 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf); | |
3183 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf); | |
3184 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr); | |
3185 case EOL_AUTODETECT: return str->codesys; | |
2500 | 3186 default: ABORT (); return Qnil; |
771 | 3187 } |
3188 } | |
3189 | |
3190 | |
3191 /************************************************************************/ | |
3192 /* Undecided methods */ | |
3193 /************************************************************************/ | |
3194 | |
3195 /* Do autodetection. We can autodetect the EOL type only, the coding | |
3196 system only, or both. We only do autodetection when decoding; when | |
3197 encoding, we just pass the data through. | |
3198 | |
3199 When doing just EOL detection, a coding system can be specified; if so, | |
3200 we will decode this data through the coding system before doing EOL | |
3201 detection. The reason for specifying this is so that | |
3202 canonicalize-after-coding works: We will canonicalize the specified | |
3203 coding system into the appropriate EOL type. When doing both coding and | |
3204 EOL detection, we do similar canonicalization, and also catch situations | |
3205 where the EOL type is overspecified, i.e. the detected coding system | |
3206 specifies an EOL type, and either switch to the equivalent | |
3207 non-EOL-processing coding system (if possible), or terminate EOL | |
3208 detection and use the specified EOL type. This prevents data from being | |
3209 EOL-processed twice. | |
3210 */ | |
3211 | |
3212 struct undecided_coding_system | |
3213 { | |
3214 int do_eol, do_coding; | |
3215 Lisp_Object cs; | |
3216 }; | |
3217 | |
3218 struct undecided_coding_stream | |
3219 { | |
3220 Lisp_Object actual; | |
3221 /* Either 2 or 3 lstreams here; see undecided_convert */ | |
3222 struct chain_coding_stream c; | |
3223 | |
3224 struct detection_state *st; | |
3225 }; | |
3226 | |
1204 | 3227 static const struct memory_description undecided_coding_system_description[] = { |
3228 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) }, | |
771 | 3229 { XD_END } |
3230 }; | |
3231 | |
1204 | 3232 static const struct memory_description undecided_coding_stream_description_1 [] = { |
3233 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) }, | |
2367 | 3234 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c), |
2551 | 3235 1, { &chain_coding_stream_description } }, |
1204 | 3236 { XD_END } |
3237 }; | |
3238 | |
3239 const struct sized_memory_description undecided_coding_stream_description = { | |
3240 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1 | |
3241 }; | |
3242 | |
3243 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided); | |
3244 | |
771 | 3245 static void |
3246 undecided_init (Lisp_Object codesys) | |
3247 { | |
3248 struct undecided_coding_system *data = | |
3249 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3250 | |
3251 data->cs = Qnil; | |
3252 } | |
3253 | |
3254 static void | |
3255 undecided_mark (Lisp_Object codesys) | |
3256 { | |
3257 struct undecided_coding_system *data = | |
3258 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3259 | |
3260 mark_object (data->cs); | |
3261 } | |
3262 | |
3263 static void | |
3264 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
3265 { | |
3266 struct undecided_coding_system *data = | |
3267 XCODING_SYSTEM_TYPE_DATA (cs, undecided); | |
3268 int need_space = 0; | |
3269 | |
826 | 3270 write_c_string (printcharfun, "("); |
771 | 3271 if (data->do_eol) |
3272 { | |
826 | 3273 write_c_string (printcharfun, "do-eol"); |
771 | 3274 need_space = 1; |
3275 } | |
3276 if (data->do_coding) | |
3277 { | |
3278 if (need_space) | |
826 | 3279 write_c_string (printcharfun, " "); |
3280 write_c_string (printcharfun, "do-coding"); | |
771 | 3281 need_space = 1; |
3282 } | |
3283 if (!NILP (data->cs)) | |
3284 { | |
3285 if (need_space) | |
826 | 3286 write_c_string (printcharfun, " "); |
3287 write_c_string (printcharfun, "coding-system="); | |
771 | 3288 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag); |
3289 } | |
826 | 3290 write_c_string (printcharfun, ")"); |
771 | 3291 } |
3292 | |
3293 static void | |
3294 undecided_mark_coding_stream (struct coding_stream *str) | |
3295 { | |
1204 | 3296 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual); |
771 | 3297 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); |
3298 } | |
3299 | |
3300 static int | |
3301 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
3302 { | |
3303 struct undecided_coding_system *data = | |
3304 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3305 | |
3306 if (EQ (key, Qdo_eol)) | |
3307 data->do_eol = 1; | |
3308 else if (EQ (key, Qdo_coding)) | |
3309 data->do_coding = 1; | |
3310 else if (EQ (key, Qcoding_system)) | |
3311 data->cs = get_coding_system_for_text_file (value, 0); | |
3312 else | |
3313 return 0; | |
3314 return 1; | |
3315 } | |
3316 | |
3317 static Lisp_Object | |
3318 undecided_getprop (Lisp_Object codesys, Lisp_Object prop) | |
3319 { | |
3320 struct undecided_coding_system *data = | |
3321 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3322 | |
3323 if (EQ (prop, Qdo_eol)) | |
3324 return data->do_eol ? Qt : Qnil; | |
3325 if (EQ (prop, Qdo_coding)) | |
3326 return data->do_coding ? Qt : Qnil; | |
3327 if (EQ (prop, Qcoding_system)) | |
3328 return data->cs; | |
3329 return Qunbound; | |
3330 } | |
3331 | |
3332 static struct detection_state * | |
3333 allocate_detection_state (void) | |
3334 { | |
3335 int i; | |
3336 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3337 struct detection_state *block; | |
3338 | |
3339 for (i = 0; i < coding_detector_count; i++) | |
3340 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3341 | |
3342 block = (struct detection_state *) xmalloc_and_zero (size); | |
3343 | |
3344 size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3345 for (i = 0; i < coding_detector_count; i++) | |
3346 { | |
3347 block->data_offset[i] = size; | |
3348 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3349 } | |
3350 | |
3351 return block; | |
3352 } | |
3353 | |
3354 static void | |
3355 free_detection_state (struct detection_state *st) | |
3356 { | |
3357 int i; | |
3358 | |
3359 for (i = 0; i < coding_detector_count; i++) | |
3360 { | |
3361 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method) | |
3362 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method | |
3363 (st); | |
3364 } | |
3365 | |
1726 | 3366 xfree (st, struct detection_state *); |
771 | 3367 } |
3368 | |
3369 static int | |
3370 coding_category_symbol_to_id (Lisp_Object symbol) | |
428 | 3371 { |
3372 int i; | |
3373 | |
3374 CHECK_SYMBOL (symbol); | |
771 | 3375 for (i = 0; i < coding_detector_count; i++) |
3376 { | |
3377 detector_category_dynarr *cats = | |
3378 Dynarr_at (all_coding_detectors, i).cats; | |
3379 int j; | |
3380 | |
3381 for (j = 0; j < Dynarr_length (cats); j++) | |
3382 if (EQ (Dynarr_at (cats, j).sym, symbol)) | |
3383 return Dynarr_at (cats, j).id; | |
3384 } | |
3385 | |
563 | 3386 invalid_constant ("Unrecognized coding category", symbol); |
1204 | 3387 RETURN_NOT_REACHED (0); |
428 | 3388 } |
3389 | |
771 | 3390 static Lisp_Object |
3391 coding_category_id_to_symbol (int id) | |
428 | 3392 { |
3393 int i; | |
771 | 3394 |
3395 for (i = 0; i < coding_detector_count; i++) | |
3396 { | |
3397 detector_category_dynarr *cats = | |
3398 Dynarr_at (all_coding_detectors, i).cats; | |
3399 int j; | |
3400 | |
3401 for (j = 0; j < Dynarr_length (cats); j++) | |
3402 if (id == Dynarr_at (cats, j).id) | |
3403 return Dynarr_at (cats, j).sym; | |
3404 } | |
3405 | |
2500 | 3406 ABORT (); |
771 | 3407 return Qnil; /* (usually) not reached */ |
428 | 3408 } |
3409 | |
771 | 3410 static Lisp_Object |
3411 detection_result_number_to_symbol (enum detection_result result) | |
428 | 3412 { |
1494 | 3413 /* let compiler warn if not all enumerators are handled */ |
3414 switch (result) { | |
3415 #define FROB(sym, num) case num: return (sym) | |
771 | 3416 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); |
3417 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3418 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3419 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3420 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3421 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3422 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3423 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3424 #undef FROB | |
1494 | 3425 } |
771 | 3426 |
2500 | 3427 ABORT (); |
771 | 3428 return Qnil; /* (usually) not reached */ |
3429 } | |
3430 | |
778 | 3431 #if 0 /* not used */ |
771 | 3432 static enum detection_result |
3433 detection_result_symbol_to_number (Lisp_Object symbol) | |
3434 { | |
1494 | 3435 /* using switch here would be bad style, and doesn't help */ |
771 | 3436 #define FROB(sym, num) if (EQ (symbol, sym)) return (num) |
3437 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); | |
3438 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3439 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3440 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3441 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3442 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3443 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3444 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3445 #undef FROB | |
3446 | |
3447 invalid_constant ("Unrecognized detection result", symbol); | |
3448 return ((enum detection_result) 0); /* not reached */ | |
3449 } | |
778 | 3450 #endif /* 0 */ |
771 | 3451 |
3452 /* Set all detection results for a given detector to a specified value. */ | |
3453 void | |
3454 set_detection_results (struct detection_state *st, int detector, int given) | |
3455 { | |
3456 detector_category_dynarr *cats = | |
3457 Dynarr_at (all_coding_detectors, detector).cats; | |
3458 int i; | |
3459 | |
3460 for (i = 0; i < Dynarr_length (cats); i++) | |
3461 st->categories[Dynarr_at (cats, i).id] = given; | |
3462 } | |
428 | 3463 |
3464 static int | |
3465 acceptable_control_char_p (int c) | |
3466 { | |
3467 switch (c) | |
3468 { | |
3469 /* Allow and ignore control characters that you might | |
3470 reasonably see in a text file */ | |
3471 case '\r': | |
3472 case '\n': | |
3473 case '\t': | |
3474 case 7: /* bell */ | |
3475 case 8: /* backspace */ | |
3476 case 11: /* vertical tab */ | |
3477 case 12: /* form feed */ | |
3478 case 26: /* MS-DOS C-z junk */ | |
3479 case 31: /* '^_' -- for info */ | |
3480 return 1; | |
3481 default: | |
3482 return 0; | |
3483 } | |
3484 } | |
3485 | |
771 | 3486 #ifdef DEBUG_XEMACS |
3487 | |
3488 static UExtbyte | |
3489 hex_digit_to_char (int digit) | |
428 | 3490 { |
771 | 3491 if (digit < 10) |
3492 return digit + '0'; | |
3493 else | |
3494 return digit - 10 + 'A'; | |
428 | 3495 } |
3496 | |
771 | 3497 static void |
3498 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) | |
428 | 3499 { |
3425 | 3500 Extbyte *ascii = alloca_array (Extbyte, n + 1); |
3501 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1); | |
771 | 3502 int i; |
3413 | 3503 DECLARE_EISTRING (eistr_ascii); |
3504 DECLARE_EISTRING (eistr_hex); | |
771 | 3505 |
3506 for (i = 0; i < n; i++) | |
428 | 3507 { |
3425 | 3508 Extbyte c = src[i]; |
771 | 3509 if (c < 0x20) |
3510 ascii[i] = '.'; | |
428 | 3511 else |
771 | 3512 ascii[i] = c; |
3513 hex[3 * i] = hex_digit_to_char (c >> 4); | |
3514 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); | |
3515 hex[3 * i + 2] = ' '; | |
428 | 3516 } |
771 | 3517 ascii[i] = '\0'; |
3518 hex[3 * i - 1] = '\0'; | |
3413 | 3519 |
3520 eicpy_ext(eistr_hex, hex, Qbinary); | |
3521 eicpy_ext(eistr_ascii, ascii, Qbinary); | |
3522 | |
3425 | 3523 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex)); |
428 | 3524 } |
3525 | |
771 | 3526 #endif /* DEBUG_XEMACS */ |
3527 | |
3528 /* Attempt to determine the encoding of the given text. Before calling | |
3529 this function for the first time, you must zero out the detection state. | |
428 | 3530 |
3531 Returns: | |
3532 | |
771 | 3533 0 == keep going |
3534 1 == stop | |
428 | 3535 */ |
3536 | |
3537 static int | |
771 | 3538 detect_coding_type (struct detection_state *st, const UExtbyte *src, |
3539 Bytecount n) | |
428 | 3540 { |
771 | 3541 Bytecount n2 = n; |
3542 const UExtbyte *src2 = src; | |
3543 int i; | |
3544 | |
3545 #ifdef DEBUG_XEMACS | |
3546 if (!NILP (Vdebug_coding_detection)) | |
3547 { | |
3548 int bytes = min (16, n); | |
3549 stderr_out ("detect_coding_type: processing %ld bytes\n", n); | |
3550 stderr_out ("First %d: ", bytes); | |
3551 output_bytes_in_ascii_and_hex (src, bytes); | |
3552 stderr_out ("\nLast %d: ", bytes); | |
3553 output_bytes_in_ascii_and_hex (src + n - bytes, bytes); | |
3554 stderr_out ("\n"); | |
3555 } | |
3556 #endif /* DEBUG_XEMACS */ | |
428 | 3557 if (!st->seen_non_ascii) |
3558 { | |
771 | 3559 for (; n2; n2--, src2++) |
428 | 3560 { |
771 | 3561 UExtbyte c = *src2; |
428 | 3562 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) |
3563 { | |
3564 st->seen_non_ascii = 1; | |
3565 break; | |
3566 } | |
3567 } | |
3568 } | |
3569 | |
771 | 3570 for (i = 0; i < coding_detector_count; i++) |
3571 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n); | |
3572 | |
3573 st->bytes_seen += n; | |
3574 | |
3575 #ifdef DEBUG_XEMACS | |
3576 if (!NILP (Vdebug_coding_detection)) | |
3577 { | |
3578 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii); | |
1494 | 3579 if (coding_detector_category_count <= 0) |
3580 stderr_out ("found %d detector categories\n", | |
3581 coding_detector_category_count); | |
771 | 3582 for (i = 0; i < coding_detector_category_count; i++) |
3583 stderr_out_lisp | |
3584 ("%s: %s\n", | |
3585 2, | |
3586 coding_category_id_to_symbol (i), | |
3587 detection_result_number_to_symbol ((enum detection_result) | |
3588 st->categories[i])); | |
3589 } | |
3590 #endif /* DEBUG_XEMACS */ | |
3591 | |
3592 { | |
3593 int not_unlikely = 0; | |
3594 int retval; | |
3595 | |
3596 for (i = 0; i < coding_detector_category_count; i++) | |
3597 if (st->categories[i] >= 0) | |
3598 not_unlikely++; | |
3599 | |
3600 retval = (not_unlikely <= 1 | |
3601 #if 0 /* this is bogus */ | |
3602 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION | |
428 | 3603 #endif |
771 | 3604 ); |
3605 | |
3606 #ifdef DEBUG_XEMACS | |
3607 if (!NILP (Vdebug_coding_detection)) | |
3608 stderr_out ("detect_coding_type: returning %d (%s)\n", | |
3609 retval, retval ? "stop" : "keep going"); | |
3610 #endif /* DEBUG_XEMACS */ | |
3611 | |
3612 return retval; | |
428 | 3613 } |
3614 } | |
3615 | |
3616 static Lisp_Object | |
771 | 3617 detected_coding_system (struct detection_state *st) |
428 | 3618 { |
771 | 3619 int i; |
3620 int even = 1; | |
3621 | |
3622 if (st->seen_non_ascii) | |
3623 { | |
3624 for (i = 0; i < coding_detector_category_count; i++) | |
3625 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY) | |
3626 { | |
3627 even = 0; | |
3628 break; | |
3629 } | |
3630 } | |
3631 | |
3632 /* #### Here we are ignoring the results of detection when it's all | |
3633 ASCII. This is obviously a bad thing. But we need to fix up the | |
3634 existing detection methods somewhat before we can switch. */ | |
3635 if (even) | |
428 | 3636 { |
3637 /* If the file was entirely or basically ASCII, use the | |
3638 default value of `buffer-file-coding-system'. */ | |
3639 Lisp_Object retval = | |
3640 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system; | |
3641 if (!NILP (retval)) | |
3642 { | |
771 | 3643 retval = find_coding_system_for_text_file (retval, 0); |
428 | 3644 if (NILP (retval)) |
3645 { | |
3646 warn_when_safe | |
3647 (Qbad_variable, Qwarning, | |
3648 "Invalid `default-buffer-file-coding-system', set to nil"); | |
3649 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; | |
3650 } | |
3651 } | |
3652 if (NILP (retval)) | |
4100 | 3653 retval = Fget_coding_system (Qbinary); |
428 | 3654 return retval; |
3655 } | |
3656 else | |
3657 { | |
771 | 3658 int likelihood; |
3659 Lisp_Object retval = Qnil; | |
3660 | |
3661 /* Look through the coding categories first by likelihood and then by | |
3662 priority and find the first one that is allowed. */ | |
3663 | |
3664 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--) | |
428 | 3665 { |
771 | 3666 for (i = 0; i < coding_detector_category_count; i++) |
3667 { | |
3668 int cat = coding_category_by_priority[i]; | |
3669 if (st->categories[cat] == likelihood && | |
3670 !NILP (coding_category_system[cat])) | |
3671 { | |
3672 retval = (get_coding_system_for_text_file | |
3673 (coding_category_system[cat], 0)); | |
3674 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY) | |
3675 warn_when_safe_lispobj | |
3676 (intern ("detection"), | |
793 | 3677 Qwarning, |
771 | 3678 emacs_sprintf_string_lisp |
3679 ( | |
3680 "Detected coding %s is unlikely to be correct (likelihood == `%s')", | |
3681 Qnil, 2, XCODING_SYSTEM_NAME (retval), | |
3682 detection_result_number_to_symbol | |
3683 ((enum detection_result) likelihood))); | |
3684 return retval; | |
3685 } | |
3686 } | |
428 | 3687 } |
771 | 3688 |
3689 return Fget_coding_system (Qraw_text); | |
428 | 3690 } |
3691 } | |
3692 | |
1347 | 3693 /* Look for a coding system in the string (skipping over leading |
3694 blanks). If found, return it, otherwise nil. */ | |
3695 | |
3696 static Lisp_Object | |
2531 | 3697 snarf_coding_system (const UExtbyte *p, Bytecount len) |
1347 | 3698 { |
3699 Bytecount n; | |
2531 | 3700 UExtbyte *name; |
1347 | 3701 |
3702 while (*p == ' ' || *p == '\t') p++, len--; | |
3703 len = min (len, 1000); | |
3704 name = alloca_ibytes (len + 1); | |
3705 memcpy (name, p, len); | |
3706 name[len] = '\0'; | |
3707 | |
3708 /* Get coding system name */ | |
3709 /* Characters valid in a MIME charset name (rfc 1521), | |
3710 and in a Lisp symbol name. */ | |
3711 n = qxestrspn (name, | |
3712 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
3713 "abcdefghijklmnopqrstuvwxyz" | |
3714 "0123456789" | |
3715 "!$%&*+-.^_{|}~"); | |
3716 if (n > 0) | |
3717 { | |
3718 name[n] = '\0'; | |
2531 | 3719 /* This call to intern_int() is OK because we already verified that |
3720 there are only ASCII characters in the string */ | |
3721 return find_coding_system_for_text_file (intern_int ((Ibyte *) name), 0); | |
1347 | 3722 } |
3723 | |
3724 return Qnil; | |
3725 } | |
3726 | |
428 | 3727 /* Given a seekable read stream and potential coding system and EOL type |
3728 as specified, do any autodetection that is called for. If the | |
3729 coding system and/or EOL type are not `autodetect', they will be left | |
3730 alone; but this function will never return an autodetect coding system | |
3731 or EOL type. | |
3732 | |
3733 This function does not automatically fetch subsidiary coding systems; | |
3734 that should be unnecessary with the explicit eol-type argument. */ | |
3735 | |
3736 #define LENGTH(string_constant) (sizeof (string_constant) - 1) | |
3737 | |
771 | 3738 static Lisp_Object |
3739 unwind_free_detection_state (Lisp_Object opaque) | |
3740 { | |
3741 struct detection_state *st = | |
3742 (struct detection_state *) get_opaque_ptr (opaque); | |
3743 free_detection_state (st); | |
3744 free_opaque_ptr (opaque); | |
3745 return Qnil; | |
3746 } | |
3747 | |
1347 | 3748 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file' |
3749 in files.el. Look into combining them. */ | |
3750 | |
771 | 3751 static Lisp_Object |
3752 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len) | |
428 | 3753 { |
771 | 3754 const UExtbyte *p; |
3755 const UExtbyte *scan_end; | |
2531 | 3756 Bytecount cookie_len; |
771 | 3757 |
3758 /* Look for initial "-*-"; mode line prefix */ | |
3759 for (p = data, | |
3760 scan_end = data + len - LENGTH ("-*-coding:?-*-"); | |
3761 p <= scan_end | |
3762 && *p != '\n' | |
3763 && *p != '\r'; | |
3764 p++) | |
3765 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3766 { | |
3767 const UExtbyte *local_vars_beg = p + 3; | |
3768 /* Look for final "-*-"; mode line suffix */ | |
3769 for (p = local_vars_beg, | |
3770 scan_end = data + len - LENGTH ("-*-"); | |
3771 p <= scan_end | |
428 | 3772 && *p != '\n' |
3773 && *p != '\r'; | |
771 | 3774 p++) |
3775 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3776 { | |
3777 const UExtbyte *suffix = p; | |
3778 /* Look for "coding:" */ | |
3779 for (p = local_vars_beg, | |
3780 scan_end = suffix - LENGTH ("coding:?"); | |
3781 p <= scan_end; | |
3782 p++) | |
3783 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0 | |
3784 && (p == local_vars_beg | |
3785 || (*(p-1) == ' ' || | |
3786 *(p-1) == '\t' || | |
3787 *(p-1) == ';'))) | |
3788 { | |
3789 p += LENGTH ("coding:"); | |
1347 | 3790 return snarf_coding_system (p, suffix - p); |
771 | 3791 break; |
3792 } | |
3793 break; | |
3794 } | |
3795 break; | |
3796 } | |
3797 | |
2531 | 3798 /* Look for ;;;###coding system */ |
3799 | |
3800 cookie_len = LENGTH (";;;###coding system: "); | |
3801 | |
3802 for (p = data, | |
3803 scan_end = data + len - cookie_len; | |
3804 p <= scan_end; | |
3805 p++) | |
1347 | 3806 { |
2531 | 3807 if (*p == ';' && !memcmp (p, ";;;###coding system: ", cookie_len)) |
3808 { | |
3809 const UExtbyte *suffix; | |
3810 | |
3811 p += cookie_len; | |
3812 suffix = p; | |
3813 while (suffix < scan_end && !isspace (*suffix)) | |
3814 suffix++; | |
3815 return snarf_coding_system (p, suffix - p); | |
3816 } | |
1347 | 3817 } |
3818 | |
3819 return Qnil; | |
771 | 3820 } |
3821 | |
3822 static Lisp_Object | |
3823 determine_real_coding_system (Lstream *stream) | |
3824 { | |
3825 struct detection_state *st = allocate_detection_state (); | |
3826 int depth = record_unwind_protect (unwind_free_detection_state, | |
3827 make_opaque_ptr (st)); | |
3828 UExtbyte buf[4096]; | |
3829 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); | |
3830 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread); | |
3831 | |
3832 if (NILP (coding_system)) | |
3833 { | |
3834 while (1) | |
3835 { | |
3836 if (detect_coding_type (st, buf, nread)) | |
428 | 3837 break; |
771 | 3838 nread = Lstream_read (stream, buf, sizeof (buf)); |
3839 if (nread == 0) | |
3840 break; | |
428 | 3841 } |
771 | 3842 |
3843 coding_system = detected_coding_system (st); | |
428 | 3844 } |
3845 | |
3846 Lstream_rewind (stream); | |
771 | 3847 |
3848 unbind_to (depth); | |
3849 return coding_system; | |
3850 } | |
3851 | |
3852 static void | |
3853 undecided_init_coding_stream (struct coding_stream *str) | |
3854 { | |
3855 struct undecided_coding_stream *data = | |
3856 CODING_STREAM_TYPE_DATA (str, undecided); | |
3857 struct undecided_coding_system *csdata = | |
3858 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3859 | |
3860 data->actual = Qnil; | |
3861 | |
3862 if (str->direction == CODING_DECODE) | |
3863 { | |
3864 Lstream *lst = str->other_end; | |
3865 | |
3866 if ((lst->flags & LSTREAM_FL_READ) && | |
3867 Lstream_seekable_p (lst) && | |
3868 csdata->do_coding) | |
3869 /* We can determine the coding system now. */ | |
3870 data->actual = determine_real_coding_system (lst); | |
3871 } | |
1494 | 3872 |
3873 #ifdef DEBUG_XEMACS | |
3874 if (!NILP (Vdebug_coding_detection)) | |
3875 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual); | |
3876 #endif /* DEBUG_XEMACS */ | |
771 | 3877 } |
3878 | |
3879 static void | |
3880 undecided_rewind_coding_stream (struct coding_stream *str) | |
3881 { | |
3882 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3883 } | |
3884 | |
3885 static void | |
3886 undecided_finalize_coding_stream (struct coding_stream *str) | |
3887 { | |
3888 struct undecided_coding_stream *data = | |
3889 CODING_STREAM_TYPE_DATA (str, undecided); | |
3890 | |
3891 chain_finalize_coding_stream_1 | |
3892 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3893 if (data->st) | |
3894 free_detection_state (data->st); | |
3895 } | |
3896 | |
3897 static Lisp_Object | |
3898 undecided_canonicalize (Lisp_Object codesys) | |
3899 { | |
3900 struct undecided_coding_system *csdata = | |
3901 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3902 if (!csdata->do_eol && !csdata->do_coding) | |
3903 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs; | |
3904 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs)) | |
3905 return Fget_coding_system (Qconvert_eol_autodetect); | |
3906 return codesys; | |
3907 } | |
3908 | |
3909 static Bytecount | |
3910 undecided_convert (struct coding_stream *str, const UExtbyte *src, | |
3911 unsigned_char_dynarr *dst, Bytecount n) | |
3912 { | |
3913 int first_time = 0; | |
3914 | |
3915 if (str->direction == CODING_DECODE) | |
3916 { | |
3917 /* At this point, we have only the following possibilities: | |
3918 | |
3919 do_eol && do_coding | |
3920 do_coding only | |
3921 do_eol only and a coding system was specified | |
3922 | |
3923 Other possibilities are removed during undecided_canonicalize. | |
3924 | |
3925 Therefore, our substreams are either | |
3926 | |
3927 lstream_coding -> lstream_dynarr, or | |
3928 lstream_coding -> lstream_eol -> lstream_dynarr. | |
3929 */ | |
3930 struct undecided_coding_system *csdata = | |
3931 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3932 struct undecided_coding_stream *data = | |
3933 CODING_STREAM_TYPE_DATA (str, undecided); | |
3934 | |
3935 if (str->eof) | |
3936 { | |
3937 /* Each will close the next. We need to close now because more | |
3938 data may be generated. */ | |
3939 if (data->c.initted) | |
3940 Lstream_close (XLSTREAM (data->c.lstreams[0])); | |
3941 return n; | |
3942 } | |
3943 | |
3944 if (!data->c.initted) | |
3945 { | |
3946 data->c.lstream_count = csdata->do_eol ? 3 : 2; | |
3947 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count); | |
3948 | |
3949 data->c.lstreams[data->c.lstream_count - 1] = | |
3950 make_dynarr_output_stream (dst); | |
3951 Lstream_set_buffering | |
3952 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3953 LSTREAM_UNBUFFERED, 0); | |
3954 if (csdata->do_eol) | |
3955 { | |
3956 data->c.lstreams[1] = | |
3957 make_coding_output_stream | |
3958 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3959 Fget_coding_system (Qconvert_eol_autodetect), | |
800 | 3960 CODING_DECODE, 0); |
771 | 3961 Lstream_set_buffering |
3962 (XLSTREAM (data->c.lstreams[1]), | |
3963 LSTREAM_UNBUFFERED, 0); | |
3964 } | |
3965 | |
3966 data->c.lstreams[0] = | |
3967 make_coding_output_stream | |
3968 (XLSTREAM (data->c.lstreams[1]), | |
3969 /* Substitute binary if we need to detect the encoding */ | |
3970 csdata->do_coding ? Qbinary : csdata->cs, | |
800 | 3971 CODING_DECODE, 0); |
771 | 3972 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]), |
3973 LSTREAM_UNBUFFERED, 0); | |
3974 | |
3975 first_time = 1; | |
3976 data->c.initted = 1; | |
3977 } | |
3978 | |
3979 /* If necessary, do encoding-detection now. We do this when we're a | |
3980 writing stream or a non-seekable reading stream, meaning that we | |
3981 can't just process the whole input, rewind, and start over. */ | |
3982 | |
3983 if (csdata->do_coding) | |
3984 { | |
3985 int actual_was_nil = NILP (data->actual); | |
3986 if (NILP (data->actual)) | |
3987 { | |
3988 if (!data->st) | |
3989 data->st = allocate_detection_state (); | |
3990 if (first_time) | |
3991 /* #### This is cheesy. What we really ought to do is buffer | |
3992 up a certain minimum amount of data to get a better result. | |
3993 */ | |
3994 data->actual = look_for_coding_system_magic_cookie (src, n); | |
3995 if (NILP (data->actual)) | |
3996 { | |
3997 /* #### This is cheesy. What we really ought to do is buffer | |
3998 up a certain minimum amount of data so as to get a less | |
3999 random result when doing subprocess detection. */ | |
4000 detect_coding_type (data->st, src, n); | |
4001 data->actual = detected_coding_system (data->st); | |
4100 | 4002 /* kludge to prevent infinite recursion */ |
4003 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system) | |
4004 data->actual = Fget_coding_system (Qbinary); | |
771 | 4005 } |
4006 } | |
4007 /* We need to set the detected coding system if we actually have | |
4008 such a coding system but didn't before. That is the case | |
4009 either when we just detected it in the previous code or when | |
4010 it was detected during undecided_init_coding_stream(). We | |
4011 can check for that using first_time. */ | |
4012 if (!NILP (data->actual) && (actual_was_nil || first_time)) | |
4013 { | |
4014 /* If the detected coding system doesn't allow for EOL | |
4015 autodetection, try to get the equivalent that does; | |
4016 otherwise, disable EOL detection (overriding whatever | |
4017 may already have been detected). */ | |
4018 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT) | |
4019 { | |
4020 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual))) | |
4021 data->actual = | |
4022 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual); | |
4023 else if (data->c.lstream_count == 3) | |
4024 set_coding_stream_coding_system | |
4025 (XLSTREAM (data->c.lstreams[1]), | |
4026 Fget_coding_system (Qidentity)); | |
4027 } | |
4028 set_coding_stream_coding_system | |
4029 (XLSTREAM (data->c.lstreams[0]), data->actual); | |
4030 } | |
4031 } | |
4032 | |
4033 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0) | |
4034 return -1; | |
4035 return n; | |
4036 } | |
4037 else | |
4038 return no_conversion_convert (str, src, dst, n); | |
4039 } | |
4040 | |
4041 static Lisp_Object | |
4042 undecided_canonicalize_after_coding (struct coding_stream *str) | |
4043 { | |
4044 struct undecided_coding_stream *data = | |
4045 CODING_STREAM_TYPE_DATA (str, undecided); | |
4046 Lisp_Object ret, eolret; | |
4047 | |
4048 if (str->direction == CODING_ENCODE) | |
4049 return str->codesys; | |
4050 | |
4051 if (!data->c.initted) | |
4647
e4ed58cb0e5b
Fix bugs with #'find-file, 0-length files, & coding-system-for-read specified.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
4052 return str->codesys; |
771 | 4053 |
4054 ret = coding_stream_canonicalize_after_coding | |
4055 (XLSTREAM (data->c.lstreams[0])); | |
4056 if (NILP (ret)) | |
4647
e4ed58cb0e5b
Fix bugs with #'find-file, 0-length files, & coding-system-for-read specified.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
4057 ret = str->codesys; |
771 | 4058 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) |
4059 return ret; | |
4060 eolret = coding_stream_canonicalize_after_coding | |
4061 (XLSTREAM (data->c.lstreams[1])); | |
4062 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) | |
4063 return ret; | |
4064 return | |
4065 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret, | |
4066 Qsubtype)); | |
4067 } | |
4068 | |
4069 | |
4070 /************************************************************************/ | |
4071 /* Lisp interface: Coding category functions and detection */ | |
4072 /************************************************************************/ | |
4073 | |
4074 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /* | |
4075 Return a list of all recognized coding categories. | |
4076 */ | |
4077 ()) | |
4078 { | |
4079 int i; | |
4080 Lisp_Object list = Qnil; | |
4081 | |
4082 for (i = 0; i < coding_detector_count; i++) | |
4083 { | |
4084 detector_category_dynarr *cats = | |
4085 Dynarr_at (all_coding_detectors, i).cats; | |
4086 int j; | |
4087 | |
4088 for (j = 0; j < Dynarr_length (cats); j++) | |
4089 list = Fcons (Dynarr_at (cats, j).sym, list); | |
4090 } | |
4091 | |
4092 return Fnreverse (list); | |
4093 } | |
4094 | |
4095 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* | |
4096 Change the priority order of the coding categories. | |
4097 LIST should be list of coding categories, in descending order of | |
4098 priority. Unspecified coding categories will be lower in priority | |
4099 than all specified ones, in the same relative order they were in | |
4100 previously. | |
4101 */ | |
4102 (list)) | |
4103 { | |
4104 int *category_to_priority = | |
4105 alloca_array (int, coding_detector_category_count); | |
4106 int i, j; | |
4107 | |
4108 /* First generate a list that maps coding categories to priorities. */ | |
4109 | |
4110 for (i = 0; i < coding_detector_category_count; i++) | |
4111 category_to_priority[i] = -1; | |
4112 | |
4113 /* Highest priority comes from the specified list. */ | |
4114 i = 0; | |
2367 | 4115 { |
4116 EXTERNAL_LIST_LOOP_2 (elt, list) | |
4117 { | |
4118 int cat = coding_category_symbol_to_id (elt); | |
4119 | |
4120 if (category_to_priority[cat] >= 0) | |
4121 sferror ("Duplicate coding category in list", elt); | |
4122 category_to_priority[cat] = i++; | |
4123 } | |
4124 } | |
771 | 4125 |
4126 /* Now go through the existing categories by priority to retrieve | |
4127 the categories not yet specified and preserve their priority | |
4128 order. */ | |
4129 for (j = 0; j < coding_detector_category_count; j++) | |
4130 { | |
4131 int cat = coding_category_by_priority[j]; | |
4132 if (category_to_priority[cat] < 0) | |
4133 category_to_priority[cat] = i++; | |
4134 } | |
4135 | |
4136 /* Now we need to construct the inverse of the mapping we just | |
4137 constructed. */ | |
4138 | |
4139 for (i = 0; i < coding_detector_category_count; i++) | |
4140 coding_category_by_priority[category_to_priority[i]] = i; | |
4141 | |
4142 /* Phew! That was confusing. */ | |
4143 return Qnil; | |
4144 } | |
4145 | |
4146 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /* | |
4147 Return a list of coding categories in descending order of priority. | |
4148 */ | |
4149 ()) | |
4150 { | |
4151 int i; | |
4152 Lisp_Object list = Qnil; | |
4153 | |
4154 for (i = 0; i < coding_detector_category_count; i++) | |
4155 list = | |
4156 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]), | |
4157 list); | |
4158 return Fnreverse (list); | |
4159 } | |
4160 | |
4161 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* | |
4162 Change the coding system associated with a coding category. | |
4163 */ | |
4164 (coding_category, coding_system)) | |
4165 { | |
4166 coding_category_system[coding_category_symbol_to_id (coding_category)] = | |
4167 Fget_coding_system (coding_system); | |
4168 return Qnil; | |
4169 } | |
4170 | |
4171 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* | |
4172 Return the coding system associated with a coding category. | |
4173 */ | |
4174 (coding_category)) | |
4175 { | |
4176 Lisp_Object sys = | |
4177 coding_category_system[coding_category_symbol_to_id (coding_category)]; | |
4178 | |
4179 if (!NILP (sys)) | |
4180 return XCODING_SYSTEM_NAME (sys); | |
4181 return Qnil; | |
4182 } | |
4183 | |
800 | 4184 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will |
4185 read through to the end of STREAM, leaving it there but open. */ | |
4186 | |
771 | 4187 Lisp_Object |
4188 detect_coding_stream (Lisp_Object stream) | |
4189 { | |
4190 Lisp_Object val = Qnil; | |
4191 struct gcpro gcpro1, gcpro2, gcpro3; | |
4192 UExtbyte random_buffer[65536]; | |
4193 Lisp_Object binary_instream = | |
4194 make_coding_input_stream | |
4195 (XLSTREAM (stream), Qbinary, | |
814 | 4196 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER); |
771 | 4197 Lisp_Object decstream = |
4198 make_coding_input_stream | |
4199 (XLSTREAM (binary_instream), | |
800 | 4200 Qundecided, CODING_DECODE, 0); |
771 | 4201 Lstream *decstr = XLSTREAM (decstream); |
4202 | |
4203 GCPRO3 (decstream, stream, binary_instream); | |
4204 /* Read and discard all data; detection happens as a side effect of this, | |
4205 and we examine what was detected afterwards. */ | |
4206 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0) | |
4207 ; | |
4208 | |
4209 val = coding_stream_detected_coding_system (decstr); | |
4210 Lstream_close (decstr); | |
4211 Lstream_delete (decstr); | |
4212 Lstream_delete (XLSTREAM (binary_instream)); | |
4213 UNGCPRO; | |
4214 return val; | |
428 | 4215 } |
4216 | |
4217 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | |
4218 Detect coding system of the text in the region between START and END. | |
444 | 4219 Return a list of possible coding systems ordered by priority. |
3025 | 4220 If only ASCII characters are found, return `undecided' or one of |
428 | 4221 its subsidiary coding systems according to a detected end-of-line |
4222 type. Optional arg BUFFER defaults to the current buffer. | |
4223 */ | |
4224 (start, end, buffer)) | |
4225 { | |
4226 Lisp_Object val = Qnil; | |
4227 struct buffer *buf = decode_buffer (buffer, 0); | |
665 | 4228 Charbpos b, e; |
771 | 4229 Lisp_Object lb_instream; |
428 | 4230 |
4231 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
4232 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
771 | 4233 |
4234 val = detect_coding_stream (lb_instream); | |
4235 Lstream_delete (XLSTREAM (lb_instream)); | |
428 | 4236 return val; |
4237 } | |
4238 | |
4239 | |
771 | 4240 |
4241 #ifdef DEBUG_XEMACS | |
4242 | |
428 | 4243 /************************************************************************/ |
771 | 4244 /* Internal methods */ |
4245 /************************************************************************/ | |
4246 | |
4247 /* Raw (internally-formatted) data. */ | |
4248 DEFINE_CODING_SYSTEM_TYPE (internal); | |
428 | 4249 |
665 | 4250 static Bytecount |
2286 | 4251 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src, |
771 | 4252 unsigned_char_dynarr *dst, Bytecount n) |
4253 { | |
4254 Bytecount orign = n; | |
4255 Dynarr_add_many (dst, src, n); | |
4256 return orign; | |
4257 } | |
4258 | |
4259 #endif /* DEBUG_XEMACS */ | |
4260 | |
4261 | |
4262 | |
4263 #ifdef HAVE_ZLIB | |
4264 | |
4265 /************************************************************************/ | |
4266 /* Gzip methods */ | |
4267 /************************************************************************/ | |
4268 | |
4269 struct gzip_coding_system | |
428 | 4270 { |
771 | 4271 int level; /* 0 through 9, or -1 for default */ |
4272 }; | |
4273 | |
4274 #define CODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4275 (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4276 #define XCODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4277 (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4278 | |
4279 struct gzip_coding_stream | |
428 | 4280 { |
771 | 4281 z_stream stream; |
4282 int stream_initted; | |
4283 int reached_eof; /* #### this should be handled by the caller, once we | |
4284 return LSTREAM_EOF */ | |
4285 }; | |
4286 | |
1204 | 4287 static const struct memory_description |
771 | 4288 gzip_coding_system_description[] = { |
4289 { XD_END } | |
4290 }; | |
4291 | |
1204 | 4292 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip); |
4293 | |
771 | 4294 enum source_sink_type |
4295 gzip_conversion_end_type (Lisp_Object codesys) | |
4296 { | |
4297 return DECODES_BYTE_TO_BYTE; | |
428 | 4298 } |
4299 | |
4300 static void | |
771 | 4301 gzip_init (Lisp_Object codesys) |
4302 { | |
4303 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); | |
4304 data->level = -1; | |
4305 } | |
4306 | |
4307 static void | |
4308 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
428 | 4309 { |
771 | 4310 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip); |
4311 | |
826 | 4312 write_c_string (printcharfun, "("); |
771 | 4313 if (data->level == -1) |
826 | 4314 write_c_string (printcharfun, "default"); |
771 | 4315 else |
4316 print_internal (make_int (data->level), printcharfun, 0); | |
826 | 4317 write_c_string (printcharfun, ")"); |
428 | 4318 } |
4319 | |
4320 static int | |
771 | 4321 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
428 | 4322 { |
771 | 4323 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); |
4324 | |
4325 if (EQ (key, Qlevel)) | |
428 | 4326 { |
771 | 4327 if (EQ (value, Qdefault)) |
4328 data->level = -1; | |
4329 else | |
428 | 4330 { |
771 | 4331 CHECK_INT (value); |
4332 check_int_range (XINT (value), 0, 9); | |
4333 data->level = XINT (value); | |
428 | 4334 } |
4335 } | |
4336 else | |
771 | 4337 return 0; |
4338 return 1; | |
428 | 4339 } |
4340 | |
4341 static Lisp_Object | |
771 | 4342 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop) |
428 | 4343 { |
771 | 4344 struct gzip_coding_system *data = |
4345 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip); | |
4346 | |
4347 if (EQ (prop, Qlevel)) | |
428 | 4348 { |
771 | 4349 if (data->level == -1) |
4350 return Qdefault; | |
4351 return make_int (data->level); | |
428 | 4352 } |
771 | 4353 |
4354 return Qunbound; | |
428 | 4355 } |
4356 | |
4357 static void | |
771 | 4358 gzip_init_coding_stream (struct coding_stream *str) |
428 | 4359 { |
771 | 4360 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4361 if (data->stream_initted) | |
428 | 4362 { |
771 | 4363 if (str->direction == CODING_DECODE) |
4364 inflateEnd (&data->stream); | |
4365 else | |
4366 deflateEnd (&data->stream); | |
4367 data->stream_initted = 0; | |
428 | 4368 } |
771 | 4369 data->reached_eof = 0; |
428 | 4370 } |
4371 | |
4372 static void | |
771 | 4373 gzip_rewind_coding_stream (struct coding_stream *str) |
428 | 4374 { |
771 | 4375 gzip_init_coding_stream (str); |
428 | 4376 } |
4377 | |
771 | 4378 static Bytecount |
4379 gzip_convert (struct coding_stream *str, | |
4380 const UExtbyte *src, | |
4381 unsigned_char_dynarr *dst, Bytecount n) | |
428 | 4382 { |
771 | 4383 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4384 int zerr; | |
4385 if (str->direction == CODING_DECODE) | |
428 | 4386 { |
771 | 4387 if (data->reached_eof) |
4388 return n; /* eat the data */ | |
4389 | |
4390 if (!data->stream_initted) | |
428 | 4391 { |
771 | 4392 xzero (data->stream); |
4393 if (inflateInit (&data->stream) != Z_OK) | |
4394 return LSTREAM_ERROR; | |
4395 data->stream_initted = 1; | |
428 | 4396 } |
771 | 4397 |
4398 data->stream.next_in = (Bytef *) src; | |
4399 data->stream.avail_in = n; | |
4400 | |
4401 /* Normally we stop when we've fed all data to the decompressor; but | |
4402 if we're at the end of the input, and the decompressor hasn't | |
4403 reported EOF, we need to keep going, as there might be more output | |
4404 to generate. Z_OK from the decompressor means input was processed | |
4405 or output was generated; if neither, we break out of the loop. | |
4406 Other return values are: | |
4407 | |
4408 Z_STREAM_END EOF from decompressor | |
4409 Z_DATA_ERROR Corrupted data | |
4410 Z_BUF_ERROR No progress possible (this should happen if | |
4411 we try to feed it an incomplete file) | |
4412 Z_MEM_ERROR Out of memory | |
4413 Z_STREAM_ERROR (should never happen) | |
4414 Z_NEED_DICT (#### when will this happen?) | |
4415 */ | |
4416 while (data->stream.avail_in > 0 || str->eof) | |
4417 { | |
4418 /* Reserve an output buffer of the same size as the input buffer; | |
4419 if that's not enough, we keep reserving the same size. */ | |
4420 Bytecount reserved = n; | |
4421 Dynarr_add_many (dst, 0, reserved); | |
4422 /* Careful here! Don't retrieve the pointer until after | |
4423 reserving the space, or it might be bogus */ | |
4424 data->stream.next_out = | |
4425 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4426 data->stream.avail_out = reserved; | |
4427 zerr = inflate (&data->stream, Z_NO_FLUSH); | |
4428 /* Lop off the unused portion */ | |
4429 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
4430 if (zerr != Z_OK) | |
4431 break; | |
4432 } | |
4433 | |
4434 if (zerr == Z_STREAM_END) | |
4435 data->reached_eof = 1; | |
4436 | |
4437 if ((Bytecount) data->stream.avail_in < n) | |
4438 return n - data->stream.avail_in; | |
4439 | |
4440 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4441 return 0; | |
4442 | |
4443 return LSTREAM_ERROR; | |
428 | 4444 } |
4445 else | |
4446 { | |
771 | 4447 if (!data->stream_initted) |
4448 { | |
4449 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys); | |
4450 xzero (data->stream); | |
4451 if (deflateInit (&data->stream, | |
4452 level == -1 ? Z_DEFAULT_COMPRESSION : level) != | |
4453 Z_OK) | |
4454 return LSTREAM_ERROR; | |
4455 data->stream_initted = 1; | |
428 | 4456 } |
771 | 4457 |
4458 data->stream.next_in = (Bytef *) src; | |
4459 data->stream.avail_in = n; | |
4460 | |
4461 /* Normally we stop when we've fed all data to the compressor; but if | |
4462 we're at the end of the input, and the compressor hasn't reported | |
4463 EOF, we need to keep going, as there might be more output to | |
4464 generate. (To signal EOF on our end, we set the FLUSH parameter | |
4465 to Z_FINISH; when all data is output, Z_STREAM_END will be | |
4466 returned.) Z_OK from the compressor means input was processed or | |
4467 output was generated; if neither, we break out of the loop. Other | |
4468 return values are: | |
4469 | |
4470 Z_STREAM_END EOF from compressor | |
4471 Z_BUF_ERROR No progress possible (should never happen) | |
4472 Z_STREAM_ERROR (should never happen) | |
4473 */ | |
4474 while (data->stream.avail_in > 0 || str->eof) | |
4475 { | |
4476 /* Reserve an output buffer of the same size as the input buffer; | |
4477 if that's not enough, we keep reserving the same size. */ | |
4478 Bytecount reserved = n; | |
4479 Dynarr_add_many (dst, 0, reserved); | |
4480 /* Careful here! Don't retrieve the pointer until after | |
4481 reserving the space, or it might be bogus */ | |
4482 data->stream.next_out = | |
4483 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4484 data->stream.avail_out = reserved; | |
4485 zerr = | |
4486 deflate (&data->stream, | |
4487 str->eof ? Z_FINISH : Z_NO_FLUSH); | |
4488 /* Lop off the unused portion */ | |
4489 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
4490 if (zerr != Z_OK) | |
4491 break; | |
4492 } | |
4493 | |
4494 if ((Bytecount) data->stream.avail_in < n) | |
4495 return n - data->stream.avail_in; | |
4496 | |
4497 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4498 return 0; | |
4499 | |
4500 return LSTREAM_ERROR; | |
428 | 4501 } |
4502 } | |
4503 | |
771 | 4504 #endif /* HAVE_ZLIB */ |
428 | 4505 |
4506 | |
4507 /************************************************************************/ | |
4508 /* Initialization */ | |
4509 /************************************************************************/ | |
4510 | |
4511 void | |
4512 syms_of_file_coding (void) | |
4513 { | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
4514 INIT_LISP_OBJECT (coding_system); |
442 | 4515 |
771 | 4516 DEFSUBR (Fvalid_coding_system_type_p); |
4517 DEFSUBR (Fcoding_system_type_list); | |
428 | 4518 DEFSUBR (Fcoding_system_p); |
4303 | 4519 DEFSUBR (Fautoload_coding_system); |
428 | 4520 DEFSUBR (Ffind_coding_system); |
4521 DEFSUBR (Fget_coding_system); | |
4522 DEFSUBR (Fcoding_system_list); | |
4523 DEFSUBR (Fcoding_system_name); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4524 DEFSUBR (Fmake_coding_system_internal); |
428 | 4525 DEFSUBR (Fcopy_coding_system); |
440 | 4526 DEFSUBR (Fcoding_system_canonical_name_p); |
4527 DEFSUBR (Fcoding_system_alias_p); | |
4528 DEFSUBR (Fcoding_system_aliasee); | |
428 | 4529 DEFSUBR (Fdefine_coding_system_alias); |
4530 DEFSUBR (Fsubsidiary_coding_system); | |
771 | 4531 DEFSUBR (Fcoding_system_base); |
4532 DEFSUBR (Fcoding_system_used_for_io); | |
428 | 4533 |
4534 DEFSUBR (Fcoding_system_type); | |
771 | 4535 DEFSUBR (Fcoding_system_description); |
428 | 4536 DEFSUBR (Fcoding_system_property); |
4537 | |
4538 DEFSUBR (Fcoding_category_list); | |
4539 DEFSUBR (Fset_coding_priority_list); | |
4540 DEFSUBR (Fcoding_priority_list); | |
4541 DEFSUBR (Fset_coding_category_system); | |
4542 DEFSUBR (Fcoding_category_system); | |
4543 | |
4544 DEFSUBR (Fdetect_coding_region); | |
4545 DEFSUBR (Fdecode_coding_region); | |
4546 DEFSUBR (Fencode_coding_region); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4547 DEFSUBR (Fquery_coding_region); |
563 | 4548 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
4549 DEFSYMBOL (Qno_conversion); | |
771 | 4550 DEFSYMBOL (Qconvert_eol); |
4551 DEFSYMBOL (Qconvert_eol_autodetect); | |
4552 DEFSYMBOL (Qconvert_eol_lf); | |
4553 DEFSYMBOL (Qconvert_eol_cr); | |
4554 DEFSYMBOL (Qconvert_eol_crlf); | |
563 | 4555 DEFSYMBOL (Qraw_text); |
771 | 4556 |
563 | 4557 DEFSYMBOL (Qmnemonic); |
4558 DEFSYMBOL (Qeol_type); | |
4559 DEFSYMBOL (Qpost_read_conversion); | |
4560 DEFSYMBOL (Qpre_write_conversion); | |
4561 | |
771 | 4562 DEFSYMBOL (Qtranslation_table_for_decode); |
4563 DEFSYMBOL (Qtranslation_table_for_encode); | |
4564 DEFSYMBOL (Qsafe_chars); | |
4565 DEFSYMBOL (Qsafe_charsets); | |
4566 DEFSYMBOL (Qmime_charset); | |
4567 DEFSYMBOL (Qvalid_codes); | |
4568 | |
563 | 4569 DEFSYMBOL (Qcr); |
4570 DEFSYMBOL (Qlf); | |
4571 DEFSYMBOL (Qcrlf); | |
4572 DEFSYMBOL (Qeol_cr); | |
4573 DEFSYMBOL (Qeol_lf); | |
4574 DEFSYMBOL (Qeol_crlf); | |
4575 DEFSYMBOL (Qencode); | |
4576 DEFSYMBOL (Qdecode); | |
428 | 4577 |
771 | 4578 DEFSYMBOL (Qnear_certainty); |
4579 DEFSYMBOL (Qquite_probable); | |
4580 DEFSYMBOL (Qsomewhat_likely); | |
1494 | 4581 DEFSYMBOL (Qslightly_likely); |
771 | 4582 DEFSYMBOL (Qas_likely_as_unlikely); |
4583 DEFSYMBOL (Qsomewhat_unlikely); | |
4584 DEFSYMBOL (Qquite_improbable); | |
4585 DEFSYMBOL (Qnearly_impossible); | |
4586 | |
4587 DEFSYMBOL (Qdo_eol); | |
4588 DEFSYMBOL (Qdo_coding); | |
4589 | |
4590 DEFSYMBOL (Qcanonicalize_after_coding); | |
4591 | |
4303 | 4592 DEFSYMBOL (Qposix_charset_to_coding_system_hash); |
4593 | |
771 | 4594 DEFSYMBOL (Qescape_quoted); |
4595 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4596 DEFSYMBOL (Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4597 DEFSYMBOL (Qaliases); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4598 DEFSYMBOL (Qcharset_skip_chars_string); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4599 |
771 | 4600 #ifdef HAVE_ZLIB |
4601 DEFSYMBOL (Qgzip); | |
4602 #endif | |
4603 | |
428 | 4604 } |
4605 | |
4606 void | |
4607 lstream_type_create_file_coding (void) | |
4608 { | |
771 | 4609 LSTREAM_HAS_METHOD (coding, reader); |
4610 LSTREAM_HAS_METHOD (coding, writer); | |
4611 LSTREAM_HAS_METHOD (coding, rewinder); | |
4612 LSTREAM_HAS_METHOD (coding, seekable_p); | |
4613 LSTREAM_HAS_METHOD (coding, marker); | |
4614 LSTREAM_HAS_METHOD (coding, flusher); | |
4615 LSTREAM_HAS_METHOD (coding, closer); | |
4616 LSTREAM_HAS_METHOD (coding, finalizer); | |
4617 } | |
4618 | |
4619 void | |
4620 coding_system_type_create (void) | |
4621 { | |
4622 int i; | |
4623 | |
4624 staticpro (&Vcoding_system_hash_table); | |
4625 Vcoding_system_hash_table = | |
4626 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | |
4627 | |
4628 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry); | |
2367 | 4629 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr, |
771 | 4630 &csted_description); |
4631 | |
4632 Vcoding_system_type_list = Qnil; | |
4633 staticpro (&Vcoding_system_type_list); | |
4634 | |
4635 /* Initialize to something reasonable ... */ | |
4636 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++) | |
4637 { | |
4638 coding_category_system[i] = Qnil; | |
1204 | 4639 dump_add_root_lisp_object (&coding_category_system[i]); |
771 | 4640 coding_category_by_priority[i] = i; |
4641 } | |
4642 | |
4643 dump_add_opaque (coding_category_by_priority, | |
4644 sizeof (coding_category_by_priority)); | |
4645 | |
4646 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector); | |
2367 | 4647 dump_add_root_block_ptr (&all_coding_detectors, |
771 | 4648 &detector_dynarr_description); |
4649 | |
4650 dump_add_opaque_int (&coding_system_tick); | |
4651 dump_add_opaque_int (&coding_detector_count); | |
4652 dump_add_opaque_int (&coding_detector_category_count); | |
4653 | |
4654 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, | |
4655 "no-conversion-coding-system-p"); | |
4656 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); | |
4657 | |
4658 INITIALIZE_DETECTOR (no_conversion); | |
4659 DETECTOR_HAS_METHOD (no_conversion, detect); | |
4660 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
4661 | |
4662 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol, | |
4663 "convert-eol-coding-system-p"); | |
4664 CODING_SYSTEM_HAS_METHOD (convert_eol, print); | |
4665 CODING_SYSTEM_HAS_METHOD (convert_eol, convert); | |
4666 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop); | |
4667 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop); | |
4668 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type); | |
4669 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding); | |
4670 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream); | |
4671 | |
4672 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided, | |
4673 "undecided-coding-system-p"); | |
4674 CODING_SYSTEM_HAS_METHOD (undecided, init); | |
4675 CODING_SYSTEM_HAS_METHOD (undecided, mark); | |
4676 CODING_SYSTEM_HAS_METHOD (undecided, print); | |
4677 CODING_SYSTEM_HAS_METHOD (undecided, convert); | |
4678 CODING_SYSTEM_HAS_METHOD (undecided, putprop); | |
4679 CODING_SYSTEM_HAS_METHOD (undecided, getprop); | |
4680 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream); | |
4681 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream); | |
4682 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream); | |
4683 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream); | |
4684 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize); | |
4685 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding); | |
4686 | |
4687 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p"); | |
4688 | |
4689 CODING_SYSTEM_HAS_METHOD (chain, print); | |
4690 CODING_SYSTEM_HAS_METHOD (chain, canonicalize); | |
4691 CODING_SYSTEM_HAS_METHOD (chain, init); | |
4692 CODING_SYSTEM_HAS_METHOD (chain, mark); | |
4693 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream); | |
4694 CODING_SYSTEM_HAS_METHOD (chain, convert); | |
4695 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream); | |
4696 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream); | |
4697 CODING_SYSTEM_HAS_METHOD (chain, finalize); | |
4698 CODING_SYSTEM_HAS_METHOD (chain, putprop); | |
4699 CODING_SYSTEM_HAS_METHOD (chain, getprop); | |
4700 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type); | |
4701 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding); | |
4702 | |
4703 #ifdef DEBUG_XEMACS | |
4704 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p"); | |
4705 CODING_SYSTEM_HAS_METHOD (internal, convert); | |
4706 #endif | |
4707 | |
4708 #ifdef HAVE_ZLIB | |
4709 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p"); | |
4710 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type); | |
4711 CODING_SYSTEM_HAS_METHOD (gzip, convert); | |
4712 CODING_SYSTEM_HAS_METHOD (gzip, init); | |
4713 CODING_SYSTEM_HAS_METHOD (gzip, print); | |
4714 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream); | |
4715 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream); | |
4716 CODING_SYSTEM_HAS_METHOD (gzip, putprop); | |
4717 CODING_SYSTEM_HAS_METHOD (gzip, getprop); | |
4718 #endif | |
4719 } | |
4720 | |
4721 void | |
4722 reinit_coding_system_type_create (void) | |
4723 { | |
4724 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion); | |
4725 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol); | |
4726 REINITIALIZE_CODING_SYSTEM_TYPE (undecided); | |
4727 REINITIALIZE_CODING_SYSTEM_TYPE (chain); | |
4728 #if 0 | |
4729 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper); | |
4730 #endif /* 0 */ | |
4731 #ifdef DEBUG_XEMACS | |
4732 REINITIALIZE_CODING_SYSTEM_TYPE (internal); | |
4733 #endif | |
4734 #ifdef HAVE_ZLIB | |
4735 REINITIALIZE_CODING_SYSTEM_TYPE (gzip); | |
4736 #endif | |
4737 } | |
4738 | |
4739 void | |
4740 reinit_vars_of_file_coding (void) | |
4741 { | |
428 | 4742 } |
4743 | |
4744 void | |
4745 vars_of_file_coding (void) | |
4746 { | |
771 | 4747 /* We always have file-coding support */ |
428 | 4748 Fprovide (intern ("file-coding")); |
4749 | |
1347 | 4750 QScoding_system_cookie = build_string (";;;###coding system: "); |
4751 staticpro (&QScoding_system_cookie); | |
4752 | |
1242 | 4753 #ifdef HAVE_DEFAULT_EOL_DETECTION |
2297 | 4754 /* #### Find a more appropriate place for this comment. |
4755 WARNING: The existing categories are intimately tied to the function | |
1242 | 4756 `coding-system-category' in coding.el. If you change a category, or |
4757 change the layout of any coding system associated with a category, you | |
4758 need to check that function and make sure it's written properly. */ | |
4759 | |
4760 Fprovide (intern ("unix-default-eol-detection")); | |
4761 #endif | |
4762 | |
428 | 4763 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
3142 | 4764 Default coding system used for TTY and X11 keyboard input. |
4765 Under X11, used only to interpet the character for a key event when that | |
4766 event has a KeySym of NoSymbol but does have an associated string keysym, | |
4767 something that's seen with input methods. | |
4768 | |
4769 If you need to set these things to different coding systems, call the | |
4770 function `set-console-tty-coding-system' for the TTY and use this variable | |
4771 for X11. | |
428 | 4772 */ ); |
4773 Vkeyboard_coding_system = Qnil; | |
4774 | |
4775 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* | |
4776 Coding system used for TTY display output. | |
4777 Not used under a windowing system. | |
4778 */ ); | |
4779 Vterminal_coding_system = Qnil; | |
4780 | |
4781 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* | |
440 | 4782 Overriding coding system used when reading from a file or process. |
4783 You should bind this variable with `let', but do not set it globally. | |
4784 If this is non-nil, it specifies the coding system that will be used | |
4785 to decode input on read operations, such as from a file or process. | |
4786 It overrides `buffer-file-coding-system-for-read', | |
428 | 4787 `insert-file-contents-pre-hook', etc. Use those variables instead of |
440 | 4788 this one for permanent changes to the environment. */ ); |
428 | 4789 Vcoding_system_for_read = Qnil; |
4790 | |
4791 DEFVAR_LISP ("coding-system-for-write", | |
4792 &Vcoding_system_for_write /* | |
440 | 4793 Overriding coding system used when writing to a file or process. |
4794 You should bind this variable with `let', but do not set it globally. | |
4795 If this is non-nil, it specifies the coding system that will be used | |
4796 to encode output for write operations, such as to a file or process. | |
4797 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc. | |
4798 Use those variables instead of this one for permanent changes to the | |
4799 environment. */ ); | |
428 | 4800 Vcoding_system_for_write = Qnil; |
4801 | |
4802 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* | |
4803 Coding system used to convert pathnames when accessing files. | |
4804 */ ); | |
4805 Vfile_name_coding_system = Qnil; | |
4806 | |
4807 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /* | |
771 | 4808 Setting this has no effect. It is purely for FSF compatibility. |
428 | 4809 */ ); |
4810 enable_multibyte_characters = 1; | |
771 | 4811 |
4812 Vchain_canonicalize_hash_table = | |
4813 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); | |
4814 staticpro (&Vchain_canonicalize_hash_table); | |
4815 | |
4816 #ifdef DEBUG_XEMACS | |
4817 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /* | |
4818 If non-nil, display debug information about detection operations in progress. | |
4819 Information is displayed on stderr. | |
4820 */ ); | |
4821 Vdebug_coding_detection = Qnil; | |
4822 #endif | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4823 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4824 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4825 Vdefault_query_coding_region_chartab_cache |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4826 = make_lisp_hash_table (25, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4827 staticpro (&Vdefault_query_coding_region_chartab_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4828 #endif |
428 | 4829 } |
4830 | |
2297 | 4831 /* #### reformat this for consistent appearance? */ |
4832 | |
428 | 4833 void |
4834 complex_vars_of_file_coding (void) | |
4835 { | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4836 Fmake_coding_system_internal |
771 | 4837 (Qconvert_eol_cr, Qconvert_eol, |
4838 build_msg_string ("Convert CR to LF"), | |
4839 nconc2 (list6 (Qdocumentation, | |
4840 build_msg_string ( | |
4841 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" | |
4842 "(used internally and under Unix to mark the end of a line)."), | |
4843 Qmnemonic, build_string ("CR->LF"), | |
4844 Qsubtype, Qcr), | |
4845 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
4846 subsidiaries -- it needs the coding systems we're creating | |
4847 to do so! */ | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4848 list4 (Qeol_type, Qlf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4849 Qsafe_charsets, Qt))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4850 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4851 Fmake_coding_system_internal |
771 | 4852 (Qconvert_eol_lf, Qconvert_eol, |
4853 build_msg_string ("Convert LF to LF (do nothing)"), | |
4854 nconc2 (list6 (Qdocumentation, | |
4855 build_msg_string ( | |
4856 "Do nothing."), | |
4857 Qmnemonic, build_string ("LF->LF"), | |
4858 Qsubtype, Qlf), | |
4859 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
4860 subsidiaries -- it needs the coding systems we're creating | |
4861 to do so! */ | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4862 list4 (Qeol_type, Qlf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4863 Qsafe_charsets, Qt))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4864 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4865 Fmake_coding_system_internal |
771 | 4866 (Qconvert_eol_crlf, Qconvert_eol, |
4867 build_msg_string ("Convert CRLF to LF"), | |
4868 nconc2 (list6 (Qdocumentation, | |
4869 build_msg_string ( | |
4870 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" | |
4871 "(used internally and under Unix to mark the end of a line)."), | |
4872 Qmnemonic, build_string ("CRLF->LF"), | |
4873 Qsubtype, Qcrlf), | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4874 |
771 | 4875 /* VERY IMPORTANT! Tell make-coding-system not to generate |
4876 subsidiaries -- it needs the coding systems we're creating | |
4877 to do so! */ | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4878 list4 (Qeol_type, Qlf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4879 Qsafe_charsets, Qt))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4880 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4881 Fmake_coding_system_internal |
771 | 4882 (Qconvert_eol_autodetect, Qconvert_eol, |
4883 build_msg_string ("Autodetect EOL type"), | |
4884 nconc2 (list6 (Qdocumentation, | |
4885 build_msg_string ( | |
4886 "Autodetect the end-of-line type."), | |
4887 Qmnemonic, build_string ("Auto-EOL"), | |
793 | 4888 Qsubtype, Qnil), |
771 | 4889 /* VERY IMPORTANT! Tell make-coding-system not to generate |
4890 subsidiaries -- it needs the coding systems we're creating | |
4891 to do so! */ | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4892 list4 (Qeol_type, Qlf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4893 Qsafe_charsets, Qt))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4894 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4895 Fmake_coding_system_internal |
771 | 4896 (Qundecided, Qundecided, |
4897 build_msg_string ("Undecided (auto-detect)"), | |
4898 nconc2 (list4 (Qdocumentation, | |
4899 build_msg_string | |
4900 ("Automatically detects the correct encoding."), | |
4901 Qmnemonic, build_string ("Auto")), | |
4902 list6 (Qdo_eol, Qt, Qdo_coding, Qt, | |
4903 /* We do EOL detection ourselves so we don't need to be | |
4904 wrapped in an EOL detector. (It doesn't actually hurt, | |
4905 though, I don't think.) */ | |
4906 Qeol_type, Qlf))); | |
4907 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4908 Fmake_coding_system_internal |
771 | 4909 (intern ("undecided-dos"), Qundecided, |
4910 build_msg_string ("Undecided (auto-detect) (CRLF)"), | |
4911 nconc2 (list4 (Qdocumentation, | |
4912 build_msg_string | |
4913 ("Automatically detects the correct encoding; EOL type of CRLF forced."), | |
4914 Qmnemonic, build_string ("Auto")), | |
4915 list4 (Qdo_coding, Qt, | |
4916 Qeol_type, Qcrlf))); | |
4917 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4918 Fmake_coding_system_internal |
771 | 4919 (intern ("undecided-unix"), Qundecided, |
4920 build_msg_string ("Undecided (auto-detect) (LF)"), | |
4921 nconc2 (list4 (Qdocumentation, | |
4922 build_msg_string | |
4923 ("Automatically detects the correct encoding; EOL type of LF forced."), | |
4924 Qmnemonic, build_string ("Auto")), | |
4925 list4 (Qdo_coding, Qt, | |
4926 Qeol_type, Qlf))); | |
4927 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4928 Fmake_coding_system_internal |
771 | 4929 (intern ("undecided-mac"), Qundecided, |
4930 build_msg_string ("Undecided (auto-detect) (CR)"), | |
4931 nconc2 (list4 (Qdocumentation, | |
4932 build_msg_string | |
4933 ("Automatically detects the correct encoding; EOL type of CR forced."), | |
4934 Qmnemonic, build_string ("Auto")), | |
4935 list4 (Qdo_coding, Qt, | |
4936 Qeol_type, Qcr))); | |
4937 | |
428 | 4938 /* Need to create this here or we're really screwed. */ |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4939 Fmake_coding_system_internal |
428 | 4940 (Qraw_text, Qno_conversion, |
771 | 4941 build_msg_string ("Raw Text"), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4942 nconc2 (list4 (Qdocumentation, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4943 build_msg_string ("Raw text converts only line-break " |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4944 "codes, and acts otherwise like " |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4945 "`binary'."), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4946 Qmnemonic, build_string ("Raw")), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4947 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4948 list2 (Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4949 Vcharset_latin_iso8859_1)))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4950 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4951 #else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4952 Qnil)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4953 #endif |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4954 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4955 Fmake_coding_system_internal |
428 | 4956 (Qbinary, Qno_conversion, |
771 | 4957 build_msg_string ("Binary"), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4958 nconc2 (list6 (Qdocumentation, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4959 build_msg_string ( |
771 | 4960 "This coding system is as close as it comes to doing no conversion.\n" |
4961 "On input, each byte is converted directly into the character\n" | |
4962 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" | |
4963 "or `latin-1' character sets. On output, these characters are\n" | |
4964 "converted back to the corresponding bytes, and other characters\n" | |
4965 "are converted to the default character, i.e. `~'."), | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4966 Qeol_type, Qlf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4967 Qmnemonic, build_string ("Binary")), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4968 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4969 list2 (Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4970 Vcharset_latin_iso8859_1)))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4971 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4972 #else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4973 Qnil)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4974 #endif |
428 | 4975 |
771 | 4976 /* Formerly aliased to raw-text! Completely bogus and not even the same |
4977 as FSF Emacs. */ | |
4978 Fdefine_coding_system_alias (Qno_conversion, Qbinary); | |
4979 Fdefine_coding_system_alias (intern ("no-conversion-unix"), | |
4980 intern ("raw-text-unix")); | |
4981 Fdefine_coding_system_alias (intern ("no-conversion-dos"), | |
4982 intern ("raw-text-dos")); | |
4983 Fdefine_coding_system_alias (intern ("no-conversion-mac"), | |
4984 intern ("raw-text-mac")); | |
4985 | |
1318 | 4986 /* These three below will get their defaults set correctly |
4987 in code-init.el. We init them now so we can handle stuff at dump | |
771 | 4988 time before we get to code-init.el. */ |
1318 | 4989 Fdefine_coding_system_alias (Qnative, Qbinary); |
440 | 4990 Fdefine_coding_system_alias (Qterminal, Qbinary); |
4991 Fdefine_coding_system_alias (Qkeyboard, Qbinary); | |
4992 | |
1318 | 4993 Fdefine_coding_system_alias (Qfile_name, Qnative); |
771 | 4994 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf); |
4995 | |
428 | 4996 /* Need this for bootstrapping */ |
771 | 4997 coding_category_system[detector_category_no_conversion] = |
428 | 4998 Fget_coding_system (Qraw_text); |
4999 } |