Mercurial > hg > xemacs-beta
annotate src/file-coding.c @ 5699:a95c89dc9695
Reorganize and update INSTALL.
author | Stephen J. Turnbull <stephen@xemacs.org> |
---|---|
date | Mon, 24 Dec 2012 04:16:31 +0900 |
parents | dae33b5feffe |
children | 7a538e1a4676 |
rev | line source |
---|---|
771 | 1 /* Text encoding conversion functions; coding-system object. |
2 #### rename me to coding-system.c or coding.c | |
428 | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
4 Copyright (C) 1995 Sun Microsystems, Inc. | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
5 Copyright (C) 2000, 2001, 2002, 2003, 2005, 2010 Ben Wing. |
428 | 6 |
7 This file is part of XEmacs. | |
8 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
9 XEmacs is free software: you can redistribute it and/or modify it |
428 | 10 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
11 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
12 option) any later version. |
428 | 13 |
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
20 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
428 | 21 |
771 | 22 /* Synched up with: Not in FSF. */ |
23 | |
24 /* Authorship: | |
25 | |
26 Current primary author: Ben Wing <ben@xemacs.org> | |
27 | |
28 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c | |
29 from Mule 2.? but probably does not share one line of code with that | |
30 original source. Rewriting work started around Dec. 1994. or Jan. 1995. | |
31 Proceeded in earnest till Nov. 1995. | |
32 | |
33 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to | |
34 file-coding.c, with the intention of using it to do end-of-line conversion | |
35 on non-MULE machines (specifically, on Windows machines). He separated | |
36 out the MULE stuff from non-MULE using ifdef's, and searched throughout | |
37 the rest of the source tree looking for coding-system-related code that | |
38 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS. | |
39 | |
40 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system | |
41 types, providing a primitive means of decoding and encoding externally- | |
42 formatted Unicode/UCS_4 and Unicode/UTF_8 data. | |
43 | |
44 January 25, 2000, Martin Buchholz redid and fleshed out the coding | |
45 system alias handling that was first added in prototype form by | |
46 Hrjove Niksic, April 15, 1999. | |
47 | |
48 April to May 2000, Ben Wing: More major reorganization. Adding features | |
49 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the | |
50 "chain" coding system for chaining two together, and doing a lot of | |
51 reorganization in preparation for properly abstracting out the different | |
52 coding system types. | |
53 | |
54 June 2001, Ben Wing: Added Unicode support. Eliminated previous | |
55 junky Unicode translation support. | |
56 | |
57 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished | |
58 abstracting everything except detection, which is hard to abstract (see | |
59 just below). | |
60 | |
61 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code | |
62 to intl-win32.c. Lots more rewriting; very little code is untouched | |
63 from before April 2000. Abstracted the detection code, added multiple | |
64 levels of likelihood to increase the reliability of the algorithm. | |
65 | |
66 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined. | |
67 Removed the conditionals. | |
68 */ | |
69 | |
428 | 70 #include <config.h> |
71 #include "lisp.h" | |
72 | |
73 #include "buffer.h" | |
74 #include "elhash.h" | |
75 #include "insdel.h" | |
76 #include "lstream.h" | |
440 | 77 #include "opaque.h" |
771 | 78 #include "file-coding.h" |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
79 #include "extents.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
80 #include "rangetab.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
81 #include "chartab.h" |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
82 #include "sysfile.h" |
771 | 83 |
84 #ifdef HAVE_ZLIB | |
85 #include "zlib.h" | |
428 | 86 #endif |
87 | |
88 Lisp_Object Vkeyboard_coding_system; | |
89 Lisp_Object Vterminal_coding_system; | |
90 Lisp_Object Vcoding_system_for_read; | |
91 Lisp_Object Vcoding_system_for_write; | |
92 Lisp_Object Vfile_name_coding_system; | |
93 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
94 Lisp_Object Qaliases, Qcharset_skip_chars_string; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
95 |
771 | 96 #ifdef DEBUG_XEMACS |
97 Lisp_Object Vdebug_coding_detection; | |
440 | 98 #endif |
771 | 99 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
100 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
101 extern Lisp_Object Vcharset_ascii, Vcharset_control_1, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
102 Vcharset_latin_iso8859_1; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
103 #endif |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
104 |
771 | 105 typedef struct coding_system_type_entry |
106 { | |
107 struct coding_system_methods *meths; | |
108 } coding_system_type_entry; | |
109 | |
110 typedef struct | |
111 { | |
112 Dynarr_declare (coding_system_type_entry); | |
113 } coding_system_type_entry_dynarr; | |
114 | |
115 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr; | |
116 | |
1204 | 117 static const struct memory_description cste_description_1[] = { |
2551 | 118 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1, |
119 { &coding_system_methods_description } }, | |
771 | 120 { XD_END } |
121 }; | |
122 | |
1204 | 123 static const struct sized_memory_description cste_description = { |
771 | 124 sizeof (coding_system_type_entry), |
125 cste_description_1 | |
126 }; | |
127 | |
1204 | 128 static const struct memory_description csted_description_1[] = { |
771 | 129 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description), |
428 | 130 { XD_END } |
131 }; | |
132 | |
1204 | 133 static const struct sized_memory_description csted_description = { |
771 | 134 sizeof (coding_system_type_entry_dynarr), |
135 csted_description_1 | |
136 }; | |
137 | |
138 static Lisp_Object Vcoding_system_type_list; | |
139 | |
140 /* Coding system currently associated with each coding category. */ | |
141 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES]; | |
142 | |
143 /* Table of all coding categories in decreasing order of priority. | |
144 This describes a permutation of the possible coding categories. */ | |
145 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES]; | |
146 | |
147 /* Value used with to give a unique name to nameless coding systems */ | |
148 int coding_system_tick; | |
149 | |
150 int coding_detector_count; | |
151 int coding_detector_category_count; | |
152 | |
153 detector_dynarr *all_coding_detectors; | |
154 | |
1204 | 155 static const struct memory_description struct_detector_category_description_1[] |
771 | 156 = |
157 { | |
158 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) }, | |
159 { XD_END } | |
160 }; | |
161 | |
1204 | 162 static const struct sized_memory_description struct_detector_category_description = |
771 | 163 { |
164 sizeof (struct detector_category), | |
165 struct_detector_category_description_1 | |
428 | 166 }; |
167 | |
1204 | 168 static const struct memory_description detector_category_dynarr_description_1[] = |
771 | 169 { |
170 XD_DYNARR_DESC (detector_category_dynarr, | |
171 &struct_detector_category_description), | |
172 { XD_END } | |
173 }; | |
174 | |
1204 | 175 static const struct sized_memory_description detector_category_dynarr_description = { |
771 | 176 sizeof (detector_category_dynarr), |
177 detector_category_dynarr_description_1 | |
178 }; | |
179 | |
1204 | 180 static const struct memory_description struct_detector_description_1[] |
771 | 181 = |
182 { | |
2367 | 183 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1, |
2551 | 184 { &detector_category_dynarr_description } }, |
771 | 185 { XD_END } |
186 }; | |
187 | |
1204 | 188 static const struct sized_memory_description struct_detector_description = |
771 | 189 { |
190 sizeof (struct detector), | |
191 struct_detector_description_1 | |
192 }; | |
193 | |
1204 | 194 static const struct memory_description detector_dynarr_description_1[] = |
771 | 195 { |
196 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description), | |
197 { XD_END } | |
198 }; | |
199 | |
1204 | 200 static const struct sized_memory_description detector_dynarr_description = { |
771 | 201 sizeof (detector_dynarr), |
202 detector_dynarr_description_1 | |
203 }; | |
428 | 204 |
205 Lisp_Object Qcoding_systemp; | |
206 | |
771 | 207 Lisp_Object Qraw_text; |
428 | 208 |
209 Lisp_Object Qmnemonic, Qeol_type; | |
210 Lisp_Object Qcr, Qcrlf, Qlf; | |
211 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; | |
212 Lisp_Object Qpost_read_conversion; | |
213 Lisp_Object Qpre_write_conversion; | |
214 | |
771 | 215 Lisp_Object Qtranslation_table_for_decode; |
216 Lisp_Object Qtranslation_table_for_encode; | |
217 Lisp_Object Qsafe_chars; | |
218 Lisp_Object Qsafe_charsets; | |
219 Lisp_Object Qmime_charset; | |
220 Lisp_Object Qvalid_codes; | |
221 | |
222 Lisp_Object Qno_conversion; | |
223 Lisp_Object Qconvert_eol; | |
440 | 224 Lisp_Object Qescape_quoted; |
771 | 225 Lisp_Object Qencode, Qdecode; |
226 | |
227 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf; | |
228 Lisp_Object Qconvert_eol_autodetect; | |
229 | |
230 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely; | |
1494 | 231 Lisp_Object Qslightly_likely; |
771 | 232 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable; |
233 Lisp_Object Qnearly_impossible; | |
234 | |
235 Lisp_Object Qdo_eol, Qdo_coding; | |
236 | |
237 Lisp_Object Qcanonicalize_after_coding; | |
238 | |
1347 | 239 Lisp_Object QScoding_system_cookie; |
240 | |
4303 | 241 Lisp_Object Qposix_charset_to_coding_system_hash; |
242 | |
771 | 243 /* This is used to convert autodetected coding systems into existing |
244 systems. For example, the chain undecided->convert-eol-autodetect may | |
245 have its separate parts detected as mswindows-multibyte and | |
246 convert-eol-crlf, and the result needs to be mapped to | |
247 mswindows-multibyte-dos. */ | |
248 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
249 any more. */ | |
250 static Lisp_Object Vchain_canonicalize_hash_table; | |
251 | |
252 #ifdef HAVE_ZLIB | |
253 Lisp_Object Qgzip; | |
428 | 254 #endif |
771 | 255 |
2297 | 256 /* Maps symbols (coding system names) to either coding system objects or |
257 (for aliases) other names. */ | |
771 | 258 static Lisp_Object Vcoding_system_hash_table; |
428 | 259 |
260 int enable_multibyte_characters; | |
261 | |
262 EXFUN (Fcopy_coding_system, 2); | |
263 | |
264 | |
265 /************************************************************************/ | |
771 | 266 /* Coding system object methods */ |
428 | 267 /************************************************************************/ |
268 | |
269 static Lisp_Object | |
270 mark_coding_system (Lisp_Object obj) | |
271 { | |
272 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); | |
273 | |
1204 | 274 #define MARKED_SLOT(x) mark_object (codesys->x); |
275 #include "coding-system-slots.h" | |
771 | 276 |
277 MAYBE_CODESYSMETH (codesys, mark, (obj)); | |
428 | 278 |
1204 | 279 return Qnil; |
428 | 280 } |
281 | |
282 static void | |
771 | 283 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun) |
284 { | |
285 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
286 print_internal (c->methods->type, printcharfun, 1); | |
287 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1)); | |
288 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT) | |
289 write_fmt_string_lisp (printcharfun, " eol-type=%s", | |
290 1, Fcoding_system_property (obj, Qeol_type)); | |
291 } | |
292 | |
293 static void | |
428 | 294 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, |
2286 | 295 int UNUSED (escapeflag)) |
428 | 296 { |
297 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
298 if (print_readably) | |
5142
f965e31a35f0
reduce lcrecord headers to 2 words, rename printing_unreadable_object
Ben Wing <ben@xemacs.org>
parents:
5128
diff
changeset
|
299 printing_unreadable_lisp_object (obj, 0); |
771 | 300 |
301 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name); | |
302 print_coding_system_properties (obj, printcharfun); | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
303 write_ascstring (printcharfun, ">"); |
428 | 304 } |
305 | |
771 | 306 /* Print an abbreviated version of a coding system (but still containing |
307 all the information), for use within a coding system print method. */ | |
308 | |
309 static void | |
310 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun, | |
2286 | 311 int UNUSED (escapeflag)) |
771 | 312 { |
800 | 313 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); |
771 | 314 print_coding_system_properties (cs, printcharfun); |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
315 write_ascstring (printcharfun, "]"); |
771 | 316 } |
317 | |
3263 | 318 #ifndef NEW_GC |
428 | 319 static void |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
320 finalize_coding_system (Lisp_Object obj) |
428 | 321 { |
322 /* Since coding systems never go away, this function is not | |
323 necessary. But it would be necessary if we changed things | |
324 so that coding systems could go away. */ | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
325 MAYBE_XCODESYSMETH (obj, finalize, (obj)); |
771 | 326 } |
3263 | 327 #endif /* not NEW_GC */ |
771 | 328 |
329 static Bytecount | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
330 sizeof_coding_system (Lisp_Object obj) |
771 | 331 { |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
332 const Lisp_Coding_System *p = XCODING_SYSTEM (obj); |
771 | 333 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size; |
428 | 334 } |
335 | |
1204 | 336 static const struct memory_description coding_system_methods_description_1[] |
771 | 337 = { |
338 { XD_LISP_OBJECT, | |
339 offsetof (struct coding_system_methods, type) }, | |
340 { XD_LISP_OBJECT, | |
341 offsetof (struct coding_system_methods, predicate_symbol) }, | |
342 { XD_END } | |
343 }; | |
344 | |
1204 | 345 const struct sized_memory_description coding_system_methods_description = { |
771 | 346 sizeof (struct coding_system_methods), |
347 coding_system_methods_description_1 | |
348 }; | |
349 | |
1204 | 350 static const struct sized_memory_description coding_system_extra_description_map[] = |
351 { | |
352 { offsetof (Lisp_Coding_System, methods) }, | |
353 { offsetof (struct coding_system_methods, extra_description) }, | |
354 { -1 }, | |
771 | 355 }; |
356 | |
1204 | 357 static const struct memory_description coding_system_description[] = |
428 | 358 { |
2367 | 359 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1, |
2551 | 360 { &coding_system_methods_description } }, |
1204 | 361 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) }, |
362 #define MARKED_SLOT_ARRAY(slot, size) \ | |
363 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size }, | |
364 #include "coding-system-slots.h" | |
2367 | 365 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1, |
2551 | 366 { coding_system_extra_description_map } }, |
1204 | 367 { XD_END } |
771 | 368 }; |
369 | |
1204 | 370 static const struct memory_description coding_system_empty_extra_description_1[] = |
371 { | |
372 { XD_END } | |
373 }; | |
374 | |
375 const struct sized_memory_description coding_system_empty_extra_description = { | |
376 0, coding_system_empty_extra_description_1 | |
377 }; | |
378 | |
5118
e0db3c197671
merge up to latest default branch, doesn't compile yet
Ben Wing <ben@xemacs.org>
diff
changeset
|
379 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
380 mark_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
381 print_coding_system, |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5142
diff
changeset
|
382 IF_OLD_GC (finalize_coding_system), |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
383 0, 0, coding_system_description, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
384 sizeof_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
385 Lisp_Coding_System); |
771 | 386 |
387 /************************************************************************/ | |
388 /* Creating coding systems */ | |
389 /************************************************************************/ | |
390 | |
391 static struct coding_system_methods * | |
392 decode_coding_system_type (Lisp_Object type, Error_Behavior errb) | |
428 | 393 { |
771 | 394 int i; |
395 | |
396 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++) | |
428 | 397 { |
771 | 398 if (EQ (type, |
399 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type)) | |
400 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths; | |
428 | 401 } |
771 | 402 |
403 maybe_invalid_constant ("Invalid coding system type", type, | |
404 Qcoding_system, errb); | |
405 | |
406 return 0; | |
428 | 407 } |
408 | |
771 | 409 static int |
410 valid_coding_system_type_p (Lisp_Object type) | |
428 | 411 { |
771 | 412 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; |
413 } | |
414 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
415 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
416 static Lisp_Object Vdefault_query_coding_region_chartab_cache; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
417 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
418 /* Non-static because it's used in INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
419 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
420 default_query_method (Lisp_Object codesys, struct buffer *buf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
421 Charbpos end, int flags) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
422 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
423 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
424 Charbpos pos_byte = BYTE_BUF_PT (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
425 Lisp_Object safe_charsets = XCODING_SYSTEM_SAFE_CHARSETS (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
426 Lisp_Object safe_chars = XCODING_SYSTEM_SAFE_CHARS (codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
427 result = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
428 enum query_coding_failure_reasons failed_reason, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
429 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
430 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
431 /* safe-charsets of t means the coding system can encode everything. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
432 if (EQ (Qnil, safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
433 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
434 if (EQ (Qt, safe_charsets)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
435 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
436 return Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
437 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
438 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
439 /* If we've no information on what characters the coding system can |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
440 encode, give up. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
441 if (EQ (Qnil, safe_charsets) && EQ (Qnil, safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
442 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
443 return Qunbound; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
444 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
445 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
446 safe_chars = Fgethash (safe_charsets, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
447 Vdefault_query_coding_region_chartab_cache, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
448 Qnil); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
449 if (NILP (safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
450 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
451 safe_chars = Fmake_char_table (Qgeneric); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
452 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
453 EXTERNAL_LIST_LOOP_2 (safe_charset, safe_charsets) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
454 Fput_char_table (safe_charset, Qt, safe_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
455 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
456 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
457 Fputhash (safe_charsets, safe_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
458 Vdefault_query_coding_region_chartab_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
459 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
460 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
461 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
462 if (flags & QUERY_METHOD_HIGHLIGHT && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
463 /* If we're being called really early, live without highlights getting |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
464 cleared properly: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
465 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
466 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
467 /* It's okay to call Lisp here, the only non-stack object we may have |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
468 allocated up to this point is safe_chars, and that's |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
469 reachable from its entry in |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
470 Vdefault_query_coding_region_chartab_cache */ |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
471 call3 (Qquery_coding_clear_highlights, make_fixnum (pos), make_fixnum (end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
472 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
473 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
474 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
475 while (pos < end) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
476 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
477 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
478 if (!EQ (Qnil, get_char_table (ch, safe_chars))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
479 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
480 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
481 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
482 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
483 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
484 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
485 fail_range_start = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
486 while ((pos < end) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
487 (EQ (Qnil, get_char_table (ch, safe_chars)) |
4839
5f1c6ca4d05e
fix bad change to default_query_method, rewrite conditional for more correctness
Ben Wing <ben@xemacs.org>
parents:
4830
diff
changeset
|
488 && (failed_reason = query_coding_unencodable, |
5f1c6ca4d05e
fix bad change to default_query_method, rewrite conditional for more correctness
Ben Wing <ben@xemacs.org>
parents:
4830
diff
changeset
|
489 (previous_failed_reason == query_coding_succeeded |
5f1c6ca4d05e
fix bad change to default_query_method, rewrite conditional for more correctness
Ben Wing <ben@xemacs.org>
parents:
4830
diff
changeset
|
490 || previous_failed_reason == failed_reason)))) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
491 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
492 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
493 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
494 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
495 previous_failed_reason = failed_reason; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
496 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
497 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
498 if (fail_range_start == pos) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
499 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
500 /* The character can actually be encoded; move on. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
501 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
502 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
503 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
504 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
505 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
506 assert (previous_failed_reason == query_coding_unencodable); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
507 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
508 if (flags & QUERY_METHOD_ERRORP) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
509 { |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
510 signal_error_2 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
511 (Qtext_conversion_error, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
512 "Cannot encode using coding system", |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
513 make_string_from_buffer (buf, fail_range_start, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
514 pos - fail_range_start), |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
515 XCODING_SYSTEM_NAME (codesys)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
516 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
517 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
518 if (NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
519 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
520 result = Fmake_range_table (Qstart_closed_end_open); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
521 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
522 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
523 fail_range_end = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
524 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
525 Fput_range_table (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
526 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
527 Qunencodable, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
528 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
529 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
530 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
531 if (flags & QUERY_METHOD_HIGHLIGHT) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
532 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
533 Lisp_Object extent |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
534 = Fmake_extent (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
535 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
536 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
537 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
538 Fset_extent_priority |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
539 (extent, make_fixnum (2 + mouse_highlight_priority)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
540 Fset_extent_face (extent, Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
541 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
542 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
543 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
544 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
545 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
546 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
547 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
548 #else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
549 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
550 default_query_method (Lisp_Object UNUSED (codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
551 struct buffer * UNUSED (buf), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
552 Charbpos UNUSED (end), int UNUSED (flags)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
553 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
554 return Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
555 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
556 #endif /* defined MULE */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
557 |
771 | 558 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* |
559 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | |
560 Valid types depend on how XEmacs was compiled but may include | |
3025 | 561 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
562 `utf-16', `ucs-4', `utf-8', etc. | |
771 | 563 */ |
564 (coding_system_type)) | |
565 { | |
566 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; | |
567 } | |
568 | |
569 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /* | |
570 Return a list of valid coding system types. | |
571 */ | |
572 ()) | |
573 { | |
574 return Fcopy_sequence (Vcoding_system_type_list); | |
575 } | |
576 | |
577 void | |
578 add_entry_to_coding_system_type_list (struct coding_system_methods *meths) | |
579 { | |
580 struct coding_system_type_entry entry; | |
581 | |
582 entry.meths = meths; | |
583 Dynarr_add (the_coding_system_type_entry_dynarr, entry); | |
584 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list); | |
428 | 585 } |
586 | |
587 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /* | |
588 Return t if OBJECT is a coding system. | |
589 A coding system is an object that defines how text containing multiple | |
590 character sets is encoded into a stream of (typically 8-bit) bytes. | |
591 The coding system is used to decode the stream into a series of | |
592 characters (which may be from multiple charsets) when the text is read | |
593 from a file or process, and is used to encode the text back into the | |
594 same format when it is written out to a file or process. | |
595 | |
596 For example, many ISO2022-compliant coding systems (such as Compound | |
597 Text, which is used for inter-client data under the X Window System) | |
598 use escape sequences to switch between different charsets -- Japanese | |
599 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked | |
600 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See | |
601 `make-coding-system' for more information. | |
602 | |
603 Coding systems are normally identified using a symbol, and the | |
604 symbol is accepted in place of the actual coding system object whenever | |
605 a coding system is called for. (This is similar to how faces work.) | |
606 */ | |
607 (object)) | |
608 { | |
609 return CODING_SYSTEMP (object) ? Qt : Qnil; | |
610 } | |
611 | |
4303 | 612 static Lisp_Object |
613 find_coding_system (Lisp_Object coding_system_or_name, | |
614 int do_autoloads) | |
615 { | |
616 Lisp_Object lookup; | |
617 | |
618 if (NILP (coding_system_or_name)) | |
619 coding_system_or_name = Qbinary; | |
620 else if (CODING_SYSTEMP (coding_system_or_name)) | |
621 return coding_system_or_name; | |
622 else | |
623 CHECK_SYMBOL (coding_system_or_name); | |
624 | |
625 while (1) | |
626 { | |
627 lookup = | |
628 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
629 | |
630 if (CONSP (lookup) && do_autoloads) | |
631 { | |
632 struct gcpro gcpro1; | |
633 int length; | |
634 DECLARE_EISTRING (desired_base); | |
635 DECLARE_EISTRING (warning_info); | |
636 | |
637 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name)); | |
638 | |
639 /* Work out the name of the base coding system. */ | |
640 length = eilen (desired_base); | |
641 if (length > (int)(sizeof ("-unix") - 1)) | |
642 { | |
643 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base)) | |
644 + (length - (sizeof ("-unix") - 1)))) | |
645 { | |
646 eidel (desired_base, length - (sizeof ("-unix") - 1), | |
647 -1, 5, 5); | |
648 } | |
649 } | |
650 else if (length > (int)(sizeof ("-dos") - 1)) | |
651 { | |
652 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base)) | |
653 + (length - (sizeof ("-dos") - 1)))) || | |
654 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base)) | |
655 + (length - (sizeof ("-mac") - 1))))) | |
656 { | |
657 eidel (desired_base, length - (sizeof ("-dos") - 1), -1, | |
658 4, 4); | |
659 } | |
660 } | |
661 | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
662 coding_system_or_name = intern_istring (eidata (desired_base)); |
4303 | 663 |
664 /* Remove this coding system and its subsidiary coding | |
665 systems from the hash, to avoid calling this code recursively. */ | |
666 Fremhash (coding_system_or_name, Vcoding_system_hash_table); | |
667 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"), | |
668 Vcoding_system_hash_table); | |
669 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"), | |
670 Vcoding_system_hash_table); | |
671 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"), | |
672 Vcoding_system_hash_table); | |
673 | |
674 eicpy_ascii (warning_info, "Error autoloading coding system "); | |
675 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name)); | |
676 | |
677 /* Keep around the form so it doesn't disappear from under | |
678 #'eval's feet. */ | |
679 GCPRO1 (lookup); | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
680 call1_trapping_problems ((const CIbyte *) eidata (warning_info), |
4303 | 681 Qeval, lookup, 0); |
682 UNGCPRO; | |
683 | |
684 lookup = | |
685 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
686 } | |
687 | |
688 if (CODING_SYSTEMP (lookup) || NILP (lookup)) | |
689 return lookup; | |
690 | |
691 coding_system_or_name = lookup; | |
692 } | |
693 } | |
694 | |
428 | 695 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* |
696 Retrieve the coding system of the given name. | |
697 | |
698 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply | |
699 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. | |
700 If there is no such coding system, nil is returned. Otherwise the | |
701 associated coding system object is returned. | |
702 */ | |
703 (coding_system_or_name)) | |
704 { | |
4303 | 705 return find_coding_system(coding_system_or_name, 1); |
706 } | |
707 | |
708 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /* | |
709 Define SYMBOL as a coding-system that is loaded on demand. | |
710 | |
711 FORM is a form to evaluate to define the coding-system. | |
712 */ | |
713 (symbol, form)) | |
714 { | |
715 Lisp_Object lookup; | |
716 | |
717 CHECK_SYMBOL (symbol); | |
718 CHECK_CONS (form); | |
719 | |
720 lookup = find_coding_system (symbol, 0); | |
721 | |
722 if (!NILP (lookup) && | |
723 /* Allow autoloads to be redefined. */ | |
724 !CONSP (lookup)) | |
440 | 725 { |
4303 | 726 invalid_operation ("Cannot redefine existing coding system", |
727 symbol); | |
440 | 728 } |
4303 | 729 |
730 Fputhash (symbol, form, Vcoding_system_hash_table); | |
731 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form, | |
732 Vcoding_system_hash_table); | |
733 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form, | |
734 Vcoding_system_hash_table); | |
735 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form, | |
736 Vcoding_system_hash_table); | |
737 | |
738 /* Tell the POSIX locale infrastructure about this coding system (though | |
739 unfortunately it'll be too late for the startup locale sniffing. */ | |
740 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash)) | |
741 { | |
742 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash); | |
743 DECLARE_EISTRING (minimal_name); | |
744 Ibyte *full_name; | |
745 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i; | |
746 | |
747 if (!NILP (val)) | |
748 { | |
749 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol)); | |
750 for (i = 0; i < len; ++i) | |
751 { | |
752 if (full_name[i] >= '0' && full_name[i] <= '9') | |
753 { | |
754 eicat_ch (minimal_name, full_name[i]); | |
755 } | |
756 else if (full_name[i] >= 'a' && full_name[i] <= 'z') | |
757 { | |
758 eicat_ch (minimal_name, full_name[i]); | |
759 } | |
760 else if (full_name[i] >= 'A' && full_name[i] <= 'Z') | |
761 { | |
762 eicat_ch (minimal_name, full_name[i] + | |
763 ('a' - 'A')); | |
764 } | |
765 } | |
766 | |
767 if (eilen (minimal_name)) | |
768 { | |
769 CHECK_HASH_TABLE (val); | |
770 Fputhash (eimake_string(minimal_name), symbol, val); | |
771 } | |
772 } | |
773 } | |
774 | |
775 return Qt; | |
428 | 776 } |
777 | |
778 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* | |
779 Retrieve the coding system of the given name. | |
780 Same as `find-coding-system' except that if there is no such | |
781 coding system, an error is signaled instead of returning nil. | |
782 */ | |
783 (name)) | |
784 { | |
785 Lisp_Object coding_system = Ffind_coding_system (name); | |
786 | |
787 if (NILP (coding_system)) | |
563 | 788 invalid_argument ("No such coding system", name); |
428 | 789 return coding_system; |
790 } | |
791 | |
771 | 792 int |
793 coding_system_is_binary (Lisp_Object coding_system) | |
794 { | |
795 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
796 return | |
797 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) && | |
798 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF && | |
799 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) && | |
800 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil)); | |
801 } | |
802 | |
803 static Lisp_Object | |
804 coding_system_real_canonical (Lisp_Object cs) | |
805 { | |
806 if (!NILP (XCODING_SYSTEM_CANONICAL (cs))) | |
807 return XCODING_SYSTEM_CANONICAL (cs); | |
808 return cs; | |
809 } | |
810 | |
811 /* Return true if coding system is of the "standard" type that decodes | |
812 bytes into characters (suitable for decoding a text file). */ | |
813 int | |
814 coding_system_is_for_text_file (Lisp_Object coding_system) | |
815 { | |
816 return (XCODESYSMETH_OR_GIVEN | |
817 (coding_system, conversion_end_type, | |
818 (coding_system_real_canonical (coding_system)), | |
819 DECODES_BYTE_TO_CHARACTER) == | |
820 DECODES_BYTE_TO_CHARACTER); | |
821 } | |
822 | |
823 static int | |
824 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
825 { | |
826 enum source_sink_type type = | |
827 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type, | |
828 (coding_system_real_canonical (cs)), | |
829 DECODES_BYTE_TO_CHARACTER); | |
830 if (sex == CODING_SOURCE) | |
831 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
832 type == DECODES_CHARACTER_TO_BYTE); | |
833 else | |
834 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
835 type == DECODES_BYTE_TO_CHARACTER); | |
836 } | |
837 | |
838 static int | |
839 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
840 { | |
841 return decoding_source_sink_type_is_char (cs, | |
842 /* Sex change */ | |
843 sex == CODING_SOURCE ? | |
844 CODING_SINK : CODING_SOURCE); | |
845 } | |
846 | |
847 /* Like Ffind_coding_system() but check that the coding system is of the | |
848 "standard" type that decodes bytes into characters (suitable for | |
849 decoding a text file), and if not, returns an appropriate wrapper that | |
850 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
851 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
852 system to do this. */ | |
853 | |
854 Lisp_Object | |
855 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
856 { | |
857 Lisp_Object coding_system = Ffind_coding_system (name); | |
858 Lisp_Object wrapper = coding_system; | |
859 | |
860 if (NILP (coding_system)) | |
861 return Qnil; | |
862 if (!coding_system_is_for_text_file (coding_system)) | |
863 { | |
864 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system); | |
865 if (NILP (wrapper)) | |
866 { | |
867 Lisp_Object chain; | |
868 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK)) | |
869 chain = list2 (coding_system, Qbinary); | |
870 else | |
871 chain = list1 (coding_system); | |
872 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE)) | |
873 chain = Fcons (Qbinary, chain); | |
874 wrapper = | |
875 make_internal_coding_system | |
876 (coding_system, | |
877 "internal-text-file-wrapper", | |
878 Qchain, | |
879 Qunbound, list4 (Qchain, chain, | |
880 Qcanonicalize_after_coding, coding_system)); | |
881 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper; | |
882 } | |
883 } | |
884 | |
885 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT) | |
886 return wrapper; | |
887 | |
888 coding_system = wrapper; | |
889 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system); | |
890 if (!NILP (wrapper)) | |
891 return wrapper; | |
892 wrapper = | |
893 make_internal_coding_system | |
894 (coding_system, | |
895 "internal-auto-eol-wrapper", | |
896 Qundecided, Qunbound, | |
897 list4 (Qcoding_system, coding_system, | |
898 Qdo_eol, Qt)); | |
899 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper; | |
900 return wrapper; | |
901 } | |
902 | |
903 /* Like Fget_coding_system() but verify that the coding system is of the | |
904 "standard" type that decodes bytes into characters (suitable for | |
905 decoding a text file), and if not, returns an appropriate wrapper that | |
906 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
907 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
908 system to do this. */ | |
909 | |
910 Lisp_Object | |
911 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
912 { | |
913 Lisp_Object coding_system = find_coding_system_for_text_file (name, | |
914 eol_wrap); | |
915 if (NILP (coding_system)) | |
916 invalid_argument ("No such coding system", name); | |
917 return coding_system; | |
918 } | |
919 | |
920 /* We store the coding systems in hash tables with the names as the | |
921 key and the actual coding system object as the value. Occasionally | |
922 we need to use them in a list format. These routines provide us | |
923 with that. */ | |
428 | 924 struct coding_system_list_closure |
925 { | |
926 Lisp_Object *coding_system_list; | |
771 | 927 int normal; |
928 int internal; | |
428 | 929 }; |
930 | |
931 static int | |
4303 | 932 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, |
428 | 933 void *coding_system_list_closure) |
934 { | |
935 /* This function can GC */ | |
936 struct coding_system_list_closure *cscl = | |
937 (struct coding_system_list_closure *) coding_system_list_closure; | |
938 Lisp_Object *coding_system_list = cscl->coding_system_list; | |
939 | |
771 | 940 /* We can't just use VALUE because KEY might be an alias, and we need |
4303 | 941 the real coding system object. |
942 | |
943 Autoloaded coding systems have conses for their values, and can't be | |
944 internal coding systems, or coding system aliases. */ | |
945 if (CONSP (value) || | |
946 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | |
947 cscl->internal : cscl->normal)) | |
771 | 948 *coding_system_list = Fcons (key, *coding_system_list); |
428 | 949 return 0; |
950 } | |
951 | |
2297 | 952 /* #### should we specify a conventional for "all coding systems"? */ |
771 | 953 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /* |
428 | 954 Return a list of the names of all defined coding systems. |
771 | 955 If INTERNAL is nil, only the normal (non-internal) coding systems are |
956 included. (Internal coding systems are created for various internal | |
957 purposes, such as implementing EOL types of CRLF and CR; generally, you do | |
958 not want to see these.) If it is t, only the internal coding systems are | |
959 included. If it is any other non-nil value both normal and internal are | |
960 included. | |
428 | 961 */ |
771 | 962 (internal)) |
428 | 963 { |
964 Lisp_Object coding_system_list = Qnil; | |
965 struct gcpro gcpro1; | |
966 struct coding_system_list_closure coding_system_list_closure; | |
967 | |
968 GCPRO1 (coding_system_list); | |
969 coding_system_list_closure.coding_system_list = &coding_system_list; | |
771 | 970 coding_system_list_closure.normal = !EQ (internal, Qt); |
971 coding_system_list_closure.internal = !NILP (internal); | |
428 | 972 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table, |
973 &coding_system_list_closure); | |
974 UNGCPRO; | |
975 | |
976 return coding_system_list; | |
977 } | |
978 | |
979 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /* | |
980 Return the name of the given coding system. | |
981 */ | |
982 (coding_system)) | |
983 { | |
984 coding_system = Fget_coding_system (coding_system); | |
985 return XCODING_SYSTEM_NAME (coding_system); | |
986 } | |
987 | |
988 static Lisp_Coding_System * | |
771 | 989 allocate_coding_system (struct coding_system_methods *codesys_meths, |
990 Bytecount data_size, | |
991 Lisp_Object name) | |
428 | 992 { |
771 | 993 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size; |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
994 Lisp_Object obj = ALLOC_SIZED_LISP_OBJECT (total_size, coding_system); |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
995 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); |
1204 | 996 |
771 | 997 codesys->methods = codesys_meths; |
1204 | 998 #define MARKED_SLOT(x) codesys->x = Qnil; |
999 #include "coding-system-slots.h" | |
1000 | |
771 | 1001 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF; |
1002 CODING_SYSTEM_NAME (codesys) = name; | |
1003 | |
1004 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys))); | |
428 | 1005 |
1006 return codesys; | |
1007 } | |
1008 | |
771 | 1009 static enum eol_type |
1010 symbol_to_eol_type (Lisp_Object symbol) | |
1011 { | |
1012 CHECK_SYMBOL (symbol); | |
1013 if (NILP (symbol)) return EOL_AUTODETECT; | |
1014 if (EQ (symbol, Qlf)) return EOL_LF; | |
1015 if (EQ (symbol, Qcrlf)) return EOL_CRLF; | |
1016 if (EQ (symbol, Qcr)) return EOL_CR; | |
1017 | |
1018 invalid_constant ("Unrecognized eol type", symbol); | |
1204 | 1019 RETURN_NOT_REACHED (EOL_AUTODETECT); |
771 | 1020 } |
1021 | |
1022 static Lisp_Object | |
1023 eol_type_to_symbol (enum eol_type type) | |
1024 { | |
1025 switch (type) | |
1026 { | |
2500 | 1027 default: ABORT (); |
771 | 1028 case EOL_LF: return Qlf; |
1029 case EOL_CRLF: return Qcrlf; | |
1030 case EOL_CR: return Qcr; | |
1031 case EOL_AUTODETECT: return Qnil; | |
1032 } | |
1033 } | |
1034 | |
1035 struct subsidiary_type | |
1036 { | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1037 const Ascbyte *extension; |
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1038 const Ascbyte *mnemonic_ext; |
771 | 1039 enum eol_type eol; |
1040 }; | |
1041 | |
1042 static struct subsidiary_type coding_subsidiary_list[] = | |
1043 { { "-unix", "", EOL_LF }, | |
1044 { "-dos", ":T", EOL_CRLF }, | |
1045 { "-mac", ":t", EOL_CR } }; | |
1046 | |
1047 /* kludge */ | |
428 | 1048 static void |
771 | 1049 setup_eol_coding_systems (Lisp_Object codesys) |
428 | 1050 { |
793 | 1051 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name); |
2367 | 1052 Ibyte *codesys_name = alloca_ibytes (len + 7); |
771 | 1053 int mlen = -1; |
867 | 1054 Ibyte *codesys_mnemonic = 0; |
771 | 1055 Lisp_Object codesys_name_sym, sub_codesys; |
1056 int i; | |
1057 | |
1058 memcpy (codesys_name, | |
793 | 1059 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len); |
771 | 1060 |
1061 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys))) | |
428 | 1062 { |
771 | 1063 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys)); |
2367 | 1064 codesys_mnemonic = alloca_ibytes (mlen + 7); |
771 | 1065 memcpy (codesys_mnemonic, |
1066 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen); | |
1067 } | |
1068 | |
1069 /* Create three "subsidiary" coding systems, decoding data encoded using | |
1070 each of the three EOL types. We do this for each subsidiary by | |
1071 copying the original coding system, setting the EOL type | |
1072 appropriately, and setting the CANONICAL member of the new coding | |
1073 system to be a chain consisting of the original coding system followed | |
1074 by a convert-eol coding system to do the EOL decoding. For EOL type | |
1075 LF, however, we don't need any decoding, so we skip creating a | |
1076 CANONICAL. | |
1077 | |
1078 If the original coding system is not a text-type coding system | |
1079 (decodes byte->char), we need to coerce it to one by the appropriate | |
1080 wrapping in CANONICAL. */ | |
1081 | |
1082 for (i = 0; i < countof (coding_subsidiary_list); i++) | |
1083 { | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1084 const Ascbyte *extension = coding_subsidiary_list[i].extension; |
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1085 const Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; |
771 | 1086 enum eol_type eol = coding_subsidiary_list[i].eol; |
1087 | |
2367 | 1088 qxestrcpy_ascii (codesys_name + len, extension); |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1089 codesys_name_sym = intern_istring (codesys_name); |
771 | 1090 if (mlen != -1) |
2367 | 1091 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext); |
771 | 1092 |
1093 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym); | |
1094 if (mlen != -1) | |
1095 XCODING_SYSTEM_MNEMONIC (sub_codesys) = | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1096 build_istring (codesys_mnemonic); |
771 | 1097 |
1098 if (eol != EOL_LF) | |
1099 { | |
1100 Lisp_Object chain = list2 (get_coding_system_for_text_file | |
1101 (codesys, 0), | |
1102 eol == EOL_CR ? Qconvert_eol_cr : | |
1103 Qconvert_eol_crlf); | |
1104 Lisp_Object canon = | |
1105 make_internal_coding_system | |
1106 (sub_codesys, "internal-subsidiary-eol-wrapper", | |
1107 Qchain, Qunbound, | |
1108 mlen != -1 ? | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1109 list6 (Qmnemonic, build_istring (codesys_mnemonic), |
771 | 1110 Qchain, chain, |
1111 Qcanonicalize_after_coding, sub_codesys) : | |
1112 list4 (Qchain, chain, | |
1113 Qcanonicalize_after_coding, sub_codesys)); | |
1114 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon; | |
1115 } | |
1116 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol; | |
1117 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; | |
1118 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; | |
428 | 1119 } |
1120 } | |
1121 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1122 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1123 1, 1, 0, /* |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1124 Return t if OBJECT names a coding system, and is not a coding system alias. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1125 */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1126 (object)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1127 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1128 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1129 ? Qt : Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1130 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1131 |
771 | 1132 /* Basic function to create new coding systems. For `make-coding-system', |
1133 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, | |
1134 DESCRIPTION, and PROPS are the same. All created coding systems are put | |
1135 in a hash table indexed by NAME. | |
1136 | |
1137 If PREFIX is a string, NAME-OR-EXISTING should specify an existing | |
1138 coding system (or nil), and an internal coding system will be created. | |
1139 The name of the coding system will be constructed by combining PREFIX | |
1140 with the name of the existing coding system (if given), and a number | |
1141 will be appended to insure uniqueness. In such a case, if Qunbound is | |
1142 given for DESCRIPTION, the description gets created based on the | |
1143 generated name. Also, if no mnemonic is given in the properties list, a | |
1144 mnemonic is created based on the generated name. | |
1145 | |
1146 For internal coding systems, the coding system is marked as internal | |
1147 (see `coding-system-list'), and no subsidiaries will be created or | |
1148 eol-wrapping will happen. Otherwise: | |
1149 | |
1150 -- if the eol-type property is `lf' or t, the coding system is merely | |
1151 created and returned. (For t, the coding system will be wrapped with | |
1152 an EOL autodetector when it's used to read a file.) | |
1153 | |
1154 -- if eol-type is `crlf' or `cr', after the coding system object is | |
1155 created, it will be wrapped in a chain with the appropriate | |
1156 convert-eol coding system (either `convert-eol-crlf' or | |
1157 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at | |
1158 decoding time, and the opposite at encoding time. The resulting | |
1159 chain becomes the CANONICAL field of the coding system object. | |
1160 | |
1161 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three | |
1162 coding systems where the original coding system (before wrapping with | |
1163 convert-eol-autodetect) is either unwrapped or wrapped with | |
1164 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems | |
1165 to handle LF, CRLF, and CR end-of-line indicators are created. (This | |
1166 crazy crap is based on existing behavior in other Mule versions, | |
1167 including FSF Emacs.) | |
1168 */ | |
428 | 1169 |
1170 static Lisp_Object | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1171 make_coding_system_1 (Lisp_Object name_or_existing, const Ascbyte *prefix, |
771 | 1172 Lisp_Object type, Lisp_Object description, |
1173 Lisp_Object props) | |
428 | 1174 { |
771 | 1175 Lisp_Coding_System *cs; |
1176 int need_to_setup_eol_systems = 1; | |
1177 enum eol_type eol_wrapper = EOL_AUTODETECT; | |
1178 struct coding_system_methods *meths; | |
1179 Lisp_Object csobj; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1180 Lisp_Object defmnem = Qnil, aliases = Qnil; |
771 | 1181 |
1182 if (NILP (type)) | |
1183 type = Qundecided; | |
1184 meths = decode_coding_system_type (type, ERROR_ME); | |
1185 | |
1186 if (prefix) | |
428 | 1187 { |
867 | 1188 Ibyte *newname = |
771 | 1189 emacs_sprintf_malloc (NULL, "%s-%s-%d", |
1190 prefix, | |
867 | 1191 NILP (name_or_existing) ? (Ibyte *) "nil" : |
771 | 1192 XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME |
1193 (name_or_existing))), | |
1194 ++coding_system_tick); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1195 name_or_existing = intern_istring (newname); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1196 xfree (newname); |
771 | 1197 |
1198 if (UNBOUNDP (description)) | |
1199 { | |
1200 newname = | |
1201 emacs_sprintf_malloc | |
1202 (NULL, "For Internal Use (%s)", | |
1203 XSTRING_DATA (Fsymbol_name (name_or_existing))); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1204 description = build_istring (newname); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1205 xfree (newname); |
771 | 1206 } |
1207 | |
1208 newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1209 defmnem = build_istring (newname); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1210 xfree (newname); |
428 | 1211 } |
771 | 1212 else |
1213 CHECK_SYMBOL (name_or_existing); | |
1214 | |
4303 | 1215 /* See is there an entry for name_or_existing in the defined coding system |
1216 hash table. */ | |
1217 csobj = find_coding_system (name_or_existing, 0); | |
1218 /* Error if it's there and not an autoload form. */ | |
1219 if (!NILP (csobj) && !CONSP (csobj)) | |
771 | 1220 invalid_operation ("Cannot redefine existing coding system", |
4303 | 1221 name_or_existing); |
771 | 1222 |
1223 cs = allocate_coding_system (meths, meths->extra_data_size, | |
1224 name_or_existing); | |
793 | 1225 csobj = wrap_coding_system (cs); |
771 | 1226 |
1227 cs->internal_p = !!prefix; | |
1228 | |
1229 if (NILP (description)) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
1230 description = build_ascstring (""); |
771 | 1231 else |
1232 CHECK_STRING (description); | |
1233 CODING_SYSTEM_DESCRIPTION (cs) = description; | |
1234 | |
1235 if (!NILP (defmnem)) | |
1236 CODING_SYSTEM_MNEMONIC (cs) = defmnem; | |
1237 | |
1238 { | |
1239 EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) | |
1240 { | |
1241 int recognized = 1; | |
1242 | |
1243 if (EQ (key, Qmnemonic)) | |
1244 { | |
1245 if (!NILP (value)) | |
1246 CHECK_STRING (value); | |
1247 CODING_SYSTEM_MNEMONIC (cs) = value; | |
1248 } | |
1249 | |
1250 else if (EQ (key, Qdocumentation)) | |
1251 { | |
1252 if (!NILP (value)) | |
1253 CHECK_STRING (value); | |
1254 CODING_SYSTEM_DOCUMENTATION (cs) = value; | |
1255 } | |
1256 | |
1257 else if (EQ (key, Qeol_type)) | |
1258 { | |
1259 need_to_setup_eol_systems = NILP (value); | |
1260 if (EQ (value, Qt)) | |
1261 value = Qnil; | |
1262 eol_wrapper = symbol_to_eol_type (value); | |
1263 } | |
1264 | |
1265 else if (EQ (key, Qpost_read_conversion)) | |
1266 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; | |
1267 else if (EQ (key, Qpre_write_conversion)) | |
1268 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1269 else if (EQ (key, Qaliases)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1270 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1271 EXTERNAL_LIST_LOOP_2 (alias, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1272 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1273 CHECK_SYMBOL (alias); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1274 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1275 if (!NILP (Fcoding_system_canonical_name_p (alias))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1276 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1277 invalid_change ("Symbol is the canonical name of a " |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1278 "coding system and cannot be redefined", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1279 alias); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1280 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1281 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1282 aliases = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1283 } |
771 | 1284 /* FSF compatibility */ |
1285 else if (EQ (key, Qtranslation_table_for_decode)) | |
1286 ; | |
1287 else if (EQ (key, Qtranslation_table_for_encode)) | |
1288 ; | |
1289 else if (EQ (key, Qsafe_chars)) | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1290 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1291 CHECK_CHAR_TABLE (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1292 CODING_SYSTEM_SAFE_CHARS (cs) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1293 } |
771 | 1294 else if (EQ (key, Qsafe_charsets)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1295 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1296 if (!EQ (Qt, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1297 /* Would be nice to actually do this check, but there are |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1298 some order conflicts with japanese.el and |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1299 mule-coding.el */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1300 && 0) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1301 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1302 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1303 EXTERNAL_LIST_LOOP_2 (safe_charset, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1304 CHECK_CHARSET (Ffind_charset (safe_charset)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1305 #endif |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1306 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1307 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1308 CODING_SYSTEM_SAFE_CHARSETS (cs) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1309 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1310 else if (EQ (key, Qcategory)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1311 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1312 Fput (name_or_existing, intern ("coding-system-property"), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1313 Fplist_put (Fget (name_or_existing, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1314 intern ("coding-system-property"), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1315 Qnil), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1316 Qcategory, value)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1317 } |
771 | 1318 else if (EQ (key, Qmime_charset)) |
1319 ; | |
1320 else if (EQ (key, Qvalid_codes)) | |
1321 ; | |
1322 else | |
1323 recognized = CODESYSMETH_OR_GIVEN (cs, putprop, | |
1324 (csobj, key, value), 0); | |
1325 | |
1326 if (!recognized) | |
1327 invalid_constant ("Unrecognized property", key); | |
1328 } | |
1329 } | |
1330 | |
1331 { | |
1332 XCODING_SYSTEM_CANONICAL (csobj) = | |
1333 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); | |
1334 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system | |
1335 below */ | |
1336 | |
4303 | 1337 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); |
1338 | |
771 | 1339 if (need_to_setup_eol_systems && !cs->internal_p) |
1340 setup_eol_coding_systems (csobj); | |
1341 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) | |
1342 { | |
1343 /* If a specific eol-type (other than LF) was specified, we handle | |
1344 this by converting the coding system into a chain that wraps the | |
1345 coding system along with a convert-eol system after it, in | |
1346 exactly that same switcheroo fashion that the normal | |
1347 canonicalize method works -- BUT we will run into a problem if | |
1348 we do it the obvious way, because when `chain' creates its | |
1349 substreams, the substream containing the coding system we're | |
1350 creating will have canonicalization expansion done on it, | |
1351 leading to infinite recursion. So we have to generate a new, | |
1352 internal coding system with the previous value of CANONICAL. */ | |
867 | 1353 Ibyte *newname = |
771 | 1354 emacs_sprintf_malloc |
1355 (NULL, "internal-eol-copy-%s-%d", | |
1356 XSTRING_DATA (Fsymbol_name (name_or_existing)), | |
1357 ++coding_system_tick); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1358 Lisp_Object newnamesym = intern_istring (newname); |
771 | 1359 Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1360 xfree (newname); |
771 | 1361 |
1362 XCODING_SYSTEM_CANONICAL (csobj) = | |
1363 make_internal_coding_system | |
1364 (csobj, | |
1365 "internal-eol-wrapper", | |
1366 Qchain, Qunbound, | |
1367 list4 (Qchain, | |
1368 list2 (copied, | |
1369 eol_wrapper == EOL_CR ? | |
1370 Qconvert_eol_cr : | |
1371 Qconvert_eol_crlf), | |
1372 Qcanonicalize_after_coding, | |
1373 csobj)); | |
1374 } | |
1375 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1376 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1377 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1378 EXTERNAL_LIST_LOOP_2 (alias, aliases) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1379 Fdefine_coding_system_alias (alias, csobj); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1380 } |
771 | 1381 } |
1382 | |
1383 return csobj; | |
428 | 1384 } |
1385 | |
771 | 1386 Lisp_Object |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1387 make_internal_coding_system (Lisp_Object existing, const Ascbyte *prefix, |
771 | 1388 Lisp_Object type, Lisp_Object description, |
1389 Lisp_Object props) | |
1390 { | |
1391 return make_coding_system_1 (existing, prefix, type, description, props); | |
1392 } | |
428 | 1393 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1394 DEFUN ("make-coding-system-internal", Fmake_coding_system_internal, 2, 4, 0, /* |
5083
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1395 Create a new coding system object, and register NAME as its name. |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1396 |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1397 With Mule support, this does much of the work of `make-coding-system'. |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1398 Without Mule support, it does all the work of that function, and an alias |
5083
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1399 exists, mapping `make-coding-system' to `make-coding-system-internal'. |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1400 |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1401 You'll need a Mule XEmacs to read the complete docstring. Or you can |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1402 just read it in make-coding-system.el; something like the following |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1403 should work: |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1404 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1405 \\[find-function-other-window] find-file RET \\[find-file] mule/make-coding-system.el RET |
771 | 1406 |
428 | 1407 */ |
771 | 1408 (name, type, description, props)) |
428 | 1409 { |
771 | 1410 return make_coding_system_1 (name, 0, type, description, props); |
428 | 1411 } |
1412 | |
1413 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /* | |
1414 Copy OLD-CODING-SYSTEM to NEW-NAME. | |
1415 If NEW-NAME does not name an existing coding system, a new one will | |
1416 be created. | |
771 | 1417 If you are using this function to create an alias, think again: |
1418 Use `define-coding-system-alias' instead. | |
428 | 1419 */ |
1420 (old_coding_system, new_name)) | |
1421 { | |
1422 Lisp_Object new_coding_system; | |
1423 old_coding_system = Fget_coding_system (old_coding_system); | |
771 | 1424 new_coding_system = |
4303 | 1425 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0); |
428 | 1426 if (NILP (new_coding_system)) |
1427 { | |
793 | 1428 new_coding_system = |
1429 wrap_coding_system | |
1430 (allocate_coding_system | |
1431 (XCODING_SYSTEM (old_coding_system)->methods, | |
1432 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size, | |
1433 new_name)); | |
771 | 1434 if (!UNBOUNDP (new_name)) |
1435 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table); | |
428 | 1436 } |
771 | 1437 else if (XCODING_SYSTEM (old_coding_system)->methods != |
1438 XCODING_SYSTEM (new_coding_system)->methods) | |
1439 invalid_operation_2 ("Coding systems not same type", | |
1440 old_coding_system, new_coding_system); | |
428 | 1441 |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
1442 copy_lisp_object (new_coding_system, old_coding_system); |
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
1443 XCODING_SYSTEM (new_coding_system)->name = new_name; |
428 | 1444 return new_coding_system; |
1445 } | |
1446 | |
2297 | 1447 /* #### Shouldn't this really be a find/get pair? */ |
1448 | |
440 | 1449 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* |
1450 Return t if OBJECT is a coding system alias. | |
1451 All coding system aliases are created by `define-coding-system-alias'. | |
1452 */ | |
1453 (object)) | |
428 | 1454 { |
440 | 1455 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero)) |
1456 ? Qt : Qnil; | |
1457 } | |
1458 | |
1459 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /* | |
1460 Return the coding-system symbol for which symbol ALIAS is an alias. | |
1461 */ | |
1462 (alias)) | |
1463 { | |
1464 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil); | |
1465 if (SYMBOLP (aliasee)) | |
1466 return aliasee; | |
1467 else | |
563 | 1468 invalid_argument ("Symbol is not a coding system alias", alias); |
1204 | 1469 RETURN_NOT_REACHED (Qnil); |
440 | 1470 } |
1471 | |
1472 /* A maphash function, for removing dangling coding system aliases. */ | |
1473 static int | |
2286 | 1474 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias), |
440 | 1475 Lisp_Object aliasee, |
1476 void *dangling_aliases) | |
1477 { | |
1478 if (SYMBOLP (aliasee) | |
1479 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil))) | |
428 | 1480 { |
440 | 1481 (*(int *) dangling_aliases)++; |
1482 return 1; | |
428 | 1483 } |
440 | 1484 else |
1485 return 0; | |
1486 } | |
1487 | |
1488 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /* | |
1489 Define symbol ALIAS as an alias for coding system ALIASEE. | |
1490 | |
1491 You can use this function to redefine an alias that has already been defined, | |
1492 but you cannot redefine a name which is the canonical name for a coding system. | |
1493 \(a canonical name of a coding system is what is returned when you call | |
1494 `coding-system-name' on a coding system). | |
1495 | |
1496 ALIASEE itself can be an alias, which allows you to define nested aliases. | |
1497 | |
1498 You are forbidden, however, from creating alias loops or `dangling' aliases. | |
1499 These will be detected, and an error will be signaled if you attempt to do so. | |
1500 | |
1501 If ALIASEE is nil, then ALIAS will simply be undefined. | |
1502 | |
1503 See also `coding-system-alias-p', `coding-system-aliasee', | |
1504 and `coding-system-canonical-name-p'. | |
1505 */ | |
1506 (alias, aliasee)) | |
1507 { | |
2286 | 1508 Lisp_Object probe; |
440 | 1509 |
1510 CHECK_SYMBOL (alias); | |
1511 | |
1512 if (!NILP (Fcoding_system_canonical_name_p (alias))) | |
563 | 1513 invalid_change |
440 | 1514 ("Symbol is the canonical name of a coding system and cannot be redefined", |
1515 alias); | |
1516 | |
1517 if (NILP (aliasee)) | |
1518 { | |
771 | 1519 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix"); |
1520 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos"); | |
1521 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac"); | |
440 | 1522 |
1523 Fremhash (alias, Vcoding_system_hash_table); | |
1524 | |
1525 /* Undefine subsidiary aliases, | |
1526 presumably created by a previous call to this function */ | |
1527 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) && | |
1528 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) && | |
1529 ! NILP (Fcoding_system_alias_p (subsidiary_mac))) | |
1530 { | |
1531 Fdefine_coding_system_alias (subsidiary_unix, Qnil); | |
1532 Fdefine_coding_system_alias (subsidiary_dos, Qnil); | |
1533 Fdefine_coding_system_alias (subsidiary_mac, Qnil); | |
1534 } | |
1535 | |
1536 /* Undefine dangling coding system aliases. */ | |
1537 { | |
1538 int dangling_aliases; | |
1539 | |
1540 do { | |
1541 dangling_aliases = 0; | |
1542 elisp_map_remhash (dangling_coding_system_alias_p, | |
1543 Vcoding_system_hash_table, | |
1544 &dangling_aliases); | |
1545 } while (dangling_aliases > 0); | |
1546 } | |
1547 | |
1548 return Qnil; | |
1549 } | |
1550 | |
1551 if (CODING_SYSTEMP (aliasee)) | |
1552 aliasee = XCODING_SYSTEM_NAME (aliasee); | |
1553 | |
1554 /* Checks that aliasee names a coding-system */ | |
2286 | 1555 (void) Fget_coding_system (aliasee); |
440 | 1556 |
1557 /* Check for coding system alias loops */ | |
1558 if (EQ (alias, aliasee)) | |
563 | 1559 alias_loop: invalid_operation_2 |
440 | 1560 ("Attempt to create a coding system alias loop", alias, aliasee); |
1561 | |
1562 for (probe = aliasee; | |
1563 SYMBOLP (probe); | |
1564 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero)) | |
1565 { | |
1566 if (EQ (probe, alias)) | |
1567 goto alias_loop; | |
1568 } | |
1569 | |
1570 Fputhash (alias, aliasee, Vcoding_system_hash_table); | |
1571 | |
1572 /* Set up aliases for subsidiaries. | |
2297 | 1573 #### There must be a better way to handle subsidiary coding systems. |
1574 Inquiring Minds Want To Know: shouldn't they always be chains? */ | |
440 | 1575 { |
1576 static const char *suffixes[] = { "-unix", "-dos", "-mac" }; | |
1577 int i; | |
1578 for (i = 0; i < countof (suffixes); i++) | |
1579 { | |
1580 Lisp_Object alias_subsidiary = | |
771 | 1581 add_suffix_to_symbol (alias, suffixes[i]); |
440 | 1582 Lisp_Object aliasee_subsidiary = |
771 | 1583 add_suffix_to_symbol (aliasee, suffixes[i]); |
440 | 1584 |
1585 if (! NILP (Ffind_coding_system (aliasee_subsidiary))) | |
1586 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary); | |
1587 } | |
1588 } | |
428 | 1589 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac], |
1590 but it doesn't look intentional, so I'd rather return something | |
1591 meaningful or nothing at all. */ | |
1592 return Qnil; | |
1593 } | |
1594 | |
1595 static Lisp_Object | |
771 | 1596 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) |
428 | 1597 { |
1598 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
1599 Lisp_Object new_coding_system; | |
1600 | |
1601 switch (type) | |
1602 { | |
1603 case EOL_AUTODETECT: return coding_system; | |
1604 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; | |
1605 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; | |
1606 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; | |
2500 | 1607 default: ABORT (); return Qnil; |
428 | 1608 } |
1609 | |
1610 return NILP (new_coding_system) ? coding_system : new_coding_system; | |
1611 } | |
1612 | |
1613 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /* | |
1614 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE. | |
771 | 1615 The logically opposite operation is `coding-system-base'. |
428 | 1616 */ |
1617 (coding_system, eol_type)) | |
1618 { | |
771 | 1619 coding_system = get_coding_system_for_text_file (coding_system, 0); |
428 | 1620 |
1621 return subsidiary_coding_system (coding_system, | |
1622 symbol_to_eol_type (eol_type)); | |
1623 } | |
1624 | |
771 | 1625 DEFUN ("coding-system-base", Fcoding_system_base, |
1626 1, 1, 0, /* | |
1627 Return the base coding system of CODING-SYSTEM. | |
1628 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it | |
1629 returns CODING-SYSTEM. | |
1630 The logically opposite operation is `subsidiary-coding-system'. | |
1631 */ | |
1632 (coding_system)) | |
1633 { | |
1634 Lisp_Object base; | |
1635 | |
1636 coding_system = Fget_coding_system (coding_system); | |
1637 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary)) | |
1638 return Fget_coding_system (Qraw_text); /* hack! */ | |
1639 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system); | |
1640 if (!NILP (base)) | |
1641 return base; | |
1642 return coding_system; | |
1643 } | |
1644 | |
1645 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io, | |
1646 1, 1, 0, /* | |
1647 Return the coding system actually used for I/O. | |
1648 In some cases (e.g. when a particular EOL type is specified) this won't be | |
2297 | 1649 the coding system itself. This can be useful when trying to determine |
1650 precisely how data was decoded. | |
771 | 1651 */ |
1652 (coding_system)) | |
1653 { | |
1654 Lisp_Object canon; | |
1655 | |
1656 coding_system = Fget_coding_system (coding_system); | |
1657 canon = XCODING_SYSTEM_CANONICAL (coding_system); | |
1658 if (!NILP (canon)) | |
1659 return canon; | |
1660 return coding_system; | |
1661 } | |
1662 | |
428 | 1663 |
1664 /************************************************************************/ | |
1665 /* Coding system accessors */ | |
1666 /************************************************************************/ | |
1667 | |
771 | 1668 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /* |
1669 Return the description for CODING-SYSTEM. | |
1670 The `description' of a coding system is a short English phrase giving the | |
1671 name rendered according to English punctuation rules, plus possibly some | |
1672 explanatory text (typically in the form of a parenthetical phrase). The | |
1673 description is intended to be short enough that it can appear as a menu item, | |
1674 and clear enough to be recognizable even to someone who is assumed to have | |
1675 some basic familiarity with different encodings but may not know all the | |
1676 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and | |
1677 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of | |
1678 the encoding is given, followed by a note that this is a Chinese encoding, | |
1679 because the great majority of people encountering this would have no idea | |
1680 what it is, and giving the language indicates whether the encoding should | |
1681 just be ignored or (conceivably) investigated more thoroughly. | |
428 | 1682 */ |
1683 (coding_system)) | |
1684 { | |
1685 coding_system = Fget_coding_system (coding_system); | |
771 | 1686 return XCODING_SYSTEM_DESCRIPTION (coding_system); |
428 | 1687 } |
1688 | |
1689 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /* | |
1690 Return the type of CODING-SYSTEM. | |
1691 */ | |
1692 (coding_system)) | |
1693 { | |
771 | 1694 coding_system = Fget_coding_system (coding_system); |
1695 return XCODING_SYSTEM_TYPE (coding_system); | |
428 | 1696 } |
1697 | |
1698 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /* | |
1699 Return the PROP property of CODING-SYSTEM. | |
1700 */ | |
1701 (coding_system, prop)) | |
1702 { | |
1703 coding_system = Fget_coding_system (coding_system); | |
1704 CHECK_SYMBOL (prop); | |
1705 | |
1706 if (EQ (prop, Qname)) | |
1707 return XCODING_SYSTEM_NAME (coding_system); | |
1708 else if (EQ (prop, Qtype)) | |
1709 return Fcoding_system_type (coding_system); | |
771 | 1710 else if (EQ (prop, Qdescription)) |
1711 return XCODING_SYSTEM_DESCRIPTION (coding_system); | |
428 | 1712 else if (EQ (prop, Qmnemonic)) |
1713 return XCODING_SYSTEM_MNEMONIC (coding_system); | |
771 | 1714 else if (EQ (prop, Qdocumentation)) |
1715 return XCODING_SYSTEM_DOCUMENTATION (coding_system); | |
428 | 1716 else if (EQ (prop, Qeol_type)) |
771 | 1717 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE |
1718 (coding_system)); | |
428 | 1719 else if (EQ (prop, Qeol_lf)) |
1720 return XCODING_SYSTEM_EOL_LF (coding_system); | |
1721 else if (EQ (prop, Qeol_crlf)) | |
1722 return XCODING_SYSTEM_EOL_CRLF (coding_system); | |
1723 else if (EQ (prop, Qeol_cr)) | |
1724 return XCODING_SYSTEM_EOL_CR (coding_system); | |
1725 else if (EQ (prop, Qpost_read_conversion)) | |
1726 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); | |
1727 else if (EQ (prop, Qpre_write_conversion)) | |
1728 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1729 else if (EQ (prop, Qsafe_charsets)) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1730 return XCODING_SYSTEM_SAFE_CHARSETS (coding_system); |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1731 else if (EQ (prop, Qsafe_chars)) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1732 return XCODING_SYSTEM_SAFE_CHARS (coding_system); |
771 | 1733 else |
1734 { | |
1735 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), | |
1736 getprop, | |
1737 (coding_system, prop), | |
1738 Qunbound); | |
1739 if (UNBOUNDP (value)) | |
1740 invalid_constant ("Unrecognized property", prop); | |
1741 return value; | |
1742 } | |
1743 } | |
1744 | |
1745 | |
1746 /************************************************************************/ | |
1747 /* Coding stream functions */ | |
1748 /************************************************************************/ | |
1749 | |
1750 /* A coding stream is a stream used for encoding or decoding text. The | |
1751 coding-stream object keeps track of the actual coding system, the stream | |
1752 that is at the other end, and data that needs to be persistent across | |
1753 the lifetime of the stream. */ | |
1754 | |
1204 | 1755 extern const struct sized_memory_description chain_coding_stream_description; |
1756 extern const struct sized_memory_description undecided_coding_stream_description; | |
1757 | |
1758 static const struct memory_description coding_stream_data_description_1 []= { | |
2551 | 1759 { XD_BLOCK_PTR, chain_coding_system, 1, |
1760 { &chain_coding_stream_description } }, | |
1761 { XD_BLOCK_PTR, undecided_coding_system, 1, | |
1762 { &undecided_coding_stream_description } }, | |
1204 | 1763 { XD_END } |
1764 }; | |
1765 | |
1766 static const struct sized_memory_description coding_stream_data_description = { | |
1767 sizeof (void *), coding_stream_data_description_1 | |
1768 }; | |
1769 | |
1770 static const struct memory_description coding_lstream_description[] = { | |
1771 { XD_INT, offsetof (struct coding_stream, type) }, | |
1772 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) }, | |
1773 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) }, | |
1774 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) }, | |
1775 { XD_UNION, offsetof (struct coding_stream, data), | |
2551 | 1776 XD_INDIRECT (0, 0), { &coding_stream_data_description } }, |
1204 | 1777 { XD_END } |
1778 }; | |
1779 | |
1780 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding); | |
771 | 1781 |
1782 /* Encoding and decoding are parallel operations, so we create just one | |
1783 stream for both. "Decoding" may involve the extra step of autodetection | |
1784 of the data format, but that's only because of the conventional | |
1785 definition of decoding as converting from external- to | |
1786 internal-formatted data. | |
1787 | |
2297 | 1788 [[ REWRITE ME! ]] |
1789 | |
771 | 1790 #### We really need to abstract out the concept of "data formats" and |
1791 define "converters" that convert from and to specified formats, | |
1792 eliminating the idea of decoding and encoding. When specifying a | |
1793 conversion process, we need to give the data formats themselves, not the | |
1794 conversion processes -- e.g. a coding system called "Unicode->multibyte" | |
1795 converts in both directions, and we could auto-detect the format of data | |
1796 at either end. */ | |
1797 | |
1798 static Bytecount | |
1799 coding_reader (Lstream *stream, unsigned char *data, Bytecount size) | |
1800 { | |
1801 unsigned char *orig_data = data; | |
1802 Bytecount read_size; | |
1803 int error_occurred = 0; | |
1804 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1805 | |
1806 /* We need to interface to coding_{de,en}code_1(), which expects to take | |
1807 some amount of data and store the result into a Dynarr. We have | |
1808 coding_{de,en}code_1() store into c->runoff, and take data from there | |
1809 as necessary. */ | |
1810 | |
1811 /* We loop until we have enough data, reading chunks from the other | |
1812 end and converting it. */ | |
1813 while (1) | |
1814 { | |
1815 /* Take data from convert_to if we can. Make sure to take at | |
1816 most SIZE bytes, and delete the data from convert_to. */ | |
1817 if (Dynarr_length (str->convert_to) > 0) | |
1818 { | |
1819 Bytecount chunk = | |
1820 min (size, (Bytecount) Dynarr_length (str->convert_to)); | |
4967 | 1821 memcpy (data, Dynarr_begin (str->convert_to), chunk); |
771 | 1822 Dynarr_delete_many (str->convert_to, 0, chunk); |
1823 data += chunk; | |
1824 size -= chunk; | |
1825 } | |
1826 | |
1827 if (size == 0) | |
1828 break; /* No more room for data */ | |
1829 | |
1830 if (str->eof) | |
1831 break; | |
1832 | |
1833 { | |
1834 /* Exhausted convert_to, so get some more. Read into convert_from, | |
1835 after existing "rejected" data from the last conversion. */ | |
1836 Bytecount rejected = Dynarr_length (str->convert_from); | |
1837 /* #### 1024 is arbitrary; we really need to separate 0 from EOF, | |
1838 and when we get 0, keep taking more data until we don't get 0 -- | |
1839 we don't know how much data the conversion routine might need | |
2297 | 1840 before it can generate any data of its own (eg, bzip2). */ |
814 | 1841 Bytecount readmore = |
1842 str->one_byte_at_a_time ? (Bytecount) 1 : | |
1843 max (size, (Bytecount) 1024); | |
771 | 1844 |
1845 Dynarr_add_many (str->convert_from, 0, readmore); | |
1846 read_size = Lstream_read (str->other_end, | |
1847 Dynarr_atp (str->convert_from, rejected), | |
1848 readmore); | |
1849 /* Trim size down to how much we actually got */ | |
5038 | 1850 Dynarr_set_lengthr (str->convert_from, rejected + max (0, read_size)); |
771 | 1851 } |
1852 | |
1853 if (read_size < 0) /* LSTREAM_ERROR */ | |
1854 { | |
1855 error_occurred = 1; | |
1856 break; | |
1857 } | |
1858 if (read_size == 0) /* LSTREAM_EOF */ | |
1859 /* There might be some more end data produced in the translation, | |
1860 so we set a flag and call the conversion method once more to | |
1861 output any final stuff it may be holding, any "go back to a sane | |
1862 state" escape sequences, etc. The conversion method is free to | |
1863 look at this flag, and we use it above to stop looping. */ | |
1864 str->eof = 1; | |
1865 { | |
1866 Bytecount processed; | |
1867 Bytecount to_process = Dynarr_length (str->convert_from); | |
1868 | |
1869 /* Convert the data, and save any rejected data in convert_from */ | |
1870 processed = | |
1871 XCODESYSMETH (str->codesys, convert, | |
4967 | 1872 (str, Dynarr_begin (str->convert_from), |
771 | 1873 str->convert_to, to_process)); |
1874 if (processed < 0) | |
1875 { | |
1876 error_occurred = 1; | |
1877 break; | |
1878 } | |
1879 assert (processed <= to_process); | |
1880 if (processed < to_process) | |
4967 | 1881 memmove (Dynarr_begin (str->convert_from), |
771 | 1882 Dynarr_atp (str->convert_from, processed), |
1883 to_process - processed); | |
5038 | 1884 Dynarr_set_lengthr (str->convert_from, to_process - processed); |
771 | 1885 } |
1886 } | |
1887 | |
1888 if (data - orig_data == 0) | |
1889 return error_occurred ? -1 : 0; | |
1890 else | |
1891 return data - orig_data; | |
1892 } | |
1893 | |
1894 static Bytecount | |
1895 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size) | |
1896 { | |
1897 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1898 | |
1899 /* Convert all our data into convert_to, and then attempt to write | |
1900 it all out to the other end. */ | |
1901 Dynarr_reset (str->convert_to); | |
1902 size = XCODESYSMETH (str->codesys, convert, | |
1903 (str, data, str->convert_to, size)); | |
4967 | 1904 if (Lstream_write (str->other_end, Dynarr_begin (str->convert_to), |
771 | 1905 Dynarr_length (str->convert_to)) < 0) |
1906 return -1; | |
1907 else | |
1908 /* The return value indicates how much of the incoming data was | |
1909 processed, not how many bytes were written. */ | |
1910 return size; | |
1911 } | |
1912 | |
1913 static int | |
1914 encode_decode_source_sink_type_is_char (Lisp_Object cs, | |
1915 enum source_or_sink sex, | |
1916 enum encode_decode direction) | |
1917 { | |
1918 return (direction == CODING_DECODE ? | |
1919 decoding_source_sink_type_is_char (cs, sex) : | |
1920 encoding_source_sink_type_is_char (cs, sex)); | |
1921 } | |
1922 | |
1923 /* Ensure that the convert methods only get full characters sent to them to | |
1924 convert if the source of that conversion is characters; and that no such | |
1925 full-character checking happens when the source is bytes. Keep in mind | |
1926 that (1) the conversion_end_type return values take the perspective of | |
1927 encoding; (2) the source for decoding is the same as the sink for | |
1928 encoding; (3) when writing, the data is given to us, and we set our own | |
1929 stream to be character mode or not; (4) when reading, the data comes | |
1930 from the other_end stream, and we set that one to be character mode or | |
1931 not. This is consistent with the comment above the prototype for | |
1932 Lstream_set_character_mode(), which lays out rules for who is allowed to | |
1933 modify the character type mode on a stream. | |
1934 | |
814 | 1935 If we're a read stream, we're always setting character mode on the |
1936 source, but we also set it on ourselves consistent with the flag that | |
1937 can disable this (see again the comment above | |
1938 Lstream_set_character_mode()). | |
1939 */ | |
771 | 1940 |
1941 static void | |
1942 set_coding_character_mode (Lstream *stream) | |
1943 { | |
1944 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1945 Lstream *stream_to_set = | |
1946 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end; | |
1947 if (encode_decode_source_sink_type_is_char | |
1948 (str->codesys, CODING_SOURCE, str->direction)) | |
1949 Lstream_set_character_mode (stream_to_set); | |
1950 else | |
1951 Lstream_unset_character_mode (stream_to_set); | |
814 | 1952 if (str->set_char_mode_on_us_when_reading && |
1953 (stream->flags & LSTREAM_FL_READ)) | |
1954 { | |
1955 if (encode_decode_source_sink_type_is_char | |
1956 (str->codesys, CODING_SINK, str->direction)) | |
1957 Lstream_set_character_mode (stream); | |
1958 else | |
1959 Lstream_unset_character_mode (stream); | |
1960 } | |
771 | 1961 } |
1962 | |
1963 static Lisp_Object | |
1964 coding_marker (Lisp_Object stream) | |
1965 { | |
1966 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream)); | |
1967 | |
1968 mark_object (str->orig_codesys); | |
1969 mark_object (str->codesys); | |
1970 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str)); | |
1971 return wrap_lstream (str->other_end); | |
1972 } | |
1973 | |
1974 static int | |
1975 coding_rewinder (Lstream *stream) | |
1976 { | |
1977 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1978 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str)); | |
1979 | |
1980 str->ch = 0; | |
1981 Dynarr_reset (str->convert_to); | |
1982 Dynarr_reset (str->convert_from); | |
1983 return Lstream_rewind (str->other_end); | |
1984 } | |
1985 | |
1986 static int | |
1987 coding_seekable_p (Lstream *stream) | |
1988 { | |
1989 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1990 return Lstream_seekable_p (str->other_end); | |
1991 } | |
1992 | |
1993 static int | |
1994 coding_flusher (Lstream *stream) | |
1995 { | |
1996 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1997 return Lstream_flush (str->other_end); | |
1998 } | |
1999 | |
2000 static int | |
2001 coding_closer (Lstream *stream) | |
2002 { | |
2003 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2004 if (stream->flags & LSTREAM_FL_WRITE) | |
2005 { | |
2006 str->eof = 1; | |
2007 coding_writer (stream, 0, 0); | |
2008 str->eof = 0; | |
2009 } | |
2010 /* It's safe to free the runoff dynarrs now because they are used only | |
2011 during conversion. We need to keep the type-specific data around, | |
2012 though, because of canonicalize_after_coding. */ | |
2013 if (str->convert_to) | |
2014 { | |
2015 Dynarr_free (str->convert_to); | |
2016 str->convert_to = 0; | |
2017 } | |
2018 if (str->convert_from) | |
428 | 2019 { |
771 | 2020 Dynarr_free (str->convert_from); |
2021 str->convert_from = 0; | |
2022 } | |
2023 | |
800 | 2024 if (str->no_close_other) |
2025 return Lstream_flush (str->other_end); | |
2026 else | |
2027 return Lstream_close (str->other_end); | |
771 | 2028 } |
2029 | |
2030 static void | |
2031 coding_finalizer (Lstream *stream) | |
2032 { | |
2033 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2034 | |
2035 assert (!str->finalized); | |
2036 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2037 if (str->data) | |
2038 { | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2039 xfree (str->data); |
771 | 2040 str->data = 0; |
2041 } | |
2042 str->finalized = 1; | |
2043 } | |
2044 | |
2045 static Lisp_Object | |
2046 coding_stream_canonicalize_after_coding (Lstream *stream) | |
2047 { | |
2048 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2049 | |
2050 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding, | |
2051 (str), str->codesys); | |
2052 } | |
2053 | |
2054 Lisp_Object | |
2055 coding_stream_detected_coding_system (Lstream *stream) | |
2056 { | |
2057 Lisp_Object codesys = | |
2058 coding_stream_canonicalize_after_coding (stream); | |
2059 if (NILP (codesys)) | |
2060 return Fget_coding_system (Qidentity); | |
2061 return codesys; | |
2062 } | |
2063 | |
2064 Lisp_Object | |
2065 coding_stream_coding_system (Lstream *stream) | |
2066 { | |
2067 return CODING_STREAM_DATA (stream)->codesys; | |
2068 } | |
2069 | |
2070 /* Change the coding system associated with a stream. */ | |
2071 | |
2072 void | |
2073 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys) | |
2074 { | |
2075 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2076 if (EQ (str->orig_codesys, codesys)) | |
2077 return; | |
2078 /* We do the equivalent of closing the stream, destroying it, and | |
2079 reinitializing it. This includes flushing out the data and signalling | |
2080 EOF, if we're a writing stream; we also replace the type-specific data | |
2081 with the data appropriate for the new coding system. */ | |
2082 if (!NILP (str->codesys)) | |
2083 { | |
2084 if (lstr->flags & LSTREAM_FL_WRITE) | |
2085 { | |
2086 Lstream_flush (lstr); | |
2087 str->eof = 1; | |
2088 coding_writer (lstr, 0, 0); | |
2089 str->eof = 0; | |
2090 } | |
2091 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2092 } | |
2093 str->orig_codesys = codesys; | |
2094 str->codesys = coding_system_real_canonical (codesys); | |
2095 | |
2096 if (str->data) | |
2097 { | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2098 xfree (str->data); |
771 | 2099 str->data = 0; |
428 | 2100 } |
771 | 2101 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size) |
1204 | 2102 { |
2103 str->data = | |
2104 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)-> | |
2105 coding_data_size); | |
2106 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype; | |
2107 } | |
771 | 2108 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str)); |
2109 /* The new coding system may have different ideas regarding whether its | |
2110 ends are characters or bytes. */ | |
2111 set_coding_character_mode (lstr); | |
2112 } | |
2113 | |
2114 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding | |
2115 stream for writing, no automatic code detection will be performed. | |
2116 The reason for this is that automatic code detection requires a | |
2117 seekable input. Things will also fail if you open a coding | |
2118 stream for reading using a non-fully-specified coding system and | |
2119 a non-seekable input stream. */ | |
2120 | |
2121 static Lisp_Object | |
2122 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys, | |
800 | 2123 const char *mode, enum encode_decode direction, |
802 | 2124 int flags) |
771 | 2125 { |
2126 Lstream *lstr = Lstream_new (lstream_coding, mode); | |
2127 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2128 | |
2129 codesys = Fget_coding_system (codesys); | |
2130 xzero (*str); | |
2131 str->codesys = Qnil; | |
2132 str->orig_codesys = Qnil; | |
2133 str->us = lstr; | |
2134 str->other_end = stream; | |
2135 str->convert_to = Dynarr_new (unsigned_char); | |
2136 str->convert_from = Dynarr_new (unsigned_char); | |
2137 str->direction = direction; | |
814 | 2138 if (flags & LSTREAM_FL_NO_CLOSE_OTHER) |
802 | 2139 str->no_close_other = 1; |
814 | 2140 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME) |
802 | 2141 str->one_byte_at_a_time = 1; |
814 | 2142 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING)) |
2143 str->set_char_mode_on_us_when_reading = 1; | |
802 | 2144 |
771 | 2145 set_coding_stream_coding_system (lstr, codesys); |
793 | 2146 return wrap_lstream (lstr); |
771 | 2147 } |
2148 | |
814 | 2149 /* FLAGS: |
2150 | |
2151 LSTREAM_FL_NO_CLOSE_OTHER | |
2152 Don't close STREAM (the stream at the other end) when this stream is | |
2153 closed. | |
2154 | |
2155 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME | |
2156 When reading from STREAM, read and process one byte at a time rather | |
2157 than in large chunks. This is for reading from TTY's, so we don't | |
2158 block. #### We should instead create a non-blocking filedesc stream | |
2159 that emulates the behavior as necessary using select(), when the | |
2160 fcntls don't work. (As seems to be the case on Cygwin.) | |
2161 | |
2162 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING | |
2163 When reading from STREAM, read and process one byte at a time rather | |
2164 than in large chunks. This is for reading from TTY's, so we don't | |
2165 block. #### We should instead create a non-blocking filedesc stream | |
2166 that emulates the behavior as necessary using select(), when the | |
2167 fcntls don't work. (As seems to be the case on Cygwin.) | |
2168 */ | |
771 | 2169 Lisp_Object |
2170 make_coding_input_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2171 enum encode_decode direction, int flags) |
771 | 2172 { |
800 | 2173 return make_coding_stream_1 (stream, codesys, "r", direction, |
802 | 2174 flags); |
771 | 2175 } |
2176 | |
814 | 2177 /* FLAGS: |
2178 | |
2179 LSTREAM_FL_NO_CLOSE_OTHER | |
2180 Don't close STREAM (the stream at the other end) when this stream is | |
2181 closed. | |
2182 */ | |
771 | 2183 Lisp_Object |
2184 make_coding_output_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2185 enum encode_decode direction, int flags) |
771 | 2186 { |
800 | 2187 return make_coding_stream_1 (stream, codesys, "w", direction, |
802 | 2188 flags); |
771 | 2189 } |
2190 | |
2191 static Lisp_Object | |
2192 encode_decode_coding_region (Lisp_Object start, Lisp_Object end, | |
2193 Lisp_Object coding_system, Lisp_Object buffer, | |
2194 enum encode_decode direction) | |
2195 { | |
2196 Charbpos b, e; | |
2197 struct buffer *buf = decode_buffer (buffer, 0); | |
2198 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil; | |
2199 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil; | |
2200 Lisp_Object lb_outstream = Qnil; | |
2201 Lisp_Object next; | |
2202 Lstream *istr, *ostr; | |
2203 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; | |
2204 struct gcpro ngcpro1; | |
2205 int source_char, sink_char; | |
2206 | |
2207 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
2208 barf_if_buffer_read_only (buf, b, e); | |
2209 | |
2210 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream); | |
2211 NGCPRO1 (auto_outstream); | |
2212 | |
2213 coding_system = Fget_coding_system (coding_system); | |
2214 source_char = encode_decode_source_sink_type_is_char (coding_system, | |
2215 CODING_SOURCE, | |
2216 direction); | |
2217 sink_char = encode_decode_source_sink_type_is_char (coding_system, | |
2218 CODING_SINK, | |
2219 direction); | |
2220 | |
2221 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */ | |
2222 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
2223 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0); | |
2224 | |
2225 if (direction == CODING_DECODE && | |
2226 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT) | |
2227 next = auto_outstream = | |
2228 make_coding_output_stream | |
800 | 2229 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect), |
2230 CODING_DECODE, 0); | |
771 | 2231 |
2232 if (!sink_char) | |
2233 next = from_outstream = | |
800 | 2234 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0); |
771 | 2235 outstream = make_coding_output_stream (XLSTREAM (next), coding_system, |
800 | 2236 direction, 0); |
771 | 2237 if (!source_char) |
428 | 2238 { |
771 | 2239 to_outstream = |
2240 make_coding_output_stream (XLSTREAM (outstream), | |
800 | 2241 Qbinary, CODING_ENCODE, 0); |
771 | 2242 ostr = XLSTREAM (to_outstream); |
2243 } | |
2244 else | |
2245 ostr = XLSTREAM (outstream); | |
2246 istr = XLSTREAM (instream); | |
2247 | |
2248 /* The chain of streams looks like this: | |
2249 | |
2297 | 2250 [BUFFER] <----- (( read from/send to loop )) |
771 | 2251 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is |
2252 in bytes] | |
2253 ------> [ENCODE/DECODE AS SPECIFIED] | |
2254 ------> [BYTE->CHAR i.e. DECODE AS BINARY | |
2255 if sink is in bytes] | |
2256 ------> [AUTODETECT EOL if | |
2257 we're decoding and | |
2258 coding system calls | |
2259 for this] | |
2260 ------> [BUFFER] | |
2261 */ | |
2367 | 2262 |
2263 /* #### See comment | |
2264 | |
2265 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS | |
2266 | |
2267 in text.c. | |
2268 */ | |
2269 | |
771 | 2270 while (1) |
2271 { | |
2272 char tempbuf[1024]; /* some random amount */ | |
2273 Charbpos newpos, even_newer_pos; | |
2274 Charbpos oldpos = lisp_buffer_stream_startpos (istr); | |
2275 Bytecount size_in_bytes = | |
2276 Lstream_read (istr, tempbuf, sizeof (tempbuf)); | |
2277 | |
2278 if (!size_in_bytes) | |
2279 break; | |
2280 newpos = lisp_buffer_stream_startpos (istr); | |
2281 Lstream_write (ostr, tempbuf, size_in_bytes); | |
2282 even_newer_pos = lisp_buffer_stream_startpos (istr); | |
2283 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos), | |
2284 even_newer_pos, 0); | |
428 | 2285 } |
771 | 2286 |
2287 { | |
2288 Charcount retlen = | |
2289 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b; | |
2290 Lstream_close (istr); | |
2291 Lstream_close (ostr); | |
2292 NUNGCPRO; | |
2293 UNGCPRO; | |
2294 Lstream_delete (istr); | |
2295 if (!NILP (from_outstream)) | |
2296 Lstream_delete (XLSTREAM (from_outstream)); | |
2297 Lstream_delete (XLSTREAM (outstream)); | |
2298 if (!NILP (to_outstream)) | |
2299 Lstream_delete (XLSTREAM (to_outstream)); | |
2300 if (!NILP (auto_outstream)) | |
2301 Lstream_delete (XLSTREAM (auto_outstream)); | |
2302 Lstream_delete (XLSTREAM (lb_outstream)); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
2303 return make_fixnum (retlen); |
771 | 2304 } |
2305 } | |
2306 | |
3302 | 2307 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, |
2308 "*r\nzDecode from coding system: \ni", /* | |
771 | 2309 Decode the text between START and END which is encoded in CODING-SYSTEM. |
2310 This is useful if you've read in encoded text from a file without decoding | |
2311 it (e.g. you read in a JIS-formatted file but used the `binary' or | |
2312 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). | |
2313 Return length of decoded text. | |
3302 | 2314 BUFFER defaults to the current buffer if unspecified, and when interactive. |
771 | 2315 */ |
2316 (start, end, coding_system, buffer)) | |
2317 { | |
2318 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2319 CODING_DECODE); | |
2320 } | |
2321 | |
3302 | 2322 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, |
2323 "*r\nzEncode to coding system: \ni", /* | |
771 | 2324 Encode the text between START and END using CODING-SYSTEM. |
2325 This will, for example, convert Japanese characters into stuff such as | |
3302 | 2326 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text. |
2327 BUFFER defaults to the current buffer if unspecified, and when interactive. | |
771 | 2328 */ |
2329 (start, end, coding_system, buffer)) | |
2330 { | |
2331 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2332 CODING_ENCODE); | |
428 | 2333 } |
2334 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2335 DEFUN ("query-coding-region", Fquery_coding_region, 3, 7, 0, /* |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2336 Work out whether CODING-SYSTEM can losslessly encode a region. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2337 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2338 START and END are the beginning and end of the region to check. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2339 CODING-SYSTEM is the coding system to try. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2340 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2341 Optional argument BUFFER is the buffer to check, and defaults to the current |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2342 buffer. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2343 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2344 IGNORE-INVALID-SEQUENCESP, also an optional argument, says to treat XEmacs |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2345 characters which have an unambiguous encoded representation, despite being |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2346 undefined in what they represent, as encodable. These chiefly arise with |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2347 variable-length encodings like UTF-8 and UTF-16, where an invalid sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2348 is passed through to XEmacs as a sequence of characters with a defined |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2349 correspondence to the octets on disk, but no non-error semantics; see the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2350 `invalid-sequence-coding-system' argument to `set-language-info'. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2351 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2352 They can also arise with fixed-length encodings like ISO 8859-7, where |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2353 certain octets on disk have undefined values, and treating them as |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2354 corresponding to the ISO 8859-1 characters with the same numerical values |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2355 may lead to data that is not understood by other applications. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2356 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2357 Optional argument ERRORP says to signal a `text-conversion-error' if some |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2358 character in the region cannot be encoded, and defaults to nil. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2359 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2360 Optional argument HIGHLIGHT says to display unencodable characters in the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2361 region using `query-coding-warning-face'. It defaults to nil. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2362 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2363 This function can return multiple values; the intention is that callers use |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2364 `multiple-value-bind' or the related CL multiple value functions to deal |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2365 with it. The first result is `t' if the region can be encoded using |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2366 CODING-SYSTEM, or `nil' if not. If the region cannot be encoded using |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2367 CODING-SYSTEM, the second result is a range table describing the positions |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2368 of the unencodable characters. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2369 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2370 Ranges that describe characters that would be ignored were |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2371 IGNORE-INVALID-SEQUENCESP non-nil map to the symbol `invalid-sequence'; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2372 other ranges map to the symbol `unencodable'. If IGNORE-INVALID-SEQUENCESP |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2373 is non-nil, all ranges will map to the symbol `unencodable'. See |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2374 `make-range-table' for more details of range tables. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2375 */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2376 (start, end, coding_system, buffer, ignore_invalid_sequencesp, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2377 errorp, highlight)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2378 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2379 Charbpos b, e; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2380 struct buffer *buf = decode_buffer (buffer, 1); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2381 Lisp_Object result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2382 int flags = 0, speccount = specpdl_depth (); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2383 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2384 coding_system = Fget_coding_system (coding_system); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2385 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2386 get_buffer_range_char (buf, start, end, &b, &e, 0); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2387 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2388 if (buf != current_buffer) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2389 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2390 record_unwind_protect (save_current_buffer_restore, Fcurrent_buffer ()); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2391 set_buffer_internal (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2392 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2393 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2394 record_unwind_protect (save_excursion_restore, save_excursion_save ()); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2395 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2396 BUF_SET_PT (buf, b); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2397 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2398 if (!NILP (ignore_invalid_sequencesp)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2399 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2400 flags |= QUERY_METHOD_IGNORE_INVALID_SEQUENCES; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2401 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2402 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2403 if (!NILP (errorp)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2404 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2405 flags |= QUERY_METHOD_ERRORP; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2406 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2407 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2408 if (!NILP (highlight)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2409 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2410 flags |= QUERY_METHOD_HIGHLIGHT; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2411 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2412 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2413 result = XCODESYSMETH_OR_GIVEN (coding_system, query, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2414 (coding_system, buf, e, flags), Qunbound); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2415 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2416 if (UNBOUNDP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2417 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2418 signal_error (Qtext_conversion_error, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2419 "Coding system doesn't say what it can encode", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2420 XCODING_SYSTEM_NAME (coding_system)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2421 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2422 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2423 result = (NILP (result)) ? Qt : values2 (Qnil, result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2424 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2425 return unbind_to_1 (speccount, result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2426 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2427 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2428 |
428 | 2429 |
2430 /************************************************************************/ | |
771 | 2431 /* Chain methods */ |
428 | 2432 /************************************************************************/ |
2433 | |
771 | 2434 /* #### Need a way to create "opposite-direction" coding systems. */ |
2435 | |
2436 /* Chain two or more coding systems together to make a combination coding | |
2437 system. */ | |
2438 | |
2439 struct chain_coding_system | |
2440 { | |
2441 /* List of coding systems, in decode order */ | |
2442 Lisp_Object *chain; | |
2443 /* Number of coding systems in list */ | |
2444 int count; | |
2445 /* Coding system to return as a result of canonicalize-after-coding */ | |
2446 Lisp_Object canonicalize_after_coding; | |
2447 }; | |
2448 | |
2449 struct chain_coding_stream | |
2450 { | |
2451 int initted; | |
2452 /* Lstreams for chain coding system */ | |
2453 Lisp_Object *lstreams; | |
2454 int lstream_count; | |
2455 }; | |
2456 | |
1204 | 2457 static const struct memory_description chain_coding_system_description[] = { |
2458 { XD_INT, offsetof (struct chain_coding_system, count) }, | |
2367 | 2459 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain), |
2551 | 2460 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
1204 | 2461 { XD_LISP_OBJECT, offsetof (struct chain_coding_system, |
2462 canonicalize_after_coding) }, | |
771 | 2463 { XD_END } |
2464 }; | |
2465 | |
1204 | 2466 static const struct memory_description chain_coding_stream_description_1 [] = { |
2467 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) }, | |
2367 | 2468 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams), |
2551 | 2469 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
771 | 2470 { XD_END } |
2471 }; | |
2472 | |
1204 | 2473 const struct sized_memory_description chain_coding_stream_description = { |
2474 sizeof (struct chain_coding_stream), chain_coding_stream_description_1 | |
2475 }; | |
2476 | |
2477 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain); | |
2478 | |
771 | 2479 static Lisp_Object |
2480 chain_canonicalize (Lisp_Object codesys) | |
2481 { | |
2482 /* We make use of the fact that this method is called at init time, after | |
2483 properties have been parsed. init_method is called too early. */ | |
2484 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2485 any more. */ | |
2486 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys), | |
2487 XCODING_SYSTEM_CHAIN_CHAIN (codesys)); | |
2488 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys), | |
2489 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys), | |
2490 chain)); | |
2491 Fputhash (chain, codesys, Vchain_canonicalize_hash_table); | |
2492 return codesys; | |
2493 } | |
2494 | |
2495 static Lisp_Object | |
2496 chain_canonicalize_after_coding (struct coding_stream *str) | |
2497 { | |
2498 Lisp_Object cac = | |
2499 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys); | |
2500 if (!NILP (cac)) | |
2501 return cac; | |
2502 return str->codesys; | |
2503 #if 0 | |
2504 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2505 Lisp_Object us = str->codesys, codesys; | |
2506 int i; | |
2507 Lisp_Object chain; | |
2508 Lisp_Object tail; | |
2509 int changed = 0; | |
2510 | |
2511 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2512 any more. */ | |
2513 if (str->direction == CODING_ENCODE || !data->initted) | |
2514 return us; | |
2515 | |
2516 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us), | |
2517 XCODING_SYSTEM_CHAIN_CHAIN (us)); | |
2518 | |
2519 tail = chain; | |
2520 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++) | |
2521 { | |
2522 codesys = (coding_stream_canonicalize_after_coding | |
2523 (XLSTREAM (data->lstreams[i]))); | |
2524 if (!EQ (codesys, XCAR (tail))) | |
2525 changed = 1; | |
2526 XCAR (tail) = codesys; | |
2527 tail = XCDR (tail); | |
2528 } | |
2529 | |
2530 if (!changed) | |
2531 return us; | |
2532 | |
2533 chain = delq_no_quit (Qnil, chain); | |
2534 | |
2535 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) && | |
2536 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us))) | |
2537 { | |
2538 if (NILP (chain)) | |
2539 return Qnil; | |
2540 if (NILP (XCDR (chain))) | |
2541 return XCAR (chain); | |
2542 } | |
2543 | |
2544 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us), | |
2545 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us), | |
2546 chain)), Vchain_canonicalize_hash_table, | |
2547 Qnil); | |
2548 if (!NILP (codesys)) | |
2549 return codesys; | |
2550 return make_internal_coding_system | |
2551 (us, "internal-chain-canonicalizer-wrapper", | |
2552 Qchain, Qunbound, list2 (Qchain, chain)); | |
2553 #endif /* 0 */ | |
2554 } | |
2555 | |
2556 static void | |
2557 chain_init (Lisp_Object codesys) | |
2558 { | |
2559 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil; | |
2560 } | |
2561 | |
2562 static void | |
2563 chain_mark (Lisp_Object codesys) | |
2564 { | |
2565 int i; | |
2566 | |
2567 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++) | |
2568 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]); | |
2569 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys)); | |
2570 } | |
2571 | |
2572 static void | |
2573 chain_mark_coding_stream_1 (struct chain_coding_stream *data) | |
2574 { | |
2575 int i; | |
2576 | |
2577 for (i = 0; i < data->lstream_count; i++) | |
2578 mark_object (data->lstreams[i]); | |
2579 } | |
2580 | |
2581 static void | |
2582 chain_mark_coding_stream (struct coding_stream *str) | |
2583 { | |
2584 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2585 } | |
2586 | |
2587 static void | |
2588 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
2589 { | |
2590 int i; | |
2591 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2592 write_ascstring (printcharfun, "("); |
771 | 2593 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++) |
2594 { | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2595 write_ascstring (printcharfun, i == 0 ? "" : "->"); |
771 | 2596 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i], |
2597 printcharfun, escapeflag); | |
2598 } | |
2599 { | |
2600 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs); | |
2601 if (!NILP (cac)) | |
2602 { | |
2603 if (i > 0) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2604 write_ascstring (printcharfun, " "); |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2605 write_ascstring (printcharfun, "canonicalize-after-coding="); |
771 | 2606 print_coding_system_in_print_method (cac, printcharfun, escapeflag); |
2607 } | |
2608 } | |
2609 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2610 write_ascstring (printcharfun, ")"); |
771 | 2611 } |
2612 | |
2613 static void | |
2614 chain_rewind_coding_stream_1 (struct chain_coding_stream *data) | |
2615 { | |
2616 /* Each will rewind the next; there is always at least one stream (the | |
2617 dynarr stream at the end) if we're initted */ | |
2618 if (data->initted) | |
2619 Lstream_rewind (XLSTREAM (data->lstreams[0])); | |
2620 } | |
2621 | |
2622 static void | |
2623 chain_rewind_coding_stream (struct coding_stream *str) | |
2624 { | |
2625 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2626 } | |
2627 | |
2628 static void | |
2629 chain_init_coding_streams_1 (struct chain_coding_stream *data, | |
2630 unsigned_char_dynarr *dst, | |
2631 int ncodesys, Lisp_Object *codesys, | |
2632 enum encode_decode direction) | |
2633 { | |
2634 int i; | |
2635 Lisp_Object lstream_out; | |
2636 | |
2637 data->lstream_count = ncodesys + 1; | |
2638 data->lstreams = xnew_array (Lisp_Object, data->lstream_count); | |
2639 | |
2640 lstream_out = make_dynarr_output_stream (dst); | |
2641 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0); | |
2642 data->lstreams[data->lstream_count - 1] = lstream_out; | |
2643 | |
2644 for (i = ncodesys - 1; i >= 0; i--) | |
2645 { | |
2646 data->lstreams[i] = | |
2647 make_coding_output_stream | |
2648 (XLSTREAM (lstream_out), | |
2649 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i], | |
800 | 2650 direction, 0); |
771 | 2651 lstream_out = data->lstreams[i]; |
2652 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, | |
2653 0); | |
2654 } | |
2655 data->initted = 1; | |
2656 } | |
2657 | |
2658 static Bytecount | |
2659 chain_convert (struct coding_stream *str, const UExtbyte *src, | |
2660 unsigned_char_dynarr *dst, Bytecount n) | |
2661 { | |
2662 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2663 | |
2664 if (str->eof) | |
2665 { | |
2666 /* Each will close the next; there is always at least one stream (the | |
2667 dynarr stream at the end) if we're initted. We need to close now | |
2668 because more data may be generated. */ | |
2669 if (data->initted) | |
2670 Lstream_close (XLSTREAM (data->lstreams[0])); | |
2671 return n; | |
2672 } | |
2673 | |
2674 if (!data->initted) | |
2675 chain_init_coding_streams_1 | |
2676 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys), | |
2677 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction); | |
2678 | |
2679 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0) | |
2680 return -1; | |
2681 return n; | |
2682 } | |
2683 | |
2684 static void | |
2685 chain_finalize_coding_stream_1 (struct chain_coding_stream *data) | |
2686 { | |
2687 if (data->lstreams) | |
2688 { | |
2297 | 2689 /* During GC, these objects are unmarked, and are about to be freed. |
2690 We do NOT want them on the free list, and that will cause lots of | |
2691 nastiness including crashes. Just let them be freed normally. */ | |
771 | 2692 if (!gc_in_progress) |
2693 { | |
2694 int i; | |
2297 | 2695 /* Order of deletion is important here! Delete from the head of |
2696 the chain and work your way towards the tail. In general, | |
2697 when you delete an object, there should be *NO* pointers to it | |
2698 anywhere. Deleting back-to-front would be a problem because | |
2699 there are pointers going forward. If there were pointers in | |
2700 both directions, you'd have to disconnect the pointers to a | |
2701 particular object before deleting it. */ | |
771 | 2702 for (i = 0; i < data->lstream_count; i++) |
2703 Lstream_delete (XLSTREAM ((data->lstreams)[i])); | |
2704 } | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2705 xfree (data->lstreams); |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5142
diff
changeset
|
2706 data->lstreams = 0; |
771 | 2707 } |
2708 } | |
2709 | |
2710 static void | |
2711 chain_finalize_coding_stream (struct coding_stream *str) | |
2712 { | |
2713 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2714 } | |
2715 | |
2716 static void | |
2717 chain_finalize (Lisp_Object c) | |
2718 { | |
2719 if (XCODING_SYSTEM_CHAIN_CHAIN (c)) | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2720 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c)); |
771 | 2721 } |
2722 | |
428 | 2723 static int |
771 | 2724 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
2725 { | |
2726 if (EQ (key, Qchain)) | |
2727 { | |
2728 Lisp_Object *cslist; | |
2729 int count = 0; | |
2730 int i; | |
2731 | |
2367 | 2732 { |
2733 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2734 { | |
2735 Fget_coding_system (elt); | |
2736 count++; | |
2737 } | |
2738 } | |
771 | 2739 |
2740 cslist = xnew_array (Lisp_Object, count); | |
2741 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist; | |
2742 | |
2743 count = 0; | |
2367 | 2744 { |
2745 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2746 { | |
2747 cslist[count] = Fget_coding_system (elt); | |
2748 count++; | |
2749 } | |
2750 } | |
771 | 2751 |
2752 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count; | |
2753 | |
2754 for (i = 0; i < count - 1; i++) | |
2755 { | |
2756 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) != | |
2757 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE)) | |
2758 invalid_argument_2 ("Sink of first must match source of second", | |
2759 cslist[i], cslist[i + 1]); | |
2760 } | |
2761 } | |
2762 else if (EQ (key, Qcanonicalize_after_coding)) | |
2763 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = | |
2764 Fget_coding_system (value); | |
2765 else | |
2766 return 0; | |
2767 return 1; | |
2768 } | |
2769 | |
2770 static Lisp_Object | |
2771 chain_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2772 { | |
2773 if (EQ (prop, Qchain)) | |
2774 { | |
2775 Lisp_Object result = Qnil; | |
2776 int i; | |
2777 | |
2778 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++) | |
2779 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i], | |
2780 result); | |
2781 | |
2782 return Fnreverse (result); | |
2783 } | |
2784 else if (EQ (prop, Qcanonicalize_after_coding)) | |
2785 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system); | |
2786 else | |
2787 return Qunbound; | |
2788 } | |
2789 | |
2790 static enum source_sink_type | |
2791 chain_conversion_end_type (Lisp_Object codesys) | |
2792 { | |
2793 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys); | |
2794 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys); | |
2795 int charp_source, charp_sink; | |
2796 | |
2797 if (n == 0) | |
2798 return DECODES_BYTE_TO_BYTE; /* arbitrary */ | |
2799 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE); | |
2800 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK); | |
2801 | |
2802 switch (charp_source * 2 + charp_sink) | |
2803 { | |
2804 case 0: return DECODES_BYTE_TO_BYTE; | |
2805 case 1: return DECODES_BYTE_TO_CHARACTER; | |
2806 case 2: return DECODES_CHARACTER_TO_BYTE; | |
2807 case 3: return DECODES_CHARACTER_TO_CHARACTER; | |
2808 } | |
2809 | |
2500 | 2810 ABORT (); |
771 | 2811 return DECODES_BYTE_TO_BYTE; |
2812 } | |
2813 | |
2814 | |
2815 /************************************************************************/ | |
2816 /* No-conversion methods */ | |
2817 /************************************************************************/ | |
2818 | |
2819 /* "No conversion"; used for binary files. We use quotes because there | |
2820 really is some conversion being applied (it does byte<->char | |
2821 conversion), but it appears to the user as if the text is read in | |
2297 | 2822 without conversion. |
2823 | |
2824 #### Shouldn't we _call_ it that, then? And while we're at it, | |
2825 separate it into "to_internal" and "to_external"? */ | |
771 | 2826 DEFINE_CODING_SYSTEM_TYPE (no_conversion); |
2827 | |
2828 /* This is used when reading in "binary" files -- i.e. files that may | |
2829 contain all 256 possible byte values and that are not to be | |
2830 interpreted as being in any particular encoding. */ | |
2831 static Bytecount | |
2832 no_conversion_convert (struct coding_stream *str, | |
2833 const UExtbyte *src, | |
2834 unsigned_char_dynarr *dst, Bytecount n) | |
2835 { | |
2836 UExtbyte c; | |
2837 unsigned int ch = str->ch; | |
2838 Bytecount orign = n; | |
2839 | |
2840 if (str->direction == CODING_DECODE) | |
2841 { | |
2842 while (n--) | |
2843 { | |
2844 c = *src++; | |
2845 | |
2846 DECODE_ADD_BINARY_CHAR (c, dst); | |
2847 } | |
2848 | |
2849 if (str->eof) | |
2850 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2851 } | |
2852 else | |
2853 { | |
2854 | |
2855 while (n--) | |
2856 { | |
2857 c = *src++; | |
826 | 2858 if (byte_ascii_p (c)) |
771 | 2859 { |
2860 assert (ch == 0); | |
2861 Dynarr_add (dst, c); | |
2862 } | |
2863 #ifdef MULE | |
867 | 2864 else if (ibyte_leading_byte_p (c)) |
771 | 2865 { |
2866 assert (ch == 0); | |
2867 if (c == LEADING_BYTE_LATIN_ISO8859_1 || | |
2868 c == LEADING_BYTE_CONTROL_1) | |
2869 ch = c; | |
2870 else | |
2297 | 2871 /* #### This is just plain unacceptable. */ |
771 | 2872 Dynarr_add (dst, '~'); /* untranslatable character */ |
2873 } | |
2874 else | |
2875 { | |
2876 if (ch == LEADING_BYTE_LATIN_ISO8859_1) | |
2877 Dynarr_add (dst, c); | |
2878 else if (ch == LEADING_BYTE_CONTROL_1) | |
2879 { | |
2880 assert (c < 0xC0); | |
2881 Dynarr_add (dst, c - 0x20); | |
2882 } | |
2883 /* else it should be the second or third byte of an | |
2884 untranslatable character, so ignore it */ | |
2885 ch = 0; | |
2886 } | |
2887 #endif /* MULE */ | |
2888 | |
2889 } | |
2890 } | |
2891 | |
2892 str->ch = ch; | |
2893 return orign; | |
2894 } | |
2895 | |
2896 DEFINE_DETECTOR (no_conversion); | |
2897 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
2898 | |
2899 struct no_conversion_detector | |
2900 { | |
2901 int dummy; | |
2902 }; | |
2903 | |
2904 static void | |
2286 | 2905 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src), |
2906 Bytecount UNUSED (n)) | |
771 | 2907 { |
2908 /* Hack until we get better handling of this stuff! */ | |
2909 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY; | |
2910 } | |
2911 | |
2912 | |
2913 /************************************************************************/ | |
2914 /* Convert-eol methods */ | |
2915 /************************************************************************/ | |
2916 | |
2917 /* This is used to handle end-of-line (EOL) differences. It is | |
2819 | 2918 character-to-character, and works (when encoding) *BEFORE* sending data to |
2919 the main encoding routine -- thus, that routine must handle different EOL | |
2920 types itself if it does line-oriented type processing. This is unavoidable | |
2921 because we don't know whether the output of the main encoding routine is | |
2922 ASCII compatible (UTF-16 is definitely not, for example). [[ sjt sez this | |
2923 is bogus. There should be _no_ EOL processing (or processing of any kind) | |
2924 after conversion to external. ]] | |
771 | 2925 |
793 | 2926 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil. |
771 | 2927 */ |
2928 | |
2929 struct convert_eol_coding_system | |
2930 { | |
2931 enum eol_type subtype; | |
2132 | 2932 int dummy; /* On some architectures (eg ia64) the portable dumper can |
2933 produce unaligned access errors without this field. Probably | |
2934 because the combined structure of this structure and | |
2935 Lisp_Coding_System is not properly aligned. */ | |
771 | 2936 }; |
2937 | |
2938 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2939 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2940 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2941 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2942 | |
2943 struct convert_eol_coding_stream | |
2944 { | |
2945 enum eol_type actual; | |
2946 }; | |
2947 | |
1204 | 2948 static const struct memory_description |
771 | 2949 convert_eol_coding_system_description[] = { |
2950 { XD_END } | |
2951 }; | |
2952 | |
1204 | 2953 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol); |
2954 | |
771 | 2955 static void |
2286 | 2956 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun, |
2957 int UNUSED (escapeflag)) | |
771 | 2958 { |
2959 struct convert_eol_coding_system *data = | |
2960 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol); | |
2961 | |
2962 write_fmt_string (printcharfun, "(%s)", | |
2963 data->subtype == EOL_LF ? "lf" : | |
2964 data->subtype == EOL_CRLF ? "crlf" : | |
2965 data->subtype == EOL_CR ? "cr" : | |
793 | 2966 data->subtype == EOL_AUTODETECT ? "nil" : |
2500 | 2967 (ABORT(), "")); |
771 | 2968 } |
2969 | |
2970 static enum source_sink_type | |
2286 | 2971 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys)) |
771 | 2972 { |
2973 return DECODES_CHARACTER_TO_CHARACTER; | |
2974 } | |
2975 | |
2976 static int | |
2977 convert_eol_putprop (Lisp_Object codesys, | |
2978 Lisp_Object key, | |
2979 Lisp_Object value) | |
2980 { | |
2981 struct convert_eol_coding_system *data = | |
2982 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol); | |
2983 | |
2984 if (EQ (key, Qsubtype)) | |
2985 { | |
2986 if (EQ (value, Qlf) /* || EQ (value, Qunix) */) | |
2987 data->subtype = EOL_LF; | |
2988 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */) | |
2989 data->subtype = EOL_CRLF; | |
2990 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */) | |
2991 data->subtype = EOL_CR; | |
793 | 2992 else if (EQ (value, Qnil)) |
771 | 2993 data->subtype = EOL_AUTODETECT; |
2994 else invalid_constant ("Unrecognized eol type", value); | |
2995 } | |
2996 else | |
2997 return 0; | |
2998 return 1; | |
2999 } | |
3000 | |
3001 static Lisp_Object | |
3002 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
3003 { | |
3004 struct convert_eol_coding_system *data = | |
3005 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol); | |
3006 | |
3007 if (EQ (prop, Qsubtype)) | |
3008 { | |
3009 switch (data->subtype) | |
3010 { | |
3011 case EOL_LF: return Qlf; | |
3012 case EOL_CRLF: return Qcrlf; | |
3013 case EOL_CR: return Qcr; | |
793 | 3014 case EOL_AUTODETECT: return Qnil; |
2500 | 3015 default: ABORT (); |
771 | 3016 } |
3017 } | |
3018 | |
3019 return Qunbound; | |
3020 } | |
3021 | |
3022 static void | |
3023 convert_eol_init_coding_stream (struct coding_stream *str) | |
3024 { | |
3025 struct convert_eol_coding_stream *data = | |
3026 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3027 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
3028 } | |
3029 | |
3030 static Bytecount | |
867 | 3031 convert_eol_convert (struct coding_stream *str, const Ibyte *src, |
771 | 3032 unsigned_char_dynarr *dst, Bytecount n) |
3033 { | |
3034 if (str->direction == CODING_DECODE) | |
3035 { | |
3036 struct convert_eol_coding_stream *data = | |
3037 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3038 | |
3039 if (data->actual == EOL_AUTODETECT) | |
3040 { | |
3041 Bytecount n2 = n; | |
867 | 3042 const Ibyte *src2 = src; |
771 | 3043 |
3044 for (; n2; n2--) | |
3045 { | |
867 | 3046 Ibyte c = *src2++; |
771 | 3047 if (c == '\n') |
3048 { | |
3049 data->actual = EOL_LF; | |
3050 break; | |
3051 } | |
3052 else if (c == '\r') | |
3053 { | |
3054 if (n2 == 1) | |
3055 { | |
3056 /* If we're seeing a '\r' at the end of the data, then | |
3057 reject the '\r' right now so it doesn't become an | |
3058 issue in the code below -- unless we're at the end of | |
3059 the stream, in which case we can't do that (because | |
3060 then the '\r' will never get written out), and in any | |
3061 case we should be recognizing it at EOL_CR format. */ | |
3062 if (str->eof) | |
3063 data->actual = EOL_CR; | |
3064 else | |
3065 n--; | |
3066 break; | |
3067 } | |
3068 else if (*src2 == '\n') | |
3069 data->actual = EOL_CRLF; | |
3070 else | |
3071 data->actual = EOL_CR; | |
3072 break; | |
3073 } | |
3074 } | |
3075 } | |
3076 | |
3077 /* str->eof is set, the caller reached EOF on the other end and has | |
3078 no new data to give us. The only data we get is the data we | |
3079 rejected from last time. */ | |
3080 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT || | |
3081 (str->eof)) | |
3082 Dynarr_add_many (dst, src, n); | |
3083 else | |
3084 { | |
867 | 3085 const Ibyte *end = src + n; |
771 | 3086 while (1) |
3087 { | |
3088 /* Find the next section with no \r and add it. */ | |
867 | 3089 const Ibyte *runstart = src; |
3090 src = (Ibyte *) memchr (src, '\r', end - src); | |
771 | 3091 if (!src) |
3092 src = end; | |
3093 Dynarr_add_many (dst, runstart, src - runstart); | |
3094 /* Stop if at end ... */ | |
3095 if (src == end) | |
3096 break; | |
3097 /* ... else, translate as necessary. */ | |
3098 src++; | |
3099 if (data->actual == EOL_CR) | |
3100 Dynarr_add (dst, '\n'); | |
3101 /* We need to be careful here with CRLF. If we see a CR at the | |
3102 end of the data, we don't know if it's part of a CRLF, so we | |
3103 reject it. Otherwise: If it's part of a CRLF, eat it and | |
3104 loop; the following LF gets added next time around. If it's | |
3105 not part of a CRLF, add the CR and loop. The following | |
3106 character will be processed in the next loop iteration. This | |
3107 correctly handles a sequence like CR+CR+LF. */ | |
3108 else if (src == end) | |
3109 return n - 1; /* reject the CR at the end; we'll get it again | |
3110 next time the convert method is called */ | |
3111 else if (*src != '\n') | |
3112 Dynarr_add (dst, '\r'); | |
3113 } | |
3114 } | |
3115 | |
3116 return n; | |
3117 } | |
3118 else | |
3119 { | |
3120 enum eol_type subtype = | |
3121 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
867 | 3122 const Ibyte *end = src + n; |
771 | 3123 |
3124 /* We try to be relatively efficient here. */ | |
3125 if (subtype == EOL_LF) | |
3126 Dynarr_add_many (dst, src, n); | |
3127 else | |
3128 { | |
3129 while (1) | |
3130 { | |
3131 /* Find the next section with no \n and add it. */ | |
867 | 3132 const Ibyte *runstart = src; |
3133 src = (Ibyte *) memchr (src, '\n', end - src); | |
771 | 3134 if (!src) |
3135 src = end; | |
3136 Dynarr_add_many (dst, runstart, src - runstart); | |
3137 /* Stop if at end ... */ | |
3138 if (src == end) | |
3139 break; | |
3140 /* ... else, skip over \n and add its translation. */ | |
3141 src++; | |
3142 Dynarr_add (dst, '\r'); | |
3143 if (subtype == EOL_CRLF) | |
3144 Dynarr_add (dst, '\n'); | |
3145 } | |
3146 } | |
3147 | |
3148 return n; | |
3149 } | |
3150 } | |
3151 | |
3152 static Lisp_Object | |
3153 convert_eol_canonicalize_after_coding (struct coding_stream *str) | |
3154 { | |
3155 struct convert_eol_coding_stream *data = | |
3156 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3157 | |
3158 if (str->direction == CODING_ENCODE) | |
3159 return str->codesys; | |
3160 | |
3161 switch (data->actual) | |
3162 { | |
3163 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf); | |
3164 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf); | |
3165 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr); | |
3166 case EOL_AUTODETECT: return str->codesys; | |
2500 | 3167 default: ABORT (); return Qnil; |
771 | 3168 } |
3169 } | |
3170 | |
3171 | |
3172 /************************************************************************/ | |
3173 /* Undecided methods */ | |
3174 /************************************************************************/ | |
3175 | |
3176 /* Do autodetection. We can autodetect the EOL type only, the coding | |
3177 system only, or both. We only do autodetection when decoding; when | |
3178 encoding, we just pass the data through. | |
3179 | |
3180 When doing just EOL detection, a coding system can be specified; if so, | |
3181 we will decode this data through the coding system before doing EOL | |
3182 detection. The reason for specifying this is so that | |
3183 canonicalize-after-coding works: We will canonicalize the specified | |
3184 coding system into the appropriate EOL type. When doing both coding and | |
3185 EOL detection, we do similar canonicalization, and also catch situations | |
3186 where the EOL type is overspecified, i.e. the detected coding system | |
3187 specifies an EOL type, and either switch to the equivalent | |
3188 non-EOL-processing coding system (if possible), or terminate EOL | |
3189 detection and use the specified EOL type. This prevents data from being | |
3190 EOL-processed twice. | |
3191 */ | |
3192 | |
3193 struct undecided_coding_system | |
3194 { | |
3195 int do_eol, do_coding; | |
3196 Lisp_Object cs; | |
3197 }; | |
3198 | |
3199 struct undecided_coding_stream | |
3200 { | |
3201 Lisp_Object actual; | |
3202 /* Either 2 or 3 lstreams here; see undecided_convert */ | |
3203 struct chain_coding_stream c; | |
3204 | |
3205 struct detection_state *st; | |
3206 }; | |
3207 | |
1204 | 3208 static const struct memory_description undecided_coding_system_description[] = { |
3209 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) }, | |
771 | 3210 { XD_END } |
3211 }; | |
3212 | |
1204 | 3213 static const struct memory_description undecided_coding_stream_description_1 [] = { |
3214 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) }, | |
2367 | 3215 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c), |
2551 | 3216 1, { &chain_coding_stream_description } }, |
1204 | 3217 { XD_END } |
3218 }; | |
3219 | |
3220 const struct sized_memory_description undecided_coding_stream_description = { | |
3221 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1 | |
3222 }; | |
3223 | |
3224 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided); | |
3225 | |
771 | 3226 static void |
3227 undecided_init (Lisp_Object codesys) | |
3228 { | |
3229 struct undecided_coding_system *data = | |
3230 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3231 | |
3232 data->cs = Qnil; | |
3233 } | |
3234 | |
3235 static void | |
3236 undecided_mark (Lisp_Object codesys) | |
3237 { | |
3238 struct undecided_coding_system *data = | |
3239 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3240 | |
3241 mark_object (data->cs); | |
3242 } | |
3243 | |
3244 static void | |
3245 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
3246 { | |
3247 struct undecided_coding_system *data = | |
3248 XCODING_SYSTEM_TYPE_DATA (cs, undecided); | |
3249 int need_space = 0; | |
3250 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3251 write_ascstring (printcharfun, "("); |
771 | 3252 if (data->do_eol) |
3253 { | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3254 write_ascstring (printcharfun, "do-eol"); |
771 | 3255 need_space = 1; |
3256 } | |
3257 if (data->do_coding) | |
3258 { | |
3259 if (need_space) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3260 write_ascstring (printcharfun, " "); |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3261 write_ascstring (printcharfun, "do-coding"); |
771 | 3262 need_space = 1; |
3263 } | |
3264 if (!NILP (data->cs)) | |
3265 { | |
3266 if (need_space) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3267 write_ascstring (printcharfun, " "); |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3268 write_ascstring (printcharfun, "coding-system="); |
771 | 3269 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag); |
3270 } | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3271 write_ascstring (printcharfun, ")"); |
771 | 3272 } |
3273 | |
3274 static void | |
3275 undecided_mark_coding_stream (struct coding_stream *str) | |
3276 { | |
1204 | 3277 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual); |
771 | 3278 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); |
3279 } | |
3280 | |
3281 static int | |
3282 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
3283 { | |
3284 struct undecided_coding_system *data = | |
3285 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3286 | |
3287 if (EQ (key, Qdo_eol)) | |
3288 data->do_eol = 1; | |
3289 else if (EQ (key, Qdo_coding)) | |
3290 data->do_coding = 1; | |
3291 else if (EQ (key, Qcoding_system)) | |
3292 data->cs = get_coding_system_for_text_file (value, 0); | |
3293 else | |
3294 return 0; | |
3295 return 1; | |
3296 } | |
3297 | |
3298 static Lisp_Object | |
3299 undecided_getprop (Lisp_Object codesys, Lisp_Object prop) | |
3300 { | |
3301 struct undecided_coding_system *data = | |
3302 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3303 | |
3304 if (EQ (prop, Qdo_eol)) | |
3305 return data->do_eol ? Qt : Qnil; | |
3306 if (EQ (prop, Qdo_coding)) | |
3307 return data->do_coding ? Qt : Qnil; | |
3308 if (EQ (prop, Qcoding_system)) | |
3309 return data->cs; | |
3310 return Qunbound; | |
3311 } | |
3312 | |
3313 static struct detection_state * | |
3314 allocate_detection_state (void) | |
3315 { | |
3316 int i; | |
3317 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3318 struct detection_state *block; | |
3319 | |
3320 for (i = 0; i < coding_detector_count; i++) | |
3321 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3322 | |
3323 block = (struct detection_state *) xmalloc_and_zero (size); | |
3324 | |
3325 size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3326 for (i = 0; i < coding_detector_count; i++) | |
3327 { | |
3328 block->data_offset[i] = size; | |
3329 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3330 } | |
3331 | |
3332 return block; | |
3333 } | |
3334 | |
3335 static void | |
3336 free_detection_state (struct detection_state *st) | |
3337 { | |
3338 int i; | |
3339 | |
3340 for (i = 0; i < coding_detector_count; i++) | |
3341 { | |
3342 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method) | |
3343 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method | |
3344 (st); | |
3345 } | |
3346 | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
3347 xfree (st); |
771 | 3348 } |
3349 | |
3350 static int | |
3351 coding_category_symbol_to_id (Lisp_Object symbol) | |
428 | 3352 { |
3353 int i; | |
3354 | |
3355 CHECK_SYMBOL (symbol); | |
771 | 3356 for (i = 0; i < coding_detector_count; i++) |
3357 { | |
3358 detector_category_dynarr *cats = | |
3359 Dynarr_at (all_coding_detectors, i).cats; | |
3360 int j; | |
3361 | |
3362 for (j = 0; j < Dynarr_length (cats); j++) | |
3363 if (EQ (Dynarr_at (cats, j).sym, symbol)) | |
3364 return Dynarr_at (cats, j).id; | |
3365 } | |
3366 | |
563 | 3367 invalid_constant ("Unrecognized coding category", symbol); |
1204 | 3368 RETURN_NOT_REACHED (0); |
428 | 3369 } |
3370 | |
771 | 3371 static Lisp_Object |
3372 coding_category_id_to_symbol (int id) | |
428 | 3373 { |
3374 int i; | |
771 | 3375 |
3376 for (i = 0; i < coding_detector_count; i++) | |
3377 { | |
3378 detector_category_dynarr *cats = | |
3379 Dynarr_at (all_coding_detectors, i).cats; | |
3380 int j; | |
3381 | |
3382 for (j = 0; j < Dynarr_length (cats); j++) | |
3383 if (id == Dynarr_at (cats, j).id) | |
3384 return Dynarr_at (cats, j).sym; | |
3385 } | |
3386 | |
2500 | 3387 ABORT (); |
771 | 3388 return Qnil; /* (usually) not reached */ |
428 | 3389 } |
3390 | |
771 | 3391 static Lisp_Object |
3392 detection_result_number_to_symbol (enum detection_result result) | |
428 | 3393 { |
1494 | 3394 /* let compiler warn if not all enumerators are handled */ |
3395 switch (result) { | |
3396 #define FROB(sym, num) case num: return (sym) | |
771 | 3397 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); |
3398 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3399 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3400 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3401 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3402 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3403 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3404 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3405 #undef FROB | |
1494 | 3406 } |
771 | 3407 |
2500 | 3408 ABORT (); |
771 | 3409 return Qnil; /* (usually) not reached */ |
3410 } | |
3411 | |
778 | 3412 #if 0 /* not used */ |
771 | 3413 static enum detection_result |
3414 detection_result_symbol_to_number (Lisp_Object symbol) | |
3415 { | |
1494 | 3416 /* using switch here would be bad style, and doesn't help */ |
771 | 3417 #define FROB(sym, num) if (EQ (symbol, sym)) return (num) |
3418 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); | |
3419 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3420 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3421 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3422 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3423 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3424 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3425 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3426 #undef FROB | |
3427 | |
3428 invalid_constant ("Unrecognized detection result", symbol); | |
3429 return ((enum detection_result) 0); /* not reached */ | |
3430 } | |
778 | 3431 #endif /* 0 */ |
771 | 3432 |
3433 /* Set all detection results for a given detector to a specified value. */ | |
3434 void | |
3435 set_detection_results (struct detection_state *st, int detector, int given) | |
3436 { | |
3437 detector_category_dynarr *cats = | |
3438 Dynarr_at (all_coding_detectors, detector).cats; | |
3439 int i; | |
3440 | |
3441 for (i = 0; i < Dynarr_length (cats); i++) | |
3442 st->categories[Dynarr_at (cats, i).id] = given; | |
3443 } | |
428 | 3444 |
3445 static int | |
3446 acceptable_control_char_p (int c) | |
3447 { | |
3448 switch (c) | |
3449 { | |
3450 /* Allow and ignore control characters that you might | |
3451 reasonably see in a text file */ | |
3452 case '\r': | |
3453 case '\n': | |
3454 case '\t': | |
3455 case 7: /* bell */ | |
3456 case 8: /* backspace */ | |
3457 case 11: /* vertical tab */ | |
3458 case 12: /* form feed */ | |
3459 case 26: /* MS-DOS C-z junk */ | |
3460 case 31: /* '^_' -- for info */ | |
3461 return 1; | |
3462 default: | |
3463 return 0; | |
3464 } | |
3465 } | |
3466 | |
771 | 3467 #ifdef DEBUG_XEMACS |
3468 | |
3469 static UExtbyte | |
3470 hex_digit_to_char (int digit) | |
428 | 3471 { |
771 | 3472 if (digit < 10) |
3473 return digit + '0'; | |
3474 else | |
3475 return digit - 10 + 'A'; | |
428 | 3476 } |
3477 | |
771 | 3478 static void |
3479 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) | |
428 | 3480 { |
3425 | 3481 Extbyte *ascii = alloca_array (Extbyte, n + 1); |
3482 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1); | |
771 | 3483 int i; |
3413 | 3484 DECLARE_EISTRING (eistr_ascii); |
3485 DECLARE_EISTRING (eistr_hex); | |
771 | 3486 |
3487 for (i = 0; i < n; i++) | |
428 | 3488 { |
3425 | 3489 Extbyte c = src[i]; |
771 | 3490 if (c < 0x20) |
3491 ascii[i] = '.'; | |
428 | 3492 else |
771 | 3493 ascii[i] = c; |
3494 hex[3 * i] = hex_digit_to_char (c >> 4); | |
3495 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); | |
3496 hex[3 * i + 2] = ' '; | |
428 | 3497 } |
771 | 3498 ascii[i] = '\0'; |
3499 hex[3 * i - 1] = '\0'; | |
3413 | 3500 |
3501 eicpy_ext(eistr_hex, hex, Qbinary); | |
3502 eicpy_ext(eistr_ascii, ascii, Qbinary); | |
3503 | |
3425 | 3504 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex)); |
428 | 3505 } |
3506 | |
771 | 3507 #endif /* DEBUG_XEMACS */ |
3508 | |
3509 /* Attempt to determine the encoding of the given text. Before calling | |
3510 this function for the first time, you must zero out the detection state. | |
428 | 3511 |
3512 Returns: | |
3513 | |
771 | 3514 0 == keep going |
3515 1 == stop | |
428 | 3516 */ |
3517 | |
3518 static int | |
771 | 3519 detect_coding_type (struct detection_state *st, const UExtbyte *src, |
3520 Bytecount n) | |
428 | 3521 { |
771 | 3522 Bytecount n2 = n; |
3523 const UExtbyte *src2 = src; | |
3524 int i; | |
3525 | |
3526 #ifdef DEBUG_XEMACS | |
3527 if (!NILP (Vdebug_coding_detection)) | |
3528 { | |
3529 int bytes = min (16, n); | |
3530 stderr_out ("detect_coding_type: processing %ld bytes\n", n); | |
3531 stderr_out ("First %d: ", bytes); | |
3532 output_bytes_in_ascii_and_hex (src, bytes); | |
3533 stderr_out ("\nLast %d: ", bytes); | |
3534 output_bytes_in_ascii_and_hex (src + n - bytes, bytes); | |
3535 stderr_out ("\n"); | |
3536 } | |
3537 #endif /* DEBUG_XEMACS */ | |
428 | 3538 if (!st->seen_non_ascii) |
3539 { | |
771 | 3540 for (; n2; n2--, src2++) |
428 | 3541 { |
771 | 3542 UExtbyte c = *src2; |
428 | 3543 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) |
3544 { | |
3545 st->seen_non_ascii = 1; | |
3546 break; | |
3547 } | |
3548 } | |
3549 } | |
3550 | |
771 | 3551 for (i = 0; i < coding_detector_count; i++) |
3552 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n); | |
3553 | |
3554 st->bytes_seen += n; | |
3555 | |
3556 #ifdef DEBUG_XEMACS | |
3557 if (!NILP (Vdebug_coding_detection)) | |
3558 { | |
3559 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii); | |
1494 | 3560 if (coding_detector_category_count <= 0) |
3561 stderr_out ("found %d detector categories\n", | |
3562 coding_detector_category_count); | |
771 | 3563 for (i = 0; i < coding_detector_category_count; i++) |
3564 stderr_out_lisp | |
3565 ("%s: %s\n", | |
3566 2, | |
3567 coding_category_id_to_symbol (i), | |
3568 detection_result_number_to_symbol ((enum detection_result) | |
3569 st->categories[i])); | |
3570 } | |
3571 #endif /* DEBUG_XEMACS */ | |
3572 | |
3573 { | |
3574 int not_unlikely = 0; | |
3575 int retval; | |
3576 | |
3577 for (i = 0; i < coding_detector_category_count; i++) | |
3578 if (st->categories[i] >= 0) | |
3579 not_unlikely++; | |
3580 | |
3581 retval = (not_unlikely <= 1 | |
3582 #if 0 /* this is bogus */ | |
3583 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION | |
428 | 3584 #endif |
771 | 3585 ); |
3586 | |
3587 #ifdef DEBUG_XEMACS | |
3588 if (!NILP (Vdebug_coding_detection)) | |
3589 stderr_out ("detect_coding_type: returning %d (%s)\n", | |
3590 retval, retval ? "stop" : "keep going"); | |
3591 #endif /* DEBUG_XEMACS */ | |
3592 | |
3593 return retval; | |
428 | 3594 } |
3595 } | |
3596 | |
3597 static Lisp_Object | |
771 | 3598 detected_coding_system (struct detection_state *st) |
428 | 3599 { |
771 | 3600 int i; |
3601 int even = 1; | |
3602 | |
3603 if (st->seen_non_ascii) | |
3604 { | |
3605 for (i = 0; i < coding_detector_category_count; i++) | |
3606 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY) | |
3607 { | |
3608 even = 0; | |
3609 break; | |
3610 } | |
3611 } | |
3612 | |
3613 /* #### Here we are ignoring the results of detection when it's all | |
3614 ASCII. This is obviously a bad thing. But we need to fix up the | |
3615 existing detection methods somewhat before we can switch. */ | |
3616 if (even) | |
428 | 3617 { |
3618 /* If the file was entirely or basically ASCII, use the | |
3619 default value of `buffer-file-coding-system'. */ | |
3620 Lisp_Object retval = | |
3621 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system; | |
3622 if (!NILP (retval)) | |
3623 { | |
771 | 3624 retval = find_coding_system_for_text_file (retval, 0); |
428 | 3625 if (NILP (retval)) |
3626 { | |
3627 warn_when_safe | |
3628 (Qbad_variable, Qwarning, | |
3629 "Invalid `default-buffer-file-coding-system', set to nil"); | |
3630 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; | |
3631 } | |
3632 } | |
3633 if (NILP (retval)) | |
4100 | 3634 retval = Fget_coding_system (Qbinary); |
428 | 3635 return retval; |
3636 } | |
3637 else | |
3638 { | |
771 | 3639 int likelihood; |
3640 Lisp_Object retval = Qnil; | |
3641 | |
3642 /* Look through the coding categories first by likelihood and then by | |
3643 priority and find the first one that is allowed. */ | |
3644 | |
3645 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--) | |
428 | 3646 { |
771 | 3647 for (i = 0; i < coding_detector_category_count; i++) |
3648 { | |
3649 int cat = coding_category_by_priority[i]; | |
3650 if (st->categories[cat] == likelihood && | |
3651 !NILP (coding_category_system[cat])) | |
3652 { | |
3653 retval = (get_coding_system_for_text_file | |
3654 (coding_category_system[cat], 0)); | |
3655 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY) | |
3656 warn_when_safe_lispobj | |
3657 (intern ("detection"), | |
793 | 3658 Qwarning, |
771 | 3659 emacs_sprintf_string_lisp |
3660 ( | |
3661 "Detected coding %s is unlikely to be correct (likelihood == `%s')", | |
3662 Qnil, 2, XCODING_SYSTEM_NAME (retval), | |
3663 detection_result_number_to_symbol | |
3664 ((enum detection_result) likelihood))); | |
3665 return retval; | |
3666 } | |
3667 } | |
428 | 3668 } |
771 | 3669 |
3670 return Fget_coding_system (Qraw_text); | |
428 | 3671 } |
3672 } | |
3673 | |
1347 | 3674 /* Look for a coding system in the string (skipping over leading |
3675 blanks). If found, return it, otherwise nil. */ | |
3676 | |
3677 static Lisp_Object | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3678 snarf_coding_system (const UExtbyte *p, Bytecount len, |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3679 Boolint find_coding_system_p) |
1347 | 3680 { |
3681 Bytecount n; | |
2531 | 3682 UExtbyte *name; |
1347 | 3683 |
3684 while (*p == ' ' || *p == '\t') p++, len--; | |
3685 len = min (len, 1000); | |
3686 name = alloca_ibytes (len + 1); | |
3687 memcpy (name, p, len); | |
3688 name[len] = '\0'; | |
3689 | |
3690 /* Get coding system name */ | |
3691 /* Characters valid in a MIME charset name (rfc 1521), | |
3692 and in a Lisp symbol name. */ | |
3693 n = qxestrspn (name, | |
3694 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
3695 "abcdefghijklmnopqrstuvwxyz" | |
3696 "0123456789" | |
3697 "!$%&*+-.^_{|}~"); | |
3698 if (n > 0) | |
3699 { | |
3700 name[n] = '\0'; | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
3701 /* This call to intern_istring() is OK because we already verified that |
2531 | 3702 there are only ASCII characters in the string */ |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3703 if (find_coding_system_p) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3704 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3705 return |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3706 find_coding_system_for_text_file (intern_istring ((Ibyte *) name), |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3707 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3708 } |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3709 else |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3710 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3711 return build_ascstring ((const Ascbyte *) name); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3712 } |
1347 | 3713 } |
3714 | |
3715 return Qnil; | |
3716 } | |
3717 | |
428 | 3718 /* Given a seekable read stream and potential coding system and EOL type |
3719 as specified, do any autodetection that is called for. If the | |
3720 coding system and/or EOL type are not `autodetect', they will be left | |
3721 alone; but this function will never return an autodetect coding system | |
3722 or EOL type. | |
3723 | |
3724 This function does not automatically fetch subsidiary coding systems; | |
3725 that should be unnecessary with the explicit eol-type argument. */ | |
3726 | |
3727 #define LENGTH(string_constant) (sizeof (string_constant) - 1) | |
3728 | |
771 | 3729 static Lisp_Object |
3730 unwind_free_detection_state (Lisp_Object opaque) | |
3731 { | |
3732 struct detection_state *st = | |
3733 (struct detection_state *) get_opaque_ptr (opaque); | |
3734 free_detection_state (st); | |
3735 free_opaque_ptr (opaque); | |
3736 return Qnil; | |
3737 } | |
3738 | |
3739 static Lisp_Object | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3740 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len, |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3741 Boolint find_coding_system_p) |
428 | 3742 { |
771 | 3743 const UExtbyte *p; |
3744 const UExtbyte *scan_end; | |
2531 | 3745 Bytecount cookie_len; |
771 | 3746 |
3747 /* Look for initial "-*-"; mode line prefix */ | |
3748 for (p = data, | |
3749 scan_end = data + len - LENGTH ("-*-coding:?-*-"); | |
3750 p <= scan_end | |
3751 && *p != '\n' | |
3752 && *p != '\r'; | |
3753 p++) | |
3754 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3755 { | |
3756 const UExtbyte *local_vars_beg = p + 3; | |
3757 /* Look for final "-*-"; mode line suffix */ | |
3758 for (p = local_vars_beg, | |
3759 scan_end = data + len - LENGTH ("-*-"); | |
3760 p <= scan_end | |
428 | 3761 && *p != '\n' |
3762 && *p != '\r'; | |
771 | 3763 p++) |
3764 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3765 { | |
3766 const UExtbyte *suffix = p; | |
3767 /* Look for "coding:" */ | |
3768 for (p = local_vars_beg, | |
3769 scan_end = suffix - LENGTH ("coding:?"); | |
3770 p <= scan_end; | |
3771 p++) | |
3772 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0 | |
3773 && (p == local_vars_beg | |
3774 || (*(p-1) == ' ' || | |
3775 *(p-1) == '\t' || | |
3776 *(p-1) == ';'))) | |
3777 { | |
3778 p += LENGTH ("coding:"); | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3779 return snarf_coding_system (p, suffix - p, |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3780 find_coding_system_p); |
771 | 3781 break; |
3782 } | |
3783 break; | |
3784 } | |
3785 break; | |
3786 } | |
3787 | |
2531 | 3788 /* Look for ;;;###coding system */ |
3789 | |
3790 cookie_len = LENGTH (";;;###coding system: "); | |
3791 | |
3792 for (p = data, | |
3793 scan_end = data + len - cookie_len; | |
3794 p <= scan_end; | |
3795 p++) | |
1347 | 3796 { |
2531 | 3797 if (*p == ';' && !memcmp (p, ";;;###coding system: ", cookie_len)) |
3798 { | |
3799 const UExtbyte *suffix; | |
3800 | |
3801 p += cookie_len; | |
3802 suffix = p; | |
3803 while (suffix < scan_end && !isspace (*suffix)) | |
3804 suffix++; | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3805 return snarf_coding_system (p, suffix - p, find_coding_system_p); |
2531 | 3806 } |
1347 | 3807 } |
3808 | |
3809 return Qnil; | |
771 | 3810 } |
3811 | |
3812 static Lisp_Object | |
3813 determine_real_coding_system (Lstream *stream) | |
3814 { | |
3815 struct detection_state *st = allocate_detection_state (); | |
3816 int depth = record_unwind_protect (unwind_free_detection_state, | |
3817 make_opaque_ptr (st)); | |
3818 UExtbyte buf[4096]; | |
3819 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3820 Lisp_Object coding_system |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3821 = look_for_coding_system_magic_cookie (buf, nread, 1); |
771 | 3822 |
3823 if (NILP (coding_system)) | |
3824 { | |
3825 while (1) | |
3826 { | |
3827 if (detect_coding_type (st, buf, nread)) | |
428 | 3828 break; |
771 | 3829 nread = Lstream_read (stream, buf, sizeof (buf)); |
3830 if (nread == 0) | |
3831 break; | |
428 | 3832 } |
771 | 3833 |
3834 coding_system = detected_coding_system (st); | |
428 | 3835 } |
3836 | |
3837 Lstream_rewind (stream); | |
771 | 3838 |
3839 unbind_to (depth); | |
3840 return coding_system; | |
3841 } | |
3842 | |
3843 static void | |
3844 undecided_init_coding_stream (struct coding_stream *str) | |
3845 { | |
3846 struct undecided_coding_stream *data = | |
3847 CODING_STREAM_TYPE_DATA (str, undecided); | |
3848 struct undecided_coding_system *csdata = | |
3849 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3850 | |
3851 data->actual = Qnil; | |
3852 | |
3853 if (str->direction == CODING_DECODE) | |
3854 { | |
3855 Lstream *lst = str->other_end; | |
3856 | |
3857 if ((lst->flags & LSTREAM_FL_READ) && | |
3858 Lstream_seekable_p (lst) && | |
3859 csdata->do_coding) | |
3860 /* We can determine the coding system now. */ | |
3861 data->actual = determine_real_coding_system (lst); | |
3862 } | |
1494 | 3863 |
3864 #ifdef DEBUG_XEMACS | |
3865 if (!NILP (Vdebug_coding_detection)) | |
3866 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual); | |
3867 #endif /* DEBUG_XEMACS */ | |
771 | 3868 } |
3869 | |
3870 static void | |
3871 undecided_rewind_coding_stream (struct coding_stream *str) | |
3872 { | |
3873 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3874 } | |
3875 | |
3876 static void | |
3877 undecided_finalize_coding_stream (struct coding_stream *str) | |
3878 { | |
3879 struct undecided_coding_stream *data = | |
3880 CODING_STREAM_TYPE_DATA (str, undecided); | |
3881 | |
3882 chain_finalize_coding_stream_1 | |
3883 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3884 if (data->st) | |
3885 free_detection_state (data->st); | |
3886 } | |
3887 | |
3888 static Lisp_Object | |
3889 undecided_canonicalize (Lisp_Object codesys) | |
3890 { | |
3891 struct undecided_coding_system *csdata = | |
3892 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3893 if (!csdata->do_eol && !csdata->do_coding) | |
3894 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs; | |
3895 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs)) | |
3896 return Fget_coding_system (Qconvert_eol_autodetect); | |
3897 return codesys; | |
3898 } | |
3899 | |
3900 static Bytecount | |
3901 undecided_convert (struct coding_stream *str, const UExtbyte *src, | |
3902 unsigned_char_dynarr *dst, Bytecount n) | |
3903 { | |
3904 int first_time = 0; | |
3905 | |
3906 if (str->direction == CODING_DECODE) | |
3907 { | |
3908 /* At this point, we have only the following possibilities: | |
3909 | |
3910 do_eol && do_coding | |
3911 do_coding only | |
3912 do_eol only and a coding system was specified | |
3913 | |
3914 Other possibilities are removed during undecided_canonicalize. | |
3915 | |
3916 Therefore, our substreams are either | |
3917 | |
3918 lstream_coding -> lstream_dynarr, or | |
3919 lstream_coding -> lstream_eol -> lstream_dynarr. | |
3920 */ | |
3921 struct undecided_coding_system *csdata = | |
3922 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3923 struct undecided_coding_stream *data = | |
3924 CODING_STREAM_TYPE_DATA (str, undecided); | |
3925 | |
3926 if (str->eof) | |
3927 { | |
3928 /* Each will close the next. We need to close now because more | |
3929 data may be generated. */ | |
3930 if (data->c.initted) | |
3931 Lstream_close (XLSTREAM (data->c.lstreams[0])); | |
3932 return n; | |
3933 } | |
3934 | |
3935 if (!data->c.initted) | |
3936 { | |
3937 data->c.lstream_count = csdata->do_eol ? 3 : 2; | |
3938 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count); | |
3939 | |
3940 data->c.lstreams[data->c.lstream_count - 1] = | |
3941 make_dynarr_output_stream (dst); | |
3942 Lstream_set_buffering | |
3943 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3944 LSTREAM_UNBUFFERED, 0); | |
3945 if (csdata->do_eol) | |
3946 { | |
3947 data->c.lstreams[1] = | |
3948 make_coding_output_stream | |
3949 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3950 Fget_coding_system (Qconvert_eol_autodetect), | |
800 | 3951 CODING_DECODE, 0); |
771 | 3952 Lstream_set_buffering |
3953 (XLSTREAM (data->c.lstreams[1]), | |
3954 LSTREAM_UNBUFFERED, 0); | |
3955 } | |
3956 | |
3957 data->c.lstreams[0] = | |
3958 make_coding_output_stream | |
3959 (XLSTREAM (data->c.lstreams[1]), | |
3960 /* Substitute binary if we need to detect the encoding */ | |
3961 csdata->do_coding ? Qbinary : csdata->cs, | |
800 | 3962 CODING_DECODE, 0); |
771 | 3963 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]), |
3964 LSTREAM_UNBUFFERED, 0); | |
3965 | |
3966 first_time = 1; | |
3967 data->c.initted = 1; | |
3968 } | |
3969 | |
3970 /* If necessary, do encoding-detection now. We do this when we're a | |
3971 writing stream or a non-seekable reading stream, meaning that we | |
3972 can't just process the whole input, rewind, and start over. */ | |
3973 | |
3974 if (csdata->do_coding) | |
3975 { | |
3976 int actual_was_nil = NILP (data->actual); | |
3977 if (NILP (data->actual)) | |
3978 { | |
3979 if (!data->st) | |
3980 data->st = allocate_detection_state (); | |
3981 if (first_time) | |
3982 /* #### This is cheesy. What we really ought to do is buffer | |
3983 up a certain minimum amount of data to get a better result. | |
3984 */ | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3985 data->actual = look_for_coding_system_magic_cookie (src, n, 1); |
771 | 3986 if (NILP (data->actual)) |
3987 { | |
3988 /* #### This is cheesy. What we really ought to do is buffer | |
3989 up a certain minimum amount of data so as to get a less | |
3990 random result when doing subprocess detection. */ | |
3991 detect_coding_type (data->st, src, n); | |
3992 data->actual = detected_coding_system (data->st); | |
4100 | 3993 /* kludge to prevent infinite recursion */ |
3994 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system) | |
3995 data->actual = Fget_coding_system (Qbinary); | |
771 | 3996 } |
3997 } | |
3998 /* We need to set the detected coding system if we actually have | |
3999 such a coding system but didn't before. That is the case | |
4000 either when we just detected it in the previous code or when | |
4001 it was detected during undecided_init_coding_stream(). We | |
4002 can check for that using first_time. */ | |
4003 if (!NILP (data->actual) && (actual_was_nil || first_time)) | |
4004 { | |
4005 /* If the detected coding system doesn't allow for EOL | |
4006 autodetection, try to get the equivalent that does; | |
4007 otherwise, disable EOL detection (overriding whatever | |
4008 may already have been detected). */ | |
4009 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT) | |
4010 { | |
4011 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual))) | |
4012 data->actual = | |
4013 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual); | |
4014 else if (data->c.lstream_count == 3) | |
4015 set_coding_stream_coding_system | |
4016 (XLSTREAM (data->c.lstreams[1]), | |
4017 Fget_coding_system (Qidentity)); | |
4018 } | |
4019 set_coding_stream_coding_system | |
4020 (XLSTREAM (data->c.lstreams[0]), data->actual); | |
4021 } | |
4022 } | |
4023 | |
4024 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0) | |
4025 return -1; | |
4026 return n; | |
4027 } | |
4028 else | |
4029 return no_conversion_convert (str, src, dst, n); | |
4030 } | |
4031 | |
4032 static Lisp_Object | |
4033 undecided_canonicalize_after_coding (struct coding_stream *str) | |
4034 { | |
4035 struct undecided_coding_stream *data = | |
4036 CODING_STREAM_TYPE_DATA (str, undecided); | |
4037 Lisp_Object ret, eolret; | |
4038 | |
4039 if (str->direction == CODING_ENCODE) | |
4040 return str->codesys; | |
4041 | |
4042 if (!data->c.initted) | |
4647
e4ed58cb0e5b
Fix bugs with #'find-file, 0-length files, & coding-system-for-read specified.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
4043 return str->codesys; |
771 | 4044 |
4045 ret = coding_stream_canonicalize_after_coding | |
4046 (XLSTREAM (data->c.lstreams[0])); | |
4047 if (NILP (ret)) | |
4647
e4ed58cb0e5b
Fix bugs with #'find-file, 0-length files, & coding-system-for-read specified.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
4048 ret = str->codesys; |
771 | 4049 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) |
4050 return ret; | |
4051 eolret = coding_stream_canonicalize_after_coding | |
4052 (XLSTREAM (data->c.lstreams[1])); | |
4053 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) | |
4054 return ret; | |
4055 return | |
4056 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret, | |
4057 Qsubtype)); | |
4058 } | |
4059 | |
4060 | |
4061 /************************************************************************/ | |
4062 /* Lisp interface: Coding category functions and detection */ | |
4063 /************************************************************************/ | |
4064 | |
4065 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /* | |
4066 Return a list of all recognized coding categories. | |
4067 */ | |
4068 ()) | |
4069 { | |
4070 int i; | |
4071 Lisp_Object list = Qnil; | |
4072 | |
4073 for (i = 0; i < coding_detector_count; i++) | |
4074 { | |
4075 detector_category_dynarr *cats = | |
4076 Dynarr_at (all_coding_detectors, i).cats; | |
4077 int j; | |
4078 | |
4079 for (j = 0; j < Dynarr_length (cats); j++) | |
4080 list = Fcons (Dynarr_at (cats, j).sym, list); | |
4081 } | |
4082 | |
4083 return Fnreverse (list); | |
4084 } | |
4085 | |
4086 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* | |
4087 Change the priority order of the coding categories. | |
4088 LIST should be list of coding categories, in descending order of | |
4089 priority. Unspecified coding categories will be lower in priority | |
4090 than all specified ones, in the same relative order they were in | |
4091 previously. | |
4092 */ | |
4093 (list)) | |
4094 { | |
4095 int *category_to_priority = | |
4096 alloca_array (int, coding_detector_category_count); | |
4097 int i, j; | |
4098 | |
4099 /* First generate a list that maps coding categories to priorities. */ | |
4100 | |
4101 for (i = 0; i < coding_detector_category_count; i++) | |
4102 category_to_priority[i] = -1; | |
4103 | |
4104 /* Highest priority comes from the specified list. */ | |
4105 i = 0; | |
2367 | 4106 { |
4107 EXTERNAL_LIST_LOOP_2 (elt, list) | |
4108 { | |
4109 int cat = coding_category_symbol_to_id (elt); | |
4110 | |
4111 if (category_to_priority[cat] >= 0) | |
4112 sferror ("Duplicate coding category in list", elt); | |
4113 category_to_priority[cat] = i++; | |
4114 } | |
4115 } | |
771 | 4116 |
4117 /* Now go through the existing categories by priority to retrieve | |
4118 the categories not yet specified and preserve their priority | |
4119 order. */ | |
4120 for (j = 0; j < coding_detector_category_count; j++) | |
4121 { | |
4122 int cat = coding_category_by_priority[j]; | |
4123 if (category_to_priority[cat] < 0) | |
4124 category_to_priority[cat] = i++; | |
4125 } | |
4126 | |
4127 /* Now we need to construct the inverse of the mapping we just | |
4128 constructed. */ | |
4129 | |
4130 for (i = 0; i < coding_detector_category_count; i++) | |
4131 coding_category_by_priority[category_to_priority[i]] = i; | |
4132 | |
4133 /* Phew! That was confusing. */ | |
4134 return Qnil; | |
4135 } | |
4136 | |
4137 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /* | |
4138 Return a list of coding categories in descending order of priority. | |
4139 */ | |
4140 ()) | |
4141 { | |
4142 int i; | |
4143 Lisp_Object list = Qnil; | |
4144 | |
4145 for (i = 0; i < coding_detector_category_count; i++) | |
4146 list = | |
4147 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]), | |
4148 list); | |
4149 return Fnreverse (list); | |
4150 } | |
4151 | |
4152 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* | |
4153 Change the coding system associated with a coding category. | |
4154 */ | |
4155 (coding_category, coding_system)) | |
4156 { | |
4157 coding_category_system[coding_category_symbol_to_id (coding_category)] = | |
4158 Fget_coding_system (coding_system); | |
4159 return Qnil; | |
4160 } | |
4161 | |
4162 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* | |
4163 Return the coding system associated with a coding category. | |
4164 */ | |
4165 (coding_category)) | |
4166 { | |
4167 Lisp_Object sys = | |
4168 coding_category_system[coding_category_symbol_to_id (coding_category)]; | |
4169 | |
4170 if (!NILP (sys)) | |
4171 return XCODING_SYSTEM_NAME (sys); | |
4172 return Qnil; | |
4173 } | |
4174 | |
800 | 4175 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will |
4176 read through to the end of STREAM, leaving it there but open. */ | |
4177 | |
771 | 4178 Lisp_Object |
4179 detect_coding_stream (Lisp_Object stream) | |
4180 { | |
4181 Lisp_Object val = Qnil; | |
4182 struct gcpro gcpro1, gcpro2, gcpro3; | |
4183 UExtbyte random_buffer[65536]; | |
4184 Lisp_Object binary_instream = | |
4185 make_coding_input_stream | |
4186 (XLSTREAM (stream), Qbinary, | |
814 | 4187 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER); |
771 | 4188 Lisp_Object decstream = |
4189 make_coding_input_stream | |
4190 (XLSTREAM (binary_instream), | |
800 | 4191 Qundecided, CODING_DECODE, 0); |
771 | 4192 Lstream *decstr = XLSTREAM (decstream); |
4193 | |
4194 GCPRO3 (decstream, stream, binary_instream); | |
4195 /* Read and discard all data; detection happens as a side effect of this, | |
4196 and we examine what was detected afterwards. */ | |
4197 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0) | |
4198 ; | |
4199 | |
4200 val = coding_stream_detected_coding_system (decstr); | |
4201 Lstream_close (decstr); | |
4202 Lstream_delete (decstr); | |
4203 Lstream_delete (XLSTREAM (binary_instream)); | |
4204 UNGCPRO; | |
4205 return val; | |
428 | 4206 } |
4207 | |
4208 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | |
4209 Detect coding system of the text in the region between START and END. | |
444 | 4210 Return a list of possible coding systems ordered by priority. |
3025 | 4211 If only ASCII characters are found, return `undecided' or one of |
428 | 4212 its subsidiary coding systems according to a detected end-of-line |
4213 type. Optional arg BUFFER defaults to the current buffer. | |
4214 */ | |
4215 (start, end, buffer)) | |
4216 { | |
4217 Lisp_Object val = Qnil; | |
4218 struct buffer *buf = decode_buffer (buffer, 0); | |
665 | 4219 Charbpos b, e; |
771 | 4220 Lisp_Object lb_instream; |
428 | 4221 |
4222 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
4223 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
771 | 4224 |
4225 val = detect_coding_stream (lb_instream); | |
4226 Lstream_delete (XLSTREAM (lb_instream)); | |
428 | 4227 return val; |
4228 } | |
4229 | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4230 DEFUN ("find-coding-system-magic-cookie-in-file", |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4231 Ffind_coding_system_magic_cookie_in_file, 1, 1, 0, /* |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4232 Look for the coding-system magic cookie in FILENAME. |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4233 The coding-system magic cookie is either the local variable specification |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4234 -*- ... coding: ... -*- on the first line, or the exact string |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4235 \";;;###coding system: \" somewhere within the first 3000 characters |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4236 of the file. If found, the coding system name (as a string) is returned; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4237 otherwise nil is returned. Note that it is extremely unlikely that |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4238 either such string would occur coincidentally as the result of encoding |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4239 some characters in a non-ASCII charset, and that the spaces make it |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4240 even less likely since the space character is not a valid octet in any |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4241 ISO 2022 encoding of most non-ASCII charsets. |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4242 */ |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4243 (filename)) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4244 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4245 Lisp_Object lstream; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4246 UExtbyte buf[4096]; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4247 Bytecount nread; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4248 int fd = -1; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4249 struct stat st; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4250 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4251 filename = Fexpand_file_name (filename, Qnil); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4252 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4253 if (qxe_stat (XSTRING_DATA (filename), &st) < 0) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4254 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4255 badopen: |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4256 report_file_error ("Opening input file", filename); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4257 } |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4258 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4259 if (fd < 0) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4260 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4261 if ((fd = qxe_interruptible_open (XSTRING_DATA (filename), |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4262 O_RDONLY | OPEN_BINARY, 0)) < 0) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4263 goto badopen; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4264 } |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4265 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4266 lstream = make_filedesc_input_stream (fd, 0, -1, 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4267 Lstream_set_buffering (XLSTREAM (lstream), LSTREAM_UNBUFFERED, 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4268 nread = Lstream_read (XLSTREAM (lstream), buf, sizeof (buf)); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4269 Lstream_delete (XLSTREAM (lstream)); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4270 retry_close (fd); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4271 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4272 return look_for_coding_system_magic_cookie (buf, nread, 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4273 } |
428 | 4274 |
771 | 4275 |
4276 #ifdef DEBUG_XEMACS | |
4277 | |
428 | 4278 /************************************************************************/ |
771 | 4279 /* Internal methods */ |
4280 /************************************************************************/ | |
4281 | |
4282 /* Raw (internally-formatted) data. */ | |
4283 DEFINE_CODING_SYSTEM_TYPE (internal); | |
428 | 4284 |
665 | 4285 static Bytecount |
2286 | 4286 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src, |
771 | 4287 unsigned_char_dynarr *dst, Bytecount n) |
4288 { | |
4289 Bytecount orign = n; | |
4290 Dynarr_add_many (dst, src, n); | |
4291 return orign; | |
4292 } | |
4293 | |
4294 #endif /* DEBUG_XEMACS */ | |
4295 | |
4296 | |
4297 | |
4298 #ifdef HAVE_ZLIB | |
4299 | |
4300 /************************************************************************/ | |
4301 /* Gzip methods */ | |
4302 /************************************************************************/ | |
4303 | |
4304 struct gzip_coding_system | |
428 | 4305 { |
771 | 4306 int level; /* 0 through 9, or -1 for default */ |
4307 }; | |
4308 | |
4309 #define CODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4310 (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4311 #define XCODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4312 (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4313 | |
4314 struct gzip_coding_stream | |
428 | 4315 { |
771 | 4316 z_stream stream; |
4317 int stream_initted; | |
4318 int reached_eof; /* #### this should be handled by the caller, once we | |
4319 return LSTREAM_EOF */ | |
4320 }; | |
4321 | |
1204 | 4322 static const struct memory_description |
771 | 4323 gzip_coding_system_description[] = { |
4324 { XD_END } | |
4325 }; | |
4326 | |
1204 | 4327 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip); |
4328 | |
771 | 4329 enum source_sink_type |
4330 gzip_conversion_end_type (Lisp_Object codesys) | |
4331 { | |
4332 return DECODES_BYTE_TO_BYTE; | |
428 | 4333 } |
4334 | |
4335 static void | |
771 | 4336 gzip_init (Lisp_Object codesys) |
4337 { | |
4338 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); | |
4339 data->level = -1; | |
4340 } | |
4341 | |
4342 static void | |
4343 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
428 | 4344 { |
771 | 4345 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip); |
4346 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4347 write_ascstring (printcharfun, "("); |
771 | 4348 if (data->level == -1) |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4349 write_ascstring (printcharfun, "default"); |
771 | 4350 else |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4351 print_internal (make_fixnum (data->level), printcharfun, 0); |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4352 write_ascstring (printcharfun, ")"); |
428 | 4353 } |
4354 | |
4355 static int | |
771 | 4356 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
428 | 4357 { |
771 | 4358 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); |
4359 | |
4360 if (EQ (key, Qlevel)) | |
428 | 4361 { |
771 | 4362 if (EQ (value, Qdefault)) |
4363 data->level = -1; | |
4364 else | |
428 | 4365 { |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4366 check_integer_range (value, Qzero, make_fixnum (9)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4367 data->level = XFIXNUM (value); |
428 | 4368 } |
4369 } | |
4370 else | |
771 | 4371 return 0; |
4372 return 1; | |
428 | 4373 } |
4374 | |
4375 static Lisp_Object | |
771 | 4376 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop) |
428 | 4377 { |
771 | 4378 struct gzip_coding_system *data = |
4379 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip); | |
4380 | |
4381 if (EQ (prop, Qlevel)) | |
428 | 4382 { |
771 | 4383 if (data->level == -1) |
4384 return Qdefault; | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4385 return make_fixnum (data->level); |
428 | 4386 } |
771 | 4387 |
4388 return Qunbound; | |
428 | 4389 } |
4390 | |
4391 static void | |
771 | 4392 gzip_init_coding_stream (struct coding_stream *str) |
428 | 4393 { |
771 | 4394 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4395 if (data->stream_initted) | |
428 | 4396 { |
771 | 4397 if (str->direction == CODING_DECODE) |
4398 inflateEnd (&data->stream); | |
4399 else | |
4400 deflateEnd (&data->stream); | |
4401 data->stream_initted = 0; | |
428 | 4402 } |
771 | 4403 data->reached_eof = 0; |
428 | 4404 } |
4405 | |
4406 static void | |
771 | 4407 gzip_rewind_coding_stream (struct coding_stream *str) |
428 | 4408 { |
771 | 4409 gzip_init_coding_stream (str); |
428 | 4410 } |
4411 | |
771 | 4412 static Bytecount |
4413 gzip_convert (struct coding_stream *str, | |
4414 const UExtbyte *src, | |
4415 unsigned_char_dynarr *dst, Bytecount n) | |
428 | 4416 { |
771 | 4417 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4418 int zerr; | |
4419 if (str->direction == CODING_DECODE) | |
428 | 4420 { |
771 | 4421 if (data->reached_eof) |
4422 return n; /* eat the data */ | |
4423 | |
4424 if (!data->stream_initted) | |
428 | 4425 { |
771 | 4426 xzero (data->stream); |
4427 if (inflateInit (&data->stream) != Z_OK) | |
4428 return LSTREAM_ERROR; | |
4429 data->stream_initted = 1; | |
428 | 4430 } |
771 | 4431 |
4432 data->stream.next_in = (Bytef *) src; | |
4433 data->stream.avail_in = n; | |
4434 | |
4435 /* Normally we stop when we've fed all data to the decompressor; but | |
4436 if we're at the end of the input, and the decompressor hasn't | |
4437 reported EOF, we need to keep going, as there might be more output | |
4438 to generate. Z_OK from the decompressor means input was processed | |
4439 or output was generated; if neither, we break out of the loop. | |
4440 Other return values are: | |
4441 | |
4442 Z_STREAM_END EOF from decompressor | |
4443 Z_DATA_ERROR Corrupted data | |
4444 Z_BUF_ERROR No progress possible (this should happen if | |
4445 we try to feed it an incomplete file) | |
4446 Z_MEM_ERROR Out of memory | |
4447 Z_STREAM_ERROR (should never happen) | |
4448 Z_NEED_DICT (#### when will this happen?) | |
4449 */ | |
4450 while (data->stream.avail_in > 0 || str->eof) | |
4451 { | |
4452 /* Reserve an output buffer of the same size as the input buffer; | |
4453 if that's not enough, we keep reserving the same size. */ | |
4454 Bytecount reserved = n; | |
4455 Dynarr_add_many (dst, 0, reserved); | |
4456 /* Careful here! Don't retrieve the pointer until after | |
4457 reserving the space, or it might be bogus */ | |
4458 data->stream.next_out = | |
4459 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4460 data->stream.avail_out = reserved; | |
4461 zerr = inflate (&data->stream, Z_NO_FLUSH); | |
4462 /* Lop off the unused portion */ | |
5038 | 4463 Dynarr_set_lengthr (dst, Dynarr_length (dst) - data->stream.avail_out); |
771 | 4464 if (zerr != Z_OK) |
4465 break; | |
4466 } | |
4467 | |
4468 if (zerr == Z_STREAM_END) | |
4469 data->reached_eof = 1; | |
4470 | |
4471 if ((Bytecount) data->stream.avail_in < n) | |
4472 return n - data->stream.avail_in; | |
4473 | |
4474 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4475 return 0; | |
4476 | |
4477 return LSTREAM_ERROR; | |
428 | 4478 } |
4479 else | |
4480 { | |
771 | 4481 if (!data->stream_initted) |
4482 { | |
4483 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys); | |
4484 xzero (data->stream); | |
4485 if (deflateInit (&data->stream, | |
4486 level == -1 ? Z_DEFAULT_COMPRESSION : level) != | |
4487 Z_OK) | |
4488 return LSTREAM_ERROR; | |
4489 data->stream_initted = 1; | |
428 | 4490 } |
771 | 4491 |
4492 data->stream.next_in = (Bytef *) src; | |
4493 data->stream.avail_in = n; | |
4494 | |
4495 /* Normally we stop when we've fed all data to the compressor; but if | |
4496 we're at the end of the input, and the compressor hasn't reported | |
4497 EOF, we need to keep going, as there might be more output to | |
4498 generate. (To signal EOF on our end, we set the FLUSH parameter | |
4499 to Z_FINISH; when all data is output, Z_STREAM_END will be | |
4500 returned.) Z_OK from the compressor means input was processed or | |
4501 output was generated; if neither, we break out of the loop. Other | |
4502 return values are: | |
4503 | |
4504 Z_STREAM_END EOF from compressor | |
4505 Z_BUF_ERROR No progress possible (should never happen) | |
4506 Z_STREAM_ERROR (should never happen) | |
4507 */ | |
4508 while (data->stream.avail_in > 0 || str->eof) | |
4509 { | |
4510 /* Reserve an output buffer of the same size as the input buffer; | |
4511 if that's not enough, we keep reserving the same size. */ | |
4512 Bytecount reserved = n; | |
4513 Dynarr_add_many (dst, 0, reserved); | |
4514 /* Careful here! Don't retrieve the pointer until after | |
4515 reserving the space, or it might be bogus */ | |
4516 data->stream.next_out = | |
4517 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4518 data->stream.avail_out = reserved; | |
4519 zerr = | |
4520 deflate (&data->stream, | |
4521 str->eof ? Z_FINISH : Z_NO_FLUSH); | |
4522 /* Lop off the unused portion */ | |
5038 | 4523 Dynarr_set_lengthr (dst, Dynarr_length (dst) - data->stream.avail_out); |
771 | 4524 if (zerr != Z_OK) |
4525 break; | |
4526 } | |
4527 | |
4528 if ((Bytecount) data->stream.avail_in < n) | |
4529 return n - data->stream.avail_in; | |
4530 | |
4531 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4532 return 0; | |
4533 | |
4534 return LSTREAM_ERROR; | |
428 | 4535 } |
4536 } | |
4537 | |
771 | 4538 #endif /* HAVE_ZLIB */ |
428 | 4539 |
4540 | |
4541 /************************************************************************/ | |
4542 /* Initialization */ | |
4543 /************************************************************************/ | |
4544 | |
4545 void | |
4546 syms_of_file_coding (void) | |
4547 { | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
4548 INIT_LISP_OBJECT (coding_system); |
442 | 4549 |
771 | 4550 DEFSUBR (Fvalid_coding_system_type_p); |
4551 DEFSUBR (Fcoding_system_type_list); | |
428 | 4552 DEFSUBR (Fcoding_system_p); |
4303 | 4553 DEFSUBR (Fautoload_coding_system); |
428 | 4554 DEFSUBR (Ffind_coding_system); |
4555 DEFSUBR (Fget_coding_system); | |
4556 DEFSUBR (Fcoding_system_list); | |
4557 DEFSUBR (Fcoding_system_name); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4558 DEFSUBR (Fmake_coding_system_internal); |
428 | 4559 DEFSUBR (Fcopy_coding_system); |
440 | 4560 DEFSUBR (Fcoding_system_canonical_name_p); |
4561 DEFSUBR (Fcoding_system_alias_p); | |
4562 DEFSUBR (Fcoding_system_aliasee); | |
428 | 4563 DEFSUBR (Fdefine_coding_system_alias); |
4564 DEFSUBR (Fsubsidiary_coding_system); | |
771 | 4565 DEFSUBR (Fcoding_system_base); |
4566 DEFSUBR (Fcoding_system_used_for_io); | |
428 | 4567 |
4568 DEFSUBR (Fcoding_system_type); | |
771 | 4569 DEFSUBR (Fcoding_system_description); |
428 | 4570 DEFSUBR (Fcoding_system_property); |
4571 | |
4572 DEFSUBR (Fcoding_category_list); | |
4573 DEFSUBR (Fset_coding_priority_list); | |
4574 DEFSUBR (Fcoding_priority_list); | |
4575 DEFSUBR (Fset_coding_category_system); | |
4576 DEFSUBR (Fcoding_category_system); | |
4577 | |
4578 DEFSUBR (Fdetect_coding_region); | |
4579 DEFSUBR (Fdecode_coding_region); | |
4580 DEFSUBR (Fencode_coding_region); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4581 DEFSUBR (Fquery_coding_region); |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4582 DEFSUBR (Ffind_coding_system_magic_cookie_in_file); |
563 | 4583 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
4584 DEFSYMBOL (Qno_conversion); | |
771 | 4585 DEFSYMBOL (Qconvert_eol); |
4586 DEFSYMBOL (Qconvert_eol_autodetect); | |
4587 DEFSYMBOL (Qconvert_eol_lf); | |
4588 DEFSYMBOL (Qconvert_eol_cr); | |
4589 DEFSYMBOL (Qconvert_eol_crlf); | |
563 | 4590 DEFSYMBOL (Qraw_text); |
771 | 4591 |
563 | 4592 DEFSYMBOL (Qmnemonic); |
4593 DEFSYMBOL (Qeol_type); | |
4594 DEFSYMBOL (Qpost_read_conversion); | |
4595 DEFSYMBOL (Qpre_write_conversion); | |
4596 | |
771 | 4597 DEFSYMBOL (Qtranslation_table_for_decode); |
4598 DEFSYMBOL (Qtranslation_table_for_encode); | |
4599 DEFSYMBOL (Qsafe_chars); | |
4600 DEFSYMBOL (Qsafe_charsets); | |
4601 DEFSYMBOL (Qmime_charset); | |
4602 DEFSYMBOL (Qvalid_codes); | |
4603 | |
563 | 4604 DEFSYMBOL (Qcr); |
4605 DEFSYMBOL (Qlf); | |
4606 DEFSYMBOL (Qcrlf); | |
4607 DEFSYMBOL (Qeol_cr); | |
4608 DEFSYMBOL (Qeol_lf); | |
4609 DEFSYMBOL (Qeol_crlf); | |
4610 DEFSYMBOL (Qencode); | |
4611 DEFSYMBOL (Qdecode); | |
428 | 4612 |
771 | 4613 DEFSYMBOL (Qnear_certainty); |
4614 DEFSYMBOL (Qquite_probable); | |
4615 DEFSYMBOL (Qsomewhat_likely); | |
1494 | 4616 DEFSYMBOL (Qslightly_likely); |
771 | 4617 DEFSYMBOL (Qas_likely_as_unlikely); |
4618 DEFSYMBOL (Qsomewhat_unlikely); | |
4619 DEFSYMBOL (Qquite_improbable); | |
4620 DEFSYMBOL (Qnearly_impossible); | |
4621 | |
4622 DEFSYMBOL (Qdo_eol); | |
4623 DEFSYMBOL (Qdo_coding); | |
4624 | |
4625 DEFSYMBOL (Qcanonicalize_after_coding); | |
4626 | |
4303 | 4627 DEFSYMBOL (Qposix_charset_to_coding_system_hash); |
4628 | |
771 | 4629 DEFSYMBOL (Qescape_quoted); |
4630 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4631 DEFSYMBOL (Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4632 DEFSYMBOL (Qaliases); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4633 DEFSYMBOL (Qcharset_skip_chars_string); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4634 |
771 | 4635 #ifdef HAVE_ZLIB |
4636 DEFSYMBOL (Qgzip); | |
4637 #endif | |
4638 | |
428 | 4639 } |
4640 | |
4641 void | |
4642 lstream_type_create_file_coding (void) | |
4643 { | |
771 | 4644 LSTREAM_HAS_METHOD (coding, reader); |
4645 LSTREAM_HAS_METHOD (coding, writer); | |
4646 LSTREAM_HAS_METHOD (coding, rewinder); | |
4647 LSTREAM_HAS_METHOD (coding, seekable_p); | |
4648 LSTREAM_HAS_METHOD (coding, marker); | |
4649 LSTREAM_HAS_METHOD (coding, flusher); | |
4650 LSTREAM_HAS_METHOD (coding, closer); | |
4651 LSTREAM_HAS_METHOD (coding, finalizer); | |
4652 } | |
4653 | |
4654 void | |
4655 coding_system_type_create (void) | |
4656 { | |
4657 int i; | |
4658 | |
4659 staticpro (&Vcoding_system_hash_table); | |
4660 Vcoding_system_hash_table = | |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
4661 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, Qeq); |
771 | 4662 |
4663 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry); | |
2367 | 4664 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr, |
771 | 4665 &csted_description); |
4666 | |
4667 Vcoding_system_type_list = Qnil; | |
4668 staticpro (&Vcoding_system_type_list); | |
4669 | |
4670 /* Initialize to something reasonable ... */ | |
4671 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++) | |
4672 { | |
4673 coding_category_system[i] = Qnil; | |
1204 | 4674 dump_add_root_lisp_object (&coding_category_system[i]); |
771 | 4675 coding_category_by_priority[i] = i; |
4676 } | |
4677 | |
4678 dump_add_opaque (coding_category_by_priority, | |
4679 sizeof (coding_category_by_priority)); | |
4680 | |
4681 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector); | |
2367 | 4682 dump_add_root_block_ptr (&all_coding_detectors, |
771 | 4683 &detector_dynarr_description); |
4684 | |
4685 dump_add_opaque_int (&coding_system_tick); | |
4686 dump_add_opaque_int (&coding_detector_count); | |
4687 dump_add_opaque_int (&coding_detector_category_count); | |
4688 | |
4689 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, | |
4690 "no-conversion-coding-system-p"); | |
4691 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); | |
4692 | |
4693 INITIALIZE_DETECTOR (no_conversion); | |
4694 DETECTOR_HAS_METHOD (no_conversion, detect); | |
4695 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
4696 | |
4697 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol, | |
4698 "convert-eol-coding-system-p"); | |
4699 CODING_SYSTEM_HAS_METHOD (convert_eol, print); | |
4700 CODING_SYSTEM_HAS_METHOD (convert_eol, convert); | |
4701 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop); | |
4702 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop); | |
4703 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type); | |
4704 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding); | |
4705 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream); | |
4706 | |
4707 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided, | |
4708 "undecided-coding-system-p"); | |
4709 CODING_SYSTEM_HAS_METHOD (undecided, init); | |
4710 CODING_SYSTEM_HAS_METHOD (undecided, mark); | |
4711 CODING_SYSTEM_HAS_METHOD (undecided, print); | |
4712 CODING_SYSTEM_HAS_METHOD (undecided, convert); | |
4713 CODING_SYSTEM_HAS_METHOD (undecided, putprop); | |
4714 CODING_SYSTEM_HAS_METHOD (undecided, getprop); | |
4715 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream); | |
4716 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream); | |
4717 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream); | |
4718 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream); | |
4719 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize); | |
4720 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding); | |
4721 | |
4722 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p"); | |
4723 | |
4724 CODING_SYSTEM_HAS_METHOD (chain, print); | |
4725 CODING_SYSTEM_HAS_METHOD (chain, canonicalize); | |
4726 CODING_SYSTEM_HAS_METHOD (chain, init); | |
4727 CODING_SYSTEM_HAS_METHOD (chain, mark); | |
4728 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream); | |
4729 CODING_SYSTEM_HAS_METHOD (chain, convert); | |
4730 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream); | |
4731 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream); | |
4732 CODING_SYSTEM_HAS_METHOD (chain, finalize); | |
4733 CODING_SYSTEM_HAS_METHOD (chain, putprop); | |
4734 CODING_SYSTEM_HAS_METHOD (chain, getprop); | |
4735 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type); | |
4736 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding); | |
4737 | |
4738 #ifdef DEBUG_XEMACS | |
4739 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p"); | |
4740 CODING_SYSTEM_HAS_METHOD (internal, convert); | |
4741 #endif | |
4742 | |
4743 #ifdef HAVE_ZLIB | |
4744 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p"); | |
4745 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type); | |
4746 CODING_SYSTEM_HAS_METHOD (gzip, convert); | |
4747 CODING_SYSTEM_HAS_METHOD (gzip, init); | |
4748 CODING_SYSTEM_HAS_METHOD (gzip, print); | |
4749 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream); | |
4750 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream); | |
4751 CODING_SYSTEM_HAS_METHOD (gzip, putprop); | |
4752 CODING_SYSTEM_HAS_METHOD (gzip, getprop); | |
4753 #endif | |
4754 } | |
4755 | |
4756 void | |
4757 reinit_coding_system_type_create (void) | |
4758 { | |
4759 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion); | |
4760 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol); | |
4761 REINITIALIZE_CODING_SYSTEM_TYPE (undecided); | |
4762 REINITIALIZE_CODING_SYSTEM_TYPE (chain); | |
4763 #if 0 | |
4764 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper); | |
4765 #endif /* 0 */ | |
4766 #ifdef DEBUG_XEMACS | |
4767 REINITIALIZE_CODING_SYSTEM_TYPE (internal); | |
4768 #endif | |
4769 #ifdef HAVE_ZLIB | |
4770 REINITIALIZE_CODING_SYSTEM_TYPE (gzip); | |
4771 #endif | |
4772 } | |
4773 | |
4774 void | |
4775 reinit_vars_of_file_coding (void) | |
4776 { | |
428 | 4777 } |
4778 | |
4779 void | |
4780 vars_of_file_coding (void) | |
4781 { | |
771 | 4782 /* We always have file-coding support */ |
428 | 4783 Fprovide (intern ("file-coding")); |
4784 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4785 QScoding_system_cookie = build_ascstring (";;;###coding system: "); |
1347 | 4786 staticpro (&QScoding_system_cookie); |
4787 | |
1242 | 4788 #ifdef HAVE_DEFAULT_EOL_DETECTION |
2297 | 4789 /* #### Find a more appropriate place for this comment. |
4790 WARNING: The existing categories are intimately tied to the function | |
1242 | 4791 `coding-system-category' in coding.el. If you change a category, or |
4792 change the layout of any coding system associated with a category, you | |
4793 need to check that function and make sure it's written properly. */ | |
4794 | |
4795 Fprovide (intern ("unix-default-eol-detection")); | |
4796 #endif | |
4797 | |
428 | 4798 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
3142 | 4799 Default coding system used for TTY and X11 keyboard input. |
5384
3889ef128488
Fix misspelled words, and some grammar, across the entire source tree.
Jerry James <james@xemacs.org>
parents:
5345
diff
changeset
|
4800 Under X11, used only to interpret the character for a key event when that |
3142 | 4801 event has a KeySym of NoSymbol but does have an associated string keysym, |
4802 something that's seen with input methods. | |
4803 | |
4804 If you need to set these things to different coding systems, call the | |
4805 function `set-console-tty-coding-system' for the TTY and use this variable | |
4806 for X11. | |
428 | 4807 */ ); |
4808 Vkeyboard_coding_system = Qnil; | |
4809 | |
4810 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* | |
4811 Coding system used for TTY display output. | |
4812 Not used under a windowing system. | |
4813 */ ); | |
4814 Vterminal_coding_system = Qnil; | |
4815 | |
4816 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* | |
440 | 4817 Overriding coding system used when reading from a file or process. |
4818 You should bind this variable with `let', but do not set it globally. | |
4819 If this is non-nil, it specifies the coding system that will be used | |
4820 to decode input on read operations, such as from a file or process. | |
4821 It overrides `buffer-file-coding-system-for-read', | |
428 | 4822 `insert-file-contents-pre-hook', etc. Use those variables instead of |
440 | 4823 this one for permanent changes to the environment. */ ); |
428 | 4824 Vcoding_system_for_read = Qnil; |
4825 | |
4826 DEFVAR_LISP ("coding-system-for-write", | |
4827 &Vcoding_system_for_write /* | |
440 | 4828 Overriding coding system used when writing to a file or process. |
4829 You should bind this variable with `let', but do not set it globally. | |
4830 If this is non-nil, it specifies the coding system that will be used | |
4831 to encode output for write operations, such as to a file or process. | |
4832 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc. | |
4833 Use those variables instead of this one for permanent changes to the | |
4834 environment. */ ); | |
428 | 4835 Vcoding_system_for_write = Qnil; |
4836 | |
4837 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* | |
4838 Coding system used to convert pathnames when accessing files. | |
4839 */ ); | |
4840 Vfile_name_coding_system = Qnil; | |
4841 | |
4842 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /* | |
771 | 4843 Setting this has no effect. It is purely for FSF compatibility. |
428 | 4844 */ ); |
4845 enable_multibyte_characters = 1; | |
771 | 4846 |
4847 Vchain_canonicalize_hash_table = | |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
4848 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, Qequal); |
771 | 4849 staticpro (&Vchain_canonicalize_hash_table); |
4850 | |
4851 #ifdef DEBUG_XEMACS | |
4852 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /* | |
4853 If non-nil, display debug information about detection operations in progress. | |
4854 Information is displayed on stderr. | |
4855 */ ); | |
4856 Vdebug_coding_detection = Qnil; | |
4857 #endif | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4858 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4859 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4860 Vdefault_query_coding_region_chartab_cache |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
4861 = make_lisp_hash_table (25, HASH_TABLE_NON_WEAK, Qequal); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4862 staticpro (&Vdefault_query_coding_region_chartab_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4863 #endif |
428 | 4864 } |
4865 | |
2297 | 4866 /* #### reformat this for consistent appearance? */ |
4867 | |
428 | 4868 void |
4869 complex_vars_of_file_coding (void) | |
4870 { | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4871 Fmake_coding_system_internal |
771 | 4872 (Qconvert_eol_cr, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4873 build_defer_string ("Convert CR to LF"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4874 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4875 build_defer_string ( |
771 | 4876 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" |
4877 "(used internally and under Unix to mark the end of a line)."), | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4878 Qmnemonic, build_ascstring ("CR->LF"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4879 Qsubtype, Qcr, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4880 /* VERY IMPORTANT! Tell make-coding-system not to generate |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4881 subsidiaries -- it needs the coding systems we're creating |
771 | 4882 to do so! */ |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4883 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4884 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4885 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4886 Fmake_coding_system_internal |
771 | 4887 (Qconvert_eol_lf, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4888 build_defer_string ("Convert LF to LF (do nothing)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4889 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4890 build_defer_string ("Do nothing."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4891 Qmnemonic, build_ascstring ("LF->LF"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4892 Qsubtype, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4893 /* VERY IMPORTANT! Tell make-coding-system not to generate |
771 | 4894 subsidiaries -- it needs the coding systems we're creating |
4895 to do so! */ | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4896 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4897 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4898 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4899 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4900 Fmake_coding_system_internal |
771 | 4901 (Qconvert_eol_crlf, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4902 build_defer_string ("Convert CRLF to LF"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4903 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4904 build_defer_string ( |
771 | 4905 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" |
4906 "(used internally and under Unix to mark the end of a line)."), | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4907 Qmnemonic, build_ascstring ("CRLF->LF"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4908 Qsubtype, Qcrlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4909 /* VERY IMPORTANT! Tell make-coding-system not to generate |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4910 subsidiaries -- it needs the coding systems we're creating |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4911 to do so! */ |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4912 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4913 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4914 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4915 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4916 Fmake_coding_system_internal |
771 | 4917 (Qconvert_eol_autodetect, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4918 build_defer_string ("Autodetect EOL type"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4919 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4920 build_defer_string ("Autodetect the end-of-line type."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4921 Qmnemonic, build_ascstring ("Auto-EOL"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4922 Qsubtype, Qnil, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4923 /* VERY IMPORTANT! Tell make-coding-system not to generate |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4924 subsidiaries -- it needs the coding systems we're creating |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4925 to do so! */ |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4926 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4927 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4928 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4929 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4930 Fmake_coding_system_internal |
771 | 4931 (Qundecided, Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4932 build_defer_string ("Undecided (auto-detect)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4933 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4934 build_defer_string ("Automatically detects the correct encoding."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4935 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4936 Qdo_eol, Qt, Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4937 /* We do EOL detection ourselves so we don't need to be |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4938 wrapped in an EOL detector. (It doesn't actually hurt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4939 though, I don't think.) */ |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4940 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4941 Qunbound)); |
771 | 4942 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4943 Fmake_coding_system_internal |
771 | 4944 (intern ("undecided-dos"), Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4945 build_defer_string ("Undecided (auto-detect) (CRLF)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4946 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4947 build_defer_string |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4948 ("Automatically detects the correct encoding; EOL type of CRLF forced."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4949 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4950 Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4951 Qeol_type, Qcrlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4952 Qunbound)); |
771 | 4953 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4954 Fmake_coding_system_internal |
771 | 4955 (intern ("undecided-unix"), Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4956 build_defer_string ("Undecided (auto-detect) (LF)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4957 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4958 build_defer_string |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4959 ("Automatically detects the correct encoding; EOL type of LF forced."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4960 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4961 Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4962 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4963 Qunbound));; |
771 | 4964 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4965 Fmake_coding_system_internal |
771 | 4966 (intern ("undecided-mac"), Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4967 build_defer_string ("Undecided (auto-detect) (CR)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4968 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4969 build_defer_string |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4970 ("Automatically detects the correct encoding; EOL type of CR forced."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4971 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4972 Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4973 Qeol_type, Qcr, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4974 Qunbound)); |
771 | 4975 |
428 | 4976 /* Need to create this here or we're really screwed. */ |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4977 Fmake_coding_system_internal |
428 | 4978 (Qraw_text, Qno_conversion, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4979 build_defer_string ("Raw Text"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4980 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4981 build_defer_string ("Raw text converts only line-break " |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4982 "codes, and acts otherwise like " |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4983 "`binary'."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4984 Qmnemonic, build_ascstring ("Raw"), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4985 #ifdef MULE |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4986 Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4987 Vcharset_latin_iso8859_1), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4988 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4989 #endif |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4990 Qunbound)); |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4991 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4992 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4993 Fmake_coding_system_internal |
428 | 4994 (Qbinary, Qno_conversion, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4995 build_defer_string ("Binary"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4996 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4997 build_defer_string ( |
771 | 4998 "This coding system is as close as it comes to doing no conversion.\n" |
4999 "On input, each byte is converted directly into the character\n" | |
5000 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" | |
5001 "or `latin-1' character sets. On output, these characters are\n" | |
5002 "converted back to the corresponding bytes, and other characters\n" | |
5003 "are converted to the default character, i.e. `~'."), | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5004 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5005 Qmnemonic, build_ascstring ("Binary"), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
5006 #ifdef MULE |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5007 Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5008 Vcharset_latin_iso8859_1), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
5009 #endif |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5010 Qunbound)); |
428 | 5011 |
771 | 5012 /* Formerly aliased to raw-text! Completely bogus and not even the same |
5013 as FSF Emacs. */ | |
5014 Fdefine_coding_system_alias (Qno_conversion, Qbinary); | |
5015 Fdefine_coding_system_alias (intern ("no-conversion-unix"), | |
5016 intern ("raw-text-unix")); | |
5017 Fdefine_coding_system_alias (intern ("no-conversion-dos"), | |
5018 intern ("raw-text-dos")); | |
5019 Fdefine_coding_system_alias (intern ("no-conversion-mac"), | |
5020 intern ("raw-text-mac")); | |
5021 | |
1318 | 5022 /* These three below will get their defaults set correctly |
5023 in code-init.el. We init them now so we can handle stuff at dump | |
771 | 5024 time before we get to code-init.el. */ |
1318 | 5025 Fdefine_coding_system_alias (Qnative, Qbinary); |
440 | 5026 Fdefine_coding_system_alias (Qterminal, Qbinary); |
5027 Fdefine_coding_system_alias (Qkeyboard, Qbinary); | |
5028 | |
1318 | 5029 Fdefine_coding_system_alias (Qfile_name, Qnative); |
771 | 5030 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf); |
5031 | |
428 | 5032 /* Need this for bootstrapping */ |
771 | 5033 coding_category_system[detector_category_no_conversion] = |
428 | 5034 Fget_coding_system (Qraw_text); |
5035 } |