Mercurial > hg > xemacs-beta
annotate src/file-coding.c @ 5117:3742ea8250b5 ben-lisp-object ben-lisp-object-final-ws-year-2005
Checking in final CVS version of workspace 'ben-lisp-object'
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Sat, 26 Dec 2009 00:20:27 -0600 |
parents | facf3239ba30 |
children | e0db3c197671 |
rev | line source |
---|---|
771 | 1 /* Text encoding conversion functions; coding-system object. |
2 #### rename me to coding-system.c or coding.c | |
428 | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
4 Copyright (C) 1995 Sun Microsystems, Inc. | |
3025 | 5 Copyright (C) 2000, 2001, 2002, 2003, 2005 Ben Wing. |
428 | 6 |
7 This file is part of XEmacs. | |
8 | |
9 XEmacs is free software; you can redistribute it and/or modify it | |
10 under the terms of the GNU General Public License as published by the | |
11 Free Software Foundation; either version 2, or (at your option) any | |
12 later version. | |
13 | |
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with XEmacs; see the file COPYING. If not, write to | |
21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
22 Boston, MA 02111-1307, USA. */ | |
23 | |
771 | 24 /* Synched up with: Not in FSF. */ |
25 | |
26 /* Authorship: | |
27 | |
28 Current primary author: Ben Wing <ben@xemacs.org> | |
29 | |
30 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c | |
31 from Mule 2.? but probably does not share one line of code with that | |
32 original source. Rewriting work started around Dec. 1994. or Jan. 1995. | |
33 Proceeded in earnest till Nov. 1995. | |
34 | |
35 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to | |
36 file-coding.c, with the intention of using it to do end-of-line conversion | |
37 on non-MULE machines (specifically, on Windows machines). He separated | |
38 out the MULE stuff from non-MULE using ifdef's, and searched throughout | |
39 the rest of the source tree looking for coding-system-related code that | |
40 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS. | |
41 | |
42 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system | |
43 types, providing a primitive means of decoding and encoding externally- | |
44 formatted Unicode/UCS_4 and Unicode/UTF_8 data. | |
45 | |
46 January 25, 2000, Martin Buchholz redid and fleshed out the coding | |
47 system alias handling that was first added in prototype form by | |
48 Hrjove Niksic, April 15, 1999. | |
49 | |
50 April to May 2000, Ben Wing: More major reorganization. Adding features | |
51 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the | |
52 "chain" coding system for chaining two together, and doing a lot of | |
53 reorganization in preparation for properly abstracting out the different | |
54 coding system types. | |
55 | |
56 June 2001, Ben Wing: Added Unicode support. Eliminated previous | |
57 junky Unicode translation support. | |
58 | |
59 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished | |
60 abstracting everything except detection, which is hard to abstract (see | |
61 just below). | |
62 | |
63 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code | |
64 to intl-win32.c. Lots more rewriting; very little code is untouched | |
65 from before April 2000. Abstracted the detection code, added multiple | |
66 levels of likelihood to increase the reliability of the algorithm. | |
67 | |
68 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined. | |
69 Removed the conditionals. | |
70 */ | |
71 | |
428 | 72 #include <config.h> |
73 #include "lisp.h" | |
74 | |
75 #include "buffer.h" | |
76 #include "elhash.h" | |
77 #include "insdel.h" | |
78 #include "lstream.h" | |
440 | 79 #include "opaque.h" |
771 | 80 #include "file-coding.h" |
81 | |
82 #ifdef HAVE_ZLIB | |
83 #include "zlib.h" | |
428 | 84 #endif |
85 | |
86 Lisp_Object Vkeyboard_coding_system; | |
87 Lisp_Object Vterminal_coding_system; | |
88 Lisp_Object Vcoding_system_for_read; | |
89 Lisp_Object Vcoding_system_for_write; | |
90 Lisp_Object Vfile_name_coding_system; | |
91 | |
771 | 92 #ifdef DEBUG_XEMACS |
93 Lisp_Object Vdebug_coding_detection; | |
440 | 94 #endif |
771 | 95 |
96 typedef struct coding_system_type_entry | |
97 { | |
98 struct coding_system_methods *meths; | |
99 } coding_system_type_entry; | |
100 | |
101 typedef struct | |
102 { | |
103 Dynarr_declare (coding_system_type_entry); | |
104 } coding_system_type_entry_dynarr; | |
105 | |
106 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr; | |
107 | |
1204 | 108 static const struct memory_description cste_description_1[] = { |
2551 | 109 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1, |
110 { &coding_system_methods_description } }, | |
771 | 111 { XD_END } |
112 }; | |
113 | |
1204 | 114 static const struct sized_memory_description cste_description = { |
771 | 115 sizeof (coding_system_type_entry), |
116 cste_description_1 | |
117 }; | |
118 | |
1204 | 119 static const struct memory_description csted_description_1[] = { |
771 | 120 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description), |
428 | 121 { XD_END } |
122 }; | |
123 | |
1204 | 124 static const struct sized_memory_description csted_description = { |
771 | 125 sizeof (coding_system_type_entry_dynarr), |
126 csted_description_1 | |
127 }; | |
128 | |
129 static Lisp_Object Vcoding_system_type_list; | |
130 | |
131 /* Coding system currently associated with each coding category. */ | |
132 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES]; | |
133 | |
134 /* Table of all coding categories in decreasing order of priority. | |
135 This describes a permutation of the possible coding categories. */ | |
136 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES]; | |
137 | |
138 /* Value used with to give a unique name to nameless coding systems */ | |
139 int coding_system_tick; | |
140 | |
141 int coding_detector_count; | |
142 int coding_detector_category_count; | |
143 | |
144 detector_dynarr *all_coding_detectors; | |
145 | |
1204 | 146 static const struct memory_description struct_detector_category_description_1[] |
771 | 147 = |
148 { | |
149 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) }, | |
150 { XD_END } | |
151 }; | |
152 | |
1204 | 153 static const struct sized_memory_description struct_detector_category_description = |
771 | 154 { |
155 sizeof (struct detector_category), | |
156 struct_detector_category_description_1 | |
428 | 157 }; |
158 | |
1204 | 159 static const struct memory_description detector_category_dynarr_description_1[] = |
771 | 160 { |
161 XD_DYNARR_DESC (detector_category_dynarr, | |
162 &struct_detector_category_description), | |
163 { XD_END } | |
164 }; | |
165 | |
1204 | 166 static const struct sized_memory_description detector_category_dynarr_description = { |
771 | 167 sizeof (detector_category_dynarr), |
168 detector_category_dynarr_description_1 | |
169 }; | |
170 | |
1204 | 171 static const struct memory_description struct_detector_description_1[] |
771 | 172 = |
173 { | |
2367 | 174 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1, |
2551 | 175 { &detector_category_dynarr_description } }, |
771 | 176 { XD_END } |
177 }; | |
178 | |
1204 | 179 static const struct sized_memory_description struct_detector_description = |
771 | 180 { |
181 sizeof (struct detector), | |
182 struct_detector_description_1 | |
183 }; | |
184 | |
1204 | 185 static const struct memory_description detector_dynarr_description_1[] = |
771 | 186 { |
187 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description), | |
188 { XD_END } | |
189 }; | |
190 | |
1204 | 191 static const struct sized_memory_description detector_dynarr_description = { |
771 | 192 sizeof (detector_dynarr), |
193 detector_dynarr_description_1 | |
194 }; | |
428 | 195 |
196 Lisp_Object Qcoding_systemp; | |
197 | |
771 | 198 Lisp_Object Qraw_text; |
428 | 199 |
200 Lisp_Object Qmnemonic, Qeol_type; | |
201 Lisp_Object Qcr, Qcrlf, Qlf; | |
202 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; | |
203 Lisp_Object Qpost_read_conversion; | |
204 Lisp_Object Qpre_write_conversion; | |
205 | |
771 | 206 Lisp_Object Qtranslation_table_for_decode; |
207 Lisp_Object Qtranslation_table_for_encode; | |
208 Lisp_Object Qsafe_chars; | |
209 Lisp_Object Qsafe_charsets; | |
210 Lisp_Object Qmime_charset; | |
211 Lisp_Object Qvalid_codes; | |
212 | |
213 Lisp_Object Qno_conversion; | |
214 Lisp_Object Qconvert_eol; | |
440 | 215 Lisp_Object Qescape_quoted; |
771 | 216 Lisp_Object Qencode, Qdecode; |
217 | |
218 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf; | |
219 Lisp_Object Qconvert_eol_autodetect; | |
220 | |
221 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely; | |
1494 | 222 Lisp_Object Qslightly_likely; |
771 | 223 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable; |
224 Lisp_Object Qnearly_impossible; | |
225 | |
226 Lisp_Object Qdo_eol, Qdo_coding; | |
227 | |
228 Lisp_Object Qcanonicalize_after_coding; | |
229 | |
1347 | 230 Lisp_Object QScoding_system_cookie; |
231 | |
771 | 232 /* This is used to convert autodetected coding systems into existing |
233 systems. For example, the chain undecided->convert-eol-autodetect may | |
234 have its separate parts detected as mswindows-multibyte and | |
235 convert-eol-crlf, and the result needs to be mapped to | |
236 mswindows-multibyte-dos. */ | |
237 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
238 any more. */ | |
239 static Lisp_Object Vchain_canonicalize_hash_table; | |
240 | |
241 #ifdef HAVE_ZLIB | |
242 Lisp_Object Qgzip; | |
428 | 243 #endif |
771 | 244 |
2297 | 245 /* Maps symbols (coding system names) to either coding system objects or |
246 (for aliases) other names. */ | |
771 | 247 static Lisp_Object Vcoding_system_hash_table; |
428 | 248 |
249 int enable_multibyte_characters; | |
250 | |
251 EXFUN (Fcopy_coding_system, 2); | |
252 | |
253 | |
254 /************************************************************************/ | |
771 | 255 /* Coding system object methods */ |
428 | 256 /************************************************************************/ |
257 | |
258 static Lisp_Object | |
259 mark_coding_system (Lisp_Object obj) | |
260 { | |
261 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); | |
262 | |
1204 | 263 #define MARKED_SLOT(x) mark_object (codesys->x); |
264 #include "coding-system-slots.h" | |
771 | 265 |
266 MAYBE_CODESYSMETH (codesys, mark, (obj)); | |
428 | 267 |
1204 | 268 return Qnil; |
428 | 269 } |
270 | |
271 static void | |
771 | 272 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun) |
273 { | |
274 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
275 print_internal (c->methods->type, printcharfun, 1); | |
276 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1)); | |
277 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT) | |
278 write_fmt_string_lisp (printcharfun, " eol-type=%s", | |
279 1, Fcoding_system_property (obj, Qeol_type)); | |
280 } | |
281 | |
282 static void | |
428 | 283 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, |
2286 | 284 int UNUSED (escapeflag)) |
428 | 285 { |
286 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
287 if (print_readably) | |
771 | 288 printing_unreadable_object |
289 ("printing unreadable object #<coding-system 0x%x>", c->header.uid); | |
290 | |
291 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name); | |
292 print_coding_system_properties (obj, printcharfun); | |
826 | 293 write_c_string (printcharfun, ">"); |
428 | 294 } |
295 | |
771 | 296 /* Print an abbreviated version of a coding system (but still containing |
297 all the information), for use within a coding system print method. */ | |
298 | |
299 static void | |
300 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun, | |
2286 | 301 int UNUSED (escapeflag)) |
771 | 302 { |
800 | 303 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); |
771 | 304 print_coding_system_properties (cs, printcharfun); |
826 | 305 write_c_string (printcharfun, "]"); |
771 | 306 } |
307 | |
428 | 308 static void |
309 finalize_coding_system (void *header, int for_disksave) | |
310 { | |
771 | 311 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header); |
428 | 312 /* Since coding systems never go away, this function is not |
313 necessary. But it would be necessary if we changed things | |
314 so that coding systems could go away. */ | |
315 if (!for_disksave) /* see comment in lstream.c */ | |
771 | 316 MAYBE_XCODESYSMETH (cs, finalize, (cs)); |
317 } | |
318 | |
319 static Bytecount | |
320 sizeof_coding_system (const void *header) | |
321 { | |
322 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header; | |
323 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size; | |
428 | 324 } |
325 | |
1204 | 326 static const struct memory_description coding_system_methods_description_1[] |
771 | 327 = { |
328 { XD_LISP_OBJECT, | |
329 offsetof (struct coding_system_methods, type) }, | |
330 { XD_LISP_OBJECT, | |
331 offsetof (struct coding_system_methods, predicate_symbol) }, | |
332 { XD_END } | |
333 }; | |
334 | |
1204 | 335 const struct sized_memory_description coding_system_methods_description = { |
771 | 336 sizeof (struct coding_system_methods), |
337 coding_system_methods_description_1 | |
338 }; | |
339 | |
1204 | 340 static const struct sized_memory_description coding_system_extra_description_map[] = |
341 { | |
342 { offsetof (Lisp_Coding_System, methods) }, | |
343 { offsetof (struct coding_system_methods, extra_description) }, | |
344 { -1 }, | |
771 | 345 }; |
346 | |
1204 | 347 static const struct memory_description coding_system_description[] = |
428 | 348 { |
2367 | 349 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1, |
2551 | 350 { &coding_system_methods_description } }, |
1204 | 351 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) }, |
352 #define MARKED_SLOT_ARRAY(slot, size) \ | |
353 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size }, | |
354 #include "coding-system-slots.h" | |
2367 | 355 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1, |
2551 | 356 { coding_system_extra_description_map } }, |
1204 | 357 { XD_END } |
771 | 358 }; |
359 | |
1204 | 360 static const struct memory_description coding_system_empty_extra_description_1[] = |
361 { | |
362 { XD_END } | |
363 }; | |
364 | |
365 const struct sized_memory_description coding_system_empty_extra_description = { | |
366 0, coding_system_empty_extra_description_1 | |
367 }; | |
368 | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
369 DEFINE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
370 mark_coding_system, |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
371 print_coding_system, |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
372 finalize_coding_system, |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
373 0, 0, coding_system_description, |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
374 sizeof_coding_system, |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
375 Lisp_Coding_System); |
771 | 376 |
377 /************************************************************************/ | |
378 /* Creating coding systems */ | |
379 /************************************************************************/ | |
380 | |
381 static struct coding_system_methods * | |
382 decode_coding_system_type (Lisp_Object type, Error_Behavior errb) | |
428 | 383 { |
771 | 384 int i; |
385 | |
386 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++) | |
428 | 387 { |
771 | 388 if (EQ (type, |
389 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type)) | |
390 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths; | |
428 | 391 } |
771 | 392 |
393 maybe_invalid_constant ("Invalid coding system type", type, | |
394 Qcoding_system, errb); | |
395 | |
396 return 0; | |
428 | 397 } |
398 | |
771 | 399 static int |
400 valid_coding_system_type_p (Lisp_Object type) | |
428 | 401 { |
771 | 402 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; |
403 } | |
404 | |
405 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* | |
406 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | |
407 Valid types depend on how XEmacs was compiled but may include | |
3025 | 408 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
409 `utf-16', `ucs-4', `utf-8', etc. | |
771 | 410 */ |
411 (coding_system_type)) | |
412 { | |
413 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; | |
414 } | |
415 | |
416 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /* | |
417 Return a list of valid coding system types. | |
418 */ | |
419 ()) | |
420 { | |
421 return Fcopy_sequence (Vcoding_system_type_list); | |
422 } | |
423 | |
424 void | |
425 add_entry_to_coding_system_type_list (struct coding_system_methods *meths) | |
426 { | |
427 struct coding_system_type_entry entry; | |
428 | |
429 entry.meths = meths; | |
430 Dynarr_add (the_coding_system_type_entry_dynarr, entry); | |
431 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list); | |
428 | 432 } |
433 | |
434 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /* | |
435 Return t if OBJECT is a coding system. | |
436 A coding system is an object that defines how text containing multiple | |
437 character sets is encoded into a stream of (typically 8-bit) bytes. | |
438 The coding system is used to decode the stream into a series of | |
439 characters (which may be from multiple charsets) when the text is read | |
440 from a file or process, and is used to encode the text back into the | |
441 same format when it is written out to a file or process. | |
442 | |
443 For example, many ISO2022-compliant coding systems (such as Compound | |
444 Text, which is used for inter-client data under the X Window System) | |
445 use escape sequences to switch between different charsets -- Japanese | |
446 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked | |
447 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See | |
448 `make-coding-system' for more information. | |
449 | |
450 Coding systems are normally identified using a symbol, and the | |
451 symbol is accepted in place of the actual coding system object whenever | |
452 a coding system is called for. (This is similar to how faces work.) | |
453 */ | |
454 (object)) | |
455 { | |
456 return CODING_SYSTEMP (object) ? Qt : Qnil; | |
457 } | |
458 | |
459 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* | |
460 Retrieve the coding system of the given name. | |
461 | |
462 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply | |
463 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. | |
464 If there is no such coding system, nil is returned. Otherwise the | |
465 associated coding system object is returned. | |
466 */ | |
467 (coding_system_or_name)) | |
468 { | |
469 if (NILP (coding_system_or_name)) | |
470 coding_system_or_name = Qbinary; | |
440 | 471 else if (CODING_SYSTEMP (coding_system_or_name)) |
472 return coding_system_or_name; | |
428 | 473 else |
474 CHECK_SYMBOL (coding_system_or_name); | |
475 | |
440 | 476 while (1) |
477 { | |
478 coding_system_or_name = | |
479 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
480 | |
771 | 481 if (CODING_SYSTEMP (coding_system_or_name) |
482 || NILP (coding_system_or_name)) | |
440 | 483 return coding_system_or_name; |
484 } | |
428 | 485 } |
486 | |
487 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* | |
488 Retrieve the coding system of the given name. | |
489 Same as `find-coding-system' except that if there is no such | |
490 coding system, an error is signaled instead of returning nil. | |
491 */ | |
492 (name)) | |
493 { | |
494 Lisp_Object coding_system = Ffind_coding_system (name); | |
495 | |
496 if (NILP (coding_system)) | |
563 | 497 invalid_argument ("No such coding system", name); |
428 | 498 return coding_system; |
499 } | |
500 | |
771 | 501 int |
502 coding_system_is_binary (Lisp_Object coding_system) | |
503 { | |
504 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
505 return | |
506 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) && | |
507 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF && | |
508 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) && | |
509 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil)); | |
510 } | |
511 | |
512 static Lisp_Object | |
513 coding_system_real_canonical (Lisp_Object cs) | |
514 { | |
515 if (!NILP (XCODING_SYSTEM_CANONICAL (cs))) | |
516 return XCODING_SYSTEM_CANONICAL (cs); | |
517 return cs; | |
518 } | |
519 | |
520 /* Return true if coding system is of the "standard" type that decodes | |
521 bytes into characters (suitable for decoding a text file). */ | |
522 int | |
523 coding_system_is_for_text_file (Lisp_Object coding_system) | |
524 { | |
525 return (XCODESYSMETH_OR_GIVEN | |
526 (coding_system, conversion_end_type, | |
527 (coding_system_real_canonical (coding_system)), | |
528 DECODES_BYTE_TO_CHARACTER) == | |
529 DECODES_BYTE_TO_CHARACTER); | |
530 } | |
531 | |
532 static int | |
533 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
534 { | |
535 enum source_sink_type type = | |
536 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type, | |
537 (coding_system_real_canonical (cs)), | |
538 DECODES_BYTE_TO_CHARACTER); | |
539 if (sex == CODING_SOURCE) | |
540 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
541 type == DECODES_CHARACTER_TO_BYTE); | |
542 else | |
543 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
544 type == DECODES_BYTE_TO_CHARACTER); | |
545 } | |
546 | |
547 static int | |
548 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
549 { | |
550 return decoding_source_sink_type_is_char (cs, | |
551 /* Sex change */ | |
552 sex == CODING_SOURCE ? | |
553 CODING_SINK : CODING_SOURCE); | |
554 } | |
555 | |
556 /* Like Ffind_coding_system() but check that the coding system is of the | |
557 "standard" type that decodes bytes into characters (suitable for | |
558 decoding a text file), and if not, returns an appropriate wrapper that | |
559 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
560 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
561 system to do this. */ | |
562 | |
563 Lisp_Object | |
564 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
565 { | |
566 Lisp_Object coding_system = Ffind_coding_system (name); | |
567 Lisp_Object wrapper = coding_system; | |
568 | |
569 if (NILP (coding_system)) | |
570 return Qnil; | |
571 if (!coding_system_is_for_text_file (coding_system)) | |
572 { | |
573 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system); | |
574 if (NILP (wrapper)) | |
575 { | |
576 Lisp_Object chain; | |
577 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK)) | |
578 chain = list2 (coding_system, Qbinary); | |
579 else | |
580 chain = list1 (coding_system); | |
581 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE)) | |
582 chain = Fcons (Qbinary, chain); | |
583 wrapper = | |
584 make_internal_coding_system | |
585 (coding_system, | |
586 "internal-text-file-wrapper", | |
587 Qchain, | |
588 Qunbound, list4 (Qchain, chain, | |
589 Qcanonicalize_after_coding, coding_system)); | |
590 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper; | |
591 } | |
592 } | |
593 | |
594 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT) | |
595 return wrapper; | |
596 | |
597 coding_system = wrapper; | |
598 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system); | |
599 if (!NILP (wrapper)) | |
600 return wrapper; | |
601 wrapper = | |
602 make_internal_coding_system | |
603 (coding_system, | |
604 "internal-auto-eol-wrapper", | |
605 Qundecided, Qunbound, | |
606 list4 (Qcoding_system, coding_system, | |
607 Qdo_eol, Qt)); | |
608 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper; | |
609 return wrapper; | |
610 } | |
611 | |
612 /* Like Fget_coding_system() but verify that the coding system is of the | |
613 "standard" type that decodes bytes into characters (suitable for | |
614 decoding a text file), and if not, returns an appropriate wrapper that | |
615 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
616 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
617 system to do this. */ | |
618 | |
619 Lisp_Object | |
620 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
621 { | |
622 Lisp_Object coding_system = find_coding_system_for_text_file (name, | |
623 eol_wrap); | |
624 if (NILP (coding_system)) | |
625 invalid_argument ("No such coding system", name); | |
626 return coding_system; | |
627 } | |
628 | |
629 /* We store the coding systems in hash tables with the names as the | |
630 key and the actual coding system object as the value. Occasionally | |
631 we need to use them in a list format. These routines provide us | |
632 with that. */ | |
428 | 633 struct coding_system_list_closure |
634 { | |
635 Lisp_Object *coding_system_list; | |
771 | 636 int normal; |
637 int internal; | |
428 | 638 }; |
639 | |
640 static int | |
2286 | 641 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object UNUSED (value), |
428 | 642 void *coding_system_list_closure) |
643 { | |
644 /* This function can GC */ | |
645 struct coding_system_list_closure *cscl = | |
646 (struct coding_system_list_closure *) coding_system_list_closure; | |
647 Lisp_Object *coding_system_list = cscl->coding_system_list; | |
648 | |
771 | 649 /* We can't just use VALUE because KEY might be an alias, and we need |
650 the real coding system object. */ | |
651 if (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | |
652 cscl->internal : cscl->normal) | |
653 *coding_system_list = Fcons (key, *coding_system_list); | |
428 | 654 return 0; |
655 } | |
656 | |
2297 | 657 /* #### should we specify a conventional for "all coding systems"? */ |
771 | 658 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /* |
428 | 659 Return a list of the names of all defined coding systems. |
771 | 660 If INTERNAL is nil, only the normal (non-internal) coding systems are |
661 included. (Internal coding systems are created for various internal | |
662 purposes, such as implementing EOL types of CRLF and CR; generally, you do | |
663 not want to see these.) If it is t, only the internal coding systems are | |
664 included. If it is any other non-nil value both normal and internal are | |
665 included. | |
428 | 666 */ |
771 | 667 (internal)) |
428 | 668 { |
669 Lisp_Object coding_system_list = Qnil; | |
670 struct gcpro gcpro1; | |
671 struct coding_system_list_closure coding_system_list_closure; | |
672 | |
673 GCPRO1 (coding_system_list); | |
674 coding_system_list_closure.coding_system_list = &coding_system_list; | |
771 | 675 coding_system_list_closure.normal = !EQ (internal, Qt); |
676 coding_system_list_closure.internal = !NILP (internal); | |
428 | 677 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table, |
678 &coding_system_list_closure); | |
679 UNGCPRO; | |
680 | |
681 return coding_system_list; | |
682 } | |
683 | |
684 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /* | |
685 Return the name of the given coding system. | |
686 */ | |
687 (coding_system)) | |
688 { | |
689 coding_system = Fget_coding_system (coding_system); | |
690 return XCODING_SYSTEM_NAME (coding_system); | |
691 } | |
692 | |
693 static Lisp_Coding_System * | |
771 | 694 allocate_coding_system (struct coding_system_methods *codesys_meths, |
695 Bytecount data_size, | |
696 Lisp_Object name) | |
428 | 697 { |
771 | 698 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size; |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
699 Lisp_Object obj = ALLOC_SIZED_LISP_OBJECT (total_size, coding_system); |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
700 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); |
1204 | 701 |
771 | 702 codesys->methods = codesys_meths; |
1204 | 703 #define MARKED_SLOT(x) codesys->x = Qnil; |
704 #include "coding-system-slots.h" | |
705 | |
771 | 706 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF; |
707 CODING_SYSTEM_NAME (codesys) = name; | |
708 | |
709 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys))); | |
428 | 710 |
711 return codesys; | |
712 } | |
713 | |
771 | 714 static enum eol_type |
715 symbol_to_eol_type (Lisp_Object symbol) | |
716 { | |
717 CHECK_SYMBOL (symbol); | |
718 if (NILP (symbol)) return EOL_AUTODETECT; | |
719 if (EQ (symbol, Qlf)) return EOL_LF; | |
720 if (EQ (symbol, Qcrlf)) return EOL_CRLF; | |
721 if (EQ (symbol, Qcr)) return EOL_CR; | |
722 | |
723 invalid_constant ("Unrecognized eol type", symbol); | |
1204 | 724 RETURN_NOT_REACHED (EOL_AUTODETECT); |
771 | 725 } |
726 | |
727 static Lisp_Object | |
728 eol_type_to_symbol (enum eol_type type) | |
729 { | |
730 switch (type) | |
731 { | |
2500 | 732 default: ABORT (); |
771 | 733 case EOL_LF: return Qlf; |
734 case EOL_CRLF: return Qcrlf; | |
735 case EOL_CR: return Qcr; | |
736 case EOL_AUTODETECT: return Qnil; | |
737 } | |
738 } | |
739 | |
740 struct subsidiary_type | |
741 { | |
2367 | 742 Ascbyte *extension; |
743 Ascbyte *mnemonic_ext; | |
771 | 744 enum eol_type eol; |
745 }; | |
746 | |
747 static struct subsidiary_type coding_subsidiary_list[] = | |
748 { { "-unix", "", EOL_LF }, | |
749 { "-dos", ":T", EOL_CRLF }, | |
750 { "-mac", ":t", EOL_CR } }; | |
751 | |
752 /* kludge */ | |
428 | 753 static void |
771 | 754 setup_eol_coding_systems (Lisp_Object codesys) |
428 | 755 { |
793 | 756 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name); |
2367 | 757 Ibyte *codesys_name = alloca_ibytes (len + 7); |
771 | 758 int mlen = -1; |
867 | 759 Ibyte *codesys_mnemonic = 0; |
771 | 760 Lisp_Object codesys_name_sym, sub_codesys; |
761 int i; | |
762 | |
763 memcpy (codesys_name, | |
793 | 764 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len); |
771 | 765 |
766 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys))) | |
428 | 767 { |
771 | 768 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys)); |
2367 | 769 codesys_mnemonic = alloca_ibytes (mlen + 7); |
771 | 770 memcpy (codesys_mnemonic, |
771 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen); | |
772 } | |
773 | |
774 /* Create three "subsidiary" coding systems, decoding data encoded using | |
775 each of the three EOL types. We do this for each subsidiary by | |
776 copying the original coding system, setting the EOL type | |
777 appropriately, and setting the CANONICAL member of the new coding | |
778 system to be a chain consisting of the original coding system followed | |
779 by a convert-eol coding system to do the EOL decoding. For EOL type | |
780 LF, however, we don't need any decoding, so we skip creating a | |
781 CANONICAL. | |
782 | |
783 If the original coding system is not a text-type coding system | |
784 (decodes byte->char), we need to coerce it to one by the appropriate | |
785 wrapping in CANONICAL. */ | |
786 | |
787 for (i = 0; i < countof (coding_subsidiary_list); i++) | |
788 { | |
2367 | 789 Ascbyte *extension = coding_subsidiary_list[i].extension; |
790 Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; | |
771 | 791 enum eol_type eol = coding_subsidiary_list[i].eol; |
792 | |
2367 | 793 qxestrcpy_ascii (codesys_name + len, extension); |
771 | 794 codesys_name_sym = intern_int (codesys_name); |
795 if (mlen != -1) | |
2367 | 796 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext); |
771 | 797 |
798 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym); | |
799 if (mlen != -1) | |
800 XCODING_SYSTEM_MNEMONIC (sub_codesys) = | |
801 build_intstring (codesys_mnemonic); | |
802 | |
803 if (eol != EOL_LF) | |
804 { | |
805 Lisp_Object chain = list2 (get_coding_system_for_text_file | |
806 (codesys, 0), | |
807 eol == EOL_CR ? Qconvert_eol_cr : | |
808 Qconvert_eol_crlf); | |
809 Lisp_Object canon = | |
810 make_internal_coding_system | |
811 (sub_codesys, "internal-subsidiary-eol-wrapper", | |
812 Qchain, Qunbound, | |
813 mlen != -1 ? | |
814 list6 (Qmnemonic, build_intstring (codesys_mnemonic), | |
815 Qchain, chain, | |
816 Qcanonicalize_after_coding, sub_codesys) : | |
817 list4 (Qchain, chain, | |
818 Qcanonicalize_after_coding, sub_codesys)); | |
819 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon; | |
820 } | |
821 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol; | |
822 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; | |
823 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; | |
428 | 824 } |
825 } | |
826 | |
771 | 827 /* Basic function to create new coding systems. For `make-coding-system', |
828 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, | |
829 DESCRIPTION, and PROPS are the same. All created coding systems are put | |
830 in a hash table indexed by NAME. | |
831 | |
832 If PREFIX is a string, NAME-OR-EXISTING should specify an existing | |
833 coding system (or nil), and an internal coding system will be created. | |
834 The name of the coding system will be constructed by combining PREFIX | |
835 with the name of the existing coding system (if given), and a number | |
836 will be appended to insure uniqueness. In such a case, if Qunbound is | |
837 given for DESCRIPTION, the description gets created based on the | |
838 generated name. Also, if no mnemonic is given in the properties list, a | |
839 mnemonic is created based on the generated name. | |
840 | |
841 For internal coding systems, the coding system is marked as internal | |
842 (see `coding-system-list'), and no subsidiaries will be created or | |
843 eol-wrapping will happen. Otherwise: | |
844 | |
845 -- if the eol-type property is `lf' or t, the coding system is merely | |
846 created and returned. (For t, the coding system will be wrapped with | |
847 an EOL autodetector when it's used to read a file.) | |
848 | |
849 -- if eol-type is `crlf' or `cr', after the coding system object is | |
850 created, it will be wrapped in a chain with the appropriate | |
851 convert-eol coding system (either `convert-eol-crlf' or | |
852 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at | |
853 decoding time, and the opposite at encoding time. The resulting | |
854 chain becomes the CANONICAL field of the coding system object. | |
855 | |
856 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three | |
857 coding systems where the original coding system (before wrapping with | |
858 convert-eol-autodetect) is either unwrapped or wrapped with | |
859 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems | |
860 to handle LF, CRLF, and CR end-of-line indicators are created. (This | |
861 crazy crap is based on existing behavior in other Mule versions, | |
862 including FSF Emacs.) | |
863 */ | |
428 | 864 |
865 static Lisp_Object | |
2367 | 866 make_coding_system_1 (Lisp_Object name_or_existing, Ascbyte *prefix, |
771 | 867 Lisp_Object type, Lisp_Object description, |
868 Lisp_Object props) | |
428 | 869 { |
771 | 870 Lisp_Coding_System *cs; |
871 int need_to_setup_eol_systems = 1; | |
872 enum eol_type eol_wrapper = EOL_AUTODETECT; | |
873 struct coding_system_methods *meths; | |
874 Lisp_Object csobj; | |
875 Lisp_Object defmnem = Qnil; | |
876 | |
877 if (NILP (type)) | |
878 type = Qundecided; | |
879 meths = decode_coding_system_type (type, ERROR_ME); | |
880 | |
881 if (prefix) | |
428 | 882 { |
867 | 883 Ibyte *newname = |
771 | 884 emacs_sprintf_malloc (NULL, "%s-%s-%d", |
885 prefix, | |
867 | 886 NILP (name_or_existing) ? (Ibyte *) "nil" : |
771 | 887 XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME |
888 (name_or_existing))), | |
889 ++coding_system_tick); | |
890 name_or_existing = intern_int (newname); | |
1726 | 891 xfree (newname, Ibyte *); |
771 | 892 |
893 if (UNBOUNDP (description)) | |
894 { | |
895 newname = | |
896 emacs_sprintf_malloc | |
897 (NULL, "For Internal Use (%s)", | |
898 XSTRING_DATA (Fsymbol_name (name_or_existing))); | |
899 description = build_intstring (newname); | |
1726 | 900 xfree (newname, Ibyte *); |
771 | 901 } |
902 | |
903 newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick); | |
904 defmnem = build_intstring (newname); | |
1726 | 905 xfree (newname, Ibyte *); |
428 | 906 } |
771 | 907 else |
908 CHECK_SYMBOL (name_or_existing); | |
909 | |
910 if (!NILP (Ffind_coding_system (name_or_existing))) | |
911 invalid_operation ("Cannot redefine existing coding system", | |
912 name_or_existing); | |
913 | |
914 cs = allocate_coding_system (meths, meths->extra_data_size, | |
915 name_or_existing); | |
793 | 916 csobj = wrap_coding_system (cs); |
771 | 917 |
918 cs->internal_p = !!prefix; | |
919 | |
920 if (NILP (description)) | |
921 description = build_string (""); | |
922 else | |
923 CHECK_STRING (description); | |
924 CODING_SYSTEM_DESCRIPTION (cs) = description; | |
925 | |
926 if (!NILP (defmnem)) | |
927 CODING_SYSTEM_MNEMONIC (cs) = defmnem; | |
928 | |
929 { | |
930 EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) | |
931 { | |
932 int recognized = 1; | |
933 | |
934 if (EQ (key, Qmnemonic)) | |
935 { | |
936 if (!NILP (value)) | |
937 CHECK_STRING (value); | |
938 CODING_SYSTEM_MNEMONIC (cs) = value; | |
939 } | |
940 | |
941 else if (EQ (key, Qdocumentation)) | |
942 { | |
943 if (!NILP (value)) | |
944 CHECK_STRING (value); | |
945 CODING_SYSTEM_DOCUMENTATION (cs) = value; | |
946 } | |
947 | |
948 else if (EQ (key, Qeol_type)) | |
949 { | |
950 need_to_setup_eol_systems = NILP (value); | |
951 if (EQ (value, Qt)) | |
952 value = Qnil; | |
953 eol_wrapper = symbol_to_eol_type (value); | |
954 } | |
955 | |
956 else if (EQ (key, Qpost_read_conversion)) | |
957 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; | |
958 else if (EQ (key, Qpre_write_conversion)) | |
959 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; | |
960 /* FSF compatibility */ | |
961 else if (EQ (key, Qtranslation_table_for_decode)) | |
962 ; | |
963 else if (EQ (key, Qtranslation_table_for_encode)) | |
964 ; | |
965 else if (EQ (key, Qsafe_chars)) | |
966 ; | |
967 else if (EQ (key, Qsafe_charsets)) | |
968 ; | |
969 else if (EQ (key, Qmime_charset)) | |
970 ; | |
971 else if (EQ (key, Qvalid_codes)) | |
972 ; | |
973 else | |
974 recognized = CODESYSMETH_OR_GIVEN (cs, putprop, | |
975 (csobj, key, value), 0); | |
976 | |
977 if (!recognized) | |
978 invalid_constant ("Unrecognized property", key); | |
979 } | |
980 } | |
981 | |
982 { | |
983 XCODING_SYSTEM_CANONICAL (csobj) = | |
984 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); | |
985 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system | |
986 below */ | |
987 | |
988 if (need_to_setup_eol_systems && !cs->internal_p) | |
989 setup_eol_coding_systems (csobj); | |
990 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) | |
991 { | |
992 /* If a specific eol-type (other than LF) was specified, we handle | |
993 this by converting the coding system into a chain that wraps the | |
994 coding system along with a convert-eol system after it, in | |
995 exactly that same switcheroo fashion that the normal | |
996 canonicalize method works -- BUT we will run into a problem if | |
997 we do it the obvious way, because when `chain' creates its | |
998 substreams, the substream containing the coding system we're | |
999 creating will have canonicalization expansion done on it, | |
1000 leading to infinite recursion. So we have to generate a new, | |
1001 internal coding system with the previous value of CANONICAL. */ | |
867 | 1002 Ibyte *newname = |
771 | 1003 emacs_sprintf_malloc |
1004 (NULL, "internal-eol-copy-%s-%d", | |
1005 XSTRING_DATA (Fsymbol_name (name_or_existing)), | |
1006 ++coding_system_tick); | |
1007 Lisp_Object newnamesym = intern_int (newname); | |
1008 Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym); | |
1726 | 1009 xfree (newname, Ibyte *); |
771 | 1010 |
1011 XCODING_SYSTEM_CANONICAL (csobj) = | |
1012 make_internal_coding_system | |
1013 (csobj, | |
1014 "internal-eol-wrapper", | |
1015 Qchain, Qunbound, | |
1016 list4 (Qchain, | |
1017 list2 (copied, | |
1018 eol_wrapper == EOL_CR ? | |
1019 Qconvert_eol_cr : | |
1020 Qconvert_eol_crlf), | |
1021 Qcanonicalize_after_coding, | |
1022 csobj)); | |
1023 } | |
1024 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; | |
1025 } | |
1026 | |
1027 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); | |
1028 | |
1029 return csobj; | |
428 | 1030 } |
1031 | |
771 | 1032 Lisp_Object |
2367 | 1033 make_internal_coding_system (Lisp_Object existing, Ascbyte *prefix, |
771 | 1034 Lisp_Object type, Lisp_Object description, |
1035 Lisp_Object props) | |
1036 { | |
1037 return make_coding_system_1 (existing, prefix, type, description, props); | |
1038 } | |
428 | 1039 |
1040 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* | |
1041 Register symbol NAME as a coding system. | |
1042 | |
1043 TYPE describes the conversion method used and should be one of | |
1044 | |
3025 | 1045 nil or `undecided' |
428 | 1046 Automatic conversion. XEmacs attempts to detect the coding system |
1047 used in the file. | |
3025 | 1048 `chain' |
771 | 1049 Chain two or more coding systems together to make a combination coding |
1050 system. | |
3025 | 1051 `no-conversion' |
428 | 1052 No conversion. Use this for binary files and such. On output, |
1053 graphic characters that are not in ASCII or Latin-1 will be | |
1054 replaced by a ?. (For a no-conversion-encoded buffer, these | |
1055 characters will only be present if you explicitly insert them.) | |
3025 | 1056 `convert-eol' |
771 | 1057 Convert CRLF sequences or CR to LF. |
3025 | 1058 `shift-jis' |
428 | 1059 Shift-JIS (a Japanese encoding commonly used in PC operating systems). |
3025 | 1060 `unicode' |
771 | 1061 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.). |
3025 | 1062 `mswindows-unicode-to-multibyte' |
771 | 1063 (MS Windows only) Converts from Windows Unicode to Windows Multibyte |
1064 (any code page encoding) upon encoding, and the other way upon decoding. | |
3025 | 1065 `mswindows-multibyte' |
771 | 1066 Converts to or from Windows Multibyte (any code page encoding). |
1067 This is resolved into a chain of `mswindows-unicode' and | |
1068 `mswindows-unicode-to-multibyte'. | |
3025 | 1069 `iso2022' |
428 | 1070 Any ISO2022-compliant encoding. Among other things, this includes |
1071 JIS (the Japanese encoding commonly used for e-mail), EUC (the | |
1072 standard Unix encoding for Japanese and other languages), and | |
1073 Compound Text (the encoding used in X11). You can specify more | |
442 | 1074 specific information about the conversion with the PROPS argument. |
3025 | 1075 `big5' |
2819 | 1076 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan). |
3025 | 1077 `ccl' |
428 | 1078 The conversion is performed using a user-written pseudo-code |
1079 program. CCL (Code Conversion Language) is the name of this | |
1080 pseudo-code. | |
3025 | 1081 `gzip' |
771 | 1082 GZIP compression format. |
3025 | 1083 `internal' |
428 | 1084 Write out or read in the raw contents of the memory representing |
1085 the buffer's text. This is primarily useful for debugging | |
1086 purposes, and is only enabled when XEmacs has been compiled with | |
1087 DEBUG_XEMACS defined (via the --debug configure option). | |
3025 | 1088 WARNING: Reading in a file using `internal' conversion can result |
428 | 1089 in an internal inconsistency in the memory representing a |
1090 buffer's text, which will produce unpredictable results and may | |
1091 cause XEmacs to crash. Under normal circumstances you should | |
3025 | 1092 never use `internal' conversion. |
428 | 1093 |
771 | 1094 DESCRIPTION is a short English phrase describing the coding system, |
1095 suitable for use as a menu item. (See also the `documentation' property | |
1096 below.) | |
428 | 1097 |
1098 PROPS is a property list, describing the specific nature of the | |
1099 character set. Recognized properties are: | |
1100 | |
3025 | 1101 `mnemonic' |
428 | 1102 String to be displayed in the modeline when this coding system is |
1103 active. | |
1104 | |
3025 | 1105 `documentation' |
771 | 1106 Detailed documentation on the coding system. |
1107 | |
3025 | 1108 `eol-type' |
428 | 1109 End-of-line conversion to be used. It should be one of |
1110 | |
1111 nil | |
1112 Automatically detect the end-of-line type (LF, CRLF, | |
1113 or CR). Also generate subsidiary coding systems named | |
1114 `NAME-unix', `NAME-dos', and `NAME-mac', that are | |
1115 identical to this coding system but have an EOL-TYPE | |
3025 | 1116 value of `lf', `crlf', and `cr', respectively. |
1117 `lf' | |
428 | 1118 The end of a line is marked externally using ASCII LF. |
1119 Since this is also the way that XEmacs represents an | |
1120 end-of-line internally, specifying this option results | |
1121 in no end-of-line conversion. This is the standard | |
1122 format for Unix text files. | |
3025 | 1123 `crlf' |
428 | 1124 The end of a line is marked externally using ASCII |
1125 CRLF. This is the standard format for MS-DOS text | |
1126 files. | |
3025 | 1127 `cr' |
428 | 1128 The end of a line is marked externally using ASCII CR. |
1129 This is the standard format for Macintosh text files. | |
1130 t | |
1131 Automatically detect the end-of-line type but do not | |
1132 generate subsidiary coding systems. (This value is | |
1133 converted to nil when stored internally, and | |
1134 `coding-system-property' will return nil.) | |
1135 | |
3025 | 1136 `post-read-conversion' |
771 | 1137 The value is a function to call after some text is inserted and |
1138 decoded by the coding system itself and before any functions in | |
1139 `after-change-functions' are called. (#### Not actually true in | |
1140 XEmacs. `after-change-functions' will be called twice if | |
1141 `post-read-conversion' changes something.) The argument of this | |
1142 function is the same as for a function in | |
1143 `after-insert-file-functions', i.e. LENGTH of the text inserted, | |
1144 with point at the head of the text to be decoded. | |
428 | 1145 |
3025 | 1146 `pre-write-conversion' |
771 | 1147 The value is a function to call after all functions in |
1148 `write-region-annotate-functions' and `buffer-file-format' are | |
1149 called, and before the text is encoded by the coding system itself. | |
1150 The arguments to this function are the same as those of a function | |
1151 in `write-region-annotate-functions', i.e. FROM and TO, specifying | |
1152 a region of text. | |
1153 | |
1154 | |
1155 | |
1156 The following properties are allowed for FSF compatibility but currently | |
1157 ignored: | |
1158 | |
3025 | 1159 `translation-table-for-decode' |
771 | 1160 The value is a translation table to be applied on decoding. See |
1161 the function `make-translation-table' for the format of translation | |
1162 table. This is not applicable to CCL-based coding systems. | |
1163 | |
3025 | 1164 `translation-table-for-encode' |
771 | 1165 The value is a translation table to be applied on encoding. This is |
1166 not applicable to CCL-based coding systems. | |
1167 | |
3025 | 1168 `safe-chars' |
771 | 1169 The value is a char table. If a character has non-nil value in it, |
1170 the character is safely supported by the coding system. This | |
1171 overrides the specification of safe-charsets. | |
1172 | |
3025 | 1173 `safe-charsets' |
771 | 1174 The value is a list of charsets safely supported by the coding |
1175 system. The value t means that all charsets Emacs handles are | |
1176 supported. Even if some charset is not in this list, it doesn't | |
1177 mean that the charset can't be encoded in the coding system; | |
1178 it just means that some other receiver of text encoded | |
1179 in the coding system won't be able to handle that charset. | |
1180 | |
3025 | 1181 `mime-charset' |
771 | 1182 The value is a symbol of which name is `MIME-charset' parameter of |
1183 the coding system. | |
1184 | |
3025 | 1185 `valid-codes' (meaningful only for a coding system based on CCL) |
771 | 1186 The value is a list to indicate valid byte ranges of the encoded |
1187 file. Each element of the list is an integer or a cons of integer. | |
1188 In the former case, the integer value is a valid byte code. In the | |
1189 latter case, the integers specifies the range of valid byte codes. | |
1190 | |
1191 | |
1192 | |
3025 | 1193 The following additional property is recognized if TYPE is `convert-eol': |
1194 | |
1195 `subtype' | |
793 | 1196 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding, |
1197 the corresponding sequence will be converted to LF. When encoding, | |
1198 the opposite happens. This coding system converts characters to | |
771 | 1199 characters. |
1200 | |
428 | 1201 |
1202 | |
3025 | 1203 The following additional properties are recognized if TYPE is `iso2022': |
1204 | |
1205 `charset-g0' | |
1206 `charset-g1' | |
1207 `charset-g2' | |
1208 `charset-g3' | |
428 | 1209 The character set initially designated to the G0 - G3 registers. |
1210 The value should be one of | |
1211 | |
1212 -- A charset object (designate that character set) | |
1213 -- nil (do not ever use this register) | |
1214 -- t (no character set is initially designated to | |
1215 the register, but may be later on; this automatically | |
1216 sets the corresponding `force-g*-on-output' property) | |
1217 | |
3025 | 1218 `force-g0-on-output' |
1219 `force-g1-on-output' | |
1220 `force-g2-on-output' | |
1221 `force-g2-on-output' | |
428 | 1222 If non-nil, send an explicit designation sequence on output before |
1223 using the specified register. | |
1224 | |
3025 | 1225 `short' |
428 | 1226 If non-nil, use the short forms "ESC $ @", "ESC $ A", and |
1227 "ESC $ B" on output in place of the full designation sequences | |
1228 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B". | |
1229 | |
3025 | 1230 `no-ascii-eol' |
428 | 1231 If non-nil, don't designate ASCII to G0 at each end of line on output. |
1232 Setting this to non-nil also suppresses other state-resetting that | |
1233 normally happens at the end of a line. | |
1234 | |
3025 | 1235 `no-ascii-cntl' |
428 | 1236 If non-nil, don't designate ASCII to G0 before control chars on output. |
1237 | |
3025 | 1238 `seven' |
428 | 1239 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit |
1240 environment. | |
1241 | |
3025 | 1242 `lock-shift' |
428 | 1243 If non-nil, use locking-shift (SO/SI) instead of single-shift |
1244 or designation by escape sequence. | |
1245 | |
3025 | 1246 `no-iso6429' |
428 | 1247 If non-nil, don't use ISO6429's direction specification. |
1248 | |
3025 | 1249 `escape-quoted' |
428 | 1250 If non-nil, literal control characters that are the same as |
1251 the beginning of a recognized ISO2022 or ISO6429 escape sequence | |
1252 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E), | |
1253 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character | |
1254 so that they can be properly distinguished from an escape sequence. | |
1255 (Note that doing this results in a non-portable encoding.) This | |
1256 encoding flag is used for byte-compiled files. Note that ESC | |
1257 is a good choice for a quoting character because there are no | |
1258 escape sequences whose second byte is a character from the Control-0 | |
1259 or Control-1 character sets; this is explicitly disallowed by the | |
1260 ISO2022 standard. | |
1261 | |
3025 | 1262 `input-charset-conversion' |
428 | 1263 A list of conversion specifications, specifying conversion of |
1264 characters in one charset to another when decoding is performed. | |
1265 Each specification is a list of two elements: the source charset, | |
1266 and the destination charset. | |
1267 | |
3025 | 1268 `output-charset-conversion' |
428 | 1269 A list of conversion specifications, specifying conversion of |
1270 characters in one charset to another when encoding is performed. | |
1271 The form of each specification is the same as for | |
3025 | 1272 `input-charset-conversion'. |
428 | 1273 |
1274 | |
771 | 1275 |
428 | 1276 The following additional properties are recognized (and required) |
3025 | 1277 if TYPE is `ccl': |
1278 | |
1279 `decode' | |
428 | 1280 CCL program used for decoding (converting to internal format). |
1281 | |
3025 | 1282 `encode' |
428 | 1283 CCL program used for encoding (converting to external format). |
771 | 1284 |
1285 | |
3025 | 1286 The following additional properties are recognized if TYPE is `chain': |
1287 | |
1288 `chain' | |
771 | 1289 List of coding systems to be chained together, in decoding order. |
1290 | |
3025 | 1291 `canonicalize-after-coding' |
771 | 1292 Coding system to be returned by the detector routines in place of |
1293 this coding system. | |
1294 | |
1295 | |
1296 | |
3025 | 1297 The following additional properties are recognized if TYPE is `unicode': |
1298 | |
1299 `type' | |
771 | 1300 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not |
1301 yet implemented). `utf-16' is the basic two-byte encoding; | |
1302 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible | |
1303 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using | |
1304 only characters that will safely pass through all mail gateways. | |
2297 | 1305 [[ This should be \"transformation format\". There should also be |
1306 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]] | |
771 | 1307 |
3025 | 1308 `little-endian' |
771 | 1309 If non-nil, `utf-16' and `ucs-4' will write out the groups of two |
1310 or four bytes little-endian instead of big-endian. This is required, | |
1311 for example, under Windows. | |
1312 | |
3025 | 1313 `need-bom' |
771 | 1314 If non-nil, a byte order mark (BOM, or Unicode FFFE) should be |
1315 written out at the beginning of the data. This serves both to | |
1316 identify the endianness of the following data and to mark the | |
1317 data as Unicode (at least, this is how Windows uses it). | |
2297 | 1318 [[ The correct term is \"signature\", since this technique may also |
1319 be used with UTF-8. That is the term used in the standard. ]] | |
771 | 1320 |
1321 | |
1322 The following additional properties are recognized if TYPE is | |
3025 | 1323 `mswindows-multibyte': |
1324 | |
1325 `code-page' | |
771 | 1326 Either a number (specifying a particular code page) or one of the |
1327 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI, | |
1328 OEM, Macintosh, or EBCDIC code page associated with a particular | |
1329 locale (given by the `locale' property). NOTE: EBCDIC code pages | |
1330 only exist in Windows 2000 and later. | |
1331 | |
3025 | 1332 `locale' |
771 | 1333 If `code-page' is a symbol, this specifies the locale whose code |
1334 page of the corresponding type should be used. This should be | |
1335 one of the following: A cons of two strings, (LANGUAGE | |
1336 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a | |
1337 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is | |
1338 used); or one of the symbols `current', `user-default', or | |
1339 `system-default', corresponding to the values of | |
1340 `mswindows-current-locale', `mswindows-user-default-locale', or | |
1341 `mswindows-system-default-locale', respectively. | |
1342 | |
1343 | |
1344 | |
3025 | 1345 The following additional properties are recognized if TYPE is `undecided': |
2297 | 1346 [[ Doesn't GNU use \"detect-*\" for the following two? ]] |
771 | 1347 |
3025 | 1348 `do-eol' |
771 | 1349 Do EOL detection. |
1350 | |
3025 | 1351 `do-coding' |
771 | 1352 Do encoding detection. |
1353 | |
3025 | 1354 `coding-system' |
771 | 1355 If encoding detection is not done, use the specified coding system |
1356 to do decoding. This is used internally when implementing coding | |
1357 systems with an EOL type that specifies autodetection (the default), | |
1358 so that the detector routines return the proper subsidiary. | |
1359 | |
1360 | |
1361 | |
3025 | 1362 The following additional property is recognized if TYPE is `gzip': |
1363 | |
1364 `level' | |
771 | 1365 Compression level: 0 through 9, or `default' (currently 6). |
1366 | |
428 | 1367 */ |
771 | 1368 (name, type, description, props)) |
428 | 1369 { |
771 | 1370 return make_coding_system_1 (name, 0, type, description, props); |
428 | 1371 } |
1372 | |
1373 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /* | |
1374 Copy OLD-CODING-SYSTEM to NEW-NAME. | |
1375 If NEW-NAME does not name an existing coding system, a new one will | |
1376 be created. | |
771 | 1377 If you are using this function to create an alias, think again: |
1378 Use `define-coding-system-alias' instead. | |
428 | 1379 */ |
1380 (old_coding_system, new_name)) | |
1381 { | |
1382 Lisp_Object new_coding_system; | |
1383 old_coding_system = Fget_coding_system (old_coding_system); | |
771 | 1384 new_coding_system = |
1385 UNBOUNDP (new_name) ? Qnil : Ffind_coding_system (new_name); | |
428 | 1386 if (NILP (new_coding_system)) |
1387 { | |
793 | 1388 new_coding_system = |
1389 wrap_coding_system | |
1390 (allocate_coding_system | |
1391 (XCODING_SYSTEM (old_coding_system)->methods, | |
1392 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size, | |
1393 new_name)); | |
771 | 1394 if (!UNBOUNDP (new_name)) |
1395 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table); | |
428 | 1396 } |
771 | 1397 else if (XCODING_SYSTEM (old_coding_system)->methods != |
1398 XCODING_SYSTEM (new_coding_system)->methods) | |
1399 invalid_operation_2 ("Coding systems not same type", | |
1400 old_coding_system, new_coding_system); | |
428 | 1401 |
1402 { | |
1403 Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system); | |
1404 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); | |
3017 | 1405 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from)); |
428 | 1406 to->name = new_name; |
1407 } | |
1408 return new_coding_system; | |
1409 } | |
1410 | |
771 | 1411 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, |
1412 1, 1, 0, /* | |
440 | 1413 Return t if OBJECT names a coding system, and is not a coding system alias. |
428 | 1414 */ |
440 | 1415 (object)) |
1416 { | |
1417 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) | |
1418 ? Qt : Qnil; | |
1419 } | |
1420 | |
2297 | 1421 /* #### Shouldn't this really be a find/get pair? */ |
1422 | |
440 | 1423 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* |
1424 Return t if OBJECT is a coding system alias. | |
1425 All coding system aliases are created by `define-coding-system-alias'. | |
1426 */ | |
1427 (object)) | |
428 | 1428 { |
440 | 1429 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero)) |
1430 ? Qt : Qnil; | |
1431 } | |
1432 | |
1433 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /* | |
1434 Return the coding-system symbol for which symbol ALIAS is an alias. | |
1435 */ | |
1436 (alias)) | |
1437 { | |
1438 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil); | |
1439 if (SYMBOLP (aliasee)) | |
1440 return aliasee; | |
1441 else | |
563 | 1442 invalid_argument ("Symbol is not a coding system alias", alias); |
1204 | 1443 RETURN_NOT_REACHED (Qnil); |
440 | 1444 } |
1445 | |
1446 /* A maphash function, for removing dangling coding system aliases. */ | |
1447 static int | |
2286 | 1448 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias), |
440 | 1449 Lisp_Object aliasee, |
1450 void *dangling_aliases) | |
1451 { | |
1452 if (SYMBOLP (aliasee) | |
1453 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil))) | |
428 | 1454 { |
440 | 1455 (*(int *) dangling_aliases)++; |
1456 return 1; | |
428 | 1457 } |
440 | 1458 else |
1459 return 0; | |
1460 } | |
1461 | |
1462 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /* | |
1463 Define symbol ALIAS as an alias for coding system ALIASEE. | |
1464 | |
1465 You can use this function to redefine an alias that has already been defined, | |
1466 but you cannot redefine a name which is the canonical name for a coding system. | |
1467 \(a canonical name of a coding system is what is returned when you call | |
1468 `coding-system-name' on a coding system). | |
1469 | |
1470 ALIASEE itself can be an alias, which allows you to define nested aliases. | |
1471 | |
1472 You are forbidden, however, from creating alias loops or `dangling' aliases. | |
1473 These will be detected, and an error will be signaled if you attempt to do so. | |
1474 | |
1475 If ALIASEE is nil, then ALIAS will simply be undefined. | |
1476 | |
1477 See also `coding-system-alias-p', `coding-system-aliasee', | |
1478 and `coding-system-canonical-name-p'. | |
1479 */ | |
1480 (alias, aliasee)) | |
1481 { | |
2286 | 1482 Lisp_Object probe; |
440 | 1483 |
1484 CHECK_SYMBOL (alias); | |
1485 | |
1486 if (!NILP (Fcoding_system_canonical_name_p (alias))) | |
563 | 1487 invalid_change |
440 | 1488 ("Symbol is the canonical name of a coding system and cannot be redefined", |
1489 alias); | |
1490 | |
1491 if (NILP (aliasee)) | |
1492 { | |
771 | 1493 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix"); |
1494 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos"); | |
1495 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac"); | |
440 | 1496 |
1497 Fremhash (alias, Vcoding_system_hash_table); | |
1498 | |
1499 /* Undefine subsidiary aliases, | |
1500 presumably created by a previous call to this function */ | |
1501 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) && | |
1502 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) && | |
1503 ! NILP (Fcoding_system_alias_p (subsidiary_mac))) | |
1504 { | |
1505 Fdefine_coding_system_alias (subsidiary_unix, Qnil); | |
1506 Fdefine_coding_system_alias (subsidiary_dos, Qnil); | |
1507 Fdefine_coding_system_alias (subsidiary_mac, Qnil); | |
1508 } | |
1509 | |
1510 /* Undefine dangling coding system aliases. */ | |
1511 { | |
1512 int dangling_aliases; | |
1513 | |
1514 do { | |
1515 dangling_aliases = 0; | |
1516 elisp_map_remhash (dangling_coding_system_alias_p, | |
1517 Vcoding_system_hash_table, | |
1518 &dangling_aliases); | |
1519 } while (dangling_aliases > 0); | |
1520 } | |
1521 | |
1522 return Qnil; | |
1523 } | |
1524 | |
1525 if (CODING_SYSTEMP (aliasee)) | |
1526 aliasee = XCODING_SYSTEM_NAME (aliasee); | |
1527 | |
1528 /* Checks that aliasee names a coding-system */ | |
2286 | 1529 (void) Fget_coding_system (aliasee); |
440 | 1530 |
1531 /* Check for coding system alias loops */ | |
1532 if (EQ (alias, aliasee)) | |
563 | 1533 alias_loop: invalid_operation_2 |
440 | 1534 ("Attempt to create a coding system alias loop", alias, aliasee); |
1535 | |
1536 for (probe = aliasee; | |
1537 SYMBOLP (probe); | |
1538 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero)) | |
1539 { | |
1540 if (EQ (probe, alias)) | |
1541 goto alias_loop; | |
1542 } | |
1543 | |
1544 Fputhash (alias, aliasee, Vcoding_system_hash_table); | |
1545 | |
1546 /* Set up aliases for subsidiaries. | |
2297 | 1547 #### There must be a better way to handle subsidiary coding systems. |
1548 Inquiring Minds Want To Know: shouldn't they always be chains? */ | |
440 | 1549 { |
1550 static const char *suffixes[] = { "-unix", "-dos", "-mac" }; | |
1551 int i; | |
1552 for (i = 0; i < countof (suffixes); i++) | |
1553 { | |
1554 Lisp_Object alias_subsidiary = | |
771 | 1555 add_suffix_to_symbol (alias, suffixes[i]); |
440 | 1556 Lisp_Object aliasee_subsidiary = |
771 | 1557 add_suffix_to_symbol (aliasee, suffixes[i]); |
440 | 1558 |
1559 if (! NILP (Ffind_coding_system (aliasee_subsidiary))) | |
1560 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary); | |
1561 } | |
1562 } | |
428 | 1563 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac], |
1564 but it doesn't look intentional, so I'd rather return something | |
1565 meaningful or nothing at all. */ | |
1566 return Qnil; | |
1567 } | |
1568 | |
1569 static Lisp_Object | |
771 | 1570 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) |
428 | 1571 { |
1572 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
1573 Lisp_Object new_coding_system; | |
1574 | |
1575 switch (type) | |
1576 { | |
1577 case EOL_AUTODETECT: return coding_system; | |
1578 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; | |
1579 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; | |
1580 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; | |
2500 | 1581 default: ABORT (); return Qnil; |
428 | 1582 } |
1583 | |
1584 return NILP (new_coding_system) ? coding_system : new_coding_system; | |
1585 } | |
1586 | |
1587 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /* | |
1588 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE. | |
771 | 1589 The logically opposite operation is `coding-system-base'. |
428 | 1590 */ |
1591 (coding_system, eol_type)) | |
1592 { | |
771 | 1593 coding_system = get_coding_system_for_text_file (coding_system, 0); |
428 | 1594 |
1595 return subsidiary_coding_system (coding_system, | |
1596 symbol_to_eol_type (eol_type)); | |
1597 } | |
1598 | |
771 | 1599 DEFUN ("coding-system-base", Fcoding_system_base, |
1600 1, 1, 0, /* | |
1601 Return the base coding system of CODING-SYSTEM. | |
1602 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it | |
1603 returns CODING-SYSTEM. | |
1604 The logically opposite operation is `subsidiary-coding-system'. | |
1605 */ | |
1606 (coding_system)) | |
1607 { | |
1608 Lisp_Object base; | |
1609 | |
1610 coding_system = Fget_coding_system (coding_system); | |
1611 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary)) | |
1612 return Fget_coding_system (Qraw_text); /* hack! */ | |
1613 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system); | |
1614 if (!NILP (base)) | |
1615 return base; | |
1616 return coding_system; | |
1617 } | |
1618 | |
1619 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io, | |
1620 1, 1, 0, /* | |
1621 Return the coding system actually used for I/O. | |
1622 In some cases (e.g. when a particular EOL type is specified) this won't be | |
2297 | 1623 the coding system itself. This can be useful when trying to determine |
1624 precisely how data was decoded. | |
771 | 1625 */ |
1626 (coding_system)) | |
1627 { | |
1628 Lisp_Object canon; | |
1629 | |
1630 coding_system = Fget_coding_system (coding_system); | |
1631 canon = XCODING_SYSTEM_CANONICAL (coding_system); | |
1632 if (!NILP (canon)) | |
1633 return canon; | |
1634 return coding_system; | |
1635 } | |
1636 | |
428 | 1637 |
1638 /************************************************************************/ | |
1639 /* Coding system accessors */ | |
1640 /************************************************************************/ | |
1641 | |
771 | 1642 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /* |
1643 Return the description for CODING-SYSTEM. | |
1644 The `description' of a coding system is a short English phrase giving the | |
1645 name rendered according to English punctuation rules, plus possibly some | |
1646 explanatory text (typically in the form of a parenthetical phrase). The | |
1647 description is intended to be short enough that it can appear as a menu item, | |
1648 and clear enough to be recognizable even to someone who is assumed to have | |
1649 some basic familiarity with different encodings but may not know all the | |
1650 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and | |
1651 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of | |
1652 the encoding is given, followed by a note that this is a Chinese encoding, | |
1653 because the great majority of people encountering this would have no idea | |
1654 what it is, and giving the language indicates whether the encoding should | |
1655 just be ignored or (conceivably) investigated more thoroughly. | |
428 | 1656 */ |
1657 (coding_system)) | |
1658 { | |
1659 coding_system = Fget_coding_system (coding_system); | |
771 | 1660 return XCODING_SYSTEM_DESCRIPTION (coding_system); |
428 | 1661 } |
1662 | |
1663 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /* | |
1664 Return the type of CODING-SYSTEM. | |
1665 */ | |
1666 (coding_system)) | |
1667 { | |
771 | 1668 coding_system = Fget_coding_system (coding_system); |
1669 return XCODING_SYSTEM_TYPE (coding_system); | |
428 | 1670 } |
1671 | |
1672 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /* | |
1673 Return the PROP property of CODING-SYSTEM. | |
1674 */ | |
1675 (coding_system, prop)) | |
1676 { | |
1677 coding_system = Fget_coding_system (coding_system); | |
1678 CHECK_SYMBOL (prop); | |
1679 | |
1680 if (EQ (prop, Qname)) | |
1681 return XCODING_SYSTEM_NAME (coding_system); | |
1682 else if (EQ (prop, Qtype)) | |
1683 return Fcoding_system_type (coding_system); | |
771 | 1684 else if (EQ (prop, Qdescription)) |
1685 return XCODING_SYSTEM_DESCRIPTION (coding_system); | |
428 | 1686 else if (EQ (prop, Qmnemonic)) |
1687 return XCODING_SYSTEM_MNEMONIC (coding_system); | |
771 | 1688 else if (EQ (prop, Qdocumentation)) |
1689 return XCODING_SYSTEM_DOCUMENTATION (coding_system); | |
428 | 1690 else if (EQ (prop, Qeol_type)) |
771 | 1691 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE |
1692 (coding_system)); | |
428 | 1693 else if (EQ (prop, Qeol_lf)) |
1694 return XCODING_SYSTEM_EOL_LF (coding_system); | |
1695 else if (EQ (prop, Qeol_crlf)) | |
1696 return XCODING_SYSTEM_EOL_CRLF (coding_system); | |
1697 else if (EQ (prop, Qeol_cr)) | |
1698 return XCODING_SYSTEM_EOL_CR (coding_system); | |
1699 else if (EQ (prop, Qpost_read_conversion)) | |
1700 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); | |
1701 else if (EQ (prop, Qpre_write_conversion)) | |
1702 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); | |
771 | 1703 else |
1704 { | |
1705 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), | |
1706 getprop, | |
1707 (coding_system, prop), | |
1708 Qunbound); | |
1709 if (UNBOUNDP (value)) | |
1710 invalid_constant ("Unrecognized property", prop); | |
1711 return value; | |
1712 } | |
1713 } | |
1714 | |
1715 | |
1716 /************************************************************************/ | |
1717 /* Coding stream functions */ | |
1718 /************************************************************************/ | |
1719 | |
1720 /* A coding stream is a stream used for encoding or decoding text. The | |
1721 coding-stream object keeps track of the actual coding system, the stream | |
1722 that is at the other end, and data that needs to be persistent across | |
1723 the lifetime of the stream. */ | |
1724 | |
1204 | 1725 extern const struct sized_memory_description chain_coding_stream_description; |
1726 extern const struct sized_memory_description undecided_coding_stream_description; | |
1727 | |
1728 static const struct memory_description coding_stream_data_description_1 []= { | |
2551 | 1729 { XD_BLOCK_PTR, chain_coding_system, 1, |
1730 { &chain_coding_stream_description } }, | |
1731 { XD_BLOCK_PTR, undecided_coding_system, 1, | |
1732 { &undecided_coding_stream_description } }, | |
1204 | 1733 { XD_END } |
1734 }; | |
1735 | |
1736 static const struct sized_memory_description coding_stream_data_description = { | |
1737 sizeof (void *), coding_stream_data_description_1 | |
1738 }; | |
1739 | |
1740 static const struct memory_description coding_lstream_description[] = { | |
1741 { XD_INT, offsetof (struct coding_stream, type) }, | |
1742 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) }, | |
1743 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) }, | |
1744 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) }, | |
1745 { XD_UNION, offsetof (struct coding_stream, data), | |
2551 | 1746 XD_INDIRECT (0, 0), { &coding_stream_data_description } }, |
1204 | 1747 { XD_END } |
1748 }; | |
1749 | |
1750 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding); | |
771 | 1751 |
1752 /* Encoding and decoding are parallel operations, so we create just one | |
1753 stream for both. "Decoding" may involve the extra step of autodetection | |
1754 of the data format, but that's only because of the conventional | |
1755 definition of decoding as converting from external- to | |
1756 internal-formatted data. | |
1757 | |
2297 | 1758 [[ REWRITE ME! ]] |
1759 | |
771 | 1760 #### We really need to abstract out the concept of "data formats" and |
1761 define "converters" that convert from and to specified formats, | |
1762 eliminating the idea of decoding and encoding. When specifying a | |
1763 conversion process, we need to give the data formats themselves, not the | |
1764 conversion processes -- e.g. a coding system called "Unicode->multibyte" | |
1765 converts in both directions, and we could auto-detect the format of data | |
1766 at either end. */ | |
1767 | |
1768 static Bytecount | |
1769 coding_reader (Lstream *stream, unsigned char *data, Bytecount size) | |
1770 { | |
1771 unsigned char *orig_data = data; | |
1772 Bytecount read_size; | |
1773 int error_occurred = 0; | |
1774 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1775 | |
1776 /* We need to interface to coding_{de,en}code_1(), which expects to take | |
1777 some amount of data and store the result into a Dynarr. We have | |
1778 coding_{de,en}code_1() store into c->runoff, and take data from there | |
1779 as necessary. */ | |
1780 | |
1781 /* We loop until we have enough data, reading chunks from the other | |
1782 end and converting it. */ | |
1783 while (1) | |
1784 { | |
1785 /* Take data from convert_to if we can. Make sure to take at | |
1786 most SIZE bytes, and delete the data from convert_to. */ | |
1787 if (Dynarr_length (str->convert_to) > 0) | |
1788 { | |
1789 Bytecount chunk = | |
1790 min (size, (Bytecount) Dynarr_length (str->convert_to)); | |
1791 memcpy (data, Dynarr_atp (str->convert_to, 0), chunk); | |
1792 Dynarr_delete_many (str->convert_to, 0, chunk); | |
1793 data += chunk; | |
1794 size -= chunk; | |
1795 } | |
1796 | |
1797 if (size == 0) | |
1798 break; /* No more room for data */ | |
1799 | |
1800 if (str->eof) | |
1801 break; | |
1802 | |
1803 { | |
1804 /* Exhausted convert_to, so get some more. Read into convert_from, | |
1805 after existing "rejected" data from the last conversion. */ | |
1806 Bytecount rejected = Dynarr_length (str->convert_from); | |
1807 /* #### 1024 is arbitrary; we really need to separate 0 from EOF, | |
1808 and when we get 0, keep taking more data until we don't get 0 -- | |
1809 we don't know how much data the conversion routine might need | |
2297 | 1810 before it can generate any data of its own (eg, bzip2). */ |
814 | 1811 Bytecount readmore = |
1812 str->one_byte_at_a_time ? (Bytecount) 1 : | |
1813 max (size, (Bytecount) 1024); | |
771 | 1814 |
1815 Dynarr_add_many (str->convert_from, 0, readmore); | |
1816 read_size = Lstream_read (str->other_end, | |
1817 Dynarr_atp (str->convert_from, rejected), | |
1818 readmore); | |
1819 /* Trim size down to how much we actually got */ | |
1820 Dynarr_set_size (str->convert_from, rejected + max (0, read_size)); | |
1821 } | |
1822 | |
1823 if (read_size < 0) /* LSTREAM_ERROR */ | |
1824 { | |
1825 error_occurred = 1; | |
1826 break; | |
1827 } | |
1828 if (read_size == 0) /* LSTREAM_EOF */ | |
1829 /* There might be some more end data produced in the translation, | |
1830 so we set a flag and call the conversion method once more to | |
1831 output any final stuff it may be holding, any "go back to a sane | |
1832 state" escape sequences, etc. The conversion method is free to | |
1833 look at this flag, and we use it above to stop looping. */ | |
1834 str->eof = 1; | |
1835 { | |
1836 Bytecount processed; | |
1837 Bytecount to_process = Dynarr_length (str->convert_from); | |
1838 | |
1839 /* Convert the data, and save any rejected data in convert_from */ | |
1840 processed = | |
1841 XCODESYSMETH (str->codesys, convert, | |
1842 (str, Dynarr_atp (str->convert_from, 0), | |
1843 str->convert_to, to_process)); | |
1844 if (processed < 0) | |
1845 { | |
1846 error_occurred = 1; | |
1847 break; | |
1848 } | |
1849 assert (processed <= to_process); | |
1850 if (processed < to_process) | |
1851 memmove (Dynarr_atp (str->convert_from, 0), | |
1852 Dynarr_atp (str->convert_from, processed), | |
1853 to_process - processed); | |
1854 Dynarr_set_size (str->convert_from, to_process - processed); | |
1855 } | |
1856 } | |
1857 | |
1858 if (data - orig_data == 0) | |
1859 return error_occurred ? -1 : 0; | |
1860 else | |
1861 return data - orig_data; | |
1862 } | |
1863 | |
1864 static Bytecount | |
1865 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size) | |
1866 { | |
1867 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1868 | |
1869 /* Convert all our data into convert_to, and then attempt to write | |
1870 it all out to the other end. */ | |
1871 Dynarr_reset (str->convert_to); | |
1872 size = XCODESYSMETH (str->codesys, convert, | |
1873 (str, data, str->convert_to, size)); | |
1874 if (Lstream_write (str->other_end, Dynarr_atp (str->convert_to, 0), | |
1875 Dynarr_length (str->convert_to)) < 0) | |
1876 return -1; | |
1877 else | |
1878 /* The return value indicates how much of the incoming data was | |
1879 processed, not how many bytes were written. */ | |
1880 return size; | |
1881 } | |
1882 | |
1883 static int | |
1884 encode_decode_source_sink_type_is_char (Lisp_Object cs, | |
1885 enum source_or_sink sex, | |
1886 enum encode_decode direction) | |
1887 { | |
1888 return (direction == CODING_DECODE ? | |
1889 decoding_source_sink_type_is_char (cs, sex) : | |
1890 encoding_source_sink_type_is_char (cs, sex)); | |
1891 } | |
1892 | |
1893 /* Ensure that the convert methods only get full characters sent to them to | |
1894 convert if the source of that conversion is characters; and that no such | |
1895 full-character checking happens when the source is bytes. Keep in mind | |
1896 that (1) the conversion_end_type return values take the perspective of | |
1897 encoding; (2) the source for decoding is the same as the sink for | |
1898 encoding; (3) when writing, the data is given to us, and we set our own | |
1899 stream to be character mode or not; (4) when reading, the data comes | |
1900 from the other_end stream, and we set that one to be character mode or | |
1901 not. This is consistent with the comment above the prototype for | |
1902 Lstream_set_character_mode(), which lays out rules for who is allowed to | |
1903 modify the character type mode on a stream. | |
1904 | |
814 | 1905 If we're a read stream, we're always setting character mode on the |
1906 source, but we also set it on ourselves consistent with the flag that | |
1907 can disable this (see again the comment above | |
1908 Lstream_set_character_mode()). | |
1909 */ | |
771 | 1910 |
1911 static void | |
1912 set_coding_character_mode (Lstream *stream) | |
1913 { | |
1914 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1915 Lstream *stream_to_set = | |
1916 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end; | |
1917 if (encode_decode_source_sink_type_is_char | |
1918 (str->codesys, CODING_SOURCE, str->direction)) | |
1919 Lstream_set_character_mode (stream_to_set); | |
1920 else | |
1921 Lstream_unset_character_mode (stream_to_set); | |
814 | 1922 if (str->set_char_mode_on_us_when_reading && |
1923 (stream->flags & LSTREAM_FL_READ)) | |
1924 { | |
1925 if (encode_decode_source_sink_type_is_char | |
1926 (str->codesys, CODING_SINK, str->direction)) | |
1927 Lstream_set_character_mode (stream); | |
1928 else | |
1929 Lstream_unset_character_mode (stream); | |
1930 } | |
771 | 1931 } |
1932 | |
1933 static Lisp_Object | |
1934 coding_marker (Lisp_Object stream) | |
1935 { | |
1936 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream)); | |
1937 | |
1938 mark_object (str->orig_codesys); | |
1939 mark_object (str->codesys); | |
1940 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str)); | |
1941 return wrap_lstream (str->other_end); | |
1942 } | |
1943 | |
1944 static int | |
1945 coding_rewinder (Lstream *stream) | |
1946 { | |
1947 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1948 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str)); | |
1949 | |
1950 str->ch = 0; | |
1951 Dynarr_reset (str->convert_to); | |
1952 Dynarr_reset (str->convert_from); | |
1953 return Lstream_rewind (str->other_end); | |
1954 } | |
1955 | |
1956 static int | |
1957 coding_seekable_p (Lstream *stream) | |
1958 { | |
1959 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1960 return Lstream_seekable_p (str->other_end); | |
1961 } | |
1962 | |
1963 static int | |
1964 coding_flusher (Lstream *stream) | |
1965 { | |
1966 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1967 return Lstream_flush (str->other_end); | |
1968 } | |
1969 | |
1970 static int | |
1971 coding_closer (Lstream *stream) | |
1972 { | |
1973 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1974 if (stream->flags & LSTREAM_FL_WRITE) | |
1975 { | |
1976 str->eof = 1; | |
1977 coding_writer (stream, 0, 0); | |
1978 str->eof = 0; | |
1979 } | |
1980 /* It's safe to free the runoff dynarrs now because they are used only | |
1981 during conversion. We need to keep the type-specific data around, | |
1982 though, because of canonicalize_after_coding. */ | |
1983 if (str->convert_to) | |
1984 { | |
1985 Dynarr_free (str->convert_to); | |
1986 str->convert_to = 0; | |
1987 } | |
1988 if (str->convert_from) | |
428 | 1989 { |
771 | 1990 Dynarr_free (str->convert_from); |
1991 str->convert_from = 0; | |
1992 } | |
1993 | |
800 | 1994 if (str->no_close_other) |
1995 return Lstream_flush (str->other_end); | |
1996 else | |
1997 return Lstream_close (str->other_end); | |
771 | 1998 } |
1999 | |
2000 static void | |
2001 coding_finalizer (Lstream *stream) | |
2002 { | |
2003 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2004 | |
2005 assert (!str->finalized); | |
2006 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2007 if (str->data) | |
2008 { | |
1726 | 2009 xfree (str->data, void *); |
771 | 2010 str->data = 0; |
2011 } | |
2012 str->finalized = 1; | |
2013 } | |
2014 | |
2015 static Lisp_Object | |
2016 coding_stream_canonicalize_after_coding (Lstream *stream) | |
2017 { | |
2018 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2019 | |
2020 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding, | |
2021 (str), str->codesys); | |
2022 } | |
2023 | |
2024 Lisp_Object | |
2025 coding_stream_detected_coding_system (Lstream *stream) | |
2026 { | |
2027 Lisp_Object codesys = | |
2028 coding_stream_canonicalize_after_coding (stream); | |
2029 if (NILP (codesys)) | |
2030 return Fget_coding_system (Qidentity); | |
2031 return codesys; | |
2032 } | |
2033 | |
2034 Lisp_Object | |
2035 coding_stream_coding_system (Lstream *stream) | |
2036 { | |
2037 return CODING_STREAM_DATA (stream)->codesys; | |
2038 } | |
2039 | |
2040 /* Change the coding system associated with a stream. */ | |
2041 | |
2042 void | |
2043 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys) | |
2044 { | |
2045 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2046 if (EQ (str->orig_codesys, codesys)) | |
2047 return; | |
2048 /* We do the equivalent of closing the stream, destroying it, and | |
2049 reinitializing it. This includes flushing out the data and signalling | |
2050 EOF, if we're a writing stream; we also replace the type-specific data | |
2051 with the data appropriate for the new coding system. */ | |
2052 if (!NILP (str->codesys)) | |
2053 { | |
2054 if (lstr->flags & LSTREAM_FL_WRITE) | |
2055 { | |
2056 Lstream_flush (lstr); | |
2057 str->eof = 1; | |
2058 coding_writer (lstr, 0, 0); | |
2059 str->eof = 0; | |
2060 } | |
2061 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2062 } | |
2063 str->orig_codesys = codesys; | |
2064 str->codesys = coding_system_real_canonical (codesys); | |
2065 | |
2066 if (str->data) | |
2067 { | |
1726 | 2068 xfree (str->data, void *); |
771 | 2069 str->data = 0; |
428 | 2070 } |
771 | 2071 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size) |
1204 | 2072 { |
2073 str->data = | |
2074 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)-> | |
2075 coding_data_size); | |
2076 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype; | |
2077 } | |
771 | 2078 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str)); |
2079 /* The new coding system may have different ideas regarding whether its | |
2080 ends are characters or bytes. */ | |
2081 set_coding_character_mode (lstr); | |
2082 } | |
2083 | |
2084 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding | |
2085 stream for writing, no automatic code detection will be performed. | |
2086 The reason for this is that automatic code detection requires a | |
2087 seekable input. Things will also fail if you open a coding | |
2088 stream for reading using a non-fully-specified coding system and | |
2089 a non-seekable input stream. */ | |
2090 | |
2091 static Lisp_Object | |
2092 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys, | |
800 | 2093 const char *mode, enum encode_decode direction, |
802 | 2094 int flags) |
771 | 2095 { |
2096 Lstream *lstr = Lstream_new (lstream_coding, mode); | |
2097 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2098 | |
2099 codesys = Fget_coding_system (codesys); | |
2100 xzero (*str); | |
2101 str->codesys = Qnil; | |
2102 str->orig_codesys = Qnil; | |
2103 str->us = lstr; | |
2104 str->other_end = stream; | |
2105 str->convert_to = Dynarr_new (unsigned_char); | |
2106 str->convert_from = Dynarr_new (unsigned_char); | |
2107 str->direction = direction; | |
814 | 2108 if (flags & LSTREAM_FL_NO_CLOSE_OTHER) |
802 | 2109 str->no_close_other = 1; |
814 | 2110 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME) |
802 | 2111 str->one_byte_at_a_time = 1; |
814 | 2112 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING)) |
2113 str->set_char_mode_on_us_when_reading = 1; | |
802 | 2114 |
771 | 2115 set_coding_stream_coding_system (lstr, codesys); |
793 | 2116 return wrap_lstream (lstr); |
771 | 2117 } |
2118 | |
814 | 2119 /* FLAGS: |
2120 | |
2121 LSTREAM_FL_NO_CLOSE_OTHER | |
2122 Don't close STREAM (the stream at the other end) when this stream is | |
2123 closed. | |
2124 | |
2125 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME | |
2126 When reading from STREAM, read and process one byte at a time rather | |
2127 than in large chunks. This is for reading from TTY's, so we don't | |
2128 block. #### We should instead create a non-blocking filedesc stream | |
2129 that emulates the behavior as necessary using select(), when the | |
2130 fcntls don't work. (As seems to be the case on Cygwin.) | |
2131 | |
2132 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING | |
2133 When reading from STREAM, read and process one byte at a time rather | |
2134 than in large chunks. This is for reading from TTY's, so we don't | |
2135 block. #### We should instead create a non-blocking filedesc stream | |
2136 that emulates the behavior as necessary using select(), when the | |
2137 fcntls don't work. (As seems to be the case on Cygwin.) | |
2138 */ | |
771 | 2139 Lisp_Object |
2140 make_coding_input_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2141 enum encode_decode direction, int flags) |
771 | 2142 { |
800 | 2143 return make_coding_stream_1 (stream, codesys, "r", direction, |
802 | 2144 flags); |
771 | 2145 } |
2146 | |
814 | 2147 /* FLAGS: |
2148 | |
2149 LSTREAM_FL_NO_CLOSE_OTHER | |
2150 Don't close STREAM (the stream at the other end) when this stream is | |
2151 closed. | |
2152 */ | |
771 | 2153 Lisp_Object |
2154 make_coding_output_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2155 enum encode_decode direction, int flags) |
771 | 2156 { |
800 | 2157 return make_coding_stream_1 (stream, codesys, "w", direction, |
802 | 2158 flags); |
771 | 2159 } |
2160 | |
2161 static Lisp_Object | |
2162 encode_decode_coding_region (Lisp_Object start, Lisp_Object end, | |
2163 Lisp_Object coding_system, Lisp_Object buffer, | |
2164 enum encode_decode direction) | |
2165 { | |
2166 Charbpos b, e; | |
2167 struct buffer *buf = decode_buffer (buffer, 0); | |
2168 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil; | |
2169 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil; | |
2170 Lisp_Object lb_outstream = Qnil; | |
2171 Lisp_Object next; | |
2172 Lstream *istr, *ostr; | |
2173 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; | |
2174 struct gcpro ngcpro1; | |
2175 int source_char, sink_char; | |
2176 | |
2177 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
2178 barf_if_buffer_read_only (buf, b, e); | |
2179 | |
2180 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream); | |
2181 NGCPRO1 (auto_outstream); | |
2182 | |
2183 coding_system = Fget_coding_system (coding_system); | |
2184 source_char = encode_decode_source_sink_type_is_char (coding_system, | |
2185 CODING_SOURCE, | |
2186 direction); | |
2187 sink_char = encode_decode_source_sink_type_is_char (coding_system, | |
2188 CODING_SINK, | |
2189 direction); | |
2190 | |
2191 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */ | |
2192 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
2193 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0); | |
2194 | |
2195 if (direction == CODING_DECODE && | |
2196 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT) | |
2197 next = auto_outstream = | |
2198 make_coding_output_stream | |
800 | 2199 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect), |
2200 CODING_DECODE, 0); | |
771 | 2201 |
2202 if (!sink_char) | |
2203 next = from_outstream = | |
800 | 2204 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0); |
771 | 2205 outstream = make_coding_output_stream (XLSTREAM (next), coding_system, |
800 | 2206 direction, 0); |
771 | 2207 if (!source_char) |
428 | 2208 { |
771 | 2209 to_outstream = |
2210 make_coding_output_stream (XLSTREAM (outstream), | |
800 | 2211 Qbinary, CODING_ENCODE, 0); |
771 | 2212 ostr = XLSTREAM (to_outstream); |
2213 } | |
2214 else | |
2215 ostr = XLSTREAM (outstream); | |
2216 istr = XLSTREAM (instream); | |
2217 | |
2218 /* The chain of streams looks like this: | |
2219 | |
2297 | 2220 [BUFFER] <----- (( read from/send to loop )) |
771 | 2221 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is |
2222 in bytes] | |
2223 ------> [ENCODE/DECODE AS SPECIFIED] | |
2224 ------> [BYTE->CHAR i.e. DECODE AS BINARY | |
2225 if sink is in bytes] | |
2226 ------> [AUTODETECT EOL if | |
2227 we're decoding and | |
2228 coding system calls | |
2229 for this] | |
2230 ------> [BUFFER] | |
2231 */ | |
2367 | 2232 |
2233 /* #### See comment | |
2234 | |
2235 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS | |
2236 | |
2237 in text.c. | |
2238 */ | |
2239 | |
771 | 2240 while (1) |
2241 { | |
2242 char tempbuf[1024]; /* some random amount */ | |
2243 Charbpos newpos, even_newer_pos; | |
2244 Charbpos oldpos = lisp_buffer_stream_startpos (istr); | |
2245 Bytecount size_in_bytes = | |
2246 Lstream_read (istr, tempbuf, sizeof (tempbuf)); | |
2247 | |
2248 if (!size_in_bytes) | |
2249 break; | |
2250 newpos = lisp_buffer_stream_startpos (istr); | |
2251 Lstream_write (ostr, tempbuf, size_in_bytes); | |
2252 even_newer_pos = lisp_buffer_stream_startpos (istr); | |
2253 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos), | |
2254 even_newer_pos, 0); | |
428 | 2255 } |
771 | 2256 |
2257 { | |
2258 Charcount retlen = | |
2259 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b; | |
2260 Lstream_close (istr); | |
2261 Lstream_close (ostr); | |
2262 NUNGCPRO; | |
2263 UNGCPRO; | |
2264 Lstream_delete (istr); | |
2265 if (!NILP (from_outstream)) | |
2266 Lstream_delete (XLSTREAM (from_outstream)); | |
2267 Lstream_delete (XLSTREAM (outstream)); | |
2268 if (!NILP (to_outstream)) | |
2269 Lstream_delete (XLSTREAM (to_outstream)); | |
2270 if (!NILP (auto_outstream)) | |
2271 Lstream_delete (XLSTREAM (auto_outstream)); | |
2272 Lstream_delete (XLSTREAM (lb_outstream)); | |
2273 return make_int (retlen); | |
2274 } | |
2275 } | |
2276 | |
2277 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, 0, /* | |
2278 Decode the text between START and END which is encoded in CODING-SYSTEM. | |
2279 This is useful if you've read in encoded text from a file without decoding | |
2280 it (e.g. you read in a JIS-formatted file but used the `binary' or | |
2281 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). | |
2282 Return length of decoded text. | |
2283 BUFFER defaults to the current buffer if unspecified. | |
2284 */ | |
2285 (start, end, coding_system, buffer)) | |
2286 { | |
2287 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2288 CODING_DECODE); | |
2289 } | |
2290 | |
2291 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, 0, /* | |
2292 Encode the text between START and END using CODING-SYSTEM. | |
2293 This will, for example, convert Japanese characters into stuff such as | |
2294 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded | |
2295 text. BUFFER defaults to the current buffer if unspecified. | |
2296 */ | |
2297 (start, end, coding_system, buffer)) | |
2298 { | |
2299 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2300 CODING_ENCODE); | |
428 | 2301 } |
2302 | |
2303 | |
2304 /************************************************************************/ | |
771 | 2305 /* Chain methods */ |
428 | 2306 /************************************************************************/ |
2307 | |
771 | 2308 /* #### Need a way to create "opposite-direction" coding systems. */ |
2309 | |
2310 /* Chain two or more coding systems together to make a combination coding | |
2311 system. */ | |
2312 | |
2313 struct chain_coding_system | |
2314 { | |
2315 /* List of coding systems, in decode order */ | |
2316 Lisp_Object *chain; | |
2317 /* Number of coding systems in list */ | |
2318 int count; | |
2319 /* Coding system to return as a result of canonicalize-after-coding */ | |
2320 Lisp_Object canonicalize_after_coding; | |
2321 }; | |
2322 | |
2323 struct chain_coding_stream | |
2324 { | |
2325 int initted; | |
2326 /* Lstreams for chain coding system */ | |
2327 Lisp_Object *lstreams; | |
2328 int lstream_count; | |
2329 }; | |
2330 | |
1204 | 2331 static const struct memory_description chain_coding_system_description[] = { |
2332 { XD_INT, offsetof (struct chain_coding_system, count) }, | |
2367 | 2333 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain), |
2551 | 2334 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
1204 | 2335 { XD_LISP_OBJECT, offsetof (struct chain_coding_system, |
2336 canonicalize_after_coding) }, | |
771 | 2337 { XD_END } |
2338 }; | |
2339 | |
1204 | 2340 static const struct memory_description chain_coding_stream_description_1 [] = { |
2341 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) }, | |
2367 | 2342 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams), |
2551 | 2343 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
771 | 2344 { XD_END } |
2345 }; | |
2346 | |
1204 | 2347 const struct sized_memory_description chain_coding_stream_description = { |
2348 sizeof (struct chain_coding_stream), chain_coding_stream_description_1 | |
2349 }; | |
2350 | |
2351 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain); | |
2352 | |
771 | 2353 static Lisp_Object |
2354 chain_canonicalize (Lisp_Object codesys) | |
2355 { | |
2356 /* We make use of the fact that this method is called at init time, after | |
2357 properties have been parsed. init_method is called too early. */ | |
2358 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2359 any more. */ | |
2360 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys), | |
2361 XCODING_SYSTEM_CHAIN_CHAIN (codesys)); | |
2362 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys), | |
2363 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys), | |
2364 chain)); | |
2365 Fputhash (chain, codesys, Vchain_canonicalize_hash_table); | |
2366 return codesys; | |
2367 } | |
2368 | |
2369 static Lisp_Object | |
2370 chain_canonicalize_after_coding (struct coding_stream *str) | |
2371 { | |
2372 Lisp_Object cac = | |
2373 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys); | |
2374 if (!NILP (cac)) | |
2375 return cac; | |
2376 return str->codesys; | |
2377 #if 0 | |
2378 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2379 Lisp_Object us = str->codesys, codesys; | |
2380 int i; | |
2381 Lisp_Object chain; | |
2382 Lisp_Object tail; | |
2383 int changed = 0; | |
2384 | |
2385 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2386 any more. */ | |
2387 if (str->direction == CODING_ENCODE || !data->initted) | |
2388 return us; | |
2389 | |
2390 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us), | |
2391 XCODING_SYSTEM_CHAIN_CHAIN (us)); | |
2392 | |
2393 tail = chain; | |
2394 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++) | |
2395 { | |
2396 codesys = (coding_stream_canonicalize_after_coding | |
2397 (XLSTREAM (data->lstreams[i]))); | |
2398 if (!EQ (codesys, XCAR (tail))) | |
2399 changed = 1; | |
2400 XCAR (tail) = codesys; | |
2401 tail = XCDR (tail); | |
2402 } | |
2403 | |
2404 if (!changed) | |
2405 return us; | |
2406 | |
2407 chain = delq_no_quit (Qnil, chain); | |
2408 | |
2409 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) && | |
2410 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us))) | |
2411 { | |
2412 if (NILP (chain)) | |
2413 return Qnil; | |
2414 if (NILP (XCDR (chain))) | |
2415 return XCAR (chain); | |
2416 } | |
2417 | |
2418 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us), | |
2419 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us), | |
2420 chain)), Vchain_canonicalize_hash_table, | |
2421 Qnil); | |
2422 if (!NILP (codesys)) | |
2423 return codesys; | |
2424 return make_internal_coding_system | |
2425 (us, "internal-chain-canonicalizer-wrapper", | |
2426 Qchain, Qunbound, list2 (Qchain, chain)); | |
2427 #endif /* 0 */ | |
2428 } | |
2429 | |
2430 static void | |
2431 chain_init (Lisp_Object codesys) | |
2432 { | |
2433 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil; | |
2434 } | |
2435 | |
2436 static void | |
2437 chain_mark (Lisp_Object codesys) | |
2438 { | |
2439 int i; | |
2440 | |
2441 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++) | |
2442 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]); | |
2443 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys)); | |
2444 } | |
2445 | |
2446 static void | |
2447 chain_mark_coding_stream_1 (struct chain_coding_stream *data) | |
2448 { | |
2449 int i; | |
2450 | |
2451 for (i = 0; i < data->lstream_count; i++) | |
2452 mark_object (data->lstreams[i]); | |
2453 } | |
2454 | |
2455 static void | |
2456 chain_mark_coding_stream (struct coding_stream *str) | |
2457 { | |
2458 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2459 } | |
2460 | |
2461 static void | |
2462 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
2463 { | |
2464 int i; | |
2465 | |
826 | 2466 write_c_string (printcharfun, "("); |
771 | 2467 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++) |
2468 { | |
826 | 2469 write_c_string (printcharfun, i == 0 ? "" : "->"); |
771 | 2470 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i], |
2471 printcharfun, escapeflag); | |
2472 } | |
2473 { | |
2474 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs); | |
2475 if (!NILP (cac)) | |
2476 { | |
2477 if (i > 0) | |
826 | 2478 write_c_string (printcharfun, " "); |
2479 write_c_string (printcharfun, "canonicalize-after-coding="); | |
771 | 2480 print_coding_system_in_print_method (cac, printcharfun, escapeflag); |
2481 } | |
2482 } | |
2483 | |
826 | 2484 write_c_string (printcharfun, ")"); |
771 | 2485 } |
2486 | |
2487 static void | |
2488 chain_rewind_coding_stream_1 (struct chain_coding_stream *data) | |
2489 { | |
2490 /* Each will rewind the next; there is always at least one stream (the | |
2491 dynarr stream at the end) if we're initted */ | |
2492 if (data->initted) | |
2493 Lstream_rewind (XLSTREAM (data->lstreams[0])); | |
2494 } | |
2495 | |
2496 static void | |
2497 chain_rewind_coding_stream (struct coding_stream *str) | |
2498 { | |
2499 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2500 } | |
2501 | |
2502 static void | |
2503 chain_init_coding_streams_1 (struct chain_coding_stream *data, | |
2504 unsigned_char_dynarr *dst, | |
2505 int ncodesys, Lisp_Object *codesys, | |
2506 enum encode_decode direction) | |
2507 { | |
2508 int i; | |
2509 Lisp_Object lstream_out; | |
2510 | |
2511 data->lstream_count = ncodesys + 1; | |
2512 data->lstreams = xnew_array (Lisp_Object, data->lstream_count); | |
2513 | |
2514 lstream_out = make_dynarr_output_stream (dst); | |
2515 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0); | |
2516 data->lstreams[data->lstream_count - 1] = lstream_out; | |
2517 | |
2518 for (i = ncodesys - 1; i >= 0; i--) | |
2519 { | |
2520 data->lstreams[i] = | |
2521 make_coding_output_stream | |
2522 (XLSTREAM (lstream_out), | |
2523 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i], | |
800 | 2524 direction, 0); |
771 | 2525 lstream_out = data->lstreams[i]; |
2526 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, | |
2527 0); | |
2528 } | |
2529 data->initted = 1; | |
2530 } | |
2531 | |
2532 static Bytecount | |
2533 chain_convert (struct coding_stream *str, const UExtbyte *src, | |
2534 unsigned_char_dynarr *dst, Bytecount n) | |
2535 { | |
2536 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2537 | |
2538 if (str->eof) | |
2539 { | |
2540 /* Each will close the next; there is always at least one stream (the | |
2541 dynarr stream at the end) if we're initted. We need to close now | |
2542 because more data may be generated. */ | |
2543 if (data->initted) | |
2544 Lstream_close (XLSTREAM (data->lstreams[0])); | |
2545 return n; | |
2546 } | |
2547 | |
2548 if (!data->initted) | |
2549 chain_init_coding_streams_1 | |
2550 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys), | |
2551 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction); | |
2552 | |
2553 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0) | |
2554 return -1; | |
2555 return n; | |
2556 } | |
2557 | |
2558 static void | |
2559 chain_finalize_coding_stream_1 (struct chain_coding_stream *data) | |
2560 { | |
2561 if (data->lstreams) | |
2562 { | |
2297 | 2563 /* During GC, these objects are unmarked, and are about to be freed. |
2564 We do NOT want them on the free list, and that will cause lots of | |
2565 nastiness including crashes. Just let them be freed normally. */ | |
771 | 2566 if (!gc_in_progress) |
2567 { | |
2568 int i; | |
2297 | 2569 /* Order of deletion is important here! Delete from the head of |
2570 the chain and work your way towards the tail. In general, | |
2571 when you delete an object, there should be *NO* pointers to it | |
2572 anywhere. Deleting back-to-front would be a problem because | |
2573 there are pointers going forward. If there were pointers in | |
2574 both directions, you'd have to disconnect the pointers to a | |
2575 particular object before deleting it. */ | |
771 | 2576 for (i = 0; i < data->lstream_count; i++) |
2577 Lstream_delete (XLSTREAM ((data->lstreams)[i])); | |
2578 } | |
1726 | 2579 xfree (data->lstreams, Lisp_Object *); |
771 | 2580 } |
2581 } | |
2582 | |
2583 static void | |
2584 chain_finalize_coding_stream (struct coding_stream *str) | |
2585 { | |
2586 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2587 } | |
2588 | |
2589 static void | |
2590 chain_finalize (Lisp_Object c) | |
2591 { | |
2592 if (XCODING_SYSTEM_CHAIN_CHAIN (c)) | |
1726 | 2593 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c), Lisp_Object *); |
771 | 2594 } |
2595 | |
428 | 2596 static int |
771 | 2597 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
2598 { | |
2599 if (EQ (key, Qchain)) | |
2600 { | |
2601 Lisp_Object *cslist; | |
2602 int count = 0; | |
2603 int i; | |
2604 | |
2367 | 2605 { |
2606 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2607 { | |
2608 Fget_coding_system (elt); | |
2609 count++; | |
2610 } | |
2611 } | |
771 | 2612 |
2613 cslist = xnew_array (Lisp_Object, count); | |
2614 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist; | |
2615 | |
2616 count = 0; | |
2367 | 2617 { |
2618 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2619 { | |
2620 cslist[count] = Fget_coding_system (elt); | |
2621 count++; | |
2622 } | |
2623 } | |
771 | 2624 |
2625 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count; | |
2626 | |
2627 for (i = 0; i < count - 1; i++) | |
2628 { | |
2629 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) != | |
2630 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE)) | |
2631 invalid_argument_2 ("Sink of first must match source of second", | |
2632 cslist[i], cslist[i + 1]); | |
2633 } | |
2634 } | |
2635 else if (EQ (key, Qcanonicalize_after_coding)) | |
2636 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = | |
2637 Fget_coding_system (value); | |
2638 else | |
2639 return 0; | |
2640 return 1; | |
2641 } | |
2642 | |
2643 static Lisp_Object | |
2644 chain_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2645 { | |
2646 if (EQ (prop, Qchain)) | |
2647 { | |
2648 Lisp_Object result = Qnil; | |
2649 int i; | |
2650 | |
2651 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++) | |
2652 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i], | |
2653 result); | |
2654 | |
2655 return Fnreverse (result); | |
2656 } | |
2657 else if (EQ (prop, Qcanonicalize_after_coding)) | |
2658 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system); | |
2659 else | |
2660 return Qunbound; | |
2661 } | |
2662 | |
2663 static enum source_sink_type | |
2664 chain_conversion_end_type (Lisp_Object codesys) | |
2665 { | |
2666 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys); | |
2667 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys); | |
2668 int charp_source, charp_sink; | |
2669 | |
2670 if (n == 0) | |
2671 return DECODES_BYTE_TO_BYTE; /* arbitrary */ | |
2672 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE); | |
2673 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK); | |
2674 | |
2675 switch (charp_source * 2 + charp_sink) | |
2676 { | |
2677 case 0: return DECODES_BYTE_TO_BYTE; | |
2678 case 1: return DECODES_BYTE_TO_CHARACTER; | |
2679 case 2: return DECODES_CHARACTER_TO_BYTE; | |
2680 case 3: return DECODES_CHARACTER_TO_CHARACTER; | |
2681 } | |
2682 | |
2500 | 2683 ABORT (); |
771 | 2684 return DECODES_BYTE_TO_BYTE; |
2685 } | |
2686 | |
2687 | |
2688 /************************************************************************/ | |
2689 /* No-conversion methods */ | |
2690 /************************************************************************/ | |
2691 | |
2692 /* "No conversion"; used for binary files. We use quotes because there | |
2693 really is some conversion being applied (it does byte<->char | |
2694 conversion), but it appears to the user as if the text is read in | |
2297 | 2695 without conversion. |
2696 | |
2697 #### Shouldn't we _call_ it that, then? And while we're at it, | |
2698 separate it into "to_internal" and "to_external"? */ | |
771 | 2699 DEFINE_CODING_SYSTEM_TYPE (no_conversion); |
2700 | |
2701 /* This is used when reading in "binary" files -- i.e. files that may | |
2702 contain all 256 possible byte values and that are not to be | |
2703 interpreted as being in any particular encoding. */ | |
2704 static Bytecount | |
2705 no_conversion_convert (struct coding_stream *str, | |
2706 const UExtbyte *src, | |
2707 unsigned_char_dynarr *dst, Bytecount n) | |
2708 { | |
2709 UExtbyte c; | |
2710 unsigned int ch = str->ch; | |
2711 Bytecount orign = n; | |
2712 | |
2713 if (str->direction == CODING_DECODE) | |
2714 { | |
2715 while (n--) | |
2716 { | |
2717 c = *src++; | |
2718 | |
2719 DECODE_ADD_BINARY_CHAR (c, dst); | |
2720 } | |
2721 | |
2722 if (str->eof) | |
2723 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2724 } | |
2725 else | |
2726 { | |
2727 | |
2728 while (n--) | |
2729 { | |
2730 c = *src++; | |
826 | 2731 if (byte_ascii_p (c)) |
771 | 2732 { |
2733 assert (ch == 0); | |
2734 Dynarr_add (dst, c); | |
2735 } | |
2736 #ifdef MULE | |
867 | 2737 else if (ibyte_leading_byte_p (c)) |
771 | 2738 { |
2739 assert (ch == 0); | |
2740 if (c == LEADING_BYTE_LATIN_ISO8859_1 || | |
2741 c == LEADING_BYTE_CONTROL_1) | |
2742 ch = c; | |
2743 else | |
2297 | 2744 /* #### This is just plain unacceptable. */ |
771 | 2745 Dynarr_add (dst, '~'); /* untranslatable character */ |
2746 } | |
2747 else | |
2748 { | |
2749 if (ch == LEADING_BYTE_LATIN_ISO8859_1) | |
2750 Dynarr_add (dst, c); | |
2751 else if (ch == LEADING_BYTE_CONTROL_1) | |
2752 { | |
2753 assert (c < 0xC0); | |
2754 Dynarr_add (dst, c - 0x20); | |
2755 } | |
2756 /* else it should be the second or third byte of an | |
2757 untranslatable character, so ignore it */ | |
2758 ch = 0; | |
2759 } | |
2760 #endif /* MULE */ | |
2761 | |
2762 } | |
2763 } | |
2764 | |
2765 str->ch = ch; | |
2766 return orign; | |
2767 } | |
2768 | |
2769 DEFINE_DETECTOR (no_conversion); | |
2770 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
2771 | |
2772 struct no_conversion_detector | |
2773 { | |
2774 int dummy; | |
2775 }; | |
2776 | |
2777 static void | |
2286 | 2778 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src), |
2779 Bytecount UNUSED (n)) | |
771 | 2780 { |
2781 /* Hack until we get better handling of this stuff! */ | |
2782 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY; | |
2783 } | |
2784 | |
2785 | |
2786 /************************************************************************/ | |
2787 /* Convert-eol methods */ | |
2788 /************************************************************************/ | |
2789 | |
2790 /* This is used to handle end-of-line (EOL) differences. It is | |
2819 | 2791 character-to-character, and works (when encoding) *BEFORE* sending data to |
2792 the main encoding routine -- thus, that routine must handle different EOL | |
2793 types itself if it does line-oriented type processing. This is unavoidable | |
2794 because we don't know whether the output of the main encoding routine is | |
2795 ASCII compatible (UTF-16 is definitely not, for example). [[ sjt sez this | |
2796 is bogus. There should be _no_ EOL processing (or processing of any kind) | |
2797 after conversion to external. ]] | |
771 | 2798 |
793 | 2799 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil. |
771 | 2800 */ |
2801 | |
2802 struct convert_eol_coding_system | |
2803 { | |
2804 enum eol_type subtype; | |
2132 | 2805 int dummy; /* On some architectures (eg ia64) the portable dumper can |
2806 produce unaligned access errors without this field. Probably | |
2807 because the combined structure of this structure and | |
2808 Lisp_Coding_System is not properly aligned. */ | |
771 | 2809 }; |
2810 | |
2811 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2812 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2813 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2814 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2815 | |
2816 struct convert_eol_coding_stream | |
2817 { | |
2818 enum eol_type actual; | |
2819 }; | |
2820 | |
1204 | 2821 static const struct memory_description |
771 | 2822 convert_eol_coding_system_description[] = { |
2823 { XD_END } | |
2824 }; | |
2825 | |
1204 | 2826 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol); |
2827 | |
771 | 2828 static void |
2286 | 2829 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun, |
2830 int UNUSED (escapeflag)) | |
771 | 2831 { |
2832 struct convert_eol_coding_system *data = | |
2833 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol); | |
2834 | |
2835 write_fmt_string (printcharfun, "(%s)", | |
2836 data->subtype == EOL_LF ? "lf" : | |
2837 data->subtype == EOL_CRLF ? "crlf" : | |
2838 data->subtype == EOL_CR ? "cr" : | |
793 | 2839 data->subtype == EOL_AUTODETECT ? "nil" : |
2500 | 2840 (ABORT(), "")); |
771 | 2841 } |
2842 | |
2843 static enum source_sink_type | |
2286 | 2844 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys)) |
771 | 2845 { |
2846 return DECODES_CHARACTER_TO_CHARACTER; | |
2847 } | |
2848 | |
2849 static int | |
2850 convert_eol_putprop (Lisp_Object codesys, | |
2851 Lisp_Object key, | |
2852 Lisp_Object value) | |
2853 { | |
2854 struct convert_eol_coding_system *data = | |
2855 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol); | |
2856 | |
2857 if (EQ (key, Qsubtype)) | |
2858 { | |
2859 if (EQ (value, Qlf) /* || EQ (value, Qunix) */) | |
2860 data->subtype = EOL_LF; | |
2861 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */) | |
2862 data->subtype = EOL_CRLF; | |
2863 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */) | |
2864 data->subtype = EOL_CR; | |
793 | 2865 else if (EQ (value, Qnil)) |
771 | 2866 data->subtype = EOL_AUTODETECT; |
2867 else invalid_constant ("Unrecognized eol type", value); | |
2868 } | |
2869 else | |
2870 return 0; | |
2871 return 1; | |
2872 } | |
2873 | |
2874 static Lisp_Object | |
2875 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2876 { | |
2877 struct convert_eol_coding_system *data = | |
2878 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol); | |
2879 | |
2880 if (EQ (prop, Qsubtype)) | |
2881 { | |
2882 switch (data->subtype) | |
2883 { | |
2884 case EOL_LF: return Qlf; | |
2885 case EOL_CRLF: return Qcrlf; | |
2886 case EOL_CR: return Qcr; | |
793 | 2887 case EOL_AUTODETECT: return Qnil; |
2500 | 2888 default: ABORT (); |
771 | 2889 } |
2890 } | |
2891 | |
2892 return Qunbound; | |
2893 } | |
2894 | |
2895 static void | |
2896 convert_eol_init_coding_stream (struct coding_stream *str) | |
2897 { | |
2898 struct convert_eol_coding_stream *data = | |
2899 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
2900 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
2901 } | |
2902 | |
2903 static Bytecount | |
867 | 2904 convert_eol_convert (struct coding_stream *str, const Ibyte *src, |
771 | 2905 unsigned_char_dynarr *dst, Bytecount n) |
2906 { | |
2907 if (str->direction == CODING_DECODE) | |
2908 { | |
2909 struct convert_eol_coding_stream *data = | |
2910 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
2911 | |
2912 if (data->actual == EOL_AUTODETECT) | |
2913 { | |
2914 Bytecount n2 = n; | |
867 | 2915 const Ibyte *src2 = src; |
771 | 2916 |
2917 for (; n2; n2--) | |
2918 { | |
867 | 2919 Ibyte c = *src2++; |
771 | 2920 if (c == '\n') |
2921 { | |
2922 data->actual = EOL_LF; | |
2923 break; | |
2924 } | |
2925 else if (c == '\r') | |
2926 { | |
2927 if (n2 == 1) | |
2928 { | |
2929 /* If we're seeing a '\r' at the end of the data, then | |
2930 reject the '\r' right now so it doesn't become an | |
2931 issue in the code below -- unless we're at the end of | |
2932 the stream, in which case we can't do that (because | |
2933 then the '\r' will never get written out), and in any | |
2934 case we should be recognizing it at EOL_CR format. */ | |
2935 if (str->eof) | |
2936 data->actual = EOL_CR; | |
2937 else | |
2938 n--; | |
2939 break; | |
2940 } | |
2941 else if (*src2 == '\n') | |
2942 data->actual = EOL_CRLF; | |
2943 else | |
2944 data->actual = EOL_CR; | |
2945 break; | |
2946 } | |
2947 } | |
2948 } | |
2949 | |
2950 /* str->eof is set, the caller reached EOF on the other end and has | |
2951 no new data to give us. The only data we get is the data we | |
2952 rejected from last time. */ | |
2953 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT || | |
2954 (str->eof)) | |
2955 Dynarr_add_many (dst, src, n); | |
2956 else | |
2957 { | |
867 | 2958 const Ibyte *end = src + n; |
771 | 2959 while (1) |
2960 { | |
2961 /* Find the next section with no \r and add it. */ | |
867 | 2962 const Ibyte *runstart = src; |
2963 src = (Ibyte *) memchr (src, '\r', end - src); | |
771 | 2964 if (!src) |
2965 src = end; | |
2966 Dynarr_add_many (dst, runstart, src - runstart); | |
2967 /* Stop if at end ... */ | |
2968 if (src == end) | |
2969 break; | |
2970 /* ... else, translate as necessary. */ | |
2971 src++; | |
2972 if (data->actual == EOL_CR) | |
2973 Dynarr_add (dst, '\n'); | |
2974 /* We need to be careful here with CRLF. If we see a CR at the | |
2975 end of the data, we don't know if it's part of a CRLF, so we | |
2976 reject it. Otherwise: If it's part of a CRLF, eat it and | |
2977 loop; the following LF gets added next time around. If it's | |
2978 not part of a CRLF, add the CR and loop. The following | |
2979 character will be processed in the next loop iteration. This | |
2980 correctly handles a sequence like CR+CR+LF. */ | |
2981 else if (src == end) | |
2982 return n - 1; /* reject the CR at the end; we'll get it again | |
2983 next time the convert method is called */ | |
2984 else if (*src != '\n') | |
2985 Dynarr_add (dst, '\r'); | |
2986 } | |
2987 } | |
2988 | |
2989 return n; | |
2990 } | |
2991 else | |
2992 { | |
2993 enum eol_type subtype = | |
2994 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
867 | 2995 const Ibyte *end = src + n; |
771 | 2996 |
2997 /* We try to be relatively efficient here. */ | |
2998 if (subtype == EOL_LF) | |
2999 Dynarr_add_many (dst, src, n); | |
3000 else | |
3001 { | |
3002 while (1) | |
3003 { | |
3004 /* Find the next section with no \n and add it. */ | |
867 | 3005 const Ibyte *runstart = src; |
3006 src = (Ibyte *) memchr (src, '\n', end - src); | |
771 | 3007 if (!src) |
3008 src = end; | |
3009 Dynarr_add_many (dst, runstart, src - runstart); | |
3010 /* Stop if at end ... */ | |
3011 if (src == end) | |
3012 break; | |
3013 /* ... else, skip over \n and add its translation. */ | |
3014 src++; | |
3015 Dynarr_add (dst, '\r'); | |
3016 if (subtype == EOL_CRLF) | |
3017 Dynarr_add (dst, '\n'); | |
3018 } | |
3019 } | |
3020 | |
3021 return n; | |
3022 } | |
3023 } | |
3024 | |
3025 static Lisp_Object | |
3026 convert_eol_canonicalize_after_coding (struct coding_stream *str) | |
3027 { | |
3028 struct convert_eol_coding_stream *data = | |
3029 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3030 | |
3031 if (str->direction == CODING_ENCODE) | |
3032 return str->codesys; | |
3033 | |
3034 switch (data->actual) | |
3035 { | |
3036 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf); | |
3037 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf); | |
3038 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr); | |
3039 case EOL_AUTODETECT: return str->codesys; | |
2500 | 3040 default: ABORT (); return Qnil; |
771 | 3041 } |
3042 } | |
3043 | |
3044 | |
3045 /************************************************************************/ | |
3046 /* Undecided methods */ | |
3047 /************************************************************************/ | |
3048 | |
3049 /* Do autodetection. We can autodetect the EOL type only, the coding | |
3050 system only, or both. We only do autodetection when decoding; when | |
3051 encoding, we just pass the data through. | |
3052 | |
3053 When doing just EOL detection, a coding system can be specified; if so, | |
3054 we will decode this data through the coding system before doing EOL | |
3055 detection. The reason for specifying this is so that | |
3056 canonicalize-after-coding works: We will canonicalize the specified | |
3057 coding system into the appropriate EOL type. When doing both coding and | |
3058 EOL detection, we do similar canonicalization, and also catch situations | |
3059 where the EOL type is overspecified, i.e. the detected coding system | |
3060 specifies an EOL type, and either switch to the equivalent | |
3061 non-EOL-processing coding system (if possible), or terminate EOL | |
3062 detection and use the specified EOL type. This prevents data from being | |
3063 EOL-processed twice. | |
3064 */ | |
3065 | |
3066 struct undecided_coding_system | |
3067 { | |
3068 int do_eol, do_coding; | |
3069 Lisp_Object cs; | |
3070 }; | |
3071 | |
3072 struct undecided_coding_stream | |
3073 { | |
3074 Lisp_Object actual; | |
3075 /* Either 2 or 3 lstreams here; see undecided_convert */ | |
3076 struct chain_coding_stream c; | |
3077 | |
3078 struct detection_state *st; | |
3079 }; | |
3080 | |
1204 | 3081 static const struct memory_description undecided_coding_system_description[] = { |
3082 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) }, | |
771 | 3083 { XD_END } |
3084 }; | |
3085 | |
1204 | 3086 static const struct memory_description undecided_coding_stream_description_1 [] = { |
3087 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) }, | |
2367 | 3088 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c), |
2551 | 3089 1, { &chain_coding_stream_description } }, |
1204 | 3090 { XD_END } |
3091 }; | |
3092 | |
3093 const struct sized_memory_description undecided_coding_stream_description = { | |
3094 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1 | |
3095 }; | |
3096 | |
3097 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided); | |
3098 | |
771 | 3099 static void |
3100 undecided_init (Lisp_Object codesys) | |
3101 { | |
3102 struct undecided_coding_system *data = | |
3103 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3104 | |
3105 data->cs = Qnil; | |
3106 } | |
3107 | |
3108 static void | |
3109 undecided_mark (Lisp_Object codesys) | |
3110 { | |
3111 struct undecided_coding_system *data = | |
3112 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3113 | |
3114 mark_object (data->cs); | |
3115 } | |
3116 | |
3117 static void | |
3118 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
3119 { | |
3120 struct undecided_coding_system *data = | |
3121 XCODING_SYSTEM_TYPE_DATA (cs, undecided); | |
3122 int need_space = 0; | |
3123 | |
826 | 3124 write_c_string (printcharfun, "("); |
771 | 3125 if (data->do_eol) |
3126 { | |
826 | 3127 write_c_string (printcharfun, "do-eol"); |
771 | 3128 need_space = 1; |
3129 } | |
3130 if (data->do_coding) | |
3131 { | |
3132 if (need_space) | |
826 | 3133 write_c_string (printcharfun, " "); |
3134 write_c_string (printcharfun, "do-coding"); | |
771 | 3135 need_space = 1; |
3136 } | |
3137 if (!NILP (data->cs)) | |
3138 { | |
3139 if (need_space) | |
826 | 3140 write_c_string (printcharfun, " "); |
3141 write_c_string (printcharfun, "coding-system="); | |
771 | 3142 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag); |
3143 } | |
826 | 3144 write_c_string (printcharfun, ")"); |
771 | 3145 } |
3146 | |
3147 static void | |
3148 undecided_mark_coding_stream (struct coding_stream *str) | |
3149 { | |
1204 | 3150 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual); |
771 | 3151 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); |
3152 } | |
3153 | |
3154 static int | |
3155 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
3156 { | |
3157 struct undecided_coding_system *data = | |
3158 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3159 | |
3160 if (EQ (key, Qdo_eol)) | |
3161 data->do_eol = 1; | |
3162 else if (EQ (key, Qdo_coding)) | |
3163 data->do_coding = 1; | |
3164 else if (EQ (key, Qcoding_system)) | |
3165 data->cs = get_coding_system_for_text_file (value, 0); | |
3166 else | |
3167 return 0; | |
3168 return 1; | |
3169 } | |
3170 | |
3171 static Lisp_Object | |
3172 undecided_getprop (Lisp_Object codesys, Lisp_Object prop) | |
3173 { | |
3174 struct undecided_coding_system *data = | |
3175 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3176 | |
3177 if (EQ (prop, Qdo_eol)) | |
3178 return data->do_eol ? Qt : Qnil; | |
3179 if (EQ (prop, Qdo_coding)) | |
3180 return data->do_coding ? Qt : Qnil; | |
3181 if (EQ (prop, Qcoding_system)) | |
3182 return data->cs; | |
3183 return Qunbound; | |
3184 } | |
3185 | |
3186 static struct detection_state * | |
3187 allocate_detection_state (void) | |
3188 { | |
3189 int i; | |
3190 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3191 struct detection_state *block; | |
3192 | |
3193 for (i = 0; i < coding_detector_count; i++) | |
3194 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3195 | |
3196 block = (struct detection_state *) xmalloc_and_zero (size); | |
3197 | |
3198 size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3199 for (i = 0; i < coding_detector_count; i++) | |
3200 { | |
3201 block->data_offset[i] = size; | |
3202 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3203 } | |
3204 | |
3205 return block; | |
3206 } | |
3207 | |
3208 static void | |
3209 free_detection_state (struct detection_state *st) | |
3210 { | |
3211 int i; | |
3212 | |
3213 for (i = 0; i < coding_detector_count; i++) | |
3214 { | |
3215 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method) | |
3216 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method | |
3217 (st); | |
3218 } | |
3219 | |
1726 | 3220 xfree (st, struct detection_state *); |
771 | 3221 } |
3222 | |
3223 static int | |
3224 coding_category_symbol_to_id (Lisp_Object symbol) | |
428 | 3225 { |
3226 int i; | |
3227 | |
3228 CHECK_SYMBOL (symbol); | |
771 | 3229 for (i = 0; i < coding_detector_count; i++) |
3230 { | |
3231 detector_category_dynarr *cats = | |
3232 Dynarr_at (all_coding_detectors, i).cats; | |
3233 int j; | |
3234 | |
3235 for (j = 0; j < Dynarr_length (cats); j++) | |
3236 if (EQ (Dynarr_at (cats, j).sym, symbol)) | |
3237 return Dynarr_at (cats, j).id; | |
3238 } | |
3239 | |
563 | 3240 invalid_constant ("Unrecognized coding category", symbol); |
1204 | 3241 RETURN_NOT_REACHED (0); |
428 | 3242 } |
3243 | |
771 | 3244 static Lisp_Object |
3245 coding_category_id_to_symbol (int id) | |
428 | 3246 { |
3247 int i; | |
771 | 3248 |
3249 for (i = 0; i < coding_detector_count; i++) | |
3250 { | |
3251 detector_category_dynarr *cats = | |
3252 Dynarr_at (all_coding_detectors, i).cats; | |
3253 int j; | |
3254 | |
3255 for (j = 0; j < Dynarr_length (cats); j++) | |
3256 if (id == Dynarr_at (cats, j).id) | |
3257 return Dynarr_at (cats, j).sym; | |
3258 } | |
3259 | |
2500 | 3260 ABORT (); |
771 | 3261 return Qnil; /* (usually) not reached */ |
428 | 3262 } |
3263 | |
771 | 3264 static Lisp_Object |
3265 detection_result_number_to_symbol (enum detection_result result) | |
428 | 3266 { |
1494 | 3267 /* let compiler warn if not all enumerators are handled */ |
3268 switch (result) { | |
3269 #define FROB(sym, num) case num: return (sym) | |
771 | 3270 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); |
3271 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3272 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3273 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3274 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3275 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3276 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3277 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3278 #undef FROB | |
1494 | 3279 } |
771 | 3280 |
2500 | 3281 ABORT (); |
771 | 3282 return Qnil; /* (usually) not reached */ |
3283 } | |
3284 | |
778 | 3285 #if 0 /* not used */ |
771 | 3286 static enum detection_result |
3287 detection_result_symbol_to_number (Lisp_Object symbol) | |
3288 { | |
1494 | 3289 /* using switch here would be bad style, and doesn't help */ |
771 | 3290 #define FROB(sym, num) if (EQ (symbol, sym)) return (num) |
3291 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); | |
3292 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3293 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3294 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3295 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3296 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3297 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3298 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3299 #undef FROB | |
3300 | |
3301 invalid_constant ("Unrecognized detection result", symbol); | |
3302 return ((enum detection_result) 0); /* not reached */ | |
3303 } | |
778 | 3304 #endif /* 0 */ |
771 | 3305 |
3306 /* Set all detection results for a given detector to a specified value. */ | |
3307 void | |
3308 set_detection_results (struct detection_state *st, int detector, int given) | |
3309 { | |
3310 detector_category_dynarr *cats = | |
3311 Dynarr_at (all_coding_detectors, detector).cats; | |
3312 int i; | |
3313 | |
3314 for (i = 0; i < Dynarr_length (cats); i++) | |
3315 st->categories[Dynarr_at (cats, i).id] = given; | |
3316 } | |
428 | 3317 |
3318 static int | |
3319 acceptable_control_char_p (int c) | |
3320 { | |
3321 switch (c) | |
3322 { | |
3323 /* Allow and ignore control characters that you might | |
3324 reasonably see in a text file */ | |
3325 case '\r': | |
3326 case '\n': | |
3327 case '\t': | |
3328 case 7: /* bell */ | |
3329 case 8: /* backspace */ | |
3330 case 11: /* vertical tab */ | |
3331 case 12: /* form feed */ | |
3332 case 26: /* MS-DOS C-z junk */ | |
3333 case 31: /* '^_' -- for info */ | |
3334 return 1; | |
3335 default: | |
3336 return 0; | |
3337 } | |
3338 } | |
3339 | |
771 | 3340 #ifdef DEBUG_XEMACS |
3341 | |
3342 static UExtbyte | |
3343 hex_digit_to_char (int digit) | |
428 | 3344 { |
771 | 3345 if (digit < 10) |
3346 return digit + '0'; | |
3347 else | |
3348 return digit - 10 + 'A'; | |
428 | 3349 } |
3350 | |
771 | 3351 static void |
3352 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) | |
428 | 3353 { |
771 | 3354 UExtbyte *ascii = alloca_array (UExtbyte, n + 1); |
3355 UExtbyte *hex = alloca_array (UExtbyte, 3 * n + 1); | |
3356 int i; | |
3357 | |
3358 for (i = 0; i < n; i++) | |
428 | 3359 { |
771 | 3360 UExtbyte c = src[i]; |
3361 if (c < 0x20) | |
3362 ascii[i] = '.'; | |
428 | 3363 else |
771 | 3364 ascii[i] = c; |
3365 hex[3 * i] = hex_digit_to_char (c >> 4); | |
3366 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); | |
3367 hex[3 * i + 2] = ' '; | |
428 | 3368 } |
771 | 3369 ascii[i] = '\0'; |
3370 hex[3 * i - 1] = '\0'; | |
3371 stderr_out ("%s %s", ascii, hex); | |
428 | 3372 } |
3373 | |
771 | 3374 #endif /* DEBUG_XEMACS */ |
3375 | |
3376 /* Attempt to determine the encoding of the given text. Before calling | |
3377 this function for the first time, you must zero out the detection state. | |
428 | 3378 |
3379 Returns: | |
3380 | |
771 | 3381 0 == keep going |
3382 1 == stop | |
428 | 3383 */ |
3384 | |
3385 static int | |
771 | 3386 detect_coding_type (struct detection_state *st, const UExtbyte *src, |
3387 Bytecount n) | |
428 | 3388 { |
771 | 3389 Bytecount n2 = n; |
3390 const UExtbyte *src2 = src; | |
3391 int i; | |
3392 | |
3393 #ifdef DEBUG_XEMACS | |
3394 if (!NILP (Vdebug_coding_detection)) | |
3395 { | |
3396 int bytes = min (16, n); | |
3397 stderr_out ("detect_coding_type: processing %ld bytes\n", n); | |
3398 stderr_out ("First %d: ", bytes); | |
3399 output_bytes_in_ascii_and_hex (src, bytes); | |
3400 stderr_out ("\nLast %d: ", bytes); | |
3401 output_bytes_in_ascii_and_hex (src + n - bytes, bytes); | |
3402 stderr_out ("\n"); | |
3403 } | |
3404 #endif /* DEBUG_XEMACS */ | |
428 | 3405 if (!st->seen_non_ascii) |
3406 { | |
771 | 3407 for (; n2; n2--, src2++) |
428 | 3408 { |
771 | 3409 UExtbyte c = *src2; |
428 | 3410 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) |
3411 { | |
3412 st->seen_non_ascii = 1; | |
3413 break; | |
3414 } | |
3415 } | |
3416 } | |
3417 | |
771 | 3418 for (i = 0; i < coding_detector_count; i++) |
3419 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n); | |
3420 | |
3421 st->bytes_seen += n; | |
3422 | |
3423 #ifdef DEBUG_XEMACS | |
3424 if (!NILP (Vdebug_coding_detection)) | |
3425 { | |
3426 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii); | |
1494 | 3427 if (coding_detector_category_count <= 0) |
3428 stderr_out ("found %d detector categories\n", | |
3429 coding_detector_category_count); | |
771 | 3430 for (i = 0; i < coding_detector_category_count; i++) |
3431 stderr_out_lisp | |
3432 ("%s: %s\n", | |
3433 2, | |
3434 coding_category_id_to_symbol (i), | |
3435 detection_result_number_to_symbol ((enum detection_result) | |
3436 st->categories[i])); | |
3437 } | |
3438 #endif /* DEBUG_XEMACS */ | |
3439 | |
3440 { | |
3441 int not_unlikely = 0; | |
3442 int retval; | |
3443 | |
3444 for (i = 0; i < coding_detector_category_count; i++) | |
3445 if (st->categories[i] >= 0) | |
3446 not_unlikely++; | |
3447 | |
3448 retval = (not_unlikely <= 1 | |
3449 #if 0 /* this is bogus */ | |
3450 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION | |
428 | 3451 #endif |
771 | 3452 ); |
3453 | |
3454 #ifdef DEBUG_XEMACS | |
3455 if (!NILP (Vdebug_coding_detection)) | |
3456 stderr_out ("detect_coding_type: returning %d (%s)\n", | |
3457 retval, retval ? "stop" : "keep going"); | |
3458 #endif /* DEBUG_XEMACS */ | |
3459 | |
3460 return retval; | |
428 | 3461 } |
3462 } | |
3463 | |
3464 static Lisp_Object | |
771 | 3465 detected_coding_system (struct detection_state *st) |
428 | 3466 { |
771 | 3467 int i; |
3468 int even = 1; | |
3469 | |
3470 if (st->seen_non_ascii) | |
3471 { | |
3472 for (i = 0; i < coding_detector_category_count; i++) | |
3473 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY) | |
3474 { | |
3475 even = 0; | |
3476 break; | |
3477 } | |
3478 } | |
3479 | |
3480 /* #### Here we are ignoring the results of detection when it's all | |
3481 ASCII. This is obviously a bad thing. But we need to fix up the | |
3482 existing detection methods somewhat before we can switch. */ | |
3483 if (even) | |
428 | 3484 { |
3485 /* If the file was entirely or basically ASCII, use the | |
3486 default value of `buffer-file-coding-system'. */ | |
3487 Lisp_Object retval = | |
3488 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system; | |
3489 if (!NILP (retval)) | |
3490 { | |
771 | 3491 retval = find_coding_system_for_text_file (retval, 0); |
428 | 3492 if (NILP (retval)) |
3493 { | |
3494 warn_when_safe | |
3495 (Qbad_variable, Qwarning, | |
3496 "Invalid `default-buffer-file-coding-system', set to nil"); | |
3497 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; | |
3498 } | |
3499 } | |
3500 if (NILP (retval)) | |
3501 retval = Fget_coding_system (Qraw_text); | |
3502 return retval; | |
3503 } | |
3504 else | |
3505 { | |
771 | 3506 int likelihood; |
3507 Lisp_Object retval = Qnil; | |
3508 | |
3509 /* Look through the coding categories first by likelihood and then by | |
3510 priority and find the first one that is allowed. */ | |
3511 | |
3512 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--) | |
428 | 3513 { |
771 | 3514 for (i = 0; i < coding_detector_category_count; i++) |
3515 { | |
3516 int cat = coding_category_by_priority[i]; | |
3517 if (st->categories[cat] == likelihood && | |
3518 !NILP (coding_category_system[cat])) | |
3519 { | |
3520 retval = (get_coding_system_for_text_file | |
3521 (coding_category_system[cat], 0)); | |
3522 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY) | |
3523 warn_when_safe_lispobj | |
3524 (intern ("detection"), | |
793 | 3525 Qwarning, |
771 | 3526 emacs_sprintf_string_lisp |
3527 ( | |
3528 "Detected coding %s is unlikely to be correct (likelihood == `%s')", | |
3529 Qnil, 2, XCODING_SYSTEM_NAME (retval), | |
3530 detection_result_number_to_symbol | |
3531 ((enum detection_result) likelihood))); | |
3532 return retval; | |
3533 } | |
3534 } | |
428 | 3535 } |
771 | 3536 |
3537 return Fget_coding_system (Qraw_text); | |
428 | 3538 } |
3539 } | |
3540 | |
1347 | 3541 /* Look for a coding system in the string (skipping over leading |
3542 blanks). If found, return it, otherwise nil. */ | |
3543 | |
3544 static Lisp_Object | |
2531 | 3545 snarf_coding_system (const UExtbyte *p, Bytecount len) |
1347 | 3546 { |
3547 Bytecount n; | |
2531 | 3548 UExtbyte *name; |
1347 | 3549 |
3550 while (*p == ' ' || *p == '\t') p++, len--; | |
3551 len = min (len, 1000); | |
3552 name = alloca_ibytes (len + 1); | |
3553 memcpy (name, p, len); | |
3554 name[len] = '\0'; | |
3555 | |
3556 /* Get coding system name */ | |
3557 /* Characters valid in a MIME charset name (rfc 1521), | |
3558 and in a Lisp symbol name. */ | |
3559 n = qxestrspn (name, | |
3560 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
3561 "abcdefghijklmnopqrstuvwxyz" | |
3562 "0123456789" | |
3563 "!$%&*+-.^_{|}~"); | |
3564 if (n > 0) | |
3565 { | |
3566 name[n] = '\0'; | |
2531 | 3567 /* This call to intern_int() is OK because we already verified that |
3568 there are only ASCII characters in the string */ | |
3569 return find_coding_system_for_text_file (intern_int ((Ibyte *) name), 0); | |
1347 | 3570 } |
3571 | |
3572 return Qnil; | |
3573 } | |
3574 | |
428 | 3575 /* Given a seekable read stream and potential coding system and EOL type |
3576 as specified, do any autodetection that is called for. If the | |
3577 coding system and/or EOL type are not `autodetect', they will be left | |
3578 alone; but this function will never return an autodetect coding system | |
3579 or EOL type. | |
3580 | |
3581 This function does not automatically fetch subsidiary coding systems; | |
3582 that should be unnecessary with the explicit eol-type argument. */ | |
3583 | |
3584 #define LENGTH(string_constant) (sizeof (string_constant) - 1) | |
3585 | |
771 | 3586 static Lisp_Object |
3587 unwind_free_detection_state (Lisp_Object opaque) | |
3588 { | |
3589 struct detection_state *st = | |
3590 (struct detection_state *) get_opaque_ptr (opaque); | |
3591 free_detection_state (st); | |
3592 free_opaque_ptr (opaque); | |
3593 return Qnil; | |
3594 } | |
3595 | |
1347 | 3596 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file' |
3597 in files.el. Look into combining them. */ | |
3598 | |
771 | 3599 static Lisp_Object |
3600 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len) | |
428 | 3601 { |
771 | 3602 const UExtbyte *p; |
3603 const UExtbyte *scan_end; | |
2531 | 3604 Bytecount cookie_len; |
771 | 3605 |
3606 /* Look for initial "-*-"; mode line prefix */ | |
3607 for (p = data, | |
3608 scan_end = data + len - LENGTH ("-*-coding:?-*-"); | |
3609 p <= scan_end | |
3610 && *p != '\n' | |
3611 && *p != '\r'; | |
3612 p++) | |
3613 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3614 { | |
3615 const UExtbyte *local_vars_beg = p + 3; | |
3616 /* Look for final "-*-"; mode line suffix */ | |
3617 for (p = local_vars_beg, | |
3618 scan_end = data + len - LENGTH ("-*-"); | |
3619 p <= scan_end | |
428 | 3620 && *p != '\n' |
3621 && *p != '\r'; | |
771 | 3622 p++) |
3623 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3624 { | |
3625 const UExtbyte *suffix = p; | |
3626 /* Look for "coding:" */ | |
3627 for (p = local_vars_beg, | |
3628 scan_end = suffix - LENGTH ("coding:?"); | |
3629 p <= scan_end; | |
3630 p++) | |
3631 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0 | |
3632 && (p == local_vars_beg | |
3633 || (*(p-1) == ' ' || | |
3634 *(p-1) == '\t' || | |
3635 *(p-1) == ';'))) | |
3636 { | |
3637 p += LENGTH ("coding:"); | |
1347 | 3638 return snarf_coding_system (p, suffix - p); |
771 | 3639 break; |
3640 } | |
3641 break; | |
3642 } | |
3643 break; | |
3644 } | |
3645 | |
2531 | 3646 /* Look for ;;;###coding system */ |
3647 | |
3648 cookie_len = LENGTH (";;;###coding system: "); | |
3649 | |
3650 for (p = data, | |
3651 scan_end = data + len - cookie_len; | |
3652 p <= scan_end; | |
3653 p++) | |
1347 | 3654 { |
2531 | 3655 if (*p == ';' && !memcmp (p, ";;;###coding system: ", cookie_len)) |
3656 { | |
3657 const UExtbyte *suffix; | |
3658 | |
3659 p += cookie_len; | |
3660 suffix = p; | |
3661 while (suffix < scan_end && !isspace (*suffix)) | |
3662 suffix++; | |
3663 return snarf_coding_system (p, suffix - p); | |
3664 } | |
1347 | 3665 } |
3666 | |
3667 return Qnil; | |
771 | 3668 } |
3669 | |
3670 static Lisp_Object | |
3671 determine_real_coding_system (Lstream *stream) | |
3672 { | |
3673 struct detection_state *st = allocate_detection_state (); | |
3674 int depth = record_unwind_protect (unwind_free_detection_state, | |
3675 make_opaque_ptr (st)); | |
3676 UExtbyte buf[4096]; | |
3677 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); | |
3678 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread); | |
3679 | |
3680 if (NILP (coding_system)) | |
3681 { | |
3682 while (1) | |
3683 { | |
3684 if (detect_coding_type (st, buf, nread)) | |
428 | 3685 break; |
771 | 3686 nread = Lstream_read (stream, buf, sizeof (buf)); |
3687 if (nread == 0) | |
3688 break; | |
428 | 3689 } |
771 | 3690 |
3691 coding_system = detected_coding_system (st); | |
428 | 3692 } |
3693 | |
3694 Lstream_rewind (stream); | |
771 | 3695 |
3696 unbind_to (depth); | |
3697 return coding_system; | |
3698 } | |
3699 | |
3700 static void | |
3701 undecided_init_coding_stream (struct coding_stream *str) | |
3702 { | |
3703 struct undecided_coding_stream *data = | |
3704 CODING_STREAM_TYPE_DATA (str, undecided); | |
3705 struct undecided_coding_system *csdata = | |
3706 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3707 | |
3708 data->actual = Qnil; | |
3709 | |
3710 if (str->direction == CODING_DECODE) | |
3711 { | |
3712 Lstream *lst = str->other_end; | |
3713 | |
3714 if ((lst->flags & LSTREAM_FL_READ) && | |
3715 Lstream_seekable_p (lst) && | |
3716 csdata->do_coding) | |
3717 /* We can determine the coding system now. */ | |
3718 data->actual = determine_real_coding_system (lst); | |
3719 } | |
1494 | 3720 |
3721 #ifdef DEBUG_XEMACS | |
3722 if (!NILP (Vdebug_coding_detection)) | |
3723 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual); | |
3724 #endif /* DEBUG_XEMACS */ | |
771 | 3725 } |
3726 | |
3727 static void | |
3728 undecided_rewind_coding_stream (struct coding_stream *str) | |
3729 { | |
3730 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3731 } | |
3732 | |
3733 static void | |
3734 undecided_finalize_coding_stream (struct coding_stream *str) | |
3735 { | |
3736 struct undecided_coding_stream *data = | |
3737 CODING_STREAM_TYPE_DATA (str, undecided); | |
3738 | |
3739 chain_finalize_coding_stream_1 | |
3740 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3741 if (data->st) | |
3742 free_detection_state (data->st); | |
3743 } | |
3744 | |
3745 static Lisp_Object | |
3746 undecided_canonicalize (Lisp_Object codesys) | |
3747 { | |
3748 struct undecided_coding_system *csdata = | |
3749 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3750 if (!csdata->do_eol && !csdata->do_coding) | |
3751 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs; | |
3752 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs)) | |
3753 return Fget_coding_system (Qconvert_eol_autodetect); | |
3754 return codesys; | |
3755 } | |
3756 | |
3757 static Bytecount | |
3758 undecided_convert (struct coding_stream *str, const UExtbyte *src, | |
3759 unsigned_char_dynarr *dst, Bytecount n) | |
3760 { | |
3761 int first_time = 0; | |
3762 | |
3763 if (str->direction == CODING_DECODE) | |
3764 { | |
3765 /* At this point, we have only the following possibilities: | |
3766 | |
3767 do_eol && do_coding | |
3768 do_coding only | |
3769 do_eol only and a coding system was specified | |
3770 | |
3771 Other possibilities are removed during undecided_canonicalize. | |
3772 | |
3773 Therefore, our substreams are either | |
3774 | |
3775 lstream_coding -> lstream_dynarr, or | |
3776 lstream_coding -> lstream_eol -> lstream_dynarr. | |
3777 */ | |
3778 struct undecided_coding_system *csdata = | |
3779 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3780 struct undecided_coding_stream *data = | |
3781 CODING_STREAM_TYPE_DATA (str, undecided); | |
3782 | |
3783 if (str->eof) | |
3784 { | |
3785 /* Each will close the next. We need to close now because more | |
3786 data may be generated. */ | |
3787 if (data->c.initted) | |
3788 Lstream_close (XLSTREAM (data->c.lstreams[0])); | |
3789 return n; | |
3790 } | |
3791 | |
3792 if (!data->c.initted) | |
3793 { | |
3794 data->c.lstream_count = csdata->do_eol ? 3 : 2; | |
3795 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count); | |
3796 | |
3797 data->c.lstreams[data->c.lstream_count - 1] = | |
3798 make_dynarr_output_stream (dst); | |
3799 Lstream_set_buffering | |
3800 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3801 LSTREAM_UNBUFFERED, 0); | |
3802 if (csdata->do_eol) | |
3803 { | |
3804 data->c.lstreams[1] = | |
3805 make_coding_output_stream | |
3806 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3807 Fget_coding_system (Qconvert_eol_autodetect), | |
800 | 3808 CODING_DECODE, 0); |
771 | 3809 Lstream_set_buffering |
3810 (XLSTREAM (data->c.lstreams[1]), | |
3811 LSTREAM_UNBUFFERED, 0); | |
3812 } | |
3813 | |
3814 data->c.lstreams[0] = | |
3815 make_coding_output_stream | |
3816 (XLSTREAM (data->c.lstreams[1]), | |
3817 /* Substitute binary if we need to detect the encoding */ | |
3818 csdata->do_coding ? Qbinary : csdata->cs, | |
800 | 3819 CODING_DECODE, 0); |
771 | 3820 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]), |
3821 LSTREAM_UNBUFFERED, 0); | |
3822 | |
3823 first_time = 1; | |
3824 data->c.initted = 1; | |
3825 } | |
3826 | |
3827 /* If necessary, do encoding-detection now. We do this when we're a | |
3828 writing stream or a non-seekable reading stream, meaning that we | |
3829 can't just process the whole input, rewind, and start over. */ | |
3830 | |
3831 if (csdata->do_coding) | |
3832 { | |
3833 int actual_was_nil = NILP (data->actual); | |
3834 if (NILP (data->actual)) | |
3835 { | |
3836 if (!data->st) | |
3837 data->st = allocate_detection_state (); | |
3838 if (first_time) | |
3839 /* #### This is cheesy. What we really ought to do is buffer | |
3840 up a certain minimum amount of data to get a better result. | |
3841 */ | |
3842 data->actual = look_for_coding_system_magic_cookie (src, n); | |
3843 if (NILP (data->actual)) | |
3844 { | |
3845 /* #### This is cheesy. What we really ought to do is buffer | |
3846 up a certain minimum amount of data so as to get a less | |
3847 random result when doing subprocess detection. */ | |
3848 detect_coding_type (data->st, src, n); | |
3849 data->actual = detected_coding_system (data->st); | |
3850 } | |
3851 } | |
3852 /* We need to set the detected coding system if we actually have | |
3853 such a coding system but didn't before. That is the case | |
3854 either when we just detected it in the previous code or when | |
3855 it was detected during undecided_init_coding_stream(). We | |
3856 can check for that using first_time. */ | |
3857 if (!NILP (data->actual) && (actual_was_nil || first_time)) | |
3858 { | |
3859 /* If the detected coding system doesn't allow for EOL | |
3860 autodetection, try to get the equivalent that does; | |
3861 otherwise, disable EOL detection (overriding whatever | |
3862 may already have been detected). */ | |
3863 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT) | |
3864 { | |
3865 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual))) | |
3866 data->actual = | |
3867 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual); | |
3868 else if (data->c.lstream_count == 3) | |
3869 set_coding_stream_coding_system | |
3870 (XLSTREAM (data->c.lstreams[1]), | |
3871 Fget_coding_system (Qidentity)); | |
3872 } | |
3873 set_coding_stream_coding_system | |
3874 (XLSTREAM (data->c.lstreams[0]), data->actual); | |
3875 } | |
3876 } | |
3877 | |
3878 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0) | |
3879 return -1; | |
3880 return n; | |
3881 } | |
3882 else | |
3883 return no_conversion_convert (str, src, dst, n); | |
3884 } | |
3885 | |
3886 static Lisp_Object | |
3887 undecided_canonicalize_after_coding (struct coding_stream *str) | |
3888 { | |
3889 struct undecided_coding_stream *data = | |
3890 CODING_STREAM_TYPE_DATA (str, undecided); | |
3891 Lisp_Object ret, eolret; | |
3892 | |
3893 if (str->direction == CODING_ENCODE) | |
3894 return str->codesys; | |
3895 | |
3896 if (!data->c.initted) | |
3897 return Fget_coding_system (Qundecided); | |
3898 | |
3899 ret = coding_stream_canonicalize_after_coding | |
3900 (XLSTREAM (data->c.lstreams[0])); | |
3901 if (NILP (ret)) | |
3902 ret = Fget_coding_system (Qundecided); | |
3903 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) | |
3904 return ret; | |
3905 eolret = coding_stream_canonicalize_after_coding | |
3906 (XLSTREAM (data->c.lstreams[1])); | |
3907 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) | |
3908 return ret; | |
3909 return | |
3910 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret, | |
3911 Qsubtype)); | |
3912 } | |
3913 | |
3914 | |
3915 /************************************************************************/ | |
3916 /* Lisp interface: Coding category functions and detection */ | |
3917 /************************************************************************/ | |
3918 | |
3919 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /* | |
3920 Return a list of all recognized coding categories. | |
3921 */ | |
3922 ()) | |
3923 { | |
3924 int i; | |
3925 Lisp_Object list = Qnil; | |
3926 | |
3927 for (i = 0; i < coding_detector_count; i++) | |
3928 { | |
3929 detector_category_dynarr *cats = | |
3930 Dynarr_at (all_coding_detectors, i).cats; | |
3931 int j; | |
3932 | |
3933 for (j = 0; j < Dynarr_length (cats); j++) | |
3934 list = Fcons (Dynarr_at (cats, j).sym, list); | |
3935 } | |
3936 | |
3937 return Fnreverse (list); | |
3938 } | |
3939 | |
3940 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* | |
3941 Change the priority order of the coding categories. | |
3942 LIST should be list of coding categories, in descending order of | |
3943 priority. Unspecified coding categories will be lower in priority | |
3944 than all specified ones, in the same relative order they were in | |
3945 previously. | |
3946 */ | |
3947 (list)) | |
3948 { | |
3949 int *category_to_priority = | |
3950 alloca_array (int, coding_detector_category_count); | |
3951 int i, j; | |
3952 | |
3953 /* First generate a list that maps coding categories to priorities. */ | |
3954 | |
3955 for (i = 0; i < coding_detector_category_count; i++) | |
3956 category_to_priority[i] = -1; | |
3957 | |
3958 /* Highest priority comes from the specified list. */ | |
3959 i = 0; | |
2367 | 3960 { |
3961 EXTERNAL_LIST_LOOP_2 (elt, list) | |
3962 { | |
3963 int cat = coding_category_symbol_to_id (elt); | |
3964 | |
3965 if (category_to_priority[cat] >= 0) | |
3966 sferror ("Duplicate coding category in list", elt); | |
3967 category_to_priority[cat] = i++; | |
3968 } | |
3969 } | |
771 | 3970 |
3971 /* Now go through the existing categories by priority to retrieve | |
3972 the categories not yet specified and preserve their priority | |
3973 order. */ | |
3974 for (j = 0; j < coding_detector_category_count; j++) | |
3975 { | |
3976 int cat = coding_category_by_priority[j]; | |
3977 if (category_to_priority[cat] < 0) | |
3978 category_to_priority[cat] = i++; | |
3979 } | |
3980 | |
3981 /* Now we need to construct the inverse of the mapping we just | |
3982 constructed. */ | |
3983 | |
3984 for (i = 0; i < coding_detector_category_count; i++) | |
3985 coding_category_by_priority[category_to_priority[i]] = i; | |
3986 | |
3987 /* Phew! That was confusing. */ | |
3988 return Qnil; | |
3989 } | |
3990 | |
3991 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /* | |
3992 Return a list of coding categories in descending order of priority. | |
3993 */ | |
3994 ()) | |
3995 { | |
3996 int i; | |
3997 Lisp_Object list = Qnil; | |
3998 | |
3999 for (i = 0; i < coding_detector_category_count; i++) | |
4000 list = | |
4001 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]), | |
4002 list); | |
4003 return Fnreverse (list); | |
4004 } | |
4005 | |
4006 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* | |
4007 Change the coding system associated with a coding category. | |
4008 */ | |
4009 (coding_category, coding_system)) | |
4010 { | |
4011 coding_category_system[coding_category_symbol_to_id (coding_category)] = | |
4012 Fget_coding_system (coding_system); | |
4013 return Qnil; | |
4014 } | |
4015 | |
4016 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* | |
4017 Return the coding system associated with a coding category. | |
4018 */ | |
4019 (coding_category)) | |
4020 { | |
4021 Lisp_Object sys = | |
4022 coding_category_system[coding_category_symbol_to_id (coding_category)]; | |
4023 | |
4024 if (!NILP (sys)) | |
4025 return XCODING_SYSTEM_NAME (sys); | |
4026 return Qnil; | |
4027 } | |
4028 | |
800 | 4029 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will |
4030 read through to the end of STREAM, leaving it there but open. */ | |
4031 | |
771 | 4032 Lisp_Object |
4033 detect_coding_stream (Lisp_Object stream) | |
4034 { | |
4035 Lisp_Object val = Qnil; | |
4036 struct gcpro gcpro1, gcpro2, gcpro3; | |
4037 UExtbyte random_buffer[65536]; | |
4038 Lisp_Object binary_instream = | |
4039 make_coding_input_stream | |
4040 (XLSTREAM (stream), Qbinary, | |
814 | 4041 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER); |
771 | 4042 Lisp_Object decstream = |
4043 make_coding_input_stream | |
4044 (XLSTREAM (binary_instream), | |
800 | 4045 Qundecided, CODING_DECODE, 0); |
771 | 4046 Lstream *decstr = XLSTREAM (decstream); |
4047 | |
4048 GCPRO3 (decstream, stream, binary_instream); | |
4049 /* Read and discard all data; detection happens as a side effect of this, | |
4050 and we examine what was detected afterwards. */ | |
4051 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0) | |
4052 ; | |
4053 | |
4054 val = coding_stream_detected_coding_system (decstr); | |
4055 Lstream_close (decstr); | |
4056 Lstream_delete (decstr); | |
4057 Lstream_delete (XLSTREAM (binary_instream)); | |
4058 UNGCPRO; | |
4059 return val; | |
428 | 4060 } |
4061 | |
4062 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | |
4063 Detect coding system of the text in the region between START and END. | |
444 | 4064 Return a list of possible coding systems ordered by priority. |
3025 | 4065 If only ASCII characters are found, return `undecided' or one of |
428 | 4066 its subsidiary coding systems according to a detected end-of-line |
4067 type. Optional arg BUFFER defaults to the current buffer. | |
4068 */ | |
4069 (start, end, buffer)) | |
4070 { | |
4071 Lisp_Object val = Qnil; | |
4072 struct buffer *buf = decode_buffer (buffer, 0); | |
665 | 4073 Charbpos b, e; |
771 | 4074 Lisp_Object lb_instream; |
428 | 4075 |
4076 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
4077 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
771 | 4078 |
4079 val = detect_coding_stream (lb_instream); | |
4080 Lstream_delete (XLSTREAM (lb_instream)); | |
428 | 4081 return val; |
4082 } | |
4083 | |
4084 | |
771 | 4085 |
4086 #ifdef DEBUG_XEMACS | |
4087 | |
428 | 4088 /************************************************************************/ |
771 | 4089 /* Internal methods */ |
4090 /************************************************************************/ | |
4091 | |
4092 /* Raw (internally-formatted) data. */ | |
4093 DEFINE_CODING_SYSTEM_TYPE (internal); | |
428 | 4094 |
665 | 4095 static Bytecount |
2286 | 4096 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src, |
771 | 4097 unsigned_char_dynarr *dst, Bytecount n) |
4098 { | |
4099 Bytecount orign = n; | |
4100 Dynarr_add_many (dst, src, n); | |
4101 return orign; | |
4102 } | |
4103 | |
4104 #endif /* DEBUG_XEMACS */ | |
4105 | |
4106 | |
4107 | |
4108 #ifdef HAVE_ZLIB | |
4109 | |
4110 /************************************************************************/ | |
4111 /* Gzip methods */ | |
4112 /************************************************************************/ | |
4113 | |
4114 struct gzip_coding_system | |
428 | 4115 { |
771 | 4116 int level; /* 0 through 9, or -1 for default */ |
4117 }; | |
4118 | |
4119 #define CODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4120 (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4121 #define XCODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4122 (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4123 | |
4124 struct gzip_coding_stream | |
428 | 4125 { |
771 | 4126 z_stream stream; |
4127 int stream_initted; | |
4128 int reached_eof; /* #### this should be handled by the caller, once we | |
4129 return LSTREAM_EOF */ | |
4130 }; | |
4131 | |
1204 | 4132 static const struct memory_description |
771 | 4133 gzip_coding_system_description[] = { |
4134 { XD_END } | |
4135 }; | |
4136 | |
1204 | 4137 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip); |
4138 | |
771 | 4139 enum source_sink_type |
4140 gzip_conversion_end_type (Lisp_Object codesys) | |
4141 { | |
4142 return DECODES_BYTE_TO_BYTE; | |
428 | 4143 } |
4144 | |
4145 static void | |
771 | 4146 gzip_init (Lisp_Object codesys) |
4147 { | |
4148 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); | |
4149 data->level = -1; | |
4150 } | |
4151 | |
4152 static void | |
4153 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
428 | 4154 { |
771 | 4155 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip); |
4156 | |
826 | 4157 write_c_string (printcharfun, "("); |
771 | 4158 if (data->level == -1) |
826 | 4159 write_c_string (printcharfun, "default"); |
771 | 4160 else |
4161 print_internal (make_int (data->level), printcharfun, 0); | |
826 | 4162 write_c_string (printcharfun, ")"); |
428 | 4163 } |
4164 | |
4165 static int | |
771 | 4166 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
428 | 4167 { |
771 | 4168 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); |
4169 | |
4170 if (EQ (key, Qlevel)) | |
428 | 4171 { |
771 | 4172 if (EQ (value, Qdefault)) |
4173 data->level = -1; | |
4174 else | |
428 | 4175 { |
771 | 4176 CHECK_INT (value); |
4177 check_int_range (XINT (value), 0, 9); | |
4178 data->level = XINT (value); | |
428 | 4179 } |
4180 } | |
4181 else | |
771 | 4182 return 0; |
4183 return 1; | |
428 | 4184 } |
4185 | |
4186 static Lisp_Object | |
771 | 4187 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop) |
428 | 4188 { |
771 | 4189 struct gzip_coding_system *data = |
4190 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip); | |
4191 | |
4192 if (EQ (prop, Qlevel)) | |
428 | 4193 { |
771 | 4194 if (data->level == -1) |
4195 return Qdefault; | |
4196 return make_int (data->level); | |
428 | 4197 } |
771 | 4198 |
4199 return Qunbound; | |
428 | 4200 } |
4201 | |
4202 static void | |
771 | 4203 gzip_init_coding_stream (struct coding_stream *str) |
428 | 4204 { |
771 | 4205 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4206 if (data->stream_initted) | |
428 | 4207 { |
771 | 4208 if (str->direction == CODING_DECODE) |
4209 inflateEnd (&data->stream); | |
4210 else | |
4211 deflateEnd (&data->stream); | |
4212 data->stream_initted = 0; | |
428 | 4213 } |
771 | 4214 data->reached_eof = 0; |
428 | 4215 } |
4216 | |
4217 static void | |
771 | 4218 gzip_rewind_coding_stream (struct coding_stream *str) |
428 | 4219 { |
771 | 4220 gzip_init_coding_stream (str); |
428 | 4221 } |
4222 | |
771 | 4223 static Bytecount |
4224 gzip_convert (struct coding_stream *str, | |
4225 const UExtbyte *src, | |
4226 unsigned_char_dynarr *dst, Bytecount n) | |
428 | 4227 { |
771 | 4228 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4229 int zerr; | |
4230 if (str->direction == CODING_DECODE) | |
428 | 4231 { |
771 | 4232 if (data->reached_eof) |
4233 return n; /* eat the data */ | |
4234 | |
4235 if (!data->stream_initted) | |
428 | 4236 { |
771 | 4237 xzero (data->stream); |
4238 if (inflateInit (&data->stream) != Z_OK) | |
4239 return LSTREAM_ERROR; | |
4240 data->stream_initted = 1; | |
428 | 4241 } |
771 | 4242 |
4243 data->stream.next_in = (Bytef *) src; | |
4244 data->stream.avail_in = n; | |
4245 | |
4246 /* Normally we stop when we've fed all data to the decompressor; but | |
4247 if we're at the end of the input, and the decompressor hasn't | |
4248 reported EOF, we need to keep going, as there might be more output | |
4249 to generate. Z_OK from the decompressor means input was processed | |
4250 or output was generated; if neither, we break out of the loop. | |
4251 Other return values are: | |
4252 | |
4253 Z_STREAM_END EOF from decompressor | |
4254 Z_DATA_ERROR Corrupted data | |
4255 Z_BUF_ERROR No progress possible (this should happen if | |
4256 we try to feed it an incomplete file) | |
4257 Z_MEM_ERROR Out of memory | |
4258 Z_STREAM_ERROR (should never happen) | |
4259 Z_NEED_DICT (#### when will this happen?) | |
4260 */ | |
4261 while (data->stream.avail_in > 0 || str->eof) | |
4262 { | |
4263 /* Reserve an output buffer of the same size as the input buffer; | |
4264 if that's not enough, we keep reserving the same size. */ | |
4265 Bytecount reserved = n; | |
4266 Dynarr_add_many (dst, 0, reserved); | |
4267 /* Careful here! Don't retrieve the pointer until after | |
4268 reserving the space, or it might be bogus */ | |
4269 data->stream.next_out = | |
4270 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4271 data->stream.avail_out = reserved; | |
4272 zerr = inflate (&data->stream, Z_NO_FLUSH); | |
4273 /* Lop off the unused portion */ | |
4274 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
4275 if (zerr != Z_OK) | |
4276 break; | |
4277 } | |
4278 | |
4279 if (zerr == Z_STREAM_END) | |
4280 data->reached_eof = 1; | |
4281 | |
4282 if ((Bytecount) data->stream.avail_in < n) | |
4283 return n - data->stream.avail_in; | |
4284 | |
4285 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4286 return 0; | |
4287 | |
4288 return LSTREAM_ERROR; | |
428 | 4289 } |
4290 else | |
4291 { | |
771 | 4292 if (!data->stream_initted) |
4293 { | |
4294 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys); | |
4295 xzero (data->stream); | |
4296 if (deflateInit (&data->stream, | |
4297 level == -1 ? Z_DEFAULT_COMPRESSION : level) != | |
4298 Z_OK) | |
4299 return LSTREAM_ERROR; | |
4300 data->stream_initted = 1; | |
428 | 4301 } |
771 | 4302 |
4303 data->stream.next_in = (Bytef *) src; | |
4304 data->stream.avail_in = n; | |
4305 | |
4306 /* Normally we stop when we've fed all data to the compressor; but if | |
4307 we're at the end of the input, and the compressor hasn't reported | |
4308 EOF, we need to keep going, as there might be more output to | |
4309 generate. (To signal EOF on our end, we set the FLUSH parameter | |
4310 to Z_FINISH; when all data is output, Z_STREAM_END will be | |
4311 returned.) Z_OK from the compressor means input was processed or | |
4312 output was generated; if neither, we break out of the loop. Other | |
4313 return values are: | |
4314 | |
4315 Z_STREAM_END EOF from compressor | |
4316 Z_BUF_ERROR No progress possible (should never happen) | |
4317 Z_STREAM_ERROR (should never happen) | |
4318 */ | |
4319 while (data->stream.avail_in > 0 || str->eof) | |
4320 { | |
4321 /* Reserve an output buffer of the same size as the input buffer; | |
4322 if that's not enough, we keep reserving the same size. */ | |
4323 Bytecount reserved = n; | |
4324 Dynarr_add_many (dst, 0, reserved); | |
4325 /* Careful here! Don't retrieve the pointer until after | |
4326 reserving the space, or it might be bogus */ | |
4327 data->stream.next_out = | |
4328 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4329 data->stream.avail_out = reserved; | |
4330 zerr = | |
4331 deflate (&data->stream, | |
4332 str->eof ? Z_FINISH : Z_NO_FLUSH); | |
4333 /* Lop off the unused portion */ | |
4334 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
4335 if (zerr != Z_OK) | |
4336 break; | |
4337 } | |
4338 | |
4339 if ((Bytecount) data->stream.avail_in < n) | |
4340 return n - data->stream.avail_in; | |
4341 | |
4342 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4343 return 0; | |
4344 | |
4345 return LSTREAM_ERROR; | |
428 | 4346 } |
4347 } | |
4348 | |
771 | 4349 #endif /* HAVE_ZLIB */ |
428 | 4350 |
4351 | |
4352 /************************************************************************/ | |
4353 /* Initialization */ | |
4354 /************************************************************************/ | |
4355 | |
4356 void | |
4357 syms_of_file_coding (void) | |
4358 { | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
4359 INIT_LISP_OBJECT (coding_system); |
442 | 4360 |
771 | 4361 DEFSUBR (Fvalid_coding_system_type_p); |
4362 DEFSUBR (Fcoding_system_type_list); | |
428 | 4363 DEFSUBR (Fcoding_system_p); |
4364 DEFSUBR (Ffind_coding_system); | |
4365 DEFSUBR (Fget_coding_system); | |
4366 DEFSUBR (Fcoding_system_list); | |
4367 DEFSUBR (Fcoding_system_name); | |
4368 DEFSUBR (Fmake_coding_system); | |
4369 DEFSUBR (Fcopy_coding_system); | |
440 | 4370 DEFSUBR (Fcoding_system_canonical_name_p); |
4371 DEFSUBR (Fcoding_system_alias_p); | |
4372 DEFSUBR (Fcoding_system_aliasee); | |
428 | 4373 DEFSUBR (Fdefine_coding_system_alias); |
4374 DEFSUBR (Fsubsidiary_coding_system); | |
771 | 4375 DEFSUBR (Fcoding_system_base); |
4376 DEFSUBR (Fcoding_system_used_for_io); | |
428 | 4377 |
4378 DEFSUBR (Fcoding_system_type); | |
771 | 4379 DEFSUBR (Fcoding_system_description); |
428 | 4380 DEFSUBR (Fcoding_system_property); |
4381 | |
4382 DEFSUBR (Fcoding_category_list); | |
4383 DEFSUBR (Fset_coding_priority_list); | |
4384 DEFSUBR (Fcoding_priority_list); | |
4385 DEFSUBR (Fset_coding_category_system); | |
4386 DEFSUBR (Fcoding_category_system); | |
4387 | |
4388 DEFSUBR (Fdetect_coding_region); | |
4389 DEFSUBR (Fdecode_coding_region); | |
4390 DEFSUBR (Fencode_coding_region); | |
563 | 4391 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
4392 DEFSYMBOL (Qno_conversion); | |
771 | 4393 DEFSYMBOL (Qconvert_eol); |
4394 DEFSYMBOL (Qconvert_eol_autodetect); | |
4395 DEFSYMBOL (Qconvert_eol_lf); | |
4396 DEFSYMBOL (Qconvert_eol_cr); | |
4397 DEFSYMBOL (Qconvert_eol_crlf); | |
563 | 4398 DEFSYMBOL (Qraw_text); |
771 | 4399 |
563 | 4400 DEFSYMBOL (Qmnemonic); |
4401 DEFSYMBOL (Qeol_type); | |
4402 DEFSYMBOL (Qpost_read_conversion); | |
4403 DEFSYMBOL (Qpre_write_conversion); | |
4404 | |
771 | 4405 DEFSYMBOL (Qtranslation_table_for_decode); |
4406 DEFSYMBOL (Qtranslation_table_for_encode); | |
4407 DEFSYMBOL (Qsafe_chars); | |
4408 DEFSYMBOL (Qsafe_charsets); | |
4409 DEFSYMBOL (Qmime_charset); | |
4410 DEFSYMBOL (Qvalid_codes); | |
4411 | |
563 | 4412 DEFSYMBOL (Qcr); |
4413 DEFSYMBOL (Qlf); | |
4414 DEFSYMBOL (Qcrlf); | |
4415 DEFSYMBOL (Qeol_cr); | |
4416 DEFSYMBOL (Qeol_lf); | |
4417 DEFSYMBOL (Qeol_crlf); | |
4418 DEFSYMBOL (Qencode); | |
4419 DEFSYMBOL (Qdecode); | |
428 | 4420 |
771 | 4421 DEFSYMBOL (Qnear_certainty); |
4422 DEFSYMBOL (Qquite_probable); | |
4423 DEFSYMBOL (Qsomewhat_likely); | |
1494 | 4424 DEFSYMBOL (Qslightly_likely); |
771 | 4425 DEFSYMBOL (Qas_likely_as_unlikely); |
4426 DEFSYMBOL (Qsomewhat_unlikely); | |
4427 DEFSYMBOL (Qquite_improbable); | |
4428 DEFSYMBOL (Qnearly_impossible); | |
4429 | |
4430 DEFSYMBOL (Qdo_eol); | |
4431 DEFSYMBOL (Qdo_coding); | |
4432 | |
4433 DEFSYMBOL (Qcanonicalize_after_coding); | |
4434 | |
4435 DEFSYMBOL (Qescape_quoted); | |
4436 | |
4437 #ifdef HAVE_ZLIB | |
4438 DEFSYMBOL (Qgzip); | |
4439 #endif | |
4440 | |
428 | 4441 } |
4442 | |
4443 void | |
4444 lstream_type_create_file_coding (void) | |
4445 { | |
771 | 4446 LSTREAM_HAS_METHOD (coding, reader); |
4447 LSTREAM_HAS_METHOD (coding, writer); | |
4448 LSTREAM_HAS_METHOD (coding, rewinder); | |
4449 LSTREAM_HAS_METHOD (coding, seekable_p); | |
4450 LSTREAM_HAS_METHOD (coding, marker); | |
4451 LSTREAM_HAS_METHOD (coding, flusher); | |
4452 LSTREAM_HAS_METHOD (coding, closer); | |
4453 LSTREAM_HAS_METHOD (coding, finalizer); | |
4454 } | |
4455 | |
4456 void | |
4457 coding_system_type_create (void) | |
4458 { | |
4459 int i; | |
4460 | |
4461 staticpro (&Vcoding_system_hash_table); | |
4462 Vcoding_system_hash_table = | |
4463 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | |
4464 | |
4465 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry); | |
2367 | 4466 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr, |
771 | 4467 &csted_description); |
4468 | |
4469 Vcoding_system_type_list = Qnil; | |
4470 staticpro (&Vcoding_system_type_list); | |
4471 | |
4472 /* Initialize to something reasonable ... */ | |
4473 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++) | |
4474 { | |
4475 coding_category_system[i] = Qnil; | |
1204 | 4476 dump_add_root_lisp_object (&coding_category_system[i]); |
771 | 4477 coding_category_by_priority[i] = i; |
4478 } | |
4479 | |
4480 dump_add_opaque (coding_category_by_priority, | |
4481 sizeof (coding_category_by_priority)); | |
4482 | |
4483 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector); | |
2367 | 4484 dump_add_root_block_ptr (&all_coding_detectors, |
771 | 4485 &detector_dynarr_description); |
4486 | |
4487 dump_add_opaque_int (&coding_system_tick); | |
4488 dump_add_opaque_int (&coding_detector_count); | |
4489 dump_add_opaque_int (&coding_detector_category_count); | |
4490 | |
4491 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, | |
4492 "no-conversion-coding-system-p"); | |
4493 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); | |
4494 | |
4495 INITIALIZE_DETECTOR (no_conversion); | |
4496 DETECTOR_HAS_METHOD (no_conversion, detect); | |
4497 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
4498 | |
4499 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol, | |
4500 "convert-eol-coding-system-p"); | |
4501 CODING_SYSTEM_HAS_METHOD (convert_eol, print); | |
4502 CODING_SYSTEM_HAS_METHOD (convert_eol, convert); | |
4503 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop); | |
4504 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop); | |
4505 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type); | |
4506 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding); | |
4507 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream); | |
4508 | |
4509 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided, | |
4510 "undecided-coding-system-p"); | |
4511 CODING_SYSTEM_HAS_METHOD (undecided, init); | |
4512 CODING_SYSTEM_HAS_METHOD (undecided, mark); | |
4513 CODING_SYSTEM_HAS_METHOD (undecided, print); | |
4514 CODING_SYSTEM_HAS_METHOD (undecided, convert); | |
4515 CODING_SYSTEM_HAS_METHOD (undecided, putprop); | |
4516 CODING_SYSTEM_HAS_METHOD (undecided, getprop); | |
4517 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream); | |
4518 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream); | |
4519 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream); | |
4520 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream); | |
4521 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize); | |
4522 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding); | |
4523 | |
4524 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p"); | |
4525 | |
4526 CODING_SYSTEM_HAS_METHOD (chain, print); | |
4527 CODING_SYSTEM_HAS_METHOD (chain, canonicalize); | |
4528 CODING_SYSTEM_HAS_METHOD (chain, init); | |
4529 CODING_SYSTEM_HAS_METHOD (chain, mark); | |
4530 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream); | |
4531 CODING_SYSTEM_HAS_METHOD (chain, convert); | |
4532 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream); | |
4533 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream); | |
4534 CODING_SYSTEM_HAS_METHOD (chain, finalize); | |
4535 CODING_SYSTEM_HAS_METHOD (chain, putprop); | |
4536 CODING_SYSTEM_HAS_METHOD (chain, getprop); | |
4537 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type); | |
4538 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding); | |
4539 | |
4540 #ifdef DEBUG_XEMACS | |
4541 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p"); | |
4542 CODING_SYSTEM_HAS_METHOD (internal, convert); | |
4543 #endif | |
4544 | |
4545 #ifdef HAVE_ZLIB | |
4546 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p"); | |
4547 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type); | |
4548 CODING_SYSTEM_HAS_METHOD (gzip, convert); | |
4549 CODING_SYSTEM_HAS_METHOD (gzip, init); | |
4550 CODING_SYSTEM_HAS_METHOD (gzip, print); | |
4551 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream); | |
4552 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream); | |
4553 CODING_SYSTEM_HAS_METHOD (gzip, putprop); | |
4554 CODING_SYSTEM_HAS_METHOD (gzip, getprop); | |
4555 #endif | |
4556 } | |
4557 | |
4558 void | |
4559 reinit_coding_system_type_create (void) | |
4560 { | |
4561 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion); | |
4562 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol); | |
4563 REINITIALIZE_CODING_SYSTEM_TYPE (undecided); | |
4564 REINITIALIZE_CODING_SYSTEM_TYPE (chain); | |
4565 #if 0 | |
4566 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper); | |
4567 #endif /* 0 */ | |
4568 #ifdef DEBUG_XEMACS | |
4569 REINITIALIZE_CODING_SYSTEM_TYPE (internal); | |
4570 #endif | |
4571 #ifdef HAVE_ZLIB | |
4572 REINITIALIZE_CODING_SYSTEM_TYPE (gzip); | |
4573 #endif | |
4574 } | |
4575 | |
4576 void | |
4577 reinit_vars_of_file_coding (void) | |
4578 { | |
428 | 4579 } |
4580 | |
4581 void | |
4582 vars_of_file_coding (void) | |
4583 { | |
771 | 4584 /* We always have file-coding support */ |
428 | 4585 Fprovide (intern ("file-coding")); |
4586 | |
1347 | 4587 QScoding_system_cookie = build_string (";;;###coding system: "); |
4588 staticpro (&QScoding_system_cookie); | |
4589 | |
1242 | 4590 #ifdef HAVE_DEFAULT_EOL_DETECTION |
2297 | 4591 /* #### Find a more appropriate place for this comment. |
4592 WARNING: The existing categories are intimately tied to the function | |
1242 | 4593 `coding-system-category' in coding.el. If you change a category, or |
4594 change the layout of any coding system associated with a category, you | |
4595 need to check that function and make sure it's written properly. */ | |
4596 | |
4597 Fprovide (intern ("unix-default-eol-detection")); | |
4598 #endif | |
4599 | |
428 | 4600 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
4601 Coding system used for TTY keyboard input. | |
4602 Not used under a windowing system. | |
4603 */ ); | |
4604 Vkeyboard_coding_system = Qnil; | |
4605 | |
4606 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* | |
4607 Coding system used for TTY display output. | |
4608 Not used under a windowing system. | |
4609 */ ); | |
4610 Vterminal_coding_system = Qnil; | |
4611 | |
4612 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* | |
440 | 4613 Overriding coding system used when reading from a file or process. |
4614 You should bind this variable with `let', but do not set it globally. | |
4615 If this is non-nil, it specifies the coding system that will be used | |
4616 to decode input on read operations, such as from a file or process. | |
4617 It overrides `buffer-file-coding-system-for-read', | |
428 | 4618 `insert-file-contents-pre-hook', etc. Use those variables instead of |
440 | 4619 this one for permanent changes to the environment. */ ); |
428 | 4620 Vcoding_system_for_read = Qnil; |
4621 | |
4622 DEFVAR_LISP ("coding-system-for-write", | |
4623 &Vcoding_system_for_write /* | |
440 | 4624 Overriding coding system used when writing to a file or process. |
4625 You should bind this variable with `let', but do not set it globally. | |
4626 If this is non-nil, it specifies the coding system that will be used | |
4627 to encode output for write operations, such as to a file or process. | |
4628 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc. | |
4629 Use those variables instead of this one for permanent changes to the | |
4630 environment. */ ); | |
428 | 4631 Vcoding_system_for_write = Qnil; |
4632 | |
4633 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* | |
4634 Coding system used to convert pathnames when accessing files. | |
4635 */ ); | |
4636 Vfile_name_coding_system = Qnil; | |
4637 | |
4638 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /* | |
771 | 4639 Setting this has no effect. It is purely for FSF compatibility. |
428 | 4640 */ ); |
4641 enable_multibyte_characters = 1; | |
771 | 4642 |
4643 Vchain_canonicalize_hash_table = | |
4644 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); | |
4645 staticpro (&Vchain_canonicalize_hash_table); | |
4646 | |
4647 #ifdef DEBUG_XEMACS | |
4648 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /* | |
4649 If non-nil, display debug information about detection operations in progress. | |
4650 Information is displayed on stderr. | |
4651 */ ); | |
4652 Vdebug_coding_detection = Qnil; | |
4653 #endif | |
428 | 4654 } |
4655 | |
2297 | 4656 /* #### reformat this for consistent appearance? */ |
4657 | |
428 | 4658 void |
4659 complex_vars_of_file_coding (void) | |
4660 { | |
771 | 4661 Fmake_coding_system |
4662 (Qconvert_eol_cr, Qconvert_eol, | |
4663 build_msg_string ("Convert CR to LF"), | |
4664 nconc2 (list6 (Qdocumentation, | |
4665 build_msg_string ( | |
4666 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" | |
4667 "(used internally and under Unix to mark the end of a line)."), | |
4668 Qmnemonic, build_string ("CR->LF"), | |
4669 Qsubtype, Qcr), | |
4670 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
4671 subsidiaries -- it needs the coding systems we're creating | |
4672 to do so! */ | |
4673 list2 (Qeol_type, Qlf))); | |
4674 | |
4675 Fmake_coding_system | |
4676 (Qconvert_eol_lf, Qconvert_eol, | |
4677 build_msg_string ("Convert LF to LF (do nothing)"), | |
4678 nconc2 (list6 (Qdocumentation, | |
4679 build_msg_string ( | |
4680 "Do nothing."), | |
4681 Qmnemonic, build_string ("LF->LF"), | |
4682 Qsubtype, Qlf), | |
4683 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
4684 subsidiaries -- it needs the coding systems we're creating | |
4685 to do so! */ | |
4686 list2 (Qeol_type, Qlf))); | |
4687 | |
4688 Fmake_coding_system | |
4689 (Qconvert_eol_crlf, Qconvert_eol, | |
4690 build_msg_string ("Convert CRLF to LF"), | |
4691 nconc2 (list6 (Qdocumentation, | |
4692 build_msg_string ( | |
4693 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" | |
4694 "(used internally and under Unix to mark the end of a line)."), | |
4695 Qmnemonic, build_string ("CRLF->LF"), | |
4696 Qsubtype, Qcrlf), | |
4697 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
4698 subsidiaries -- it needs the coding systems we're creating | |
4699 to do so! */ | |
4700 list2 (Qeol_type, Qlf))); | |
4701 | |
4702 Fmake_coding_system | |
4703 (Qconvert_eol_autodetect, Qconvert_eol, | |
4704 build_msg_string ("Autodetect EOL type"), | |
4705 nconc2 (list6 (Qdocumentation, | |
4706 build_msg_string ( | |
4707 "Autodetect the end-of-line type."), | |
4708 Qmnemonic, build_string ("Auto-EOL"), | |
793 | 4709 Qsubtype, Qnil), |
771 | 4710 /* VERY IMPORTANT! Tell make-coding-system not to generate |
4711 subsidiaries -- it needs the coding systems we're creating | |
4712 to do so! */ | |
4713 list2 (Qeol_type, Qlf))); | |
4714 | |
4715 Fmake_coding_system | |
4716 (Qundecided, Qundecided, | |
4717 build_msg_string ("Undecided (auto-detect)"), | |
4718 nconc2 (list4 (Qdocumentation, | |
4719 build_msg_string | |
4720 ("Automatically detects the correct encoding."), | |
4721 Qmnemonic, build_string ("Auto")), | |
4722 list6 (Qdo_eol, Qt, Qdo_coding, Qt, | |
4723 /* We do EOL detection ourselves so we don't need to be | |
4724 wrapped in an EOL detector. (It doesn't actually hurt, | |
4725 though, I don't think.) */ | |
4726 Qeol_type, Qlf))); | |
4727 | |
4728 Fmake_coding_system | |
4729 (intern ("undecided-dos"), Qundecided, | |
4730 build_msg_string ("Undecided (auto-detect) (CRLF)"), | |
4731 nconc2 (list4 (Qdocumentation, | |
4732 build_msg_string | |
4733 ("Automatically detects the correct encoding; EOL type of CRLF forced."), | |
4734 Qmnemonic, build_string ("Auto")), | |
4735 list4 (Qdo_coding, Qt, | |
4736 Qeol_type, Qcrlf))); | |
4737 | |
4738 Fmake_coding_system | |
4739 (intern ("undecided-unix"), Qundecided, | |
4740 build_msg_string ("Undecided (auto-detect) (LF)"), | |
4741 nconc2 (list4 (Qdocumentation, | |
4742 build_msg_string | |
4743 ("Automatically detects the correct encoding; EOL type of LF forced."), | |
4744 Qmnemonic, build_string ("Auto")), | |
4745 list4 (Qdo_coding, Qt, | |
4746 Qeol_type, Qlf))); | |
4747 | |
4748 Fmake_coding_system | |
4749 (intern ("undecided-mac"), Qundecided, | |
4750 build_msg_string ("Undecided (auto-detect) (CR)"), | |
4751 nconc2 (list4 (Qdocumentation, | |
4752 build_msg_string | |
4753 ("Automatically detects the correct encoding; EOL type of CR forced."), | |
4754 Qmnemonic, build_string ("Auto")), | |
4755 list4 (Qdo_coding, Qt, | |
4756 Qeol_type, Qcr))); | |
4757 | |
428 | 4758 /* Need to create this here or we're really screwed. */ |
4759 Fmake_coding_system | |
4760 (Qraw_text, Qno_conversion, | |
771 | 4761 build_msg_string ("Raw Text"), |
4762 list4 (Qdocumentation, | |
4763 build_msg_string ("Raw text converts only line-break codes, and acts otherwise like `binary'."), | |
4764 Qmnemonic, build_string ("Raw"))); | |
428 | 4765 |
4766 Fmake_coding_system | |
4767 (Qbinary, Qno_conversion, | |
771 | 4768 build_msg_string ("Binary"), |
4769 list6 (Qdocumentation, | |
4770 build_msg_string ( | |
4771 "This coding system is as close as it comes to doing no conversion.\n" | |
4772 "On input, each byte is converted directly into the character\n" | |
4773 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" | |
4774 "or `latin-1' character sets. On output, these characters are\n" | |
4775 "converted back to the corresponding bytes, and other characters\n" | |
4776 "are converted to the default character, i.e. `~'."), | |
4777 Qeol_type, Qlf, | |
428 | 4778 Qmnemonic, build_string ("Binary"))); |
4779 | |
771 | 4780 /* Formerly aliased to raw-text! Completely bogus and not even the same |
4781 as FSF Emacs. */ | |
4782 Fdefine_coding_system_alias (Qno_conversion, Qbinary); | |
4783 Fdefine_coding_system_alias (intern ("no-conversion-unix"), | |
4784 intern ("raw-text-unix")); | |
4785 Fdefine_coding_system_alias (intern ("no-conversion-dos"), | |
4786 intern ("raw-text-dos")); | |
4787 Fdefine_coding_system_alias (intern ("no-conversion-mac"), | |
4788 intern ("raw-text-mac")); | |
4789 | |
1318 | 4790 /* These three below will get their defaults set correctly |
4791 in code-init.el. We init them now so we can handle stuff at dump | |
771 | 4792 time before we get to code-init.el. */ |
1318 | 4793 Fdefine_coding_system_alias (Qnative, Qbinary); |
440 | 4794 Fdefine_coding_system_alias (Qterminal, Qbinary); |
4795 Fdefine_coding_system_alias (Qkeyboard, Qbinary); | |
4796 | |
1318 | 4797 Fdefine_coding_system_alias (Qfile_name, Qnative); |
771 | 4798 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf); |
4799 | |
428 | 4800 /* Need this for bootstrapping */ |
771 | 4801 coding_category_system[detector_category_no_conversion] = |
428 | 4802 Fget_coding_system (Qraw_text); |
4803 } |