771
|
1 /* Text encoding conversion functions; coding-system object.
|
|
2 #### rename me to coding-system.c or coding.c
|
428
|
3 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
|
|
4 Copyright (C) 1995 Sun Microsystems, Inc.
|
3025
|
5 Copyright (C) 2000, 2001, 2002, 2003, 2005 Ben Wing.
|
428
|
6
|
|
7 This file is part of XEmacs.
|
|
8
|
|
9 XEmacs is free software; you can redistribute it and/or modify it
|
|
10 under the terms of the GNU General Public License as published by the
|
|
11 Free Software Foundation; either version 2, or (at your option) any
|
|
12 later version.
|
|
13
|
|
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
17 for more details.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License
|
|
20 along with XEmacs; see the file COPYING. If not, write to
|
|
21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
22 Boston, MA 02111-1307, USA. */
|
|
23
|
771
|
24 /* Synched up with: Not in FSF. */
|
|
25
|
|
26 /* Authorship:
|
|
27
|
|
28 Current primary author: Ben Wing <ben@xemacs.org>
|
|
29
|
|
30 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c
|
|
31 from Mule 2.? but probably does not share one line of code with that
|
|
32 original source. Rewriting work started around Dec. 1994. or Jan. 1995.
|
|
33 Proceeded in earnest till Nov. 1995.
|
|
34
|
|
35 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to
|
|
36 file-coding.c, with the intention of using it to do end-of-line conversion
|
|
37 on non-MULE machines (specifically, on Windows machines). He separated
|
|
38 out the MULE stuff from non-MULE using ifdef's, and searched throughout
|
|
39 the rest of the source tree looking for coding-system-related code that
|
|
40 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS.
|
|
41
|
|
42 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system
|
|
43 types, providing a primitive means of decoding and encoding externally-
|
|
44 formatted Unicode/UCS_4 and Unicode/UTF_8 data.
|
|
45
|
|
46 January 25, 2000, Martin Buchholz redid and fleshed out the coding
|
|
47 system alias handling that was first added in prototype form by
|
|
48 Hrvoje Niksic, April 15, 1999.
|
|
49
|
|
50 April to May 2000, Ben Wing: More major reorganization. Adding features
|
|
51 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the
|
|
52 "chain" coding system for chaining two together, and doing a lot of
|
|
53 reorganization in preparation for properly abstracting out the different
|
|
54 coding system types.
|
|
55
|
|
56 June 2001, Ben Wing: Added Unicode support. Eliminated previous
|
|
57 junky Unicode translation support.
|
|
58
|
|
59 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished
|
|
60 abstracting everything except detection, which is hard to abstract (see
|
|
61 just below).
|
|
62
|
|
63 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code
|
|
64 to intl-win32.c. Lots more rewriting; very little code is untouched
|
|
65 from before April 2000. Abstracted the detection code, added multiple
|
|
66 levels of likelihood to increase the reliability of the algorithm.
|
|
67
|
|
68 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined.
|
|
69 Removed the conditionals.
|
|
70 */
|
|
71
|
428
|
72 #include <config.h>
|
|
73 #include "lisp.h"
|
|
74
|
|
75 #include "buffer.h"
|
|
76 #include "elhash.h"
|
|
77 #include "insdel.h"
|
|
78 #include "lstream.h"
|
440
|
79 #include "opaque.h"
|
771
|
80 #include "file-coding.h"
|
|
81
|
|
82 #ifdef HAVE_ZLIB
|
|
83 #include "zlib.h"
|
428
|
84 #endif
|
|
85
|
|
86 Lisp_Object Vkeyboard_coding_system;
|
|
87 Lisp_Object Vterminal_coding_system;
|
|
88 Lisp_Object Vcoding_system_for_read;
|
|
89 Lisp_Object Vcoding_system_for_write;
|
|
90 Lisp_Object Vfile_name_coding_system;
|
|
91
|
771
|
92 #ifdef DEBUG_XEMACS
|
|
93 Lisp_Object Vdebug_coding_detection;
|
440
|
94 #endif
|
771
|
95
|
|
96 typedef struct coding_system_type_entry
|
|
97 {
|
|
98 struct coding_system_methods *meths;
|
|
99 } coding_system_type_entry;
|
|
100
|
|
101 typedef struct
|
|
102 {
|
|
103 Dynarr_declare (coding_system_type_entry);
|
|
104 } coding_system_type_entry_dynarr;
|
|
105
|
|
106 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr;
|
|
107
|
1204
|
108 static const struct memory_description cste_description_1[] = {
|
2551
|
109 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1,
|
|
110 { &coding_system_methods_description } },
|
771
|
111 { XD_END }
|
|
112 };
|
|
113
|
1204
|
114 static const struct sized_memory_description cste_description = {
|
771
|
115 sizeof (coding_system_type_entry),
|
|
116 cste_description_1
|
|
117 };
|
|
118
|
1204
|
119 static const struct memory_description csted_description_1[] = {
|
771
|
120 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description),
|
428
|
121 { XD_END }
|
|
122 };
|
|
123
|
1204
|
124 static const struct sized_memory_description csted_description = {
|
771
|
125 sizeof (coding_system_type_entry_dynarr),
|
|
126 csted_description_1
|
|
127 };
|
|
128
|
|
129 static Lisp_Object Vcoding_system_type_list;
|
|
130
|
|
131 /* Coding system currently associated with each coding category. */
|
|
132 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES];
|
|
133
|
|
134 /* Table of all coding categories in decreasing order of priority.
|
|
135 This describes a permutation of the possible coding categories. */
|
|
136 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES];
|
|
137
|
|
138 /* Value used to give a unique name to nameless coding systems. */
|
|
139 int coding_system_tick;
|
|
140
|
|
141 int coding_detector_count;
|
|
142 int coding_detector_category_count;
|
|
143
|
|
144 detector_dynarr *all_coding_detectors;
|
|
145
|
1204
|
146 static const struct memory_description struct_detector_category_description_1[]
|
771
|
147 =
|
|
148 {
|
|
149 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) },
|
|
150 { XD_END }
|
|
151 };
|
|
152
|
1204
|
153 static const struct sized_memory_description struct_detector_category_description =
|
771
|
154 {
|
|
155 sizeof (struct detector_category),
|
|
156 struct_detector_category_description_1
|
428
|
157 };
|
|
158
|
1204
|
159 static const struct memory_description detector_category_dynarr_description_1[] =
|
771
|
160 {
|
|
161 XD_DYNARR_DESC (detector_category_dynarr,
|
|
162 &struct_detector_category_description),
|
|
163 { XD_END }
|
|
164 };
|
|
165
|
1204
|
166 static const struct sized_memory_description detector_category_dynarr_description = {
|
771
|
167 sizeof (detector_category_dynarr),
|
|
168 detector_category_dynarr_description_1
|
|
169 };
|
|
170
|
1204
|
171 static const struct memory_description struct_detector_description_1[]
|
771
|
172 =
|
|
173 {
|
2367
|
174 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1,
|
2551
|
175 { &detector_category_dynarr_description } },
|
771
|
176 { XD_END }
|
|
177 };
|
|
178
|
1204
|
179 static const struct sized_memory_description struct_detector_description =
|
771
|
180 {
|
|
181 sizeof (struct detector),
|
|
182 struct_detector_description_1
|
|
183 };
|
|
184
|
1204
|
185 static const struct memory_description detector_dynarr_description_1[] =
|
771
|
186 {
|
|
187 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description),
|
|
188 { XD_END }
|
|
189 };
|
|
190
|
1204
|
191 static const struct sized_memory_description detector_dynarr_description = {
|
771
|
192 sizeof (detector_dynarr),
|
|
193 detector_dynarr_description_1
|
|
194 };
|
428
|
195
|
|
196 Lisp_Object Qcoding_systemp;
|
|
197
|
771
|
198 Lisp_Object Qraw_text;
|
428
|
199
|
|
200 Lisp_Object Qmnemonic, Qeol_type;
|
|
201 Lisp_Object Qcr, Qcrlf, Qlf;
|
|
202 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf;
|
|
203 Lisp_Object Qpost_read_conversion;
|
|
204 Lisp_Object Qpre_write_conversion;
|
|
205
|
771
|
206 Lisp_Object Qtranslation_table_for_decode;
|
|
207 Lisp_Object Qtranslation_table_for_encode;
|
|
208 Lisp_Object Qsafe_chars;
|
|
209 Lisp_Object Qsafe_charsets;
|
|
210 Lisp_Object Qmime_charset;
|
|
211 Lisp_Object Qvalid_codes;
|
|
212
|
|
213 Lisp_Object Qno_conversion;
|
|
214 Lisp_Object Qconvert_eol;
|
440
|
215 Lisp_Object Qescape_quoted;
|
771
|
216 Lisp_Object Qencode, Qdecode;
|
|
217
|
|
218 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf;
|
|
219 Lisp_Object Qconvert_eol_autodetect;
|
|
220
|
|
221 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely;
|
1494
|
222 Lisp_Object Qslightly_likely;
|
771
|
223 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable;
|
|
224 Lisp_Object Qnearly_impossible;
|
|
225
|
|
226 Lisp_Object Qdo_eol, Qdo_coding;
|
|
227
|
|
228 Lisp_Object Qcanonicalize_after_coding;
|
|
229
|
1347
|
230 Lisp_Object QScoding_system_cookie;
|
|
231
|
4303
|
232 Lisp_Object Qposix_charset_to_coding_system_hash;
|
|
233
|
771
|
234 /* This is used to convert autodetected coding systems into existing
|
|
235 systems. For example, the chain undecided->convert-eol-autodetect may
|
|
236 have its separate parts detected as mswindows-multibyte and
|
|
237 convert-eol-crlf, and the result needs to be mapped to
|
|
238 mswindows-multibyte-dos. */
|
|
239 /* #### It's not clear we need this whole chain-canonicalize mechanism
|
|
240 any more. */
|
|
241 static Lisp_Object Vchain_canonicalize_hash_table;
|
|
242
|
|
243 #ifdef HAVE_ZLIB
|
|
244 Lisp_Object Qgzip;
|
428
|
245 #endif
|
771
|
246
|
2297
|
247 /* Maps symbols (coding system names) to either coding system objects or
|
|
248 (for aliases) other names. */
|
771
|
249 static Lisp_Object Vcoding_system_hash_table;
|
428
|
250
|
|
251 int enable_multibyte_characters;
|
|
252
|
|
253 EXFUN (Fcopy_coding_system, 2);
|
|
254
|
|
255
|
|
256 /************************************************************************/
|
771
|
257 /* Coding system object methods */
|
428
|
258 /************************************************************************/
|
|
259
|
|
260 static Lisp_Object
|
|
261 mark_coding_system (Lisp_Object obj)
|
|
262 {
|
|
263 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
|
|
264
|
1204
|
265 #define MARKED_SLOT(x) mark_object (codesys->x);
|
|
266 #include "coding-system-slots.h"
|
771
|
267
|
|
268 MAYBE_CODESYSMETH (codesys, mark, (obj));
|
428
|
269
|
1204
|
270 return Qnil;
|
428
|
271 }
|
|
272
|
|
273 static void
|
771
|
274 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun)
|
|
275 {
|
|
276 Lisp_Coding_System *c = XCODING_SYSTEM (obj);
|
|
277 print_internal (c->methods->type, printcharfun, 1);
|
|
278 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1));
|
|
279 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT)
|
|
280 write_fmt_string_lisp (printcharfun, " eol-type=%s",
|
|
281 1, Fcoding_system_property (obj, Qeol_type));
|
|
282 }
|
|
283
|
|
284 static void
|
428
|
285 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
|
2286
|
286 int UNUSED (escapeflag))
|
428
|
287 {
|
|
288 Lisp_Coding_System *c = XCODING_SYSTEM (obj);
|
|
289 if (print_readably)
|
771
|
290 printing_unreadable_object
|
|
291 ("printing unreadable object #<coding-system 0x%x>", c->header.uid);
|
|
292
|
|
293 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name);
|
|
294 print_coding_system_properties (obj, printcharfun);
|
826
|
295 write_c_string (printcharfun, ">");
|
428
|
296 }
|
|
297
|
771
|
298 /* Print an abbreviated version of a coding system (but still containing
|
|
299 all the information), for use within a coding system print method. */
|
|
300
|
|
301 static void
|
|
302 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun,
|
2286
|
303 int UNUSED (escapeflag))
|
771
|
304 {
|
800
|
305 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs));
|
771
|
306 print_coding_system_properties (cs, printcharfun);
|
826
|
307 write_c_string (printcharfun, "]");
|
771
|
308 }
|
|
309
|
3263
|
310 #ifndef NEW_GC
|
428
|
311 static void
|
|
312 finalize_coding_system (void *header, int for_disksave)
|
|
313 {
|
771
|
314 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header);
|
428
|
315 /* Since coding systems never go away, this function is not
|
|
316 necessary. But it would be necessary if we changed things
|
|
317 so that coding systems could go away. */
|
|
318 if (!for_disksave) /* see comment in lstream.c */
|
771
|
319 MAYBE_XCODESYSMETH (cs, finalize, (cs));
|
|
320 }
|
3263
|
321 #endif /* not NEW_GC */
|
771
|
322
|
|
323 static Bytecount
|
|
324 sizeof_coding_system (const void *header)
|
|
325 {
|
|
326 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header;
|
|
327 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size;
|
428
|
328 }
|
|
329
|
1204
|
330 static const struct memory_description coding_system_methods_description_1[]
|
771
|
331 = {
|
|
332 { XD_LISP_OBJECT,
|
|
333 offsetof (struct coding_system_methods, type) },
|
|
334 { XD_LISP_OBJECT,
|
|
335 offsetof (struct coding_system_methods, predicate_symbol) },
|
|
336 { XD_END }
|
|
337 };
|
|
338
|
1204
|
339 const struct sized_memory_description coding_system_methods_description = {
|
771
|
340 sizeof (struct coding_system_methods),
|
|
341 coding_system_methods_description_1
|
|
342 };
|
|
343
|
1204
|
344 static const struct sized_memory_description coding_system_extra_description_map[] =
|
|
345 {
|
|
346 { offsetof (Lisp_Coding_System, methods) },
|
|
347 { offsetof (struct coding_system_methods, extra_description) },
|
|
348 { -1 },
|
771
|
349 };
|
|
350
|
1204
|
351 static const struct memory_description coding_system_description[] =
|
428
|
352 {
|
2367
|
353 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1,
|
2551
|
354 { &coding_system_methods_description } },
|
1204
|
355 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) },
|
|
356 #define MARKED_SLOT_ARRAY(slot, size) \
|
|
357 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size },
|
|
358 #include "coding-system-slots.h"
|
2367
|
359 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1,
|
2551
|
360 { coding_system_extra_description_map } },
|
1204
|
361 { XD_END }
|
771
|
362 };
|
|
363
|
1204
|
364 static const struct memory_description coding_system_empty_extra_description_1[] =
|
|
365 {
|
|
366 { XD_END }
|
|
367 };
|
|
368
|
|
369 const struct sized_memory_description coding_system_empty_extra_description = {
|
|
370 0, coding_system_empty_extra_description_1
|
|
371 };
|
|
372
|
3263
|
373 #ifdef NEW_GC
|
|
374 DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION ("coding-system", coding_system,
|
|
375 1, /*dumpable-flag*/
|
|
376 mark_coding_system,
|
|
377 print_coding_system,
|
|
378 0, 0, 0, coding_system_description,
|
|
379 sizeof_coding_system,
|
|
380 Lisp_Coding_System);
|
|
381 #else /* not NEW_GC */
|
934
|
382 DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION ("coding-system", coding_system,
|
|
383 1, /*dumpable-flag*/
|
|
384 mark_coding_system,
|
|
385 print_coding_system,
|
|
386 finalize_coding_system,
|
|
387 0, 0, coding_system_description,
|
|
388 sizeof_coding_system,
|
|
389 Lisp_Coding_System);
|
3263
|
390 #endif /* not NEW_GC */
|
771
|
391
|
|
392 /************************************************************************/
|
|
393 /* Creating coding systems */
|
|
394 /************************************************************************/
|
|
395
|
|
396 static struct coding_system_methods *
|
|
397 decode_coding_system_type (Lisp_Object type, Error_Behavior errb)
|
428
|
398 {
|
771
|
399 int i;
|
|
400
|
|
401 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++)
|
428
|
402 {
|
771
|
403 if (EQ (type,
|
|
404 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type))
|
|
405 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths;
|
428
|
406 }
|
771
|
407
|
|
408 maybe_invalid_constant ("Invalid coding system type", type,
|
|
409 Qcoding_system, errb);
|
|
410
|
|
411 return 0;
|
428
|
412 }
|
|
413
|
771
|
414 static int
|
|
415 valid_coding_system_type_p (Lisp_Object type)
|
428
|
416 {
|
771
|
417 return decode_coding_system_type (type, ERROR_ME_NOT) != 0;
|
|
418 }
|
|
419
|
|
420 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /*
|
|
421 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid.
|
|
422 Valid types depend on how XEmacs was compiled but may include
|
3025
|
423 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis',
|
|
424 `utf-16', `ucs-4', `utf-8', etc.
|
771
|
425 */
|
|
426 (coding_system_type))
|
|
427 {
|
|
428 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil;
|
|
429 }
|
|
430
|
|
431 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /*
|
|
432 Return a list of valid coding system types.
|
|
433 */
|
|
434 ())
|
|
435 {
|
|
436 return Fcopy_sequence (Vcoding_system_type_list);
|
|
437 }
|
|
438
|
|
439 void
|
|
440 add_entry_to_coding_system_type_list (struct coding_system_methods *meths)
|
|
441 {
|
|
442 struct coding_system_type_entry entry;
|
|
443
|
|
444 entry.meths = meths;
|
|
445 Dynarr_add (the_coding_system_type_entry_dynarr, entry);
|
|
446 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list);
|
428
|
447 }
|
|
448
|
|
449 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /*
|
|
450 Return t if OBJECT is a coding system.
|
|
451 A coding system is an object that defines how text containing multiple
|
|
452 character sets is encoded into a stream of (typically 8-bit) bytes.
|
|
453 The coding system is used to decode the stream into a series of
|
|
454 characters (which may be from multiple charsets) when the text is read
|
|
455 from a file or process, and is used to encode the text back into the
|
|
456 same format when it is written out to a file or process.
|
|
457
|
|
458 For example, many ISO2022-compliant coding systems (such as Compound
|
|
459 Text, which is used for inter-client data under the X Window System)
|
|
460 use escape sequences to switch between different charsets -- Japanese
|
|
461 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked
|
|
462 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See
|
|
463 `make-coding-system' for more information.
|
|
464
|
|
465 Coding systems are normally identified using a symbol, and the
|
|
466 symbol is accepted in place of the actual coding system object whenever
|
|
467 a coding system is called for. (This is similar to how faces work.)
|
|
468 */
|
|
469 (object))
|
|
470 {
|
|
471 return CODING_SYSTEMP (object) ? Qt : Qnil;
|
|
472 }
|
|
473
|
4303
|
474 static Lisp_Object
|
|
475 find_coding_system (Lisp_Object coding_system_or_name,
|
|
476 int do_autoloads)
|
|
477 {
|
|
478 Lisp_Object lookup;
|
|
479
|
|
480 if (NILP (coding_system_or_name))
|
|
481 coding_system_or_name = Qbinary;
|
|
482 else if (CODING_SYSTEMP (coding_system_or_name))
|
|
483 return coding_system_or_name;
|
|
484 else
|
|
485 CHECK_SYMBOL (coding_system_or_name);
|
|
486
|
|
487 while (1)
|
|
488 {
|
|
489 lookup =
|
|
490 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
|
|
491
|
|
492 if (CONSP (lookup) && do_autoloads)
|
|
493 {
|
|
494 struct gcpro gcpro1;
|
|
495 int length;
|
|
496 DECLARE_EISTRING (desired_base);
|
|
497 DECLARE_EISTRING (warning_info);
|
|
498
|
|
499 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name));
|
|
500
|
|
501 /* Work out the name of the base coding system. */
|
|
502 length = eilen (desired_base);
|
|
503 if (length > (int)(sizeof ("-unix") - 1))
|
|
504 {
|
|
505 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base))
|
|
506 + (length - (sizeof ("-unix") - 1))))
|
|
507 {
|
|
508 eidel (desired_base, length - (sizeof ("-unix") - 1),
|
|
509 -1, 5, 5);
|
|
510 }
|
|
511 }
|
|
512 else if (length > (int)(sizeof ("-dos") - 1))
|
|
513 {
|
|
514 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base))
|
|
515 + (length - (sizeof ("-dos") - 1)))) ||
|
|
516 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base))
|
|
517 + (length - (sizeof ("-mac") - 1)))))
|
|
518 {
|
|
519 eidel (desired_base, length - (sizeof ("-dos") - 1), -1,
|
|
520 4, 4);
|
|
521 }
|
|
522 }
|
|
523
|
|
524 coding_system_or_name = intern_int (eidata (desired_base));
|
|
525
|
|
526 /* Remove this coding system and its subsidiary coding
|
|
527 systems from the hash, to avoid calling this code recursively. */
|
|
528 Fremhash (coding_system_or_name, Vcoding_system_hash_table);
|
|
529 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"),
|
|
530 Vcoding_system_hash_table);
|
|
531 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"),
|
|
532 Vcoding_system_hash_table);
|
|
533 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"),
|
|
534 Vcoding_system_hash_table);
|
|
535
|
|
536 eicpy_ascii (warning_info, "Error autoloading coding system ");
|
|
537 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name));
|
|
538
|
|
539 /* Keep around the form so it doesn't disappear from under
|
|
540 #'eval's feet. */
|
|
541 GCPRO1 (lookup);
|
|
542 call1_trapping_problems ((const CIbyte *)eidata (warning_info),
|
|
543 Qeval, lookup, 0);
|
|
544 UNGCPRO;
|
|
545
|
|
546 lookup =
|
|
547 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
|
|
548 }
|
|
549
|
|
550 if (CODING_SYSTEMP (lookup) || NILP (lookup))
|
|
551 return lookup;
|
|
552
|
|
553 coding_system_or_name = lookup;
|
|
554 }
|
|
555 }
|
|
556
|
428
|
557 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /*
|
|
558 Retrieve the coding system of the given name.
|
|
559
|
|
560 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply
|
|
561 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol.
|
|
562 If there is no such coding system, nil is returned. Otherwise the
|
|
563 associated coding system object is returned.
|
|
564 */
|
|
565 (coding_system_or_name))
|
|
566 {
|
4303
|
567 return find_coding_system(coding_system_or_name, 1);
|
|
568 }
|
|
569
|
|
570 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /*
|
|
571 Define SYMBOL as a coding-system that is loaded on demand.
|
|
572
|
|
573 FORM is a form to evaluate to define the coding-system.
|
|
574 */
|
|
575 (symbol, form))
|
|
576 {
|
|
577 Lisp_Object lookup;
|
|
578
|
|
579 CHECK_SYMBOL (symbol);
|
|
580 CHECK_CONS (form);
|
|
581
|
|
582 lookup = find_coding_system (symbol, 0);
|
|
583
|
|
584 if (!NILP (lookup) &&
|
|
585 /* Allow autoloads to be redefined. */
|
|
586 !CONSP (lookup))
|
440
|
587 {
|
4303
|
588 invalid_operation ("Cannot redefine existing coding system",
|
|
589 symbol);
|
440
|
590 }
|
4303
|
591
|
|
592 Fputhash (symbol, form, Vcoding_system_hash_table);
|
|
593 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form,
|
|
594 Vcoding_system_hash_table);
|
|
595 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form,
|
|
596 Vcoding_system_hash_table);
|
|
597 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form,
|
|
598 Vcoding_system_hash_table);
|
|
599
|
|
600 /* Tell the POSIX locale infrastructure about this coding system (though
|
|
601 unfortunately it'll be too late for the startup locale sniffing. */
|
|
602 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash))
|
|
603 {
|
|
604 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash);
|
|
605 DECLARE_EISTRING (minimal_name);
|
|
606 Ibyte *full_name;
|
|
607 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i;
|
|
608
|
|
609 if (!NILP (val))
|
|
610 {
|
|
611 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol));
|
|
612 for (i = 0; i < len; ++i)
|
|
613 {
|
|
614 if (full_name[i] >= '0' && full_name[i] <= '9')
|
|
615 {
|
|
616 eicat_ch (minimal_name, full_name[i]);
|
|
617 }
|
|
618 else if (full_name[i] >= 'a' && full_name[i] <= 'z')
|
|
619 {
|
|
620 eicat_ch (minimal_name, full_name[i]);
|
|
621 }
|
|
622 else if (full_name[i] >= 'A' && full_name[i] <= 'Z')
|
|
623 {
|
|
624 eicat_ch (minimal_name, full_name[i] +
|
|
625 ('a' - 'A'));
|
|
626 }
|
|
627 }
|
|
628
|
|
629 if (eilen (minimal_name))
|
|
630 {
|
|
631 CHECK_HASH_TABLE (val);
|
|
632 Fputhash (eimake_string(minimal_name), symbol, val);
|
|
633 }
|
|
634 }
|
|
635 }
|
|
636
|
|
637 return Qt;
|
428
|
638 }
|
|
639
|
|
640 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
|
|
641 Retrieve the coding system of the given name.
|
|
642 Same as `find-coding-system' except that if there is no such
|
|
643 coding system, an error is signaled instead of returning nil.
|
|
644 */
|
|
645 (name))
|
|
646 {
|
|
647 Lisp_Object coding_system = Ffind_coding_system (name);
|
|
648
|
|
649 if (NILP (coding_system))
|
563
|
650 invalid_argument ("No such coding system", name);
|
428
|
651 return coding_system;
|
|
652 }
|
|
653
|
771
|
654 int
|
|
655 coding_system_is_binary (Lisp_Object coding_system)
|
|
656 {
|
|
657 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
|
|
658 return
|
|
659 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) &&
|
|
660 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF &&
|
|
661 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) &&
|
|
662 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil));
|
|
663 }
|
|
664
|
|
665 static Lisp_Object
|
|
666 coding_system_real_canonical (Lisp_Object cs)
|
|
667 {
|
|
668 if (!NILP (XCODING_SYSTEM_CANONICAL (cs)))
|
|
669 return XCODING_SYSTEM_CANONICAL (cs);
|
|
670 return cs;
|
|
671 }
|
|
672
|
|
673 /* Return true if coding system is of the "standard" type that decodes
|
|
674 bytes into characters (suitable for decoding a text file). */
|
|
675 int
|
|
676 coding_system_is_for_text_file (Lisp_Object coding_system)
|
|
677 {
|
|
678 return (XCODESYSMETH_OR_GIVEN
|
|
679 (coding_system, conversion_end_type,
|
|
680 (coding_system_real_canonical (coding_system)),
|
|
681 DECODES_BYTE_TO_CHARACTER) ==
|
|
682 DECODES_BYTE_TO_CHARACTER);
|
|
683 }
|
|
684
|
|
685 static int
|
|
686 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex)
|
|
687 {
|
|
688 enum source_sink_type type =
|
|
689 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type,
|
|
690 (coding_system_real_canonical (cs)),
|
|
691 DECODES_BYTE_TO_CHARACTER);
|
|
692 if (sex == CODING_SOURCE)
|
|
693 return (type == DECODES_CHARACTER_TO_CHARACTER ||
|
|
694 type == DECODES_CHARACTER_TO_BYTE);
|
|
695 else
|
|
696 return (type == DECODES_CHARACTER_TO_CHARACTER ||
|
|
697 type == DECODES_BYTE_TO_CHARACTER);
|
|
698 }
|
|
699
|
|
700 static int
|
|
701 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex)
|
|
702 {
|
|
703 return decoding_source_sink_type_is_char (cs,
|
|
704 /* Sex change */
|
|
705 sex == CODING_SOURCE ?
|
|
706 CODING_SINK : CODING_SOURCE);
|
|
707 }
|
|
708
|
|
709 /* Like Ffind_coding_system() but check that the coding system is of the
|
|
710 "standard" type that decodes bytes into characters (suitable for
|
|
711 decoding a text file), and if not, returns an appropriate wrapper that
|
|
712 does. Also, if EOL_WRAP is non-zero, check whether this coding system
|
|
713 wants EOL auto-detection, and if so, wrap with a convert-eol coding
|
|
714 system to do this. */
|
|
715
|
|
716 Lisp_Object
|
|
717 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap)
|
|
718 {
|
|
719 Lisp_Object coding_system = Ffind_coding_system (name);
|
|
720 Lisp_Object wrapper = coding_system;
|
|
721
|
|
722 if (NILP (coding_system))
|
|
723 return Qnil;
|
|
724 if (!coding_system_is_for_text_file (coding_system))
|
|
725 {
|
|
726 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system);
|
|
727 if (NILP (wrapper))
|
|
728 {
|
|
729 Lisp_Object chain;
|
|
730 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK))
|
|
731 chain = list2 (coding_system, Qbinary);
|
|
732 else
|
|
733 chain = list1 (coding_system);
|
|
734 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE))
|
|
735 chain = Fcons (Qbinary, chain);
|
|
736 wrapper =
|
|
737 make_internal_coding_system
|
|
738 (coding_system,
|
|
739 "internal-text-file-wrapper",
|
|
740 Qchain,
|
|
741 Qunbound, list4 (Qchain, chain,
|
|
742 Qcanonicalize_after_coding, coding_system));
|
|
743 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper;
|
|
744 }
|
|
745 }
|
|
746
|
|
747 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT)
|
|
748 return wrapper;
|
|
749
|
|
750 coding_system = wrapper;
|
|
751 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system);
|
|
752 if (!NILP (wrapper))
|
|
753 return wrapper;
|
|
754 wrapper =
|
|
755 make_internal_coding_system
|
|
756 (coding_system,
|
|
757 "internal-auto-eol-wrapper",
|
|
758 Qundecided, Qunbound,
|
|
759 list4 (Qcoding_system, coding_system,
|
|
760 Qdo_eol, Qt));
|
|
761 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper;
|
|
762 return wrapper;
|
|
763 }
|
|
764
|
|
765 /* Like Fget_coding_system() but verify that the coding system is of the
|
|
766 "standard" type that decodes bytes into characters (suitable for
|
|
767 decoding a text file), and if not, returns an appropriate wrapper that
|
|
768 does. Also, if EOL_WRAP is non-zero, check whether this coding system
|
|
769 wants EOL auto-detection, and if so, wrap with a convert-eol coding
|
|
770 system to do this. */
|
|
771
|
|
772 Lisp_Object
|
|
773 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap)
|
|
774 {
|
|
775 Lisp_Object coding_system = find_coding_system_for_text_file (name,
|
|
776 eol_wrap);
|
|
777 if (NILP (coding_system))
|
|
778 invalid_argument ("No such coding system", name);
|
|
779 return coding_system;
|
|
780 }
|
|
781
|
|
782 /* We store the coding systems in hash tables with the names as the
|
|
783 key and the actual coding system object as the value. Occasionally
|
|
784 we need to use them in a list format. These routines provide us
|
|
785 with that. */
|
428
|
786 struct coding_system_list_closure
|
|
787 {
|
|
788 Lisp_Object *coding_system_list;
|
771
|
789 int normal;
|
|
790 int internal;
|
428
|
791 };
|
|
792
|
|
793 static int
|
4303
|
794 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value,
|
428
|
795 void *coding_system_list_closure)
|
|
796 {
|
|
797 /* This function can GC */
|
|
798 struct coding_system_list_closure *cscl =
|
|
799 (struct coding_system_list_closure *) coding_system_list_closure;
|
|
800 Lisp_Object *coding_system_list = cscl->coding_system_list;
|
|
801
|
771
|
802 /* We can't just use VALUE because KEY might be an alias, and we need
|
4303
|
803 the real coding system object.
|
|
804
|
|
805 Autoloaded coding systems have conses for their values, and can't be
|
|
806 internal coding systems, or coding system aliases. */
|
|
807 if (CONSP (value) ||
|
|
808 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ?
|
|
809 cscl->internal : cscl->normal))
|
771
|
810 *coding_system_list = Fcons (key, *coding_system_list);
|
428
|
811 return 0;
|
|
812 }
|
|
813
|
2297
|
814 /* #### should we specify a conventional for "all coding systems"? */
|
771
|
815 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /*
|
428
|
816 Return a list of the names of all defined coding systems.
|
771
|
817 If INTERNAL is nil, only the normal (non-internal) coding systems are
|
|
818 included. (Internal coding systems are created for various internal
|
|
819 purposes, such as implementing EOL types of CRLF and CR; generally, you do
|
|
820 not want to see these.) If it is t, only the internal coding systems are
|
|
821 included. If it is any other non-nil value both normal and internal are
|
|
822 included.
|
428
|
823 */
|
771
|
824 (internal))
|
428
|
825 {
|
|
826 Lisp_Object coding_system_list = Qnil;
|
|
827 struct gcpro gcpro1;
|
|
828 struct coding_system_list_closure coding_system_list_closure;
|
|
829
|
|
830 GCPRO1 (coding_system_list);
|
|
831 coding_system_list_closure.coding_system_list = &coding_system_list;
|
771
|
832 coding_system_list_closure.normal = !EQ (internal, Qt);
|
|
833 coding_system_list_closure.internal = !NILP (internal);
|
428
|
834 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table,
|
|
835 &coding_system_list_closure);
|
|
836 UNGCPRO;
|
|
837
|
|
838 return coding_system_list;
|
|
839 }
|
|
840
|
|
841 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /*
|
|
842 Return the name of the given coding system.
|
|
843 */
|
|
844 (coding_system))
|
|
845 {
|
|
846 coding_system = Fget_coding_system (coding_system);
|
|
847 return XCODING_SYSTEM_NAME (coding_system);
|
|
848 }
|
|
849
|
|
850 static Lisp_Coding_System *
|
771
|
851 allocate_coding_system (struct coding_system_methods *codesys_meths,
|
|
852 Bytecount data_size,
|
|
853 Lisp_Object name)
|
428
|
854 {
|
771
|
855 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size;
|
428
|
856 Lisp_Coding_System *codesys =
|
3024
|
857 (Lisp_Coding_System *) BASIC_ALLOC_LCRECORD (total_size,
|
1204
|
858 &lrecord_coding_system);
|
|
859
|
771
|
860 codesys->methods = codesys_meths;
|
1204
|
861 #define MARKED_SLOT(x) codesys->x = Qnil;
|
|
862 #include "coding-system-slots.h"
|
|
863
|
771
|
864 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF;
|
|
865 CODING_SYSTEM_NAME (codesys) = name;
|
|
866
|
|
867 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys)));
|
428
|
868
|
|
869 return codesys;
|
|
870 }
|
|
871
|
771
|
872 static enum eol_type
|
|
873 symbol_to_eol_type (Lisp_Object symbol)
|
|
874 {
|
|
875 CHECK_SYMBOL (symbol);
|
|
876 if (NILP (symbol)) return EOL_AUTODETECT;
|
|
877 if (EQ (symbol, Qlf)) return EOL_LF;
|
|
878 if (EQ (symbol, Qcrlf)) return EOL_CRLF;
|
|
879 if (EQ (symbol, Qcr)) return EOL_CR;
|
|
880
|
|
881 invalid_constant ("Unrecognized eol type", symbol);
|
1204
|
882 RETURN_NOT_REACHED (EOL_AUTODETECT);
|
771
|
883 }
|
|
884
|
|
885 static Lisp_Object
|
|
886 eol_type_to_symbol (enum eol_type type)
|
|
887 {
|
|
888 switch (type)
|
|
889 {
|
2500
|
890 default: ABORT ();
|
771
|
891 case EOL_LF: return Qlf;
|
|
892 case EOL_CRLF: return Qcrlf;
|
|
893 case EOL_CR: return Qcr;
|
|
894 case EOL_AUTODETECT: return Qnil;
|
|
895 }
|
|
896 }
|
|
897
|
|
/* Describes one subsidiary coding system that setup_eol_coding_systems()
   derives from a parent coding system. */
struct subsidiary_type
{
  /* Suffix appended to the parent's name to form the subsidiary's name. */
  Ascbyte *extension;
  /* Suffix appended to the parent's mnemonic (when it has one). */
  Ascbyte *mnemonic_ext;
  /* EOL type assigned to the subsidiary. */
  enum eol_type eol;
};
|
|
904
|
|
/* The three subsidiaries created by setup_eol_coding_systems(): one each
   for LF, CRLF and CR line endings.  Entries are
   { name suffix, mnemonic suffix, EOL type }. */
static struct subsidiary_type coding_subsidiary_list[] =
{ { "-unix", "", EOL_LF },
  { "-dos", ":T", EOL_CRLF },
  { "-mac", ":t", EOL_CR } };
|
|
909
|
|
910 /* kludge */
|
428
|
911 static void
|
771
|
912 setup_eol_coding_systems (Lisp_Object codesys)
|
428
|
913 {
|
793
|
914 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name);
|
2367
|
915 Ibyte *codesys_name = alloca_ibytes (len + 7);
|
771
|
916 int mlen = -1;
|
867
|
917 Ibyte *codesys_mnemonic = 0;
|
771
|
918 Lisp_Object codesys_name_sym, sub_codesys;
|
|
919 int i;
|
|
920
|
|
921 memcpy (codesys_name,
|
793
|
922 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len);
|
771
|
923
|
|
924 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys)))
|
428
|
925 {
|
771
|
926 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys));
|
2367
|
927 codesys_mnemonic = alloca_ibytes (mlen + 7);
|
771
|
928 memcpy (codesys_mnemonic,
|
|
929 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen);
|
|
930 }
|
|
931
|
|
932 /* Create three "subsidiary" coding systems, decoding data encoded using
|
|
933 each of the three EOL types. We do this for each subsidiary by
|
|
934 copying the original coding system, setting the EOL type
|
|
935 appropriately, and setting the CANONICAL member of the new coding
|
|
936 system to be a chain consisting of the original coding system followed
|
|
937 by a convert-eol coding system to do the EOL decoding. For EOL type
|
|
938 LF, however, we don't need any decoding, so we skip creating a
|
|
939 CANONICAL.
|
|
940
|
|
941 If the original coding system is not a text-type coding system
|
|
942 (decodes byte->char), we need to coerce it to one by the appropriate
|
|
943 wrapping in CANONICAL. */
|
|
944
|
|
945 for (i = 0; i < countof (coding_subsidiary_list); i++)
|
|
946 {
|
2367
|
947 Ascbyte *extension = coding_subsidiary_list[i].extension;
|
|
948 Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext;
|
771
|
949 enum eol_type eol = coding_subsidiary_list[i].eol;
|
|
950
|
2367
|
951 qxestrcpy_ascii (codesys_name + len, extension);
|
771
|
952 codesys_name_sym = intern_int (codesys_name);
|
|
953 if (mlen != -1)
|
2367
|
954 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext);
|
771
|
955
|
|
956 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym);
|
|
957 if (mlen != -1)
|
|
958 XCODING_SYSTEM_MNEMONIC (sub_codesys) =
|
|
959 build_intstring (codesys_mnemonic);
|
|
960
|
|
961 if (eol != EOL_LF)
|
|
962 {
|
|
963 Lisp_Object chain = list2 (get_coding_system_for_text_file
|
|
964 (codesys, 0),
|
|
965 eol == EOL_CR ? Qconvert_eol_cr :
|
|
966 Qconvert_eol_crlf);
|
|
967 Lisp_Object canon =
|
|
968 make_internal_coding_system
|
|
969 (sub_codesys, "internal-subsidiary-eol-wrapper",
|
|
970 Qchain, Qunbound,
|
|
971 mlen != -1 ?
|
|
972 list6 (Qmnemonic, build_intstring (codesys_mnemonic),
|
|
973 Qchain, chain,
|
|
974 Qcanonicalize_after_coding, sub_codesys) :
|
|
975 list4 (Qchain, chain,
|
|
976 Qcanonicalize_after_coding, sub_codesys));
|
|
977 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon;
|
|
978 }
|
|
979 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol;
|
|
980 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys;
|
|
981 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys;
|
428
|
982 }
|
|
983 }
|
|
984
|
771
|
985 /* Basic function to create new coding systems. For `make-coding-system',
|
|
986 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE,
|
|
987 DESCRIPTION, and PROPS are the same. All created coding systems are put
|
|
988 in a hash table indexed by NAME.
|
|
989
|
|
990 If PREFIX is a string, NAME-OR-EXISTING should specify an existing
|
|
991 coding system (or nil), and an internal coding system will be created.
|
|
992 The name of the coding system will be constructed by combining PREFIX
|
|
993 with the name of the existing coding system (if given), and a number
|
|
994 will be appended to ensure uniqueness. In such a case, if Qunbound is
|
|
995 given for DESCRIPTION, the description gets created based on the
|
|
996 generated name. Also, if no mnemonic is given in the properties list, a
|
|
997 mnemonic is created based on the generated name.
|
|
998
|
|
999 For internal coding systems, the coding system is marked as internal
|
|
1000 (see `coding-system-list'), and no subsidiaries will be created or
|
|
1001 eol-wrapping will happen. Otherwise:
|
|
1002
|
|
1003 -- if the eol-type property is `lf' or t, the coding system is merely
|
|
1004 created and returned. (For t, the coding system will be wrapped with
|
|
1005 an EOL autodetector when it's used to read a file.)
|
|
1006
|
|
1007 -- if eol-type is `crlf' or `cr', after the coding system object is
|
|
1008 created, it will be wrapped in a chain with the appropriate
|
|
1009 convert-eol coding system (either `convert-eol-crlf' or
|
|
1010 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at
|
|
1011 decoding time, and the opposite at encoding time. The resulting
|
|
1012 chain becomes the CANONICAL field of the coding system object.
|
|
1013
|
|
1014 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three
|
|
1015 coding systems where the original coding system (before wrapping with
|
|
1016 convert-eol-autodetect) is either unwrapped or wrapped with
|
|
1017 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems
|
|
1018 to handle LF, CRLF, and CR end-of-line indicators are created. (This
|
|
1019 crazy crap is based on existing behavior in other Mule versions,
|
|
1020 including FSF Emacs.)
|
|
1021 */
|
428
|
1022
|
|
/* Worker shared by `make-coding-system' (which passes a null PREFIX) and
   make_internal_coding_system() (which passes a non-null one).  Returns
   the newly created coding system object. */
static Lisp_Object
make_coding_system_1 (Lisp_Object name_or_existing, Ascbyte *prefix,
                      Lisp_Object type, Lisp_Object description,
                      Lisp_Object props)
{
  Lisp_Coding_System *cs;
  int need_to_setup_eol_systems = 1;
  enum eol_type eol_wrapper = EOL_AUTODETECT;
  struct coding_system_methods *meths;
  Lisp_Object csobj;
  Lisp_Object defmnem = Qnil;

  /* A nil TYPE is shorthand for `undecided'. */
  if (NILP (type))
    type = Qundecided;
  meths = decode_coding_system_type (type, ERROR_ME);

  if (prefix)
    {
      /* Internal coding system: generate the name
         PREFIX-<name of existing coding system, or "nil">-<tick>.
         Incrementing coding_system_tick makes each generated name
         distinct. */
      Ibyte *newname =
        emacs_sprintf_malloc (NULL, "%s-%s-%d",
                              prefix,
                              NILP (name_or_existing) ? (Ibyte *) "nil" :
                              XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME
                                                          (name_or_existing))),
                              ++coding_system_tick);
      name_or_existing = intern_int (newname);
      xfree (newname, Ibyte *);

      /* No description supplied: derive one from the generated name. */
      if (UNBOUNDP (description))
        {
          newname =
            emacs_sprintf_malloc
              (NULL, "For Internal Use (%s)",
               XSTRING_DATA (Fsymbol_name (name_or_existing)));
          description = build_intstring (newname);
          xfree (newname, Ibyte *);
        }

      /* Default mnemonic, numbered with the same tick as the name.  A
         `mnemonic' entry in PROPS overrides it below. */
      newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick);
      defmnem = build_intstring (newname);
      xfree (newname, Ibyte *);
    }
  else
    CHECK_SYMBOL (name_or_existing);

  /* See if there is an entry for name_or_existing in the defined coding
     system hash table. */
  csobj = find_coding_system (name_or_existing, 0);
  /* Error if it's there and not an autoload form. */
  if (!NILP (csobj) && !CONSP (csobj))
    invalid_operation ("Cannot redefine existing coding system",
                       name_or_existing);

  cs = allocate_coding_system (meths, meths->extra_data_size,
                               name_or_existing);
  csobj = wrap_coding_system (cs);

  /* A coding system is internal exactly when a PREFIX was given. */
  cs->internal_p = !!prefix;

  if (NILP (description))
    description = build_string ("");
  else
    CHECK_STRING (description);
  CODING_SYSTEM_DESCRIPTION (cs) = description;

  if (!NILP (defmnem))
    CODING_SYSTEM_MNEMONIC (cs) = defmnem;

  /* Walk the property list.  Generic properties are handled here; anything
     else is offered to the type's `putprop' method, and a property nobody
     recognizes is an error. */
  {
    EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props)
      {
        int recognized = 1;

        if (EQ (key, Qmnemonic))
          {
            if (!NILP (value))
              CHECK_STRING (value);
            CODING_SYSTEM_MNEMONIC (cs) = value;
          }

        else if (EQ (key, Qdocumentation))
          {
            if (!NILP (value))
              CHECK_STRING (value);
            CODING_SYSTEM_DOCUMENTATION (cs) = value;
          }

        else if (EQ (key, Qeol_type))
          {
            /* Subsidiaries are wanted only for an explicit nil.  The test
               must precede the t -> nil conversion: t also means
               autodetection, but without subsidiaries. */
            need_to_setup_eol_systems = NILP (value);
            if (EQ (value, Qt))
              value = Qnil;
            eol_wrapper = symbol_to_eol_type (value);
          }

        else if (EQ (key, Qpost_read_conversion))
          CODING_SYSTEM_POST_READ_CONVERSION (cs) = value;
        else if (EQ (key, Qpre_write_conversion))
          CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value;
        /* FSF compatibility: the following properties are accepted but
           ignored. */
        else if (EQ (key, Qtranslation_table_for_decode))
          ;
        else if (EQ (key, Qtranslation_table_for_encode))
          ;
        else if (EQ (key, Qsafe_chars))
          ;
        else if (EQ (key, Qsafe_charsets))
          ;
        else if (EQ (key, Qmime_charset))
          ;
        else if (EQ (key, Qvalid_codes))
          ;
        else
          recognized = CODESYSMETH_OR_GIVEN (cs, putprop,
                                             (csobj, key, value), 0);

        if (!recognized)
          invalid_constant ("Unrecognized property", key);
      }
  }

  {
    XCODING_SYSTEM_CANONICAL (csobj) =
      CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil);
    XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system
                                                         below */

    /* Register the coding system under its name.  This also replaces an
       autoload form, if that is what was stored there. */
    Fputhash (name_or_existing, csobj, Vcoding_system_hash_table);

    if (need_to_setup_eol_systems && !cs->internal_p)
      setup_eol_coding_systems (csobj);
    else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF)
      {
        /* If a specific eol-type (other than LF) was specified, we handle
           this by converting the coding system into a chain that wraps the
           coding system along with a convert-eol system after it, in
           exactly that same switcheroo fashion that the normal
           canonicalize method works -- BUT we will run into a problem if
           we do it the obvious way, because when `chain' creates its
           substreams, the substream containing the coding system we're
           creating will have canonicalization expansion done on it,
           leading to infinite recursion.  So we have to generate a new,
           internal coding system with the previous value of CANONICAL. */
        Ibyte *newname =
          emacs_sprintf_malloc
            (NULL, "internal-eol-copy-%s-%d",
             XSTRING_DATA (Fsymbol_name (name_or_existing)),
             ++coding_system_tick);
        Lisp_Object newnamesym = intern_int (newname);
        Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym);
        xfree (newname, Ibyte *);

        XCODING_SYSTEM_CANONICAL (csobj) =
          make_internal_coding_system
            (csobj,
             "internal-eol-wrapper",
             Qchain, Qunbound,
             list4 (Qchain,
                    list2 (copied,
                           eol_wrapper == EOL_CR ?
                           Qconvert_eol_cr :
                           Qconvert_eol_crlf),
                    Qcanonicalize_after_coding,
                    csobj));
      }
    /* Only now record the EOL type that was actually requested; it was
       held at EOL_AUTODETECT while the copies above were made. */
    XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper;
  }

  return csobj;
}
|
|
1193
|
771
|
1194 Lisp_Object
|
2367
|
1195 make_internal_coding_system (Lisp_Object existing, Ascbyte *prefix,
|
771
|
1196 Lisp_Object type, Lisp_Object description,
|
|
1197 Lisp_Object props)
|
|
1198 {
|
|
1199 return make_coding_system_1 (existing, prefix, type, description, props);
|
|
1200 }
|
428
|
1201
|
|
1202 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /*
|
|
1203 Register symbol NAME as a coding system.
|
|
1204
|
|
1205 TYPE describes the conversion method used and should be one of
|
|
1206
|
3025
|
1207 nil or `undecided'
|
428
|
1208 Automatic conversion. XEmacs attempts to detect the coding system
|
|
1209 used in the file.
|
3025
|
1210 `chain'
|
771
|
1211 Chain two or more coding systems together to make a combination coding
|
|
1212 system.
|
3025
|
1213 `no-conversion'
|
428
|
1214 No conversion. Use this for binary files and such. On output,
|
|
1215 graphic characters that are not in ASCII or Latin-1 will be
|
|
1216 replaced by a ?. (For a no-conversion-encoded buffer, these
|
|
1217 characters will only be present if you explicitly insert them.)
|
3025
|
1218 `convert-eol'
|
771
|
1219 Convert CRLF sequences or CR to LF.
|
3025
|
1220 `shift-jis'
|
428
|
1221 Shift-JIS (a Japanese encoding commonly used in PC operating systems).
|
3025
|
1222 `unicode'
|
771
|
1223 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.).
|
3025
|
1224 `mswindows-unicode-to-multibyte'
|
771
|
1225 (MS Windows only) Converts from Windows Unicode to Windows Multibyte
|
|
1226 (any code page encoding) upon encoding, and the other way upon decoding.
|
3025
|
1227 `mswindows-multibyte'
|
771
|
1228 Converts to or from Windows Multibyte (any code page encoding).
|
|
1229 This is resolved into a chain of `mswindows-unicode' and
|
|
1230 `mswindows-unicode-to-multibyte'.
|
3025
|
1231 `iso2022'
|
428
|
1232 Any ISO2022-compliant encoding. Among other things, this includes
|
|
1233 JIS (the Japanese encoding commonly used for e-mail), EUC (the
|
|
1234 standard Unix encoding for Japanese and other languages), and
|
|
1235 Compound Text (the encoding used in X11). You can specify more
|
442
|
1236 specific information about the conversion with the PROPS argument.
|
3025
|
1237 `big5'
|
2819
|
1238 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan).
|
3025
|
1239 `ccl'
|
428
|
1240 The conversion is performed using a user-written pseudo-code
|
|
1241 program. CCL (Code Conversion Language) is the name of this
|
|
1242 pseudo-code.
|
3025
|
1243 `gzip'
|
771
|
1244 GZIP compression format.
|
3025
|
1245 `internal'
|
428
|
1246 Write out or read in the raw contents of the memory representing
|
|
1247 the buffer's text. This is primarily useful for debugging
|
|
1248 purposes, and is only enabled when XEmacs has been compiled with
|
|
1249 DEBUG_XEMACS defined (via the --debug configure option).
|
3025
|
1250 WARNING: Reading in a file using `internal' conversion can result
|
428
|
1251 in an internal inconsistency in the memory representing a
|
|
1252 buffer's text, which will produce unpredictable results and may
|
|
1253 cause XEmacs to crash. Under normal circumstances you should
|
3025
|
1254 never use `internal' conversion.
|
428
|
1255
|
771
|
1256 DESCRIPTION is a short English phrase describing the coding system,
|
|
1257 suitable for use as a menu item. (See also the `documentation' property
|
|
1258 below.)
|
428
|
1259
|
|
1260 PROPS is a property list, describing the specific nature of the
|
|
1261 character set. Recognized properties are:
|
|
1262
|
3025
|
1263 `mnemonic'
|
428
|
1264 String to be displayed in the modeline when this coding system is
|
|
1265 active.
|
|
1266
|
3025
|
1267 `documentation'
|
771
|
1268 Detailed documentation on the coding system.
|
|
1269
|
3025
|
1270 `eol-type'
|
428
|
1271 End-of-line conversion to be used. It should be one of
|
|
1272
|
|
1273 nil
|
|
1274 Automatically detect the end-of-line type (LF, CRLF,
|
|
1275 or CR). Also generate subsidiary coding systems named
|
|
1276 `NAME-unix', `NAME-dos', and `NAME-mac', that are
|
|
1277 identical to this coding system but have an EOL-TYPE
|
3025
|
1278 value of `lf', `crlf', and `cr', respectively.
|
|
1279 `lf'
|
428
|
1280 The end of a line is marked externally using ASCII LF.
|
|
1281 Since this is also the way that XEmacs represents an
|
|
1282 end-of-line internally, specifying this option results
|
|
1283 in no end-of-line conversion. This is the standard
|
|
1284 format for Unix text files.
|
3025
|
1285 `crlf'
|
428
|
1286 The end of a line is marked externally using ASCII
|
|
1287 CRLF. This is the standard format for MS-DOS text
|
|
1288 files.
|
3025
|
1289 `cr'
|
428
|
1290 The end of a line is marked externally using ASCII CR.
|
|
1291 This is the standard format for Macintosh text files.
|
|
1292 t
|
|
1293 Automatically detect the end-of-line type but do not
|
|
1294 generate subsidiary coding systems. (This value is
|
|
1295 converted to nil when stored internally, and
|
|
1296 `coding-system-property' will return nil.)
|
|
1297
|
3025
|
1298 `post-read-conversion'
|
771
|
1299 The value is a function to call after some text is inserted and
|
|
1300 decoded by the coding system itself and before any functions in
|
|
1301 `after-change-functions' are called. (#### Not actually true in
|
|
1302 XEmacs. `after-change-functions' will be called twice if
|
|
1303 `post-read-conversion' changes something.) The argument of this
|
|
1304 function is the same as for a function in
|
|
1305 `after-insert-file-functions', i.e. LENGTH of the text inserted,
|
|
1306 with point at the head of the text to be decoded.
|
428
|
1307
|
3025
|
1308 `pre-write-conversion'
|
771
|
1309 The value is a function to call after all functions in
|
|
1310 `write-region-annotate-functions' and `buffer-file-format' are
|
|
1311 called, and before the text is encoded by the coding system itself.
|
|
1312 The arguments to this function are the same as those of a function
|
|
1313 in `write-region-annotate-functions', i.e. FROM and TO, specifying
|
|
1314 a region of text.
|
|
1315
|
|
1316
|
|
1317
|
|
1318 The following properties are allowed for FSF compatibility but currently
|
|
1319 ignored:
|
|
1320
|
3025
|
1321 `translation-table-for-decode'
|
771
|
1322 The value is a translation table to be applied on decoding. See
|
|
1323 the function `make-translation-table' for the format of translation
|
|
1324 table. This is not applicable to CCL-based coding systems.
|
|
1325
|
3025
|
1326 `translation-table-for-encode'
|
771
|
1327 The value is a translation table to be applied on encoding. This is
|
|
1328 not applicable to CCL-based coding systems.
|
|
1329
|
3025
|
1330 `safe-chars'
|
771
|
1331 The value is a char table. If a character has non-nil value in it,
|
|
1332 the character is safely supported by the coding system. This
|
|
1333 overrides the specification of safe-charsets.
|
|
1334
|
3025
|
1335 `safe-charsets'
|
771
|
1336 The value is a list of charsets safely supported by the coding
|
|
1337 system. The value t means that all charsets Emacs handles are
|
|
1338 supported. Even if some charset is not in this list, it doesn't
|
|
1339 mean that the charset can't be encoded in the coding system;
|
|
1340 it just means that some other receiver of text encoded
|
|
1341 in the coding system won't be able to handle that charset.
|
|
1342
|
3025
|
1343 `mime-charset'
|
771
|
1344 The value is a symbol of which name is `MIME-charset' parameter of
|
|
1345 the coding system.
|
|
1346
|
3025
|
1347 `valid-codes' (meaningful only for a coding system based on CCL)
|
771
|
1348 The value is a list to indicate valid byte ranges of the encoded
|
|
1349 file. Each element of the list is an integer or a cons of integer.
|
|
1350 In the former case, the integer value is a valid byte code. In the
|
|
1351 latter case, the integers specifies the range of valid byte codes.
|
|
1352
|
|
1353
|
|
1354
|
3025
|
1355 The following additional property is recognized if TYPE is `convert-eol':
|
|
1356
|
|
1357 `subtype'
|
793
|
1358 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding,
|
|
1359 the corresponding sequence will be converted to LF. When encoding,
|
|
1360 the opposite happens. This coding system converts characters to
|
771
|
1361 characters.
|
|
1362
|
428
|
1363
|
|
1364
|
3025
|
1365 The following additional properties are recognized if TYPE is `iso2022':
|
|
1366
|
|
1367 `charset-g0'
|
|
1368 `charset-g1'
|
|
1369 `charset-g2'
|
|
1370 `charset-g3'
|
428
|
1371 The character set initially designated to the G0 - G3 registers.
|
|
1372 The value should be one of
|
|
1373
|
|
1374 -- A charset object (designate that character set)
|
|
1375 -- nil (do not ever use this register)
|
|
1376 -- t (no character set is initially designated to
|
|
1377 the register, but may be later on; this automatically
|
|
1378 sets the corresponding `force-g*-on-output' property)
|
|
1379
|
3025
|
1380 `force-g0-on-output'
|
|
1381 `force-g1-on-output'
|
|
1382 `force-g2-on-output'
|
|
1383 `force-g2-on-output'
|
428
|
1384 If non-nil, send an explicit designation sequence on output before
|
|
1385 using the specified register.
|
|
1386
|
3025
|
1387 `short'
|
428
|
1388 If non-nil, use the short forms "ESC $ @", "ESC $ A", and
|
|
1389 "ESC $ B" on output in place of the full designation sequences
|
|
1390 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B".
|
|
1391
|
3025
|
1392 `no-ascii-eol'
|
428
|
1393 If non-nil, don't designate ASCII to G0 at each end of line on output.
|
|
1394 Setting this to non-nil also suppresses other state-resetting that
|
|
1395 normally happens at the end of a line.
|
|
1396
|
3025
|
1397 `no-ascii-cntl'
|
428
|
1398 If non-nil, don't designate ASCII to G0 before control chars on output.
|
|
1399
|
3025
|
1400 `seven'
|
428
|
1401 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit
|
|
1402 environment.
|
|
1403
|
3025
|
1404 `lock-shift'
|
428
|
1405 If non-nil, use locking-shift (SO/SI) instead of single-shift
|
|
1406 or designation by escape sequence.
|
|
1407
|
3025
|
1408 `no-iso6429'
|
428
|
1409 If non-nil, don't use ISO6429's direction specification.
|
|
1410
|
3025
|
1411 `escape-quoted'
|
428
|
1412 If non-nil, literal control characters that are the same as
|
|
1413 the beginning of a recognized ISO2022 or ISO6429 escape sequence
|
|
1414 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
|
|
1415 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character
|
|
1416 so that they can be properly distinguished from an escape sequence.
|
|
1417 (Note that doing this results in a non-portable encoding.) This
|
|
1418 encoding flag is used for byte-compiled files. Note that ESC
|
|
1419 is a good choice for a quoting character because there are no
|
|
1420 escape sequences whose second byte is a character from the Control-0
|
|
1421 or Control-1 character sets; this is explicitly disallowed by the
|
|
1422 ISO2022 standard.
|
|
1423
|
3025
|
1424 `input-charset-conversion'
|
428
|
1425 A list of conversion specifications, specifying conversion of
|
|
1426 characters in one charset to another when decoding is performed.
|
|
1427 Each specification is a list of two elements: the source charset,
|
|
1428 and the destination charset.
|
|
1429
|
3025
|
1430 `output-charset-conversion'
|
428
|
1431 A list of conversion specifications, specifying conversion of
|
|
1432 characters in one charset to another when encoding is performed.
|
|
1433 The form of each specification is the same as for
|
3025
|
1434 `input-charset-conversion'.
|
428
|
1435
|
|
1436
|
771
|
1437
|
428
|
1438 The following additional properties are recognized (and required)
|
3025
|
1439 if TYPE is `ccl':
|
|
1440
|
|
1441 `decode'
|
428
|
1442 CCL program used for decoding (converting to internal format).
|
|
1443
|
3025
|
1444 `encode'
|
428
|
1445 CCL program used for encoding (converting to external format).
|
771
|
1446
|
|
1447
|
3025
|
1448 The following additional properties are recognized if TYPE is `chain':
|
|
1449
|
|
1450 `chain'
|
771
|
1451 List of coding systems to be chained together, in decoding order.
|
|
1452
|
3025
|
1453 `canonicalize-after-coding'
|
771
|
1454 Coding system to be returned by the detector routines in place of
|
|
1455 this coding system.
|
|
1456
|
|
1457
|
|
1458
|
3025
|
1459 The following additional properties are recognized if TYPE is `unicode':
|
|
1460
|
3767
|
1461 `unicode-type'
|
771
|
1462 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not
|
|
1463 yet implemented). `utf-16' is the basic two-byte encoding;
|
|
1464 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible
|
|
1465 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using
|
|
1466 only characters that will safely pass through all mail gateways.
|
2297
|
1467 [[ This should be \"transformation format\". There should also be
|
|
1468 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]]
|
771
|
1469
|
3025
|
1470 `little-endian'
|
771
|
1471 If non-nil, `utf-16' and `ucs-4' will write out the groups of two
|
|
1472 or four bytes little-endian instead of big-endian. This is required,
|
|
1473 for example, under Windows.
|
|
1474
|
3025
|
1475 `need-bom'
|
771
|
1476 If non-nil, a byte order mark (BOM, or Unicode FFFE) should be
|
|
1477 written out at the beginning of the data. This serves both to
|
|
1478 identify the endianness of the following data and to mark the
|
|
1479 data as Unicode (at least, this is how Windows uses it).
|
2297
|
1480 [[ The correct term is \"signature\", since this technique may also
|
|
1481 be used with UTF-8. That is the term used in the standard. ]]
|
771
|
1482
|
|
1483
|
|
1484 The following additional properties are recognized if TYPE is
|
3025
|
1485 `mswindows-multibyte':
|
|
1486
|
|
1487 `code-page'
|
771
|
1488 Either a number (specifying a particular code page) or one of the
|
|
1489 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI,
|
|
1490 OEM, Macintosh, or EBCDIC code page associated with a particular
|
|
1491 locale (given by the `locale' property). NOTE: EBCDIC code pages
|
|
1492 only exist in Windows 2000 and later.
|
|
1493
|
3025
|
1494 `locale'
|
771
|
1495 If `code-page' is a symbol, this specifies the locale whose code
|
|
1496 page of the corresponding type should be used. This should be
|
|
1497 one of the following: A cons of two strings, (LANGUAGE
|
|
1498 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a
|
|
1499 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is
|
|
1500 used); or one of the symbols `current', `user-default', or
|
|
1501 `system-default', corresponding to the values of
|
|
1502 `mswindows-current-locale', `mswindows-user-default-locale', or
|
|
1503 `mswindows-system-default-locale', respectively.
|
|
1504
|
|
1505
|
|
1506
|
3025
|
1507 The following additional properties are recognized if TYPE is `undecided':
|
4072
|
1508 \[[ Doesn't GNU use \"detect-*\" for the following two? ]]
|
771
|
1509
|
3025
|
1510 `do-eol'
|
771
|
1511 Do EOL detection.
|
|
1512
|
3025
|
1513 `do-coding'
|
771
|
1514 Do encoding detection.
|
|
1515
|
3025
|
1516 `coding-system'
|
771
|
1517 If encoding detection is not done, use the specified coding system
|
|
1518 to do decoding. This is used internally when implementing coding
|
|
1519 systems with an EOL type that specifies autodetection (the default),
|
|
1520 so that the detector routines return the proper subsidiary.
|
|
1521
|
|
1522
|
|
1523
|
3025
|
1524 The following additional property is recognized if TYPE is `gzip':
|
|
1525
|
|
1526 `level'
|
771
|
1527 Compression level: 0 through 9, or `default' (currently 6).
|
|
1528
|
428
|
1529 */
|
771
|
1530 (name, type, description, props))
|
428
|
1531 {
|
771
|
1532 return make_coding_system_1 (name, 0, type, description, props);
|
428
|
1533 }
|
|
1534
|
|
1535 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /*
|
|
1536 Copy OLD-CODING-SYSTEM to NEW-NAME.
|
|
1537 If NEW-NAME does not name an existing coding system, a new one will
|
|
1538 be created.
|
771
|
1539 If you are using this function to create an alias, think again:
|
|
1540 Use `define-coding-system-alias' instead.
|
428
|
1541 */
|
|
1542 (old_coding_system, new_name))
|
|
1543 {
|
|
1544 Lisp_Object new_coding_system;
|
|
1545 old_coding_system = Fget_coding_system (old_coding_system);
|
771
|
1546 new_coding_system =
|
4303
|
1547 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0);
|
428
|
1548 if (NILP (new_coding_system))
|
|
1549 {
|
793
|
1550 new_coding_system =
|
|
1551 wrap_coding_system
|
|
1552 (allocate_coding_system
|
|
1553 (XCODING_SYSTEM (old_coding_system)->methods,
|
|
1554 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size,
|
|
1555 new_name));
|
771
|
1556 if (!UNBOUNDP (new_name))
|
|
1557 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table);
|
428
|
1558 }
|
771
|
1559 else if (XCODING_SYSTEM (old_coding_system)->methods !=
|
|
1560 XCODING_SYSTEM (new_coding_system)->methods)
|
|
1561 invalid_operation_2 ("Coding systems not same type",
|
|
1562 old_coding_system, new_coding_system);
|
428
|
1563
|
|
1564 {
|
|
1565 Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
|
|
1566 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
|
3017
|
1567 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from));
|
428
|
1568 to->name = new_name;
|
|
1569 }
|
|
1570 return new_coding_system;
|
|
1571 }
|
|
1572
|
771
|
1573 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p,
|
|
1574 1, 1, 0, /*
|
440
|
1575 Return t if OBJECT names a coding system, and is not a coding system alias.
|
428
|
1576 */
|
440
|
1577 (object))
|
|
1578 {
|
|
1579 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil))
|
|
1580 ? Qt : Qnil;
|
|
1581 }
|
|
1582
|
2297
|
1583 /* #### Shouldn't this really be a find/get pair? */
|
|
1584
|
440
|
1585 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /*
|
|
1586 Return t if OBJECT is a coding system alias.
|
|
1587 All coding system aliases are created by `define-coding-system-alias'.
|
|
1588 */
|
|
1589 (object))
|
428
|
1590 {
|
440
|
1591 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero))
|
|
1592 ? Qt : Qnil;
|
|
1593 }
|
|
1594
|
|
1595 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /*
|
|
1596 Return the coding-system symbol for which symbol ALIAS is an alias.
|
|
1597 */
|
|
1598 (alias))
|
|
1599 {
|
|
1600 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil);
|
|
1601 if (SYMBOLP (aliasee))
|
|
1602 return aliasee;
|
|
1603 else
|
563
|
1604 invalid_argument ("Symbol is not a coding system alias", alias);
|
1204
|
1605 RETURN_NOT_REACHED (Qnil);
|
440
|
1606 }
|
|
1607
|
|
1608 /* A maphash function, for removing dangling coding system aliases. */
|
|
1609 static int
|
2286
|
1610 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias),
|
440
|
1611 Lisp_Object aliasee,
|
|
1612 void *dangling_aliases)
|
|
1613 {
|
|
1614 if (SYMBOLP (aliasee)
|
|
1615 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil)))
|
428
|
1616 {
|
440
|
1617 (*(int *) dangling_aliases)++;
|
|
1618 return 1;
|
428
|
1619 }
|
440
|
1620 else
|
|
1621 return 0;
|
|
1622 }
|
|
1623
|
|
1624 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
|
|
1625 Define symbol ALIAS as an alias for coding system ALIASEE.
|
|
1626
|
|
1627 You can use this function to redefine an alias that has already been defined,
|
|
1628 but you cannot redefine a name which is the canonical name for a coding system.
|
|
1629 \(a canonical name of a coding system is what is returned when you call
|
|
1630 `coding-system-name' on a coding system).
|
|
1631
|
|
1632 ALIASEE itself can be an alias, which allows you to define nested aliases.
|
|
1633
|
|
1634 You are forbidden, however, from creating alias loops or `dangling' aliases.
|
|
1635 These will be detected, and an error will be signaled if you attempt to do so.
|
|
1636
|
|
1637 If ALIASEE is nil, then ALIAS will simply be undefined.
|
|
1638
|
|
1639 See also `coding-system-alias-p', `coding-system-aliasee',
|
|
1640 and `coding-system-canonical-name-p'.
|
|
1641 */
|
|
1642 (alias, aliasee))
|
|
1643 {
|
2286
|
1644 Lisp_Object probe;
|
440
|
1645
|
|
1646 CHECK_SYMBOL (alias);
|
|
1647
|
|
1648 if (!NILP (Fcoding_system_canonical_name_p (alias)))
|
563
|
1649 invalid_change
|
440
|
1650 ("Symbol is the canonical name of a coding system and cannot be redefined",
|
|
1651 alias);
|
|
1652
|
|
1653 if (NILP (aliasee))
|
|
1654 {
|
771
|
1655 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix");
|
|
1656 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos");
|
|
1657 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac");
|
440
|
1658
|
|
1659 Fremhash (alias, Vcoding_system_hash_table);
|
|
1660
|
|
1661 /* Undefine subsidiary aliases,
|
|
1662 presumably created by a previous call to this function */
|
|
1663 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) &&
|
|
1664 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) &&
|
|
1665 ! NILP (Fcoding_system_alias_p (subsidiary_mac)))
|
|
1666 {
|
|
1667 Fdefine_coding_system_alias (subsidiary_unix, Qnil);
|
|
1668 Fdefine_coding_system_alias (subsidiary_dos, Qnil);
|
|
1669 Fdefine_coding_system_alias (subsidiary_mac, Qnil);
|
|
1670 }
|
|
1671
|
|
1672 /* Undefine dangling coding system aliases. */
|
|
1673 {
|
|
1674 int dangling_aliases;
|
|
1675
|
|
1676 do {
|
|
1677 dangling_aliases = 0;
|
|
1678 elisp_map_remhash (dangling_coding_system_alias_p,
|
|
1679 Vcoding_system_hash_table,
|
|
1680 &dangling_aliases);
|
|
1681 } while (dangling_aliases > 0);
|
|
1682 }
|
|
1683
|
|
1684 return Qnil;
|
|
1685 }
|
|
1686
|
|
1687 if (CODING_SYSTEMP (aliasee))
|
|
1688 aliasee = XCODING_SYSTEM_NAME (aliasee);
|
|
1689
|
|
1690 /* Checks that aliasee names a coding-system */
|
2286
|
1691 (void) Fget_coding_system (aliasee);
|
440
|
1692
|
|
1693 /* Check for coding system alias loops */
|
|
1694 if (EQ (alias, aliasee))
|
563
|
1695 alias_loop: invalid_operation_2
|
440
|
1696 ("Attempt to create a coding system alias loop", alias, aliasee);
|
|
1697
|
|
1698 for (probe = aliasee;
|
|
1699 SYMBOLP (probe);
|
|
1700 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero))
|
|
1701 {
|
|
1702 if (EQ (probe, alias))
|
|
1703 goto alias_loop;
|
|
1704 }
|
|
1705
|
|
1706 Fputhash (alias, aliasee, Vcoding_system_hash_table);
|
|
1707
|
|
1708 /* Set up aliases for subsidiaries.
|
2297
|
1709 #### There must be a better way to handle subsidiary coding systems.
|
|
1710 Inquiring Minds Want To Know: shouldn't they always be chains? */
|
440
|
1711 {
|
|
1712 static const char *suffixes[] = { "-unix", "-dos", "-mac" };
|
|
1713 int i;
|
|
1714 for (i = 0; i < countof (suffixes); i++)
|
|
1715 {
|
|
1716 Lisp_Object alias_subsidiary =
|
771
|
1717 add_suffix_to_symbol (alias, suffixes[i]);
|
440
|
1718 Lisp_Object aliasee_subsidiary =
|
771
|
1719 add_suffix_to_symbol (aliasee, suffixes[i]);
|
440
|
1720
|
|
1721 if (! NILP (Ffind_coding_system (aliasee_subsidiary)))
|
|
1722 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary);
|
|
1723 }
|
|
1724 }
|
428
|
1725 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
|
|
1726 but it doesn't look intentional, so I'd rather return something
|
|
1727 meaningful or nothing at all. */
|
|
1728 return Qnil;
|
|
1729 }
|
|
1730
|
|
1731 static Lisp_Object
|
771
|
1732 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
|
428
|
1733 {
|
|
1734 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
|
|
1735 Lisp_Object new_coding_system;
|
|
1736
|
|
1737 switch (type)
|
|
1738 {
|
|
1739 case EOL_AUTODETECT: return coding_system;
|
|
1740 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break;
|
|
1741 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break;
|
|
1742 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break;
|
2500
|
1743 default: ABORT (); return Qnil;
|
428
|
1744 }
|
|
1745
|
|
1746 return NILP (new_coding_system) ? coding_system : new_coding_system;
|
|
1747 }
|
|
1748
|
|
1749 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /*
|
|
1750 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE.
|
771
|
1751 The logically opposite operation is `coding-system-base'.
|
428
|
1752 */
|
|
1753 (coding_system, eol_type))
|
|
1754 {
|
771
|
1755 coding_system = get_coding_system_for_text_file (coding_system, 0);
|
428
|
1756
|
|
1757 return subsidiary_coding_system (coding_system,
|
|
1758 symbol_to_eol_type (eol_type));
|
|
1759 }
|
|
1760
|
771
|
1761 DEFUN ("coding-system-base", Fcoding_system_base,
|
|
1762 1, 1, 0, /*
|
|
1763 Return the base coding system of CODING-SYSTEM.
|
|
1764 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it
|
|
1765 returns CODING-SYSTEM.
|
|
1766 The logically opposite operation is `subsidiary-coding-system'.
|
|
1767 */
|
|
1768 (coding_system))
|
|
1769 {
|
|
1770 Lisp_Object base;
|
|
1771
|
|
1772 coding_system = Fget_coding_system (coding_system);
|
|
1773 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary))
|
|
1774 return Fget_coding_system (Qraw_text); /* hack! */
|
|
1775 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system);
|
|
1776 if (!NILP (base))
|
|
1777 return base;
|
|
1778 return coding_system;
|
|
1779 }
|
|
1780
|
|
1781 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io,
|
|
1782 1, 1, 0, /*
|
|
1783 Return the coding system actually used for I/O.
|
|
1784 In some cases (e.g. when a particular EOL type is specified) this won't be
|
2297
|
1785 the coding system itself. This can be useful when trying to determine
|
|
1786 precisely how data was decoded.
|
771
|
1787 */
|
|
1788 (coding_system))
|
|
1789 {
|
|
1790 Lisp_Object canon;
|
|
1791
|
|
1792 coding_system = Fget_coding_system (coding_system);
|
|
1793 canon = XCODING_SYSTEM_CANONICAL (coding_system);
|
|
1794 if (!NILP (canon))
|
|
1795 return canon;
|
|
1796 return coding_system;
|
|
1797 }
|
|
1798
|
428
|
1799
|
|
1800 /************************************************************************/
|
|
1801 /* Coding system accessors */
|
|
1802 /************************************************************************/
|
|
1803
|
771
|
1804 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /*
|
|
1805 Return the description for CODING-SYSTEM.
|
|
1806 The `description' of a coding system is a short English phrase giving the
|
|
1807 name rendered according to English punctuation rules, plus possibly some
|
|
1808 explanatory text (typically in the form of a parenthetical phrase). The
|
|
1809 description is intended to be short enough that it can appear as a menu item,
|
|
1810 and clear enough to be recognizable even to someone who is assumed to have
|
|
1811 some basic familiarity with different encodings but may not know all the
|
|
1812 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and
|
|
1813 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of
|
|
1814 the encoding is given, followed by a note that this is a Chinese encoding,
|
|
1815 because the great majority of people encountering this would have no idea
|
|
1816 what it is, and giving the language indicates whether the encoding should
|
|
1817 just be ignored or (conceivably) investigated more thoroughly.
|
428
|
1818 */
|
|
1819 (coding_system))
|
|
1820 {
|
|
1821 coding_system = Fget_coding_system (coding_system);
|
771
|
1822 return XCODING_SYSTEM_DESCRIPTION (coding_system);
|
428
|
1823 }
|
|
1824
|
|
1825 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /*
|
|
1826 Return the type of CODING-SYSTEM.
|
|
1827 */
|
|
1828 (coding_system))
|
|
1829 {
|
771
|
1830 coding_system = Fget_coding_system (coding_system);
|
|
1831 return XCODING_SYSTEM_TYPE (coding_system);
|
428
|
1832 }
|
|
1833
|
|
1834 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /*
|
|
1835 Return the PROP property of CODING-SYSTEM.
|
|
1836 */
|
|
1837 (coding_system, prop))
|
|
1838 {
|
|
1839 coding_system = Fget_coding_system (coding_system);
|
|
1840 CHECK_SYMBOL (prop);
|
|
1841
|
|
1842 if (EQ (prop, Qname))
|
|
1843 return XCODING_SYSTEM_NAME (coding_system);
|
|
1844 else if (EQ (prop, Qtype))
|
|
1845 return Fcoding_system_type (coding_system);
|
771
|
1846 else if (EQ (prop, Qdescription))
|
|
1847 return XCODING_SYSTEM_DESCRIPTION (coding_system);
|
428
|
1848 else if (EQ (prop, Qmnemonic))
|
|
1849 return XCODING_SYSTEM_MNEMONIC (coding_system);
|
771
|
1850 else if (EQ (prop, Qdocumentation))
|
|
1851 return XCODING_SYSTEM_DOCUMENTATION (coding_system);
|
428
|
1852 else if (EQ (prop, Qeol_type))
|
771
|
1853 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE
|
|
1854 (coding_system));
|
428
|
1855 else if (EQ (prop, Qeol_lf))
|
|
1856 return XCODING_SYSTEM_EOL_LF (coding_system);
|
|
1857 else if (EQ (prop, Qeol_crlf))
|
|
1858 return XCODING_SYSTEM_EOL_CRLF (coding_system);
|
|
1859 else if (EQ (prop, Qeol_cr))
|
|
1860 return XCODING_SYSTEM_EOL_CR (coding_system);
|
|
1861 else if (EQ (prop, Qpost_read_conversion))
|
|
1862 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system);
|
|
1863 else if (EQ (prop, Qpre_write_conversion))
|
|
1864 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system);
|
771
|
1865 else
|
|
1866 {
|
|
1867 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system),
|
|
1868 getprop,
|
|
1869 (coding_system, prop),
|
|
1870 Qunbound);
|
|
1871 if (UNBOUNDP (value))
|
|
1872 invalid_constant ("Unrecognized property", prop);
|
|
1873 return value;
|
|
1874 }
|
|
1875 }
|
|
1876
|
|
1877
|
|
1878 /************************************************************************/
|
|
1879 /* Coding stream functions */
|
|
1880 /************************************************************************/
|
|
1881
|
|
1882 /* A coding stream is a stream used for encoding or decoding text. The
|
|
1883 coding-stream object keeps track of the actual coding system, the stream
|
|
1884 that is at the other end, and data that needs to be persistent across
|
|
1885 the lifetime of the stream. */
|
|
1886
|
1204
|
1887 extern const struct sized_memory_description chain_coding_stream_description;
|
|
1888 extern const struct sized_memory_description undecided_coding_stream_description;
|
|
1889
|
|
1890 static const struct memory_description coding_stream_data_description_1 []= {
|
2551
|
1891 { XD_BLOCK_PTR, chain_coding_system, 1,
|
|
1892 { &chain_coding_stream_description } },
|
|
1893 { XD_BLOCK_PTR, undecided_coding_system, 1,
|
|
1894 { &undecided_coding_stream_description } },
|
1204
|
1895 { XD_END }
|
|
1896 };
|
|
1897
|
|
1898 static const struct sized_memory_description coding_stream_data_description = {
|
|
1899 sizeof (void *), coding_stream_data_description_1
|
|
1900 };
|
|
1901
|
|
1902 static const struct memory_description coding_lstream_description[] = {
|
|
1903 { XD_INT, offsetof (struct coding_stream, type) },
|
|
1904 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) },
|
|
1905 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) },
|
|
1906 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) },
|
|
1907 { XD_UNION, offsetof (struct coding_stream, data),
|
2551
|
1908 XD_INDIRECT (0, 0), { &coding_stream_data_description } },
|
1204
|
1909 { XD_END }
|
|
1910 };
|
|
1911
|
|
1912 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding);
|
771
|
1913
|
|
1914 /* Encoding and decoding are parallel operations, so we create just one
|
|
1915 stream for both. "Decoding" may involve the extra step of autodetection
|
|
1916 of the data format, but that's only because of the conventional
|
|
1917 definition of decoding as converting from external- to
|
|
1918 internal-formatted data.
|
|
1919
|
2297
|
1920 [[ REWRITE ME! ]]
|
|
1921
|
771
|
1922 #### We really need to abstract out the concept of "data formats" and
|
|
1923 define "converters" that convert from and to specified formats,
|
|
1924 eliminating the idea of decoding and encoding. When specifying a
|
|
1925 conversion process, we need to give the data formats themselves, not the
|
|
1926 conversion processes -- e.g. a coding system called "Unicode->multibyte"
|
|
1927 converts in both directions, and we could auto-detect the format of data
|
|
1928 at either end. */
|
|
1929
|
|
1930 static Bytecount
|
|
1931 coding_reader (Lstream *stream, unsigned char *data, Bytecount size)
|
|
1932 {
|
|
1933 unsigned char *orig_data = data;
|
|
1934 Bytecount read_size;
|
|
1935 int error_occurred = 0;
|
|
1936 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
1937
|
|
1938 /* We need to interface to coding_{de,en}code_1(), which expects to take
|
|
1939 some amount of data and store the result into a Dynarr. We have
|
|
1940 coding_{de,en}code_1() store into c->runoff, and take data from there
|
|
1941 as necessary. */
|
|
1942
|
|
1943 /* We loop until we have enough data, reading chunks from the other
|
|
1944 end and converting it. */
|
|
1945 while (1)
|
|
1946 {
|
|
1947 /* Take data from convert_to if we can. Make sure to take at
|
|
1948 most SIZE bytes, and delete the data from convert_to. */
|
|
1949 if (Dynarr_length (str->convert_to) > 0)
|
|
1950 {
|
|
1951 Bytecount chunk =
|
|
1952 min (size, (Bytecount) Dynarr_length (str->convert_to));
|
|
1953 memcpy (data, Dynarr_atp (str->convert_to, 0), chunk);
|
|
1954 Dynarr_delete_many (str->convert_to, 0, chunk);
|
|
1955 data += chunk;
|
|
1956 size -= chunk;
|
|
1957 }
|
|
1958
|
|
1959 if (size == 0)
|
|
1960 break; /* No more room for data */
|
|
1961
|
|
1962 if (str->eof)
|
|
1963 break;
|
|
1964
|
|
1965 {
|
|
1966 /* Exhausted convert_to, so get some more. Read into convert_from,
|
|
1967 after existing "rejected" data from the last conversion. */
|
|
1968 Bytecount rejected = Dynarr_length (str->convert_from);
|
|
1969 /* #### 1024 is arbitrary; we really need to separate 0 from EOF,
|
|
1970 and when we get 0, keep taking more data until we don't get 0 --
|
|
1971 we don't know how much data the conversion routine might need
|
2297
|
1972 before it can generate any data of its own (eg, bzip2). */
|
814
|
1973 Bytecount readmore =
|
|
1974 str->one_byte_at_a_time ? (Bytecount) 1 :
|
|
1975 max (size, (Bytecount) 1024);
|
771
|
1976
|
|
1977 Dynarr_add_many (str->convert_from, 0, readmore);
|
|
1978 read_size = Lstream_read (str->other_end,
|
|
1979 Dynarr_atp (str->convert_from, rejected),
|
|
1980 readmore);
|
|
1981 /* Trim size down to how much we actually got */
|
|
1982 Dynarr_set_size (str->convert_from, rejected + max (0, read_size));
|
|
1983 }
|
|
1984
|
|
1985 if (read_size < 0) /* LSTREAM_ERROR */
|
|
1986 {
|
|
1987 error_occurred = 1;
|
|
1988 break;
|
|
1989 }
|
|
1990 if (read_size == 0) /* LSTREAM_EOF */
|
|
1991 /* There might be some more end data produced in the translation,
|
|
1992 so we set a flag and call the conversion method once more to
|
|
1993 output any final stuff it may be holding, any "go back to a sane
|
|
1994 state" escape sequences, etc. The conversion method is free to
|
|
1995 look at this flag, and we use it above to stop looping. */
|
|
1996 str->eof = 1;
|
|
1997 {
|
|
1998 Bytecount processed;
|
|
1999 Bytecount to_process = Dynarr_length (str->convert_from);
|
|
2000
|
|
2001 /* Convert the data, and save any rejected data in convert_from */
|
|
2002 processed =
|
|
2003 XCODESYSMETH (str->codesys, convert,
|
|
2004 (str, Dynarr_atp (str->convert_from, 0),
|
|
2005 str->convert_to, to_process));
|
|
2006 if (processed < 0)
|
|
2007 {
|
|
2008 error_occurred = 1;
|
|
2009 break;
|
|
2010 }
|
|
2011 assert (processed <= to_process);
|
|
2012 if (processed < to_process)
|
|
2013 memmove (Dynarr_atp (str->convert_from, 0),
|
|
2014 Dynarr_atp (str->convert_from, processed),
|
|
2015 to_process - processed);
|
|
2016 Dynarr_set_size (str->convert_from, to_process - processed);
|
|
2017 }
|
|
2018 }
|
|
2019
|
|
2020 if (data - orig_data == 0)
|
|
2021 return error_occurred ? -1 : 0;
|
|
2022 else
|
|
2023 return data - orig_data;
|
|
2024 }
|
|
2025
|
|
2026 static Bytecount
|
|
2027 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size)
|
|
2028 {
|
|
2029 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2030
|
|
2031 /* Convert all our data into convert_to, and then attempt to write
|
|
2032 it all out to the other end. */
|
|
2033 Dynarr_reset (str->convert_to);
|
|
2034 size = XCODESYSMETH (str->codesys, convert,
|
|
2035 (str, data, str->convert_to, size));
|
|
2036 if (Lstream_write (str->other_end, Dynarr_atp (str->convert_to, 0),
|
|
2037 Dynarr_length (str->convert_to)) < 0)
|
|
2038 return -1;
|
|
2039 else
|
|
2040 /* The return value indicates how much of the incoming data was
|
|
2041 processed, not how many bytes were written. */
|
|
2042 return size;
|
|
2043 }
|
|
2044
|
|
2045 static int
|
|
2046 encode_decode_source_sink_type_is_char (Lisp_Object cs,
|
|
2047 enum source_or_sink sex,
|
|
2048 enum encode_decode direction)
|
|
2049 {
|
|
2050 return (direction == CODING_DECODE ?
|
|
2051 decoding_source_sink_type_is_char (cs, sex) :
|
|
2052 encoding_source_sink_type_is_char (cs, sex));
|
|
2053 }
|
|
2054
|
|
2055 /* Ensure that the convert methods only get full characters sent to them to
|
|
2056 convert if the source of that conversion is characters; and that no such
|
|
2057 full-character checking happens when the source is bytes. Keep in mind
|
|
2058 that (1) the conversion_end_type return values take the perspective of
|
|
2059 encoding; (2) the source for decoding is the same as the sink for
|
|
2060 encoding; (3) when writing, the data is given to us, and we set our own
|
|
2061 stream to be character mode or not; (4) when reading, the data comes
|
|
2062 from the other_end stream, and we set that one to be character mode or
|
|
2063 not. This is consistent with the comment above the prototype for
|
|
2064 Lstream_set_character_mode(), which lays out rules for who is allowed to
|
|
2065 modify the character type mode on a stream.
|
|
2066
|
814
|
2067 If we're a read stream, we're always setting character mode on the
|
|
2068 source, but we also set it on ourselves consistent with the flag that
|
|
2069 can disable this (see again the comment above
|
|
2070 Lstream_set_character_mode()).
|
|
2071 */
|
771
|
2072
|
|
2073 static void
|
|
2074 set_coding_character_mode (Lstream *stream)
|
|
2075 {
|
|
2076 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2077 Lstream *stream_to_set =
|
|
2078 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end;
|
|
2079 if (encode_decode_source_sink_type_is_char
|
|
2080 (str->codesys, CODING_SOURCE, str->direction))
|
|
2081 Lstream_set_character_mode (stream_to_set);
|
|
2082 else
|
|
2083 Lstream_unset_character_mode (stream_to_set);
|
814
|
2084 if (str->set_char_mode_on_us_when_reading &&
|
|
2085 (stream->flags & LSTREAM_FL_READ))
|
|
2086 {
|
|
2087 if (encode_decode_source_sink_type_is_char
|
|
2088 (str->codesys, CODING_SINK, str->direction))
|
|
2089 Lstream_set_character_mode (stream);
|
|
2090 else
|
|
2091 Lstream_unset_character_mode (stream);
|
|
2092 }
|
771
|
2093 }
|
|
2094
|
|
2095 static Lisp_Object
|
|
2096 coding_marker (Lisp_Object stream)
|
|
2097 {
|
|
2098 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream));
|
|
2099
|
|
2100 mark_object (str->orig_codesys);
|
|
2101 mark_object (str->codesys);
|
|
2102 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str));
|
|
2103 return wrap_lstream (str->other_end);
|
|
2104 }
|
|
2105
|
|
2106 static int
|
|
2107 coding_rewinder (Lstream *stream)
|
|
2108 {
|
|
2109 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2110 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str));
|
|
2111
|
|
2112 str->ch = 0;
|
|
2113 Dynarr_reset (str->convert_to);
|
|
2114 Dynarr_reset (str->convert_from);
|
|
2115 return Lstream_rewind (str->other_end);
|
|
2116 }
|
|
2117
|
|
2118 static int
|
|
2119 coding_seekable_p (Lstream *stream)
|
|
2120 {
|
|
2121 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2122 return Lstream_seekable_p (str->other_end);
|
|
2123 }
|
|
2124
|
|
2125 static int
|
|
2126 coding_flusher (Lstream *stream)
|
|
2127 {
|
|
2128 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2129 return Lstream_flush (str->other_end);
|
|
2130 }
|
|
2131
|
|
2132 static int
|
|
2133 coding_closer (Lstream *stream)
|
|
2134 {
|
|
2135 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2136 if (stream->flags & LSTREAM_FL_WRITE)
|
|
2137 {
|
|
2138 str->eof = 1;
|
|
2139 coding_writer (stream, 0, 0);
|
|
2140 str->eof = 0;
|
|
2141 }
|
|
2142 /* It's safe to free the runoff dynarrs now because they are used only
|
|
2143 during conversion. We need to keep the type-specific data around,
|
|
2144 though, because of canonicalize_after_coding. */
|
|
2145 if (str->convert_to)
|
|
2146 {
|
|
2147 Dynarr_free (str->convert_to);
|
|
2148 str->convert_to = 0;
|
|
2149 }
|
|
2150 if (str->convert_from)
|
428
|
2151 {
|
771
|
2152 Dynarr_free (str->convert_from);
|
|
2153 str->convert_from = 0;
|
|
2154 }
|
|
2155
|
800
|
2156 if (str->no_close_other)
|
|
2157 return Lstream_flush (str->other_end);
|
|
2158 else
|
|
2159 return Lstream_close (str->other_end);
|
771
|
2160 }
|
|
2161
|
|
2162 static void
|
|
2163 coding_finalizer (Lstream *stream)
|
|
2164 {
|
|
2165 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2166
|
|
2167 assert (!str->finalized);
|
|
2168 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str));
|
|
2169 if (str->data)
|
|
2170 {
|
1726
|
2171 xfree (str->data, void *);
|
771
|
2172 str->data = 0;
|
|
2173 }
|
|
2174 str->finalized = 1;
|
|
2175 }
|
|
2176
|
|
2177 static Lisp_Object
|
|
2178 coding_stream_canonicalize_after_coding (Lstream *stream)
|
|
2179 {
|
|
2180 struct coding_stream *str = CODING_STREAM_DATA (stream);
|
|
2181
|
|
2182 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding,
|
|
2183 (str), str->codesys);
|
|
2184 }
|
|
2185
|
|
2186 Lisp_Object
|
|
2187 coding_stream_detected_coding_system (Lstream *stream)
|
|
2188 {
|
|
2189 Lisp_Object codesys =
|
|
2190 coding_stream_canonicalize_after_coding (stream);
|
|
2191 if (NILP (codesys))
|
|
2192 return Fget_coding_system (Qidentity);
|
|
2193 return codesys;
|
|
2194 }
|
|
2195
|
|
2196 Lisp_Object
|
|
2197 coding_stream_coding_system (Lstream *stream)
|
|
2198 {
|
|
2199 return CODING_STREAM_DATA (stream)->codesys;
|
|
2200 }
|
|
2201
|
|
2202 /* Change the coding system associated with a stream. */
|
|
2203
|
|
2204 void
|
|
2205 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
|
|
2206 {
|
|
2207 struct coding_stream *str = CODING_STREAM_DATA (lstr);
|
|
2208 if (EQ (str->orig_codesys, codesys))
|
|
2209 return;
|
|
2210 /* We do the equivalent of closing the stream, destroying it, and
|
|
2211 reinitializing it. This includes flushing out the data and signalling
|
|
2212 EOF, if we're a writing stream; we also replace the type-specific data
|
|
2213 with the data appropriate for the new coding system. */
|
|
2214 if (!NILP (str->codesys))
|
|
2215 {
|
|
2216 if (lstr->flags & LSTREAM_FL_WRITE)
|
|
2217 {
|
|
2218 Lstream_flush (lstr);
|
|
2219 str->eof = 1;
|
|
2220 coding_writer (lstr, 0, 0);
|
|
2221 str->eof = 0;
|
|
2222 }
|
|
2223 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str));
|
|
2224 }
|
|
2225 str->orig_codesys = codesys;
|
|
2226 str->codesys = coding_system_real_canonical (codesys);
|
|
2227
|
|
2228 if (str->data)
|
|
2229 {
|
1726
|
2230 xfree (str->data, void *);
|
771
|
2231 str->data = 0;
|
428
|
2232 }
|
771
|
2233 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size)
|
1204
|
2234 {
|
|
2235 str->data =
|
|
2236 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)->
|
|
2237 coding_data_size);
|
|
2238 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype;
|
|
2239 }
|
771
|
2240 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str));
|
|
2241 /* The new coding system may have different ideas regarding whether its
|
|
2242 ends are characters or bytes. */
|
|
2243 set_coding_character_mode (lstr);
|
|
2244 }
|
|
2245
|
|
2246 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding
|
|
2247 stream for writing, no automatic code detection will be performed.
|
|
2248 The reason for this is that automatic code detection requires a
|
|
2249 seekable input. Things will also fail if you open a coding
|
|
2250 stream for reading using a non-fully-specified coding system and
|
|
2251 a non-seekable input stream. */
|
|
2252
|
|
2253 static Lisp_Object
|
|
2254 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys,
|
800
|
2255 const char *mode, enum encode_decode direction,
|
802
|
2256 int flags)
|
771
|
2257 {
|
|
2258 Lstream *lstr = Lstream_new (lstream_coding, mode);
|
|
2259 struct coding_stream *str = CODING_STREAM_DATA (lstr);
|
|
2260
|
|
2261 codesys = Fget_coding_system (codesys);
|
|
2262 xzero (*str);
|
|
2263 str->codesys = Qnil;
|
|
2264 str->orig_codesys = Qnil;
|
|
2265 str->us = lstr;
|
|
2266 str->other_end = stream;
|
|
2267 str->convert_to = Dynarr_new (unsigned_char);
|
|
2268 str->convert_from = Dynarr_new (unsigned_char);
|
|
2269 str->direction = direction;
|
814
|
2270 if (flags & LSTREAM_FL_NO_CLOSE_OTHER)
|
802
|
2271 str->no_close_other = 1;
|
814
|
2272 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME)
|
802
|
2273 str->one_byte_at_a_time = 1;
|
814
|
2274 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING))
|
|
2275 str->set_char_mode_on_us_when_reading = 1;
|
802
|
2276
|
771
|
2277 set_coding_stream_coding_system (lstr, codesys);
|
793
|
2278 return wrap_lstream (lstr);
|
771
|
2279 }
|
|
2280
|
814
|
2281 /* FLAGS:
|
|
2282
|
|
2283 LSTREAM_FL_NO_CLOSE_OTHER
|
|
2284 Don't close STREAM (the stream at the other end) when this stream is
|
|
2285 closed.
|
|
2286
|
|
2287 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME
|
|
2288 When reading from STREAM, read and process one byte at a time rather
|
|
2289 than in large chunks. This is for reading from TTY's, so we don't
|
|
2290 block. #### We should instead create a non-blocking filedesc stream
|
|
2291 that emulates the behavior as necessary using select(), when the
|
|
2292 fcntls don't work. (As seems to be the case on Cygwin.)
|
|
2293
|
|
2294 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING
|
|
2295 When reading from STREAM, read and process one byte at a time rather
|
|
2296 than in large chunks. This is for reading from TTY's, so we don't
|
|
2297 block. #### We should instead create a non-blocking filedesc stream
|
|
2298 that emulates the behavior as necessary using select(), when the
|
|
2299 fcntls don't work. (As seems to be the case on Cygwin.)
|
|
2300 */
|
771
|
2301 Lisp_Object
|
|
2302 make_coding_input_stream (Lstream *stream, Lisp_Object codesys,
|
802
|
2303 enum encode_decode direction, int flags)
|
771
|
2304 {
|
800
|
2305 return make_coding_stream_1 (stream, codesys, "r", direction,
|
802
|
2306 flags);
|
771
|
2307 }
|
|
2308
|
814
|
2309 /* FLAGS:
|
|
2310
|
|
2311 LSTREAM_FL_NO_CLOSE_OTHER
|
|
2312 Don't close STREAM (the stream at the other end) when this stream is
|
|
2313 closed.
|
|
2314 */
|
771
|
2315 Lisp_Object
|
|
2316 make_coding_output_stream (Lstream *stream, Lisp_Object codesys,
|
802
|
2317 enum encode_decode direction, int flags)
|
771
|
2318 {
|
800
|
2319 return make_coding_stream_1 (stream, codesys, "w", direction,
|
802
|
2320 flags);
|
771
|
2321 }
|
|
2322
|
|
2323 static Lisp_Object
|
|
2324 encode_decode_coding_region (Lisp_Object start, Lisp_Object end,
|
|
2325 Lisp_Object coding_system, Lisp_Object buffer,
|
|
2326 enum encode_decode direction)
|
|
2327 {
|
|
2328 Charbpos b, e;
|
|
2329 struct buffer *buf = decode_buffer (buffer, 0);
|
|
2330 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil;
|
|
2331 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil;
|
|
2332 Lisp_Object lb_outstream = Qnil;
|
|
2333 Lisp_Object next;
|
|
2334 Lstream *istr, *ostr;
|
|
2335 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
|
|
2336 struct gcpro ngcpro1;
|
|
2337 int source_char, sink_char;
|
|
2338
|
|
2339 get_buffer_range_char (buf, start, end, &b, &e, 0);
|
|
2340 barf_if_buffer_read_only (buf, b, e);
|
|
2341
|
|
2342 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream);
|
|
2343 NGCPRO1 (auto_outstream);
|
|
2344
|
|
2345 coding_system = Fget_coding_system (coding_system);
|
|
2346 source_char = encode_decode_source_sink_type_is_char (coding_system,
|
|
2347 CODING_SOURCE,
|
|
2348 direction);
|
|
2349 sink_char = encode_decode_source_sink_type_is_char (coding_system,
|
|
2350 CODING_SINK,
|
|
2351 direction);
|
|
2352
|
|
2353 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */
|
|
2354 instream = make_lisp_buffer_input_stream (buf, b, e, 0);
|
|
2355 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0);
|
|
2356
|
|
2357 if (direction == CODING_DECODE &&
|
|
2358 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
|
|
2359 next = auto_outstream =
|
|
2360 make_coding_output_stream
|
800
|
2361 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect),
|
|
2362 CODING_DECODE, 0);
|
771
|
2363
|
|
2364 if (!sink_char)
|
|
2365 next = from_outstream =
|
800
|
2366 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0);
|
771
|
2367 outstream = make_coding_output_stream (XLSTREAM (next), coding_system,
|
800
|
2368 direction, 0);
|
771
|
2369 if (!source_char)
|
428
|
2370 {
|
771
|
2371 to_outstream =
|
|
2372 make_coding_output_stream (XLSTREAM (outstream),
|
800
|
2373 Qbinary, CODING_ENCODE, 0);
|
771
|
2374 ostr = XLSTREAM (to_outstream);
|
|
2375 }
|
|
2376 else
|
|
2377 ostr = XLSTREAM (outstream);
|
|
2378 istr = XLSTREAM (instream);
|
|
2379
|
|
2380 /* The chain of streams looks like this:
|
|
2381
|
2297
|
2382 [BUFFER] <----- (( read from/send to loop ))
|
771
|
2383 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is
|
|
2384 in bytes]
|
|
2385 ------> [ENCODE/DECODE AS SPECIFIED]
|
|
2386 ------> [BYTE->CHAR i.e. DECODE AS BINARY
|
|
2387 if sink is in bytes]
|
|
2388 ------> [AUTODETECT EOL if
|
|
2389 we're decoding and
|
|
2390 coding system calls
|
|
2391 for this]
|
|
2392 ------> [BUFFER]
|
|
2393 */
|
2367
|
2394
|
|
2395 /* #### See comment
|
|
2396
|
|
2397 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS
|
|
2398
|
|
2399 in text.c.
|
|
2400 */
|
|
2401
|
771
|
2402 while (1)
|
|
2403 {
|
|
2404 char tempbuf[1024]; /* some random amount */
|
|
2405 Charbpos newpos, even_newer_pos;
|
|
2406 Charbpos oldpos = lisp_buffer_stream_startpos (istr);
|
|
2407 Bytecount size_in_bytes =
|
|
2408 Lstream_read (istr, tempbuf, sizeof (tempbuf));
|
|
2409
|
|
2410 if (!size_in_bytes)
|
|
2411 break;
|
|
2412 newpos = lisp_buffer_stream_startpos (istr);
|
|
2413 Lstream_write (ostr, tempbuf, size_in_bytes);
|
|
2414 even_newer_pos = lisp_buffer_stream_startpos (istr);
|
|
2415 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
|
|
2416 even_newer_pos, 0);
|
428
|
2417 }
|
771
|
2418
|
|
2419 {
|
|
2420 Charcount retlen =
|
|
2421 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b;
|
|
2422 Lstream_close (istr);
|
|
2423 Lstream_close (ostr);
|
|
2424 NUNGCPRO;
|
|
2425 UNGCPRO;
|
|
2426 Lstream_delete (istr);
|
|
2427 if (!NILP (from_outstream))
|
|
2428 Lstream_delete (XLSTREAM (from_outstream));
|
|
2429 Lstream_delete (XLSTREAM (outstream));
|
|
2430 if (!NILP (to_outstream))
|
|
2431 Lstream_delete (XLSTREAM (to_outstream));
|
|
2432 if (!NILP (auto_outstream))
|
|
2433 Lstream_delete (XLSTREAM (auto_outstream));
|
|
2434 Lstream_delete (XLSTREAM (lb_outstream));
|
|
2435 return make_int (retlen);
|
|
2436 }
|
|
2437 }
|
|
2438
|
3302
|
2439 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4,
|
|
2440 "*r\nzDecode from coding system: \ni", /*
|
771
|
2441 Decode the text between START and END which is encoded in CODING-SYSTEM.
|
|
2442 This is useful if you've read in encoded text from a file without decoding
|
|
2443 it (e.g. you read in a JIS-formatted file but used the `binary' or
|
|
2444 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B").
|
|
2445 Return length of decoded text.
|
3302
|
2446 BUFFER defaults to the current buffer if unspecified, and when interactive.
|
771
|
2447 */
|
|
2448 (start, end, coding_system, buffer))
|
|
2449 {
|
|
2450 return encode_decode_coding_region (start, end, coding_system, buffer,
|
|
2451 CODING_DECODE);
|
|
2452 }
|
|
2453
|
3302
|
2454 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4,
|
|
2455 "*r\nzEncode to coding system: \ni", /*
|
771
|
2456 Encode the text between START and END using CODING-SYSTEM.
|
|
2457 This will, for example, convert Japanese characters into stuff such as
|
3302
|
2458 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text.
|
|
2459 BUFFER defaults to the current buffer if unspecified, and when interactive.
|
771
|
2460 */
|
|
2461 (start, end, coding_system, buffer))
|
|
2462 {
|
|
2463 return encode_decode_coding_region (start, end, coding_system, buffer,
|
|
2464 CODING_ENCODE);
|
428
|
2465 }
|
|
2466
|
|
2467
|
|
2468 /************************************************************************/
|
771
|
2469 /* Chain methods */
|
428
|
2470 /************************************************************************/
|
|
2471
|
771
|
2472 /* #### Need a way to create "opposite-direction" coding systems. */
|
|
2473
|
|
2474 /* Chain two or more coding systems together to make a combination coding
|
|
2475 system. */
|
|
2476
|
|
2477 struct chain_coding_system
|
|
2478 {
|
|
2479 /* List of coding systems, in decode order */
|
|
2480 Lisp_Object *chain;
|
|
2481 /* Number of coding systems in list */
|
|
2482 int count;
|
|
2483 /* Coding system to return as a result of canonicalize-after-coding */
|
|
2484 Lisp_Object canonicalize_after_coding;
|
|
2485 };
|
|
2486
|
|
2487 struct chain_coding_stream
|
|
2488 {
|
|
2489 int initted;
|
|
2490 /* Lstreams for chain coding system */
|
|
2491 Lisp_Object *lstreams;
|
|
2492 int lstream_count;
|
|
2493 };
|
|
2494
|
1204
|
2495 static const struct memory_description chain_coding_system_description[] = {
|
|
2496 { XD_INT, offsetof (struct chain_coding_system, count) },
|
2367
|
2497 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain),
|
2551
|
2498 XD_INDIRECT (0, 0), { &lisp_object_description } },
|
1204
|
2499 { XD_LISP_OBJECT, offsetof (struct chain_coding_system,
|
|
2500 canonicalize_after_coding) },
|
771
|
2501 { XD_END }
|
|
2502 };
|
|
2503
|
1204
|
2504 static const struct memory_description chain_coding_stream_description_1 [] = {
|
|
2505 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) },
|
2367
|
2506 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams),
|
2551
|
2507 XD_INDIRECT (0, 0), { &lisp_object_description } },
|
771
|
2508 { XD_END }
|
|
2509 };
|
|
2510
|
1204
|
2511 const struct sized_memory_description chain_coding_stream_description = {
|
|
2512 sizeof (struct chain_coding_stream), chain_coding_stream_description_1
|
|
2513 };
|
|
2514
|
|
2515 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain);
|
|
2516
|
771
|
2517 static Lisp_Object
|
|
2518 chain_canonicalize (Lisp_Object codesys)
|
|
2519 {
|
|
2520 /* We make use of the fact that this method is called at init time, after
|
|
2521 properties have been parsed. init_method is called too early. */
|
|
2522 /* #### It's not clear we need this whole chain-canonicalize mechanism
|
|
2523 any more. */
|
|
2524 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys),
|
|
2525 XCODING_SYSTEM_CHAIN_CHAIN (codesys));
|
|
2526 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys),
|
|
2527 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys),
|
|
2528 chain));
|
|
2529 Fputhash (chain, codesys, Vchain_canonicalize_hash_table);
|
|
2530 return codesys;
|
|
2531 }
|
|
2532
|
|
2533 static Lisp_Object
|
|
2534 chain_canonicalize_after_coding (struct coding_stream *str)
|
|
2535 {
|
|
2536 Lisp_Object cac =
|
|
2537 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys);
|
|
2538 if (!NILP (cac))
|
|
2539 return cac;
|
|
2540 return str->codesys;
|
|
2541 #if 0
|
|
2542 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain);
|
|
2543 Lisp_Object us = str->codesys, codesys;
|
|
2544 int i;
|
|
2545 Lisp_Object chain;
|
|
2546 Lisp_Object tail;
|
|
2547 int changed = 0;
|
|
2548
|
|
2549 /* #### It's not clear we need this whole chain-canonicalize mechanism
|
|
2550 any more. */
|
|
2551 if (str->direction == CODING_ENCODE || !data->initted)
|
|
2552 return us;
|
|
2553
|
|
2554 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us),
|
|
2555 XCODING_SYSTEM_CHAIN_CHAIN (us));
|
|
2556
|
|
2557 tail = chain;
|
|
2558 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++)
|
|
2559 {
|
|
2560 codesys = (coding_stream_canonicalize_after_coding
|
|
2561 (XLSTREAM (data->lstreams[i])));
|
|
2562 if (!EQ (codesys, XCAR (tail)))
|
|
2563 changed = 1;
|
|
2564 XCAR (tail) = codesys;
|
|
2565 tail = XCDR (tail);
|
|
2566 }
|
|
2567
|
|
2568 if (!changed)
|
|
2569 return us;
|
|
2570
|
|
2571 chain = delq_no_quit (Qnil, chain);
|
|
2572
|
|
2573 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) &&
|
|
2574 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us)))
|
|
2575 {
|
|
2576 if (NILP (chain))
|
|
2577 return Qnil;
|
|
2578 if (NILP (XCDR (chain)))
|
|
2579 return XCAR (chain);
|
|
2580 }
|
|
2581
|
|
2582 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us),
|
|
2583 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us),
|
|
2584 chain)), Vchain_canonicalize_hash_table,
|
|
2585 Qnil);
|
|
2586 if (!NILP (codesys))
|
|
2587 return codesys;
|
|
2588 return make_internal_coding_system
|
|
2589 (us, "internal-chain-canonicalizer-wrapper",
|
|
2590 Qchain, Qunbound, list2 (Qchain, chain));
|
|
2591 #endif /* 0 */
|
|
2592 }
|
|
2593
|
|
2594 static void
|
|
2595 chain_init (Lisp_Object codesys)
|
|
2596 {
|
|
2597 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil;
|
|
2598 }
|
|
2599
|
|
2600 static void
|
|
2601 chain_mark (Lisp_Object codesys)
|
|
2602 {
|
|
2603 int i;
|
|
2604
|
|
2605 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++)
|
|
2606 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]);
|
|
2607 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys));
|
|
2608 }
|
|
2609
|
|
2610 static void
|
|
2611 chain_mark_coding_stream_1 (struct chain_coding_stream *data)
|
|
2612 {
|
|
2613 int i;
|
|
2614
|
|
2615 for (i = 0; i < data->lstream_count; i++)
|
|
2616 mark_object (data->lstreams[i]);
|
|
2617 }
|
|
2618
|
|
2619 static void
|
|
2620 chain_mark_coding_stream (struct coding_stream *str)
|
|
2621 {
|
|
2622 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain));
|
|
2623 }
|
|
2624
|
|
2625 static void
|
|
2626 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag)
|
|
2627 {
|
|
2628 int i;
|
|
2629
|
826
|
2630 write_c_string (printcharfun, "(");
|
771
|
2631 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++)
|
|
2632 {
|
826
|
2633 write_c_string (printcharfun, i == 0 ? "" : "->");
|
771
|
2634 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i],
|
|
2635 printcharfun, escapeflag);
|
|
2636 }
|
|
2637 {
|
|
2638 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs);
|
|
2639 if (!NILP (cac))
|
|
2640 {
|
|
2641 if (i > 0)
|
826
|
2642 write_c_string (printcharfun, " ");
|
|
2643 write_c_string (printcharfun, "canonicalize-after-coding=");
|
771
|
2644 print_coding_system_in_print_method (cac, printcharfun, escapeflag);
|
|
2645 }
|
|
2646 }
|
|
2647
|
826
|
2648 write_c_string (printcharfun, ")");
|
771
|
2649 }
|
|
2650
|
|
2651 static void
|
|
2652 chain_rewind_coding_stream_1 (struct chain_coding_stream *data)
|
|
2653 {
|
|
2654 /* Each will rewind the next; there is always at least one stream (the
|
|
2655 dynarr stream at the end) if we're initted */
|
|
2656 if (data->initted)
|
|
2657 Lstream_rewind (XLSTREAM (data->lstreams[0]));
|
|
2658 }
|
|
2659
|
|
2660 static void
|
|
2661 chain_rewind_coding_stream (struct coding_stream *str)
|
|
2662 {
|
|
2663 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain));
|
|
2664 }
|
|
2665
|
|
2666 static void
|
|
2667 chain_init_coding_streams_1 (struct chain_coding_stream *data,
|
|
2668 unsigned_char_dynarr *dst,
|
|
2669 int ncodesys, Lisp_Object *codesys,
|
|
2670 enum encode_decode direction)
|
|
2671 {
|
|
2672 int i;
|
|
2673 Lisp_Object lstream_out;
|
|
2674
|
|
2675 data->lstream_count = ncodesys + 1;
|
|
2676 data->lstreams = xnew_array (Lisp_Object, data->lstream_count);
|
|
2677
|
|
2678 lstream_out = make_dynarr_output_stream (dst);
|
|
2679 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0);
|
|
2680 data->lstreams[data->lstream_count - 1] = lstream_out;
|
|
2681
|
|
2682 for (i = ncodesys - 1; i >= 0; i--)
|
|
2683 {
|
|
2684 data->lstreams[i] =
|
|
2685 make_coding_output_stream
|
|
2686 (XLSTREAM (lstream_out),
|
|
2687 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i],
|
800
|
2688 direction, 0);
|
771
|
2689 lstream_out = data->lstreams[i];
|
|
2690 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED,
|
|
2691 0);
|
|
2692 }
|
|
2693 data->initted = 1;
|
|
2694 }
|
|
2695
|
|
2696 static Bytecount
|
|
2697 chain_convert (struct coding_stream *str, const UExtbyte *src,
|
|
2698 unsigned_char_dynarr *dst, Bytecount n)
|
|
2699 {
|
|
2700 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain);
|
|
2701
|
|
2702 if (str->eof)
|
|
2703 {
|
|
2704 /* Each will close the next; there is always at least one stream (the
|
|
2705 dynarr stream at the end) if we're initted. We need to close now
|
|
2706 because more data may be generated. */
|
|
2707 if (data->initted)
|
|
2708 Lstream_close (XLSTREAM (data->lstreams[0]));
|
|
2709 return n;
|
|
2710 }
|
|
2711
|
|
2712 if (!data->initted)
|
|
2713 chain_init_coding_streams_1
|
|
2714 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys),
|
|
2715 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction);
|
|
2716
|
|
2717 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0)
|
|
2718 return -1;
|
|
2719 return n;
|
|
2720 }
|
|
2721
|
|
2722 static void
|
|
2723 chain_finalize_coding_stream_1 (struct chain_coding_stream *data)
|
|
2724 {
|
|
2725 if (data->lstreams)
|
|
2726 {
|
2297
|
2727 /* During GC, these objects are unmarked, and are about to be freed.
|
|
2728 We do NOT want them on the free list, and that will cause lots of
|
|
2729 nastiness including crashes. Just let them be freed normally. */
|
771
|
2730 if (!gc_in_progress)
|
|
2731 {
|
|
2732 int i;
|
2297
|
2733 /* Order of deletion is important here! Delete from the head of
|
|
2734 the chain and work your way towards the tail. In general,
|
|
2735 when you delete an object, there should be *NO* pointers to it
|
|
2736 anywhere. Deleting back-to-front would be a problem because
|
|
2737 there are pointers going forward. If there were pointers in
|
|
2738 both directions, you'd have to disconnect the pointers to a
|
|
2739 particular object before deleting it. */
|
771
|
2740 for (i = 0; i < data->lstream_count; i++)
|
|
2741 Lstream_delete (XLSTREAM ((data->lstreams)[i]));
|
|
2742 }
|
1726
|
2743 xfree (data->lstreams, Lisp_Object *);
|
771
|
2744 }
|
|
2745 }
|
|
2746
|
|
2747 static void
|
|
2748 chain_finalize_coding_stream (struct coding_stream *str)
|
|
2749 {
|
|
2750 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain));
|
|
2751 }
|
|
2752
|
|
2753 static void
|
|
2754 chain_finalize (Lisp_Object c)
|
|
2755 {
|
|
2756 if (XCODING_SYSTEM_CHAIN_CHAIN (c))
|
1726
|
2757 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c), Lisp_Object *);
|
771
|
2758 }
|
|
2759
|
428
|
2760 static int
|
771
|
2761 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value)
|
|
2762 {
|
|
2763 if (EQ (key, Qchain))
|
|
2764 {
|
|
2765 Lisp_Object *cslist;
|
|
2766 int count = 0;
|
|
2767 int i;
|
|
2768
|
2367
|
2769 {
|
|
2770 EXTERNAL_LIST_LOOP_2 (elt, value)
|
|
2771 {
|
|
2772 Fget_coding_system (elt);
|
|
2773 count++;
|
|
2774 }
|
|
2775 }
|
771
|
2776
|
|
2777 cslist = xnew_array (Lisp_Object, count);
|
|
2778 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist;
|
|
2779
|
|
2780 count = 0;
|
2367
|
2781 {
|
|
2782 EXTERNAL_LIST_LOOP_2 (elt, value)
|
|
2783 {
|
|
2784 cslist[count] = Fget_coding_system (elt);
|
|
2785 count++;
|
|
2786 }
|
|
2787 }
|
771
|
2788
|
|
2789 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count;
|
|
2790
|
|
2791 for (i = 0; i < count - 1; i++)
|
|
2792 {
|
|
2793 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) !=
|
|
2794 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE))
|
|
2795 invalid_argument_2 ("Sink of first must match source of second",
|
|
2796 cslist[i], cslist[i + 1]);
|
|
2797 }
|
|
2798 }
|
|
2799 else if (EQ (key, Qcanonicalize_after_coding))
|
|
2800 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) =
|
|
2801 Fget_coding_system (value);
|
|
2802 else
|
|
2803 return 0;
|
|
2804 return 1;
|
|
2805 }
|
|
2806
|
|
2807 static Lisp_Object
|
|
2808 chain_getprop (Lisp_Object coding_system, Lisp_Object prop)
|
|
2809 {
|
|
2810 if (EQ (prop, Qchain))
|
|
2811 {
|
|
2812 Lisp_Object result = Qnil;
|
|
2813 int i;
|
|
2814
|
|
2815 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++)
|
|
2816 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i],
|
|
2817 result);
|
|
2818
|
|
2819 return Fnreverse (result);
|
|
2820 }
|
|
2821 else if (EQ (prop, Qcanonicalize_after_coding))
|
|
2822 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system);
|
|
2823 else
|
|
2824 return Qunbound;
|
|
2825 }
|
|
2826
|
|
2827 static enum source_sink_type
|
|
2828 chain_conversion_end_type (Lisp_Object codesys)
|
|
2829 {
|
|
2830 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys);
|
|
2831 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys);
|
|
2832 int charp_source, charp_sink;
|
|
2833
|
|
2834 if (n == 0)
|
|
2835 return DECODES_BYTE_TO_BYTE; /* arbitrary */
|
|
2836 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE);
|
|
2837 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK);
|
|
2838
|
|
2839 switch (charp_source * 2 + charp_sink)
|
|
2840 {
|
|
2841 case 0: return DECODES_BYTE_TO_BYTE;
|
|
2842 case 1: return DECODES_BYTE_TO_CHARACTER;
|
|
2843 case 2: return DECODES_CHARACTER_TO_BYTE;
|
|
2844 case 3: return DECODES_CHARACTER_TO_CHARACTER;
|
|
2845 }
|
|
2846
|
2500
|
2847 ABORT ();
|
771
|
2848 return DECODES_BYTE_TO_BYTE;
|
|
2849 }
|
|
2850
|
|
2851
|
|
2852 /************************************************************************/
|
|
2853 /* No-conversion methods */
|
|
2854 /************************************************************************/
|
|
2855
|
|
2856 /* "No conversion"; used for binary files. We use quotes because there
|
|
2857 really is some conversion being applied (it does byte<->char
|
|
2858 conversion), but it appears to the user as if the text is read in
|
2297
|
2859 without conversion.
|
|
2860
|
|
2861 #### Shouldn't we _call_ it that, then? And while we're at it,
|
|
2862 separate it into "to_internal" and "to_external"? */
|
771
|
2863 DEFINE_CODING_SYSTEM_TYPE (no_conversion);
|
|
2864
|
|
2865 /* This is used when reading in "binary" files -- i.e. files that may
|
|
2866 contain all 256 possible byte values and that are not to be
|
|
2867 interpreted as being in any particular encoding. */
|
|
2868 static Bytecount
|
|
2869 no_conversion_convert (struct coding_stream *str,
|
|
2870 const UExtbyte *src,
|
|
2871 unsigned_char_dynarr *dst, Bytecount n)
|
|
2872 {
|
|
2873 UExtbyte c;
|
|
2874 unsigned int ch = str->ch;
|
|
2875 Bytecount orign = n;
|
|
2876
|
|
2877 if (str->direction == CODING_DECODE)
|
|
2878 {
|
|
2879 while (n--)
|
|
2880 {
|
|
2881 c = *src++;
|
|
2882
|
|
2883 DECODE_ADD_BINARY_CHAR (c, dst);
|
|
2884 }
|
|
2885
|
|
2886 if (str->eof)
|
|
2887 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst);
|
|
2888 }
|
|
2889 else
|
|
2890 {
|
|
2891
|
|
2892 while (n--)
|
|
2893 {
|
|
2894 c = *src++;
|
826
|
2895 if (byte_ascii_p (c))
|
771
|
2896 {
|
|
2897 assert (ch == 0);
|
|
2898 Dynarr_add (dst, c);
|
|
2899 }
|
|
2900 #ifdef MULE
|
867
|
2901 else if (ibyte_leading_byte_p (c))
|
771
|
2902 {
|
|
2903 assert (ch == 0);
|
|
2904 if (c == LEADING_BYTE_LATIN_ISO8859_1 ||
|
|
2905 c == LEADING_BYTE_CONTROL_1)
|
|
2906 ch = c;
|
|
2907 else
|
2297
|
2908 /* #### This is just plain unacceptable. */
|
771
|
2909 Dynarr_add (dst, '~'); /* untranslatable character */
|
|
2910 }
|
|
2911 else
|
|
2912 {
|
|
2913 if (ch == LEADING_BYTE_LATIN_ISO8859_1)
|
|
2914 Dynarr_add (dst, c);
|
|
2915 else if (ch == LEADING_BYTE_CONTROL_1)
|
|
2916 {
|
|
2917 assert (c < 0xC0);
|
|
2918 Dynarr_add (dst, c - 0x20);
|
|
2919 }
|
|
2920 /* else it should be the second or third byte of an
|
|
2921 untranslatable character, so ignore it */
|
|
2922 ch = 0;
|
|
2923 }
|
|
2924 #endif /* MULE */
|
|
2925
|
|
2926 }
|
|
2927 }
|
|
2928
|
|
2929 str->ch = ch;
|
|
2930 return orign;
|
|
2931 }
|
|
2932
|
|
2933 DEFINE_DETECTOR (no_conversion);
|
|
2934 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion);
|
|
2935
|
|
2936 struct no_conversion_detector
|
|
2937 {
|
|
2938 int dummy;
|
|
2939 };
|
|
2940
|
|
2941 static void
|
2286
|
2942 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src),
|
|
2943 Bytecount UNUSED (n))
|
771
|
2944 {
|
|
2945 /* Hack until we get better handling of this stuff! */
|
|
2946 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY;
|
|
2947 }
|
|
2948
|
|
2949
|
|
2950 /************************************************************************/
|
|
2951 /* Convert-eol methods */
|
|
2952 /************************************************************************/
|
|
2953
|
|
2954 /* This is used to handle end-of-line (EOL) differences. It is
|
2819
|
2955 character-to-character, and works (when encoding) *BEFORE* sending data to
|
|
2956 the main encoding routine -- thus, that routine must handle different EOL
|
|
2957 types itself if it does line-oriented type processing. This is unavoidable
|
|
2958 because we don't know whether the output of the main encoding routine is
|
|
2959 ASCII compatible (UTF-16 is definitely not, for example). [[ sjt sez this
|
|
2960 is bogus. There should be _no_ EOL processing (or processing of any kind)
|
|
2961 after conversion to external. ]]
|
771
|
2962
|
793
|
2963 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil.
|
771
|
2964 */
|
|
2965
|
|
2966 struct convert_eol_coding_system
|
|
2967 {
|
|
2968 enum eol_type subtype;
|
2132
|
2969 int dummy; /* On some architectures (eg ia64) the portable dumper can
|
|
2970 produce unaligned access errors without this field. Probably
|
|
2971 because the combined structure of this structure and
|
|
2972 Lisp_Coding_System is not properly aligned. */
|
771
|
2973 };
|
|
2974
|
|
2975 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \
|
|
2976 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype)
|
|
2977 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \
|
|
2978 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype)
|
|
2979
|
|
2980 struct convert_eol_coding_stream
|
|
2981 {
|
|
2982 enum eol_type actual;
|
|
2983 };
|
|
2984
|
1204
|
2985 static const struct memory_description
|
771
|
2986 convert_eol_coding_system_description[] = {
|
|
2987 { XD_END }
|
|
2988 };
|
|
2989
|
1204
|
2990 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol);
|
|
2991
|
771
|
2992 static void
|
2286
|
2993 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun,
|
|
2994 int UNUSED (escapeflag))
|
771
|
2995 {
|
|
2996 struct convert_eol_coding_system *data =
|
|
2997 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol);
|
|
2998
|
|
2999 write_fmt_string (printcharfun, "(%s)",
|
|
3000 data->subtype == EOL_LF ? "lf" :
|
|
3001 data->subtype == EOL_CRLF ? "crlf" :
|
|
3002 data->subtype == EOL_CR ? "cr" :
|
793
|
3003 data->subtype == EOL_AUTODETECT ? "nil" :
|
2500
|
3004 (ABORT(), ""));
|
771
|
3005 }
|
|
3006
|
|
3007 static enum source_sink_type
|
2286
|
3008 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys))
|
771
|
3009 {
|
|
3010 return DECODES_CHARACTER_TO_CHARACTER;
|
|
3011 }
|
|
3012
|
|
3013 static int
|
|
3014 convert_eol_putprop (Lisp_Object codesys,
|
|
3015 Lisp_Object key,
|
|
3016 Lisp_Object value)
|
|
3017 {
|
|
3018 struct convert_eol_coding_system *data =
|
|
3019 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol);
|
|
3020
|
|
3021 if (EQ (key, Qsubtype))
|
|
3022 {
|
|
3023 if (EQ (value, Qlf) /* || EQ (value, Qunix) */)
|
|
3024 data->subtype = EOL_LF;
|
|
3025 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */)
|
|
3026 data->subtype = EOL_CRLF;
|
|
3027 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */)
|
|
3028 data->subtype = EOL_CR;
|
793
|
3029 else if (EQ (value, Qnil))
|
771
|
3030 data->subtype = EOL_AUTODETECT;
|
|
3031 else invalid_constant ("Unrecognized eol type", value);
|
|
3032 }
|
|
3033 else
|
|
3034 return 0;
|
|
3035 return 1;
|
|
3036 }
|
|
3037
|
|
3038 static Lisp_Object
|
|
3039 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop)
|
|
3040 {
|
|
3041 struct convert_eol_coding_system *data =
|
|
3042 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol);
|
|
3043
|
|
3044 if (EQ (prop, Qsubtype))
|
|
3045 {
|
|
3046 switch (data->subtype)
|
|
3047 {
|
|
3048 case EOL_LF: return Qlf;
|
|
3049 case EOL_CRLF: return Qcrlf;
|
|
3050 case EOL_CR: return Qcr;
|
793
|
3051 case EOL_AUTODETECT: return Qnil;
|
2500
|
3052 default: ABORT ();
|
771
|
3053 }
|
|
3054 }
|
|
3055
|
|
3056 return Qunbound;
|
|
3057 }
|
|
3058
|
|
3059 static void
|
|
3060 convert_eol_init_coding_stream (struct coding_stream *str)
|
|
3061 {
|
|
3062 struct convert_eol_coding_stream *data =
|
|
3063 CODING_STREAM_TYPE_DATA (str, convert_eol);
|
|
3064 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys);
|
|
3065 }
|
|
3066
|
|
3067 static Bytecount
|
867
|
3068 convert_eol_convert (struct coding_stream *str, const Ibyte *src,
|
771
|
3069 unsigned_char_dynarr *dst, Bytecount n)
|
|
3070 {
|
|
3071 if (str->direction == CODING_DECODE)
|
|
3072 {
|
|
3073 struct convert_eol_coding_stream *data =
|
|
3074 CODING_STREAM_TYPE_DATA (str, convert_eol);
|
|
3075
|
|
3076 if (data->actual == EOL_AUTODETECT)
|
|
3077 {
|
|
3078 Bytecount n2 = n;
|
867
|
3079 const Ibyte *src2 = src;
|
771
|
3080
|
|
3081 for (; n2; n2--)
|
|
3082 {
|
867
|
3083 Ibyte c = *src2++;
|
771
|
3084 if (c == '\n')
|
|
3085 {
|
|
3086 data->actual = EOL_LF;
|
|
3087 break;
|
|
3088 }
|
|
3089 else if (c == '\r')
|
|
3090 {
|
|
3091 if (n2 == 1)
|
|
3092 {
|
|
3093 /* If we're seeing a '\r' at the end of the data, then
|
|
3094 reject the '\r' right now so it doesn't become an
|
|
3095 issue in the code below -- unless we're at the end of
|
|
3096 the stream, in which case we can't do that (because
|
|
3097 then the '\r' will never get written out), and in any
|
|
3098 case we should be recognizing it at EOL_CR format. */
|
|
3099 if (str->eof)
|
|
3100 data->actual = EOL_CR;
|
|
3101 else
|
|
3102 n--;
|
|
3103 break;
|
|
3104 }
|
|
3105 else if (*src2 == '\n')
|
|
3106 data->actual = EOL_CRLF;
|
|
3107 else
|
|
3108 data->actual = EOL_CR;
|
|
3109 break;
|
|
3110 }
|
|
3111 }
|
|
3112 }
|
|
3113
|
|
3114 /* str->eof is set, the caller reached EOF on the other end and has
|
|
3115 no new data to give us. The only data we get is the data we
|
|
3116 rejected from last time. */
|
|
3117 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT ||
|
|
3118 (str->eof))
|
|
3119 Dynarr_add_many (dst, src, n);
|
|
3120 else
|
|
3121 {
|
867
|
3122 const Ibyte *end = src + n;
|
771
|
3123 while (1)
|
|
3124 {
|
|
3125 /* Find the next section with no \r and add it. */
|
867
|
3126 const Ibyte *runstart = src;
|
|
3127 src = (Ibyte *) memchr (src, '\r', end - src);
|
771
|
3128 if (!src)
|
|
3129 src = end;
|
|
3130 Dynarr_add_many (dst, runstart, src - runstart);
|
|
3131 /* Stop if at end ... */
|
|
3132 if (src == end)
|
|
3133 break;
|
|
3134 /* ... else, translate as necessary. */
|
|
3135 src++;
|
|
3136 if (data->actual == EOL_CR)
|
|
3137 Dynarr_add (dst, '\n');
|
|
3138 /* We need to be careful here with CRLF. If we see a CR at the
|
|
3139 end of the data, we don't know if it's part of a CRLF, so we
|
|
3140 reject it. Otherwise: If it's part of a CRLF, eat it and
|
|
3141 loop; the following LF gets added next time around. If it's
|
|
3142 not part of a CRLF, add the CR and loop. The following
|
|
3143 character will be processed in the next loop iteration. This
|
|
3144 correctly handles a sequence like CR+CR+LF. */
|
|
3145 else if (src == end)
|
|
3146 return n - 1; /* reject the CR at the end; we'll get it again
|
|
3147 next time the convert method is called */
|
|
3148 else if (*src != '\n')
|
|
3149 Dynarr_add (dst, '\r');
|
|
3150 }
|
|
3151 }
|
|
3152
|
|
3153 return n;
|
|
3154 }
|
|
3155 else
|
|
3156 {
|
|
3157 enum eol_type subtype =
|
|
3158 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys);
|
867
|
3159 const Ibyte *end = src + n;
|
771
|
3160
|
|
3161 /* We try to be relatively efficient here. */
|
|
3162 if (subtype == EOL_LF)
|
|
3163 Dynarr_add_many (dst, src, n);
|
|
3164 else
|
|
3165 {
|
|
3166 while (1)
|
|
3167 {
|
|
3168 /* Find the next section with no \n and add it. */
|
867
|
3169 const Ibyte *runstart = src;
|
|
3170 src = (Ibyte *) memchr (src, '\n', end - src);
|
771
|
3171 if (!src)
|
|
3172 src = end;
|
|
3173 Dynarr_add_many (dst, runstart, src - runstart);
|
|
3174 /* Stop if at end ... */
|
|
3175 if (src == end)
|
|
3176 break;
|
|
3177 /* ... else, skip over \n and add its translation. */
|
|
3178 src++;
|
|
3179 Dynarr_add (dst, '\r');
|
|
3180 if (subtype == EOL_CRLF)
|
|
3181 Dynarr_add (dst, '\n');
|
|
3182 }
|
|
3183 }
|
|
3184
|
|
3185 return n;
|
|
3186 }
|
|
3187 }
|
|
3188
|
|
3189 static Lisp_Object
|
|
3190 convert_eol_canonicalize_after_coding (struct coding_stream *str)
|
|
3191 {
|
|
3192 struct convert_eol_coding_stream *data =
|
|
3193 CODING_STREAM_TYPE_DATA (str, convert_eol);
|
|
3194
|
|
3195 if (str->direction == CODING_ENCODE)
|
|
3196 return str->codesys;
|
|
3197
|
|
3198 switch (data->actual)
|
|
3199 {
|
|
3200 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf);
|
|
3201 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf);
|
|
3202 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr);
|
|
3203 case EOL_AUTODETECT: return str->codesys;
|
2500
|
3204 default: ABORT (); return Qnil;
|
771
|
3205 }
|
|
3206 }
|
|
3207
|
|
3208
|
|
3209 /************************************************************************/
|
|
3210 /* Undecided methods */
|
|
3211 /************************************************************************/
|
|
3212
|
|
3213 /* Do autodetection. We can autodetect the EOL type only, the coding
|
|
3214 system only, or both. We only do autodetection when decoding; when
|
|
3215 encoding, we just pass the data through.
|
|
3216
|
|
3217 When doing just EOL detection, a coding system can be specified; if so,
|
|
3218 we will decode this data through the coding system before doing EOL
|
|
3219 detection. The reason for specifying this is so that
|
|
3220 canonicalize-after-coding works: We will canonicalize the specified
|
|
3221 coding system into the appropriate EOL type. When doing both coding and
|
|
3222 EOL detection, we do similar canonicalization, and also catch situations
|
|
3223 where the EOL type is overspecified, i.e. the detected coding system
|
|
3224 specifies an EOL type, and either switch to the equivalent
|
|
3225 non-EOL-processing coding system (if possible), or terminate EOL
|
|
3226 detection and use the specified EOL type. This prevents data from being
|
|
3227 EOL-processed twice.
|
|
3228 */
|
|
3229
|
|
3230 struct undecided_coding_system
|
|
3231 {
|
|
3232 int do_eol, do_coding;
|
|
3233 Lisp_Object cs;
|
|
3234 };
|
|
3235
|
|
3236 struct undecided_coding_stream
|
|
3237 {
|
|
3238 Lisp_Object actual;
|
|
3239 /* Either 2 or 3 lstreams here; see undecided_convert */
|
|
3240 struct chain_coding_stream c;
|
|
3241
|
|
3242 struct detection_state *st;
|
|
3243 };
|
|
3244
|
1204
|
3245 static const struct memory_description undecided_coding_system_description[] = {
|
|
3246 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) },
|
771
|
3247 { XD_END }
|
|
3248 };
|
|
3249
|
1204
|
3250 static const struct memory_description undecided_coding_stream_description_1 [] = {
|
|
3251 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) },
|
2367
|
3252 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c),
|
2551
|
3253 1, { &chain_coding_stream_description } },
|
1204
|
3254 { XD_END }
|
|
3255 };
|
|
3256
|
|
3257 const struct sized_memory_description undecided_coding_stream_description = {
|
|
3258 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1
|
|
3259 };
|
|
3260
|
|
3261 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided);
|
|
3262
|
771
|
3263 static void
|
|
3264 undecided_init (Lisp_Object codesys)
|
|
3265 {
|
|
3266 struct undecided_coding_system *data =
|
|
3267 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
|
|
3268
|
|
3269 data->cs = Qnil;
|
|
3270 }
|
|
3271
|
|
3272 static void
|
|
3273 undecided_mark (Lisp_Object codesys)
|
|
3274 {
|
|
3275 struct undecided_coding_system *data =
|
|
3276 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
|
|
3277
|
|
3278 mark_object (data->cs);
|
|
3279 }
|
|
3280
|
|
3281 static void
|
|
3282 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag)
|
|
3283 {
|
|
3284 struct undecided_coding_system *data =
|
|
3285 XCODING_SYSTEM_TYPE_DATA (cs, undecided);
|
|
3286 int need_space = 0;
|
|
3287
|
826
|
3288 write_c_string (printcharfun, "(");
|
771
|
3289 if (data->do_eol)
|
|
3290 {
|
826
|
3291 write_c_string (printcharfun, "do-eol");
|
771
|
3292 need_space = 1;
|
|
3293 }
|
|
3294 if (data->do_coding)
|
|
3295 {
|
|
3296 if (need_space)
|
826
|
3297 write_c_string (printcharfun, " ");
|
|
3298 write_c_string (printcharfun, "do-coding");
|
771
|
3299 need_space = 1;
|
|
3300 }
|
|
3301 if (!NILP (data->cs))
|
|
3302 {
|
|
3303 if (need_space)
|
826
|
3304 write_c_string (printcharfun, " ");
|
|
3305 write_c_string (printcharfun, "coding-system=");
|
771
|
3306 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag);
|
|
3307 }
|
826
|
3308 write_c_string (printcharfun, ")");
|
771
|
3309 }
|
|
3310
|
|
3311 static void
|
|
3312 undecided_mark_coding_stream (struct coding_stream *str)
|
|
3313 {
|
1204
|
3314 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual);
|
771
|
3315 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c);
|
|
3316 }
|
|
3317
|
|
3318 static int
|
|
3319 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value)
|
|
3320 {
|
|
3321 struct undecided_coding_system *data =
|
|
3322 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
|
|
3323
|
|
3324 if (EQ (key, Qdo_eol))
|
|
3325 data->do_eol = 1;
|
|
3326 else if (EQ (key, Qdo_coding))
|
|
3327 data->do_coding = 1;
|
|
3328 else if (EQ (key, Qcoding_system))
|
|
3329 data->cs = get_coding_system_for_text_file (value, 0);
|
|
3330 else
|
|
3331 return 0;
|
|
3332 return 1;
|
|
3333 }
|
|
3334
|
|
3335 static Lisp_Object
|
|
3336 undecided_getprop (Lisp_Object codesys, Lisp_Object prop)
|
|
3337 {
|
|
3338 struct undecided_coding_system *data =
|
|
3339 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
|
|
3340
|
|
3341 if (EQ (prop, Qdo_eol))
|
|
3342 return data->do_eol ? Qt : Qnil;
|
|
3343 if (EQ (prop, Qdo_coding))
|
|
3344 return data->do_coding ? Qt : Qnil;
|
|
3345 if (EQ (prop, Qcoding_system))
|
|
3346 return data->cs;
|
|
3347 return Qunbound;
|
|
3348 }
|
|
3349
|
|
3350 static struct detection_state *
|
|
3351 allocate_detection_state (void)
|
|
3352 {
|
|
3353 int i;
|
|
3354 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state));
|
|
3355 struct detection_state *block;
|
|
3356
|
|
3357 for (i = 0; i < coding_detector_count; i++)
|
|
3358 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size);
|
|
3359
|
|
3360 block = (struct detection_state *) xmalloc_and_zero (size);
|
|
3361
|
|
3362 size = MAX_ALIGN_SIZE (sizeof (struct detection_state));
|
|
3363 for (i = 0; i < coding_detector_count; i++)
|
|
3364 {
|
|
3365 block->data_offset[i] = size;
|
|
3366 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size);
|
|
3367 }
|
|
3368
|
|
3369 return block;
|
|
3370 }
|
|
3371
|
|
3372 static void
|
|
3373 free_detection_state (struct detection_state *st)
|
|
3374 {
|
|
3375 int i;
|
|
3376
|
|
3377 for (i = 0; i < coding_detector_count; i++)
|
|
3378 {
|
|
3379 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method)
|
|
3380 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method
|
|
3381 (st);
|
|
3382 }
|
|
3383
|
1726
|
3384 xfree (st, struct detection_state *);
|
771
|
3385 }
|
|
3386
|
|
3387 static int
|
|
3388 coding_category_symbol_to_id (Lisp_Object symbol)
|
428
|
3389 {
|
|
3390 int i;
|
|
3391
|
|
3392 CHECK_SYMBOL (symbol);
|
771
|
3393 for (i = 0; i < coding_detector_count; i++)
|
|
3394 {
|
|
3395 detector_category_dynarr *cats =
|
|
3396 Dynarr_at (all_coding_detectors, i).cats;
|
|
3397 int j;
|
|
3398
|
|
3399 for (j = 0; j < Dynarr_length (cats); j++)
|
|
3400 if (EQ (Dynarr_at (cats, j).sym, symbol))
|
|
3401 return Dynarr_at (cats, j).id;
|
|
3402 }
|
|
3403
|
563
|
3404 invalid_constant ("Unrecognized coding category", symbol);
|
1204
|
3405 RETURN_NOT_REACHED (0);
|
428
|
3406 }
|
|
3407
|
771
|
3408 static Lisp_Object
|
|
3409 coding_category_id_to_symbol (int id)
|
428
|
3410 {
|
|
3411 int i;
|
771
|
3412
|
|
3413 for (i = 0; i < coding_detector_count; i++)
|
|
3414 {
|
|
3415 detector_category_dynarr *cats =
|
|
3416 Dynarr_at (all_coding_detectors, i).cats;
|
|
3417 int j;
|
|
3418
|
|
3419 for (j = 0; j < Dynarr_length (cats); j++)
|
|
3420 if (id == Dynarr_at (cats, j).id)
|
|
3421 return Dynarr_at (cats, j).sym;
|
|
3422 }
|
|
3423
|
2500
|
3424 ABORT ();
|
771
|
3425 return Qnil; /* (usually) not reached */
|
428
|
3426 }
|
|
3427
|
771
|
3428 static Lisp_Object
|
|
3429 detection_result_number_to_symbol (enum detection_result result)
|
428
|
3430 {
|
1494
|
3431 /* let compiler warn if not all enumerators are handled */
|
|
3432 switch (result) {
|
|
3433 #define FROB(sym, num) case num: return (sym)
|
771
|
3434 FROB (Qnear_certainty, DET_NEAR_CERTAINTY);
|
|
3435 FROB (Qquite_probable, DET_QUITE_PROBABLE);
|
|
3436 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY);
|
1494
|
3437 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY);
|
771
|
3438 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY);
|
|
3439 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY);
|
|
3440 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE);
|
|
3441 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE);
|
|
3442 #undef FROB
|
1494
|
3443 }
|
771
|
3444
|
2500
|
3445 ABORT ();
|
771
|
3446 return Qnil; /* (usually) not reached */
|
|
3447 }
|
|
3448
|
778
|
#if 0 /* not used */
/* Inverse of detection_result_number_to_symbol: map SYMBOL to its
   detection result, signalling an error for an unknown symbol. */
static enum detection_result
detection_result_symbol_to_number (Lisp_Object symbol)
{
  /* using switch here would be bad style, and doesn't help */
  if (EQ (symbol, Qnear_certainty))
    return DET_NEAR_CERTAINTY;
  if (EQ (symbol, Qquite_probable))
    return DET_QUITE_PROBABLE;
  if (EQ (symbol, Qsomewhat_likely))
    return DET_SOMEWHAT_LIKELY;
  if (EQ (symbol, Qslightly_likely))
    return DET_SLIGHTLY_LIKELY;
  if (EQ (symbol, Qas_likely_as_unlikely))
    return DET_AS_LIKELY_AS_UNLIKELY;
  if (EQ (symbol, Qsomewhat_unlikely))
    return DET_SOMEWHAT_UNLIKELY;
  if (EQ (symbol, Qquite_improbable))
    return DET_QUITE_IMPROBABLE;
  if (EQ (symbol, Qnearly_impossible))
    return DET_NEARLY_IMPOSSIBLE;

  invalid_constant ("Unrecognized detection result", symbol);
  return ((enum detection_result) 0); /* not reached */
}
#endif /* 0 */
|
771
|
3469
|
|
3470 /* Set all detection results for a given detector to a specified value. */
|
|
3471 void
|
|
3472 set_detection_results (struct detection_state *st, int detector, int given)
|
|
3473 {
|
|
3474 detector_category_dynarr *cats =
|
|
3475 Dynarr_at (all_coding_detectors, detector).cats;
|
|
3476 int i;
|
|
3477
|
|
3478 for (i = 0; i < Dynarr_length (cats); i++)
|
|
3479 st->categories[Dynarr_at (cats, i).id] = given;
|
|
3480 }
|
428
|
3481
|
|
/* Return 1 if C is a control character that you might reasonably see
   in a text file (and which is therefore allowed and ignored), 0 for
   anything else. */
static int
acceptable_control_char_p (int c)
{
  /* Line structure and tabs. */
  if (c == '\r' || c == '\n' || c == '\t')
    return 1;

  /* bell, backspace, vertical tab, form feed */
  if (c == 7 || c == 8 || c == 11 || c == 12)
    return 1;

  /* MS-DOS C-z junk, and '^_' -- for info */
  return c == 26 || c == 31;
}
|
|
3503
|
771
|
3504 #ifdef DEBUG_XEMACS
|
|
3505
|
|
3506 static UExtbyte
|
|
3507 hex_digit_to_char (int digit)
|
428
|
3508 {
|
771
|
3509 if (digit < 10)
|
|
3510 return digit + '0';
|
|
3511 else
|
|
3512 return digit - 10 + 'A';
|
428
|
3513 }
|
|
3514
|
771
|
3515 static void
|
|
3516 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n)
|
428
|
3517 {
|
3425
|
3518 Extbyte *ascii = alloca_array (Extbyte, n + 1);
|
|
3519 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1);
|
771
|
3520 int i;
|
3413
|
3521 DECLARE_EISTRING (eistr_ascii);
|
|
3522 DECLARE_EISTRING (eistr_hex);
|
771
|
3523
|
|
3524 for (i = 0; i < n; i++)
|
428
|
3525 {
|
3425
|
3526 Extbyte c = src[i];
|
771
|
3527 if (c < 0x20)
|
|
3528 ascii[i] = '.';
|
428
|
3529 else
|
771
|
3530 ascii[i] = c;
|
|
3531 hex[3 * i] = hex_digit_to_char (c >> 4);
|
|
3532 hex[3 * i + 1] = hex_digit_to_char (c & 0xF);
|
|
3533 hex[3 * i + 2] = ' ';
|
428
|
3534 }
|
771
|
3535 ascii[i] = '\0';
|
|
3536 hex[3 * i - 1] = '\0';
|
3413
|
3537
|
|
3538 eicpy_ext(eistr_hex, hex, Qbinary);
|
|
3539 eicpy_ext(eistr_ascii, ascii, Qbinary);
|
|
3540
|
3425
|
3541 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex));
|
428
|
3542 }
|
|
3543
|
771
|
3544 #endif /* DEBUG_XEMACS */
|
|
3545
|
|
3546 /* Attempt to determine the encoding of the given text. Before calling
|
|
3547 this function for the first time, you must zero out the detection state.
|
428
|
3548
|
|
3549 Returns:
|
|
3550
|
771
|
3551 0 == keep going
|
|
3552 1 == stop
|
428
|
3553 */
|
|
3554
|
|
3555 static int
|
771
|
3556 detect_coding_type (struct detection_state *st, const UExtbyte *src,
|
|
3557 Bytecount n)
|
428
|
3558 {
|
771
|
3559 Bytecount n2 = n;
|
|
3560 const UExtbyte *src2 = src;
|
|
3561 int i;
|
|
3562
|
|
3563 #ifdef DEBUG_XEMACS
|
|
3564 if (!NILP (Vdebug_coding_detection))
|
|
3565 {
|
|
3566 int bytes = min (16, n);
|
|
3567 stderr_out ("detect_coding_type: processing %ld bytes\n", n);
|
|
3568 stderr_out ("First %d: ", bytes);
|
|
3569 output_bytes_in_ascii_and_hex (src, bytes);
|
|
3570 stderr_out ("\nLast %d: ", bytes);
|
|
3571 output_bytes_in_ascii_and_hex (src + n - bytes, bytes);
|
|
3572 stderr_out ("\n");
|
|
3573 }
|
|
3574 #endif /* DEBUG_XEMACS */
|
428
|
3575 if (!st->seen_non_ascii)
|
|
3576 {
|
771
|
3577 for (; n2; n2--, src2++)
|
428
|
3578 {
|
771
|
3579 UExtbyte c = *src2;
|
428
|
3580 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80)
|
|
3581 {
|
|
3582 st->seen_non_ascii = 1;
|
|
3583 break;
|
|
3584 }
|
|
3585 }
|
|
3586 }
|
|
3587
|
771
|
3588 for (i = 0; i < coding_detector_count; i++)
|
|
3589 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n);
|
|
3590
|
|
3591 st->bytes_seen += n;
|
|
3592
|
|
3593 #ifdef DEBUG_XEMACS
|
|
3594 if (!NILP (Vdebug_coding_detection))
|
|
3595 {
|
|
3596 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii);
|
1494
|
3597 if (coding_detector_category_count <= 0)
|
|
3598 stderr_out ("found %d detector categories\n",
|
|
3599 coding_detector_category_count);
|
771
|
3600 for (i = 0; i < coding_detector_category_count; i++)
|
|
3601 stderr_out_lisp
|
|
3602 ("%s: %s\n",
|
|
3603 2,
|
|
3604 coding_category_id_to_symbol (i),
|
|
3605 detection_result_number_to_symbol ((enum detection_result)
|
|
3606 st->categories[i]));
|
|
3607 }
|
|
3608 #endif /* DEBUG_XEMACS */
|
|
3609
|
|
3610 {
|
|
3611 int not_unlikely = 0;
|
|
3612 int retval;
|
|
3613
|
|
3614 for (i = 0; i < coding_detector_category_count; i++)
|
|
3615 if (st->categories[i] >= 0)
|
|
3616 not_unlikely++;
|
|
3617
|
|
3618 retval = (not_unlikely <= 1
|
|
3619 #if 0 /* this is bogus */
|
|
3620 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION
|
428
|
3621 #endif
|
771
|
3622 );
|
|
3623
|
|
3624 #ifdef DEBUG_XEMACS
|
|
3625 if (!NILP (Vdebug_coding_detection))
|
|
3626 stderr_out ("detect_coding_type: returning %d (%s)\n",
|
|
3627 retval, retval ? "stop" : "keep going");
|
|
3628 #endif /* DEBUG_XEMACS */
|
|
3629
|
|
3630 return retval;
|
428
|
3631 }
|
|
3632 }
|
|
3633
|
|
3634 static Lisp_Object
|
771
|
3635 detected_coding_system (struct detection_state *st)
|
428
|
3636 {
|
771
|
3637 int i;
|
|
3638 int even = 1;
|
|
3639
|
|
3640 if (st->seen_non_ascii)
|
|
3641 {
|
|
3642 for (i = 0; i < coding_detector_category_count; i++)
|
|
3643 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY)
|
|
3644 {
|
|
3645 even = 0;
|
|
3646 break;
|
|
3647 }
|
|
3648 }
|
|
3649
|
|
3650 /* #### Here we are ignoring the results of detection when it's all
|
|
3651 ASCII. This is obviously a bad thing. But we need to fix up the
|
|
3652 existing detection methods somewhat before we can switch. */
|
|
3653 if (even)
|
428
|
3654 {
|
|
3655 /* If the file was entirely or basically ASCII, use the
|
|
3656 default value of `buffer-file-coding-system'. */
|
|
3657 Lisp_Object retval =
|
|
3658 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system;
|
|
3659 if (!NILP (retval))
|
|
3660 {
|
771
|
3661 retval = find_coding_system_for_text_file (retval, 0);
|
428
|
3662 if (NILP (retval))
|
|
3663 {
|
|
3664 warn_when_safe
|
|
3665 (Qbad_variable, Qwarning,
|
|
3666 "Invalid `default-buffer-file-coding-system', set to nil");
|
|
3667 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil;
|
|
3668 }
|
|
3669 }
|
|
3670 if (NILP (retval))
|
4100
|
3671 retval = Fget_coding_system (Qbinary);
|
428
|
3672 return retval;
|
|
3673 }
|
|
3674 else
|
|
3675 {
|
771
|
3676 int likelihood;
|
|
3677 Lisp_Object retval = Qnil;
|
|
3678
|
|
3679 /* Look through the coding categories first by likelihood and then by
|
|
3680 priority and find the first one that is allowed. */
|
|
3681
|
|
3682 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--)
|
428
|
3683 {
|
771
|
3684 for (i = 0; i < coding_detector_category_count; i++)
|
|
3685 {
|
|
3686 int cat = coding_category_by_priority[i];
|
|
3687 if (st->categories[cat] == likelihood &&
|
|
3688 !NILP (coding_category_system[cat]))
|
|
3689 {
|
|
3690 retval = (get_coding_system_for_text_file
|
|
3691 (coding_category_system[cat], 0));
|
|
3692 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY)
|
|
3693 warn_when_safe_lispobj
|
|
3694 (intern ("detection"),
|
793
|
3695 Qwarning,
|
771
|
3696 emacs_sprintf_string_lisp
|
|
3697 (
|
|
3698 "Detected coding %s is unlikely to be correct (likelihood == `%s')",
|
|
3699 Qnil, 2, XCODING_SYSTEM_NAME (retval),
|
|
3700 detection_result_number_to_symbol
|
|
3701 ((enum detection_result) likelihood)));
|
|
3702 return retval;
|
|
3703 }
|
|
3704 }
|
428
|
3705 }
|
771
|
3706
|
|
3707 return Fget_coding_system (Qraw_text);
|
428
|
3708 }
|
|
3709 }
|
|
3710
|
1347
|
3711 /* Look for a coding system in the string (skipping over leading
|
|
3712 blanks). If found, return it, otherwise nil. */
|
|
3713
|
|
3714 static Lisp_Object
|
2531
|
3715 snarf_coding_system (const UExtbyte *p, Bytecount len)
|
1347
|
3716 {
|
|
3717 Bytecount n;
|
2531
|
3718 UExtbyte *name;
|
1347
|
3719
|
|
3720 while (*p == ' ' || *p == '\t') p++, len--;
|
|
3721 len = min (len, 1000);
|
|
3722 name = alloca_ibytes (len + 1);
|
|
3723 memcpy (name, p, len);
|
|
3724 name[len] = '\0';
|
|
3725
|
|
3726 /* Get coding system name */
|
|
3727 /* Characters valid in a MIME charset name (rfc 1521),
|
|
3728 and in a Lisp symbol name. */
|
|
3729 n = qxestrspn (name,
|
|
3730 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
3731 "abcdefghijklmnopqrstuvwxyz"
|
|
3732 "0123456789"
|
|
3733 "!$%&*+-.^_{|}~");
|
|
3734 if (n > 0)
|
|
3735 {
|
|
3736 name[n] = '\0';
|
2531
|
3737 /* This call to intern_int() is OK because we already verified that
|
|
3738 there are only ASCII characters in the string */
|
|
3739 return find_coding_system_for_text_file (intern_int ((Ibyte *) name), 0);
|
1347
|
3740 }
|
|
3741
|
|
3742 return Qnil;
|
|
3743 }
|
|
3744
|
428
|
3745 /* Given a seekable read stream and potential coding system and EOL type
|
|
3746 as specified, do any autodetection that is called for. If the
|
|
3747 coding system and/or EOL type are not `autodetect', they will be left
|
|
3748 alone; but this function will never return an autodetect coding system
|
|
3749 or EOL type.
|
|
3750
|
|
3751 This function does not automatically fetch subsidiary coding systems;
|
|
3752 that should be unnecessary with the explicit eol-type argument. */
|
|
3753
|
|
/* Number of characters in a string literal, not counting the
   terminating NUL.  Only meaningful for literals/arrays, not pointers. */
#define LENGTH(literal) (sizeof (literal) - 1)
|
|
3755
|
771
|
3756 static Lisp_Object
|
|
3757 unwind_free_detection_state (Lisp_Object opaque)
|
|
3758 {
|
|
3759 struct detection_state *st =
|
|
3760 (struct detection_state *) get_opaque_ptr (opaque);
|
|
3761 free_detection_state (st);
|
|
3762 free_opaque_ptr (opaque);
|
|
3763 return Qnil;
|
|
3764 }
|
|
3765
|
1347
|
3766 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file'
|
|
3767 in files.el. Look into combining them. */
|
|
3768
|
771
|
3769 static Lisp_Object
|
|
3770 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len)
|
428
|
3771 {
|
771
|
3772 const UExtbyte *p;
|
|
3773 const UExtbyte *scan_end;
|
2531
|
3774 Bytecount cookie_len;
|
771
|
3775
|
|
3776 /* Look for initial "-*-"; mode line prefix */
|
|
3777 for (p = data,
|
|
3778 scan_end = data + len - LENGTH ("-*-coding:?-*-");
|
|
3779 p <= scan_end
|
|
3780 && *p != '\n'
|
|
3781 && *p != '\r';
|
|
3782 p++)
|
|
3783 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
|
|
3784 {
|
|
3785 const UExtbyte *local_vars_beg = p + 3;
|
|
3786 /* Look for final "-*-"; mode line suffix */
|
|
3787 for (p = local_vars_beg,
|
|
3788 scan_end = data + len - LENGTH ("-*-");
|
|
3789 p <= scan_end
|
428
|
3790 && *p != '\n'
|
|
3791 && *p != '\r';
|
771
|
3792 p++)
|
|
3793 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
|
|
3794 {
|
|
3795 const UExtbyte *suffix = p;
|
|
3796 /* Look for "coding:" */
|
|
3797 for (p = local_vars_beg,
|
|
3798 scan_end = suffix - LENGTH ("coding:?");
|
|
3799 p <= scan_end;
|
|
3800 p++)
|
|
3801 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0
|
|
3802 && (p == local_vars_beg
|
|
3803 || (*(p-1) == ' ' ||
|
|
3804 *(p-1) == '\t' ||
|
|
3805 *(p-1) == ';')))
|
|
3806 {
|
|
3807 p += LENGTH ("coding:");
|
1347
|
3808 return snarf_coding_system (p, suffix - p);
|
771
|
3809 break;
|
|
3810 }
|
|
3811 break;
|
|
3812 }
|
|
3813 break;
|
|
3814 }
|
|
3815
|
2531
|
3816 /* Look for ;;;###coding system */
|
|
3817
|
|
3818 cookie_len = LENGTH (";;;###coding system: ");
|
|
3819
|
|
3820 for (p = data,
|
|
3821 scan_end = data + len - cookie_len;
|
|
3822 p <= scan_end;
|
|
3823 p++)
|
1347
|
3824 {
|
2531
|
3825 if (*p == ';' && !memcmp (p, ";;;###coding system: ", cookie_len))
|
|
3826 {
|
|
3827 const UExtbyte *suffix;
|
|
3828
|
|
3829 p += cookie_len;
|
|
3830 suffix = p;
|
|
3831 while (suffix < scan_end && !isspace (*suffix))
|
|
3832 suffix++;
|
|
3833 return snarf_coding_system (p, suffix - p);
|
|
3834 }
|
1347
|
3835 }
|
|
3836
|
|
3837 return Qnil;
|
771
|
3838 }
|
|
3839
|
|
3840 static Lisp_Object
|
|
3841 determine_real_coding_system (Lstream *stream)
|
|
3842 {
|
|
3843 struct detection_state *st = allocate_detection_state ();
|
|
3844 int depth = record_unwind_protect (unwind_free_detection_state,
|
|
3845 make_opaque_ptr (st));
|
|
3846 UExtbyte buf[4096];
|
|
3847 Bytecount nread = Lstream_read (stream, buf, sizeof (buf));
|
|
3848 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread);
|
|
3849
|
|
3850 if (NILP (coding_system))
|
|
3851 {
|
|
3852 while (1)
|
|
3853 {
|
|
3854 if (detect_coding_type (st, buf, nread))
|
428
|
3855 break;
|
771
|
3856 nread = Lstream_read (stream, buf, sizeof (buf));
|
|
3857 if (nread == 0)
|
|
3858 break;
|
428
|
3859 }
|
771
|
3860
|
|
3861 coding_system = detected_coding_system (st);
|
428
|
3862 }
|
|
3863
|
|
3864 Lstream_rewind (stream);
|
771
|
3865
|
|
3866 unbind_to (depth);
|
|
3867 return coding_system;
|
|
3868 }
|
|
3869
|
|
3870 static void
|
|
3871 undecided_init_coding_stream (struct coding_stream *str)
|
|
3872 {
|
|
3873 struct undecided_coding_stream *data =
|
|
3874 CODING_STREAM_TYPE_DATA (str, undecided);
|
|
3875 struct undecided_coding_system *csdata =
|
|
3876 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided);
|
|
3877
|
|
3878 data->actual = Qnil;
|
|
3879
|
|
3880 if (str->direction == CODING_DECODE)
|
|
3881 {
|
|
3882 Lstream *lst = str->other_end;
|
|
3883
|
|
3884 if ((lst->flags & LSTREAM_FL_READ) &&
|
|
3885 Lstream_seekable_p (lst) &&
|
|
3886 csdata->do_coding)
|
|
3887 /* We can determine the coding system now. */
|
|
3888 data->actual = determine_real_coding_system (lst);
|
|
3889 }
|
1494
|
3890
|
|
3891 #ifdef DEBUG_XEMACS
|
|
3892 if (!NILP (Vdebug_coding_detection))
|
|
3893 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual);
|
|
3894 #endif /* DEBUG_XEMACS */
|
771
|
3895 }
|
|
3896
|
|
3897 static void
|
|
3898 undecided_rewind_coding_stream (struct coding_stream *str)
|
|
3899 {
|
|
3900 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c);
|
|
3901 }
|
|
3902
|
|
3903 static void
|
|
3904 undecided_finalize_coding_stream (struct coding_stream *str)
|
|
3905 {
|
|
3906 struct undecided_coding_stream *data =
|
|
3907 CODING_STREAM_TYPE_DATA (str, undecided);
|
|
3908
|
|
3909 chain_finalize_coding_stream_1
|
|
3910 (&CODING_STREAM_TYPE_DATA (str, undecided)->c);
|
|
3911 if (data->st)
|
|
3912 free_detection_state (data->st);
|
|
3913 }
|
|
3914
|
|
3915 static Lisp_Object
|
|
3916 undecided_canonicalize (Lisp_Object codesys)
|
|
3917 {
|
|
3918 struct undecided_coding_system *csdata =
|
|
3919 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
|
|
3920 if (!csdata->do_eol && !csdata->do_coding)
|
|
3921 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs;
|
|
3922 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs))
|
|
3923 return Fget_coding_system (Qconvert_eol_autodetect);
|
|
3924 return codesys;
|
|
3925 }
|
|
3926
|
|
3927 static Bytecount
|
|
3928 undecided_convert (struct coding_stream *str, const UExtbyte *src,
|
|
3929 unsigned_char_dynarr *dst, Bytecount n)
|
|
3930 {
|
|
3931 int first_time = 0;
|
|
3932
|
|
3933 if (str->direction == CODING_DECODE)
|
|
3934 {
|
|
3935 /* At this point, we have only the following possibilities:
|
|
3936
|
|
3937 do_eol && do_coding
|
|
3938 do_coding only
|
|
3939 do_eol only and a coding system was specified
|
|
3940
|
|
3941 Other possibilities are removed during undecided_canonicalize.
|
|
3942
|
|
3943 Therefore, our substreams are either
|
|
3944
|
|
3945 lstream_coding -> lstream_dynarr, or
|
|
3946 lstream_coding -> lstream_eol -> lstream_dynarr.
|
|
3947 */
|
|
3948 struct undecided_coding_system *csdata =
|
|
3949 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided);
|
|
3950 struct undecided_coding_stream *data =
|
|
3951 CODING_STREAM_TYPE_DATA (str, undecided);
|
|
3952
|
|
3953 if (str->eof)
|
|
3954 {
|
|
3955 /* Each will close the next. We need to close now because more
|
|
3956 data may be generated. */
|
|
3957 if (data->c.initted)
|
|
3958 Lstream_close (XLSTREAM (data->c.lstreams[0]));
|
|
3959 return n;
|
|
3960 }
|
|
3961
|
|
3962 if (!data->c.initted)
|
|
3963 {
|
|
3964 data->c.lstream_count = csdata->do_eol ? 3 : 2;
|
|
3965 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count);
|
|
3966
|
|
3967 data->c.lstreams[data->c.lstream_count - 1] =
|
|
3968 make_dynarr_output_stream (dst);
|
|
3969 Lstream_set_buffering
|
|
3970 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]),
|
|
3971 LSTREAM_UNBUFFERED, 0);
|
|
3972 if (csdata->do_eol)
|
|
3973 {
|
|
3974 data->c.lstreams[1] =
|
|
3975 make_coding_output_stream
|
|
3976 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]),
|
|
3977 Fget_coding_system (Qconvert_eol_autodetect),
|
800
|
3978 CODING_DECODE, 0);
|
771
|
3979 Lstream_set_buffering
|
|
3980 (XLSTREAM (data->c.lstreams[1]),
|
|
3981 LSTREAM_UNBUFFERED, 0);
|
|
3982 }
|
|
3983
|
|
3984 data->c.lstreams[0] =
|
|
3985 make_coding_output_stream
|
|
3986 (XLSTREAM (data->c.lstreams[1]),
|
|
3987 /* Substitute binary if we need to detect the encoding */
|
|
3988 csdata->do_coding ? Qbinary : csdata->cs,
|
800
|
3989 CODING_DECODE, 0);
|
771
|
3990 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]),
|
|
3991 LSTREAM_UNBUFFERED, 0);
|
|
3992
|
|
3993 first_time = 1;
|
|
3994 data->c.initted = 1;
|
|
3995 }
|
|
3996
|
|
3997 /* If necessary, do encoding-detection now. We do this when we're a
|
|
3998 writing stream or a non-seekable reading stream, meaning that we
|
|
3999 can't just process the whole input, rewind, and start over. */
|
|
4000
|
|
4001 if (csdata->do_coding)
|
|
4002 {
|
|
4003 int actual_was_nil = NILP (data->actual);
|
|
4004 if (NILP (data->actual))
|
|
4005 {
|
|
4006 if (!data->st)
|
|
4007 data->st = allocate_detection_state ();
|
|
4008 if (first_time)
|
|
4009 /* #### This is cheesy. What we really ought to do is buffer
|
|
4010 up a certain minimum amount of data to get a better result.
|
|
4011 */
|
|
4012 data->actual = look_for_coding_system_magic_cookie (src, n);
|
|
4013 if (NILP (data->actual))
|
|
4014 {
|
|
4015 /* #### This is cheesy. What we really ought to do is buffer
|
|
4016 up a certain minimum amount of data so as to get a less
|
|
4017 random result when doing subprocess detection. */
|
|
4018 detect_coding_type (data->st, src, n);
|
|
4019 data->actual = detected_coding_system (data->st);
|
4100
|
4020 /* kludge to prevent infinite recursion */
|
|
4021 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system)
|
|
4022 data->actual = Fget_coding_system (Qbinary);
|
771
|
4023 }
|
|
4024 }
|
|
4025 /* We need to set the detected coding system if we actually have
|
|
4026 such a coding system but didn't before. That is the case
|
|
4027 either when we just detected it in the previous code or when
|
|
4028 it was detected during undecided_init_coding_stream(). We
|
|
4029 can check for that using first_time. */
|
|
4030 if (!NILP (data->actual) && (actual_was_nil || first_time))
|
|
4031 {
|
|
4032 /* If the detected coding system doesn't allow for EOL
|
|
4033 autodetection, try to get the equivalent that does;
|
|
4034 otherwise, disable EOL detection (overriding whatever
|
|
4035 may already have been detected). */
|
|
4036 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT)
|
|
4037 {
|
|
4038 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual)))
|
|
4039 data->actual =
|
|
4040 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual);
|
|
4041 else if (data->c.lstream_count == 3)
|
|
4042 set_coding_stream_coding_system
|
|
4043 (XLSTREAM (data->c.lstreams[1]),
|
|
4044 Fget_coding_system (Qidentity));
|
|
4045 }
|
|
4046 set_coding_stream_coding_system
|
|
4047 (XLSTREAM (data->c.lstreams[0]), data->actual);
|
|
4048 }
|
|
4049 }
|
|
4050
|
|
4051 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0)
|
|
4052 return -1;
|
|
4053 return n;
|
|
4054 }
|
|
4055 else
|
|
4056 return no_conversion_convert (str, src, dst, n);
|
|
4057 }
|
|
4058
|
|
4059 static Lisp_Object
|
|
4060 undecided_canonicalize_after_coding (struct coding_stream *str)
|
|
4061 {
|
|
4062 struct undecided_coding_stream *data =
|
|
4063 CODING_STREAM_TYPE_DATA (str, undecided);
|
|
4064 Lisp_Object ret, eolret;
|
|
4065
|
|
4066 if (str->direction == CODING_ENCODE)
|
|
4067 return str->codesys;
|
|
4068
|
|
4069 if (!data->c.initted)
|
|
4070 return Fget_coding_system (Qundecided);
|
|
4071
|
|
4072 ret = coding_stream_canonicalize_after_coding
|
|
4073 (XLSTREAM (data->c.lstreams[0]));
|
|
4074 if (NILP (ret))
|
|
4075 ret = Fget_coding_system (Qundecided);
|
|
4076 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT)
|
|
4077 return ret;
|
|
4078 eolret = coding_stream_canonicalize_after_coding
|
|
4079 (XLSTREAM (data->c.lstreams[1]));
|
|
4080 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol))
|
|
4081 return ret;
|
|
4082 return
|
|
4083 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret,
|
|
4084 Qsubtype));
|
|
4085 }
|
|
4086
|
|
4087
|
|
4088 /************************************************************************/
|
|
4089 /* Lisp interface: Coding category functions and detection */
|
|
4090 /************************************************************************/
|
|
4091
|
|
4092 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /*
|
|
4093 Return a list of all recognized coding categories.
|
|
4094 */
|
|
4095 ())
|
|
4096 {
|
|
4097 int i;
|
|
4098 Lisp_Object list = Qnil;
|
|
4099
|
|
4100 for (i = 0; i < coding_detector_count; i++)
|
|
4101 {
|
|
4102 detector_category_dynarr *cats =
|
|
4103 Dynarr_at (all_coding_detectors, i).cats;
|
|
4104 int j;
|
|
4105
|
|
4106 for (j = 0; j < Dynarr_length (cats); j++)
|
|
4107 list = Fcons (Dynarr_at (cats, j).sym, list);
|
|
4108 }
|
|
4109
|
|
4110 return Fnreverse (list);
|
|
4111 }
|
|
4112
|
|
4113 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /*
|
|
4114 Change the priority order of the coding categories.
|
|
4115 LIST should be list of coding categories, in descending order of
|
|
4116 priority. Unspecified coding categories will be lower in priority
|
|
4117 than all specified ones, in the same relative order they were in
|
|
4118 previously.
|
|
4119 */
|
|
4120 (list))
|
|
4121 {
|
|
4122 int *category_to_priority =
|
|
4123 alloca_array (int, coding_detector_category_count);
|
|
4124 int i, j;
|
|
4125
|
|
4126 /* First generate a list that maps coding categories to priorities. */
|
|
4127
|
|
4128 for (i = 0; i < coding_detector_category_count; i++)
|
|
4129 category_to_priority[i] = -1;
|
|
4130
|
|
4131 /* Highest priority comes from the specified list. */
|
|
4132 i = 0;
|
2367
|
4133 {
|
|
4134 EXTERNAL_LIST_LOOP_2 (elt, list)
|
|
4135 {
|
|
4136 int cat = coding_category_symbol_to_id (elt);
|
|
4137
|
|
4138 if (category_to_priority[cat] >= 0)
|
|
4139 sferror ("Duplicate coding category in list", elt);
|
|
4140 category_to_priority[cat] = i++;
|
|
4141 }
|
|
4142 }
|
771
|
4143
|
|
4144 /* Now go through the existing categories by priority to retrieve
|
|
4145 the categories not yet specified and preserve their priority
|
|
4146 order. */
|
|
4147 for (j = 0; j < coding_detector_category_count; j++)
|
|
4148 {
|
|
4149 int cat = coding_category_by_priority[j];
|
|
4150 if (category_to_priority[cat] < 0)
|
|
4151 category_to_priority[cat] = i++;
|
|
4152 }
|
|
4153
|
|
4154 /* Now we need to construct the inverse of the mapping we just
|
|
4155 constructed. */
|
|
4156
|
|
4157 for (i = 0; i < coding_detector_category_count; i++)
|
|
4158 coding_category_by_priority[category_to_priority[i]] = i;
|
|
4159
|
|
4160 /* Phew! That was confusing. */
|
|
4161 return Qnil;
|
|
4162 }
|
|
4163
|
|
4164 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /*
|
|
4165 Return a list of coding categories in descending order of priority.
|
|
4166 */
|
|
4167 ())
|
|
4168 {
|
|
4169 int i;
|
|
4170 Lisp_Object list = Qnil;
|
|
4171
|
|
4172 for (i = 0; i < coding_detector_category_count; i++)
|
|
4173 list =
|
|
4174 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]),
|
|
4175 list);
|
|
4176 return Fnreverse (list);
|
|
4177 }
|
|
4178
|
|
4179 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /*
|
|
4180 Change the coding system associated with a coding category.
|
|
4181 */
|
|
4182 (coding_category, coding_system))
|
|
4183 {
|
|
4184 coding_category_system[coding_category_symbol_to_id (coding_category)] =
|
|
4185 Fget_coding_system (coding_system);
|
|
4186 return Qnil;
|
|
4187 }
|
|
4188
|
|
4189 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /*
|
|
4190 Return the coding system associated with a coding category.
|
|
4191 */
|
|
4192 (coding_category))
|
|
4193 {
|
|
4194 Lisp_Object sys =
|
|
4195 coding_category_system[coding_category_symbol_to_id (coding_category)];
|
|
4196
|
|
4197 if (!NILP (sys))
|
|
4198 return XCODING_SYSTEM_NAME (sys);
|
|
4199 return Qnil;
|
|
4200 }
|
|
4201
|
800
|
4202 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will
|
|
4203 read through to the end of STREAM, leaving it there but open. */
|
|
4204
|
771
|
4205 Lisp_Object
|
|
4206 detect_coding_stream (Lisp_Object stream)
|
|
4207 {
|
|
4208 Lisp_Object val = Qnil;
|
|
4209 struct gcpro gcpro1, gcpro2, gcpro3;
|
|
4210 UExtbyte random_buffer[65536];
|
|
4211 Lisp_Object binary_instream =
|
|
4212 make_coding_input_stream
|
|
4213 (XLSTREAM (stream), Qbinary,
|
814
|
4214 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER);
|
771
|
4215 Lisp_Object decstream =
|
|
4216 make_coding_input_stream
|
|
4217 (XLSTREAM (binary_instream),
|
800
|
4218 Qundecided, CODING_DECODE, 0);
|
771
|
4219 Lstream *decstr = XLSTREAM (decstream);
|
|
4220
|
|
4221 GCPRO3 (decstream, stream, binary_instream);
|
|
4222 /* Read and discard all data; detection happens as a side effect of this,
|
|
4223 and we examine what was detected afterwards. */
|
|
4224 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0)
|
|
4225 ;
|
|
4226
|
|
4227 val = coding_stream_detected_coding_system (decstr);
|
|
4228 Lstream_close (decstr);
|
|
4229 Lstream_delete (decstr);
|
|
4230 Lstream_delete (XLSTREAM (binary_instream));
|
|
4231 UNGCPRO;
|
|
4232 return val;
|
428
|
4233 }
|
|
4234
|
|
4235 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
|
|
4236 Detect coding system of the text in the region between START and END.
|
444
|
4237 Return a list of possible coding systems ordered by priority.
|
3025
|
4238 If only ASCII characters are found, return `undecided' or one of
|
428
|
4239 its subsidiary coding systems according to a detected end-of-line
|
|
4240 type. Optional arg BUFFER defaults to the current buffer.
|
|
4241 */
|
|
4242 (start, end, buffer))
|
|
4243 {
|
|
4244 Lisp_Object val = Qnil;
|
|
4245 struct buffer *buf = decode_buffer (buffer, 0);
|
665
|
4246 Charbpos b, e;
|
771
|
4247 Lisp_Object lb_instream;
|
428
|
4248
|
|
4249 get_buffer_range_char (buf, start, end, &b, &e, 0);
|
|
4250 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0);
|
771
|
4251
|
|
4252 val = detect_coding_stream (lb_instream);
|
|
4253 Lstream_delete (XLSTREAM (lb_instream));
|
428
|
4254 return val;
|
|
4255 }
|
|
4256
|
|
4257
|
771
|
4258
|
|
4259 #ifdef DEBUG_XEMACS
|
|
4260
|
428
|
4261 /************************************************************************/
|
771
|
4262 /* Internal methods */
|
|
4263 /************************************************************************/
|
|
4264
|
|
4265 /* Raw (internally-formatted) data. */
|
|
4266 DEFINE_CODING_SYSTEM_TYPE (internal);
|
428
|
4267
|
665
|
4268 static Bytecount
|
2286
|
4269 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src,
|
771
|
4270 unsigned_char_dynarr *dst, Bytecount n)
|
|
4271 {
|
|
4272 Bytecount orign = n;
|
|
4273 Dynarr_add_many (dst, src, n);
|
|
4274 return orign;
|
|
4275 }
|
|
4276
|
|
4277 #endif /* DEBUG_XEMACS */
|
|
4278
|
|
4279
|
|
4280
|
|
4281 #ifdef HAVE_ZLIB
|
|
4282
|
|
4283 /************************************************************************/
|
|
4284 /* Gzip methods */
|
|
4285 /************************************************************************/
|
|
4286
|
|
/* Per-coding-system data for the gzip type. */
struct gzip_coding_system
{
  /* Compression level: 0 through 9, or -1 for default */
  int level;
};

/* Accessors for the compression level, given a coding-system pointer
   (first form) or a Lisp object (X... form). */
#define CODING_SYSTEM_GZIP_LEVEL(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level)
#define XCODING_SYSTEM_GZIP_LEVEL(codesys) \
  (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level)
|
|
4296
|
|
4297 struct gzip_coding_stream
|
428
|
4298 {
|
771
|
4299 z_stream stream;
|
|
4300 int stream_initted;
|
|
4301 int reached_eof; /* #### this should be handled by the caller, once we
|
|
4302 return LSTREAM_EOF */
|
|
4303 };
|
|
4304
|
1204
|
4305 static const struct memory_description
|
771
|
4306 gzip_coding_system_description[] = {
|
|
4307 { XD_END }
|
|
4308 };
|
|
4309
|
1204
|
4310 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip);
|
|
4311
|
771
|
4312 enum source_sink_type
|
|
4313 gzip_conversion_end_type (Lisp_Object codesys)
|
|
4314 {
|
|
4315 return DECODES_BYTE_TO_BYTE;
|
428
|
4316 }
|
|
4317
|
|
4318 static void
|
771
|
4319 gzip_init (Lisp_Object codesys)
|
|
4320 {
|
|
4321 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip);
|
|
4322 data->level = -1;
|
|
4323 }
|
|
4324
|
|
4325 static void
|
|
4326 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag)
|
428
|
4327 {
|
771
|
4328 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip);
|
|
4329
|
826
|
4330 write_c_string (printcharfun, "(");
|
771
|
4331 if (data->level == -1)
|
826
|
4332 write_c_string (printcharfun, "default");
|
771
|
4333 else
|
|
4334 print_internal (make_int (data->level), printcharfun, 0);
|
826
|
4335 write_c_string (printcharfun, ")");
|
428
|
4336 }
|
|
4337
|
|
4338 static int
|
771
|
4339 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value)
|
428
|
4340 {
|
771
|
4341 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip);
|
|
4342
|
|
4343 if (EQ (key, Qlevel))
|
428
|
4344 {
|
771
|
4345 if (EQ (value, Qdefault))
|
|
4346 data->level = -1;
|
|
4347 else
|
428
|
4348 {
|
771
|
4349 CHECK_INT (value);
|
|
4350 check_int_range (XINT (value), 0, 9);
|
|
4351 data->level = XINT (value);
|
428
|
4352 }
|
|
4353 }
|
|
4354 else
|
771
|
4355 return 0;
|
|
4356 return 1;
|
428
|
4357 }
|
|
4358
|
|
4359 static Lisp_Object
|
771
|
4360 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop)
|
428
|
4361 {
|
771
|
4362 struct gzip_coding_system *data =
|
|
4363 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip);
|
|
4364
|
|
4365 if (EQ (prop, Qlevel))
|
428
|
4366 {
|
771
|
4367 if (data->level == -1)
|
|
4368 return Qdefault;
|
|
4369 return make_int (data->level);
|
428
|
4370 }
|
771
|
4371
|
|
4372 return Qunbound;
|
428
|
4373 }
|
|
4374
|
|
4375 static void
|
771
|
4376 gzip_init_coding_stream (struct coding_stream *str)
|
428
|
4377 {
|
771
|
4378 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip);
|
|
4379 if (data->stream_initted)
|
428
|
4380 {
|
771
|
4381 if (str->direction == CODING_DECODE)
|
|
4382 inflateEnd (&data->stream);
|
|
4383 else
|
|
4384 deflateEnd (&data->stream);
|
|
4385 data->stream_initted = 0;
|
428
|
4386 }
|
771
|
4387 data->reached_eof = 0;
|
428
|
4388 }
|
|
4389
|
|
/* rewind_coding_stream method for gzip: rewinding is the same operation
   as (re)initializing the stream state. */
static void
gzip_rewind_coding_stream (struct coding_stream *str)
{
  gzip_init_coding_stream (str);
}
|
|
4395
|
771
|
4396 static Bytecount
|
|
4397 gzip_convert (struct coding_stream *str,
|
|
4398 const UExtbyte *src,
|
|
4399 unsigned_char_dynarr *dst, Bytecount n)
|
428
|
4400 {
|
771
|
4401 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip);
|
|
4402 int zerr;
|
|
4403 if (str->direction == CODING_DECODE)
|
428
|
4404 {
|
771
|
4405 if (data->reached_eof)
|
|
4406 return n; /* eat the data */
|
|
4407
|
|
4408 if (!data->stream_initted)
|
428
|
4409 {
|
771
|
4410 xzero (data->stream);
|
|
4411 if (inflateInit (&data->stream) != Z_OK)
|
|
4412 return LSTREAM_ERROR;
|
|
4413 data->stream_initted = 1;
|
428
|
4414 }
|
771
|
4415
|
|
4416 data->stream.next_in = (Bytef *) src;
|
|
4417 data->stream.avail_in = n;
|
|
4418
|
|
4419 /* Normally we stop when we've fed all data to the decompressor; but
|
|
4420 if we're at the end of the input, and the decompressor hasn't
|
|
4421 reported EOF, we need to keep going, as there might be more output
|
|
4422 to generate. Z_OK from the decompressor means input was processed
|
|
4423 or output was generated; if neither, we break out of the loop.
|
|
4424 Other return values are:
|
|
4425
|
|
4426 Z_STREAM_END EOF from decompressor
|
|
4427 Z_DATA_ERROR Corrupted data
|
|
4428 Z_BUF_ERROR No progress possible (this should happen if
|
|
4429 we try to feed it an incomplete file)
|
|
4430 Z_MEM_ERROR Out of memory
|
|
4431 Z_STREAM_ERROR (should never happen)
|
|
4432 Z_NEED_DICT (#### when will this happen?)
|
|
4433 */
|
|
4434 while (data->stream.avail_in > 0 || str->eof)
|
|
4435 {
|
|
4436 /* Reserve an output buffer of the same size as the input buffer;
|
|
4437 if that's not enough, we keep reserving the same size. */
|
|
4438 Bytecount reserved = n;
|
|
4439 Dynarr_add_many (dst, 0, reserved);
|
|
4440 /* Careful here! Don't retrieve the pointer until after
|
|
4441 reserving the space, or it might be bogus */
|
|
4442 data->stream.next_out =
|
|
4443 Dynarr_atp (dst, Dynarr_length (dst) - reserved);
|
|
4444 data->stream.avail_out = reserved;
|
|
4445 zerr = inflate (&data->stream, Z_NO_FLUSH);
|
|
4446 /* Lop off the unused portion */
|
|
4447 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out);
|
|
4448 if (zerr != Z_OK)
|
|
4449 break;
|
|
4450 }
|
|
4451
|
|
4452 if (zerr == Z_STREAM_END)
|
|
4453 data->reached_eof = 1;
|
|
4454
|
|
4455 if ((Bytecount) data->stream.avail_in < n)
|
|
4456 return n - data->stream.avail_in;
|
|
4457
|
|
4458 if (zerr == Z_OK || zerr == Z_STREAM_END)
|
|
4459 return 0;
|
|
4460
|
|
4461 return LSTREAM_ERROR;
|
428
|
4462 }
|
|
4463 else
|
|
4464 {
|
771
|
4465 if (!data->stream_initted)
|
|
4466 {
|
|
4467 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys);
|
|
4468 xzero (data->stream);
|
|
4469 if (deflateInit (&data->stream,
|
|
4470 level == -1 ? Z_DEFAULT_COMPRESSION : level) !=
|
|
4471 Z_OK)
|
|
4472 return LSTREAM_ERROR;
|
|
4473 data->stream_initted = 1;
|
428
|
4474 }
|
771
|
4475
|
|
4476 data->stream.next_in = (Bytef *) src;
|
|
4477 data->stream.avail_in = n;
|
|
4478
|
|
4479 /* Normally we stop when we've fed all data to the compressor; but if
|
|
4480 we're at the end of the input, and the compressor hasn't reported
|
|
4481 EOF, we need to keep going, as there might be more output to
|
|
4482 generate. (To signal EOF on our end, we set the FLUSH parameter
|
|
4483 to Z_FINISH; when all data is output, Z_STREAM_END will be
|
|
4484 returned.) Z_OK from the compressor means input was processed or
|
|
4485 output was generated; if neither, we break out of the loop. Other
|
|
4486 return values are:
|
|
4487
|
|
4488 Z_STREAM_END EOF from compressor
|
|
4489 Z_BUF_ERROR No progress possible (should never happen)
|
|
4490 Z_STREAM_ERROR (should never happen)
|
|
4491 */
|
|
4492 while (data->stream.avail_in > 0 || str->eof)
|
|
4493 {
|
|
4494 /* Reserve an output buffer of the same size as the input buffer;
|
|
4495 if that's not enough, we keep reserving the same size. */
|
|
4496 Bytecount reserved = n;
|
|
4497 Dynarr_add_many (dst, 0, reserved);
|
|
4498 /* Careful here! Don't retrieve the pointer until after
|
|
4499 reserving the space, or it might be bogus */
|
|
4500 data->stream.next_out =
|
|
4501 Dynarr_atp (dst, Dynarr_length (dst) - reserved);
|
|
4502 data->stream.avail_out = reserved;
|
|
4503 zerr =
|
|
4504 deflate (&data->stream,
|
|
4505 str->eof ? Z_FINISH : Z_NO_FLUSH);
|
|
4506 /* Lop off the unused portion */
|
|
4507 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out);
|
|
4508 if (zerr != Z_OK)
|
|
4509 break;
|
|
4510 }
|
|
4511
|
|
4512 if ((Bytecount) data->stream.avail_in < n)
|
|
4513 return n - data->stream.avail_in;
|
|
4514
|
|
4515 if (zerr == Z_OK || zerr == Z_STREAM_END)
|
|
4516 return 0;
|
|
4517
|
|
4518 return LSTREAM_ERROR;
|
428
|
4519 }
|
|
4520 }
|
|
4521
|
771
|
4522 #endif /* HAVE_ZLIB */
|
428
|
4523
|
|
4524
|
|
4525 /************************************************************************/
|
|
4526 /* Initialization */
|
|
4527 /************************************************************************/
|
|
4528
|
|
4529 void
|
|
4530 syms_of_file_coding (void)
|
|
4531 {
|
442
|
4532 INIT_LRECORD_IMPLEMENTATION (coding_system);
|
|
4533
|
771
|
4534 DEFSUBR (Fvalid_coding_system_type_p);
|
|
4535 DEFSUBR (Fcoding_system_type_list);
|
428
|
4536 DEFSUBR (Fcoding_system_p);
|
4303
|
4537 DEFSUBR (Fautoload_coding_system);
|
428
|
4538 DEFSUBR (Ffind_coding_system);
|
|
4539 DEFSUBR (Fget_coding_system);
|
|
4540 DEFSUBR (Fcoding_system_list);
|
|
4541 DEFSUBR (Fcoding_system_name);
|
|
4542 DEFSUBR (Fmake_coding_system);
|
|
4543 DEFSUBR (Fcopy_coding_system);
|
440
|
4544 DEFSUBR (Fcoding_system_canonical_name_p);
|
|
4545 DEFSUBR (Fcoding_system_alias_p);
|
|
4546 DEFSUBR (Fcoding_system_aliasee);
|
428
|
4547 DEFSUBR (Fdefine_coding_system_alias);
|
|
4548 DEFSUBR (Fsubsidiary_coding_system);
|
771
|
4549 DEFSUBR (Fcoding_system_base);
|
|
4550 DEFSUBR (Fcoding_system_used_for_io);
|
428
|
4551
|
|
4552 DEFSUBR (Fcoding_system_type);
|
771
|
4553 DEFSUBR (Fcoding_system_description);
|
428
|
4554 DEFSUBR (Fcoding_system_property);
|
|
4555
|
|
4556 DEFSUBR (Fcoding_category_list);
|
|
4557 DEFSUBR (Fset_coding_priority_list);
|
|
4558 DEFSUBR (Fcoding_priority_list);
|
|
4559 DEFSUBR (Fset_coding_category_system);
|
|
4560 DEFSUBR (Fcoding_category_system);
|
|
4561
|
|
4562 DEFSUBR (Fdetect_coding_region);
|
|
4563 DEFSUBR (Fdecode_coding_region);
|
|
4564 DEFSUBR (Fencode_coding_region);
|
563
|
4565 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp);
|
|
4566 DEFSYMBOL (Qno_conversion);
|
771
|
4567 DEFSYMBOL (Qconvert_eol);
|
|
4568 DEFSYMBOL (Qconvert_eol_autodetect);
|
|
4569 DEFSYMBOL (Qconvert_eol_lf);
|
|
4570 DEFSYMBOL (Qconvert_eol_cr);
|
|
4571 DEFSYMBOL (Qconvert_eol_crlf);
|
563
|
4572 DEFSYMBOL (Qraw_text);
|
771
|
4573
|
563
|
4574 DEFSYMBOL (Qmnemonic);
|
|
4575 DEFSYMBOL (Qeol_type);
|
|
4576 DEFSYMBOL (Qpost_read_conversion);
|
|
4577 DEFSYMBOL (Qpre_write_conversion);
|
|
4578
|
771
|
4579 DEFSYMBOL (Qtranslation_table_for_decode);
|
|
4580 DEFSYMBOL (Qtranslation_table_for_encode);
|
|
4581 DEFSYMBOL (Qsafe_chars);
|
|
4582 DEFSYMBOL (Qsafe_charsets);
|
|
4583 DEFSYMBOL (Qmime_charset);
|
|
4584 DEFSYMBOL (Qvalid_codes);
|
|
4585
|
563
|
4586 DEFSYMBOL (Qcr);
|
|
4587 DEFSYMBOL (Qlf);
|
|
4588 DEFSYMBOL (Qcrlf);
|
|
4589 DEFSYMBOL (Qeol_cr);
|
|
4590 DEFSYMBOL (Qeol_lf);
|
|
4591 DEFSYMBOL (Qeol_crlf);
|
|
4592 DEFSYMBOL (Qencode);
|
|
4593 DEFSYMBOL (Qdecode);
|
428
|
4594
|
771
|
4595 DEFSYMBOL (Qnear_certainty);
|
|
4596 DEFSYMBOL (Qquite_probable);
|
|
4597 DEFSYMBOL (Qsomewhat_likely);
|
1494
|
4598 DEFSYMBOL (Qslightly_likely);
|
771
|
4599 DEFSYMBOL (Qas_likely_as_unlikely);
|
|
4600 DEFSYMBOL (Qsomewhat_unlikely);
|
|
4601 DEFSYMBOL (Qquite_improbable);
|
|
4602 DEFSYMBOL (Qnearly_impossible);
|
|
4603
|
|
4604 DEFSYMBOL (Qdo_eol);
|
|
4605 DEFSYMBOL (Qdo_coding);
|
|
4606
|
|
4607 DEFSYMBOL (Qcanonicalize_after_coding);
|
|
4608
|
4303
|
4609 DEFSYMBOL (Qposix_charset_to_coding_system_hash);
|
|
4610
|
771
|
4611 DEFSYMBOL (Qescape_quoted);
|
|
4612
|
|
4613 #ifdef HAVE_ZLIB
|
|
4614 DEFSYMBOL (Qgzip);
|
|
4615 #endif
|
|
4616
|
428
|
4617 }
|
|
4618
|
|
4619 void
|
|
4620 lstream_type_create_file_coding (void)
|
|
4621 {
|
771
|
4622 LSTREAM_HAS_METHOD (coding, reader);
|
|
4623 LSTREAM_HAS_METHOD (coding, writer);
|
|
4624 LSTREAM_HAS_METHOD (coding, rewinder);
|
|
4625 LSTREAM_HAS_METHOD (coding, seekable_p);
|
|
4626 LSTREAM_HAS_METHOD (coding, marker);
|
|
4627 LSTREAM_HAS_METHOD (coding, flusher);
|
|
4628 LSTREAM_HAS_METHOD (coding, closer);
|
|
4629 LSTREAM_HAS_METHOD (coding, finalizer);
|
|
4630 }
|
|
4631
|
|
4632 void
|
|
4633 coding_system_type_create (void)
|
|
4634 {
|
|
4635 int i;
|
|
4636
|
|
4637 staticpro (&Vcoding_system_hash_table);
|
|
4638 Vcoding_system_hash_table =
|
|
4639 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
|
|
4640
|
|
4641 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry);
|
2367
|
4642 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr,
|
771
|
4643 &csted_description);
|
|
4644
|
|
4645 Vcoding_system_type_list = Qnil;
|
|
4646 staticpro (&Vcoding_system_type_list);
|
|
4647
|
|
4648 /* Initialize to something reasonable ... */
|
|
4649 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++)
|
|
4650 {
|
|
4651 coding_category_system[i] = Qnil;
|
1204
|
4652 dump_add_root_lisp_object (&coding_category_system[i]);
|
771
|
4653 coding_category_by_priority[i] = i;
|
|
4654 }
|
|
4655
|
|
4656 dump_add_opaque (coding_category_by_priority,
|
|
4657 sizeof (coding_category_by_priority));
|
|
4658
|
|
4659 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector);
|
2367
|
4660 dump_add_root_block_ptr (&all_coding_detectors,
|
771
|
4661 &detector_dynarr_description);
|
|
4662
|
|
4663 dump_add_opaque_int (&coding_system_tick);
|
|
4664 dump_add_opaque_int (&coding_detector_count);
|
|
4665 dump_add_opaque_int (&coding_detector_category_count);
|
|
4666
|
|
4667 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion,
|
|
4668 "no-conversion-coding-system-p");
|
|
4669 CODING_SYSTEM_HAS_METHOD (no_conversion, convert);
|
|
4670
|
|
4671 INITIALIZE_DETECTOR (no_conversion);
|
|
4672 DETECTOR_HAS_METHOD (no_conversion, detect);
|
|
4673 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion);
|
|
4674
|
|
4675 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol,
|
|
4676 "convert-eol-coding-system-p");
|
|
4677 CODING_SYSTEM_HAS_METHOD (convert_eol, print);
|
|
4678 CODING_SYSTEM_HAS_METHOD (convert_eol, convert);
|
|
4679 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop);
|
|
4680 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop);
|
|
4681 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type);
|
|
4682 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding);
|
|
4683 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream);
|
|
4684
|
|
4685 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided,
|
|
4686 "undecided-coding-system-p");
|
|
4687 CODING_SYSTEM_HAS_METHOD (undecided, init);
|
|
4688 CODING_SYSTEM_HAS_METHOD (undecided, mark);
|
|
4689 CODING_SYSTEM_HAS_METHOD (undecided, print);
|
|
4690 CODING_SYSTEM_HAS_METHOD (undecided, convert);
|
|
4691 CODING_SYSTEM_HAS_METHOD (undecided, putprop);
|
|
4692 CODING_SYSTEM_HAS_METHOD (undecided, getprop);
|
|
4693 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream);
|
|
4694 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream);
|
|
4695 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream);
|
|
4696 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream);
|
|
4697 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize);
|
|
4698 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding);
|
|
4699
|
|
4700 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p");
|
|
4701
|
|
4702 CODING_SYSTEM_HAS_METHOD (chain, print);
|
|
4703 CODING_SYSTEM_HAS_METHOD (chain, canonicalize);
|
|
4704 CODING_SYSTEM_HAS_METHOD (chain, init);
|
|
4705 CODING_SYSTEM_HAS_METHOD (chain, mark);
|
|
4706 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream);
|
|
4707 CODING_SYSTEM_HAS_METHOD (chain, convert);
|
|
4708 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream);
|
|
4709 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream);
|
|
4710 CODING_SYSTEM_HAS_METHOD (chain, finalize);
|
|
4711 CODING_SYSTEM_HAS_METHOD (chain, putprop);
|
|
4712 CODING_SYSTEM_HAS_METHOD (chain, getprop);
|
|
4713 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type);
|
|
4714 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding);
|
|
4715
|
|
4716 #ifdef DEBUG_XEMACS
|
|
4717 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p");
|
|
4718 CODING_SYSTEM_HAS_METHOD (internal, convert);
|
|
4719 #endif
|
|
4720
|
|
4721 #ifdef HAVE_ZLIB
|
|
4722 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p");
|
|
4723 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type);
|
|
4724 CODING_SYSTEM_HAS_METHOD (gzip, convert);
|
|
4725 CODING_SYSTEM_HAS_METHOD (gzip, init);
|
|
4726 CODING_SYSTEM_HAS_METHOD (gzip, print);
|
|
4727 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream);
|
|
4728 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream);
|
|
4729 CODING_SYSTEM_HAS_METHOD (gzip, putprop);
|
|
4730 CODING_SYSTEM_HAS_METHOD (gzip, getprop);
|
|
4731 #endif
|
|
4732 }
|
|
4733
|
|
4734 void
|
|
4735 reinit_coding_system_type_create (void)
|
|
4736 {
|
|
4737 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion);
|
|
4738 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol);
|
|
4739 REINITIALIZE_CODING_SYSTEM_TYPE (undecided);
|
|
4740 REINITIALIZE_CODING_SYSTEM_TYPE (chain);
|
|
4741 #if 0
|
|
4742 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper);
|
|
4743 #endif /* 0 */
|
|
4744 #ifdef DEBUG_XEMACS
|
|
4745 REINITIALIZE_CODING_SYSTEM_TYPE (internal);
|
|
4746 #endif
|
|
4747 #ifdef HAVE_ZLIB
|
|
4748 REINITIALIZE_CODING_SYSTEM_TYPE (gzip);
|
|
4749 #endif
|
|
4750 }
|
|
4751
|
|
/* Intentionally empty: this file currently has no variables to
   reinitialize. */
void
reinit_vars_of_file_coding (void)
{
}
|
|
4756
|
|
4757 void
|
|
4758 vars_of_file_coding (void)
|
|
4759 {
|
771
|
4760 /* We always have file-coding support */
|
428
|
4761 Fprovide (intern ("file-coding"));
|
|
4762
|
1347
|
4763 QScoding_system_cookie = build_string (";;;###coding system: ");
|
|
4764 staticpro (&QScoding_system_cookie);
|
|
4765
|
1242
|
4766 #ifdef HAVE_DEFAULT_EOL_DETECTION
|
2297
|
4767 /* #### Find a more appropriate place for this comment.
|
|
4768 WARNING: The existing categories are intimately tied to the function
|
1242
|
4769 `coding-system-category' in coding.el. If you change a category, or
|
|
4770 change the layout of any coding system associated with a category, you
|
|
4771 need to check that function and make sure it's written properly. */
|
|
4772
|
|
4773 Fprovide (intern ("unix-default-eol-detection"));
|
|
4774 #endif
|
|
4775
|
428
|
4776 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
|
3142
|
4777 Default coding system used for TTY and X11 keyboard input.
|
|
4778 Under X11, used only to interpet the character for a key event when that
|
|
4779 event has a KeySym of NoSymbol but does have an associated string keysym,
|
|
4780 something that's seen with input methods.
|
|
4781
|
|
4782 If you need to set these things to different coding systems, call the
|
|
4783 function `set-console-tty-coding-system' for the TTY and use this variable
|
|
4784 for X11.
|
428
|
4785 */ );
|
|
4786 Vkeyboard_coding_system = Qnil;
|
|
4787
|
|
4788 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /*
|
|
4789 Coding system used for TTY display output.
|
|
4790 Not used under a windowing system.
|
|
4791 */ );
|
|
4792 Vterminal_coding_system = Qnil;
|
|
4793
|
|
4794 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /*
|
440
|
4795 Overriding coding system used when reading from a file or process.
|
|
4796 You should bind this variable with `let', but do not set it globally.
|
|
4797 If this is non-nil, it specifies the coding system that will be used
|
|
4798 to decode input on read operations, such as from a file or process.
|
|
4799 It overrides `buffer-file-coding-system-for-read',
|
428
|
4800 `insert-file-contents-pre-hook', etc. Use those variables instead of
|
440
|
4801 this one for permanent changes to the environment. */ );
|
428
|
4802 Vcoding_system_for_read = Qnil;
|
|
4803
|
|
4804 DEFVAR_LISP ("coding-system-for-write",
|
|
4805 &Vcoding_system_for_write /*
|
440
|
4806 Overriding coding system used when writing to a file or process.
|
|
4807 You should bind this variable with `let', but do not set it globally.
|
|
4808 If this is non-nil, it specifies the coding system that will be used
|
|
4809 to encode output for write operations, such as to a file or process.
|
|
4810 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc.
|
|
4811 Use those variables instead of this one for permanent changes to the
|
|
4812 environment. */ );
|
428
|
4813 Vcoding_system_for_write = Qnil;
|
|
4814
|
|
4815 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /*
|
|
4816 Coding system used to convert pathnames when accessing files.
|
|
4817 */ );
|
|
4818 Vfile_name_coding_system = Qnil;
|
|
4819
|
|
4820 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /*
|
771
|
4821 Setting this has no effect. It is purely for FSF compatibility.
|
428
|
4822 */ );
|
|
4823 enable_multibyte_characters = 1;
|
771
|
4824
|
|
4825 Vchain_canonicalize_hash_table =
|
|
4826 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
|
|
4827 staticpro (&Vchain_canonicalize_hash_table);
|
|
4828
|
|
4829 #ifdef DEBUG_XEMACS
|
|
4830 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /*
|
|
4831 If non-nil, display debug information about detection operations in progress.
|
|
4832 Information is displayed on stderr.
|
|
4833 */ );
|
|
4834 Vdebug_coding_detection = Qnil;
|
|
4835 #endif
|
428
|
4836 }
|
|
4837
|
2297
|
4838 /* #### reformat this for consistent appearance? */
|
|
4839
|
428
|
4840 void
|
|
4841 complex_vars_of_file_coding (void)
|
|
4842 {
|
771
|
4843 Fmake_coding_system
|
|
4844 (Qconvert_eol_cr, Qconvert_eol,
|
|
4845 build_msg_string ("Convert CR to LF"),
|
|
4846 nconc2 (list6 (Qdocumentation,
|
|
4847 build_msg_string (
|
|
4848 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n"
|
|
4849 "(used internally and under Unix to mark the end of a line)."),
|
|
4850 Qmnemonic, build_string ("CR->LF"),
|
|
4851 Qsubtype, Qcr),
|
|
4852 /* VERY IMPORTANT! Tell make-coding-system not to generate
|
|
4853 subsidiaries -- it needs the coding systems we're creating
|
|
4854 to do so! */
|
|
4855 list2 (Qeol_type, Qlf)));
|
|
4856
|
|
4857 Fmake_coding_system
|
|
4858 (Qconvert_eol_lf, Qconvert_eol,
|
|
4859 build_msg_string ("Convert LF to LF (do nothing)"),
|
|
4860 nconc2 (list6 (Qdocumentation,
|
|
4861 build_msg_string (
|
|
4862 "Do nothing."),
|
|
4863 Qmnemonic, build_string ("LF->LF"),
|
|
4864 Qsubtype, Qlf),
|
|
4865 /* VERY IMPORTANT! Tell make-coding-system not to generate
|
|
4866 subsidiaries -- it needs the coding systems we're creating
|
|
4867 to do so! */
|
|
4868 list2 (Qeol_type, Qlf)));
|
|
4869
|
|
4870 Fmake_coding_system
|
|
4871 (Qconvert_eol_crlf, Qconvert_eol,
|
|
4872 build_msg_string ("Convert CRLF to LF"),
|
|
4873 nconc2 (list6 (Qdocumentation,
|
|
4874 build_msg_string (
|
|
4875 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n"
|
|
4876 "(used internally and under Unix to mark the end of a line)."),
|
|
4877 Qmnemonic, build_string ("CRLF->LF"),
|
|
4878 Qsubtype, Qcrlf),
|
|
4879 /* VERY IMPORTANT! Tell make-coding-system not to generate
|
|
4880 subsidiaries -- it needs the coding systems we're creating
|
|
4881 to do so! */
|
|
4882 list2 (Qeol_type, Qlf)));
|
|
4883
|
|
4884 Fmake_coding_system
|
|
4885 (Qconvert_eol_autodetect, Qconvert_eol,
|
|
4886 build_msg_string ("Autodetect EOL type"),
|
|
4887 nconc2 (list6 (Qdocumentation,
|
|
4888 build_msg_string (
|
|
4889 "Autodetect the end-of-line type."),
|
|
4890 Qmnemonic, build_string ("Auto-EOL"),
|
793
|
4891 Qsubtype, Qnil),
|
771
|
4892 /* VERY IMPORTANT! Tell make-coding-system not to generate
|
|
4893 subsidiaries -- it needs the coding systems we're creating
|
|
4894 to do so! */
|
|
4895 list2 (Qeol_type, Qlf)));
|
|
4896
|
|
4897 Fmake_coding_system
|
|
4898 (Qundecided, Qundecided,
|
|
4899 build_msg_string ("Undecided (auto-detect)"),
|
|
4900 nconc2 (list4 (Qdocumentation,
|
|
4901 build_msg_string
|
|
4902 ("Automatically detects the correct encoding."),
|
|
4903 Qmnemonic, build_string ("Auto")),
|
|
4904 list6 (Qdo_eol, Qt, Qdo_coding, Qt,
|
|
4905 /* We do EOL detection ourselves so we don't need to be
|
|
4906 wrapped in an EOL detector. (It doesn't actually hurt,
|
|
4907 though, I don't think.) */
|
|
4908 Qeol_type, Qlf)));
|
|
4909
|
|
4910 Fmake_coding_system
|
|
4911 (intern ("undecided-dos"), Qundecided,
|
|
4912 build_msg_string ("Undecided (auto-detect) (CRLF)"),
|
|
4913 nconc2 (list4 (Qdocumentation,
|
|
4914 build_msg_string
|
|
4915 ("Automatically detects the correct encoding; EOL type of CRLF forced."),
|
|
4916 Qmnemonic, build_string ("Auto")),
|
|
4917 list4 (Qdo_coding, Qt,
|
|
4918 Qeol_type, Qcrlf)));
|
|
4919
|
|
4920 Fmake_coding_system
|
|
4921 (intern ("undecided-unix"), Qundecided,
|
|
4922 build_msg_string ("Undecided (auto-detect) (LF)"),
|
|
4923 nconc2 (list4 (Qdocumentation,
|
|
4924 build_msg_string
|
|
4925 ("Automatically detects the correct encoding; EOL type of LF forced."),
|
|
4926 Qmnemonic, build_string ("Auto")),
|
|
4927 list4 (Qdo_coding, Qt,
|
|
4928 Qeol_type, Qlf)));
|
|
4929
|
|
4930 Fmake_coding_system
|
|
4931 (intern ("undecided-mac"), Qundecided,
|
|
4932 build_msg_string ("Undecided (auto-detect) (CR)"),
|
|
4933 nconc2 (list4 (Qdocumentation,
|
|
4934 build_msg_string
|
|
4935 ("Automatically detects the correct encoding; EOL type of CR forced."),
|
|
4936 Qmnemonic, build_string ("Auto")),
|
|
4937 list4 (Qdo_coding, Qt,
|
|
4938 Qeol_type, Qcr)));
|
|
4939
|
428
|
4940 /* Need to create this here or we're really screwed. */
|
|
4941 Fmake_coding_system
|
|
4942 (Qraw_text, Qno_conversion,
|
771
|
4943 build_msg_string ("Raw Text"),
|
|
4944 list4 (Qdocumentation,
|
|
4945 build_msg_string ("Raw text converts only line-break codes, and acts otherwise like `binary'."),
|
|
4946 Qmnemonic, build_string ("Raw")));
|
428
|
4947
|
|
4948 Fmake_coding_system
|
|
4949 (Qbinary, Qno_conversion,
|
771
|
4950 build_msg_string ("Binary"),
|
|
4951 list6 (Qdocumentation,
|
|
4952 build_msg_string (
|
|
4953 "This coding system is as close as it comes to doing no conversion.\n"
|
|
4954 "On input, each byte is converted directly into the character\n"
|
|
4955 "with the corresponding code -- i.e. from the `ascii', `control-1',\n"
|
|
4956 "or `latin-1' character sets. On output, these characters are\n"
|
|
4957 "converted back to the corresponding bytes, and other characters\n"
|
|
4958 "are converted to the default character, i.e. `~'."),
|
|
4959 Qeol_type, Qlf,
|
428
|
4960 Qmnemonic, build_string ("Binary")));
|
|
4961
|
771
|
4962 /* Formerly aliased to raw-text! Completely bogus and not even the same
|
|
4963 as FSF Emacs. */
|
|
4964 Fdefine_coding_system_alias (Qno_conversion, Qbinary);
|
|
4965 Fdefine_coding_system_alias (intern ("no-conversion-unix"),
|
|
4966 intern ("raw-text-unix"));
|
|
4967 Fdefine_coding_system_alias (intern ("no-conversion-dos"),
|
|
4968 intern ("raw-text-dos"));
|
|
4969 Fdefine_coding_system_alias (intern ("no-conversion-mac"),
|
|
4970 intern ("raw-text-mac"));
|
|
4971
|
1318
|
4972 /* These three below will get their defaults set correctly
|
|
4973 in code-init.el. We init them now so we can handle stuff at dump
|
771
|
4974 time before we get to code-init.el. */
|
1318
|
4975 Fdefine_coding_system_alias (Qnative, Qbinary);
|
440
|
4976 Fdefine_coding_system_alias (Qterminal, Qbinary);
|
|
4977 Fdefine_coding_system_alias (Qkeyboard, Qbinary);
|
|
4978
|
1318
|
4979 Fdefine_coding_system_alias (Qfile_name, Qnative);
|
771
|
4980 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf);
|
|
4981
|
428
|
4982 /* Need this for bootstrapping */
|
771
|
4983 coding_category_system[detector_category_no_conversion] =
|
428
|
4984 Fget_coding_system (Qraw_text);
|
|
4985 }
|