Mercurial > hg > xemacs-beta
annotate src/file-coding.c @ 5724:ede80ef92a74
Make soft links in src for module source files, if built in to the executable.
This ensures that those files are built with the same compiler flags as all
other source files.
See these xemacs-beta messages:
<CAHCOHQn+q=Xuwq+y68dvqi7afAP9f-TdB7=8YiZ8VYO816sjHg@mail.gmail.com>
<f5by5ejqiyk.fsf@calexico.inf.ed.ac.uk>
author | Jerry James <james@xemacs.org> |
---|---|
date | Sat, 02 Mar 2013 14:32:37 -0700 |
parents | dae33b5feffe |
children | 7a538e1a4676 |
rev | line source |
---|---|
771 | 1 /* Text encoding conversion functions; coding-system object. |
2 #### rename me to coding-system.c or coding.c | |
428 | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
4 Copyright (C) 1995 Sun Microsystems, Inc. | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
5 Copyright (C) 2000, 2001, 2002, 2003, 2005, 2010 Ben Wing. |
428 | 6 |
7 This file is part of XEmacs. | |
8 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
9 XEmacs is free software: you can redistribute it and/or modify it |
428 | 10 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
11 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
12 option) any later version. |
428 | 13 |
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
20 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
428 | 21 |
771 | 22 /* Synched up with: Not in FSF. */ |
23 | |
24 /* Authorship: | |
25 | |
26 Current primary author: Ben Wing <ben@xemacs.org> | |
27 | |
28 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c | |
29 from Mule 2.? but probably does not share one line of code with that | |
30 original source. Rewriting work started around Dec. 1994. or Jan. 1995. | |
31 Proceeded in earnest till Nov. 1995. | |
32 | |
33 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to | |
34 file-coding.c, with the intention of using it to do end-of-line conversion | |
35 on non-MULE machines (specifically, on Windows machines). He separated | |
36 out the MULE stuff from non-MULE using ifdef's, and searched throughout | |
37 the rest of the source tree looking for coding-system-related code that | |
38 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS. | |
39 | |
40 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system | |
41 types, providing a primitive means of decoding and encoding externally- | |
42 formatted Unicode/UCS_4 and Unicode/UTF_8 data. | |
43 | |
44 January 25, 2000, Martin Buchholz redid and fleshed out the coding | |
45 system alias handling that was first added in prototype form by | |
46 Hrjove Niksic, April 15, 1999. | |
47 | |
48 April to May 2000, Ben Wing: More major reorganization. Adding features | |
49 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the | |
50 "chain" coding system for chaining two together, and doing a lot of | |
51 reorganization in preparation for properly abstracting out the different | |
52 coding system types. | |
53 | |
54 June 2001, Ben Wing: Added Unicode support. Eliminated previous | |
55 junky Unicode translation support. | |
56 | |
57 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished | |
58 abstracting everything except detection, which is hard to abstract (see | |
59 just below). | |
60 | |
61 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code | |
62 to intl-win32.c. Lots more rewriting; very little code is untouched | |
63 from before April 2000. Abstracted the detection code, added multiple | |
64 levels of likelihood to increase the reliability of the algorithm. | |
65 | |
66 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined. | |
67 Removed the conditionals. | |
68 */ | |
69 | |
428 | 70 #include <config.h> |
71 #include "lisp.h" | |
72 | |
73 #include "buffer.h" | |
74 #include "elhash.h" | |
75 #include "insdel.h" | |
76 #include "lstream.h" | |
440 | 77 #include "opaque.h" |
771 | 78 #include "file-coding.h" |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
79 #include "extents.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
80 #include "rangetab.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
81 #include "chartab.h" |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
82 #include "sysfile.h" |
771 | 83 |
84 #ifdef HAVE_ZLIB | |
85 #include "zlib.h" | |
428 | 86 #endif |
87 | |
88 Lisp_Object Vkeyboard_coding_system; | |
89 Lisp_Object Vterminal_coding_system; | |
90 Lisp_Object Vcoding_system_for_read; | |
91 Lisp_Object Vcoding_system_for_write; | |
92 Lisp_Object Vfile_name_coding_system; | |
93 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
94 Lisp_Object Qaliases, Qcharset_skip_chars_string; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
95 |
771 | 96 #ifdef DEBUG_XEMACS |
97 Lisp_Object Vdebug_coding_detection; | |
440 | 98 #endif |
771 | 99 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
100 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
101 extern Lisp_Object Vcharset_ascii, Vcharset_control_1, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
102 Vcharset_latin_iso8859_1; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
103 #endif |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
104 |
771 | 105 typedef struct coding_system_type_entry |
106 { | |
107 struct coding_system_methods *meths; | |
108 } coding_system_type_entry; | |
109 | |
110 typedef struct | |
111 { | |
112 Dynarr_declare (coding_system_type_entry); | |
113 } coding_system_type_entry_dynarr; | |
114 | |
115 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr; | |
116 | |
1204 | 117 static const struct memory_description cste_description_1[] = { |
2551 | 118 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1, |
119 { &coding_system_methods_description } }, | |
771 | 120 { XD_END } |
121 }; | |
122 | |
1204 | 123 static const struct sized_memory_description cste_description = { |
771 | 124 sizeof (coding_system_type_entry), |
125 cste_description_1 | |
126 }; | |
127 | |
1204 | 128 static const struct memory_description csted_description_1[] = { |
771 | 129 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description), |
428 | 130 { XD_END } |
131 }; | |
132 | |
1204 | 133 static const struct sized_memory_description csted_description = { |
771 | 134 sizeof (coding_system_type_entry_dynarr), |
135 csted_description_1 | |
136 }; | |
137 | |
138 static Lisp_Object Vcoding_system_type_list; | |
139 | |
140 /* Coding system currently associated with each coding category. */ | |
141 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES]; | |
142 | |
143 /* Table of all coding categories in decreasing order of priority. | |
144 This describes a permutation of the possible coding categories. */ | |
145 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES]; | |
146 | |
147 /* Value used with to give a unique name to nameless coding systems */ | |
148 int coding_system_tick; | |
149 | |
150 int coding_detector_count; | |
151 int coding_detector_category_count; | |
152 | |
153 detector_dynarr *all_coding_detectors; | |
154 | |
1204 | 155 static const struct memory_description struct_detector_category_description_1[] |
771 | 156 = |
157 { | |
158 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) }, | |
159 { XD_END } | |
160 }; | |
161 | |
1204 | 162 static const struct sized_memory_description struct_detector_category_description = |
771 | 163 { |
164 sizeof (struct detector_category), | |
165 struct_detector_category_description_1 | |
428 | 166 }; |
167 | |
1204 | 168 static const struct memory_description detector_category_dynarr_description_1[] = |
771 | 169 { |
170 XD_DYNARR_DESC (detector_category_dynarr, | |
171 &struct_detector_category_description), | |
172 { XD_END } | |
173 }; | |
174 | |
1204 | 175 static const struct sized_memory_description detector_category_dynarr_description = { |
771 | 176 sizeof (detector_category_dynarr), |
177 detector_category_dynarr_description_1 | |
178 }; | |
179 | |
1204 | 180 static const struct memory_description struct_detector_description_1[] |
771 | 181 = |
182 { | |
2367 | 183 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1, |
2551 | 184 { &detector_category_dynarr_description } }, |
771 | 185 { XD_END } |
186 }; | |
187 | |
1204 | 188 static const struct sized_memory_description struct_detector_description = |
771 | 189 { |
190 sizeof (struct detector), | |
191 struct_detector_description_1 | |
192 }; | |
193 | |
1204 | 194 static const struct memory_description detector_dynarr_description_1[] = |
771 | 195 { |
196 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description), | |
197 { XD_END } | |
198 }; | |
199 | |
1204 | 200 static const struct sized_memory_description detector_dynarr_description = { |
771 | 201 sizeof (detector_dynarr), |
202 detector_dynarr_description_1 | |
203 }; | |
428 | 204 |
205 Lisp_Object Qcoding_systemp; | |
206 | |
771 | 207 Lisp_Object Qraw_text; |
428 | 208 |
209 Lisp_Object Qmnemonic, Qeol_type; | |
210 Lisp_Object Qcr, Qcrlf, Qlf; | |
211 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; | |
212 Lisp_Object Qpost_read_conversion; | |
213 Lisp_Object Qpre_write_conversion; | |
214 | |
771 | 215 Lisp_Object Qtranslation_table_for_decode; |
216 Lisp_Object Qtranslation_table_for_encode; | |
217 Lisp_Object Qsafe_chars; | |
218 Lisp_Object Qsafe_charsets; | |
219 Lisp_Object Qmime_charset; | |
220 Lisp_Object Qvalid_codes; | |
221 | |
222 Lisp_Object Qno_conversion; | |
223 Lisp_Object Qconvert_eol; | |
440 | 224 Lisp_Object Qescape_quoted; |
771 | 225 Lisp_Object Qencode, Qdecode; |
226 | |
227 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf; | |
228 Lisp_Object Qconvert_eol_autodetect; | |
229 | |
230 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely; | |
1494 | 231 Lisp_Object Qslightly_likely; |
771 | 232 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable; |
233 Lisp_Object Qnearly_impossible; | |
234 | |
235 Lisp_Object Qdo_eol, Qdo_coding; | |
236 | |
237 Lisp_Object Qcanonicalize_after_coding; | |
238 | |
1347 | 239 Lisp_Object QScoding_system_cookie; |
240 | |
4303 | 241 Lisp_Object Qposix_charset_to_coding_system_hash; |
242 | |
771 | 243 /* This is used to convert autodetected coding systems into existing |
244 systems. For example, the chain undecided->convert-eol-autodetect may | |
245 have its separate parts detected as mswindows-multibyte and | |
246 convert-eol-crlf, and the result needs to be mapped to | |
247 mswindows-multibyte-dos. */ | |
248 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
249 any more. */ | |
250 static Lisp_Object Vchain_canonicalize_hash_table; | |
251 | |
252 #ifdef HAVE_ZLIB | |
253 Lisp_Object Qgzip; | |
428 | 254 #endif |
771 | 255 |
2297 | 256 /* Maps symbols (coding system names) to either coding system objects or |
257 (for aliases) other names. */ | |
771 | 258 static Lisp_Object Vcoding_system_hash_table; |
428 | 259 |
260 int enable_multibyte_characters; | |
261 | |
262 EXFUN (Fcopy_coding_system, 2); | |
263 | |
264 | |
265 /************************************************************************/ | |
771 | 266 /* Coding system object methods */ |
428 | 267 /************************************************************************/ |
268 | |
269 static Lisp_Object | |
270 mark_coding_system (Lisp_Object obj) | |
271 { | |
272 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); | |
273 | |
1204 | 274 #define MARKED_SLOT(x) mark_object (codesys->x); |
275 #include "coding-system-slots.h" | |
771 | 276 |
277 MAYBE_CODESYSMETH (codesys, mark, (obj)); | |
428 | 278 |
1204 | 279 return Qnil; |
428 | 280 } |
281 | |
282 static void | |
771 | 283 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun) |
284 { | |
285 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
286 print_internal (c->methods->type, printcharfun, 1); | |
287 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1)); | |
288 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT) | |
289 write_fmt_string_lisp (printcharfun, " eol-type=%s", | |
290 1, Fcoding_system_property (obj, Qeol_type)); | |
291 } | |
292 | |
293 static void | |
428 | 294 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, |
2286 | 295 int UNUSED (escapeflag)) |
428 | 296 { |
297 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
298 if (print_readably) | |
5142
f965e31a35f0
reduce lcrecord headers to 2 words, rename printing_unreadable_object
Ben Wing <ben@xemacs.org>
parents:
5128
diff
changeset
|
299 printing_unreadable_lisp_object (obj, 0); |
771 | 300 |
301 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name); | |
302 print_coding_system_properties (obj, printcharfun); | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
303 write_ascstring (printcharfun, ">"); |
428 | 304 } |
305 | |
771 | 306 /* Print an abbreviated version of a coding system (but still containing |
307 all the information), for use within a coding system print method. */ | |
308 | |
309 static void | |
310 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun, | |
2286 | 311 int UNUSED (escapeflag)) |
771 | 312 { |
800 | 313 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); |
771 | 314 print_coding_system_properties (cs, printcharfun); |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
315 write_ascstring (printcharfun, "]"); |
771 | 316 } |
317 | |
3263 | 318 #ifndef NEW_GC |
428 | 319 static void |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
320 finalize_coding_system (Lisp_Object obj) |
428 | 321 { |
322 /* Since coding systems never go away, this function is not | |
323 necessary. But it would be necessary if we changed things | |
324 so that coding systems could go away. */ | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
325 MAYBE_XCODESYSMETH (obj, finalize, (obj)); |
771 | 326 } |
3263 | 327 #endif /* not NEW_GC */ |
771 | 328 |
329 static Bytecount | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
330 sizeof_coding_system (Lisp_Object obj) |
771 | 331 { |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
332 const Lisp_Coding_System *p = XCODING_SYSTEM (obj); |
771 | 333 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size; |
428 | 334 } |
335 | |
1204 | 336 static const struct memory_description coding_system_methods_description_1[] |
771 | 337 = { |
338 { XD_LISP_OBJECT, | |
339 offsetof (struct coding_system_methods, type) }, | |
340 { XD_LISP_OBJECT, | |
341 offsetof (struct coding_system_methods, predicate_symbol) }, | |
342 { XD_END } | |
343 }; | |
344 | |
1204 | 345 const struct sized_memory_description coding_system_methods_description = { |
771 | 346 sizeof (struct coding_system_methods), |
347 coding_system_methods_description_1 | |
348 }; | |
349 | |
1204 | 350 static const struct sized_memory_description coding_system_extra_description_map[] = |
351 { | |
352 { offsetof (Lisp_Coding_System, methods) }, | |
353 { offsetof (struct coding_system_methods, extra_description) }, | |
354 { -1 }, | |
771 | 355 }; |
356 | |
1204 | 357 static const struct memory_description coding_system_description[] = |
428 | 358 { |
2367 | 359 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1, |
2551 | 360 { &coding_system_methods_description } }, |
1204 | 361 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) }, |
362 #define MARKED_SLOT_ARRAY(slot, size) \ | |
363 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size }, | |
364 #include "coding-system-slots.h" | |
2367 | 365 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1, |
2551 | 366 { coding_system_extra_description_map } }, |
1204 | 367 { XD_END } |
771 | 368 }; |
369 | |
1204 | 370 static const struct memory_description coding_system_empty_extra_description_1[] = |
371 { | |
372 { XD_END } | |
373 }; | |
374 | |
375 const struct sized_memory_description coding_system_empty_extra_description = { | |
376 0, coding_system_empty_extra_description_1 | |
377 }; | |
378 | |
5118
e0db3c197671
merge up to latest default branch, doesn't compile yet
Ben Wing <ben@xemacs.org>
diff
changeset
|
379 DEFINE_DUMPABLE_SIZABLE_LISP_OBJECT ("coding-system", coding_system, |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
380 mark_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
381 print_coding_system, |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5142
diff
changeset
|
382 IF_OLD_GC (finalize_coding_system), |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
383 0, 0, coding_system_description, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
384 sizeof_coding_system, |
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
385 Lisp_Coding_System); |
771 | 386 |
387 /************************************************************************/ | |
388 /* Creating coding systems */ | |
389 /************************************************************************/ | |
390 | |
391 static struct coding_system_methods * | |
392 decode_coding_system_type (Lisp_Object type, Error_Behavior errb) | |
428 | 393 { |
771 | 394 int i; |
395 | |
396 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++) | |
428 | 397 { |
771 | 398 if (EQ (type, |
399 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type)) | |
400 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths; | |
428 | 401 } |
771 | 402 |
403 maybe_invalid_constant ("Invalid coding system type", type, | |
404 Qcoding_system, errb); | |
405 | |
406 return 0; | |
428 | 407 } |
408 | |
771 | 409 static int |
410 valid_coding_system_type_p (Lisp_Object type) | |
428 | 411 { |
771 | 412 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; |
413 } | |
414 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
415 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
416 static Lisp_Object Vdefault_query_coding_region_chartab_cache; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
417 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
418 /* Non-static because it's used in INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
419 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
420 default_query_method (Lisp_Object codesys, struct buffer *buf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
421 Charbpos end, int flags) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
422 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
423 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
424 Charbpos pos_byte = BYTE_BUF_PT (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
425 Lisp_Object safe_charsets = XCODING_SYSTEM_SAFE_CHARSETS (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
426 Lisp_Object safe_chars = XCODING_SYSTEM_SAFE_CHARS (codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
427 result = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
428 enum query_coding_failure_reasons failed_reason, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
429 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
430 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
431 /* safe-charsets of t means the coding system can encode everything. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
432 if (EQ (Qnil, safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
433 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
434 if (EQ (Qt, safe_charsets)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
435 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
436 return Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
437 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
438 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
439 /* If we've no information on what characters the coding system can |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
440 encode, give up. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
441 if (EQ (Qnil, safe_charsets) && EQ (Qnil, safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
442 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
443 return Qunbound; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
444 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
445 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
446 safe_chars = Fgethash (safe_charsets, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
447 Vdefault_query_coding_region_chartab_cache, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
448 Qnil); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
449 if (NILP (safe_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
450 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
451 safe_chars = Fmake_char_table (Qgeneric); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
452 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
453 EXTERNAL_LIST_LOOP_2 (safe_charset, safe_charsets) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
454 Fput_char_table (safe_charset, Qt, safe_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
455 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
456 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
457 Fputhash (safe_charsets, safe_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
458 Vdefault_query_coding_region_chartab_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
459 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
460 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
461 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
462 if (flags & QUERY_METHOD_HIGHLIGHT && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
463 /* If we're being called really early, live without highlights getting |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
464 cleared properly: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
465 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
466 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
467 /* It's okay to call Lisp here, the only non-stack object we may have |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
468 allocated up to this point is safe_chars, and that's |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
469 reachable from its entry in |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
470 Vdefault_query_coding_region_chartab_cache */ |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
471 call3 (Qquery_coding_clear_highlights, make_fixnum (pos), make_fixnum (end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
472 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
473 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
474 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
475 while (pos < end) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
476 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
477 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
478 if (!EQ (Qnil, get_char_table (ch, safe_chars))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
479 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
480 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
481 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
482 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
483 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
484 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
485 fail_range_start = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
486 while ((pos < end) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
487 (EQ (Qnil, get_char_table (ch, safe_chars)) |
4839
5f1c6ca4d05e
fix bad change to default_query_method, rewrite conditional for more correctness
Ben Wing <ben@xemacs.org>
parents:
4830
diff
changeset
|
488 && (failed_reason = query_coding_unencodable, |
5f1c6ca4d05e
fix bad change to default_query_method, rewrite conditional for more correctness
Ben Wing <ben@xemacs.org>
parents:
4830
diff
changeset
|
489 (previous_failed_reason == query_coding_succeeded |
5f1c6ca4d05e
fix bad change to default_query_method, rewrite conditional for more correctness
Ben Wing <ben@xemacs.org>
parents:
4830
diff
changeset
|
490 || previous_failed_reason == failed_reason)))) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
491 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
492 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
493 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
494 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
495 previous_failed_reason = failed_reason; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
496 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
497 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
498 if (fail_range_start == pos) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
499 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
500 /* The character can actually be encoded; move on. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
501 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
502 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
503 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
504 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
505 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
506 assert (previous_failed_reason == query_coding_unencodable); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
507 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
508 if (flags & QUERY_METHOD_ERRORP) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
509 { |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
510 signal_error_2 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
511 (Qtext_conversion_error, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
512 "Cannot encode using coding system", |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
513 make_string_from_buffer (buf, fail_range_start, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
514 pos - fail_range_start), |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
515 XCODING_SYSTEM_NAME (codesys)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
516 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
517 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
518 if (NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
519 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
520 result = Fmake_range_table (Qstart_closed_end_open); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
521 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
522 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
523 fail_range_end = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
524 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
525 Fput_range_table (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
526 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
527 Qunencodable, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
528 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
529 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
530 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
531 if (flags & QUERY_METHOD_HIGHLIGHT) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
532 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
533 Lisp_Object extent |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
534 = Fmake_extent (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
535 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
536 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
537 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
538 Fset_extent_priority |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
539 (extent, make_fixnum (2 + mouse_highlight_priority)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
540 Fset_extent_face (extent, Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
541 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
542 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
543 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
544 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
545 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
546 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
547 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
548 #else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
549 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
550 default_query_method (Lisp_Object UNUSED (codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
551 struct buffer * UNUSED (buf), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
552 Charbpos UNUSED (end), int UNUSED (flags)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
553 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
554 return Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
555 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
556 #endif /* defined MULE */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
557 |
771 | 558 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* |
559 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | |
560 Valid types depend on how XEmacs was compiled but may include | |
3025 | 561 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
562 `utf-16', `ucs-4', `utf-8', etc. | |
771 | 563 */ |
564 (coding_system_type)) | |
565 { | |
566 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; | |
567 } | |
568 | |
569 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /* | |
570 Return a list of valid coding system types. | |
571 */ | |
572 ()) | |
573 { | |
574 return Fcopy_sequence (Vcoding_system_type_list); | |
575 } | |
576 | |
577 void | |
578 add_entry_to_coding_system_type_list (struct coding_system_methods *meths) | |
579 { | |
580 struct coding_system_type_entry entry; | |
581 | |
582 entry.meths = meths; | |
583 Dynarr_add (the_coding_system_type_entry_dynarr, entry); | |
584 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list); | |
428 | 585 } |
586 | |
587 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /* | |
588 Return t if OBJECT is a coding system. | |
589 A coding system is an object that defines how text containing multiple | |
590 character sets is encoded into a stream of (typically 8-bit) bytes. | |
591 The coding system is used to decode the stream into a series of | |
592 characters (which may be from multiple charsets) when the text is read | |
593 from a file or process, and is used to encode the text back into the | |
594 same format when it is written out to a file or process. | |
595 | |
596 For example, many ISO2022-compliant coding systems (such as Compound | |
597 Text, which is used for inter-client data under the X Window System) | |
598 use escape sequences to switch between different charsets -- Japanese | |
599 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked | |
600 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See | |
601 `make-coding-system' for more information. | |
602 | |
603 Coding systems are normally identified using a symbol, and the | |
604 symbol is accepted in place of the actual coding system object whenever | |
605 a coding system is called for. (This is similar to how faces work.) | |
606 */ | |
607 (object)) | |
608 { | |
609 return CODING_SYSTEMP (object) ? Qt : Qnil; | |
610 } | |
611 | |
4303 | 612 static Lisp_Object |
613 find_coding_system (Lisp_Object coding_system_or_name, | |
614 int do_autoloads) | |
615 { | |
616 Lisp_Object lookup; | |
617 | |
618 if (NILP (coding_system_or_name)) | |
619 coding_system_or_name = Qbinary; | |
620 else if (CODING_SYSTEMP (coding_system_or_name)) | |
621 return coding_system_or_name; | |
622 else | |
623 CHECK_SYMBOL (coding_system_or_name); | |
624 | |
625 while (1) | |
626 { | |
627 lookup = | |
628 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
629 | |
630 if (CONSP (lookup) && do_autoloads) | |
631 { | |
632 struct gcpro gcpro1; | |
633 int length; | |
634 DECLARE_EISTRING (desired_base); | |
635 DECLARE_EISTRING (warning_info); | |
636 | |
637 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name)); | |
638 | |
639 /* Work out the name of the base coding system. */ | |
640 length = eilen (desired_base); | |
641 if (length > (int)(sizeof ("-unix") - 1)) | |
642 { | |
643 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base)) | |
644 + (length - (sizeof ("-unix") - 1)))) | |
645 { | |
646 eidel (desired_base, length - (sizeof ("-unix") - 1), | |
647 -1, 5, 5); | |
648 } | |
649 } | |
650 else if (length > (int)(sizeof ("-dos") - 1)) | |
651 { | |
652 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base)) | |
653 + (length - (sizeof ("-dos") - 1)))) || | |
654 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base)) | |
655 + (length - (sizeof ("-mac") - 1))))) | |
656 { | |
657 eidel (desired_base, length - (sizeof ("-dos") - 1), -1, | |
658 4, 4); | |
659 } | |
660 } | |
661 | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
662 coding_system_or_name = intern_istring (eidata (desired_base)); |
4303 | 663 |
664 /* Remove this coding system and its subsidiary coding | |
665 systems from the hash, to avoid calling this code recursively. */ | |
666 Fremhash (coding_system_or_name, Vcoding_system_hash_table); | |
667 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"), | |
668 Vcoding_system_hash_table); | |
669 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"), | |
670 Vcoding_system_hash_table); | |
671 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"), | |
672 Vcoding_system_hash_table); | |
673 | |
674 eicpy_ascii (warning_info, "Error autoloading coding system "); | |
675 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name)); | |
676 | |
677 /* Keep around the form so it doesn't disappear from under | |
678 #'eval's feet. */ | |
679 GCPRO1 (lookup); | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
680 call1_trapping_problems ((const CIbyte *) eidata (warning_info), |
4303 | 681 Qeval, lookup, 0); |
682 UNGCPRO; | |
683 | |
684 lookup = | |
685 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
686 } | |
687 | |
688 if (CODING_SYSTEMP (lookup) || NILP (lookup)) | |
689 return lookup; | |
690 | |
691 coding_system_or_name = lookup; | |
692 } | |
693 } | |
694 | |
428 | 695 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* |
696 Retrieve the coding system of the given name. | |
697 | |
698 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply | |
699 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. | |
700 If there is no such coding system, nil is returned. Otherwise the | |
701 associated coding system object is returned. | |
702 */ | |
703 (coding_system_or_name)) | |
704 { | |
4303 | 705 return find_coding_system(coding_system_or_name, 1); |
706 } | |
707 | |
708 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /* | |
709 Define SYMBOL as a coding-system that is loaded on demand. | |
710 | |
711 FORM is a form to evaluate to define the coding-system. | |
712 */ | |
713 (symbol, form)) | |
714 { | |
715 Lisp_Object lookup; | |
716 | |
717 CHECK_SYMBOL (symbol); | |
718 CHECK_CONS (form); | |
719 | |
720 lookup = find_coding_system (symbol, 0); | |
721 | |
722 if (!NILP (lookup) && | |
723 /* Allow autoloads to be redefined. */ | |
724 !CONSP (lookup)) | |
440 | 725 { |
4303 | 726 invalid_operation ("Cannot redefine existing coding system", |
727 symbol); | |
440 | 728 } |
4303 | 729 |
730 Fputhash (symbol, form, Vcoding_system_hash_table); | |
731 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form, | |
732 Vcoding_system_hash_table); | |
733 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form, | |
734 Vcoding_system_hash_table); | |
735 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form, | |
736 Vcoding_system_hash_table); | |
737 | |
738 /* Tell the POSIX locale infrastructure about this coding system (though | |
739 unfortunately it'll be too late for the startup locale sniffing. */ | |
740 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash)) | |
741 { | |
742 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash); | |
743 DECLARE_EISTRING (minimal_name); | |
744 Ibyte *full_name; | |
745 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i; | |
746 | |
747 if (!NILP (val)) | |
748 { | |
749 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol)); | |
750 for (i = 0; i < len; ++i) | |
751 { | |
752 if (full_name[i] >= '0' && full_name[i] <= '9') | |
753 { | |
754 eicat_ch (minimal_name, full_name[i]); | |
755 } | |
756 else if (full_name[i] >= 'a' && full_name[i] <= 'z') | |
757 { | |
758 eicat_ch (minimal_name, full_name[i]); | |
759 } | |
760 else if (full_name[i] >= 'A' && full_name[i] <= 'Z') | |
761 { | |
762 eicat_ch (minimal_name, full_name[i] + | |
763 ('a' - 'A')); | |
764 } | |
765 } | |
766 | |
767 if (eilen (minimal_name)) | |
768 { | |
769 CHECK_HASH_TABLE (val); | |
770 Fputhash (eimake_string(minimal_name), symbol, val); | |
771 } | |
772 } | |
773 } | |
774 | |
775 return Qt; | |
428 | 776 } |
777 | |
778 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* | |
779 Retrieve the coding system of the given name. | |
780 Same as `find-coding-system' except that if there is no such | |
781 coding system, an error is signaled instead of returning nil. | |
782 */ | |
783 (name)) | |
784 { | |
785 Lisp_Object coding_system = Ffind_coding_system (name); | |
786 | |
787 if (NILP (coding_system)) | |
563 | 788 invalid_argument ("No such coding system", name); |
428 | 789 return coding_system; |
790 } | |
791 | |
771 | 792 int |
793 coding_system_is_binary (Lisp_Object coding_system) | |
794 { | |
795 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
796 return | |
797 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) && | |
798 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF && | |
799 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) && | |
800 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil)); | |
801 } | |
802 | |
803 static Lisp_Object | |
804 coding_system_real_canonical (Lisp_Object cs) | |
805 { | |
806 if (!NILP (XCODING_SYSTEM_CANONICAL (cs))) | |
807 return XCODING_SYSTEM_CANONICAL (cs); | |
808 return cs; | |
809 } | |
810 | |
811 /* Return true if coding system is of the "standard" type that decodes | |
812 bytes into characters (suitable for decoding a text file). */ | |
813 int | |
814 coding_system_is_for_text_file (Lisp_Object coding_system) | |
815 { | |
816 return (XCODESYSMETH_OR_GIVEN | |
817 (coding_system, conversion_end_type, | |
818 (coding_system_real_canonical (coding_system)), | |
819 DECODES_BYTE_TO_CHARACTER) == | |
820 DECODES_BYTE_TO_CHARACTER); | |
821 } | |
822 | |
823 static int | |
824 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
825 { | |
826 enum source_sink_type type = | |
827 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type, | |
828 (coding_system_real_canonical (cs)), | |
829 DECODES_BYTE_TO_CHARACTER); | |
830 if (sex == CODING_SOURCE) | |
831 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
832 type == DECODES_CHARACTER_TO_BYTE); | |
833 else | |
834 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
835 type == DECODES_BYTE_TO_CHARACTER); | |
836 } | |
837 | |
838 static int | |
839 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
840 { | |
841 return decoding_source_sink_type_is_char (cs, | |
842 /* Sex change */ | |
843 sex == CODING_SOURCE ? | |
844 CODING_SINK : CODING_SOURCE); | |
845 } | |
846 | |
847 /* Like Ffind_coding_system() but check that the coding system is of the | |
848 "standard" type that decodes bytes into characters (suitable for | |
849 decoding a text file), and if not, returns an appropriate wrapper that | |
850 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
851 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
852 system to do this. */ | |
853 | |
854 Lisp_Object | |
855 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
856 { | |
857 Lisp_Object coding_system = Ffind_coding_system (name); | |
858 Lisp_Object wrapper = coding_system; | |
859 | |
860 if (NILP (coding_system)) | |
861 return Qnil; | |
862 if (!coding_system_is_for_text_file (coding_system)) | |
863 { | |
864 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system); | |
865 if (NILP (wrapper)) | |
866 { | |
867 Lisp_Object chain; | |
868 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK)) | |
869 chain = list2 (coding_system, Qbinary); | |
870 else | |
871 chain = list1 (coding_system); | |
872 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE)) | |
873 chain = Fcons (Qbinary, chain); | |
874 wrapper = | |
875 make_internal_coding_system | |
876 (coding_system, | |
877 "internal-text-file-wrapper", | |
878 Qchain, | |
879 Qunbound, list4 (Qchain, chain, | |
880 Qcanonicalize_after_coding, coding_system)); | |
881 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper; | |
882 } | |
883 } | |
884 | |
885 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT) | |
886 return wrapper; | |
887 | |
888 coding_system = wrapper; | |
889 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system); | |
890 if (!NILP (wrapper)) | |
891 return wrapper; | |
892 wrapper = | |
893 make_internal_coding_system | |
894 (coding_system, | |
895 "internal-auto-eol-wrapper", | |
896 Qundecided, Qunbound, | |
897 list4 (Qcoding_system, coding_system, | |
898 Qdo_eol, Qt)); | |
899 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper; | |
900 return wrapper; | |
901 } | |
902 | |
903 /* Like Fget_coding_system() but verify that the coding system is of the | |
904 "standard" type that decodes bytes into characters (suitable for | |
905 decoding a text file), and if not, returns an appropriate wrapper that | |
906 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
907 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
908 system to do this. */ | |
909 | |
910 Lisp_Object | |
911 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
912 { | |
913 Lisp_Object coding_system = find_coding_system_for_text_file (name, | |
914 eol_wrap); | |
915 if (NILP (coding_system)) | |
916 invalid_argument ("No such coding system", name); | |
917 return coding_system; | |
918 } | |
919 | |
920 /* We store the coding systems in hash tables with the names as the | |
921 key and the actual coding system object as the value. Occasionally | |
922 we need to use them in a list format. These routines provide us | |
923 with that. */ | |
428 | 924 struct coding_system_list_closure |
925 { | |
926 Lisp_Object *coding_system_list; | |
771 | 927 int normal; |
928 int internal; | |
428 | 929 }; |
930 | |
931 static int | |
4303 | 932 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, |
428 | 933 void *coding_system_list_closure) |
934 { | |
935 /* This function can GC */ | |
936 struct coding_system_list_closure *cscl = | |
937 (struct coding_system_list_closure *) coding_system_list_closure; | |
938 Lisp_Object *coding_system_list = cscl->coding_system_list; | |
939 | |
771 | 940 /* We can't just use VALUE because KEY might be an alias, and we need |
4303 | 941 the real coding system object. |
942 | |
943 Autoloaded coding systems have conses for their values, and can't be | |
944 internal coding systems, or coding system aliases. */ | |
945 if (CONSP (value) || | |
946 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | |
947 cscl->internal : cscl->normal)) | |
771 | 948 *coding_system_list = Fcons (key, *coding_system_list); |
428 | 949 return 0; |
950 } | |
951 | |
2297 | 952 /* #### should we specify a conventional for "all coding systems"? */ |
771 | 953 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /* |
428 | 954 Return a list of the names of all defined coding systems. |
771 | 955 If INTERNAL is nil, only the normal (non-internal) coding systems are |
956 included. (Internal coding systems are created for various internal | |
957 purposes, such as implementing EOL types of CRLF and CR; generally, you do | |
958 not want to see these.) If it is t, only the internal coding systems are | |
959 included. If it is any other non-nil value both normal and internal are | |
960 included. | |
428 | 961 */ |
771 | 962 (internal)) |
428 | 963 { |
964 Lisp_Object coding_system_list = Qnil; | |
965 struct gcpro gcpro1; | |
966 struct coding_system_list_closure coding_system_list_closure; | |
967 | |
968 GCPRO1 (coding_system_list); | |
969 coding_system_list_closure.coding_system_list = &coding_system_list; | |
771 | 970 coding_system_list_closure.normal = !EQ (internal, Qt); |
971 coding_system_list_closure.internal = !NILP (internal); | |
428 | 972 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table, |
973 &coding_system_list_closure); | |
974 UNGCPRO; | |
975 | |
976 return coding_system_list; | |
977 } | |
978 | |
979 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /* | |
980 Return the name of the given coding system. | |
981 */ | |
982 (coding_system)) | |
983 { | |
984 coding_system = Fget_coding_system (coding_system); | |
985 return XCODING_SYSTEM_NAME (coding_system); | |
986 } | |
987 | |
988 static Lisp_Coding_System * | |
771 | 989 allocate_coding_system (struct coding_system_methods *codesys_meths, |
990 Bytecount data_size, | |
991 Lisp_Object name) | |
428 | 992 { |
771 | 993 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size; |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
994 Lisp_Object obj = ALLOC_SIZED_LISP_OBJECT (total_size, coding_system); |
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
995 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); |
1204 | 996 |
771 | 997 codesys->methods = codesys_meths; |
1204 | 998 #define MARKED_SLOT(x) codesys->x = Qnil; |
999 #include "coding-system-slots.h" | |
1000 | |
771 | 1001 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF; |
1002 CODING_SYSTEM_NAME (codesys) = name; | |
1003 | |
1004 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys))); | |
428 | 1005 |
1006 return codesys; | |
1007 } | |
1008 | |
771 | 1009 static enum eol_type |
1010 symbol_to_eol_type (Lisp_Object symbol) | |
1011 { | |
1012 CHECK_SYMBOL (symbol); | |
1013 if (NILP (symbol)) return EOL_AUTODETECT; | |
1014 if (EQ (symbol, Qlf)) return EOL_LF; | |
1015 if (EQ (symbol, Qcrlf)) return EOL_CRLF; | |
1016 if (EQ (symbol, Qcr)) return EOL_CR; | |
1017 | |
1018 invalid_constant ("Unrecognized eol type", symbol); | |
1204 | 1019 RETURN_NOT_REACHED (EOL_AUTODETECT); |
771 | 1020 } |
1021 | |
1022 static Lisp_Object | |
1023 eol_type_to_symbol (enum eol_type type) | |
1024 { | |
1025 switch (type) | |
1026 { | |
2500 | 1027 default: ABORT (); |
771 | 1028 case EOL_LF: return Qlf; |
1029 case EOL_CRLF: return Qcrlf; | |
1030 case EOL_CR: return Qcr; | |
1031 case EOL_AUTODETECT: return Qnil; | |
1032 } | |
1033 } | |
1034 | |
1035 struct subsidiary_type | |
1036 { | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1037 const Ascbyte *extension; |
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1038 const Ascbyte *mnemonic_ext; |
771 | 1039 enum eol_type eol; |
1040 }; | |
1041 | |
1042 static struct subsidiary_type coding_subsidiary_list[] = | |
1043 { { "-unix", "", EOL_LF }, | |
1044 { "-dos", ":T", EOL_CRLF }, | |
1045 { "-mac", ":t", EOL_CR } }; | |
1046 | |
1047 /* kludge */ | |
428 | 1048 static void |
771 | 1049 setup_eol_coding_systems (Lisp_Object codesys) |
428 | 1050 { |
793 | 1051 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name); |
2367 | 1052 Ibyte *codesys_name = alloca_ibytes (len + 7); |
771 | 1053 int mlen = -1; |
867 | 1054 Ibyte *codesys_mnemonic = 0; |
771 | 1055 Lisp_Object codesys_name_sym, sub_codesys; |
1056 int i; | |
1057 | |
1058 memcpy (codesys_name, | |
793 | 1059 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len); |
771 | 1060 |
1061 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys))) | |
428 | 1062 { |
771 | 1063 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys)); |
2367 | 1064 codesys_mnemonic = alloca_ibytes (mlen + 7); |
771 | 1065 memcpy (codesys_mnemonic, |
1066 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen); | |
1067 } | |
1068 | |
1069 /* Create three "subsidiary" coding systems, decoding data encoded using | |
1070 each of the three EOL types. We do this for each subsidiary by | |
1071 copying the original coding system, setting the EOL type | |
1072 appropriately, and setting the CANONICAL member of the new coding | |
1073 system to be a chain consisting of the original coding system followed | |
1074 by a convert-eol coding system to do the EOL decoding. For EOL type | |
1075 LF, however, we don't need any decoding, so we skip creating a | |
1076 CANONICAL. | |
1077 | |
1078 If the original coding system is not a text-type coding system | |
1079 (decodes byte->char), we need to coerce it to one by the appropriate | |
1080 wrapping in CANONICAL. */ | |
1081 | |
1082 for (i = 0; i < countof (coding_subsidiary_list); i++) | |
1083 { | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1084 const Ascbyte *extension = coding_subsidiary_list[i].extension; |
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1085 const Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; |
771 | 1086 enum eol_type eol = coding_subsidiary_list[i].eol; |
1087 | |
2367 | 1088 qxestrcpy_ascii (codesys_name + len, extension); |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1089 codesys_name_sym = intern_istring (codesys_name); |
771 | 1090 if (mlen != -1) |
2367 | 1091 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext); |
771 | 1092 |
1093 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym); | |
1094 if (mlen != -1) | |
1095 XCODING_SYSTEM_MNEMONIC (sub_codesys) = | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1096 build_istring (codesys_mnemonic); |
771 | 1097 |
1098 if (eol != EOL_LF) | |
1099 { | |
1100 Lisp_Object chain = list2 (get_coding_system_for_text_file | |
1101 (codesys, 0), | |
1102 eol == EOL_CR ? Qconvert_eol_cr : | |
1103 Qconvert_eol_crlf); | |
1104 Lisp_Object canon = | |
1105 make_internal_coding_system | |
1106 (sub_codesys, "internal-subsidiary-eol-wrapper", | |
1107 Qchain, Qunbound, | |
1108 mlen != -1 ? | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1109 list6 (Qmnemonic, build_istring (codesys_mnemonic), |
771 | 1110 Qchain, chain, |
1111 Qcanonicalize_after_coding, sub_codesys) : | |
1112 list4 (Qchain, chain, | |
1113 Qcanonicalize_after_coding, sub_codesys)); | |
1114 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon; | |
1115 } | |
1116 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol; | |
1117 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; | |
1118 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; | |
428 | 1119 } |
1120 } | |
1121 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1122 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1123 1, 1, 0, /* |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1124 Return t if OBJECT names a coding system, and is not a coding system alias. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1125 */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1126 (object)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1127 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1128 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1129 ? Qt : Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1130 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1131 |
771 | 1132 /* Basic function to create new coding systems. For `make-coding-system', |
1133 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, | |
1134 DESCRIPTION, and PROPS are the same. All created coding systems are put | |
1135 in a hash table indexed by NAME. | |
1136 | |
1137 If PREFIX is a string, NAME-OR-EXISTING should specify an existing | |
1138 coding system (or nil), and an internal coding system will be created. | |
1139 The name of the coding system will be constructed by combining PREFIX | |
1140 with the name of the existing coding system (if given), and a number | |
1141 will be appended to insure uniqueness. In such a case, if Qunbound is | |
1142 given for DESCRIPTION, the description gets created based on the | |
1143 generated name. Also, if no mnemonic is given in the properties list, a | |
1144 mnemonic is created based on the generated name. | |
1145 | |
1146 For internal coding systems, the coding system is marked as internal | |
1147 (see `coding-system-list'), and no subsidiaries will be created or | |
1148 eol-wrapping will happen. Otherwise: | |
1149 | |
1150 -- if the eol-type property is `lf' or t, the coding system is merely | |
1151 created and returned. (For t, the coding system will be wrapped with | |
1152 an EOL autodetector when it's used to read a file.) | |
1153 | |
1154 -- if eol-type is `crlf' or `cr', after the coding system object is | |
1155 created, it will be wrapped in a chain with the appropriate | |
1156 convert-eol coding system (either `convert-eol-crlf' or | |
1157 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at | |
1158 decoding time, and the opposite at encoding time. The resulting | |
1159 chain becomes the CANONICAL field of the coding system object. | |
1160 | |
1161 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three | |
1162 coding systems where the original coding system (before wrapping with | |
1163 convert-eol-autodetect) is either unwrapped or wrapped with | |
1164 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems | |
1165 to handle LF, CRLF, and CR end-of-line indicators are created. (This | |
1166 crazy crap is based on existing behavior in other Mule versions, | |
1167 including FSF Emacs.) | |
1168 */ | |
428 | 1169 |
1170 static Lisp_Object | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1171 make_coding_system_1 (Lisp_Object name_or_existing, const Ascbyte *prefix, |
771 | 1172 Lisp_Object type, Lisp_Object description, |
1173 Lisp_Object props) | |
428 | 1174 { |
771 | 1175 Lisp_Coding_System *cs; |
1176 int need_to_setup_eol_systems = 1; | |
1177 enum eol_type eol_wrapper = EOL_AUTODETECT; | |
1178 struct coding_system_methods *meths; | |
1179 Lisp_Object csobj; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1180 Lisp_Object defmnem = Qnil, aliases = Qnil; |
771 | 1181 |
1182 if (NILP (type)) | |
1183 type = Qundecided; | |
1184 meths = decode_coding_system_type (type, ERROR_ME); | |
1185 | |
1186 if (prefix) | |
428 | 1187 { |
867 | 1188 Ibyte *newname = |
771 | 1189 emacs_sprintf_malloc (NULL, "%s-%s-%d", |
1190 prefix, | |
867 | 1191 NILP (name_or_existing) ? (Ibyte *) "nil" : |
771 | 1192 XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME |
1193 (name_or_existing))), | |
1194 ++coding_system_tick); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1195 name_or_existing = intern_istring (newname); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1196 xfree (newname); |
771 | 1197 |
1198 if (UNBOUNDP (description)) | |
1199 { | |
1200 newname = | |
1201 emacs_sprintf_malloc | |
1202 (NULL, "For Internal Use (%s)", | |
1203 XSTRING_DATA (Fsymbol_name (name_or_existing))); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1204 description = build_istring (newname); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1205 xfree (newname); |
771 | 1206 } |
1207 | |
1208 newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1209 defmnem = build_istring (newname); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1210 xfree (newname); |
428 | 1211 } |
771 | 1212 else |
1213 CHECK_SYMBOL (name_or_existing); | |
1214 | |
4303 | 1215 /* See is there an entry for name_or_existing in the defined coding system |
1216 hash table. */ | |
1217 csobj = find_coding_system (name_or_existing, 0); | |
1218 /* Error if it's there and not an autoload form. */ | |
1219 if (!NILP (csobj) && !CONSP (csobj)) | |
771 | 1220 invalid_operation ("Cannot redefine existing coding system", |
4303 | 1221 name_or_existing); |
771 | 1222 |
1223 cs = allocate_coding_system (meths, meths->extra_data_size, | |
1224 name_or_existing); | |
793 | 1225 csobj = wrap_coding_system (cs); |
771 | 1226 |
1227 cs->internal_p = !!prefix; | |
1228 | |
1229 if (NILP (description)) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
1230 description = build_ascstring (""); |
771 | 1231 else |
1232 CHECK_STRING (description); | |
1233 CODING_SYSTEM_DESCRIPTION (cs) = description; | |
1234 | |
1235 if (!NILP (defmnem)) | |
1236 CODING_SYSTEM_MNEMONIC (cs) = defmnem; | |
1237 | |
1238 { | |
1239 EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) | |
1240 { | |
1241 int recognized = 1; | |
1242 | |
1243 if (EQ (key, Qmnemonic)) | |
1244 { | |
1245 if (!NILP (value)) | |
1246 CHECK_STRING (value); | |
1247 CODING_SYSTEM_MNEMONIC (cs) = value; | |
1248 } | |
1249 | |
1250 else if (EQ (key, Qdocumentation)) | |
1251 { | |
1252 if (!NILP (value)) | |
1253 CHECK_STRING (value); | |
1254 CODING_SYSTEM_DOCUMENTATION (cs) = value; | |
1255 } | |
1256 | |
1257 else if (EQ (key, Qeol_type)) | |
1258 { | |
1259 need_to_setup_eol_systems = NILP (value); | |
1260 if (EQ (value, Qt)) | |
1261 value = Qnil; | |
1262 eol_wrapper = symbol_to_eol_type (value); | |
1263 } | |
1264 | |
1265 else if (EQ (key, Qpost_read_conversion)) | |
1266 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; | |
1267 else if (EQ (key, Qpre_write_conversion)) | |
1268 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1269 else if (EQ (key, Qaliases)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1270 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1271 EXTERNAL_LIST_LOOP_2 (alias, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1272 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1273 CHECK_SYMBOL (alias); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1274 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1275 if (!NILP (Fcoding_system_canonical_name_p (alias))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1276 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1277 invalid_change ("Symbol is the canonical name of a " |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1278 "coding system and cannot be redefined", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1279 alias); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1280 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1281 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1282 aliases = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1283 } |
771 | 1284 /* FSF compatibility */ |
1285 else if (EQ (key, Qtranslation_table_for_decode)) | |
1286 ; | |
1287 else if (EQ (key, Qtranslation_table_for_encode)) | |
1288 ; | |
1289 else if (EQ (key, Qsafe_chars)) | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1290 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1291 CHECK_CHAR_TABLE (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1292 CODING_SYSTEM_SAFE_CHARS (cs) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1293 } |
771 | 1294 else if (EQ (key, Qsafe_charsets)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1295 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1296 if (!EQ (Qt, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1297 /* Would be nice to actually do this check, but there are |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1298 some order conflicts with japanese.el and |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1299 mule-coding.el */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1300 && 0) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1301 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1302 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1303 EXTERNAL_LIST_LOOP_2 (safe_charset, value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1304 CHECK_CHARSET (Ffind_charset (safe_charset)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1305 #endif |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1306 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1307 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1308 CODING_SYSTEM_SAFE_CHARSETS (cs) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1309 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1310 else if (EQ (key, Qcategory)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1311 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1312 Fput (name_or_existing, intern ("coding-system-property"), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1313 Fplist_put (Fget (name_or_existing, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1314 intern ("coding-system-property"), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1315 Qnil), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1316 Qcategory, value)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1317 } |
771 | 1318 else if (EQ (key, Qmime_charset)) |
1319 ; | |
1320 else if (EQ (key, Qvalid_codes)) | |
1321 ; | |
1322 else | |
1323 recognized = CODESYSMETH_OR_GIVEN (cs, putprop, | |
1324 (csobj, key, value), 0); | |
1325 | |
1326 if (!recognized) | |
1327 invalid_constant ("Unrecognized property", key); | |
1328 } | |
1329 } | |
1330 | |
1331 { | |
1332 XCODING_SYSTEM_CANONICAL (csobj) = | |
1333 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); | |
1334 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system | |
1335 below */ | |
1336 | |
4303 | 1337 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); |
1338 | |
771 | 1339 if (need_to_setup_eol_systems && !cs->internal_p) |
1340 setup_eol_coding_systems (csobj); | |
1341 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) | |
1342 { | |
1343 /* If a specific eol-type (other than LF) was specified, we handle | |
1344 this by converting the coding system into a chain that wraps the | |
1345 coding system along with a convert-eol system after it, in | |
1346 exactly that same switcheroo fashion that the normal | |
1347 canonicalize method works -- BUT we will run into a problem if | |
1348 we do it the obvious way, because when `chain' creates its | |
1349 substreams, the substream containing the coding system we're | |
1350 creating will have canonicalization expansion done on it, | |
1351 leading to infinite recursion. So we have to generate a new, | |
1352 internal coding system with the previous value of CANONICAL. */ | |
867 | 1353 Ibyte *newname = |
771 | 1354 emacs_sprintf_malloc |
1355 (NULL, "internal-eol-copy-%s-%d", | |
1356 XSTRING_DATA (Fsymbol_name (name_or_existing)), | |
1357 ++coding_system_tick); | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
1358 Lisp_Object newnamesym = intern_istring (newname); |
771 | 1359 Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym); |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
1360 xfree (newname); |
771 | 1361 |
1362 XCODING_SYSTEM_CANONICAL (csobj) = | |
1363 make_internal_coding_system | |
1364 (csobj, | |
1365 "internal-eol-wrapper", | |
1366 Qchain, Qunbound, | |
1367 list4 (Qchain, | |
1368 list2 (copied, | |
1369 eol_wrapper == EOL_CR ? | |
1370 Qconvert_eol_cr : | |
1371 Qconvert_eol_crlf), | |
1372 Qcanonicalize_after_coding, | |
1373 csobj)); | |
1374 } | |
1375 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1376 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1377 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1378 EXTERNAL_LIST_LOOP_2 (alias, aliases) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1379 Fdefine_coding_system_alias (alias, csobj); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1380 } |
771 | 1381 } |
1382 | |
1383 return csobj; | |
428 | 1384 } |
1385 | |
771 | 1386 Lisp_Object |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1387 make_internal_coding_system (Lisp_Object existing, const Ascbyte *prefix, |
771 | 1388 Lisp_Object type, Lisp_Object description, |
1389 Lisp_Object props) | |
1390 { | |
1391 return make_coding_system_1 (existing, prefix, type, description, props); | |
1392 } | |
428 | 1393 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1394 DEFUN ("make-coding-system-internal", Fmake_coding_system_internal, 2, 4, 0, /* |
5083
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1395 Create a new coding system object, and register NAME as its name. |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1396 |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1397 With Mule support, this does much of the work of `make-coding-system'. |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1398 Without Mule support, it does all the work of that function, and an alias |
5083
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1399 exists, mapping `make-coding-system' to `make-coding-system-internal'. |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1400 |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1401 You'll need a Mule XEmacs to read the complete docstring. Or you can |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1402 just read it in make-coding-system.el; something like the following |
88f955fa5a7f
Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5038
diff
changeset
|
1403 should work: |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1404 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
1405 \\[find-function-other-window] find-file RET \\[find-file] mule/make-coding-system.el RET |
771 | 1406 |
428 | 1407 */ |
771 | 1408 (name, type, description, props)) |
428 | 1409 { |
771 | 1410 return make_coding_system_1 (name, 0, type, description, props); |
428 | 1411 } |
1412 | |
1413 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /* | |
1414 Copy OLD-CODING-SYSTEM to NEW-NAME. | |
1415 If NEW-NAME does not name an existing coding system, a new one will | |
1416 be created. | |
771 | 1417 If you are using this function to create an alias, think again: |
1418 Use `define-coding-system-alias' instead. | |
428 | 1419 */ |
1420 (old_coding_system, new_name)) | |
1421 { | |
1422 Lisp_Object new_coding_system; | |
1423 old_coding_system = Fget_coding_system (old_coding_system); | |
771 | 1424 new_coding_system = |
4303 | 1425 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0); |
428 | 1426 if (NILP (new_coding_system)) |
1427 { | |
793 | 1428 new_coding_system = |
1429 wrap_coding_system | |
1430 (allocate_coding_system | |
1431 (XCODING_SYSTEM (old_coding_system)->methods, | |
1432 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size, | |
1433 new_name)); | |
771 | 1434 if (!UNBOUNDP (new_name)) |
1435 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table); | |
428 | 1436 } |
771 | 1437 else if (XCODING_SYSTEM (old_coding_system)->methods != |
1438 XCODING_SYSTEM (new_coding_system)->methods) | |
1439 invalid_operation_2 ("Coding systems not same type", | |
1440 old_coding_system, new_coding_system); | |
428 | 1441 |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
1442 copy_lisp_object (new_coding_system, old_coding_system); |
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5126
diff
changeset
|
1443 XCODING_SYSTEM (new_coding_system)->name = new_name; |
428 | 1444 return new_coding_system; |
1445 } | |
1446 | |
2297 | 1447 /* #### Shouldn't this really be a find/get pair? */ |
1448 | |
440 | 1449 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* |
1450 Return t if OBJECT is a coding system alias. | |
1451 All coding system aliases are created by `define-coding-system-alias'. | |
1452 */ | |
1453 (object)) | |
428 | 1454 { |
440 | 1455 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero)) |
1456 ? Qt : Qnil; | |
1457 } | |
1458 | |
1459 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /* | |
1460 Return the coding-system symbol for which symbol ALIAS is an alias. | |
1461 */ | |
1462 (alias)) | |
1463 { | |
1464 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil); | |
1465 if (SYMBOLP (aliasee)) | |
1466 return aliasee; | |
1467 else | |
563 | 1468 invalid_argument ("Symbol is not a coding system alias", alias); |
1204 | 1469 RETURN_NOT_REACHED (Qnil); |
440 | 1470 } |
1471 | |
1472 /* A maphash function, for removing dangling coding system aliases. */ | |
1473 static int | |
2286 | 1474 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias), |
440 | 1475 Lisp_Object aliasee, |
1476 void *dangling_aliases) | |
1477 { | |
1478 if (SYMBOLP (aliasee) | |
1479 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil))) | |
428 | 1480 { |
440 | 1481 (*(int *) dangling_aliases)++; |
1482 return 1; | |
428 | 1483 } |
440 | 1484 else |
1485 return 0; | |
1486 } | |
1487 | |
1488 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /* | |
1489 Define symbol ALIAS as an alias for coding system ALIASEE. | |
1490 | |
1491 You can use this function to redefine an alias that has already been defined, | |
1492 but you cannot redefine a name which is the canonical name for a coding system. | |
1493 \(a canonical name of a coding system is what is returned when you call | |
1494 `coding-system-name' on a coding system). | |
1495 | |
1496 ALIASEE itself can be an alias, which allows you to define nested aliases. | |
1497 | |
1498 You are forbidden, however, from creating alias loops or `dangling' aliases. | |
1499 These will be detected, and an error will be signaled if you attempt to do so. | |
1500 | |
1501 If ALIASEE is nil, then ALIAS will simply be undefined. | |
1502 | |
1503 See also `coding-system-alias-p', `coding-system-aliasee', | |
1504 and `coding-system-canonical-name-p'. | |
1505 */ | |
1506 (alias, aliasee)) | |
1507 { | |
2286 | 1508 Lisp_Object probe; |
440 | 1509 |
1510 CHECK_SYMBOL (alias); | |
1511 | |
1512 if (!NILP (Fcoding_system_canonical_name_p (alias))) | |
563 | 1513 invalid_change |
440 | 1514 ("Symbol is the canonical name of a coding system and cannot be redefined", |
1515 alias); | |
1516 | |
1517 if (NILP (aliasee)) | |
1518 { | |
771 | 1519 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix"); |
1520 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos"); | |
1521 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac"); | |
440 | 1522 |
1523 Fremhash (alias, Vcoding_system_hash_table); | |
1524 | |
1525 /* Undefine subsidiary aliases, | |
1526 presumably created by a previous call to this function */ | |
1527 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) && | |
1528 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) && | |
1529 ! NILP (Fcoding_system_alias_p (subsidiary_mac))) | |
1530 { | |
1531 Fdefine_coding_system_alias (subsidiary_unix, Qnil); | |
1532 Fdefine_coding_system_alias (subsidiary_dos, Qnil); | |
1533 Fdefine_coding_system_alias (subsidiary_mac, Qnil); | |
1534 } | |
1535 | |
1536 /* Undefine dangling coding system aliases. */ | |
1537 { | |
1538 int dangling_aliases; | |
1539 | |
1540 do { | |
1541 dangling_aliases = 0; | |
1542 elisp_map_remhash (dangling_coding_system_alias_p, | |
1543 Vcoding_system_hash_table, | |
1544 &dangling_aliases); | |
1545 } while (dangling_aliases > 0); | |
1546 } | |
1547 | |
1548 return Qnil; | |
1549 } | |
1550 | |
1551 if (CODING_SYSTEMP (aliasee)) | |
1552 aliasee = XCODING_SYSTEM_NAME (aliasee); | |
1553 | |
1554 /* Checks that aliasee names a coding-system */ | |
2286 | 1555 (void) Fget_coding_system (aliasee); |
440 | 1556 |
1557 /* Check for coding system alias loops */ | |
1558 if (EQ (alias, aliasee)) | |
563 | 1559 alias_loop: invalid_operation_2 |
440 | 1560 ("Attempt to create a coding system alias loop", alias, aliasee); |
1561 | |
1562 for (probe = aliasee; | |
1563 SYMBOLP (probe); | |
1564 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero)) | |
1565 { | |
1566 if (EQ (probe, alias)) | |
1567 goto alias_loop; | |
1568 } | |
1569 | |
1570 Fputhash (alias, aliasee, Vcoding_system_hash_table); | |
1571 | |
1572 /* Set up aliases for subsidiaries. | |
2297 | 1573 #### There must be a better way to handle subsidiary coding systems. |
1574 Inquiring Minds Want To Know: shouldn't they always be chains? */ | |
440 | 1575 { |
1576 static const char *suffixes[] = { "-unix", "-dos", "-mac" }; | |
1577 int i; | |
1578 for (i = 0; i < countof (suffixes); i++) | |
1579 { | |
1580 Lisp_Object alias_subsidiary = | |
771 | 1581 add_suffix_to_symbol (alias, suffixes[i]); |
440 | 1582 Lisp_Object aliasee_subsidiary = |
771 | 1583 add_suffix_to_symbol (aliasee, suffixes[i]); |
440 | 1584 |
1585 if (! NILP (Ffind_coding_system (aliasee_subsidiary))) | |
1586 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary); | |
1587 } | |
1588 } | |
428 | 1589 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac], |
1590 but it doesn't look intentional, so I'd rather return something | |
1591 meaningful or nothing at all. */ | |
1592 return Qnil; | |
1593 } | |
1594 | |
1595 static Lisp_Object | |
771 | 1596 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) |
428 | 1597 { |
1598 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
1599 Lisp_Object new_coding_system; | |
1600 | |
1601 switch (type) | |
1602 { | |
1603 case EOL_AUTODETECT: return coding_system; | |
1604 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; | |
1605 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; | |
1606 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; | |
2500 | 1607 default: ABORT (); return Qnil; |
428 | 1608 } |
1609 | |
1610 return NILP (new_coding_system) ? coding_system : new_coding_system; | |
1611 } | |
1612 | |
1613 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /* | |
1614 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE. | |
771 | 1615 The logically opposite operation is `coding-system-base'. |
428 | 1616 */ |
1617 (coding_system, eol_type)) | |
1618 { | |
771 | 1619 coding_system = get_coding_system_for_text_file (coding_system, 0); |
428 | 1620 |
1621 return subsidiary_coding_system (coding_system, | |
1622 symbol_to_eol_type (eol_type)); | |
1623 } | |
1624 | |
771 | 1625 DEFUN ("coding-system-base", Fcoding_system_base, |
1626 1, 1, 0, /* | |
1627 Return the base coding system of CODING-SYSTEM. | |
1628 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it | |
1629 returns CODING-SYSTEM. | |
1630 The logically opposite operation is `subsidiary-coding-system'. | |
1631 */ | |
1632 (coding_system)) | |
1633 { | |
1634 Lisp_Object base; | |
1635 | |
1636 coding_system = Fget_coding_system (coding_system); | |
1637 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary)) | |
1638 return Fget_coding_system (Qraw_text); /* hack! */ | |
1639 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system); | |
1640 if (!NILP (base)) | |
1641 return base; | |
1642 return coding_system; | |
1643 } | |
1644 | |
1645 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io, | |
1646 1, 1, 0, /* | |
1647 Return the coding system actually used for I/O. | |
1648 In some cases (e.g. when a particular EOL type is specified) this won't be | |
2297 | 1649 the coding system itself. This can be useful when trying to determine |
1650 precisely how data was decoded. | |
771 | 1651 */ |
1652 (coding_system)) | |
1653 { | |
1654 Lisp_Object canon; | |
1655 | |
1656 coding_system = Fget_coding_system (coding_system); | |
1657 canon = XCODING_SYSTEM_CANONICAL (coding_system); | |
1658 if (!NILP (canon)) | |
1659 return canon; | |
1660 return coding_system; | |
1661 } | |
1662 | |
428 | 1663 |
1664 /************************************************************************/ | |
1665 /* Coding system accessors */ | |
1666 /************************************************************************/ | |
1667 | |
771 | 1668 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /* |
1669 Return the description for CODING-SYSTEM. | |
1670 The `description' of a coding system is a short English phrase giving the | |
1671 name rendered according to English punctuation rules, plus possibly some | |
1672 explanatory text (typically in the form of a parenthetical phrase). The | |
1673 description is intended to be short enough that it can appear as a menu item, | |
1674 and clear enough to be recognizable even to someone who is assumed to have | |
1675 some basic familiarity with different encodings but may not know all the | |
1676 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and | |
1677 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of | |
1678 the encoding is given, followed by a note that this is a Chinese encoding, | |
1679 because the great majority of people encountering this would have no idea | |
1680 what it is, and giving the language indicates whether the encoding should | |
1681 just be ignored or (conceivably) investigated more thoroughly. | |
428 | 1682 */ |
1683 (coding_system)) | |
1684 { | |
1685 coding_system = Fget_coding_system (coding_system); | |
771 | 1686 return XCODING_SYSTEM_DESCRIPTION (coding_system); |
428 | 1687 } |
1688 | |
1689 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /* | |
1690 Return the type of CODING-SYSTEM. | |
1691 */ | |
1692 (coding_system)) | |
1693 { | |
771 | 1694 coding_system = Fget_coding_system (coding_system); |
1695 return XCODING_SYSTEM_TYPE (coding_system); | |
428 | 1696 } |
1697 | |
1698 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /* | |
1699 Return the PROP property of CODING-SYSTEM. | |
1700 */ | |
1701 (coding_system, prop)) | |
1702 { | |
1703 coding_system = Fget_coding_system (coding_system); | |
1704 CHECK_SYMBOL (prop); | |
1705 | |
1706 if (EQ (prop, Qname)) | |
1707 return XCODING_SYSTEM_NAME (coding_system); | |
1708 else if (EQ (prop, Qtype)) | |
1709 return Fcoding_system_type (coding_system); | |
771 | 1710 else if (EQ (prop, Qdescription)) |
1711 return XCODING_SYSTEM_DESCRIPTION (coding_system); | |
428 | 1712 else if (EQ (prop, Qmnemonic)) |
1713 return XCODING_SYSTEM_MNEMONIC (coding_system); | |
771 | 1714 else if (EQ (prop, Qdocumentation)) |
1715 return XCODING_SYSTEM_DOCUMENTATION (coding_system); | |
428 | 1716 else if (EQ (prop, Qeol_type)) |
771 | 1717 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE |
1718 (coding_system)); | |
428 | 1719 else if (EQ (prop, Qeol_lf)) |
1720 return XCODING_SYSTEM_EOL_LF (coding_system); | |
1721 else if (EQ (prop, Qeol_crlf)) | |
1722 return XCODING_SYSTEM_EOL_CRLF (coding_system); | |
1723 else if (EQ (prop, Qeol_cr)) | |
1724 return XCODING_SYSTEM_EOL_CR (coding_system); | |
1725 else if (EQ (prop, Qpost_read_conversion)) | |
1726 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); | |
1727 else if (EQ (prop, Qpre_write_conversion)) | |
1728 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1729 else if (EQ (prop, Qsafe_charsets)) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1730 return XCODING_SYSTEM_SAFE_CHARSETS (coding_system); |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1731 else if (EQ (prop, Qsafe_chars)) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1732 return XCODING_SYSTEM_SAFE_CHARS (coding_system); |
771 | 1733 else |
1734 { | |
1735 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), | |
1736 getprop, | |
1737 (coding_system, prop), | |
1738 Qunbound); | |
1739 if (UNBOUNDP (value)) | |
1740 invalid_constant ("Unrecognized property", prop); | |
1741 return value; | |
1742 } | |
1743 } | |
1744 | |
1745 | |
1746 /************************************************************************/ | |
1747 /* Coding stream functions */ | |
1748 /************************************************************************/ | |
1749 | |
1750 /* A coding stream is a stream used for encoding or decoding text. The | |
1751 coding-stream object keeps track of the actual coding system, the stream | |
1752 that is at the other end, and data that needs to be persistent across | |
1753 the lifetime of the stream. */ | |
1754 | |
1204 | 1755 extern const struct sized_memory_description chain_coding_stream_description; |
1756 extern const struct sized_memory_description undecided_coding_stream_description; | |
1757 | |
1758 static const struct memory_description coding_stream_data_description_1 []= { | |
2551 | 1759 { XD_BLOCK_PTR, chain_coding_system, 1, |
1760 { &chain_coding_stream_description } }, | |
1761 { XD_BLOCK_PTR, undecided_coding_system, 1, | |
1762 { &undecided_coding_stream_description } }, | |
1204 | 1763 { XD_END } |
1764 }; | |
1765 | |
1766 static const struct sized_memory_description coding_stream_data_description = { | |
1767 sizeof (void *), coding_stream_data_description_1 | |
1768 }; | |
1769 | |
1770 static const struct memory_description coding_lstream_description[] = { | |
1771 { XD_INT, offsetof (struct coding_stream, type) }, | |
1772 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) }, | |
1773 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) }, | |
1774 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) }, | |
1775 { XD_UNION, offsetof (struct coding_stream, data), | |
2551 | 1776 XD_INDIRECT (0, 0), { &coding_stream_data_description } }, |
1204 | 1777 { XD_END } |
1778 }; | |
1779 | |
1780 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding); | |
771 | 1781 |
1782 /* Encoding and decoding are parallel operations, so we create just one | |
1783 stream for both. "Decoding" may involve the extra step of autodetection | |
1784 of the data format, but that's only because of the conventional | |
1785 definition of decoding as converting from external- to | |
1786 internal-formatted data. | |
1787 | |
2297 | 1788 [[ REWRITE ME! ]] |
1789 | |
771 | 1790 #### We really need to abstract out the concept of "data formats" and |
1791 define "converters" that convert from and to specified formats, | |
1792 eliminating the idea of decoding and encoding. When specifying a | |
1793 conversion process, we need to give the data formats themselves, not the | |
1794 conversion processes -- e.g. a coding system called "Unicode->multibyte" | |
1795 converts in both directions, and we could auto-detect the format of data | |
1796 at either end. */ | |
1797 | |
1798 static Bytecount | |
1799 coding_reader (Lstream *stream, unsigned char *data, Bytecount size) | |
1800 { | |
1801 unsigned char *orig_data = data; | |
1802 Bytecount read_size; | |
1803 int error_occurred = 0; | |
1804 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1805 | |
1806 /* We need to interface to coding_{de,en}code_1(), which expects to take | |
1807 some amount of data and store the result into a Dynarr. We have | |
1808 coding_{de,en}code_1() store into c->runoff, and take data from there | |
1809 as necessary. */ | |
1810 | |
1811 /* We loop until we have enough data, reading chunks from the other | |
1812 end and converting it. */ | |
1813 while (1) | |
1814 { | |
1815 /* Take data from convert_to if we can. Make sure to take at | |
1816 most SIZE bytes, and delete the data from convert_to. */ | |
1817 if (Dynarr_length (str->convert_to) > 0) | |
1818 { | |
1819 Bytecount chunk = | |
1820 min (size, (Bytecount) Dynarr_length (str->convert_to)); | |
4967 | 1821 memcpy (data, Dynarr_begin (str->convert_to), chunk); |
771 | 1822 Dynarr_delete_many (str->convert_to, 0, chunk); |
1823 data += chunk; | |
1824 size -= chunk; | |
1825 } | |
1826 | |
1827 if (size == 0) | |
1828 break; /* No more room for data */ | |
1829 | |
1830 if (str->eof) | |
1831 break; | |
1832 | |
1833 { | |
1834 /* Exhausted convert_to, so get some more. Read into convert_from, | |
1835 after existing "rejected" data from the last conversion. */ | |
1836 Bytecount rejected = Dynarr_length (str->convert_from); | |
1837 /* #### 1024 is arbitrary; we really need to separate 0 from EOF, | |
1838 and when we get 0, keep taking more data until we don't get 0 -- | |
1839 we don't know how much data the conversion routine might need | |
2297 | 1840 before it can generate any data of its own (eg, bzip2). */ |
814 | 1841 Bytecount readmore = |
1842 str->one_byte_at_a_time ? (Bytecount) 1 : | |
1843 max (size, (Bytecount) 1024); | |
771 | 1844 |
1845 Dynarr_add_many (str->convert_from, 0, readmore); | |
1846 read_size = Lstream_read (str->other_end, | |
1847 Dynarr_atp (str->convert_from, rejected), | |
1848 readmore); | |
1849 /* Trim size down to how much we actually got */ | |
5038 | 1850 Dynarr_set_lengthr (str->convert_from, rejected + max (0, read_size)); |
771 | 1851 } |
1852 | |
1853 if (read_size < 0) /* LSTREAM_ERROR */ | |
1854 { | |
1855 error_occurred = 1; | |
1856 break; | |
1857 } | |
1858 if (read_size == 0) /* LSTREAM_EOF */ | |
1859 /* There might be some more end data produced in the translation, | |
1860 so we set a flag and call the conversion method once more to | |
1861 output any final stuff it may be holding, any "go back to a sane | |
1862 state" escape sequences, etc. The conversion method is free to | |
1863 look at this flag, and we use it above to stop looping. */ | |
1864 str->eof = 1; | |
1865 { | |
1866 Bytecount processed; | |
1867 Bytecount to_process = Dynarr_length (str->convert_from); | |
1868 | |
1869 /* Convert the data, and save any rejected data in convert_from */ | |
1870 processed = | |
1871 XCODESYSMETH (str->codesys, convert, | |
4967 | 1872 (str, Dynarr_begin (str->convert_from), |
771 | 1873 str->convert_to, to_process)); |
1874 if (processed < 0) | |
1875 { | |
1876 error_occurred = 1; | |
1877 break; | |
1878 } | |
1879 assert (processed <= to_process); | |
1880 if (processed < to_process) | |
4967 | 1881 memmove (Dynarr_begin (str->convert_from), |
771 | 1882 Dynarr_atp (str->convert_from, processed), |
1883 to_process - processed); | |
5038 | 1884 Dynarr_set_lengthr (str->convert_from, to_process - processed); |
771 | 1885 } |
1886 } | |
1887 | |
1888 if (data - orig_data == 0) | |
1889 return error_occurred ? -1 : 0; | |
1890 else | |
1891 return data - orig_data; | |
1892 } | |
1893 | |
1894 static Bytecount | |
1895 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size) | |
1896 { | |
1897 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1898 | |
1899 /* Convert all our data into convert_to, and then attempt to write | |
1900 it all out to the other end. */ | |
1901 Dynarr_reset (str->convert_to); | |
1902 size = XCODESYSMETH (str->codesys, convert, | |
1903 (str, data, str->convert_to, size)); | |
4967 | 1904 if (Lstream_write (str->other_end, Dynarr_begin (str->convert_to), |
771 | 1905 Dynarr_length (str->convert_to)) < 0) |
1906 return -1; | |
1907 else | |
1908 /* The return value indicates how much of the incoming data was | |
1909 processed, not how many bytes were written. */ | |
1910 return size; | |
1911 } | |
1912 | |
1913 static int | |
1914 encode_decode_source_sink_type_is_char (Lisp_Object cs, | |
1915 enum source_or_sink sex, | |
1916 enum encode_decode direction) | |
1917 { | |
1918 return (direction == CODING_DECODE ? | |
1919 decoding_source_sink_type_is_char (cs, sex) : | |
1920 encoding_source_sink_type_is_char (cs, sex)); | |
1921 } | |
1922 | |
1923 /* Ensure that the convert methods only get full characters sent to them to | |
1924 convert if the source of that conversion is characters; and that no such | |
1925 full-character checking happens when the source is bytes. Keep in mind | |
1926 that (1) the conversion_end_type return values take the perspective of | |
1927 encoding; (2) the source for decoding is the same as the sink for | |
1928 encoding; (3) when writing, the data is given to us, and we set our own | |
1929 stream to be character mode or not; (4) when reading, the data comes | |
1930 from the other_end stream, and we set that one to be character mode or | |
1931 not. This is consistent with the comment above the prototype for | |
1932 Lstream_set_character_mode(), which lays out rules for who is allowed to | |
1933 modify the character type mode on a stream. | |
1934 | |
814 | 1935 If we're a read stream, we're always setting character mode on the |
1936 source, but we also set it on ourselves consistent with the flag that | |
1937 can disable this (see again the comment above | |
1938 Lstream_set_character_mode()). | |
1939 */ | |
771 | 1940 |
1941 static void | |
1942 set_coding_character_mode (Lstream *stream) | |
1943 { | |
1944 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1945 Lstream *stream_to_set = | |
1946 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end; | |
1947 if (encode_decode_source_sink_type_is_char | |
1948 (str->codesys, CODING_SOURCE, str->direction)) | |
1949 Lstream_set_character_mode (stream_to_set); | |
1950 else | |
1951 Lstream_unset_character_mode (stream_to_set); | |
814 | 1952 if (str->set_char_mode_on_us_when_reading && |
1953 (stream->flags & LSTREAM_FL_READ)) | |
1954 { | |
1955 if (encode_decode_source_sink_type_is_char | |
1956 (str->codesys, CODING_SINK, str->direction)) | |
1957 Lstream_set_character_mode (stream); | |
1958 else | |
1959 Lstream_unset_character_mode (stream); | |
1960 } | |
771 | 1961 } |
1962 | |
1963 static Lisp_Object | |
1964 coding_marker (Lisp_Object stream) | |
1965 { | |
1966 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream)); | |
1967 | |
1968 mark_object (str->orig_codesys); | |
1969 mark_object (str->codesys); | |
1970 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str)); | |
1971 return wrap_lstream (str->other_end); | |
1972 } | |
1973 | |
1974 static int | |
1975 coding_rewinder (Lstream *stream) | |
1976 { | |
1977 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1978 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str)); | |
1979 | |
1980 str->ch = 0; | |
1981 Dynarr_reset (str->convert_to); | |
1982 Dynarr_reset (str->convert_from); | |
1983 return Lstream_rewind (str->other_end); | |
1984 } | |
1985 | |
1986 static int | |
1987 coding_seekable_p (Lstream *stream) | |
1988 { | |
1989 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1990 return Lstream_seekable_p (str->other_end); | |
1991 } | |
1992 | |
1993 static int | |
1994 coding_flusher (Lstream *stream) | |
1995 { | |
1996 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
1997 return Lstream_flush (str->other_end); | |
1998 } | |
1999 | |
2000 static int | |
2001 coding_closer (Lstream *stream) | |
2002 { | |
2003 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2004 if (stream->flags & LSTREAM_FL_WRITE) | |
2005 { | |
2006 str->eof = 1; | |
2007 coding_writer (stream, 0, 0); | |
2008 str->eof = 0; | |
2009 } | |
2010 /* It's safe to free the runoff dynarrs now because they are used only | |
2011 during conversion. We need to keep the type-specific data around, | |
2012 though, because of canonicalize_after_coding. */ | |
2013 if (str->convert_to) | |
2014 { | |
2015 Dynarr_free (str->convert_to); | |
2016 str->convert_to = 0; | |
2017 } | |
2018 if (str->convert_from) | |
428 | 2019 { |
771 | 2020 Dynarr_free (str->convert_from); |
2021 str->convert_from = 0; | |
2022 } | |
2023 | |
800 | 2024 if (str->no_close_other) |
2025 return Lstream_flush (str->other_end); | |
2026 else | |
2027 return Lstream_close (str->other_end); | |
771 | 2028 } |
2029 | |
2030 static void | |
2031 coding_finalizer (Lstream *stream) | |
2032 { | |
2033 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2034 | |
2035 assert (!str->finalized); | |
2036 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2037 if (str->data) | |
2038 { | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2039 xfree (str->data); |
771 | 2040 str->data = 0; |
2041 } | |
2042 str->finalized = 1; | |
2043 } | |
2044 | |
2045 static Lisp_Object | |
2046 coding_stream_canonicalize_after_coding (Lstream *stream) | |
2047 { | |
2048 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
2049 | |
2050 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding, | |
2051 (str), str->codesys); | |
2052 } | |
2053 | |
2054 Lisp_Object | |
2055 coding_stream_detected_coding_system (Lstream *stream) | |
2056 { | |
2057 Lisp_Object codesys = | |
2058 coding_stream_canonicalize_after_coding (stream); | |
2059 if (NILP (codesys)) | |
2060 return Fget_coding_system (Qidentity); | |
2061 return codesys; | |
2062 } | |
2063 | |
2064 Lisp_Object | |
2065 coding_stream_coding_system (Lstream *stream) | |
2066 { | |
2067 return CODING_STREAM_DATA (stream)->codesys; | |
2068 } | |
2069 | |
2070 /* Change the coding system associated with a stream. */ | |
2071 | |
2072 void | |
2073 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys) | |
2074 { | |
2075 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2076 if (EQ (str->orig_codesys, codesys)) | |
2077 return; | |
2078 /* We do the equivalent of closing the stream, destroying it, and | |
2079 reinitializing it. This includes flushing out the data and signalling | |
2080 EOF, if we're a writing stream; we also replace the type-specific data | |
2081 with the data appropriate for the new coding system. */ | |
2082 if (!NILP (str->codesys)) | |
2083 { | |
2084 if (lstr->flags & LSTREAM_FL_WRITE) | |
2085 { | |
2086 Lstream_flush (lstr); | |
2087 str->eof = 1; | |
2088 coding_writer (lstr, 0, 0); | |
2089 str->eof = 0; | |
2090 } | |
2091 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
2092 } | |
2093 str->orig_codesys = codesys; | |
2094 str->codesys = coding_system_real_canonical (codesys); | |
2095 | |
2096 if (str->data) | |
2097 { | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2098 xfree (str->data); |
771 | 2099 str->data = 0; |
428 | 2100 } |
771 | 2101 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size) |
1204 | 2102 { |
2103 str->data = | |
2104 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)-> | |
2105 coding_data_size); | |
2106 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype; | |
2107 } | |
771 | 2108 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str)); |
2109 /* The new coding system may have different ideas regarding whether its | |
2110 ends are characters or bytes. */ | |
2111 set_coding_character_mode (lstr); | |
2112 } | |
2113 | |
2114 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding | |
2115 stream for writing, no automatic code detection will be performed. | |
2116 The reason for this is that automatic code detection requires a | |
2117 seekable input. Things will also fail if you open a coding | |
2118 stream for reading using a non-fully-specified coding system and | |
2119 a non-seekable input stream. */ | |
2120 | |
2121 static Lisp_Object | |
2122 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys, | |
800 | 2123 const char *mode, enum encode_decode direction, |
802 | 2124 int flags) |
771 | 2125 { |
2126 Lstream *lstr = Lstream_new (lstream_coding, mode); | |
2127 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
2128 | |
2129 codesys = Fget_coding_system (codesys); | |
2130 xzero (*str); | |
2131 str->codesys = Qnil; | |
2132 str->orig_codesys = Qnil; | |
2133 str->us = lstr; | |
2134 str->other_end = stream; | |
2135 str->convert_to = Dynarr_new (unsigned_char); | |
2136 str->convert_from = Dynarr_new (unsigned_char); | |
2137 str->direction = direction; | |
814 | 2138 if (flags & LSTREAM_FL_NO_CLOSE_OTHER) |
802 | 2139 str->no_close_other = 1; |
814 | 2140 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME) |
802 | 2141 str->one_byte_at_a_time = 1; |
814 | 2142 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING)) |
2143 str->set_char_mode_on_us_when_reading = 1; | |
802 | 2144 |
771 | 2145 set_coding_stream_coding_system (lstr, codesys); |
793 | 2146 return wrap_lstream (lstr); |
771 | 2147 } |
2148 | |
814 | 2149 /* FLAGS: |
2150 | |
2151 LSTREAM_FL_NO_CLOSE_OTHER | |
2152 Don't close STREAM (the stream at the other end) when this stream is | |
2153 closed. | |
2154 | |
2155 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME | |
2156 When reading from STREAM, read and process one byte at a time rather | |
2157 than in large chunks. This is for reading from TTY's, so we don't | |
2158 block. #### We should instead create a non-blocking filedesc stream | |
2159 that emulates the behavior as necessary using select(), when the | |
2160 fcntls don't work. (As seems to be the case on Cygwin.) | |
2161 | |
2162 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING | |
2163 When reading from STREAM, read and process one byte at a time rather | |
2164 than in large chunks. This is for reading from TTY's, so we don't | |
2165 block. #### We should instead create a non-blocking filedesc stream | |
2166 that emulates the behavior as necessary using select(), when the | |
2167 fcntls don't work. (As seems to be the case on Cygwin.) | |
2168 */ | |
771 | 2169 Lisp_Object |
2170 make_coding_input_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2171 enum encode_decode direction, int flags) |
771 | 2172 { |
800 | 2173 return make_coding_stream_1 (stream, codesys, "r", direction, |
802 | 2174 flags); |
771 | 2175 } |
2176 | |
814 | 2177 /* FLAGS: |
2178 | |
2179 LSTREAM_FL_NO_CLOSE_OTHER | |
2180 Don't close STREAM (the stream at the other end) when this stream is | |
2181 closed. | |
2182 */ | |
771 | 2183 Lisp_Object |
2184 make_coding_output_stream (Lstream *stream, Lisp_Object codesys, | |
802 | 2185 enum encode_decode direction, int flags) |
771 | 2186 { |
800 | 2187 return make_coding_stream_1 (stream, codesys, "w", direction, |
802 | 2188 flags); |
771 | 2189 } |
2190 | |
2191 static Lisp_Object | |
2192 encode_decode_coding_region (Lisp_Object start, Lisp_Object end, | |
2193 Lisp_Object coding_system, Lisp_Object buffer, | |
2194 enum encode_decode direction) | |
2195 { | |
2196 Charbpos b, e; | |
2197 struct buffer *buf = decode_buffer (buffer, 0); | |
2198 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil; | |
2199 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil; | |
2200 Lisp_Object lb_outstream = Qnil; | |
2201 Lisp_Object next; | |
2202 Lstream *istr, *ostr; | |
2203 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; | |
2204 struct gcpro ngcpro1; | |
2205 int source_char, sink_char; | |
2206 | |
2207 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
2208 barf_if_buffer_read_only (buf, b, e); | |
2209 | |
2210 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream); | |
2211 NGCPRO1 (auto_outstream); | |
2212 | |
2213 coding_system = Fget_coding_system (coding_system); | |
2214 source_char = encode_decode_source_sink_type_is_char (coding_system, | |
2215 CODING_SOURCE, | |
2216 direction); | |
2217 sink_char = encode_decode_source_sink_type_is_char (coding_system, | |
2218 CODING_SINK, | |
2219 direction); | |
2220 | |
2221 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */ | |
2222 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
2223 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0); | |
2224 | |
2225 if (direction == CODING_DECODE && | |
2226 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT) | |
2227 next = auto_outstream = | |
2228 make_coding_output_stream | |
800 | 2229 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect), |
2230 CODING_DECODE, 0); | |
771 | 2231 |
2232 if (!sink_char) | |
2233 next = from_outstream = | |
800 | 2234 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0); |
771 | 2235 outstream = make_coding_output_stream (XLSTREAM (next), coding_system, |
800 | 2236 direction, 0); |
771 | 2237 if (!source_char) |
428 | 2238 { |
771 | 2239 to_outstream = |
2240 make_coding_output_stream (XLSTREAM (outstream), | |
800 | 2241 Qbinary, CODING_ENCODE, 0); |
771 | 2242 ostr = XLSTREAM (to_outstream); |
2243 } | |
2244 else | |
2245 ostr = XLSTREAM (outstream); | |
2246 istr = XLSTREAM (instream); | |
2247 | |
2248 /* The chain of streams looks like this: | |
2249 | |
2297 | 2250 [BUFFER] <----- (( read from/send to loop )) |
771 | 2251 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is |
2252 in bytes] | |
2253 ------> [ENCODE/DECODE AS SPECIFIED] | |
2254 ------> [BYTE->CHAR i.e. DECODE AS BINARY | |
2255 if sink is in bytes] | |
2256 ------> [AUTODETECT EOL if | |
2257 we're decoding and | |
2258 coding system calls | |
2259 for this] | |
2260 ------> [BUFFER] | |
2261 */ | |
2367 | 2262 |
2263 /* #### See comment | |
2264 | |
2265 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS | |
2266 | |
2267 in text.c. | |
2268 */ | |
2269 | |
771 | 2270 while (1) |
2271 { | |
2272 char tempbuf[1024]; /* some random amount */ | |
2273 Charbpos newpos, even_newer_pos; | |
2274 Charbpos oldpos = lisp_buffer_stream_startpos (istr); | |
2275 Bytecount size_in_bytes = | |
2276 Lstream_read (istr, tempbuf, sizeof (tempbuf)); | |
2277 | |
2278 if (!size_in_bytes) | |
2279 break; | |
2280 newpos = lisp_buffer_stream_startpos (istr); | |
2281 Lstream_write (ostr, tempbuf, size_in_bytes); | |
2282 even_newer_pos = lisp_buffer_stream_startpos (istr); | |
2283 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos), | |
2284 even_newer_pos, 0); | |
428 | 2285 } |
771 | 2286 |
2287 { | |
2288 Charcount retlen = | |
2289 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b; | |
2290 Lstream_close (istr); | |
2291 Lstream_close (ostr); | |
2292 NUNGCPRO; | |
2293 UNGCPRO; | |
2294 Lstream_delete (istr); | |
2295 if (!NILP (from_outstream)) | |
2296 Lstream_delete (XLSTREAM (from_outstream)); | |
2297 Lstream_delete (XLSTREAM (outstream)); | |
2298 if (!NILP (to_outstream)) | |
2299 Lstream_delete (XLSTREAM (to_outstream)); | |
2300 if (!NILP (auto_outstream)) | |
2301 Lstream_delete (XLSTREAM (auto_outstream)); | |
2302 Lstream_delete (XLSTREAM (lb_outstream)); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
2303 return make_fixnum (retlen); |
771 | 2304 } |
2305 } | |
2306 | |
3302 | 2307 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, |
2308 "*r\nzDecode from coding system: \ni", /* | |
771 | 2309 Decode the text between START and END which is encoded in CODING-SYSTEM. |
2310 This is useful if you've read in encoded text from a file without decoding | |
2311 it (e.g. you read in a JIS-formatted file but used the `binary' or | |
2312 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). | |
2313 Return length of decoded text. | |
3302 | 2314 BUFFER defaults to the current buffer if unspecified, and when interactive. |
771 | 2315 */ |
2316 (start, end, coding_system, buffer)) | |
2317 { | |
2318 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2319 CODING_DECODE); | |
2320 } | |
2321 | |
3302 | 2322 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, |
2323 "*r\nzEncode to coding system: \ni", /* | |
771 | 2324 Encode the text between START and END using CODING-SYSTEM. |
2325 This will, for example, convert Japanese characters into stuff such as | |
3302 | 2326 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text. |
2327 BUFFER defaults to the current buffer if unspecified, and when interactive. | |
771 | 2328 */ |
2329 (start, end, coding_system, buffer)) | |
2330 { | |
2331 return encode_decode_coding_region (start, end, coding_system, buffer, | |
2332 CODING_ENCODE); | |
428 | 2333 } |
2334 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2335 DEFUN ("query-coding-region", Fquery_coding_region, 3, 7, 0, /* |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2336 Work out whether CODING-SYSTEM can losslessly encode a region. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2337 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2338 START and END are the beginning and end of the region to check. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2339 CODING-SYSTEM is the coding system to try. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2340 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2341 Optional argument BUFFER is the buffer to check, and defaults to the current |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2342 buffer. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2343 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2344 IGNORE-INVALID-SEQUENCESP, also an optional argument, says to treat XEmacs |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2345 characters which have an unambiguous encoded representation, despite being |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2346 undefined in what they represent, as encodable. These chiefly arise with |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2347 variable-length encodings like UTF-8 and UTF-16, where an invalid sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2348 is passed through to XEmacs as a sequence of characters with a defined |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2349 correspondence to the octets on disk, but no non-error semantics; see the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2350 `invalid-sequence-coding-system' argument to `set-language-info'. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2351 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2352 They can also arise with fixed-length encodings like ISO 8859-7, where |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2353 certain octets on disk have undefined values, and treating them as |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2354 corresponding to the ISO 8859-1 characters with the same numerical values |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2355 may lead to data that is not understood by other applications. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2356 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2357 Optional argument ERRORP says to signal a `text-conversion-error' if some |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2358 character in the region cannot be encoded, and defaults to nil. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2359 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2360 Optional argument HIGHLIGHT says to display unencodable characters in the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2361 region using `query-coding-warning-face'. It defaults to nil. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2362 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2363 This function can return multiple values; the intention is that callers use |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2364 `multiple-value-bind' or the related CL multiple value functions to deal |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2365 with it. The first result is `t' if the region can be encoded using |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2366 CODING-SYSTEM, or `nil' if not. If the region cannot be encoded using |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2367 CODING-SYSTEM, the second result is a range table describing the positions |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2368 of the unencodable characters. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2369 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2370 Ranges that describe characters that would be ignored were |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2371 IGNORE-INVALID-SEQUENCESP non-nil map to the symbol `invalid-sequence'; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2372 other ranges map to the symbol `unencodable'. If IGNORE-INVALID-SEQUENCESP |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2373 is non-nil, all ranges will map to the symbol `unencodable'. See |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2374 `make-range-table' for more details of range tables. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2375 */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2376 (start, end, coding_system, buffer, ignore_invalid_sequencesp, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2377 errorp, highlight)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2378 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2379 Charbpos b, e; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2380 struct buffer *buf = decode_buffer (buffer, 1); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2381 Lisp_Object result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2382 int flags = 0, speccount = specpdl_depth (); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2383 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2384 coding_system = Fget_coding_system (coding_system); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2385 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2386 get_buffer_range_char (buf, start, end, &b, &e, 0); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2387 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2388 if (buf != current_buffer) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2389 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2390 record_unwind_protect (save_current_buffer_restore, Fcurrent_buffer ()); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2391 set_buffer_internal (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2392 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2393 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2394 record_unwind_protect (save_excursion_restore, save_excursion_save ()); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2395 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2396 BUF_SET_PT (buf, b); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2397 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2398 if (!NILP (ignore_invalid_sequencesp)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2399 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2400 flags |= QUERY_METHOD_IGNORE_INVALID_SEQUENCES; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2401 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2402 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2403 if (!NILP (errorp)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2404 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2405 flags |= QUERY_METHOD_ERRORP; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2406 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2407 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2408 if (!NILP (highlight)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2409 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2410 flags |= QUERY_METHOD_HIGHLIGHT; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2411 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2412 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2413 result = XCODESYSMETH_OR_GIVEN (coding_system, query, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2414 (coding_system, buf, e, flags), Qunbound); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2415 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2416 if (UNBOUNDP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2417 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2418 signal_error (Qtext_conversion_error, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2419 "Coding system doesn't say what it can encode", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2420 XCODING_SYSTEM_NAME (coding_system)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2421 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2422 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2423 result = (NILP (result)) ? Qt : values2 (Qnil, result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2424 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2425 return unbind_to_1 (speccount, result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2426 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2427 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
2428 |
428 | 2429 |
2430 /************************************************************************/ | |
771 | 2431 /* Chain methods */ |
428 | 2432 /************************************************************************/ |
2433 | |
771 | 2434 /* #### Need a way to create "opposite-direction" coding systems. */ |
2435 | |
2436 /* Chain two or more coding systems together to make a combination coding | |
2437 system. */ | |
2438 | |
2439 struct chain_coding_system | |
2440 { | |
2441 /* List of coding systems, in decode order */ | |
2442 Lisp_Object *chain; | |
2443 /* Number of coding systems in list */ | |
2444 int count; | |
2445 /* Coding system to return as a result of canonicalize-after-coding */ | |
2446 Lisp_Object canonicalize_after_coding; | |
2447 }; | |
2448 | |
2449 struct chain_coding_stream | |
2450 { | |
2451 int initted; | |
2452 /* Lstreams for chain coding system */ | |
2453 Lisp_Object *lstreams; | |
2454 int lstream_count; | |
2455 }; | |
2456 | |
1204 | 2457 static const struct memory_description chain_coding_system_description[] = { |
2458 { XD_INT, offsetof (struct chain_coding_system, count) }, | |
2367 | 2459 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain), |
2551 | 2460 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
1204 | 2461 { XD_LISP_OBJECT, offsetof (struct chain_coding_system, |
2462 canonicalize_after_coding) }, | |
771 | 2463 { XD_END } |
2464 }; | |
2465 | |
1204 | 2466 static const struct memory_description chain_coding_stream_description_1 [] = { |
2467 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) }, | |
2367 | 2468 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams), |
2551 | 2469 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
771 | 2470 { XD_END } |
2471 }; | |
2472 | |
1204 | 2473 const struct sized_memory_description chain_coding_stream_description = { |
2474 sizeof (struct chain_coding_stream), chain_coding_stream_description_1 | |
2475 }; | |
2476 | |
2477 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain); | |
2478 | |
771 | 2479 static Lisp_Object |
2480 chain_canonicalize (Lisp_Object codesys) | |
2481 { | |
2482 /* We make use of the fact that this method is called at init time, after | |
2483 properties have been parsed. init_method is called too early. */ | |
2484 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2485 any more. */ | |
2486 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys), | |
2487 XCODING_SYSTEM_CHAIN_CHAIN (codesys)); | |
2488 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys), | |
2489 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys), | |
2490 chain)); | |
2491 Fputhash (chain, codesys, Vchain_canonicalize_hash_table); | |
2492 return codesys; | |
2493 } | |
2494 | |
2495 static Lisp_Object | |
2496 chain_canonicalize_after_coding (struct coding_stream *str) | |
2497 { | |
2498 Lisp_Object cac = | |
2499 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys); | |
2500 if (!NILP (cac)) | |
2501 return cac; | |
2502 return str->codesys; | |
2503 #if 0 | |
2504 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2505 Lisp_Object us = str->codesys, codesys; | |
2506 int i; | |
2507 Lisp_Object chain; | |
2508 Lisp_Object tail; | |
2509 int changed = 0; | |
2510 | |
2511 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
2512 any more. */ | |
2513 if (str->direction == CODING_ENCODE || !data->initted) | |
2514 return us; | |
2515 | |
2516 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us), | |
2517 XCODING_SYSTEM_CHAIN_CHAIN (us)); | |
2518 | |
2519 tail = chain; | |
2520 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++) | |
2521 { | |
2522 codesys = (coding_stream_canonicalize_after_coding | |
2523 (XLSTREAM (data->lstreams[i]))); | |
2524 if (!EQ (codesys, XCAR (tail))) | |
2525 changed = 1; | |
2526 XCAR (tail) = codesys; | |
2527 tail = XCDR (tail); | |
2528 } | |
2529 | |
2530 if (!changed) | |
2531 return us; | |
2532 | |
2533 chain = delq_no_quit (Qnil, chain); | |
2534 | |
2535 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) && | |
2536 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us))) | |
2537 { | |
2538 if (NILP (chain)) | |
2539 return Qnil; | |
2540 if (NILP (XCDR (chain))) | |
2541 return XCAR (chain); | |
2542 } | |
2543 | |
2544 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us), | |
2545 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us), | |
2546 chain)), Vchain_canonicalize_hash_table, | |
2547 Qnil); | |
2548 if (!NILP (codesys)) | |
2549 return codesys; | |
2550 return make_internal_coding_system | |
2551 (us, "internal-chain-canonicalizer-wrapper", | |
2552 Qchain, Qunbound, list2 (Qchain, chain)); | |
2553 #endif /* 0 */ | |
2554 } | |
2555 | |
2556 static void | |
2557 chain_init (Lisp_Object codesys) | |
2558 { | |
2559 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil; | |
2560 } | |
2561 | |
2562 static void | |
2563 chain_mark (Lisp_Object codesys) | |
2564 { | |
2565 int i; | |
2566 | |
2567 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++) | |
2568 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]); | |
2569 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys)); | |
2570 } | |
2571 | |
2572 static void | |
2573 chain_mark_coding_stream_1 (struct chain_coding_stream *data) | |
2574 { | |
2575 int i; | |
2576 | |
2577 for (i = 0; i < data->lstream_count; i++) | |
2578 mark_object (data->lstreams[i]); | |
2579 } | |
2580 | |
2581 static void | |
2582 chain_mark_coding_stream (struct coding_stream *str) | |
2583 { | |
2584 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2585 } | |
2586 | |
2587 static void | |
2588 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
2589 { | |
2590 int i; | |
2591 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2592 write_ascstring (printcharfun, "("); |
771 | 2593 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++) |
2594 { | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2595 write_ascstring (printcharfun, i == 0 ? "" : "->"); |
771 | 2596 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i], |
2597 printcharfun, escapeflag); | |
2598 } | |
2599 { | |
2600 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs); | |
2601 if (!NILP (cac)) | |
2602 { | |
2603 if (i > 0) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2604 write_ascstring (printcharfun, " "); |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2605 write_ascstring (printcharfun, "canonicalize-after-coding="); |
771 | 2606 print_coding_system_in_print_method (cac, printcharfun, escapeflag); |
2607 } | |
2608 } | |
2609 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
2610 write_ascstring (printcharfun, ")"); |
771 | 2611 } |
2612 | |
2613 static void | |
2614 chain_rewind_coding_stream_1 (struct chain_coding_stream *data) | |
2615 { | |
2616 /* Each will rewind the next; there is always at least one stream (the | |
2617 dynarr stream at the end) if we're initted */ | |
2618 if (data->initted) | |
2619 Lstream_rewind (XLSTREAM (data->lstreams[0])); | |
2620 } | |
2621 | |
2622 static void | |
2623 chain_rewind_coding_stream (struct coding_stream *str) | |
2624 { | |
2625 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2626 } | |
2627 | |
2628 static void | |
2629 chain_init_coding_streams_1 (struct chain_coding_stream *data, | |
2630 unsigned_char_dynarr *dst, | |
2631 int ncodesys, Lisp_Object *codesys, | |
2632 enum encode_decode direction) | |
2633 { | |
2634 int i; | |
2635 Lisp_Object lstream_out; | |
2636 | |
2637 data->lstream_count = ncodesys + 1; | |
2638 data->lstreams = xnew_array (Lisp_Object, data->lstream_count); | |
2639 | |
2640 lstream_out = make_dynarr_output_stream (dst); | |
2641 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0); | |
2642 data->lstreams[data->lstream_count - 1] = lstream_out; | |
2643 | |
2644 for (i = ncodesys - 1; i >= 0; i--) | |
2645 { | |
2646 data->lstreams[i] = | |
2647 make_coding_output_stream | |
2648 (XLSTREAM (lstream_out), | |
2649 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i], | |
800 | 2650 direction, 0); |
771 | 2651 lstream_out = data->lstreams[i]; |
2652 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, | |
2653 0); | |
2654 } | |
2655 data->initted = 1; | |
2656 } | |
2657 | |
2658 static Bytecount | |
2659 chain_convert (struct coding_stream *str, const UExtbyte *src, | |
2660 unsigned_char_dynarr *dst, Bytecount n) | |
2661 { | |
2662 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
2663 | |
2664 if (str->eof) | |
2665 { | |
2666 /* Each will close the next; there is always at least one stream (the | |
2667 dynarr stream at the end) if we're initted. We need to close now | |
2668 because more data may be generated. */ | |
2669 if (data->initted) | |
2670 Lstream_close (XLSTREAM (data->lstreams[0])); | |
2671 return n; | |
2672 } | |
2673 | |
2674 if (!data->initted) | |
2675 chain_init_coding_streams_1 | |
2676 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys), | |
2677 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction); | |
2678 | |
2679 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0) | |
2680 return -1; | |
2681 return n; | |
2682 } | |
2683 | |
2684 static void | |
2685 chain_finalize_coding_stream_1 (struct chain_coding_stream *data) | |
2686 { | |
2687 if (data->lstreams) | |
2688 { | |
2297 | 2689 /* During GC, these objects are unmarked, and are about to be freed. |
2690 We do NOT want them on the free list, and that will cause lots of | |
2691 nastiness including crashes. Just let them be freed normally. */ | |
771 | 2692 if (!gc_in_progress) |
2693 { | |
2694 int i; | |
2297 | 2695 /* Order of deletion is important here! Delete from the head of |
2696 the chain and work your way towards the tail. In general, | |
2697 when you delete an object, there should be *NO* pointers to it | |
2698 anywhere. Deleting back-to-front would be a problem because | |
2699 there are pointers going forward. If there were pointers in | |
2700 both directions, you'd have to disconnect the pointers to a | |
2701 particular object before deleting it. */ | |
771 | 2702 for (i = 0; i < data->lstream_count; i++) |
2703 Lstream_delete (XLSTREAM ((data->lstreams)[i])); | |
2704 } | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2705 xfree (data->lstreams); |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5142
diff
changeset
|
2706 data->lstreams = 0; |
771 | 2707 } |
2708 } | |
2709 | |
2710 static void | |
2711 chain_finalize_coding_stream (struct coding_stream *str) | |
2712 { | |
2713 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
2714 } | |
2715 | |
2716 static void | |
2717 chain_finalize (Lisp_Object c) | |
2718 { | |
2719 if (XCODING_SYSTEM_CHAIN_CHAIN (c)) | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
2720 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c)); |
771 | 2721 } |
2722 | |
428 | 2723 static int |
771 | 2724 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
2725 { | |
2726 if (EQ (key, Qchain)) | |
2727 { | |
2728 Lisp_Object *cslist; | |
2729 int count = 0; | |
2730 int i; | |
2731 | |
2367 | 2732 { |
2733 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2734 { | |
2735 Fget_coding_system (elt); | |
2736 count++; | |
2737 } | |
2738 } | |
771 | 2739 |
2740 cslist = xnew_array (Lisp_Object, count); | |
2741 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist; | |
2742 | |
2743 count = 0; | |
2367 | 2744 { |
2745 EXTERNAL_LIST_LOOP_2 (elt, value) | |
2746 { | |
2747 cslist[count] = Fget_coding_system (elt); | |
2748 count++; | |
2749 } | |
2750 } | |
771 | 2751 |
2752 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count; | |
2753 | |
2754 for (i = 0; i < count - 1; i++) | |
2755 { | |
2756 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) != | |
2757 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE)) | |
2758 invalid_argument_2 ("Sink of first must match source of second", | |
2759 cslist[i], cslist[i + 1]); | |
2760 } | |
2761 } | |
2762 else if (EQ (key, Qcanonicalize_after_coding)) | |
2763 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = | |
2764 Fget_coding_system (value); | |
2765 else | |
2766 return 0; | |
2767 return 1; | |
2768 } | |
2769 | |
2770 static Lisp_Object | |
2771 chain_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2772 { | |
2773 if (EQ (prop, Qchain)) | |
2774 { | |
2775 Lisp_Object result = Qnil; | |
2776 int i; | |
2777 | |
2778 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++) | |
2779 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i], | |
2780 result); | |
2781 | |
2782 return Fnreverse (result); | |
2783 } | |
2784 else if (EQ (prop, Qcanonicalize_after_coding)) | |
2785 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system); | |
2786 else | |
2787 return Qunbound; | |
2788 } | |
2789 | |
2790 static enum source_sink_type | |
2791 chain_conversion_end_type (Lisp_Object codesys) | |
2792 { | |
2793 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys); | |
2794 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys); | |
2795 int charp_source, charp_sink; | |
2796 | |
2797 if (n == 0) | |
2798 return DECODES_BYTE_TO_BYTE; /* arbitrary */ | |
2799 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE); | |
2800 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK); | |
2801 | |
2802 switch (charp_source * 2 + charp_sink) | |
2803 { | |
2804 case 0: return DECODES_BYTE_TO_BYTE; | |
2805 case 1: return DECODES_BYTE_TO_CHARACTER; | |
2806 case 2: return DECODES_CHARACTER_TO_BYTE; | |
2807 case 3: return DECODES_CHARACTER_TO_CHARACTER; | |
2808 } | |
2809 | |
2500 | 2810 ABORT (); |
771 | 2811 return DECODES_BYTE_TO_BYTE; |
2812 } | |
2813 | |
2814 | |
2815 /************************************************************************/ | |
2816 /* No-conversion methods */ | |
2817 /************************************************************************/ | |
2818 | |
2819 /* "No conversion"; used for binary files. We use quotes because there | |
2820 really is some conversion being applied (it does byte<->char | |
2821 conversion), but it appears to the user as if the text is read in | |
2297 | 2822 without conversion. |
2823 | |
2824 #### Shouldn't we _call_ it that, then? And while we're at it, | |
2825 separate it into "to_internal" and "to_external"? */ | |
771 | 2826 DEFINE_CODING_SYSTEM_TYPE (no_conversion); |
2827 | |
2828 /* This is used when reading in "binary" files -- i.e. files that may | |
2829 contain all 256 possible byte values and that are not to be | |
2830 interpreted as being in any particular encoding. */ | |
2831 static Bytecount | |
2832 no_conversion_convert (struct coding_stream *str, | |
2833 const UExtbyte *src, | |
2834 unsigned_char_dynarr *dst, Bytecount n) | |
2835 { | |
2836 UExtbyte c; | |
2837 unsigned int ch = str->ch; | |
2838 Bytecount orign = n; | |
2839 | |
2840 if (str->direction == CODING_DECODE) | |
2841 { | |
2842 while (n--) | |
2843 { | |
2844 c = *src++; | |
2845 | |
2846 DECODE_ADD_BINARY_CHAR (c, dst); | |
2847 } | |
2848 | |
2849 if (str->eof) | |
2850 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2851 } | |
2852 else | |
2853 { | |
2854 | |
2855 while (n--) | |
2856 { | |
2857 c = *src++; | |
826 | 2858 if (byte_ascii_p (c)) |
771 | 2859 { |
2860 assert (ch == 0); | |
2861 Dynarr_add (dst, c); | |
2862 } | |
2863 #ifdef MULE | |
867 | 2864 else if (ibyte_leading_byte_p (c)) |
771 | 2865 { |
2866 assert (ch == 0); | |
2867 if (c == LEADING_BYTE_LATIN_ISO8859_1 || | |
2868 c == LEADING_BYTE_CONTROL_1) | |
2869 ch = c; | |
2870 else | |
2297 | 2871 /* #### This is just plain unacceptable. */ |
771 | 2872 Dynarr_add (dst, '~'); /* untranslatable character */ |
2873 } | |
2874 else | |
2875 { | |
2876 if (ch == LEADING_BYTE_LATIN_ISO8859_1) | |
2877 Dynarr_add (dst, c); | |
2878 else if (ch == LEADING_BYTE_CONTROL_1) | |
2879 { | |
2880 assert (c < 0xC0); | |
2881 Dynarr_add (dst, c - 0x20); | |
2882 } | |
2883 /* else it should be the second or third byte of an | |
2884 untranslatable character, so ignore it */ | |
2885 ch = 0; | |
2886 } | |
2887 #endif /* MULE */ | |
2888 | |
2889 } | |
2890 } | |
2891 | |
2892 str->ch = ch; | |
2893 return orign; | |
2894 } | |
2895 | |
2896 DEFINE_DETECTOR (no_conversion); | |
2897 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
2898 | |
2899 struct no_conversion_detector | |
2900 { | |
2901 int dummy; | |
2902 }; | |
2903 | |
2904 static void | |
2286 | 2905 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src), |
2906 Bytecount UNUSED (n)) | |
771 | 2907 { |
2908 /* Hack until we get better handling of this stuff! */ | |
2909 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY; | |
2910 } | |
2911 | |
2912 | |
2913 /************************************************************************/ | |
2914 /* Convert-eol methods */ | |
2915 /************************************************************************/ | |
2916 | |
2917 /* This is used to handle end-of-line (EOL) differences. It is | |
2819 | 2918 character-to-character, and works (when encoding) *BEFORE* sending data to |
2919 the main encoding routine -- thus, that routine must handle different EOL | |
2920 types itself if it does line-oriented type processing. This is unavoidable | |
2921 because we don't know whether the output of the main encoding routine is | |
2922 ASCII compatible (UTF-16 is definitely not, for example). [[ sjt sez this | |
2923 is bogus. There should be _no_ EOL processing (or processing of any kind) | |
2924 after conversion to external. ]] | |
771 | 2925 |
793 | 2926 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil. |
771 | 2927 */ |
2928 | |
2929 struct convert_eol_coding_system | |
2930 { | |
2931 enum eol_type subtype; | |
2132 | 2932 int dummy; /* On some architectures (eg ia64) the portable dumper can |
2933 produce unaligned access errors without this field. Probably | |
2934 because the combined structure of this structure and | |
2935 Lisp_Coding_System is not properly aligned. */ | |
771 | 2936 }; |
2937 | |
2938 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2939 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2940 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
2941 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
2942 | |
2943 struct convert_eol_coding_stream | |
2944 { | |
2945 enum eol_type actual; | |
2946 }; | |
2947 | |
1204 | 2948 static const struct memory_description |
771 | 2949 convert_eol_coding_system_description[] = { |
2950 { XD_END } | |
2951 }; | |
2952 | |
1204 | 2953 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol); |
2954 | |
771 | 2955 static void |
2286 | 2956 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun, |
2957 int UNUSED (escapeflag)) | |
771 | 2958 { |
2959 struct convert_eol_coding_system *data = | |
2960 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol); | |
2961 | |
2962 write_fmt_string (printcharfun, "(%s)", | |
2963 data->subtype == EOL_LF ? "lf" : | |
2964 data->subtype == EOL_CRLF ? "crlf" : | |
2965 data->subtype == EOL_CR ? "cr" : | |
793 | 2966 data->subtype == EOL_AUTODETECT ? "nil" : |
2500 | 2967 (ABORT(), "")); |
771 | 2968 } |
2969 | |
2970 static enum source_sink_type | |
2286 | 2971 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys)) |
771 | 2972 { |
2973 return DECODES_CHARACTER_TO_CHARACTER; | |
2974 } | |
2975 | |
2976 static int | |
2977 convert_eol_putprop (Lisp_Object codesys, | |
2978 Lisp_Object key, | |
2979 Lisp_Object value) | |
2980 { | |
2981 struct convert_eol_coding_system *data = | |
2982 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol); | |
2983 | |
2984 if (EQ (key, Qsubtype)) | |
2985 { | |
2986 if (EQ (value, Qlf) /* || EQ (value, Qunix) */) | |
2987 data->subtype = EOL_LF; | |
2988 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */) | |
2989 data->subtype = EOL_CRLF; | |
2990 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */) | |
2991 data->subtype = EOL_CR; | |
793 | 2992 else if (EQ (value, Qnil)) |
771 | 2993 data->subtype = EOL_AUTODETECT; |
2994 else invalid_constant ("Unrecognized eol type", value); | |
2995 } | |
2996 else | |
2997 return 0; | |
2998 return 1; | |
2999 } | |
3000 | |
3001 static Lisp_Object | |
3002 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
3003 { | |
3004 struct convert_eol_coding_system *data = | |
3005 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol); | |
3006 | |
3007 if (EQ (prop, Qsubtype)) | |
3008 { | |
3009 switch (data->subtype) | |
3010 { | |
3011 case EOL_LF: return Qlf; | |
3012 case EOL_CRLF: return Qcrlf; | |
3013 case EOL_CR: return Qcr; | |
793 | 3014 case EOL_AUTODETECT: return Qnil; |
2500 | 3015 default: ABORT (); |
771 | 3016 } |
3017 } | |
3018 | |
3019 return Qunbound; | |
3020 } | |
3021 | |
3022 static void | |
3023 convert_eol_init_coding_stream (struct coding_stream *str) | |
3024 { | |
3025 struct convert_eol_coding_stream *data = | |
3026 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3027 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
3028 } | |
3029 | |
3030 static Bytecount | |
867 | 3031 convert_eol_convert (struct coding_stream *str, const Ibyte *src, |
771 | 3032 unsigned_char_dynarr *dst, Bytecount n) |
3033 { | |
3034 if (str->direction == CODING_DECODE) | |
3035 { | |
3036 struct convert_eol_coding_stream *data = | |
3037 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3038 | |
3039 if (data->actual == EOL_AUTODETECT) | |
3040 { | |
3041 Bytecount n2 = n; | |
867 | 3042 const Ibyte *src2 = src; |
771 | 3043 |
3044 for (; n2; n2--) | |
3045 { | |
867 | 3046 Ibyte c = *src2++; |
771 | 3047 if (c == '\n') |
3048 { | |
3049 data->actual = EOL_LF; | |
3050 break; | |
3051 } | |
3052 else if (c == '\r') | |
3053 { | |
3054 if (n2 == 1) | |
3055 { | |
3056 /* If we're seeing a '\r' at the end of the data, then | |
3057 reject the '\r' right now so it doesn't become an | |
3058 issue in the code below -- unless we're at the end of | |
3059 the stream, in which case we can't do that (because | |
3060 then the '\r' will never get written out), and in any | |
3061 case we should be recognizing it at EOL_CR format. */ | |
3062 if (str->eof) | |
3063 data->actual = EOL_CR; | |
3064 else | |
3065 n--; | |
3066 break; | |
3067 } | |
3068 else if (*src2 == '\n') | |
3069 data->actual = EOL_CRLF; | |
3070 else | |
3071 data->actual = EOL_CR; | |
3072 break; | |
3073 } | |
3074 } | |
3075 } | |
3076 | |
3077 /* str->eof is set, the caller reached EOF on the other end and has | |
3078 no new data to give us. The only data we get is the data we | |
3079 rejected from last time. */ | |
3080 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT || | |
3081 (str->eof)) | |
3082 Dynarr_add_many (dst, src, n); | |
3083 else | |
3084 { | |
867 | 3085 const Ibyte *end = src + n; |
771 | 3086 while (1) |
3087 { | |
3088 /* Find the next section with no \r and add it. */ | |
867 | 3089 const Ibyte *runstart = src; |
3090 src = (Ibyte *) memchr (src, '\r', end - src); | |
771 | 3091 if (!src) |
3092 src = end; | |
3093 Dynarr_add_many (dst, runstart, src - runstart); | |
3094 /* Stop if at end ... */ | |
3095 if (src == end) | |
3096 break; | |
3097 /* ... else, translate as necessary. */ | |
3098 src++; | |
3099 if (data->actual == EOL_CR) | |
3100 Dynarr_add (dst, '\n'); | |
3101 /* We need to be careful here with CRLF. If we see a CR at the | |
3102 end of the data, we don't know if it's part of a CRLF, so we | |
3103 reject it. Otherwise: If it's part of a CRLF, eat it and | |
3104 loop; the following LF gets added next time around. If it's | |
3105 not part of a CRLF, add the CR and loop. The following | |
3106 character will be processed in the next loop iteration. This | |
3107 correctly handles a sequence like CR+CR+LF. */ | |
3108 else if (src == end) | |
3109 return n - 1; /* reject the CR at the end; we'll get it again | |
3110 next time the convert method is called */ | |
3111 else if (*src != '\n') | |
3112 Dynarr_add (dst, '\r'); | |
3113 } | |
3114 } | |
3115 | |
3116 return n; | |
3117 } | |
3118 else | |
3119 { | |
3120 enum eol_type subtype = | |
3121 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
867 | 3122 const Ibyte *end = src + n; |
771 | 3123 |
3124 /* We try to be relatively efficient here. */ | |
3125 if (subtype == EOL_LF) | |
3126 Dynarr_add_many (dst, src, n); | |
3127 else | |
3128 { | |
3129 while (1) | |
3130 { | |
3131 /* Find the next section with no \n and add it. */ | |
867 | 3132 const Ibyte *runstart = src; |
3133 src = (Ibyte *) memchr (src, '\n', end - src); | |
771 | 3134 if (!src) |
3135 src = end; | |
3136 Dynarr_add_many (dst, runstart, src - runstart); | |
3137 /* Stop if at end ... */ | |
3138 if (src == end) | |
3139 break; | |
3140 /* ... else, skip over \n and add its translation. */ | |
3141 src++; | |
3142 Dynarr_add (dst, '\r'); | |
3143 if (subtype == EOL_CRLF) | |
3144 Dynarr_add (dst, '\n'); | |
3145 } | |
3146 } | |
3147 | |
3148 return n; | |
3149 } | |
3150 } | |
3151 | |
3152 static Lisp_Object | |
3153 convert_eol_canonicalize_after_coding (struct coding_stream *str) | |
3154 { | |
3155 struct convert_eol_coding_stream *data = | |
3156 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
3157 | |
3158 if (str->direction == CODING_ENCODE) | |
3159 return str->codesys; | |
3160 | |
3161 switch (data->actual) | |
3162 { | |
3163 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf); | |
3164 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf); | |
3165 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr); | |
3166 case EOL_AUTODETECT: return str->codesys; | |
2500 | 3167 default: ABORT (); return Qnil; |
771 | 3168 } |
3169 } | |
3170 | |
3171 | |
3172 /************************************************************************/ | |
3173 /* Undecided methods */ | |
3174 /************************************************************************/ | |
3175 | |
3176 /* Do autodetection. We can autodetect the EOL type only, the coding | |
3177 system only, or both. We only do autodetection when decoding; when | |
3178 encoding, we just pass the data through. | |
3179 | |
3180 When doing just EOL detection, a coding system can be specified; if so, | |
3181 we will decode this data through the coding system before doing EOL | |
3182 detection. The reason for specifying this is so that | |
3183 canonicalize-after-coding works: We will canonicalize the specified | |
3184 coding system into the appropriate EOL type. When doing both coding and | |
3185 EOL detection, we do similar canonicalization, and also catch situations | |
3186 where the EOL type is overspecified, i.e. the detected coding system | |
3187 specifies an EOL type, and either switch to the equivalent | |
3188 non-EOL-processing coding system (if possible), or terminate EOL | |
3189 detection and use the specified EOL type. This prevents data from being | |
3190 EOL-processed twice. | |
3191 */ | |
3192 | |
3193 struct undecided_coding_system | |
3194 { | |
3195 int do_eol, do_coding; | |
3196 Lisp_Object cs; | |
3197 }; | |
3198 | |
3199 struct undecided_coding_stream | |
3200 { | |
3201 Lisp_Object actual; | |
3202 /* Either 2 or 3 lstreams here; see undecided_convert */ | |
3203 struct chain_coding_stream c; | |
3204 | |
3205 struct detection_state *st; | |
3206 }; | |
3207 | |
1204 | 3208 static const struct memory_description undecided_coding_system_description[] = { |
3209 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) }, | |
771 | 3210 { XD_END } |
3211 }; | |
3212 | |
1204 | 3213 static const struct memory_description undecided_coding_stream_description_1 [] = { |
3214 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) }, | |
2367 | 3215 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c), |
2551 | 3216 1, { &chain_coding_stream_description } }, |
1204 | 3217 { XD_END } |
3218 }; | |
3219 | |
3220 const struct sized_memory_description undecided_coding_stream_description = { | |
3221 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1 | |
3222 }; | |
3223 | |
3224 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided); | |
3225 | |
771 | 3226 static void |
3227 undecided_init (Lisp_Object codesys) | |
3228 { | |
3229 struct undecided_coding_system *data = | |
3230 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3231 | |
3232 data->cs = Qnil; | |
3233 } | |
3234 | |
3235 static void | |
3236 undecided_mark (Lisp_Object codesys) | |
3237 { | |
3238 struct undecided_coding_system *data = | |
3239 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3240 | |
3241 mark_object (data->cs); | |
3242 } | |
3243 | |
3244 static void | |
3245 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
3246 { | |
3247 struct undecided_coding_system *data = | |
3248 XCODING_SYSTEM_TYPE_DATA (cs, undecided); | |
3249 int need_space = 0; | |
3250 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3251 write_ascstring (printcharfun, "("); |
771 | 3252 if (data->do_eol) |
3253 { | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3254 write_ascstring (printcharfun, "do-eol"); |
771 | 3255 need_space = 1; |
3256 } | |
3257 if (data->do_coding) | |
3258 { | |
3259 if (need_space) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3260 write_ascstring (printcharfun, " "); |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3261 write_ascstring (printcharfun, "do-coding"); |
771 | 3262 need_space = 1; |
3263 } | |
3264 if (!NILP (data->cs)) | |
3265 { | |
3266 if (need_space) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3267 write_ascstring (printcharfun, " "); |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3268 write_ascstring (printcharfun, "coding-system="); |
771 | 3269 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag); |
3270 } | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
3271 write_ascstring (printcharfun, ")"); |
771 | 3272 } |
3273 | |
3274 static void | |
3275 undecided_mark_coding_stream (struct coding_stream *str) | |
3276 { | |
1204 | 3277 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual); |
771 | 3278 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); |
3279 } | |
3280 | |
3281 static int | |
3282 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
3283 { | |
3284 struct undecided_coding_system *data = | |
3285 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3286 | |
3287 if (EQ (key, Qdo_eol)) | |
3288 data->do_eol = 1; | |
3289 else if (EQ (key, Qdo_coding)) | |
3290 data->do_coding = 1; | |
3291 else if (EQ (key, Qcoding_system)) | |
3292 data->cs = get_coding_system_for_text_file (value, 0); | |
3293 else | |
3294 return 0; | |
3295 return 1; | |
3296 } | |
3297 | |
3298 static Lisp_Object | |
3299 undecided_getprop (Lisp_Object codesys, Lisp_Object prop) | |
3300 { | |
3301 struct undecided_coding_system *data = | |
3302 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3303 | |
3304 if (EQ (prop, Qdo_eol)) | |
3305 return data->do_eol ? Qt : Qnil; | |
3306 if (EQ (prop, Qdo_coding)) | |
3307 return data->do_coding ? Qt : Qnil; | |
3308 if (EQ (prop, Qcoding_system)) | |
3309 return data->cs; | |
3310 return Qunbound; | |
3311 } | |
3312 | |
3313 static struct detection_state * | |
3314 allocate_detection_state (void) | |
3315 { | |
3316 int i; | |
3317 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3318 struct detection_state *block; | |
3319 | |
3320 for (i = 0; i < coding_detector_count; i++) | |
3321 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3322 | |
3323 block = (struct detection_state *) xmalloc_and_zero (size); | |
3324 | |
3325 size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
3326 for (i = 0; i < coding_detector_count; i++) | |
3327 { | |
3328 block->data_offset[i] = size; | |
3329 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
3330 } | |
3331 | |
3332 return block; | |
3333 } | |
3334 | |
3335 static void | |
3336 free_detection_state (struct detection_state *st) | |
3337 { | |
3338 int i; | |
3339 | |
3340 for (i = 0; i < coding_detector_count; i++) | |
3341 { | |
3342 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method) | |
3343 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method | |
3344 (st); | |
3345 } | |
3346 | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
3347 xfree (st); |
771 | 3348 } |
3349 | |
3350 static int | |
3351 coding_category_symbol_to_id (Lisp_Object symbol) | |
428 | 3352 { |
3353 int i; | |
3354 | |
3355 CHECK_SYMBOL (symbol); | |
771 | 3356 for (i = 0; i < coding_detector_count; i++) |
3357 { | |
3358 detector_category_dynarr *cats = | |
3359 Dynarr_at (all_coding_detectors, i).cats; | |
3360 int j; | |
3361 | |
3362 for (j = 0; j < Dynarr_length (cats); j++) | |
3363 if (EQ (Dynarr_at (cats, j).sym, symbol)) | |
3364 return Dynarr_at (cats, j).id; | |
3365 } | |
3366 | |
563 | 3367 invalid_constant ("Unrecognized coding category", symbol); |
1204 | 3368 RETURN_NOT_REACHED (0); |
428 | 3369 } |
3370 | |
771 | 3371 static Lisp_Object |
3372 coding_category_id_to_symbol (int id) | |
428 | 3373 { |
3374 int i; | |
771 | 3375 |
3376 for (i = 0; i < coding_detector_count; i++) | |
3377 { | |
3378 detector_category_dynarr *cats = | |
3379 Dynarr_at (all_coding_detectors, i).cats; | |
3380 int j; | |
3381 | |
3382 for (j = 0; j < Dynarr_length (cats); j++) | |
3383 if (id == Dynarr_at (cats, j).id) | |
3384 return Dynarr_at (cats, j).sym; | |
3385 } | |
3386 | |
2500 | 3387 ABORT (); |
771 | 3388 return Qnil; /* (usually) not reached */ |
428 | 3389 } |
3390 | |
771 | 3391 static Lisp_Object |
3392 detection_result_number_to_symbol (enum detection_result result) | |
428 | 3393 { |
1494 | 3394 /* let compiler warn if not all enumerators are handled */ |
3395 switch (result) { | |
3396 #define FROB(sym, num) case num: return (sym) | |
771 | 3397 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); |
3398 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3399 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3400 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3401 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3402 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3403 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3404 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3405 #undef FROB | |
1494 | 3406 } |
771 | 3407 |
2500 | 3408 ABORT (); |
771 | 3409 return Qnil; /* (usually) not reached */ |
3410 } | |
3411 | |
778 | 3412 #if 0 /* not used */ |
771 | 3413 static enum detection_result |
3414 detection_result_symbol_to_number (Lisp_Object symbol) | |
3415 { | |
1494 | 3416 /* using switch here would be bad style, and doesn't help */ |
771 | 3417 #define FROB(sym, num) if (EQ (symbol, sym)) return (num) |
3418 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); | |
3419 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
3420 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
1494 | 3421 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
771 | 3422 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
3423 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
3424 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
3425 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
3426 #undef FROB | |
3427 | |
3428 invalid_constant ("Unrecognized detection result", symbol); | |
3429 return ((enum detection_result) 0); /* not reached */ | |
3430 } | |
778 | 3431 #endif /* 0 */ |
771 | 3432 |
3433 /* Set all detection results for a given detector to a specified value. */ | |
3434 void | |
3435 set_detection_results (struct detection_state *st, int detector, int given) | |
3436 { | |
3437 detector_category_dynarr *cats = | |
3438 Dynarr_at (all_coding_detectors, detector).cats; | |
3439 int i; | |
3440 | |
3441 for (i = 0; i < Dynarr_length (cats); i++) | |
3442 st->categories[Dynarr_at (cats, i).id] = given; | |
3443 } | |
428 | 3444 |
3445 static int | |
3446 acceptable_control_char_p (int c) | |
3447 { | |
3448 switch (c) | |
3449 { | |
3450 /* Allow and ignore control characters that you might | |
3451 reasonably see in a text file */ | |
3452 case '\r': | |
3453 case '\n': | |
3454 case '\t': | |
3455 case 7: /* bell */ | |
3456 case 8: /* backspace */ | |
3457 case 11: /* vertical tab */ | |
3458 case 12: /* form feed */ | |
3459 case 26: /* MS-DOS C-z junk */ | |
3460 case 31: /* '^_' -- for info */ | |
3461 return 1; | |
3462 default: | |
3463 return 0; | |
3464 } | |
3465 } | |
3466 | |
771 | 3467 #ifdef DEBUG_XEMACS |
3468 | |
3469 static UExtbyte | |
3470 hex_digit_to_char (int digit) | |
428 | 3471 { |
771 | 3472 if (digit < 10) |
3473 return digit + '0'; | |
3474 else | |
3475 return digit - 10 + 'A'; | |
428 | 3476 } |
3477 | |
771 | 3478 static void |
3479 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) | |
428 | 3480 { |
3425 | 3481 Extbyte *ascii = alloca_array (Extbyte, n + 1); |
3482 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1); | |
771 | 3483 int i; |
3413 | 3484 DECLARE_EISTRING (eistr_ascii); |
3485 DECLARE_EISTRING (eistr_hex); | |
771 | 3486 |
3487 for (i = 0; i < n; i++) | |
428 | 3488 { |
3425 | 3489 Extbyte c = src[i]; |
771 | 3490 if (c < 0x20) |
3491 ascii[i] = '.'; | |
428 | 3492 else |
771 | 3493 ascii[i] = c; |
3494 hex[3 * i] = hex_digit_to_char (c >> 4); | |
3495 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); | |
3496 hex[3 * i + 2] = ' '; | |
428 | 3497 } |
771 | 3498 ascii[i] = '\0'; |
3499 hex[3 * i - 1] = '\0'; | |
3413 | 3500 |
3501 eicpy_ext(eistr_hex, hex, Qbinary); | |
3502 eicpy_ext(eistr_ascii, ascii, Qbinary); | |
3503 | |
3425 | 3504 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex)); |
428 | 3505 } |
3506 | |
771 | 3507 #endif /* DEBUG_XEMACS */ |
3508 | |
3509 /* Attempt to determine the encoding of the given text. Before calling | |
3510 this function for the first time, you must zero out the detection state. | |
428 | 3511 |
3512 Returns: | |
3513 | |
771 | 3514 0 == keep going |
3515 1 == stop | |
428 | 3516 */ |
3517 | |
3518 static int | |
771 | 3519 detect_coding_type (struct detection_state *st, const UExtbyte *src, |
3520 Bytecount n) | |
428 | 3521 { |
771 | 3522 Bytecount n2 = n; |
3523 const UExtbyte *src2 = src; | |
3524 int i; | |
3525 | |
3526 #ifdef DEBUG_XEMACS | |
3527 if (!NILP (Vdebug_coding_detection)) | |
3528 { | |
3529 int bytes = min (16, n); | |
3530 stderr_out ("detect_coding_type: processing %ld bytes\n", n); | |
3531 stderr_out ("First %d: ", bytes); | |
3532 output_bytes_in_ascii_and_hex (src, bytes); | |
3533 stderr_out ("\nLast %d: ", bytes); | |
3534 output_bytes_in_ascii_and_hex (src + n - bytes, bytes); | |
3535 stderr_out ("\n"); | |
3536 } | |
3537 #endif /* DEBUG_XEMACS */ | |
428 | 3538 if (!st->seen_non_ascii) |
3539 { | |
771 | 3540 for (; n2; n2--, src2++) |
428 | 3541 { |
771 | 3542 UExtbyte c = *src2; |
428 | 3543 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) |
3544 { | |
3545 st->seen_non_ascii = 1; | |
3546 break; | |
3547 } | |
3548 } | |
3549 } | |
3550 | |
771 | 3551 for (i = 0; i < coding_detector_count; i++) |
3552 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n); | |
3553 | |
3554 st->bytes_seen += n; | |
3555 | |
3556 #ifdef DEBUG_XEMACS | |
3557 if (!NILP (Vdebug_coding_detection)) | |
3558 { | |
3559 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii); | |
1494 | 3560 if (coding_detector_category_count <= 0) |
3561 stderr_out ("found %d detector categories\n", | |
3562 coding_detector_category_count); | |
771 | 3563 for (i = 0; i < coding_detector_category_count; i++) |
3564 stderr_out_lisp | |
3565 ("%s: %s\n", | |
3566 2, | |
3567 coding_category_id_to_symbol (i), | |
3568 detection_result_number_to_symbol ((enum detection_result) | |
3569 st->categories[i])); | |
3570 } | |
3571 #endif /* DEBUG_XEMACS */ | |
3572 | |
3573 { | |
3574 int not_unlikely = 0; | |
3575 int retval; | |
3576 | |
3577 for (i = 0; i < coding_detector_category_count; i++) | |
3578 if (st->categories[i] >= 0) | |
3579 not_unlikely++; | |
3580 | |
3581 retval = (not_unlikely <= 1 | |
3582 #if 0 /* this is bogus */ | |
3583 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION | |
428 | 3584 #endif |
771 | 3585 ); |
3586 | |
3587 #ifdef DEBUG_XEMACS | |
3588 if (!NILP (Vdebug_coding_detection)) | |
3589 stderr_out ("detect_coding_type: returning %d (%s)\n", | |
3590 retval, retval ? "stop" : "keep going"); | |
3591 #endif /* DEBUG_XEMACS */ | |
3592 | |
3593 return retval; | |
428 | 3594 } |
3595 } | |
3596 | |
3597 static Lisp_Object | |
771 | 3598 detected_coding_system (struct detection_state *st) |
428 | 3599 { |
771 | 3600 int i; |
3601 int even = 1; | |
3602 | |
3603 if (st->seen_non_ascii) | |
3604 { | |
3605 for (i = 0; i < coding_detector_category_count; i++) | |
3606 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY) | |
3607 { | |
3608 even = 0; | |
3609 break; | |
3610 } | |
3611 } | |
3612 | |
3613 /* #### Here we are ignoring the results of detection when it's all | |
3614 ASCII. This is obviously a bad thing. But we need to fix up the | |
3615 existing detection methods somewhat before we can switch. */ | |
3616 if (even) | |
428 | 3617 { |
3618 /* If the file was entirely or basically ASCII, use the | |
3619 default value of `buffer-file-coding-system'. */ | |
3620 Lisp_Object retval = | |
3621 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system; | |
3622 if (!NILP (retval)) | |
3623 { | |
771 | 3624 retval = find_coding_system_for_text_file (retval, 0); |
428 | 3625 if (NILP (retval)) |
3626 { | |
3627 warn_when_safe | |
3628 (Qbad_variable, Qwarning, | |
3629 "Invalid `default-buffer-file-coding-system', set to nil"); | |
3630 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; | |
3631 } | |
3632 } | |
3633 if (NILP (retval)) | |
4100 | 3634 retval = Fget_coding_system (Qbinary); |
428 | 3635 return retval; |
3636 } | |
3637 else | |
3638 { | |
771 | 3639 int likelihood; |
3640 Lisp_Object retval = Qnil; | |
3641 | |
3642 /* Look through the coding categories first by likelihood and then by | |
3643 priority and find the first one that is allowed. */ | |
3644 | |
3645 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--) | |
428 | 3646 { |
771 | 3647 for (i = 0; i < coding_detector_category_count; i++) |
3648 { | |
3649 int cat = coding_category_by_priority[i]; | |
3650 if (st->categories[cat] == likelihood && | |
3651 !NILP (coding_category_system[cat])) | |
3652 { | |
3653 retval = (get_coding_system_for_text_file | |
3654 (coding_category_system[cat], 0)); | |
3655 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY) | |
3656 warn_when_safe_lispobj | |
3657 (intern ("detection"), | |
793 | 3658 Qwarning, |
771 | 3659 emacs_sprintf_string_lisp |
3660 ( | |
3661 "Detected coding %s is unlikely to be correct (likelihood == `%s')", | |
3662 Qnil, 2, XCODING_SYSTEM_NAME (retval), | |
3663 detection_result_number_to_symbol | |
3664 ((enum detection_result) likelihood))); | |
3665 return retval; | |
3666 } | |
3667 } | |
428 | 3668 } |
771 | 3669 |
3670 return Fget_coding_system (Qraw_text); | |
428 | 3671 } |
3672 } | |
3673 | |
1347 | 3674 /* Look for a coding system in the string (skipping over leading |
3675 blanks). If found, return it, otherwise nil. */ | |
3676 | |
3677 static Lisp_Object | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3678 snarf_coding_system (const UExtbyte *p, Bytecount len, |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3679 Boolint find_coding_system_p) |
1347 | 3680 { |
3681 Bytecount n; | |
2531 | 3682 UExtbyte *name; |
1347 | 3683 |
3684 while (*p == ' ' || *p == '\t') p++, len--; | |
3685 len = min (len, 1000); | |
3686 name = alloca_ibytes (len + 1); | |
3687 memcpy (name, p, len); | |
3688 name[len] = '\0'; | |
3689 | |
3690 /* Get coding system name */ | |
3691 /* Characters valid in a MIME charset name (rfc 1521), | |
3692 and in a Lisp symbol name. */ | |
3693 n = qxestrspn (name, | |
3694 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
3695 "abcdefghijklmnopqrstuvwxyz" | |
3696 "0123456789" | |
3697 "!$%&*+-.^_{|}~"); | |
3698 if (n > 0) | |
3699 { | |
3700 name[n] = '\0'; | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
3701 /* This call to intern_istring() is OK because we already verified that |
2531 | 3702 there are only ASCII characters in the string */ |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3703 if (find_coding_system_p) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3704 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3705 return |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3706 find_coding_system_for_text_file (intern_istring ((Ibyte *) name), |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3707 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3708 } |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3709 else |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3710 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3711 return build_ascstring ((const Ascbyte *) name); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3712 } |
1347 | 3713 } |
3714 | |
3715 return Qnil; | |
3716 } | |
3717 | |
428 | 3718 /* Given a seekable read stream and potential coding system and EOL type |
3719 as specified, do any autodetection that is called for. If the | |
3720 coding system and/or EOL type are not `autodetect', they will be left | |
3721 alone; but this function will never return an autodetect coding system | |
3722 or EOL type. | |
3723 | |
3724 This function does not automatically fetch subsidiary coding systems; | |
3725 that should be unnecessary with the explicit eol-type argument. */ | |
3726 | |
3727 #define LENGTH(string_constant) (sizeof (string_constant) - 1) | |
3728 | |
771 | 3729 static Lisp_Object |
3730 unwind_free_detection_state (Lisp_Object opaque) | |
3731 { | |
3732 struct detection_state *st = | |
3733 (struct detection_state *) get_opaque_ptr (opaque); | |
3734 free_detection_state (st); | |
3735 free_opaque_ptr (opaque); | |
3736 return Qnil; | |
3737 } | |
3738 | |
3739 static Lisp_Object | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3740 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len, |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3741 Boolint find_coding_system_p) |
428 | 3742 { |
771 | 3743 const UExtbyte *p; |
3744 const UExtbyte *scan_end; | |
2531 | 3745 Bytecount cookie_len; |
771 | 3746 |
3747 /* Look for initial "-*-"; mode line prefix */ | |
3748 for (p = data, | |
3749 scan_end = data + len - LENGTH ("-*-coding:?-*-"); | |
3750 p <= scan_end | |
3751 && *p != '\n' | |
3752 && *p != '\r'; | |
3753 p++) | |
3754 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3755 { | |
3756 const UExtbyte *local_vars_beg = p + 3; | |
3757 /* Look for final "-*-"; mode line suffix */ | |
3758 for (p = local_vars_beg, | |
3759 scan_end = data + len - LENGTH ("-*-"); | |
3760 p <= scan_end | |
428 | 3761 && *p != '\n' |
3762 && *p != '\r'; | |
771 | 3763 p++) |
3764 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
3765 { | |
3766 const UExtbyte *suffix = p; | |
3767 /* Look for "coding:" */ | |
3768 for (p = local_vars_beg, | |
3769 scan_end = suffix - LENGTH ("coding:?"); | |
3770 p <= scan_end; | |
3771 p++) | |
3772 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0 | |
3773 && (p == local_vars_beg | |
3774 || (*(p-1) == ' ' || | |
3775 *(p-1) == '\t' || | |
3776 *(p-1) == ';'))) | |
3777 { | |
3778 p += LENGTH ("coding:"); | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3779 return snarf_coding_system (p, suffix - p, |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3780 find_coding_system_p); |
771 | 3781 break; |
3782 } | |
3783 break; | |
3784 } | |
3785 break; | |
3786 } | |
3787 | |
2531 | 3788 /* Look for ;;;###coding system */ |
3789 | |
3790 cookie_len = LENGTH (";;;###coding system: "); | |
3791 | |
3792 for (p = data, | |
3793 scan_end = data + len - cookie_len; | |
3794 p <= scan_end; | |
3795 p++) | |
1347 | 3796 { |
2531 | 3797 if (*p == ';' && !memcmp (p, ";;;###coding system: ", cookie_len)) |
3798 { | |
3799 const UExtbyte *suffix; | |
3800 | |
3801 p += cookie_len; | |
3802 suffix = p; | |
3803 while (suffix < scan_end && !isspace (*suffix)) | |
3804 suffix++; | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3805 return snarf_coding_system (p, suffix - p, find_coding_system_p); |
2531 | 3806 } |
1347 | 3807 } |
3808 | |
3809 return Qnil; | |
771 | 3810 } |
3811 | |
3812 static Lisp_Object | |
3813 determine_real_coding_system (Lstream *stream) | |
3814 { | |
3815 struct detection_state *st = allocate_detection_state (); | |
3816 int depth = record_unwind_protect (unwind_free_detection_state, | |
3817 make_opaque_ptr (st)); | |
3818 UExtbyte buf[4096]; | |
3819 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3820 Lisp_Object coding_system |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3821 = look_for_coding_system_magic_cookie (buf, nread, 1); |
771 | 3822 |
3823 if (NILP (coding_system)) | |
3824 { | |
3825 while (1) | |
3826 { | |
3827 if (detect_coding_type (st, buf, nread)) | |
428 | 3828 break; |
771 | 3829 nread = Lstream_read (stream, buf, sizeof (buf)); |
3830 if (nread == 0) | |
3831 break; | |
428 | 3832 } |
771 | 3833 |
3834 coding_system = detected_coding_system (st); | |
428 | 3835 } |
3836 | |
3837 Lstream_rewind (stream); | |
771 | 3838 |
3839 unbind_to (depth); | |
3840 return coding_system; | |
3841 } | |
3842 | |
3843 static void | |
3844 undecided_init_coding_stream (struct coding_stream *str) | |
3845 { | |
3846 struct undecided_coding_stream *data = | |
3847 CODING_STREAM_TYPE_DATA (str, undecided); | |
3848 struct undecided_coding_system *csdata = | |
3849 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3850 | |
3851 data->actual = Qnil; | |
3852 | |
3853 if (str->direction == CODING_DECODE) | |
3854 { | |
3855 Lstream *lst = str->other_end; | |
3856 | |
3857 if ((lst->flags & LSTREAM_FL_READ) && | |
3858 Lstream_seekable_p (lst) && | |
3859 csdata->do_coding) | |
3860 /* We can determine the coding system now. */ | |
3861 data->actual = determine_real_coding_system (lst); | |
3862 } | |
1494 | 3863 |
3864 #ifdef DEBUG_XEMACS | |
3865 if (!NILP (Vdebug_coding_detection)) | |
3866 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual); | |
3867 #endif /* DEBUG_XEMACS */ | |
771 | 3868 } |
3869 | |
3870 static void | |
3871 undecided_rewind_coding_stream (struct coding_stream *str) | |
3872 { | |
3873 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3874 } | |
3875 | |
3876 static void | |
3877 undecided_finalize_coding_stream (struct coding_stream *str) | |
3878 { | |
3879 struct undecided_coding_stream *data = | |
3880 CODING_STREAM_TYPE_DATA (str, undecided); | |
3881 | |
3882 chain_finalize_coding_stream_1 | |
3883 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
3884 if (data->st) | |
3885 free_detection_state (data->st); | |
3886 } | |
3887 | |
3888 static Lisp_Object | |
3889 undecided_canonicalize (Lisp_Object codesys) | |
3890 { | |
3891 struct undecided_coding_system *csdata = | |
3892 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
3893 if (!csdata->do_eol && !csdata->do_coding) | |
3894 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs; | |
3895 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs)) | |
3896 return Fget_coding_system (Qconvert_eol_autodetect); | |
3897 return codesys; | |
3898 } | |
3899 | |
3900 static Bytecount | |
3901 undecided_convert (struct coding_stream *str, const UExtbyte *src, | |
3902 unsigned_char_dynarr *dst, Bytecount n) | |
3903 { | |
3904 int first_time = 0; | |
3905 | |
3906 if (str->direction == CODING_DECODE) | |
3907 { | |
3908 /* At this point, we have only the following possibilities: | |
3909 | |
3910 do_eol && do_coding | |
3911 do_coding only | |
3912 do_eol only and a coding system was specified | |
3913 | |
3914 Other possibilities are removed during undecided_canonicalize. | |
3915 | |
3916 Therefore, our substreams are either | |
3917 | |
3918 lstream_coding -> lstream_dynarr, or | |
3919 lstream_coding -> lstream_eol -> lstream_dynarr. | |
3920 */ | |
3921 struct undecided_coding_system *csdata = | |
3922 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
3923 struct undecided_coding_stream *data = | |
3924 CODING_STREAM_TYPE_DATA (str, undecided); | |
3925 | |
3926 if (str->eof) | |
3927 { | |
3928 /* Each will close the next. We need to close now because more | |
3929 data may be generated. */ | |
3930 if (data->c.initted) | |
3931 Lstream_close (XLSTREAM (data->c.lstreams[0])); | |
3932 return n; | |
3933 } | |
3934 | |
3935 if (!data->c.initted) | |
3936 { | |
3937 data->c.lstream_count = csdata->do_eol ? 3 : 2; | |
3938 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count); | |
3939 | |
3940 data->c.lstreams[data->c.lstream_count - 1] = | |
3941 make_dynarr_output_stream (dst); | |
3942 Lstream_set_buffering | |
3943 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3944 LSTREAM_UNBUFFERED, 0); | |
3945 if (csdata->do_eol) | |
3946 { | |
3947 data->c.lstreams[1] = | |
3948 make_coding_output_stream | |
3949 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
3950 Fget_coding_system (Qconvert_eol_autodetect), | |
800 | 3951 CODING_DECODE, 0); |
771 | 3952 Lstream_set_buffering |
3953 (XLSTREAM (data->c.lstreams[1]), | |
3954 LSTREAM_UNBUFFERED, 0); | |
3955 } | |
3956 | |
3957 data->c.lstreams[0] = | |
3958 make_coding_output_stream | |
3959 (XLSTREAM (data->c.lstreams[1]), | |
3960 /* Substitute binary if we need to detect the encoding */ | |
3961 csdata->do_coding ? Qbinary : csdata->cs, | |
800 | 3962 CODING_DECODE, 0); |
771 | 3963 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]), |
3964 LSTREAM_UNBUFFERED, 0); | |
3965 | |
3966 first_time = 1; | |
3967 data->c.initted = 1; | |
3968 } | |
3969 | |
3970 /* If necessary, do encoding-detection now. We do this when we're a | |
3971 writing stream or a non-seekable reading stream, meaning that we | |
3972 can't just process the whole input, rewind, and start over. */ | |
3973 | |
3974 if (csdata->do_coding) | |
3975 { | |
3976 int actual_was_nil = NILP (data->actual); | |
3977 if (NILP (data->actual)) | |
3978 { | |
3979 if (!data->st) | |
3980 data->st = allocate_detection_state (); | |
3981 if (first_time) | |
3982 /* #### This is cheesy. What we really ought to do is buffer | |
3983 up a certain minimum amount of data to get a better result. | |
3984 */ | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
3985 data->actual = look_for_coding_system_magic_cookie (src, n, 1); |
771 | 3986 if (NILP (data->actual)) |
3987 { | |
3988 /* #### This is cheesy. What we really ought to do is buffer | |
3989 up a certain minimum amount of data so as to get a less | |
3990 random result when doing subprocess detection. */ | |
3991 detect_coding_type (data->st, src, n); | |
3992 data->actual = detected_coding_system (data->st); | |
4100 | 3993 /* kludge to prevent infinite recursion */ |
3994 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system) | |
3995 data->actual = Fget_coding_system (Qbinary); | |
771 | 3996 } |
3997 } | |
3998 /* We need to set the detected coding system if we actually have | |
3999 such a coding system but didn't before. That is the case | |
4000 either when we just detected it in the previous code or when | |
4001 it was detected during undecided_init_coding_stream(). We | |
4002 can check for that using first_time. */ | |
4003 if (!NILP (data->actual) && (actual_was_nil || first_time)) | |
4004 { | |
4005 /* If the detected coding system doesn't allow for EOL | |
4006 autodetection, try to get the equivalent that does; | |
4007 otherwise, disable EOL detection (overriding whatever | |
4008 may already have been detected). */ | |
4009 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT) | |
4010 { | |
4011 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual))) | |
4012 data->actual = | |
4013 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual); | |
4014 else if (data->c.lstream_count == 3) | |
4015 set_coding_stream_coding_system | |
4016 (XLSTREAM (data->c.lstreams[1]), | |
4017 Fget_coding_system (Qidentity)); | |
4018 } | |
4019 set_coding_stream_coding_system | |
4020 (XLSTREAM (data->c.lstreams[0]), data->actual); | |
4021 } | |
4022 } | |
4023 | |
4024 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0) | |
4025 return -1; | |
4026 return n; | |
4027 } | |
4028 else | |
4029 return no_conversion_convert (str, src, dst, n); | |
4030 } | |
4031 | |
4032 static Lisp_Object | |
4033 undecided_canonicalize_after_coding (struct coding_stream *str) | |
4034 { | |
4035 struct undecided_coding_stream *data = | |
4036 CODING_STREAM_TYPE_DATA (str, undecided); | |
4037 Lisp_Object ret, eolret; | |
4038 | |
4039 if (str->direction == CODING_ENCODE) | |
4040 return str->codesys; | |
4041 | |
4042 if (!data->c.initted) | |
4647
e4ed58cb0e5b
Fix bugs with #'find-file, 0-length files, & coding-system-for-read specified.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
4043 return str->codesys; |
771 | 4044 |
4045 ret = coding_stream_canonicalize_after_coding | |
4046 (XLSTREAM (data->c.lstreams[0])); | |
4047 if (NILP (ret)) | |
4647
e4ed58cb0e5b
Fix bugs with #'find-file, 0-length files, & coding-system-for-read specified.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
4048 ret = str->codesys; |
771 | 4049 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) |
4050 return ret; | |
4051 eolret = coding_stream_canonicalize_after_coding | |
4052 (XLSTREAM (data->c.lstreams[1])); | |
4053 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) | |
4054 return ret; | |
4055 return | |
4056 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret, | |
4057 Qsubtype)); | |
4058 } | |
4059 | |
4060 | |
4061 /************************************************************************/ | |
4062 /* Lisp interface: Coding category functions and detection */ | |
4063 /************************************************************************/ | |
4064 | |
4065 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /* | |
4066 Return a list of all recognized coding categories. | |
4067 */ | |
4068 ()) | |
4069 { | |
4070 int i; | |
4071 Lisp_Object list = Qnil; | |
4072 | |
4073 for (i = 0; i < coding_detector_count; i++) | |
4074 { | |
4075 detector_category_dynarr *cats = | |
4076 Dynarr_at (all_coding_detectors, i).cats; | |
4077 int j; | |
4078 | |
4079 for (j = 0; j < Dynarr_length (cats); j++) | |
4080 list = Fcons (Dynarr_at (cats, j).sym, list); | |
4081 } | |
4082 | |
4083 return Fnreverse (list); | |
4084 } | |
4085 | |
4086 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* | |
4087 Change the priority order of the coding categories. | |
4088 LIST should be list of coding categories, in descending order of | |
4089 priority. Unspecified coding categories will be lower in priority | |
4090 than all specified ones, in the same relative order they were in | |
4091 previously. | |
4092 */ | |
4093 (list)) | |
4094 { | |
4095 int *category_to_priority = | |
4096 alloca_array (int, coding_detector_category_count); | |
4097 int i, j; | |
4098 | |
4099 /* First generate a list that maps coding categories to priorities. */ | |
4100 | |
4101 for (i = 0; i < coding_detector_category_count; i++) | |
4102 category_to_priority[i] = -1; | |
4103 | |
4104 /* Highest priority comes from the specified list. */ | |
4105 i = 0; | |
2367 | 4106 { |
4107 EXTERNAL_LIST_LOOP_2 (elt, list) | |
4108 { | |
4109 int cat = coding_category_symbol_to_id (elt); | |
4110 | |
4111 if (category_to_priority[cat] >= 0) | |
4112 sferror ("Duplicate coding category in list", elt); | |
4113 category_to_priority[cat] = i++; | |
4114 } | |
4115 } | |
771 | 4116 |
4117 /* Now go through the existing categories by priority to retrieve | |
4118 the categories not yet specified and preserve their priority | |
4119 order. */ | |
4120 for (j = 0; j < coding_detector_category_count; j++) | |
4121 { | |
4122 int cat = coding_category_by_priority[j]; | |
4123 if (category_to_priority[cat] < 0) | |
4124 category_to_priority[cat] = i++; | |
4125 } | |
4126 | |
4127 /* Now we need to construct the inverse of the mapping we just | |
4128 constructed. */ | |
4129 | |
4130 for (i = 0; i < coding_detector_category_count; i++) | |
4131 coding_category_by_priority[category_to_priority[i]] = i; | |
4132 | |
4133 /* Phew! That was confusing. */ | |
4134 return Qnil; | |
4135 } | |
4136 | |
4137 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /* | |
4138 Return a list of coding categories in descending order of priority. | |
4139 */ | |
4140 ()) | |
4141 { | |
4142 int i; | |
4143 Lisp_Object list = Qnil; | |
4144 | |
4145 for (i = 0; i < coding_detector_category_count; i++) | |
4146 list = | |
4147 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]), | |
4148 list); | |
4149 return Fnreverse (list); | |
4150 } | |
4151 | |
4152 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* | |
4153 Change the coding system associated with a coding category. | |
4154 */ | |
4155 (coding_category, coding_system)) | |
4156 { | |
4157 coding_category_system[coding_category_symbol_to_id (coding_category)] = | |
4158 Fget_coding_system (coding_system); | |
4159 return Qnil; | |
4160 } | |
4161 | |
4162 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* | |
4163 Return the coding system associated with a coding category. | |
4164 */ | |
4165 (coding_category)) | |
4166 { | |
4167 Lisp_Object sys = | |
4168 coding_category_system[coding_category_symbol_to_id (coding_category)]; | |
4169 | |
4170 if (!NILP (sys)) | |
4171 return XCODING_SYSTEM_NAME (sys); | |
4172 return Qnil; | |
4173 } | |
4174 | |
800 | 4175 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will |
4176 read through to the end of STREAM, leaving it there but open. */ | |
4177 | |
771 | 4178 Lisp_Object |
4179 detect_coding_stream (Lisp_Object stream) | |
4180 { | |
4181 Lisp_Object val = Qnil; | |
4182 struct gcpro gcpro1, gcpro2, gcpro3; | |
4183 UExtbyte random_buffer[65536]; | |
4184 Lisp_Object binary_instream = | |
4185 make_coding_input_stream | |
4186 (XLSTREAM (stream), Qbinary, | |
814 | 4187 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER); |
771 | 4188 Lisp_Object decstream = |
4189 make_coding_input_stream | |
4190 (XLSTREAM (binary_instream), | |
800 | 4191 Qundecided, CODING_DECODE, 0); |
771 | 4192 Lstream *decstr = XLSTREAM (decstream); |
4193 | |
4194 GCPRO3 (decstream, stream, binary_instream); | |
4195 /* Read and discard all data; detection happens as a side effect of this, | |
4196 and we examine what was detected afterwards. */ | |
4197 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0) | |
4198 ; | |
4199 | |
4200 val = coding_stream_detected_coding_system (decstr); | |
4201 Lstream_close (decstr); | |
4202 Lstream_delete (decstr); | |
4203 Lstream_delete (XLSTREAM (binary_instream)); | |
4204 UNGCPRO; | |
4205 return val; | |
428 | 4206 } |
4207 | |
4208 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | |
4209 Detect coding system of the text in the region between START and END. | |
444 | 4210 Return a list of possible coding systems ordered by priority. |
3025 | 4211 If only ASCII characters are found, return `undecided' or one of |
428 | 4212 its subsidiary coding systems according to a detected end-of-line |
4213 type. Optional arg BUFFER defaults to the current buffer. | |
4214 */ | |
4215 (start, end, buffer)) | |
4216 { | |
4217 Lisp_Object val = Qnil; | |
4218 struct buffer *buf = decode_buffer (buffer, 0); | |
665 | 4219 Charbpos b, e; |
771 | 4220 Lisp_Object lb_instream; |
428 | 4221 |
4222 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
4223 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
771 | 4224 |
4225 val = detect_coding_stream (lb_instream); | |
4226 Lstream_delete (XLSTREAM (lb_instream)); | |
428 | 4227 return val; |
4228 } | |
4229 | |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4230 DEFUN ("find-coding-system-magic-cookie-in-file", |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4231 Ffind_coding_system_magic_cookie_in_file, 1, 1, 0, /* |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4232 Look for the coding-system magic cookie in FILENAME. |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4233 The coding-system magic cookie is either the local variable specification |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4234 -*- ... coding: ... -*- on the first line, or the exact string |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4235 \";;;###coding system: \" somewhere within the first 3000 characters |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4236 of the file. If found, the coding system name (as a string) is returned; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4237 otherwise nil is returned. Note that it is extremely unlikely that |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4238 either such string would occur coincidentally as the result of encoding |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4239 some characters in a non-ASCII charset, and that the spaces make it |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4240 even less likely since the space character is not a valid octet in any |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4241 ISO 2022 encoding of most non-ASCII charsets. |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4242 */ |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4243 (filename)) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4244 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4245 Lisp_Object lstream; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4246 UExtbyte buf[4096]; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4247 Bytecount nread; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4248 int fd = -1; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4249 struct stat st; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4250 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4251 filename = Fexpand_file_name (filename, Qnil); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4252 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4253 if (qxe_stat (XSTRING_DATA (filename), &st) < 0) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4254 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4255 badopen: |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4256 report_file_error ("Opening input file", filename); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4257 } |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4258 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4259 if (fd < 0) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4260 { |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4261 if ((fd = qxe_interruptible_open (XSTRING_DATA (filename), |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4262 O_RDONLY | OPEN_BINARY, 0)) < 0) |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4263 goto badopen; |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4264 } |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4265 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4266 lstream = make_filedesc_input_stream (fd, 0, -1, 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4267 Lstream_set_buffering (XLSTREAM (lstream), LSTREAM_UNBUFFERED, 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4268 nread = Lstream_read (XLSTREAM (lstream), buf, sizeof (buf)); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4269 Lstream_delete (XLSTREAM (lstream)); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4270 retry_close (fd); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4271 |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4272 return look_for_coding_system_magic_cookie (buf, nread, 0); |
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4273 } |
428 | 4274 |
771 | 4275 |
4276 #ifdef DEBUG_XEMACS | |
4277 | |
428 | 4278 /************************************************************************/ |
771 | 4279 /* Internal methods */ |
4280 /************************************************************************/ | |
4281 | |
4282 /* Raw (internally-formatted) data. */ | |
4283 DEFINE_CODING_SYSTEM_TYPE (internal); | |
428 | 4284 |
665 | 4285 static Bytecount |
2286 | 4286 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src, |
771 | 4287 unsigned_char_dynarr *dst, Bytecount n) |
4288 { | |
4289 Bytecount orign = n; | |
4290 Dynarr_add_many (dst, src, n); | |
4291 return orign; | |
4292 } | |
4293 | |
4294 #endif /* DEBUG_XEMACS */ | |
4295 | |
4296 | |
4297 | |
4298 #ifdef HAVE_ZLIB | |
4299 | |
4300 /************************************************************************/ | |
4301 /* Gzip methods */ | |
4302 /************************************************************************/ | |
4303 | |
4304 struct gzip_coding_system | |
428 | 4305 { |
771 | 4306 int level; /* 0 through 9, or -1 for default */ |
4307 }; | |
4308 | |
4309 #define CODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4310 (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4311 #define XCODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
4312 (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
4313 | |
4314 struct gzip_coding_stream | |
428 | 4315 { |
771 | 4316 z_stream stream; |
4317 int stream_initted; | |
4318 int reached_eof; /* #### this should be handled by the caller, once we | |
4319 return LSTREAM_EOF */ | |
4320 }; | |
4321 | |
1204 | 4322 static const struct memory_description |
771 | 4323 gzip_coding_system_description[] = { |
4324 { XD_END } | |
4325 }; | |
4326 | |
1204 | 4327 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip); |
4328 | |
771 | 4329 enum source_sink_type |
4330 gzip_conversion_end_type (Lisp_Object codesys) | |
4331 { | |
4332 return DECODES_BYTE_TO_BYTE; | |
428 | 4333 } |
4334 | |
4335 static void | |
771 | 4336 gzip_init (Lisp_Object codesys) |
4337 { | |
4338 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); | |
4339 data->level = -1; | |
4340 } | |
4341 | |
4342 static void | |
4343 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
428 | 4344 { |
771 | 4345 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip); |
4346 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4347 write_ascstring (printcharfun, "("); |
771 | 4348 if (data->level == -1) |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4349 write_ascstring (printcharfun, "default"); |
771 | 4350 else |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4351 print_internal (make_fixnum (data->level), printcharfun, 0); |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4352 write_ascstring (printcharfun, ")"); |
428 | 4353 } |
4354 | |
4355 static int | |
771 | 4356 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
428 | 4357 { |
771 | 4358 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); |
4359 | |
4360 if (EQ (key, Qlevel)) | |
428 | 4361 { |
771 | 4362 if (EQ (value, Qdefault)) |
4363 data->level = -1; | |
4364 else | |
428 | 4365 { |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4366 check_integer_range (value, Qzero, make_fixnum (9)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4367 data->level = XFIXNUM (value); |
428 | 4368 } |
4369 } | |
4370 else | |
771 | 4371 return 0; |
4372 return 1; | |
428 | 4373 } |
4374 | |
4375 static Lisp_Object | |
771 | 4376 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop) |
428 | 4377 { |
771 | 4378 struct gzip_coding_system *data = |
4379 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip); | |
4380 | |
4381 if (EQ (prop, Qlevel)) | |
428 | 4382 { |
771 | 4383 if (data->level == -1) |
4384 return Qdefault; | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5474
diff
changeset
|
4385 return make_fixnum (data->level); |
428 | 4386 } |
771 | 4387 |
4388 return Qunbound; | |
428 | 4389 } |
4390 | |
4391 static void | |
771 | 4392 gzip_init_coding_stream (struct coding_stream *str) |
428 | 4393 { |
771 | 4394 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4395 if (data->stream_initted) | |
428 | 4396 { |
771 | 4397 if (str->direction == CODING_DECODE) |
4398 inflateEnd (&data->stream); | |
4399 else | |
4400 deflateEnd (&data->stream); | |
4401 data->stream_initted = 0; | |
428 | 4402 } |
771 | 4403 data->reached_eof = 0; |
428 | 4404 } |
4405 | |
4406 static void | |
771 | 4407 gzip_rewind_coding_stream (struct coding_stream *str) |
428 | 4408 { |
771 | 4409 gzip_init_coding_stream (str); |
428 | 4410 } |
4411 | |
771 | 4412 static Bytecount |
4413 gzip_convert (struct coding_stream *str, | |
4414 const UExtbyte *src, | |
4415 unsigned_char_dynarr *dst, Bytecount n) | |
428 | 4416 { |
771 | 4417 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
4418 int zerr; | |
4419 if (str->direction == CODING_DECODE) | |
428 | 4420 { |
771 | 4421 if (data->reached_eof) |
4422 return n; /* eat the data */ | |
4423 | |
4424 if (!data->stream_initted) | |
428 | 4425 { |
771 | 4426 xzero (data->stream); |
4427 if (inflateInit (&data->stream) != Z_OK) | |
4428 return LSTREAM_ERROR; | |
4429 data->stream_initted = 1; | |
428 | 4430 } |
771 | 4431 |
4432 data->stream.next_in = (Bytef *) src; | |
4433 data->stream.avail_in = n; | |
4434 | |
4435 /* Normally we stop when we've fed all data to the decompressor; but | |
4436 if we're at the end of the input, and the decompressor hasn't | |
4437 reported EOF, we need to keep going, as there might be more output | |
4438 to generate. Z_OK from the decompressor means input was processed | |
4439 or output was generated; if neither, we break out of the loop. | |
4440 Other return values are: | |
4441 | |
4442 Z_STREAM_END EOF from decompressor | |
4443 Z_DATA_ERROR Corrupted data | |
4444 Z_BUF_ERROR No progress possible (this should happen if | |
4445 we try to feed it an incomplete file) | |
4446 Z_MEM_ERROR Out of memory | |
4447 Z_STREAM_ERROR (should never happen) | |
4448 Z_NEED_DICT (#### when will this happen?) | |
4449 */ | |
4450 while (data->stream.avail_in > 0 || str->eof) | |
4451 { | |
4452 /* Reserve an output buffer of the same size as the input buffer; | |
4453 if that's not enough, we keep reserving the same size. */ | |
4454 Bytecount reserved = n; | |
4455 Dynarr_add_many (dst, 0, reserved); | |
4456 /* Careful here! Don't retrieve the pointer until after | |
4457 reserving the space, or it might be bogus */ | |
4458 data->stream.next_out = | |
4459 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4460 data->stream.avail_out = reserved; | |
4461 zerr = inflate (&data->stream, Z_NO_FLUSH); | |
4462 /* Lop off the unused portion */ | |
5038 | 4463 Dynarr_set_lengthr (dst, Dynarr_length (dst) - data->stream.avail_out); |
771 | 4464 if (zerr != Z_OK) |
4465 break; | |
4466 } | |
4467 | |
4468 if (zerr == Z_STREAM_END) | |
4469 data->reached_eof = 1; | |
4470 | |
4471 if ((Bytecount) data->stream.avail_in < n) | |
4472 return n - data->stream.avail_in; | |
4473 | |
4474 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4475 return 0; | |
4476 | |
4477 return LSTREAM_ERROR; | |
428 | 4478 } |
4479 else | |
4480 { | |
771 | 4481 if (!data->stream_initted) |
4482 { | |
4483 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys); | |
4484 xzero (data->stream); | |
4485 if (deflateInit (&data->stream, | |
4486 level == -1 ? Z_DEFAULT_COMPRESSION : level) != | |
4487 Z_OK) | |
4488 return LSTREAM_ERROR; | |
4489 data->stream_initted = 1; | |
428 | 4490 } |
771 | 4491 |
4492 data->stream.next_in = (Bytef *) src; | |
4493 data->stream.avail_in = n; | |
4494 | |
4495 /* Normally we stop when we've fed all data to the compressor; but if | |
4496 we're at the end of the input, and the compressor hasn't reported | |
4497 EOF, we need to keep going, as there might be more output to | |
4498 generate. (To signal EOF on our end, we set the FLUSH parameter | |
4499 to Z_FINISH; when all data is output, Z_STREAM_END will be | |
4500 returned.) Z_OK from the compressor means input was processed or | |
4501 output was generated; if neither, we break out of the loop. Other | |
4502 return values are: | |
4503 | |
4504 Z_STREAM_END EOF from compressor | |
4505 Z_BUF_ERROR No progress possible (should never happen) | |
4506 Z_STREAM_ERROR (should never happen) | |
4507 */ | |
4508 while (data->stream.avail_in > 0 || str->eof) | |
4509 { | |
4510 /* Reserve an output buffer of the same size as the input buffer; | |
4511 if that's not enough, we keep reserving the same size. */ | |
4512 Bytecount reserved = n; | |
4513 Dynarr_add_many (dst, 0, reserved); | |
4514 /* Careful here! Don't retrieve the pointer until after | |
4515 reserving the space, or it might be bogus */ | |
4516 data->stream.next_out = | |
4517 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
4518 data->stream.avail_out = reserved; | |
4519 zerr = | |
4520 deflate (&data->stream, | |
4521 str->eof ? Z_FINISH : Z_NO_FLUSH); | |
4522 /* Lop off the unused portion */ | |
5038 | 4523 Dynarr_set_lengthr (dst, Dynarr_length (dst) - data->stream.avail_out); |
771 | 4524 if (zerr != Z_OK) |
4525 break; | |
4526 } | |
4527 | |
4528 if ((Bytecount) data->stream.avail_in < n) | |
4529 return n - data->stream.avail_in; | |
4530 | |
4531 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
4532 return 0; | |
4533 | |
4534 return LSTREAM_ERROR; | |
428 | 4535 } |
4536 } | |
4537 | |
771 | 4538 #endif /* HAVE_ZLIB */ |
428 | 4539 |
4540 | |
4541 /************************************************************************/ | |
4542 /* Initialization */ | |
4543 /************************************************************************/ | |
4544 | |
4545 void | |
4546 syms_of_file_coding (void) | |
4547 { | |
5117
3742ea8250b5
Checking in final CVS version of workspace 'ben-lisp-object'
Ben Wing <ben@xemacs.org>
parents:
3025
diff
changeset
|
4548 INIT_LISP_OBJECT (coding_system); |
442 | 4549 |
771 | 4550 DEFSUBR (Fvalid_coding_system_type_p); |
4551 DEFSUBR (Fcoding_system_type_list); | |
428 | 4552 DEFSUBR (Fcoding_system_p); |
4303 | 4553 DEFSUBR (Fautoload_coding_system); |
428 | 4554 DEFSUBR (Ffind_coding_system); |
4555 DEFSUBR (Fget_coding_system); | |
4556 DEFSUBR (Fcoding_system_list); | |
4557 DEFSUBR (Fcoding_system_name); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4558 DEFSUBR (Fmake_coding_system_internal); |
428 | 4559 DEFSUBR (Fcopy_coding_system); |
440 | 4560 DEFSUBR (Fcoding_system_canonical_name_p); |
4561 DEFSUBR (Fcoding_system_alias_p); | |
4562 DEFSUBR (Fcoding_system_aliasee); | |
428 | 4563 DEFSUBR (Fdefine_coding_system_alias); |
4564 DEFSUBR (Fsubsidiary_coding_system); | |
771 | 4565 DEFSUBR (Fcoding_system_base); |
4566 DEFSUBR (Fcoding_system_used_for_io); | |
428 | 4567 |
4568 DEFSUBR (Fcoding_system_type); | |
771 | 4569 DEFSUBR (Fcoding_system_description); |
428 | 4570 DEFSUBR (Fcoding_system_property); |
4571 | |
4572 DEFSUBR (Fcoding_category_list); | |
4573 DEFSUBR (Fset_coding_priority_list); | |
4574 DEFSUBR (Fcoding_priority_list); | |
4575 DEFSUBR (Fset_coding_category_system); | |
4576 DEFSUBR (Fcoding_category_system); | |
4577 | |
4578 DEFSUBR (Fdetect_coding_region); | |
4579 DEFSUBR (Fdecode_coding_region); | |
4580 DEFSUBR (Fencode_coding_region); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4581 DEFSUBR (Fquery_coding_region); |
5682
dae33b5feffe
Unify #'find-coding-system-magic-cookie-in-file, look_for_coding_system_magic_cookie()
Aidan Kehoe <kehoea@parhasard.net>
parents:
5581
diff
changeset
|
4582 DEFSUBR (Ffind_coding_system_magic_cookie_in_file); |
563 | 4583 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
4584 DEFSYMBOL (Qno_conversion); | |
771 | 4585 DEFSYMBOL (Qconvert_eol); |
4586 DEFSYMBOL (Qconvert_eol_autodetect); | |
4587 DEFSYMBOL (Qconvert_eol_lf); | |
4588 DEFSYMBOL (Qconvert_eol_cr); | |
4589 DEFSYMBOL (Qconvert_eol_crlf); | |
563 | 4590 DEFSYMBOL (Qraw_text); |
771 | 4591 |
563 | 4592 DEFSYMBOL (Qmnemonic); |
4593 DEFSYMBOL (Qeol_type); | |
4594 DEFSYMBOL (Qpost_read_conversion); | |
4595 DEFSYMBOL (Qpre_write_conversion); | |
4596 | |
771 | 4597 DEFSYMBOL (Qtranslation_table_for_decode); |
4598 DEFSYMBOL (Qtranslation_table_for_encode); | |
4599 DEFSYMBOL (Qsafe_chars); | |
4600 DEFSYMBOL (Qsafe_charsets); | |
4601 DEFSYMBOL (Qmime_charset); | |
4602 DEFSYMBOL (Qvalid_codes); | |
4603 | |
563 | 4604 DEFSYMBOL (Qcr); |
4605 DEFSYMBOL (Qlf); | |
4606 DEFSYMBOL (Qcrlf); | |
4607 DEFSYMBOL (Qeol_cr); | |
4608 DEFSYMBOL (Qeol_lf); | |
4609 DEFSYMBOL (Qeol_crlf); | |
4610 DEFSYMBOL (Qencode); | |
4611 DEFSYMBOL (Qdecode); | |
428 | 4612 |
771 | 4613 DEFSYMBOL (Qnear_certainty); |
4614 DEFSYMBOL (Qquite_probable); | |
4615 DEFSYMBOL (Qsomewhat_likely); | |
1494 | 4616 DEFSYMBOL (Qslightly_likely); |
771 | 4617 DEFSYMBOL (Qas_likely_as_unlikely); |
4618 DEFSYMBOL (Qsomewhat_unlikely); | |
4619 DEFSYMBOL (Qquite_improbable); | |
4620 DEFSYMBOL (Qnearly_impossible); | |
4621 | |
4622 DEFSYMBOL (Qdo_eol); | |
4623 DEFSYMBOL (Qdo_coding); | |
4624 | |
4625 DEFSYMBOL (Qcanonicalize_after_coding); | |
4626 | |
4303 | 4627 DEFSYMBOL (Qposix_charset_to_coding_system_hash); |
4628 | |
771 | 4629 DEFSYMBOL (Qescape_quoted); |
4630 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4631 DEFSYMBOL (Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4632 DEFSYMBOL (Qaliases); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4633 DEFSYMBOL (Qcharset_skip_chars_string); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4634 |
771 | 4635 #ifdef HAVE_ZLIB |
4636 DEFSYMBOL (Qgzip); | |
4637 #endif | |
4638 | |
428 | 4639 } |
4640 | |
4641 void | |
4642 lstream_type_create_file_coding (void) | |
4643 { | |
771 | 4644 LSTREAM_HAS_METHOD (coding, reader); |
4645 LSTREAM_HAS_METHOD (coding, writer); | |
4646 LSTREAM_HAS_METHOD (coding, rewinder); | |
4647 LSTREAM_HAS_METHOD (coding, seekable_p); | |
4648 LSTREAM_HAS_METHOD (coding, marker); | |
4649 LSTREAM_HAS_METHOD (coding, flusher); | |
4650 LSTREAM_HAS_METHOD (coding, closer); | |
4651 LSTREAM_HAS_METHOD (coding, finalizer); | |
4652 } | |
4653 | |
4654 void | |
4655 coding_system_type_create (void) | |
4656 { | |
4657 int i; | |
4658 | |
4659 staticpro (&Vcoding_system_hash_table); | |
4660 Vcoding_system_hash_table = | |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
4661 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, Qeq); |
771 | 4662 |
4663 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry); | |
2367 | 4664 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr, |
771 | 4665 &csted_description); |
4666 | |
4667 Vcoding_system_type_list = Qnil; | |
4668 staticpro (&Vcoding_system_type_list); | |
4669 | |
4670 /* Initialize to something reasonable ... */ | |
4671 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++) | |
4672 { | |
4673 coding_category_system[i] = Qnil; | |
1204 | 4674 dump_add_root_lisp_object (&coding_category_system[i]); |
771 | 4675 coding_category_by_priority[i] = i; |
4676 } | |
4677 | |
4678 dump_add_opaque (coding_category_by_priority, | |
4679 sizeof (coding_category_by_priority)); | |
4680 | |
4681 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector); | |
2367 | 4682 dump_add_root_block_ptr (&all_coding_detectors, |
771 | 4683 &detector_dynarr_description); |
4684 | |
4685 dump_add_opaque_int (&coding_system_tick); | |
4686 dump_add_opaque_int (&coding_detector_count); | |
4687 dump_add_opaque_int (&coding_detector_category_count); | |
4688 | |
4689 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, | |
4690 "no-conversion-coding-system-p"); | |
4691 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); | |
4692 | |
4693 INITIALIZE_DETECTOR (no_conversion); | |
4694 DETECTOR_HAS_METHOD (no_conversion, detect); | |
4695 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
4696 | |
4697 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol, | |
4698 "convert-eol-coding-system-p"); | |
4699 CODING_SYSTEM_HAS_METHOD (convert_eol, print); | |
4700 CODING_SYSTEM_HAS_METHOD (convert_eol, convert); | |
4701 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop); | |
4702 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop); | |
4703 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type); | |
4704 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding); | |
4705 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream); | |
4706 | |
4707 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided, | |
4708 "undecided-coding-system-p"); | |
4709 CODING_SYSTEM_HAS_METHOD (undecided, init); | |
4710 CODING_SYSTEM_HAS_METHOD (undecided, mark); | |
4711 CODING_SYSTEM_HAS_METHOD (undecided, print); | |
4712 CODING_SYSTEM_HAS_METHOD (undecided, convert); | |
4713 CODING_SYSTEM_HAS_METHOD (undecided, putprop); | |
4714 CODING_SYSTEM_HAS_METHOD (undecided, getprop); | |
4715 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream); | |
4716 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream); | |
4717 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream); | |
4718 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream); | |
4719 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize); | |
4720 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding); | |
4721 | |
4722 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p"); | |
4723 | |
4724 CODING_SYSTEM_HAS_METHOD (chain, print); | |
4725 CODING_SYSTEM_HAS_METHOD (chain, canonicalize); | |
4726 CODING_SYSTEM_HAS_METHOD (chain, init); | |
4727 CODING_SYSTEM_HAS_METHOD (chain, mark); | |
4728 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream); | |
4729 CODING_SYSTEM_HAS_METHOD (chain, convert); | |
4730 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream); | |
4731 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream); | |
4732 CODING_SYSTEM_HAS_METHOD (chain, finalize); | |
4733 CODING_SYSTEM_HAS_METHOD (chain, putprop); | |
4734 CODING_SYSTEM_HAS_METHOD (chain, getprop); | |
4735 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type); | |
4736 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding); | |
4737 | |
4738 #ifdef DEBUG_XEMACS | |
4739 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p"); | |
4740 CODING_SYSTEM_HAS_METHOD (internal, convert); | |
4741 #endif | |
4742 | |
4743 #ifdef HAVE_ZLIB | |
4744 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p"); | |
4745 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type); | |
4746 CODING_SYSTEM_HAS_METHOD (gzip, convert); | |
4747 CODING_SYSTEM_HAS_METHOD (gzip, init); | |
4748 CODING_SYSTEM_HAS_METHOD (gzip, print); | |
4749 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream); | |
4750 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream); | |
4751 CODING_SYSTEM_HAS_METHOD (gzip, putprop); | |
4752 CODING_SYSTEM_HAS_METHOD (gzip, getprop); | |
4753 #endif | |
4754 } | |
4755 | |
4756 void | |
4757 reinit_coding_system_type_create (void) | |
4758 { | |
4759 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion); | |
4760 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol); | |
4761 REINITIALIZE_CODING_SYSTEM_TYPE (undecided); | |
4762 REINITIALIZE_CODING_SYSTEM_TYPE (chain); | |
4763 #if 0 | |
4764 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper); | |
4765 #endif /* 0 */ | |
4766 #ifdef DEBUG_XEMACS | |
4767 REINITIALIZE_CODING_SYSTEM_TYPE (internal); | |
4768 #endif | |
4769 #ifdef HAVE_ZLIB | |
4770 REINITIALIZE_CODING_SYSTEM_TYPE (gzip); | |
4771 #endif | |
4772 } | |
4773 | |
4774 void | |
4775 reinit_vars_of_file_coding (void) | |
4776 { | |
428 | 4777 } |
4778 | |
4779 void | |
4780 vars_of_file_coding (void) | |
4781 { | |
771 | 4782 /* We always have file-coding support */ |
428 | 4783 Fprovide (intern ("file-coding")); |
4784 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4785 QScoding_system_cookie = build_ascstring (";;;###coding system: "); |
1347 | 4786 staticpro (&QScoding_system_cookie); |
4787 | |
1242 | 4788 #ifdef HAVE_DEFAULT_EOL_DETECTION |
2297 | 4789 /* #### Find a more appropriate place for this comment. |
4790 WARNING: The existing categories are intimately tied to the function | |
1242 | 4791 `coding-system-category' in coding.el. If you change a category, or |
4792 change the layout of any coding system associated with a category, you | |
4793 need to check that function and make sure it's written properly. */ | |
4794 | |
4795 Fprovide (intern ("unix-default-eol-detection")); | |
4796 #endif | |
4797 | |
428 | 4798 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
3142 | 4799 Default coding system used for TTY and X11 keyboard input. |
5384
3889ef128488
Fix misspelled words, and some grammar, across the entire source tree.
Jerry James <james@xemacs.org>
parents:
5345
diff
changeset
|
4800 Under X11, used only to interpret the character for a key event when that |
3142 | 4801 event has a KeySym of NoSymbol but does have an associated string keysym, |
4802 something that's seen with input methods. | |
4803 | |
4804 If you need to set these things to different coding systems, call the | |
4805 function `set-console-tty-coding-system' for the TTY and use this variable | |
4806 for X11. | |
428 | 4807 */ ); |
4808 Vkeyboard_coding_system = Qnil; | |
4809 | |
4810 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* | |
4811 Coding system used for TTY display output. | |
4812 Not used under a windowing system. | |
4813 */ ); | |
4814 Vterminal_coding_system = Qnil; | |
4815 | |
4816 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* | |
440 | 4817 Overriding coding system used when reading from a file or process. |
4818 You should bind this variable with `let', but do not set it globally. | |
4819 If this is non-nil, it specifies the coding system that will be used | |
4820 to decode input on read operations, such as from a file or process. | |
4821 It overrides `buffer-file-coding-system-for-read', | |
428 | 4822 `insert-file-contents-pre-hook', etc. Use those variables instead of |
440 | 4823 this one for permanent changes to the environment. */ ); |
428 | 4824 Vcoding_system_for_read = Qnil; |
4825 | |
4826 DEFVAR_LISP ("coding-system-for-write", | |
4827 &Vcoding_system_for_write /* | |
440 | 4828 Overriding coding system used when writing to a file or process. |
4829 You should bind this variable with `let', but do not set it globally. | |
4830 If this is non-nil, it specifies the coding system that will be used | |
4831 to encode output for write operations, such as to a file or process. | |
4832 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc. | |
4833 Use those variables instead of this one for permanent changes to the | |
4834 environment. */ ); | |
428 | 4835 Vcoding_system_for_write = Qnil; |
4836 | |
4837 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* | |
4838 Coding system used to convert pathnames when accessing files. | |
4839 */ ); | |
4840 Vfile_name_coding_system = Qnil; | |
4841 | |
4842 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /* | |
771 | 4843 Setting this has no effect. It is purely for FSF compatibility. |
428 | 4844 */ ); |
4845 enable_multibyte_characters = 1; | |
771 | 4846 |
4847 Vchain_canonicalize_hash_table = | |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
4848 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, Qequal); |
771 | 4849 staticpro (&Vchain_canonicalize_hash_table); |
4850 | |
4851 #ifdef DEBUG_XEMACS | |
4852 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /* | |
4853 If non-nil, display debug information about detection operations in progress. | |
4854 Information is displayed on stderr. | |
4855 */ ); | |
4856 Vdebug_coding_detection = Qnil; | |
4857 #endif | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4858 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4859 #ifdef MULE |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4860 Vdefault_query_coding_region_chartab_cache |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
4861 = make_lisp_hash_table (25, HASH_TABLE_NON_WEAK, Qequal); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4862 staticpro (&Vdefault_query_coding_region_chartab_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4863 #endif |
428 | 4864 } |
4865 | |
2297 | 4866 /* #### reformat this for consistent appearance? */ |
4867 | |
428 | 4868 void |
4869 complex_vars_of_file_coding (void) | |
4870 { | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4871 Fmake_coding_system_internal |
771 | 4872 (Qconvert_eol_cr, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4873 build_defer_string ("Convert CR to LF"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4874 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4875 build_defer_string ( |
771 | 4876 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" |
4877 "(used internally and under Unix to mark the end of a line)."), | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4878 Qmnemonic, build_ascstring ("CR->LF"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4879 Qsubtype, Qcr, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4880 /* VERY IMPORTANT! Tell make-coding-system not to generate |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4881 subsidiaries -- it needs the coding systems we're creating |
771 | 4882 to do so! */ |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4883 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4884 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4885 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4886 Fmake_coding_system_internal |
771 | 4887 (Qconvert_eol_lf, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4888 build_defer_string ("Convert LF to LF (do nothing)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4889 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4890 build_defer_string ("Do nothing."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4891 Qmnemonic, build_ascstring ("LF->LF"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4892 Qsubtype, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4893 /* VERY IMPORTANT! Tell make-coding-system not to generate |
771 | 4894 subsidiaries -- it needs the coding systems we're creating |
4895 to do so! */ | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4896 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4897 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4898 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4899 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4900 Fmake_coding_system_internal |
771 | 4901 (Qconvert_eol_crlf, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4902 build_defer_string ("Convert CRLF to LF"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4903 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4904 build_defer_string ( |
771 | 4905 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" |
4906 "(used internally and under Unix to mark the end of a line)."), | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4907 Qmnemonic, build_ascstring ("CRLF->LF"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4908 Qsubtype, Qcrlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4909 /* VERY IMPORTANT! Tell make-coding-system not to generate |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4910 subsidiaries -- it needs the coding systems we're creating |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4911 to do so! */ |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4912 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4913 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4914 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4915 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4916 Fmake_coding_system_internal |
771 | 4917 (Qconvert_eol_autodetect, Qconvert_eol, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4918 build_defer_string ("Autodetect EOL type"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4919 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4920 build_defer_string ("Autodetect the end-of-line type."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4921 Qmnemonic, build_ascstring ("Auto-EOL"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4922 Qsubtype, Qnil, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4923 /* VERY IMPORTANT! Tell make-coding-system not to generate |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4924 subsidiaries -- it needs the coding systems we're creating |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4925 to do so! */ |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4926 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4927 Qsafe_charsets, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4928 Qunbound)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4929 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4930 Fmake_coding_system_internal |
771 | 4931 (Qundecided, Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4932 build_defer_string ("Undecided (auto-detect)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4933 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4934 build_defer_string ("Automatically detects the correct encoding."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4935 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4936 Qdo_eol, Qt, Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4937 /* We do EOL detection ourselves so we don't need to be |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4938 wrapped in an EOL detector. (It doesn't actually hurt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4939 though, I don't think.) */ |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4940 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4941 Qunbound)); |
771 | 4942 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4943 Fmake_coding_system_internal |
771 | 4944 (intern ("undecided-dos"), Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4945 build_defer_string ("Undecided (auto-detect) (CRLF)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4946 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4947 build_defer_string |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4948 ("Automatically detects the correct encoding; EOL type of CRLF forced."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4949 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4950 Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4951 Qeol_type, Qcrlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4952 Qunbound)); |
771 | 4953 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4954 Fmake_coding_system_internal |
771 | 4955 (intern ("undecided-unix"), Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4956 build_defer_string ("Undecided (auto-detect) (LF)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4957 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4958 build_defer_string |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4959 ("Automatically detects the correct encoding; EOL type of LF forced."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4960 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4961 Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4962 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4963 Qunbound));; |
771 | 4964 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4965 Fmake_coding_system_internal |
771 | 4966 (intern ("undecided-mac"), Qundecided, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4967 build_defer_string ("Undecided (auto-detect) (CR)"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4968 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4969 build_defer_string |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4970 ("Automatically detects the correct encoding; EOL type of CR forced."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4971 Qmnemonic, build_ascstring ("Auto"), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4972 Qdo_coding, Qt, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4973 Qeol_type, Qcr, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4974 Qunbound)); |
771 | 4975 |
428 | 4976 /* Need to create this here or we're really screwed. */ |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4977 Fmake_coding_system_internal |
428 | 4978 (Qraw_text, Qno_conversion, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4979 build_defer_string ("Raw Text"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4980 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4981 build_defer_string ("Raw text converts only line-break " |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4982 "codes, and acts otherwise like " |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4983 "`binary'."), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4984 Qmnemonic, build_ascstring ("Raw"), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4985 #ifdef MULE |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4986 Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4987 Vcharset_latin_iso8859_1), |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4988 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4989 #endif |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4990 Qunbound)); |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4991 |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4992 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
4993 Fmake_coding_system_internal |
428 | 4994 (Qbinary, Qno_conversion, |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4846
diff
changeset
|
4995 build_defer_string ("Binary"), |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4996 listu (Qdocumentation, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
4997 build_defer_string ( |
771 | 4998 "This coding system is as close as it comes to doing no conversion.\n" |
4999 "On input, each byte is converted directly into the character\n" | |
5000 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" | |
5001 "or `latin-1' character sets. On output, these characters are\n" | |
5002 "converted back to the corresponding bytes, and other characters\n" | |
5003 "are converted to the default character, i.e. `~'."), | |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5004 Qeol_type, Qlf, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5005 Qmnemonic, build_ascstring ("Binary"), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
5006 #ifdef MULE |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5007 Qsafe_charsets, list3 (Vcharset_ascii, Vcharset_control_1, |
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5008 Vcharset_latin_iso8859_1), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4647
diff
changeset
|
5009 #endif |
5345
db326b8fe982
Use Ben's recently-introduced listu (), where appropriate.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5307
diff
changeset
|
5010 Qunbound)); |
428 | 5011 |
771 | 5012 /* Formerly aliased to raw-text! Completely bogus and not even the same |
5013 as FSF Emacs. */ | |
5014 Fdefine_coding_system_alias (Qno_conversion, Qbinary); | |
5015 Fdefine_coding_system_alias (intern ("no-conversion-unix"), | |
5016 intern ("raw-text-unix")); | |
5017 Fdefine_coding_system_alias (intern ("no-conversion-dos"), | |
5018 intern ("raw-text-dos")); | |
5019 Fdefine_coding_system_alias (intern ("no-conversion-mac"), | |
5020 intern ("raw-text-mac")); | |
5021 | |
1318 | 5022 /* These three below will get their defaults set correctly |
5023 in code-init.el. We init them now so we can handle stuff at dump | |
771 | 5024 time before we get to code-init.el. */ |
1318 | 5025 Fdefine_coding_system_alias (Qnative, Qbinary); |
440 | 5026 Fdefine_coding_system_alias (Qterminal, Qbinary); |
5027 Fdefine_coding_system_alias (Qkeyboard, Qbinary); | |
5028 | |
1318 | 5029 Fdefine_coding_system_alias (Qfile_name, Qnative); |
771 | 5030 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf); |
5031 | |
428 | 5032 /* Need this for bootstrapping */ |
771 | 5033 coding_category_system[detector_category_no_conversion] = |
428 | 5034 Fget_coding_system (Qraw_text); |
5035 } |