Mercurial > hg > xemacs-beta
annotate src/file-coding.c @ 4645:f2a991ff6db0
Do not #'split-path on nil #'getenv result. <877hz7lzrt.fsf@yahoo.com.cn>
| author | Jerry James <james@xemacs.org> |
|---|---|
| date | Mon, 29 Jun 2009 08:20:47 -0600 |
| parents | 80e0588fb42f |
| children | e4ed58cb0e5b |
| rev | line source |
|---|---|
| 771 | 1 /* Text encoding conversion functions; coding-system object. |
| 2 #### rename me to coding-system.c or coding.c | |
| 428 | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
| 4 Copyright (C) 1995 Sun Microsystems, Inc. | |
| 3025 | 5 Copyright (C) 2000, 2001, 2002, 2003, 2005 Ben Wing. |
| 428 | 6 |
| 7 This file is part of XEmacs. | |
| 8 | |
| 9 XEmacs is free software; you can redistribute it and/or modify it | |
| 10 under the terms of the GNU General Public License as published by the | |
| 11 Free Software Foundation; either version 2, or (at your option) any | |
| 12 later version. | |
| 13 | |
| 14 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
| 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
| 17 for more details. | |
| 18 | |
| 19 You should have received a copy of the GNU General Public License | |
| 20 along with XEmacs; see the file COPYING. If not, write to | |
| 21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 22 Boston, MA 02111-1307, USA. */ | |
| 23 | |
| 771 | 24 /* Synched up with: Not in FSF. */ |
| 25 | |
| 26 /* Authorship: | |
| 27 | |
| 28 Current primary author: Ben Wing <ben@xemacs.org> | |
| 29 | |
| 30 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c | |
| 31 from Mule 2.? but probably does not share one line of code with that | |
| 32 original source. Rewriting work started around Dec. 1994. or Jan. 1995. | |
| 33 Proceeded in earnest till Nov. 1995. | |
| 34 | |
| 35 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to | |
| 36 file-coding.c, with the intention of using it to do end-of-line conversion | |
| 37 on non-MULE machines (specifically, on Windows machines). He separated | |
| 38 out the MULE stuff from non-MULE using ifdef's, and searched throughout | |
| 39 the rest of the source tree looking for coding-system-related code that | |
| 40 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS. | |
| 41 | |
| 42 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system | |
| 43 types, providing a primitive means of decoding and encoding externally- | |
| 44 formatted Unicode/UCS_4 and Unicode/UTF_8 data. | |
| 45 | |
| 46 January 25, 2000, Martin Buchholz redid and fleshed out the coding | |
| 47 system alias handling that was first added in prototype form by | |
| 48 Hrjove Niksic, April 15, 1999. | |
| 49 | |
| 50 April to May 2000, Ben Wing: More major reorganization. Adding features | |
| 51 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the | |
| 52 "chain" coding system for chaining two together, and doing a lot of | |
| 53 reorganization in preparation for properly abstracting out the different | |
| 54 coding system types. | |
| 55 | |
| 56 June 2001, Ben Wing: Added Unicode support. Eliminated previous | |
| 57 junky Unicode translation support. | |
| 58 | |
| 59 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished | |
| 60 abstracting everything except detection, which is hard to abstract (see | |
| 61 just below). | |
| 62 | |
| 63 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code | |
| 64 to intl-win32.c. Lots more rewriting; very little code is untouched | |
| 65 from before April 2000. Abstracted the detection code, added multiple | |
| 66 levels of likelihood to increase the reliability of the algorithm. | |
| 67 | |
| 68 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined. | |
| 69 Removed the conditionals. | |
| 70 */ | |
| 71 | |
| 428 | 72 #include <config.h> |
| 73 #include "lisp.h" | |
| 74 | |
| 75 #include "buffer.h" | |
| 76 #include "elhash.h" | |
| 77 #include "insdel.h" | |
| 78 #include "lstream.h" | |
| 440 | 79 #include "opaque.h" |
| 771 | 80 #include "file-coding.h" |
| 81 | |
| 82 #ifdef HAVE_ZLIB | |
| 83 #include "zlib.h" | |
| 428 | 84 #endif |
| 85 | |
| 86 Lisp_Object Vkeyboard_coding_system; | |
| 87 Lisp_Object Vterminal_coding_system; | |
| 88 Lisp_Object Vcoding_system_for_read; | |
| 89 Lisp_Object Vcoding_system_for_write; | |
| 90 Lisp_Object Vfile_name_coding_system; | |
| 91 | |
| 771 | 92 #ifdef DEBUG_XEMACS |
| 93 Lisp_Object Vdebug_coding_detection; | |
| 440 | 94 #endif |
| 771 | 95 |
| 96 typedef struct coding_system_type_entry | |
| 97 { | |
| 98 struct coding_system_methods *meths; | |
| 99 } coding_system_type_entry; | |
| 100 | |
| 101 typedef struct | |
| 102 { | |
| 103 Dynarr_declare (coding_system_type_entry); | |
| 104 } coding_system_type_entry_dynarr; | |
| 105 | |
| 106 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr; | |
| 107 | |
| 1204 | 108 static const struct memory_description cste_description_1[] = { |
| 2551 | 109 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1, |
| 110 { &coding_system_methods_description } }, | |
| 771 | 111 { XD_END } |
| 112 }; | |
| 113 | |
| 1204 | 114 static const struct sized_memory_description cste_description = { |
| 771 | 115 sizeof (coding_system_type_entry), |
| 116 cste_description_1 | |
| 117 }; | |
| 118 | |
| 1204 | 119 static const struct memory_description csted_description_1[] = { |
| 771 | 120 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description), |
| 428 | 121 { XD_END } |
| 122 }; | |
| 123 | |
| 1204 | 124 static const struct sized_memory_description csted_description = { |
| 771 | 125 sizeof (coding_system_type_entry_dynarr), |
| 126 csted_description_1 | |
| 127 }; | |
| 128 | |
| 129 static Lisp_Object Vcoding_system_type_list; | |
| 130 | |
| 131 /* Coding system currently associated with each coding category. */ | |
| 132 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES]; | |
| 133 | |
| 134 /* Table of all coding categories in decreasing order of priority. | |
| 135 This describes a permutation of the possible coding categories. */ | |
| 136 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES]; | |
| 137 | |
| 138 /* Value used with to give a unique name to nameless coding systems */ | |
| 139 int coding_system_tick; | |
| 140 | |
| 141 int coding_detector_count; | |
| 142 int coding_detector_category_count; | |
| 143 | |
| 144 detector_dynarr *all_coding_detectors; | |
| 145 | |
| 1204 | 146 static const struct memory_description struct_detector_category_description_1[] |
| 771 | 147 = |
| 148 { | |
| 149 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) }, | |
| 150 { XD_END } | |
| 151 }; | |
| 152 | |
| 1204 | 153 static const struct sized_memory_description struct_detector_category_description = |
| 771 | 154 { |
| 155 sizeof (struct detector_category), | |
| 156 struct_detector_category_description_1 | |
| 428 | 157 }; |
| 158 | |
| 1204 | 159 static const struct memory_description detector_category_dynarr_description_1[] = |
| 771 | 160 { |
| 161 XD_DYNARR_DESC (detector_category_dynarr, | |
| 162 &struct_detector_category_description), | |
| 163 { XD_END } | |
| 164 }; | |
| 165 | |
| 1204 | 166 static const struct sized_memory_description detector_category_dynarr_description = { |
| 771 | 167 sizeof (detector_category_dynarr), |
| 168 detector_category_dynarr_description_1 | |
| 169 }; | |
| 170 | |
| 1204 | 171 static const struct memory_description struct_detector_description_1[] |
| 771 | 172 = |
| 173 { | |
| 2367 | 174 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1, |
| 2551 | 175 { &detector_category_dynarr_description } }, |
| 771 | 176 { XD_END } |
| 177 }; | |
| 178 | |
| 1204 | 179 static const struct sized_memory_description struct_detector_description = |
| 771 | 180 { |
| 181 sizeof (struct detector), | |
| 182 struct_detector_description_1 | |
| 183 }; | |
| 184 | |
| 1204 | 185 static const struct memory_description detector_dynarr_description_1[] = |
| 771 | 186 { |
| 187 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description), | |
| 188 { XD_END } | |
| 189 }; | |
| 190 | |
| 1204 | 191 static const struct sized_memory_description detector_dynarr_description = { |
| 771 | 192 sizeof (detector_dynarr), |
| 193 detector_dynarr_description_1 | |
| 194 }; | |
| 428 | 195 |
| 196 Lisp_Object Qcoding_systemp; | |
| 197 | |
| 771 | 198 Lisp_Object Qraw_text; |
| 428 | 199 |
| 200 Lisp_Object Qmnemonic, Qeol_type; | |
| 201 Lisp_Object Qcr, Qcrlf, Qlf; | |
| 202 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; | |
| 203 Lisp_Object Qpost_read_conversion; | |
| 204 Lisp_Object Qpre_write_conversion; | |
| 205 | |
| 771 | 206 Lisp_Object Qtranslation_table_for_decode; |
| 207 Lisp_Object Qtranslation_table_for_encode; | |
| 208 Lisp_Object Qsafe_chars; | |
| 209 Lisp_Object Qsafe_charsets; | |
| 210 Lisp_Object Qmime_charset; | |
| 211 Lisp_Object Qvalid_codes; | |
| 212 | |
| 213 Lisp_Object Qno_conversion; | |
| 214 Lisp_Object Qconvert_eol; | |
| 440 | 215 Lisp_Object Qescape_quoted; |
| 771 | 216 Lisp_Object Qencode, Qdecode; |
| 217 | |
| 218 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf; | |
| 219 Lisp_Object Qconvert_eol_autodetect; | |
| 220 | |
| 221 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely; | |
| 1494 | 222 Lisp_Object Qslightly_likely; |
| 771 | 223 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable; |
| 224 Lisp_Object Qnearly_impossible; | |
| 225 | |
| 226 Lisp_Object Qdo_eol, Qdo_coding; | |
| 227 | |
| 228 Lisp_Object Qcanonicalize_after_coding; | |
| 229 | |
| 1347 | 230 Lisp_Object QScoding_system_cookie; |
| 231 | |
| 4303 | 232 Lisp_Object Qposix_charset_to_coding_system_hash; |
| 233 | |
| 771 | 234 /* This is used to convert autodetected coding systems into existing |
| 235 systems. For example, the chain undecided->convert-eol-autodetect may | |
| 236 have its separate parts detected as mswindows-multibyte and | |
| 237 convert-eol-crlf, and the result needs to be mapped to | |
| 238 mswindows-multibyte-dos. */ | |
| 239 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
| 240 any more. */ | |
| 241 static Lisp_Object Vchain_canonicalize_hash_table; | |
| 242 | |
| 243 #ifdef HAVE_ZLIB | |
| 244 Lisp_Object Qgzip; | |
| 428 | 245 #endif |
| 771 | 246 |
| 2297 | 247 /* Maps symbols (coding system names) to either coding system objects or |
| 248 (for aliases) other names. */ | |
| 771 | 249 static Lisp_Object Vcoding_system_hash_table; |
| 428 | 250 |
| 251 int enable_multibyte_characters; | |
| 252 | |
| 253 EXFUN (Fcopy_coding_system, 2); | |
| 254 | |
| 255 | |
| 256 /************************************************************************/ | |
| 771 | 257 /* Coding system object methods */ |
| 428 | 258 /************************************************************************/ |
| 259 | |
| 260 static Lisp_Object | |
| 261 mark_coding_system (Lisp_Object obj) | |
| 262 { | |
| 263 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); | |
| 264 | |
| 1204 | 265 #define MARKED_SLOT(x) mark_object (codesys->x); |
| 266 #include "coding-system-slots.h" | |
| 771 | 267 |
| 268 MAYBE_CODESYSMETH (codesys, mark, (obj)); | |
| 428 | 269 |
| 1204 | 270 return Qnil; |
| 428 | 271 } |
| 272 | |
| 273 static void | |
| 771 | 274 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun) |
| 275 { | |
| 276 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
| 277 print_internal (c->methods->type, printcharfun, 1); | |
| 278 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1)); | |
| 279 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT) | |
| 280 write_fmt_string_lisp (printcharfun, " eol-type=%s", | |
| 281 1, Fcoding_system_property (obj, Qeol_type)); | |
| 282 } | |
| 283 | |
| 284 static void | |
| 428 | 285 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, |
| 2286 | 286 int UNUSED (escapeflag)) |
| 428 | 287 { |
| 288 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
| 289 if (print_readably) | |
| 771 | 290 printing_unreadable_object |
| 291 ("printing unreadable object #<coding-system 0x%x>", c->header.uid); | |
| 292 | |
| 293 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name); | |
| 294 print_coding_system_properties (obj, printcharfun); | |
| 826 | 295 write_c_string (printcharfun, ">"); |
| 428 | 296 } |
| 297 | |
| 771 | 298 /* Print an abbreviated version of a coding system (but still containing |
| 299 all the information), for use within a coding system print method. */ | |
| 300 | |
| 301 static void | |
| 302 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun, | |
| 2286 | 303 int UNUSED (escapeflag)) |
| 771 | 304 { |
| 800 | 305 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); |
| 771 | 306 print_coding_system_properties (cs, printcharfun); |
| 826 | 307 write_c_string (printcharfun, "]"); |
| 771 | 308 } |
| 309 | |
| 3263 | 310 #ifndef NEW_GC |
| 428 | 311 static void |
| 312 finalize_coding_system (void *header, int for_disksave) | |
| 313 { | |
| 771 | 314 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header); |
| 428 | 315 /* Since coding systems never go away, this function is not |
| 316 necessary. But it would be necessary if we changed things | |
| 317 so that coding systems could go away. */ | |
| 318 if (!for_disksave) /* see comment in lstream.c */ | |
| 771 | 319 MAYBE_XCODESYSMETH (cs, finalize, (cs)); |
| 320 } | |
| 3263 | 321 #endif /* not NEW_GC */ |
| 771 | 322 |
| 323 static Bytecount | |
| 324 sizeof_coding_system (const void *header) | |
| 325 { | |
| 326 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header; | |
| 327 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size; | |
| 428 | 328 } |
| 329 | |
| 1204 | 330 static const struct memory_description coding_system_methods_description_1[] |
| 771 | 331 = { |
| 332 { XD_LISP_OBJECT, | |
| 333 offsetof (struct coding_system_methods, type) }, | |
| 334 { XD_LISP_OBJECT, | |
| 335 offsetof (struct coding_system_methods, predicate_symbol) }, | |
| 336 { XD_END } | |
| 337 }; | |
| 338 | |
| 1204 | 339 const struct sized_memory_description coding_system_methods_description = { |
| 771 | 340 sizeof (struct coding_system_methods), |
| 341 coding_system_methods_description_1 | |
| 342 }; | |
| 343 | |
| 1204 | 344 static const struct sized_memory_description coding_system_extra_description_map[] = |
| 345 { | |
| 346 { offsetof (Lisp_Coding_System, methods) }, | |
| 347 { offsetof (struct coding_system_methods, extra_description) }, | |
| 348 { -1 }, | |
| 771 | 349 }; |
| 350 | |
| 1204 | 351 static const struct memory_description coding_system_description[] = |
| 428 | 352 { |
| 2367 | 353 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1, |
| 2551 | 354 { &coding_system_methods_description } }, |
| 1204 | 355 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) }, |
| 356 #define MARKED_SLOT_ARRAY(slot, size) \ | |
| 357 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size }, | |
| 358 #include "coding-system-slots.h" | |
| 2367 | 359 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1, |
| 2551 | 360 { coding_system_extra_description_map } }, |
| 1204 | 361 { XD_END } |
| 771 | 362 }; |
| 363 | |
| 1204 | 364 static const struct memory_description coding_system_empty_extra_description_1[] = |
| 365 { | |
| 366 { XD_END } | |
| 367 }; | |
| 368 | |
| 369 const struct sized_memory_description coding_system_empty_extra_description = { | |
| 370 0, coding_system_empty_extra_description_1 | |
| 371 }; | |
| 372 | |
| 3263 | 373 #ifdef NEW_GC |
| 374 DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION ("coding-system", coding_system, | |
| 375 1, /*dumpable-flag*/ | |
| 376 mark_coding_system, | |
| 377 print_coding_system, | |
| 378 0, 0, 0, coding_system_description, | |
| 379 sizeof_coding_system, | |
| 380 Lisp_Coding_System); | |
| 381 #else /* not NEW_GC */ | |
| 934 | 382 DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION ("coding-system", coding_system, |
| 383 1, /*dumpable-flag*/ | |
| 384 mark_coding_system, | |
| 385 print_coding_system, | |
| 386 finalize_coding_system, | |
| 387 0, 0, coding_system_description, | |
| 388 sizeof_coding_system, | |
| 389 Lisp_Coding_System); | |
| 3263 | 390 #endif /* not NEW_GC */ |
| 771 | 391 |
| 392 /************************************************************************/ | |
| 393 /* Creating coding systems */ | |
| 394 /************************************************************************/ | |
| 395 | |
| 396 static struct coding_system_methods * | |
| 397 decode_coding_system_type (Lisp_Object type, Error_Behavior errb) | |
| 428 | 398 { |
| 771 | 399 int i; |
| 400 | |
| 401 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++) | |
| 428 | 402 { |
| 771 | 403 if (EQ (type, |
| 404 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type)) | |
| 405 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths; | |
| 428 | 406 } |
| 771 | 407 |
| 408 maybe_invalid_constant ("Invalid coding system type", type, | |
| 409 Qcoding_system, errb); | |
| 410 | |
| 411 return 0; | |
| 428 | 412 } |
| 413 | |
| 771 | 414 static int |
| 415 valid_coding_system_type_p (Lisp_Object type) | |
| 428 | 416 { |
| 771 | 417 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; |
| 418 } | |
| 419 | |
| 420 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* | |
| 421 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | |
| 422 Valid types depend on how XEmacs was compiled but may include | |
| 3025 | 423 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
| 424 `utf-16', `ucs-4', `utf-8', etc. | |
| 771 | 425 */ |
| 426 (coding_system_type)) | |
| 427 { | |
| 428 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; | |
| 429 } | |
| 430 | |
| 431 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /* | |
| 432 Return a list of valid coding system types. | |
| 433 */ | |
| 434 ()) | |
| 435 { | |
| 436 return Fcopy_sequence (Vcoding_system_type_list); | |
| 437 } | |
| 438 | |
| 439 void | |
| 440 add_entry_to_coding_system_type_list (struct coding_system_methods *meths) | |
| 441 { | |
| 442 struct coding_system_type_entry entry; | |
| 443 | |
| 444 entry.meths = meths; | |
| 445 Dynarr_add (the_coding_system_type_entry_dynarr, entry); | |
| 446 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list); | |
| 428 | 447 } |
| 448 | |
| 449 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /* | |
| 450 Return t if OBJECT is a coding system. | |
| 451 A coding system is an object that defines how text containing multiple | |
| 452 character sets is encoded into a stream of (typically 8-bit) bytes. | |
| 453 The coding system is used to decode the stream into a series of | |
| 454 characters (which may be from multiple charsets) when the text is read | |
| 455 from a file or process, and is used to encode the text back into the | |
| 456 same format when it is written out to a file or process. | |
| 457 | |
| 458 For example, many ISO2022-compliant coding systems (such as Compound | |
| 459 Text, which is used for inter-client data under the X Window System) | |
| 460 use escape sequences to switch between different charsets -- Japanese | |
| 461 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked | |
| 462 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See | |
| 463 `make-coding-system' for more information. | |
| 464 | |
| 465 Coding systems are normally identified using a symbol, and the | |
| 466 symbol is accepted in place of the actual coding system object whenever | |
| 467 a coding system is called for. (This is similar to how faces work.) | |
| 468 */ | |
| 469 (object)) | |
| 470 { | |
| 471 return CODING_SYSTEMP (object) ? Qt : Qnil; | |
| 472 } | |
| 473 | |
| 4303 | 474 static Lisp_Object |
| 475 find_coding_system (Lisp_Object coding_system_or_name, | |
| 476 int do_autoloads) | |
| 477 { | |
| 478 Lisp_Object lookup; | |
| 479 | |
| 480 if (NILP (coding_system_or_name)) | |
| 481 coding_system_or_name = Qbinary; | |
| 482 else if (CODING_SYSTEMP (coding_system_or_name)) | |
| 483 return coding_system_or_name; | |
| 484 else | |
| 485 CHECK_SYMBOL (coding_system_or_name); | |
| 486 | |
| 487 while (1) | |
| 488 { | |
| 489 lookup = | |
| 490 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
| 491 | |
| 492 if (CONSP (lookup) && do_autoloads) | |
| 493 { | |
| 494 struct gcpro gcpro1; | |
| 495 int length; | |
| 496 DECLARE_EISTRING (desired_base); | |
| 497 DECLARE_EISTRING (warning_info); | |
| 498 | |
| 499 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name)); | |
| 500 | |
| 501 /* Work out the name of the base coding system. */ | |
| 502 length = eilen (desired_base); | |
| 503 if (length > (int)(sizeof ("-unix") - 1)) | |
| 504 { | |
| 505 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base)) | |
| 506 + (length - (sizeof ("-unix") - 1)))) | |
| 507 { | |
| 508 eidel (desired_base, length - (sizeof ("-unix") - 1), | |
| 509 -1, 5, 5); | |
| 510 } | |
| 511 } | |
| 512 else if (length > (int)(sizeof ("-dos") - 1)) | |
| 513 { | |
| 514 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base)) | |
| 515 + (length - (sizeof ("-dos") - 1)))) || | |
| 516 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base)) | |
| 517 + (length - (sizeof ("-mac") - 1))))) | |
| 518 { | |
| 519 eidel (desired_base, length - (sizeof ("-dos") - 1), -1, | |
| 520 4, 4); | |
| 521 } | |
| 522 } | |
| 523 | |
| 524 coding_system_or_name = intern_int (eidata (desired_base)); | |
| 525 | |
| 526 /* Remove this coding system and its subsidiary coding | |
| 527 systems from the hash, to avoid calling this code recursively. */ | |
| 528 Fremhash (coding_system_or_name, Vcoding_system_hash_table); | |
| 529 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"), | |
| 530 Vcoding_system_hash_table); | |
| 531 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"), | |
| 532 Vcoding_system_hash_table); | |
| 533 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"), | |
| 534 Vcoding_system_hash_table); | |
| 535 | |
| 536 eicpy_ascii (warning_info, "Error autoloading coding system "); | |
| 537 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name)); | |
| 538 | |
| 539 /* Keep around the form so it doesn't disappear from under | |
| 540 #'eval's feet. */ | |
| 541 GCPRO1 (lookup); | |
| 542 call1_trapping_problems ((const CIbyte *)eidata (warning_info), | |
| 543 Qeval, lookup, 0); | |
| 544 UNGCPRO; | |
| 545 | |
| 546 lookup = | |
| 547 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
| 548 } | |
| 549 | |
| 550 if (CODING_SYSTEMP (lookup) || NILP (lookup)) | |
| 551 return lookup; | |
| 552 | |
| 553 coding_system_or_name = lookup; | |
| 554 } | |
| 555 } | |
| 556 | |
| 428 | 557 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* |
| 558 Retrieve the coding system of the given name. | |
| 559 | |
| 560 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply | |
| 561 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. | |
| 562 If there is no such coding system, nil is returned. Otherwise the | |
| 563 associated coding system object is returned. | |
| 564 */ | |
| 565 (coding_system_or_name)) | |
| 566 { | |
| 4303 | 567 return find_coding_system(coding_system_or_name, 1); |
| 568 } | |
| 569 | |
| 570 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /* | |
| 571 Define SYMBOL as a coding-system that is loaded on demand. | |
| 572 | |
| 573 FORM is a form to evaluate to define the coding-system. | |
| 574 */ | |
| 575 (symbol, form)) | |
| 576 { | |
| 577 Lisp_Object lookup; | |
| 578 | |
| 579 CHECK_SYMBOL (symbol); | |
| 580 CHECK_CONS (form); | |
| 581 | |
| 582 lookup = find_coding_system (symbol, 0); | |
| 583 | |
| 584 if (!NILP (lookup) && | |
| 585 /* Allow autoloads to be redefined. */ | |
| 586 !CONSP (lookup)) | |
| 440 | 587 { |
| 4303 | 588 invalid_operation ("Cannot redefine existing coding system", |
| 589 symbol); | |
| 440 | 590 } |
| 4303 | 591 |
| 592 Fputhash (symbol, form, Vcoding_system_hash_table); | |
| 593 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form, | |
| 594 Vcoding_system_hash_table); | |
| 595 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form, | |
| 596 Vcoding_system_hash_table); | |
| 597 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form, | |
| 598 Vcoding_system_hash_table); | |
| 599 | |
| 600 /* Tell the POSIX locale infrastructure about this coding system (though | |
| 601 unfortunately it'll be too late for the startup locale sniffing. */ | |
| 602 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash)) | |
| 603 { | |
| 604 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash); | |
| 605 DECLARE_EISTRING (minimal_name); | |
| 606 Ibyte *full_name; | |
| 607 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i; | |
| 608 | |
| 609 if (!NILP (val)) | |
| 610 { | |
| 611 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol)); | |
| 612 for (i = 0; i < len; ++i) | |
| 613 { | |
| 614 if (full_name[i] >= '0' && full_name[i] <= '9') | |
| 615 { | |
| 616 eicat_ch (minimal_name, full_name[i]); | |
| 617 } | |
| 618 else if (full_name[i] >= 'a' && full_name[i] <= 'z') | |
| 619 { | |
| 620 eicat_ch (minimal_name, full_name[i]); | |
| 621 } | |
| 622 else if (full_name[i] >= 'A' && full_name[i] <= 'Z') | |
| 623 { | |
| 624 eicat_ch (minimal_name, full_name[i] + | |
| 625 ('a' - 'A')); | |
| 626 } | |
| 627 } | |
| 628 | |
| 629 if (eilen (minimal_name)) | |
| 630 { | |
| 631 CHECK_HASH_TABLE (val); | |
| 632 Fputhash (eimake_string(minimal_name), symbol, val); | |
| 633 } | |
| 634 } | |
| 635 } | |
| 636 | |
| 637 return Qt; | |
| 428 | 638 } |
| 639 | |
| 640 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* | |
| 641 Retrieve the coding system of the given name. | |
| 642 Same as `find-coding-system' except that if there is no such | |
| 643 coding system, an error is signaled instead of returning nil. | |
| 644 */ | |
| 645 (name)) | |
| 646 { | |
| 647 Lisp_Object coding_system = Ffind_coding_system (name); | |
| 648 | |
| 649 if (NILP (coding_system)) | |
| 563 | 650 invalid_argument ("No such coding system", name); |
| 428 | 651 return coding_system; |
| 652 } | |
| 653 | |
| 771 | 654 int |
| 655 coding_system_is_binary (Lisp_Object coding_system) | |
| 656 { | |
| 657 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
| 658 return | |
| 659 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) && | |
| 660 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF && | |
| 661 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) && | |
| 662 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil)); | |
| 663 } | |
| 664 | |
| 665 static Lisp_Object | |
| 666 coding_system_real_canonical (Lisp_Object cs) | |
| 667 { | |
| 668 if (!NILP (XCODING_SYSTEM_CANONICAL (cs))) | |
| 669 return XCODING_SYSTEM_CANONICAL (cs); | |
| 670 return cs; | |
| 671 } | |
| 672 | |
| 673 /* Return true if coding system is of the "standard" type that decodes | |
| 674 bytes into characters (suitable for decoding a text file). */ | |
| 675 int | |
| 676 coding_system_is_for_text_file (Lisp_Object coding_system) | |
| 677 { | |
| 678 return (XCODESYSMETH_OR_GIVEN | |
| 679 (coding_system, conversion_end_type, | |
| 680 (coding_system_real_canonical (coding_system)), | |
| 681 DECODES_BYTE_TO_CHARACTER) == | |
| 682 DECODES_BYTE_TO_CHARACTER); | |
| 683 } | |
| 684 | |
| 685 static int | |
| 686 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
| 687 { | |
| 688 enum source_sink_type type = | |
| 689 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type, | |
| 690 (coding_system_real_canonical (cs)), | |
| 691 DECODES_BYTE_TO_CHARACTER); | |
| 692 if (sex == CODING_SOURCE) | |
| 693 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
| 694 type == DECODES_CHARACTER_TO_BYTE); | |
| 695 else | |
| 696 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
| 697 type == DECODES_BYTE_TO_CHARACTER); | |
| 698 } | |
| 699 | |
| 700 static int | |
| 701 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
| 702 { | |
| 703 return decoding_source_sink_type_is_char (cs, | |
| 704 /* Sex change */ | |
| 705 sex == CODING_SOURCE ? | |
| 706 CODING_SINK : CODING_SOURCE); | |
| 707 } | |
| 708 | |
| 709 /* Like Ffind_coding_system() but check that the coding system is of the | |
| 710 "standard" type that decodes bytes into characters (suitable for | |
| 711 decoding a text file), and if not, returns an appropriate wrapper that | |
| 712 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
| 713 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
| 714 system to do this. */ | |
| 715 | |
| 716 Lisp_Object | |
| 717 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
| 718 { | |
| 719 Lisp_Object coding_system = Ffind_coding_system (name); | |
| 720 Lisp_Object wrapper = coding_system; | |
| 721 | |
| 722 if (NILP (coding_system)) | |
| 723 return Qnil; | |
| 724 if (!coding_system_is_for_text_file (coding_system)) | |
| 725 { | |
| 726 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system); | |
| 727 if (NILP (wrapper)) | |
| 728 { | |
| 729 Lisp_Object chain; | |
| 730 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK)) | |
| 731 chain = list2 (coding_system, Qbinary); | |
| 732 else | |
| 733 chain = list1 (coding_system); | |
| 734 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE)) | |
| 735 chain = Fcons (Qbinary, chain); | |
| 736 wrapper = | |
| 737 make_internal_coding_system | |
| 738 (coding_system, | |
| 739 "internal-text-file-wrapper", | |
| 740 Qchain, | |
| 741 Qunbound, list4 (Qchain, chain, | |
| 742 Qcanonicalize_after_coding, coding_system)); | |
| 743 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper; | |
| 744 } | |
| 745 } | |
| 746 | |
| 747 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT) | |
| 748 return wrapper; | |
| 749 | |
| 750 coding_system = wrapper; | |
| 751 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system); | |
| 752 if (!NILP (wrapper)) | |
| 753 return wrapper; | |
| 754 wrapper = | |
| 755 make_internal_coding_system | |
| 756 (coding_system, | |
| 757 "internal-auto-eol-wrapper", | |
| 758 Qundecided, Qunbound, | |
| 759 list4 (Qcoding_system, coding_system, | |
| 760 Qdo_eol, Qt)); | |
| 761 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper; | |
| 762 return wrapper; | |
| 763 } | |
| 764 | |
| 765 /* Like Fget_coding_system() but verify that the coding system is of the | |
| 766 "standard" type that decodes bytes into characters (suitable for | |
| 767 decoding a text file), and if not, returns an appropriate wrapper that | |
| 768 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
| 769 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
| 770 system to do this. */ | |
| 771 | |
| 772 Lisp_Object | |
| 773 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
| 774 { | |
| 775 Lisp_Object coding_system = find_coding_system_for_text_file (name, | |
| 776 eol_wrap); | |
| 777 if (NILP (coding_system)) | |
| 778 invalid_argument ("No such coding system", name); | |
| 779 return coding_system; | |
| 780 } | |
| 781 | |
| 782 /* We store the coding systems in hash tables with the names as the | |
| 783 key and the actual coding system object as the value. Occasionally | |
| 784 we need to use them in a list format. These routines provide us | |
| 785 with that. */ | |
| 428 | 786 struct coding_system_list_closure |
| 787 { | |
| 788 Lisp_Object *coding_system_list; | |
| 771 | 789 int normal; |
| 790 int internal; | |
| 428 | 791 }; |
| 792 | |
| 793 static int | |
| 4303 | 794 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, |
| 428 | 795 void *coding_system_list_closure) |
| 796 { | |
| 797 /* This function can GC */ | |
| 798 struct coding_system_list_closure *cscl = | |
| 799 (struct coding_system_list_closure *) coding_system_list_closure; | |
| 800 Lisp_Object *coding_system_list = cscl->coding_system_list; | |
| 801 | |
| 771 | 802 /* We can't just use VALUE because KEY might be an alias, and we need |
| 4303 | 803 the real coding system object. |
| 804 | |
| 805 Autoloaded coding systems have conses for their values, and can't be | |
| 806 internal coding systems, or coding system aliases. */ | |
| 807 if (CONSP (value) || | |
| 808 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | |
| 809 cscl->internal : cscl->normal)) | |
| 771 | 810 *coding_system_list = Fcons (key, *coding_system_list); |
| 428 | 811 return 0; |
| 812 } | |
| 813 | |
| 2297 | 814 /* #### should we specify a conventional for "all coding systems"? */ |
| 771 | 815 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /* |
| 428 | 816 Return a list of the names of all defined coding systems. |
| 771 | 817 If INTERNAL is nil, only the normal (non-internal) coding systems are |
| 818 included. (Internal coding systems are created for various internal | |
| 819 purposes, such as implementing EOL types of CRLF and CR; generally, you do | |
| 820 not want to see these.) If it is t, only the internal coding systems are | |
| 821 included. If it is any other non-nil value both normal and internal are | |
| 822 included. | |
| 428 | 823 */ |
| 771 | 824 (internal)) |
| 428 | 825 { |
| 826 Lisp_Object coding_system_list = Qnil; | |
| 827 struct gcpro gcpro1; | |
| 828 struct coding_system_list_closure coding_system_list_closure; | |
| 829 | |
| 830 GCPRO1 (coding_system_list); | |
| 831 coding_system_list_closure.coding_system_list = &coding_system_list; | |
| 771 | 832 coding_system_list_closure.normal = !EQ (internal, Qt); |
| 833 coding_system_list_closure.internal = !NILP (internal); | |
| 428 | 834 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table, |
| 835 &coding_system_list_closure); | |
| 836 UNGCPRO; | |
| 837 | |
| 838 return coding_system_list; | |
| 839 } | |
| 840 | |
| 841 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /* | |
| 842 Return the name of the given coding system. | |
| 843 */ | |
| 844 (coding_system)) | |
| 845 { | |
| 846 coding_system = Fget_coding_system (coding_system); | |
| 847 return XCODING_SYSTEM_NAME (coding_system); | |
| 848 } | |
| 849 | |
| 850 static Lisp_Coding_System * | |
| 771 | 851 allocate_coding_system (struct coding_system_methods *codesys_meths, |
| 852 Bytecount data_size, | |
| 853 Lisp_Object name) | |
| 428 | 854 { |
| 771 | 855 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size; |
| 428 | 856 Lisp_Coding_System *codesys = |
| 3024 | 857 (Lisp_Coding_System *) BASIC_ALLOC_LCRECORD (total_size, |
| 1204 | 858 &lrecord_coding_system); |
| 859 | |
| 771 | 860 codesys->methods = codesys_meths; |
| 1204 | 861 #define MARKED_SLOT(x) codesys->x = Qnil; |
| 862 #include "coding-system-slots.h" | |
| 863 | |
| 771 | 864 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF; |
| 865 CODING_SYSTEM_NAME (codesys) = name; | |
| 866 | |
| 867 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys))); | |
| 428 | 868 |
| 869 return codesys; | |
| 870 } | |
| 871 | |
| 771 | 872 static enum eol_type |
| 873 symbol_to_eol_type (Lisp_Object symbol) | |
| 874 { | |
| 875 CHECK_SYMBOL (symbol); | |
| 876 if (NILP (symbol)) return EOL_AUTODETECT; | |
| 877 if (EQ (symbol, Qlf)) return EOL_LF; | |
| 878 if (EQ (symbol, Qcrlf)) return EOL_CRLF; | |
| 879 if (EQ (symbol, Qcr)) return EOL_CR; | |
| 880 | |
| 881 invalid_constant ("Unrecognized eol type", symbol); | |
| 1204 | 882 RETURN_NOT_REACHED (EOL_AUTODETECT); |
| 771 | 883 } |
| 884 | |
| 885 static Lisp_Object | |
| 886 eol_type_to_symbol (enum eol_type type) | |
| 887 { | |
| 888 switch (type) | |
| 889 { | |
| 2500 | 890 default: ABORT (); |
| 771 | 891 case EOL_LF: return Qlf; |
| 892 case EOL_CRLF: return Qcrlf; | |
| 893 case EOL_CR: return Qcr; | |
| 894 case EOL_AUTODETECT: return Qnil; | |
| 895 } | |
| 896 } | |
| 897 | |
| 898 struct subsidiary_type | |
| 899 { | |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
900 const Ascbyte *extension; |
|
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
901 const Ascbyte *mnemonic_ext; |
| 771 | 902 enum eol_type eol; |
| 903 }; | |
| 904 | |
| 905 static struct subsidiary_type coding_subsidiary_list[] = | |
| 906 { { "-unix", "", EOL_LF }, | |
| 907 { "-dos", ":T", EOL_CRLF }, | |
| 908 { "-mac", ":t", EOL_CR } }; | |
| 909 | |
| 910 /* kludge */ | |
| 428 | 911 static void |
| 771 | 912 setup_eol_coding_systems (Lisp_Object codesys) |
| 428 | 913 { |
| 793 | 914 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name); |
| 2367 | 915 Ibyte *codesys_name = alloca_ibytes (len + 7); |
| 771 | 916 int mlen = -1; |
| 867 | 917 Ibyte *codesys_mnemonic = 0; |
| 771 | 918 Lisp_Object codesys_name_sym, sub_codesys; |
| 919 int i; | |
| 920 | |
| 921 memcpy (codesys_name, | |
| 793 | 922 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len); |
| 771 | 923 |
| 924 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys))) | |
| 428 | 925 { |
| 771 | 926 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys)); |
| 2367 | 927 codesys_mnemonic = alloca_ibytes (mlen + 7); |
| 771 | 928 memcpy (codesys_mnemonic, |
| 929 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen); | |
| 930 } | |
| 931 | |
| 932 /* Create three "subsidiary" coding systems, decoding data encoded using | |
| 933 each of the three EOL types. We do this for each subsidiary by | |
| 934 copying the original coding system, setting the EOL type | |
| 935 appropriately, and setting the CANONICAL member of the new coding | |
| 936 system to be a chain consisting of the original coding system followed | |
| 937 by a convert-eol coding system to do the EOL decoding. For EOL type | |
| 938 LF, however, we don't need any decoding, so we skip creating a | |
| 939 CANONICAL. | |
| 940 | |
| 941 If the original coding system is not a text-type coding system | |
| 942 (decodes byte->char), we need to coerce it to one by the appropriate | |
| 943 wrapping in CANONICAL. */ | |
| 944 | |
| 945 for (i = 0; i < countof (coding_subsidiary_list); i++) | |
| 946 { | |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
947 const Ascbyte *extension = coding_subsidiary_list[i].extension; |
|
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
948 const Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; |
| 771 | 949 enum eol_type eol = coding_subsidiary_list[i].eol; |
| 950 | |
| 2367 | 951 qxestrcpy_ascii (codesys_name + len, extension); |
| 771 | 952 codesys_name_sym = intern_int (codesys_name); |
| 953 if (mlen != -1) | |
| 2367 | 954 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext); |
| 771 | 955 |
| 956 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym); | |
| 957 if (mlen != -1) | |
| 958 XCODING_SYSTEM_MNEMONIC (sub_codesys) = | |
| 959 build_intstring (codesys_mnemonic); | |
| 960 | |
| 961 if (eol != EOL_LF) | |
| 962 { | |
| 963 Lisp_Object chain = list2 (get_coding_system_for_text_file | |
| 964 (codesys, 0), | |
| 965 eol == EOL_CR ? Qconvert_eol_cr : | |
| 966 Qconvert_eol_crlf); | |
| 967 Lisp_Object canon = | |
| 968 make_internal_coding_system | |
| 969 (sub_codesys, "internal-subsidiary-eol-wrapper", | |
| 970 Qchain, Qunbound, | |
| 971 mlen != -1 ? | |
| 972 list6 (Qmnemonic, build_intstring (codesys_mnemonic), | |
| 973 Qchain, chain, | |
| 974 Qcanonicalize_after_coding, sub_codesys) : | |
| 975 list4 (Qchain, chain, | |
| 976 Qcanonicalize_after_coding, sub_codesys)); | |
| 977 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon; | |
| 978 } | |
| 979 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol; | |
| 980 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; | |
| 981 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; | |
| 428 | 982 } |
| 983 } | |
| 984 | |
| 771 | 985 /* Basic function to create new coding systems. For `make-coding-system', |
| 986 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, | |
| 987 DESCRIPTION, and PROPS are the same. All created coding systems are put | |
| 988 in a hash table indexed by NAME. | |
| 989 | |
| 990 If PREFIX is a string, NAME-OR-EXISTING should specify an existing | |
| 991 coding system (or nil), and an internal coding system will be created. | |
| 992 The name of the coding system will be constructed by combining PREFIX | |
| 993 with the name of the existing coding system (if given), and a number | |
| 994 will be appended to insure uniqueness. In such a case, if Qunbound is | |
| 995 given for DESCRIPTION, the description gets created based on the | |
| 996 generated name. Also, if no mnemonic is given in the properties list, a | |
| 997 mnemonic is created based on the generated name. | |
| 998 | |
| 999 For internal coding systems, the coding system is marked as internal | |
| 1000 (see `coding-system-list'), and no subsidiaries will be created or | |
| 1001 eol-wrapping will happen. Otherwise: | |
| 1002 | |
| 1003 -- if the eol-type property is `lf' or t, the coding system is merely | |
| 1004 created and returned. (For t, the coding system will be wrapped with | |
| 1005 an EOL autodetector when it's used to read a file.) | |
| 1006 | |
| 1007 -- if eol-type is `crlf' or `cr', after the coding system object is | |
| 1008 created, it will be wrapped in a chain with the appropriate | |
| 1009 convert-eol coding system (either `convert-eol-crlf' or | |
| 1010 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at | |
| 1011 decoding time, and the opposite at encoding time. The resulting | |
| 1012 chain becomes the CANONICAL field of the coding system object. | |
| 1013 | |
| 1014 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three | |
| 1015 coding systems where the original coding system (before wrapping with | |
| 1016 convert-eol-autodetect) is either unwrapped or wrapped with | |
| 1017 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems | |
| 1018 to handle LF, CRLF, and CR end-of-line indicators are created. (This | |
| 1019 crazy crap is based on existing behavior in other Mule versions, | |
| 1020 including FSF Emacs.) | |
| 1021 */ | |
| 428 | 1022 |
| 1023 static Lisp_Object | |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1024 make_coding_system_1 (Lisp_Object name_or_existing, const Ascbyte *prefix, |
| 771 | 1025 Lisp_Object type, Lisp_Object description, |
| 1026 Lisp_Object props) | |
| 428 | 1027 { |
| 771 | 1028 Lisp_Coding_System *cs; |
| 1029 int need_to_setup_eol_systems = 1; | |
| 1030 enum eol_type eol_wrapper = EOL_AUTODETECT; | |
| 1031 struct coding_system_methods *meths; | |
| 1032 Lisp_Object csobj; | |
| 1033 Lisp_Object defmnem = Qnil; | |
| 1034 | |
| 1035 if (NILP (type)) | |
| 1036 type = Qundecided; | |
| 1037 meths = decode_coding_system_type (type, ERROR_ME); | |
| 1038 | |
| 1039 if (prefix) | |
| 428 | 1040 { |
| 867 | 1041 Ibyte *newname = |
| 771 | 1042 emacs_sprintf_malloc (NULL, "%s-%s-%d", |
| 1043 prefix, | |
| 867 | 1044 NILP (name_or_existing) ? (Ibyte *) "nil" : |
| 771 | 1045 XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME |
| 1046 (name_or_existing))), | |
| 1047 ++coding_system_tick); | |
| 1048 name_or_existing = intern_int (newname); | |
| 1726 | 1049 xfree (newname, Ibyte *); |
| 771 | 1050 |
| 1051 if (UNBOUNDP (description)) | |
| 1052 { | |
| 1053 newname = | |
| 1054 emacs_sprintf_malloc | |
| 1055 (NULL, "For Internal Use (%s)", | |
| 1056 XSTRING_DATA (Fsymbol_name (name_or_existing))); | |
| 1057 description = build_intstring (newname); | |
| 1726 | 1058 xfree (newname, Ibyte *); |
| 771 | 1059 } |
| 1060 | |
| 1061 newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick); | |
| 1062 defmnem = build_intstring (newname); | |
| 1726 | 1063 xfree (newname, Ibyte *); |
| 428 | 1064 } |
| 771 | 1065 else |
| 1066 CHECK_SYMBOL (name_or_existing); | |
| 1067 | |
| 4303 | 1068 /* See is there an entry for name_or_existing in the defined coding system |
| 1069 hash table. */ | |
| 1070 csobj = find_coding_system (name_or_existing, 0); | |
| 1071 /* Error if it's there and not an autoload form. */ | |
| 1072 if (!NILP (csobj) && !CONSP (csobj)) | |
| 771 | 1073 invalid_operation ("Cannot redefine existing coding system", |
| 4303 | 1074 name_or_existing); |
| 771 | 1075 |
| 1076 cs = allocate_coding_system (meths, meths->extra_data_size, | |
| 1077 name_or_existing); | |
| 793 | 1078 csobj = wrap_coding_system (cs); |
| 771 | 1079 |
| 1080 cs->internal_p = !!prefix; | |
| 1081 | |
| 1082 if (NILP (description)) | |
| 1083 description = build_string (""); | |
| 1084 else | |
| 1085 CHECK_STRING (description); | |
| 1086 CODING_SYSTEM_DESCRIPTION (cs) = description; | |
| 1087 | |
| 1088 if (!NILP (defmnem)) | |
| 1089 CODING_SYSTEM_MNEMONIC (cs) = defmnem; | |
| 1090 | |
| 1091 { | |
| 1092 EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) | |
| 1093 { | |
| 1094 int recognized = 1; | |
| 1095 | |
| 1096 if (EQ (key, Qmnemonic)) | |
| 1097 { | |
| 1098 if (!NILP (value)) | |
| 1099 CHECK_STRING (value); | |
| 1100 CODING_SYSTEM_MNEMONIC (cs) = value; | |
| 1101 } | |
| 1102 | |
| 1103 else if (EQ (key, Qdocumentation)) | |
| 1104 { | |
| 1105 if (!NILP (value)) | |
| 1106 CHECK_STRING (value); | |
| 1107 CODING_SYSTEM_DOCUMENTATION (cs) = value; | |
| 1108 } | |
| 1109 | |
| 1110 else if (EQ (key, Qeol_type)) | |
| 1111 { | |
| 1112 need_to_setup_eol_systems = NILP (value); | |
| 1113 if (EQ (value, Qt)) | |
| 1114 value = Qnil; | |
| 1115 eol_wrapper = symbol_to_eol_type (value); | |
| 1116 } | |
| 1117 | |
| 1118 else if (EQ (key, Qpost_read_conversion)) | |
| 1119 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; | |
| 1120 else if (EQ (key, Qpre_write_conversion)) | |
| 1121 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; | |
| 1122 /* FSF compatibility */ | |
| 1123 else if (EQ (key, Qtranslation_table_for_decode)) | |
| 1124 ; | |
| 1125 else if (EQ (key, Qtranslation_table_for_encode)) | |
| 1126 ; | |
| 1127 else if (EQ (key, Qsafe_chars)) | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1128 CODING_SYSTEM_SAFE_CHARS (cs) = value; |
| 771 | 1129 else if (EQ (key, Qsafe_charsets)) |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1130 CODING_SYSTEM_SAFE_CHARSETS (cs) = value; |
| 771 | 1131 else if (EQ (key, Qmime_charset)) |
| 1132 ; | |
| 1133 else if (EQ (key, Qvalid_codes)) | |
| 1134 ; | |
| 1135 else | |
| 1136 recognized = CODESYSMETH_OR_GIVEN (cs, putprop, | |
| 1137 (csobj, key, value), 0); | |
| 1138 | |
| 1139 if (!recognized) | |
| 1140 invalid_constant ("Unrecognized property", key); | |
| 1141 } | |
| 1142 } | |
| 1143 | |
| 1144 { | |
| 1145 XCODING_SYSTEM_CANONICAL (csobj) = | |
| 1146 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); | |
| 1147 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system | |
| 1148 below */ | |
| 1149 | |
| 4303 | 1150 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); |
| 1151 | |
| 771 | 1152 if (need_to_setup_eol_systems && !cs->internal_p) |
| 1153 setup_eol_coding_systems (csobj); | |
| 1154 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) | |
| 1155 { | |
| 1156 /* If a specific eol-type (other than LF) was specified, we handle | |
| 1157 this by converting the coding system into a chain that wraps the | |
| 1158 coding system along with a convert-eol system after it, in | |
| 1159 exactly that same switcheroo fashion that the normal | |
| 1160 canonicalize method works -- BUT we will run into a problem if | |
| 1161 we do it the obvious way, because when `chain' creates its | |
| 1162 substreams, the substream containing the coding system we're | |
| 1163 creating will have canonicalization expansion done on it, | |
| 1164 leading to infinite recursion. So we have to generate a new, | |
| 1165 internal coding system with the previous value of CANONICAL. */ | |
| 867 | 1166 Ibyte *newname = |
| 771 | 1167 emacs_sprintf_malloc |
| 1168 (NULL, "internal-eol-copy-%s-%d", | |
| 1169 XSTRING_DATA (Fsymbol_name (name_or_existing)), | |
| 1170 ++coding_system_tick); | |
| 1171 Lisp_Object newnamesym = intern_int (newname); | |
| 1172 Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym); | |
| 1726 | 1173 xfree (newname, Ibyte *); |
| 771 | 1174 |
| 1175 XCODING_SYSTEM_CANONICAL (csobj) = | |
| 1176 make_internal_coding_system | |
| 1177 (csobj, | |
| 1178 "internal-eol-wrapper", | |
| 1179 Qchain, Qunbound, | |
| 1180 list4 (Qchain, | |
| 1181 list2 (copied, | |
| 1182 eol_wrapper == EOL_CR ? | |
| 1183 Qconvert_eol_cr : | |
| 1184 Qconvert_eol_crlf), | |
| 1185 Qcanonicalize_after_coding, | |
| 1186 csobj)); | |
| 1187 } | |
| 1188 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; | |
| 1189 } | |
| 1190 | |
| 1191 return csobj; | |
| 428 | 1192 } |
| 1193 | |
| 771 | 1194 Lisp_Object |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1195 make_internal_coding_system (Lisp_Object existing, const Ascbyte *prefix, |
| 771 | 1196 Lisp_Object type, Lisp_Object description, |
| 1197 Lisp_Object props) | |
| 1198 { | |
| 1199 return make_coding_system_1 (existing, prefix, type, description, props); | |
| 1200 } | |
| 428 | 1201 |
| 1202 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* | |
| 1203 Register symbol NAME as a coding system. | |
| 1204 | |
| 1205 TYPE describes the conversion method used and should be one of | |
| 1206 | |
| 3025 | 1207 nil or `undecided' |
| 428 | 1208 Automatic conversion. XEmacs attempts to detect the coding system |
| 1209 used in the file. | |
| 3025 | 1210 `chain' |
| 771 | 1211 Chain two or more coding systems together to make a combination coding |
| 1212 system. | |
| 3025 | 1213 `no-conversion' |
| 428 | 1214 No conversion. Use this for binary files and such. On output, |
| 1215 graphic characters that are not in ASCII or Latin-1 will be | |
| 1216 replaced by a ?. (For a no-conversion-encoded buffer, these | |
| 1217 characters will only be present if you explicitly insert them.) | |
| 3025 | 1218 `convert-eol' |
| 771 | 1219 Convert CRLF sequences or CR to LF. |
| 3025 | 1220 `shift-jis' |
| 428 | 1221 Shift-JIS (a Japanese encoding commonly used in PC operating systems). |
| 3025 | 1222 `unicode' |
| 771 | 1223 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.). |
| 3025 | 1224 `mswindows-unicode-to-multibyte' |
| 771 | 1225 (MS Windows only) Converts from Windows Unicode to Windows Multibyte |
| 1226 (any code page encoding) upon encoding, and the other way upon decoding. | |
| 3025 | 1227 `mswindows-multibyte' |
| 771 | 1228 Converts to or from Windows Multibyte (any code page encoding). |
| 1229 This is resolved into a chain of `mswindows-unicode' and | |
| 1230 `mswindows-unicode-to-multibyte'. | |
| 3025 | 1231 `iso2022' |
| 428 | 1232 Any ISO2022-compliant encoding. Among other things, this includes |
| 1233 JIS (the Japanese encoding commonly used for e-mail), EUC (the | |
| 1234 standard Unix encoding for Japanese and other languages), and | |
| 1235 Compound Text (the encoding used in X11). You can specify more | |
| 442 | 1236 specific information about the conversion with the PROPS argument. |
| 3025 | 1237 `big5' |
| 2819 | 1238 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan). |
| 3025 | 1239 `ccl' |
| 428 | 1240 The conversion is performed using a user-written pseudo-code |
| 1241 program. CCL (Code Conversion Language) is the name of this | |
| 1242 pseudo-code. | |
| 3025 | 1243 `gzip' |
| 771 | 1244 GZIP compression format. |
| 3025 | 1245 `internal' |
| 428 | 1246 Write out or read in the raw contents of the memory representing |
| 1247 the buffer's text. This is primarily useful for debugging | |
| 1248 purposes, and is only enabled when XEmacs has been compiled with | |
| 1249 DEBUG_XEMACS defined (via the --debug configure option). | |
| 3025 | 1250 WARNING: Reading in a file using `internal' conversion can result |
| 428 | 1251 in an internal inconsistency in the memory representing a |
| 1252 buffer's text, which will produce unpredictable results and may | |
| 1253 cause XEmacs to crash. Under normal circumstances you should | |
| 3025 | 1254 never use `internal' conversion. |
| 428 | 1255 |
| 771 | 1256 DESCRIPTION is a short English phrase describing the coding system, |
| 1257 suitable for use as a menu item. (See also the `documentation' property | |
| 1258 below.) | |
| 428 | 1259 |
| 1260 PROPS is a property list, describing the specific nature of the | |
| 1261 character set. Recognized properties are: | |
| 1262 | |
| 3025 | 1263 `mnemonic' |
| 428 | 1264 String to be displayed in the modeline when this coding system is |
| 1265 active. | |
| 1266 | |
| 3025 | 1267 `documentation' |
| 771 | 1268 Detailed documentation on the coding system. |
| 1269 | |
| 3025 | 1270 `eol-type' |
| 428 | 1271 End-of-line conversion to be used. It should be one of |
| 1272 | |
| 1273 nil | |
| 1274 Automatically detect the end-of-line type (LF, CRLF, | |
| 1275 or CR). Also generate subsidiary coding systems named | |
| 1276 `NAME-unix', `NAME-dos', and `NAME-mac', that are | |
| 1277 identical to this coding system but have an EOL-TYPE | |
| 3025 | 1278 value of `lf', `crlf', and `cr', respectively. |
| 1279 `lf' | |
| 428 | 1280 The end of a line is marked externally using ASCII LF. |
| 1281 Since this is also the way that XEmacs represents an | |
| 1282 end-of-line internally, specifying this option results | |
| 1283 in no end-of-line conversion. This is the standard | |
| 1284 format for Unix text files. | |
| 3025 | 1285 `crlf' |
| 428 | 1286 The end of a line is marked externally using ASCII |
| 1287 CRLF. This is the standard format for MS-DOS text | |
| 1288 files. | |
| 3025 | 1289 `cr' |
| 428 | 1290 The end of a line is marked externally using ASCII CR. |
| 1291 This is the standard format for Macintosh text files. | |
| 1292 t | |
| 1293 Automatically detect the end-of-line type but do not | |
| 1294 generate subsidiary coding systems. (This value is | |
| 1295 converted to nil when stored internally, and | |
| 1296 `coding-system-property' will return nil.) | |
| 1297 | |
| 3025 | 1298 `post-read-conversion' |
| 771 | 1299 The value is a function to call after some text is inserted and |
| 1300 decoded by the coding system itself and before any functions in | |
| 1301 `after-change-functions' are called. (#### Not actually true in | |
| 1302 XEmacs. `after-change-functions' will be called twice if | |
| 1303 `post-read-conversion' changes something.) The argument of this | |
| 1304 function is the same as for a function in | |
| 1305 `after-insert-file-functions', i.e. LENGTH of the text inserted, | |
| 1306 with point at the head of the text to be decoded. | |
| 428 | 1307 |
| 3025 | 1308 `pre-write-conversion' |
| 771 | 1309 The value is a function to call after all functions in |
| 1310 `write-region-annotate-functions' and `buffer-file-format' are | |
| 1311 called, and before the text is encoded by the coding system itself. | |
| 1312 The arguments to this function are the same as those of a function | |
| 1313 in `write-region-annotate-functions', i.e. FROM and TO, specifying | |
| 1314 a region of text. | |
| 1315 | |
| 1316 | |
| 1317 | |
| 1318 The following properties are allowed for FSF compatibility but currently | |
| 1319 ignored: | |
| 1320 | |
| 3025 | 1321 `translation-table-for-decode' |
| 771 | 1322 The value is a translation table to be applied on decoding. See |
| 1323 the function `make-translation-table' for the format of translation | |
| 1324 table. This is not applicable to CCL-based coding systems. | |
| 1325 | |
| 3025 | 1326 `translation-table-for-encode' |
| 771 | 1327 The value is a translation table to be applied on encoding. This is |
| 1328 not applicable to CCL-based coding systems. | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1329 |
| 3025 | 1330 `mime-charset' |
| 771 | 1331 The value is a symbol of which name is `MIME-charset' parameter of |
| 1332 the coding system. | |
| 1333 | |
| 3025 | 1334 `valid-codes' (meaningful only for a coding system based on CCL) |
| 771 | 1335 The value is a list to indicate valid byte ranges of the encoded |
| 1336 file. Each element of the list is an integer or a cons of integer. | |
| 1337 In the former case, the integer value is a valid byte code. In the | |
| 1338 latter case, the integers specifies the range of valid byte codes. | |
| 1339 | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1340 The following properties are used by `default-query-coding-region', |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1341 the default implementation of `query-coding-region'. This |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1342 implementation and these properties are not used by the Unicode coding |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1343 systems, nor by those CCL coding systems created with |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1344 `make-8-bit-coding-system'. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1345 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1346 `safe-chars' |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1347 The value is a char table. If a character has non-nil value in it, |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1348 the character is safely supported by the coding system. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1349 Under XEmacs, for the moment, this is used in addition to the |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1350 `safe-charsets' property. It does not override it as it does |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1351 under GNU Emacs. #### We need to consider if we should keep this |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1352 behaviour. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1353 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1354 `safe-charsets' |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1355 The value is a list of charsets safely supported by the coding |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1356 system. For coding systems based on ISO 2022, XEmacs may try to |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1357 encode characters outside these character sets, but outside of |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1358 East Asia and East Asian coding systems, it is unlikely that |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1359 consumers of the data will understand XEmacs' encoding. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1360 The value t means that all XEmacs character sets handles are supported. |
| 771 | 1361 |
| 3025 | 1362 The following additional property is recognized if TYPE is `convert-eol': |
| 1363 | |
| 1364 `subtype' | |
| 793 | 1365 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding, |
| 1366 the corresponding sequence will be converted to LF. When encoding, | |
| 1367 the opposite happens. This coding system converts characters to | |
| 771 | 1368 characters. |
| 1369 | |
| 428 | 1370 |
| 1371 | |
| 3025 | 1372 The following additional properties are recognized if TYPE is `iso2022': |
| 1373 | |
| 1374 `charset-g0' | |
| 1375 `charset-g1' | |
| 1376 `charset-g2' | |
| 1377 `charset-g3' | |
| 428 | 1378 The character set initially designated to the G0 - G3 registers. |
| 1379 The value should be one of | |
| 1380 | |
| 1381 -- A charset object (designate that character set) | |
| 1382 -- nil (do not ever use this register) | |
| 1383 -- t (no character set is initially designated to | |
| 1384 the register, but may be later on; this automatically | |
| 1385 sets the corresponding `force-g*-on-output' property) | |
| 1386 | |
| 3025 | 1387 `force-g0-on-output' |
| 1388 `force-g1-on-output' | |
| 1389 `force-g2-on-output' | |
| 1390 `force-g2-on-output' | |
| 428 | 1391 If non-nil, send an explicit designation sequence on output before |
| 1392 using the specified register. | |
| 1393 | |
| 3025 | 1394 `short' |
| 428 | 1395 If non-nil, use the short forms "ESC $ @", "ESC $ A", and |
| 1396 "ESC $ B" on output in place of the full designation sequences | |
| 1397 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B". | |
| 1398 | |
| 3025 | 1399 `no-ascii-eol' |
| 428 | 1400 If non-nil, don't designate ASCII to G0 at each end of line on output. |
| 1401 Setting this to non-nil also suppresses other state-resetting that | |
| 1402 normally happens at the end of a line. | |
| 1403 | |
| 3025 | 1404 `no-ascii-cntl' |
| 428 | 1405 If non-nil, don't designate ASCII to G0 before control chars on output. |
| 1406 | |
| 3025 | 1407 `seven' |
| 428 | 1408 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit |
| 1409 environment. | |
| 1410 | |
| 3025 | 1411 `lock-shift' |
| 428 | 1412 If non-nil, use locking-shift (SO/SI) instead of single-shift |
| 1413 or designation by escape sequence. | |
| 1414 | |
| 3025 | 1415 `no-iso6429' |
| 428 | 1416 If non-nil, don't use ISO6429's direction specification. |
| 1417 | |
| 3025 | 1418 `escape-quoted' |
| 428 | 1419 If non-nil, literal control characters that are the same as |
| 1420 the beginning of a recognized ISO2022 or ISO6429 escape sequence | |
| 1421 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E), | |
| 1422 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character | |
| 1423 so that they can be properly distinguished from an escape sequence. | |
| 1424 (Note that doing this results in a non-portable encoding.) This | |
| 1425 encoding flag is used for byte-compiled files. Note that ESC | |
| 1426 is a good choice for a quoting character because there are no | |
| 1427 escape sequences whose second byte is a character from the Control-0 | |
| 1428 or Control-1 character sets; this is explicitly disallowed by the | |
| 1429 ISO2022 standard. | |
| 1430 | |
| 3025 | 1431 `input-charset-conversion' |
| 428 | 1432 A list of conversion specifications, specifying conversion of |
| 1433 characters in one charset to another when decoding is performed. | |
| 1434 Each specification is a list of two elements: the source charset, | |
| 1435 and the destination charset. | |
| 1436 | |
| 3025 | 1437 `output-charset-conversion' |
| 428 | 1438 A list of conversion specifications, specifying conversion of |
| 1439 characters in one charset to another when encoding is performed. | |
| 1440 The form of each specification is the same as for | |
| 3025 | 1441 `input-charset-conversion'. |
| 428 | 1442 |
| 1443 | |
| 771 | 1444 |
| 428 | 1445 The following additional properties are recognized (and required) |
| 3025 | 1446 if TYPE is `ccl': |
| 1447 | |
| 1448 `decode' | |
| 428 | 1449 CCL program used for decoding (converting to internal format). |
| 1450 | |
| 3025 | 1451 `encode' |
| 428 | 1452 CCL program used for encoding (converting to external format). |
| 771 | 1453 |
| 1454 | |
| 3025 | 1455 The following additional properties are recognized if TYPE is `chain': |
| 1456 | |
| 1457 `chain' | |
| 771 | 1458 List of coding systems to be chained together, in decoding order. |
| 1459 | |
| 3025 | 1460 `canonicalize-after-coding' |
| 771 | 1461 Coding system to be returned by the detector routines in place of |
| 1462 this coding system. | |
| 1463 | |
| 1464 | |
| 1465 | |
| 3025 | 1466 The following additional properties are recognized if TYPE is `unicode': |
| 1467 | |
| 3767 | 1468 `unicode-type' |
| 771 | 1469 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not |
| 1470 yet implemented). `utf-16' is the basic two-byte encoding; | |
| 1471 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible | |
| 1472 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using | |
| 1473 only characters that will safely pass through all mail gateways. | |
| 2297 | 1474 [[ This should be \"transformation format\". There should also be |
| 1475 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]] | |
| 771 | 1476 |
| 3025 | 1477 `little-endian' |
| 771 | 1478 If non-nil, `utf-16' and `ucs-4' will write out the groups of two |
| 1479 or four bytes little-endian instead of big-endian. This is required, | |
| 1480 for example, under Windows. | |
| 1481 | |
| 3025 | 1482 `need-bom' |
| 771 | 1483 If non-nil, a byte order mark (BOM, or Unicode FFFE) should be |
| 1484 written out at the beginning of the data. This serves both to | |
| 1485 identify the endianness of the following data and to mark the | |
| 1486 data as Unicode (at least, this is how Windows uses it). | |
| 2297 | 1487 [[ The correct term is \"signature\", since this technique may also |
| 1488 be used with UTF-8. That is the term used in the standard. ]] | |
| 771 | 1489 |
| 1490 | |
| 1491 The following additional properties are recognized if TYPE is | |
| 3025 | 1492 `mswindows-multibyte': |
| 1493 | |
| 1494 `code-page' | |
| 771 | 1495 Either a number (specifying a particular code page) or one of the |
| 1496 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI, | |
| 1497 OEM, Macintosh, or EBCDIC code page associated with a particular | |
| 1498 locale (given by the `locale' property). NOTE: EBCDIC code pages | |
| 1499 only exist in Windows 2000 and later. | |
| 1500 | |
| 3025 | 1501 `locale' |
| 771 | 1502 If `code-page' is a symbol, this specifies the locale whose code |
| 1503 page of the corresponding type should be used. This should be | |
| 1504 one of the following: A cons of two strings, (LANGUAGE | |
| 1505 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a | |
| 1506 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is | |
| 1507 used); or one of the symbols `current', `user-default', or | |
| 1508 `system-default', corresponding to the values of | |
| 1509 `mswindows-current-locale', `mswindows-user-default-locale', or | |
| 1510 `mswindows-system-default-locale', respectively. | |
| 1511 | |
| 1512 | |
| 1513 | |
| 3025 | 1514 The following additional properties are recognized if TYPE is `undecided': |
| 4072 | 1515 \[[ Doesn't GNU use \"detect-*\" for the following two? ]] |
| 771 | 1516 |
| 3025 | 1517 `do-eol' |
| 771 | 1518 Do EOL detection. |
| 1519 | |
| 3025 | 1520 `do-coding' |
| 771 | 1521 Do encoding detection. |
| 1522 | |
| 3025 | 1523 `coding-system' |
| 771 | 1524 If encoding detection is not done, use the specified coding system |
| 1525 to do decoding. This is used internally when implementing coding | |
| 1526 systems with an EOL type that specifies autodetection (the default), | |
| 1527 so that the detector routines return the proper subsidiary. | |
| 1528 | |
| 1529 | |
| 1530 | |
| 3025 | 1531 The following additional property is recognized if TYPE is `gzip': |
| 1532 | |
| 1533 `level' | |
| 771 | 1534 Compression level: 0 through 9, or `default' (currently 6). |
| 1535 | |
| 428 | 1536 */ |
| 771 | 1537 (name, type, description, props)) |
| 428 | 1538 { |
| 771 | 1539 return make_coding_system_1 (name, 0, type, description, props); |
| 428 | 1540 } |
| 1541 | |
| 1542 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /* | |
| 1543 Copy OLD-CODING-SYSTEM to NEW-NAME. | |
| 1544 If NEW-NAME does not name an existing coding system, a new one will | |
| 1545 be created. | |
| 771 | 1546 If you are using this function to create an alias, think again: |
| 1547 Use `define-coding-system-alias' instead. | |
| 428 | 1548 */ |
| 1549 (old_coding_system, new_name)) | |
| 1550 { | |
| 1551 Lisp_Object new_coding_system; | |
| 1552 old_coding_system = Fget_coding_system (old_coding_system); | |
| 771 | 1553 new_coding_system = |
| 4303 | 1554 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0); |
| 428 | 1555 if (NILP (new_coding_system)) |
| 1556 { | |
| 793 | 1557 new_coding_system = |
| 1558 wrap_coding_system | |
| 1559 (allocate_coding_system | |
| 1560 (XCODING_SYSTEM (old_coding_system)->methods, | |
| 1561 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size, | |
| 1562 new_name)); | |
| 771 | 1563 if (!UNBOUNDP (new_name)) |
| 1564 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table); | |
| 428 | 1565 } |
| 771 | 1566 else if (XCODING_SYSTEM (old_coding_system)->methods != |
| 1567 XCODING_SYSTEM (new_coding_system)->methods) | |
| 1568 invalid_operation_2 ("Coding systems not same type", | |
| 1569 old_coding_system, new_coding_system); | |
| 428 | 1570 |
| 1571 { | |
| 1572 Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system); | |
| 1573 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); | |
| 3017 | 1574 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from)); |
| 428 | 1575 to->name = new_name; |
| 1576 } | |
| 1577 return new_coding_system; | |
| 1578 } | |
| 1579 | |
| 771 | 1580 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, |
| 1581 1, 1, 0, /* | |
| 440 | 1582 Return t if OBJECT names a coding system, and is not a coding system alias. |
| 428 | 1583 */ |
| 440 | 1584 (object)) |
| 1585 { | |
| 1586 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) | |
| 1587 ? Qt : Qnil; | |
| 1588 } | |
| 1589 | |
| 2297 | 1590 /* #### Shouldn't this really be a find/get pair? */ |
| 1591 | |
| 440 | 1592 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* |
| 1593 Return t if OBJECT is a coding system alias. | |
| 1594 All coding system aliases are created by `define-coding-system-alias'. | |
| 1595 */ | |
| 1596 (object)) | |
| 428 | 1597 { |
| 440 | 1598 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero)) |
| 1599 ? Qt : Qnil; | |
| 1600 } | |
| 1601 | |
| 1602 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /* | |
| 1603 Return the coding-system symbol for which symbol ALIAS is an alias. | |
| 1604 */ | |
| 1605 (alias)) | |
| 1606 { | |
| 1607 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil); | |
| 1608 if (SYMBOLP (aliasee)) | |
| 1609 return aliasee; | |
| 1610 else | |
| 563 | 1611 invalid_argument ("Symbol is not a coding system alias", alias); |
| 1204 | 1612 RETURN_NOT_REACHED (Qnil); |
| 440 | 1613 } |
| 1614 | |
| 1615 /* A maphash function, for removing dangling coding system aliases. */ | |
| 1616 static int | |
| 2286 | 1617 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias), |
| 440 | 1618 Lisp_Object aliasee, |
| 1619 void *dangling_aliases) | |
| 1620 { | |
| 1621 if (SYMBOLP (aliasee) | |
| 1622 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil))) | |
| 428 | 1623 { |
| 440 | 1624 (*(int *) dangling_aliases)++; |
| 1625 return 1; | |
| 428 | 1626 } |
| 440 | 1627 else |
| 1628 return 0; | |
| 1629 } | |
| 1630 | |
| 1631 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /* | |
| 1632 Define symbol ALIAS as an alias for coding system ALIASEE. | |
| 1633 | |
| 1634 You can use this function to redefine an alias that has already been defined, | |
| 1635 but you cannot redefine a name which is the canonical name for a coding system. | |
| 1636 \(a canonical name of a coding system is what is returned when you call | |
| 1637 `coding-system-name' on a coding system). | |
| 1638 | |
| 1639 ALIASEE itself can be an alias, which allows you to define nested aliases. | |
| 1640 | |
| 1641 You are forbidden, however, from creating alias loops or `dangling' aliases. | |
| 1642 These will be detected, and an error will be signaled if you attempt to do so. | |
| 1643 | |
| 1644 If ALIASEE is nil, then ALIAS will simply be undefined. | |
| 1645 | |
| 1646 See also `coding-system-alias-p', `coding-system-aliasee', | |
| 1647 and `coding-system-canonical-name-p'. | |
| 1648 */ | |
| 1649 (alias, aliasee)) | |
| 1650 { | |
| 2286 | 1651 Lisp_Object probe; |
| 440 | 1652 |
| 1653 CHECK_SYMBOL (alias); | |
| 1654 | |
| 1655 if (!NILP (Fcoding_system_canonical_name_p (alias))) | |
| 563 | 1656 invalid_change |
| 440 | 1657 ("Symbol is the canonical name of a coding system and cannot be redefined", |
| 1658 alias); | |
| 1659 | |
| 1660 if (NILP (aliasee)) | |
| 1661 { | |
| 771 | 1662 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix"); |
| 1663 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos"); | |
| 1664 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac"); | |
| 440 | 1665 |
| 1666 Fremhash (alias, Vcoding_system_hash_table); | |
| 1667 | |
| 1668 /* Undefine subsidiary aliases, | |
| 1669 presumably created by a previous call to this function */ | |
| 1670 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) && | |
| 1671 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) && | |
| 1672 ! NILP (Fcoding_system_alias_p (subsidiary_mac))) | |
| 1673 { | |
| 1674 Fdefine_coding_system_alias (subsidiary_unix, Qnil); | |
| 1675 Fdefine_coding_system_alias (subsidiary_dos, Qnil); | |
| 1676 Fdefine_coding_system_alias (subsidiary_mac, Qnil); | |
| 1677 } | |
| 1678 | |
| 1679 /* Undefine dangling coding system aliases. */ | |
| 1680 { | |
| 1681 int dangling_aliases; | |
| 1682 | |
| 1683 do { | |
| 1684 dangling_aliases = 0; | |
| 1685 elisp_map_remhash (dangling_coding_system_alias_p, | |
| 1686 Vcoding_system_hash_table, | |
| 1687 &dangling_aliases); | |
| 1688 } while (dangling_aliases > 0); | |
| 1689 } | |
| 1690 | |
| 1691 return Qnil; | |
| 1692 } | |
| 1693 | |
| 1694 if (CODING_SYSTEMP (aliasee)) | |
| 1695 aliasee = XCODING_SYSTEM_NAME (aliasee); | |
| 1696 | |
| 1697 /* Checks that aliasee names a coding-system */ | |
| 2286 | 1698 (void) Fget_coding_system (aliasee); |
| 440 | 1699 |
| 1700 /* Check for coding system alias loops */ | |
| 1701 if (EQ (alias, aliasee)) | |
| 563 | 1702 alias_loop: invalid_operation_2 |
| 440 | 1703 ("Attempt to create a coding system alias loop", alias, aliasee); |
| 1704 | |
| 1705 for (probe = aliasee; | |
| 1706 SYMBOLP (probe); | |
| 1707 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero)) | |
| 1708 { | |
| 1709 if (EQ (probe, alias)) | |
| 1710 goto alias_loop; | |
| 1711 } | |
| 1712 | |
| 1713 Fputhash (alias, aliasee, Vcoding_system_hash_table); | |
| 1714 | |
| 1715 /* Set up aliases for subsidiaries. | |
| 2297 | 1716 #### There must be a better way to handle subsidiary coding systems. |
| 1717 Inquiring Minds Want To Know: shouldn't they always be chains? */ | |
| 440 | 1718 { |
| 1719 static const char *suffixes[] = { "-unix", "-dos", "-mac" }; | |
| 1720 int i; | |
| 1721 for (i = 0; i < countof (suffixes); i++) | |
| 1722 { | |
| 1723 Lisp_Object alias_subsidiary = | |
| 771 | 1724 add_suffix_to_symbol (alias, suffixes[i]); |
| 440 | 1725 Lisp_Object aliasee_subsidiary = |
| 771 | 1726 add_suffix_to_symbol (aliasee, suffixes[i]); |
| 440 | 1727 |
| 1728 if (! NILP (Ffind_coding_system (aliasee_subsidiary))) | |
| 1729 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary); | |
| 1730 } | |
| 1731 } | |
| 428 | 1732 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac], |
| 1733 but it doesn't look intentional, so I'd rather return something | |
| 1734 meaningful or nothing at all. */ | |
| 1735 return Qnil; | |
| 1736 } | |
| 1737 | |
| 1738 static Lisp_Object | |
| 771 | 1739 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) |
| 428 | 1740 { |
| 1741 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
| 1742 Lisp_Object new_coding_system; | |
| 1743 | |
| 1744 switch (type) | |
| 1745 { | |
| 1746 case EOL_AUTODETECT: return coding_system; | |
| 1747 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; | |
| 1748 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; | |
| 1749 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; | |
| 2500 | 1750 default: ABORT (); return Qnil; |
| 428 | 1751 } |
| 1752 | |
| 1753 return NILP (new_coding_system) ? coding_system : new_coding_system; | |
| 1754 } | |
| 1755 | |
| 1756 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /* | |
| 1757 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE. | |
| 771 | 1758 The logically opposite operation is `coding-system-base'. |
| 428 | 1759 */ |
| 1760 (coding_system, eol_type)) | |
| 1761 { | |
| 771 | 1762 coding_system = get_coding_system_for_text_file (coding_system, 0); |
| 428 | 1763 |
| 1764 return subsidiary_coding_system (coding_system, | |
| 1765 symbol_to_eol_type (eol_type)); | |
| 1766 } | |
| 1767 | |
| 771 | 1768 DEFUN ("coding-system-base", Fcoding_system_base, |
| 1769 1, 1, 0, /* | |
| 1770 Return the base coding system of CODING-SYSTEM. | |
| 1771 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it | |
| 1772 returns CODING-SYSTEM. | |
| 1773 The logically opposite operation is `subsidiary-coding-system'. | |
| 1774 */ | |
| 1775 (coding_system)) | |
| 1776 { | |
| 1777 Lisp_Object base; | |
| 1778 | |
| 1779 coding_system = Fget_coding_system (coding_system); | |
| 1780 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary)) | |
| 1781 return Fget_coding_system (Qraw_text); /* hack! */ | |
| 1782 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system); | |
| 1783 if (!NILP (base)) | |
| 1784 return base; | |
| 1785 return coding_system; | |
| 1786 } | |
| 1787 | |
| 1788 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io, | |
| 1789 1, 1, 0, /* | |
| 1790 Return the coding system actually used for I/O. | |
| 1791 In some cases (e.g. when a particular EOL type is specified) this won't be | |
| 2297 | 1792 the coding system itself. This can be useful when trying to determine |
| 1793 precisely how data was decoded. | |
| 771 | 1794 */ |
| 1795 (coding_system)) | |
| 1796 { | |
| 1797 Lisp_Object canon; | |
| 1798 | |
| 1799 coding_system = Fget_coding_system (coding_system); | |
| 1800 canon = XCODING_SYSTEM_CANONICAL (coding_system); | |
| 1801 if (!NILP (canon)) | |
| 1802 return canon; | |
| 1803 return coding_system; | |
| 1804 } | |
| 1805 | |
| 428 | 1806 |
| 1807 /************************************************************************/ | |
| 1808 /* Coding system accessors */ | |
| 1809 /************************************************************************/ | |
| 1810 | |
| 771 | 1811 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /* |
| 1812 Return the description for CODING-SYSTEM. | |
| 1813 The `description' of a coding system is a short English phrase giving the | |
| 1814 name rendered according to English punctuation rules, plus possibly some | |
| 1815 explanatory text (typically in the form of a parenthetical phrase). The | |
| 1816 description is intended to be short enough that it can appear as a menu item, | |
| 1817 and clear enough to be recognizable even to someone who is assumed to have | |
| 1818 some basic familiarity with different encodings but may not know all the | |
| 1819 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and | |
| 1820 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of | |
| 1821 the encoding is given, followed by a note that this is a Chinese encoding, | |
| 1822 because the great majority of people encountering this would have no idea | |
| 1823 what it is, and giving the language indicates whether the encoding should | |
| 1824 just be ignored or (conceivably) investigated more thoroughly. | |
| 428 | 1825 */ |
| 1826 (coding_system)) | |
| 1827 { | |
| 1828 coding_system = Fget_coding_system (coding_system); | |
| 771 | 1829 return XCODING_SYSTEM_DESCRIPTION (coding_system); |
| 428 | 1830 } |
| 1831 | |
| 1832 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /* | |
| 1833 Return the type of CODING-SYSTEM. | |
| 1834 */ | |
| 1835 (coding_system)) | |
| 1836 { | |
| 771 | 1837 coding_system = Fget_coding_system (coding_system); |
| 1838 return XCODING_SYSTEM_TYPE (coding_system); | |
| 428 | 1839 } |
| 1840 | |
| 1841 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /* | |
| 1842 Return the PROP property of CODING-SYSTEM. | |
| 1843 */ | |
| 1844 (coding_system, prop)) | |
| 1845 { | |
| 1846 coding_system = Fget_coding_system (coding_system); | |
| 1847 CHECK_SYMBOL (prop); | |
| 1848 | |
| 1849 if (EQ (prop, Qname)) | |
| 1850 return XCODING_SYSTEM_NAME (coding_system); | |
| 1851 else if (EQ (prop, Qtype)) | |
| 1852 return Fcoding_system_type (coding_system); | |
| 771 | 1853 else if (EQ (prop, Qdescription)) |
| 1854 return XCODING_SYSTEM_DESCRIPTION (coding_system); | |
| 428 | 1855 else if (EQ (prop, Qmnemonic)) |
| 1856 return XCODING_SYSTEM_MNEMONIC (coding_system); | |
| 771 | 1857 else if (EQ (prop, Qdocumentation)) |
| 1858 return XCODING_SYSTEM_DOCUMENTATION (coding_system); | |
| 428 | 1859 else if (EQ (prop, Qeol_type)) |
| 771 | 1860 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE |
| 1861 (coding_system)); | |
| 428 | 1862 else if (EQ (prop, Qeol_lf)) |
| 1863 return XCODING_SYSTEM_EOL_LF (coding_system); | |
| 1864 else if (EQ (prop, Qeol_crlf)) | |
| 1865 return XCODING_SYSTEM_EOL_CRLF (coding_system); | |
| 1866 else if (EQ (prop, Qeol_cr)) | |
| 1867 return XCODING_SYSTEM_EOL_CR (coding_system); | |
| 1868 else if (EQ (prop, Qpost_read_conversion)) | |
| 1869 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); | |
| 1870 else if (EQ (prop, Qpre_write_conversion)) | |
| 1871 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1872 else if (EQ (prop, Qsafe_charsets)) |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1873 return XCODING_SYSTEM_SAFE_CHARSETS (coding_system); |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1874 else if (EQ (prop, Qsafe_chars)) |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1875 return XCODING_SYSTEM_SAFE_CHARS (coding_system); |
| 771 | 1876 else |
| 1877 { | |
| 1878 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), | |
| 1879 getprop, | |
| 1880 (coding_system, prop), | |
| 1881 Qunbound); | |
| 1882 if (UNBOUNDP (value)) | |
| 1883 invalid_constant ("Unrecognized property", prop); | |
| 1884 return value; | |
| 1885 } | |
| 1886 } | |
| 1887 | |
| 1888 | |
| 1889 /************************************************************************/ | |
| 1890 /* Coding stream functions */ | |
| 1891 /************************************************************************/ | |
| 1892 | |
| 1893 /* A coding stream is a stream used for encoding or decoding text. The | |
| 1894 coding-stream object keeps track of the actual coding system, the stream | |
| 1895 that is at the other end, and data that needs to be persistent across | |
| 1896 the lifetime of the stream. */ | |
| 1897 | |
| 1204 | 1898 extern const struct sized_memory_description chain_coding_stream_description; |
| 1899 extern const struct sized_memory_description undecided_coding_stream_description; | |
| 1900 | |
| 1901 static const struct memory_description coding_stream_data_description_1 []= { | |
| 2551 | 1902 { XD_BLOCK_PTR, chain_coding_system, 1, |
| 1903 { &chain_coding_stream_description } }, | |
| 1904 { XD_BLOCK_PTR, undecided_coding_system, 1, | |
| 1905 { &undecided_coding_stream_description } }, | |
| 1204 | 1906 { XD_END } |
| 1907 }; | |
| 1908 | |
| 1909 static const struct sized_memory_description coding_stream_data_description = { | |
| 1910 sizeof (void *), coding_stream_data_description_1 | |
| 1911 }; | |
| 1912 | |
| 1913 static const struct memory_description coding_lstream_description[] = { | |
| 1914 { XD_INT, offsetof (struct coding_stream, type) }, | |
| 1915 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) }, | |
| 1916 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) }, | |
| 1917 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) }, | |
| 1918 { XD_UNION, offsetof (struct coding_stream, data), | |
| 2551 | 1919 XD_INDIRECT (0, 0), { &coding_stream_data_description } }, |
| 1204 | 1920 { XD_END } |
| 1921 }; | |
| 1922 | |
| 1923 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding); | |
| 771 | 1924 |
| 1925 /* Encoding and decoding are parallel operations, so we create just one | |
| 1926 stream for both. "Decoding" may involve the extra step of autodetection | |
| 1927 of the data format, but that's only because of the conventional | |
| 1928 definition of decoding as converting from external- to | |
| 1929 internal-formatted data. | |
| 1930 | |
| 2297 | 1931 [[ REWRITE ME! ]] |
| 1932 | |
| 771 | 1933 #### We really need to abstract out the concept of "data formats" and |
| 1934 define "converters" that convert from and to specified formats, | |
| 1935 eliminating the idea of decoding and encoding. When specifying a | |
| 1936 conversion process, we need to give the data formats themselves, not the | |
| 1937 conversion processes -- e.g. a coding system called "Unicode->multibyte" | |
| 1938 converts in both directions, and we could auto-detect the format of data | |
| 1939 at either end. */ | |
| 1940 | |
| 1941 static Bytecount | |
| 1942 coding_reader (Lstream *stream, unsigned char *data, Bytecount size) | |
| 1943 { | |
| 1944 unsigned char *orig_data = data; | |
| 1945 Bytecount read_size; | |
| 1946 int error_occurred = 0; | |
| 1947 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 1948 | |
| 1949 /* We need to interface to coding_{de,en}code_1(), which expects to take | |
| 1950 some amount of data and store the result into a Dynarr. We have | |
| 1951 coding_{de,en}code_1() store into c->runoff, and take data from there | |
| 1952 as necessary. */ | |
| 1953 | |
| 1954 /* We loop until we have enough data, reading chunks from the other | |
| 1955 end and converting it. */ | |
| 1956 while (1) | |
| 1957 { | |
| 1958 /* Take data from convert_to if we can. Make sure to take at | |
| 1959 most SIZE bytes, and delete the data from convert_to. */ | |
| 1960 if (Dynarr_length (str->convert_to) > 0) | |
| 1961 { | |
| 1962 Bytecount chunk = | |
| 1963 min (size, (Bytecount) Dynarr_length (str->convert_to)); | |
| 1964 memcpy (data, Dynarr_atp (str->convert_to, 0), chunk); | |
| 1965 Dynarr_delete_many (str->convert_to, 0, chunk); | |
| 1966 data += chunk; | |
| 1967 size -= chunk; | |
| 1968 } | |
| 1969 | |
| 1970 if (size == 0) | |
| 1971 break; /* No more room for data */ | |
| 1972 | |
| 1973 if (str->eof) | |
| 1974 break; | |
| 1975 | |
| 1976 { | |
| 1977 /* Exhausted convert_to, so get some more. Read into convert_from, | |
| 1978 after existing "rejected" data from the last conversion. */ | |
| 1979 Bytecount rejected = Dynarr_length (str->convert_from); | |
| 1980 /* #### 1024 is arbitrary; we really need to separate 0 from EOF, | |
| 1981 and when we get 0, keep taking more data until we don't get 0 -- | |
| 1982 we don't know how much data the conversion routine might need | |
| 2297 | 1983 before it can generate any data of its own (eg, bzip2). */ |
| 814 | 1984 Bytecount readmore = |
| 1985 str->one_byte_at_a_time ? (Bytecount) 1 : | |
| 1986 max (size, (Bytecount) 1024); | |
| 771 | 1987 |
| 1988 Dynarr_add_many (str->convert_from, 0, readmore); | |
| 1989 read_size = Lstream_read (str->other_end, | |
| 1990 Dynarr_atp (str->convert_from, rejected), | |
| 1991 readmore); | |
| 1992 /* Trim size down to how much we actually got */ | |
| 1993 Dynarr_set_size (str->convert_from, rejected + max (0, read_size)); | |
| 1994 } | |
| 1995 | |
| 1996 if (read_size < 0) /* LSTREAM_ERROR */ | |
| 1997 { | |
| 1998 error_occurred = 1; | |
| 1999 break; | |
| 2000 } | |
| 2001 if (read_size == 0) /* LSTREAM_EOF */ | |
| 2002 /* There might be some more end data produced in the translation, | |
| 2003 so we set a flag and call the conversion method once more to | |
| 2004 output any final stuff it may be holding, any "go back to a sane | |
| 2005 state" escape sequences, etc. The conversion method is free to | |
| 2006 look at this flag, and we use it above to stop looping. */ | |
| 2007 str->eof = 1; | |
| 2008 { | |
| 2009 Bytecount processed; | |
| 2010 Bytecount to_process = Dynarr_length (str->convert_from); | |
| 2011 | |
| 2012 /* Convert the data, and save any rejected data in convert_from */ | |
| 2013 processed = | |
| 2014 XCODESYSMETH (str->codesys, convert, | |
| 2015 (str, Dynarr_atp (str->convert_from, 0), | |
| 2016 str->convert_to, to_process)); | |
| 2017 if (processed < 0) | |
| 2018 { | |
| 2019 error_occurred = 1; | |
| 2020 break; | |
| 2021 } | |
| 2022 assert (processed <= to_process); | |
| 2023 if (processed < to_process) | |
| 2024 memmove (Dynarr_atp (str->convert_from, 0), | |
| 2025 Dynarr_atp (str->convert_from, processed), | |
| 2026 to_process - processed); | |
| 2027 Dynarr_set_size (str->convert_from, to_process - processed); | |
| 2028 } | |
| 2029 } | |
| 2030 | |
| 2031 if (data - orig_data == 0) | |
| 2032 return error_occurred ? -1 : 0; | |
| 2033 else | |
| 2034 return data - orig_data; | |
| 2035 } | |
| 2036 | |
| 2037 static Bytecount | |
| 2038 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size) | |
| 2039 { | |
| 2040 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2041 | |
| 2042 /* Convert all our data into convert_to, and then attempt to write | |
| 2043 it all out to the other end. */ | |
| 2044 Dynarr_reset (str->convert_to); | |
| 2045 size = XCODESYSMETH (str->codesys, convert, | |
| 2046 (str, data, str->convert_to, size)); | |
| 2047 if (Lstream_write (str->other_end, Dynarr_atp (str->convert_to, 0), | |
| 2048 Dynarr_length (str->convert_to)) < 0) | |
| 2049 return -1; | |
| 2050 else | |
| 2051 /* The return value indicates how much of the incoming data was | |
| 2052 processed, not how many bytes were written. */ | |
| 2053 return size; | |
| 2054 } | |
| 2055 | |
| 2056 static int | |
| 2057 encode_decode_source_sink_type_is_char (Lisp_Object cs, | |
| 2058 enum source_or_sink sex, | |
| 2059 enum encode_decode direction) | |
| 2060 { | |
| 2061 return (direction == CODING_DECODE ? | |
| 2062 decoding_source_sink_type_is_char (cs, sex) : | |
| 2063 encoding_source_sink_type_is_char (cs, sex)); | |
| 2064 } | |
| 2065 | |
| 2066 /* Ensure that the convert methods only get full characters sent to them to | |
| 2067 convert if the source of that conversion is characters; and that no such | |
| 2068 full-character checking happens when the source is bytes. Keep in mind | |
| 2069 that (1) the conversion_end_type return values take the perspective of | |
| 2070 encoding; (2) the source for decoding is the same as the sink for | |
| 2071 encoding; (3) when writing, the data is given to us, and we set our own | |
| 2072 stream to be character mode or not; (4) when reading, the data comes | |
| 2073 from the other_end stream, and we set that one to be character mode or | |
| 2074 not. This is consistent with the comment above the prototype for | |
| 2075 Lstream_set_character_mode(), which lays out rules for who is allowed to | |
| 2076 modify the character type mode on a stream. | |
| 2077 | |
| 814 | 2078 If we're a read stream, we're always setting character mode on the |
| 2079 source, but we also set it on ourselves consistent with the flag that | |
| 2080 can disable this (see again the comment above | |
| 2081 Lstream_set_character_mode()). | |
| 2082 */ | |
| 771 | 2083 |
| 2084 static void | |
| 2085 set_coding_character_mode (Lstream *stream) | |
| 2086 { | |
| 2087 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2088 Lstream *stream_to_set = | |
| 2089 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end; | |
| 2090 if (encode_decode_source_sink_type_is_char | |
| 2091 (str->codesys, CODING_SOURCE, str->direction)) | |
| 2092 Lstream_set_character_mode (stream_to_set); | |
| 2093 else | |
| 2094 Lstream_unset_character_mode (stream_to_set); | |
| 814 | 2095 if (str->set_char_mode_on_us_when_reading && |
| 2096 (stream->flags & LSTREAM_FL_READ)) | |
| 2097 { | |
| 2098 if (encode_decode_source_sink_type_is_char | |
| 2099 (str->codesys, CODING_SINK, str->direction)) | |
| 2100 Lstream_set_character_mode (stream); | |
| 2101 else | |
| 2102 Lstream_unset_character_mode (stream); | |
| 2103 } | |
| 771 | 2104 } |
| 2105 | |
| 2106 static Lisp_Object | |
| 2107 coding_marker (Lisp_Object stream) | |
| 2108 { | |
| 2109 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream)); | |
| 2110 | |
| 2111 mark_object (str->orig_codesys); | |
| 2112 mark_object (str->codesys); | |
| 2113 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str)); | |
| 2114 return wrap_lstream (str->other_end); | |
| 2115 } | |
| 2116 | |
| 2117 static int | |
| 2118 coding_rewinder (Lstream *stream) | |
| 2119 { | |
| 2120 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2121 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str)); | |
| 2122 | |
| 2123 str->ch = 0; | |
| 2124 Dynarr_reset (str->convert_to); | |
| 2125 Dynarr_reset (str->convert_from); | |
| 2126 return Lstream_rewind (str->other_end); | |
| 2127 } | |
| 2128 | |
| 2129 static int | |
| 2130 coding_seekable_p (Lstream *stream) | |
| 2131 { | |
| 2132 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2133 return Lstream_seekable_p (str->other_end); | |
| 2134 } | |
| 2135 | |
| 2136 static int | |
| 2137 coding_flusher (Lstream *stream) | |
| 2138 { | |
| 2139 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2140 return Lstream_flush (str->other_end); | |
| 2141 } | |
| 2142 | |
| 2143 static int | |
| 2144 coding_closer (Lstream *stream) | |
| 2145 { | |
| 2146 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2147 if (stream->flags & LSTREAM_FL_WRITE) | |
| 2148 { | |
| 2149 str->eof = 1; | |
| 2150 coding_writer (stream, 0, 0); | |
| 2151 str->eof = 0; | |
| 2152 } | |
| 2153 /* It's safe to free the runoff dynarrs now because they are used only | |
| 2154 during conversion. We need to keep the type-specific data around, | |
| 2155 though, because of canonicalize_after_coding. */ | |
| 2156 if (str->convert_to) | |
| 2157 { | |
| 2158 Dynarr_free (str->convert_to); | |
| 2159 str->convert_to = 0; | |
| 2160 } | |
| 2161 if (str->convert_from) | |
| 428 | 2162 { |
| 771 | 2163 Dynarr_free (str->convert_from); |
| 2164 str->convert_from = 0; | |
| 2165 } | |
| 2166 | |
| 800 | 2167 if (str->no_close_other) |
| 2168 return Lstream_flush (str->other_end); | |
| 2169 else | |
| 2170 return Lstream_close (str->other_end); | |
| 771 | 2171 } |
| 2172 | |
| 2173 static void | |
| 2174 coding_finalizer (Lstream *stream) | |
| 2175 { | |
| 2176 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2177 | |
| 2178 assert (!str->finalized); | |
| 2179 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
| 2180 if (str->data) | |
| 2181 { | |
| 1726 | 2182 xfree (str->data, void *); |
| 771 | 2183 str->data = 0; |
| 2184 } | |
| 2185 str->finalized = 1; | |
| 2186 } | |
| 2187 | |
| 2188 static Lisp_Object | |
| 2189 coding_stream_canonicalize_after_coding (Lstream *stream) | |
| 2190 { | |
| 2191 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2192 | |
| 2193 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding, | |
| 2194 (str), str->codesys); | |
| 2195 } | |
| 2196 | |
| 2197 Lisp_Object | |
| 2198 coding_stream_detected_coding_system (Lstream *stream) | |
| 2199 { | |
| 2200 Lisp_Object codesys = | |
| 2201 coding_stream_canonicalize_after_coding (stream); | |
| 2202 if (NILP (codesys)) | |
| 2203 return Fget_coding_system (Qidentity); | |
| 2204 return codesys; | |
| 2205 } | |
| 2206 | |
| 2207 Lisp_Object | |
| 2208 coding_stream_coding_system (Lstream *stream) | |
| 2209 { | |
| 2210 return CODING_STREAM_DATA (stream)->codesys; | |
| 2211 } | |
| 2212 | |
| 2213 /* Change the coding system associated with a stream. */ | |
| 2214 | |
| 2215 void | |
| 2216 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys) | |
| 2217 { | |
| 2218 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
| 2219 if (EQ (str->orig_codesys, codesys)) | |
| 2220 return; | |
| 2221 /* We do the equivalent of closing the stream, destroying it, and | |
| 2222 reinitializing it. This includes flushing out the data and signalling | |
| 2223 EOF, if we're a writing stream; we also replace the type-specific data | |
| 2224 with the data appropriate for the new coding system. */ | |
| 2225 if (!NILP (str->codesys)) | |
| 2226 { | |
| 2227 if (lstr->flags & LSTREAM_FL_WRITE) | |
| 2228 { | |
| 2229 Lstream_flush (lstr); | |
| 2230 str->eof = 1; | |
| 2231 coding_writer (lstr, 0, 0); | |
| 2232 str->eof = 0; | |
| 2233 } | |
| 2234 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
| 2235 } | |
| 2236 str->orig_codesys = codesys; | |
| 2237 str->codesys = coding_system_real_canonical (codesys); | |
| 2238 | |
| 2239 if (str->data) | |
| 2240 { | |
| 1726 | 2241 xfree (str->data, void *); |
| 771 | 2242 str->data = 0; |
| 428 | 2243 } |
| 771 | 2244 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size) |
| 1204 | 2245 { |
| 2246 str->data = | |
| 2247 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)-> | |
| 2248 coding_data_size); | |
| 2249 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype; | |
| 2250 } | |
| 771 | 2251 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str)); |
| 2252 /* The new coding system may have different ideas regarding whether its | |
| 2253 ends are characters or bytes. */ | |
| 2254 set_coding_character_mode (lstr); | |
| 2255 } | |
| 2256 | |
| 2257 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding | |
| 2258 stream for writing, no automatic code detection will be performed. | |
| 2259 The reason for this is that automatic code detection requires a | |
| 2260 seekable input. Things will also fail if you open a coding | |
| 2261 stream for reading using a non-fully-specified coding system and | |
| 2262 a non-seekable input stream. */ | |
| 2263 | |
| 2264 static Lisp_Object | |
| 2265 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys, | |
| 800 | 2266 const char *mode, enum encode_decode direction, |
| 802 | 2267 int flags) |
| 771 | 2268 { |
| 2269 Lstream *lstr = Lstream_new (lstream_coding, mode); | |
| 2270 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
| 2271 | |
| 2272 codesys = Fget_coding_system (codesys); | |
| 2273 xzero (*str); | |
| 2274 str->codesys = Qnil; | |
| 2275 str->orig_codesys = Qnil; | |
| 2276 str->us = lstr; | |
| 2277 str->other_end = stream; | |
| 2278 str->convert_to = Dynarr_new (unsigned_char); | |
| 2279 str->convert_from = Dynarr_new (unsigned_char); | |
| 2280 str->direction = direction; | |
| 814 | 2281 if (flags & LSTREAM_FL_NO_CLOSE_OTHER) |
| 802 | 2282 str->no_close_other = 1; |
| 814 | 2283 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME) |
| 802 | 2284 str->one_byte_at_a_time = 1; |
| 814 | 2285 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING)) |
| 2286 str->set_char_mode_on_us_when_reading = 1; | |
| 802 | 2287 |
| 771 | 2288 set_coding_stream_coding_system (lstr, codesys); |
| 793 | 2289 return wrap_lstream (lstr); |
| 771 | 2290 } |
| 2291 | |
| 814 | 2292 /* FLAGS: |
| 2293 | |
| 2294 LSTREAM_FL_NO_CLOSE_OTHER | |
| 2295 Don't close STREAM (the stream at the other end) when this stream is | |
| 2296 closed. | |
| 2297 | |
| 2298 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME | |
| 2299 When reading from STREAM, read and process one byte at a time rather | |
| 2300 than in large chunks. This is for reading from TTY's, so we don't | |
| 2301 block. #### We should instead create a non-blocking filedesc stream | |
| 2302 that emulates the behavior as necessary using select(), when the | |
| 2303 fcntls don't work. (As seems to be the case on Cygwin.) | |
| 2304 | |
| 2305 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING | |
| 2306 When reading from STREAM, read and process one byte at a time rather | |
| 2307 than in large chunks. This is for reading from TTY's, so we don't | |
| 2308 block. #### We should instead create a non-blocking filedesc stream | |
| 2309 that emulates the behavior as necessary using select(), when the | |
| 2310 fcntls don't work. (As seems to be the case on Cygwin.) | |
| 2311 */ | |
| 771 | 2312 Lisp_Object |
| 2313 make_coding_input_stream (Lstream *stream, Lisp_Object codesys, | |
| 802 | 2314 enum encode_decode direction, int flags) |
| 771 | 2315 { |
| 800 | 2316 return make_coding_stream_1 (stream, codesys, "r", direction, |
| 802 | 2317 flags); |
| 771 | 2318 } |
| 2319 | |
| 814 | 2320 /* FLAGS: |
| 2321 | |
| 2322 LSTREAM_FL_NO_CLOSE_OTHER | |
| 2323 Don't close STREAM (the stream at the other end) when this stream is | |
| 2324 closed. | |
| 2325 */ | |
| 771 | 2326 Lisp_Object |
| 2327 make_coding_output_stream (Lstream *stream, Lisp_Object codesys, | |
| 802 | 2328 enum encode_decode direction, int flags) |
| 771 | 2329 { |
| 800 | 2330 return make_coding_stream_1 (stream, codesys, "w", direction, |
| 802 | 2331 flags); |
| 771 | 2332 } |
| 2333 | |
| 2334 static Lisp_Object | |
| 2335 encode_decode_coding_region (Lisp_Object start, Lisp_Object end, | |
| 2336 Lisp_Object coding_system, Lisp_Object buffer, | |
| 2337 enum encode_decode direction) | |
| 2338 { | |
| 2339 Charbpos b, e; | |
| 2340 struct buffer *buf = decode_buffer (buffer, 0); | |
| 2341 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil; | |
| 2342 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil; | |
| 2343 Lisp_Object lb_outstream = Qnil; | |
| 2344 Lisp_Object next; | |
| 2345 Lstream *istr, *ostr; | |
| 2346 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; | |
| 2347 struct gcpro ngcpro1; | |
| 2348 int source_char, sink_char; | |
| 2349 | |
| 2350 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
| 2351 barf_if_buffer_read_only (buf, b, e); | |
| 2352 | |
| 2353 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream); | |
| 2354 NGCPRO1 (auto_outstream); | |
| 2355 | |
| 2356 coding_system = Fget_coding_system (coding_system); | |
| 2357 source_char = encode_decode_source_sink_type_is_char (coding_system, | |
| 2358 CODING_SOURCE, | |
| 2359 direction); | |
| 2360 sink_char = encode_decode_source_sink_type_is_char (coding_system, | |
| 2361 CODING_SINK, | |
| 2362 direction); | |
| 2363 | |
| 2364 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */ | |
| 2365 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
| 2366 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0); | |
| 2367 | |
| 2368 if (direction == CODING_DECODE && | |
| 2369 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT) | |
| 2370 next = auto_outstream = | |
| 2371 make_coding_output_stream | |
| 800 | 2372 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect), |
| 2373 CODING_DECODE, 0); | |
| 771 | 2374 |
| 2375 if (!sink_char) | |
| 2376 next = from_outstream = | |
| 800 | 2377 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0); |
| 771 | 2378 outstream = make_coding_output_stream (XLSTREAM (next), coding_system, |
| 800 | 2379 direction, 0); |
| 771 | 2380 if (!source_char) |
| 428 | 2381 { |
| 771 | 2382 to_outstream = |
| 2383 make_coding_output_stream (XLSTREAM (outstream), | |
| 800 | 2384 Qbinary, CODING_ENCODE, 0); |
| 771 | 2385 ostr = XLSTREAM (to_outstream); |
| 2386 } | |
| 2387 else | |
| 2388 ostr = XLSTREAM (outstream); | |
| 2389 istr = XLSTREAM (instream); | |
| 2390 | |
| 2391 /* The chain of streams looks like this: | |
| 2392 | |
| 2297 | 2393 [BUFFER] <----- (( read from/send to loop )) |
| 771 | 2394 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is |
| 2395 in bytes] | |
| 2396 ------> [ENCODE/DECODE AS SPECIFIED] | |
| 2397 ------> [BYTE->CHAR i.e. DECODE AS BINARY | |
| 2398 if sink is in bytes] | |
| 2399 ------> [AUTODETECT EOL if | |
| 2400 we're decoding and | |
| 2401 coding system calls | |
| 2402 for this] | |
| 2403 ------> [BUFFER] | |
| 2404 */ | |
| 2367 | 2405 |
| 2406 /* #### See comment | |
| 2407 | |
| 2408 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS | |
| 2409 | |
| 2410 in text.c. | |
| 2411 */ | |
| 2412 | |
| 771 | 2413 while (1) |
| 2414 { | |
| 2415 char tempbuf[1024]; /* some random amount */ | |
| 2416 Charbpos newpos, even_newer_pos; | |
| 2417 Charbpos oldpos = lisp_buffer_stream_startpos (istr); | |
| 2418 Bytecount size_in_bytes = | |
| 2419 Lstream_read (istr, tempbuf, sizeof (tempbuf)); | |
| 2420 | |
| 2421 if (!size_in_bytes) | |
| 2422 break; | |
| 2423 newpos = lisp_buffer_stream_startpos (istr); | |
| 2424 Lstream_write (ostr, tempbuf, size_in_bytes); | |
| 2425 even_newer_pos = lisp_buffer_stream_startpos (istr); | |
| 2426 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos), | |
| 2427 even_newer_pos, 0); | |
| 428 | 2428 } |
| 771 | 2429 |
| 2430 { | |
| 2431 Charcount retlen = | |
| 2432 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b; | |
| 2433 Lstream_close (istr); | |
| 2434 Lstream_close (ostr); | |
| 2435 NUNGCPRO; | |
| 2436 UNGCPRO; | |
| 2437 Lstream_delete (istr); | |
| 2438 if (!NILP (from_outstream)) | |
| 2439 Lstream_delete (XLSTREAM (from_outstream)); | |
| 2440 Lstream_delete (XLSTREAM (outstream)); | |
| 2441 if (!NILP (to_outstream)) | |
| 2442 Lstream_delete (XLSTREAM (to_outstream)); | |
| 2443 if (!NILP (auto_outstream)) | |
| 2444 Lstream_delete (XLSTREAM (auto_outstream)); | |
| 2445 Lstream_delete (XLSTREAM (lb_outstream)); | |
| 2446 return make_int (retlen); | |
| 2447 } | |
| 2448 } | |
| 2449 | |
| 3302 | 2450 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, |
| 2451 "*r\nzDecode from coding system: \ni", /* | |
| 771 | 2452 Decode the text between START and END which is encoded in CODING-SYSTEM. |
| 2453 This is useful if you've read in encoded text from a file without decoding | |
| 2454 it (e.g. you read in a JIS-formatted file but used the `binary' or | |
| 2455 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). | |
| 2456 Return length of decoded text. | |
| 3302 | 2457 BUFFER defaults to the current buffer if unspecified, and when interactive. |
| 771 | 2458 */ |
| 2459 (start, end, coding_system, buffer)) | |
| 2460 { | |
| 2461 return encode_decode_coding_region (start, end, coding_system, buffer, | |
| 2462 CODING_DECODE); | |
| 2463 } | |
| 2464 | |
| 3302 | 2465 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, |
| 2466 "*r\nzEncode to coding system: \ni", /* | |
| 771 | 2467 Encode the text between START and END using CODING-SYSTEM. |
| 2468 This will, for example, convert Japanese characters into stuff such as | |
| 3302 | 2469 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text. |
| 2470 BUFFER defaults to the current buffer if unspecified, and when interactive. | |
| 771 | 2471 */ |
| 2472 (start, end, coding_system, buffer)) | |
| 2473 { | |
| 2474 return encode_decode_coding_region (start, end, coding_system, buffer, | |
| 2475 CODING_ENCODE); | |
| 428 | 2476 } |
| 2477 | |
| 2478 | |
| 2479 /************************************************************************/ | |
| 771 | 2480 /* Chain methods */ |
| 428 | 2481 /************************************************************************/ |
| 2482 | |
| 771 | 2483 /* #### Need a way to create "opposite-direction" coding systems. */ |
| 2484 | |
| 2485 /* Chain two or more coding systems together to make a combination coding | |
| 2486 system. */ | |
| 2487 | |
| 2488 struct chain_coding_system | |
| 2489 { | |
| 2490 /* List of coding systems, in decode order */ | |
| 2491 Lisp_Object *chain; | |
| 2492 /* Number of coding systems in list */ | |
| 2493 int count; | |
| 2494 /* Coding system to return as a result of canonicalize-after-coding */ | |
| 2495 Lisp_Object canonicalize_after_coding; | |
| 2496 }; | |
| 2497 | |
| 2498 struct chain_coding_stream | |
| 2499 { | |
| 2500 int initted; | |
| 2501 /* Lstreams for chain coding system */ | |
| 2502 Lisp_Object *lstreams; | |
| 2503 int lstream_count; | |
| 2504 }; | |
| 2505 | |
| 1204 | 2506 static const struct memory_description chain_coding_system_description[] = { |
| 2507 { XD_INT, offsetof (struct chain_coding_system, count) }, | |
| 2367 | 2508 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain), |
| 2551 | 2509 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
| 1204 | 2510 { XD_LISP_OBJECT, offsetof (struct chain_coding_system, |
| 2511 canonicalize_after_coding) }, | |
| 771 | 2512 { XD_END } |
| 2513 }; | |
| 2514 | |
| 1204 | 2515 static const struct memory_description chain_coding_stream_description_1 [] = { |
| 2516 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) }, | |
| 2367 | 2517 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams), |
| 2551 | 2518 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
| 771 | 2519 { XD_END } |
| 2520 }; | |
| 2521 | |
| 1204 | 2522 const struct sized_memory_description chain_coding_stream_description = { |
| 2523 sizeof (struct chain_coding_stream), chain_coding_stream_description_1 | |
| 2524 }; | |
| 2525 | |
| 2526 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain); | |
| 2527 | |
| 771 | 2528 static Lisp_Object |
| 2529 chain_canonicalize (Lisp_Object codesys) | |
| 2530 { | |
| 2531 /* We make use of the fact that this method is called at init time, after | |
| 2532 properties have been parsed. init_method is called too early. */ | |
| 2533 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
| 2534 any more. */ | |
| 2535 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys), | |
| 2536 XCODING_SYSTEM_CHAIN_CHAIN (codesys)); | |
| 2537 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys), | |
| 2538 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys), | |
| 2539 chain)); | |
| 2540 Fputhash (chain, codesys, Vchain_canonicalize_hash_table); | |
| 2541 return codesys; | |
| 2542 } | |
| 2543 | |
| 2544 static Lisp_Object | |
| 2545 chain_canonicalize_after_coding (struct coding_stream *str) | |
| 2546 { | |
| 2547 Lisp_Object cac = | |
| 2548 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys); | |
| 2549 if (!NILP (cac)) | |
| 2550 return cac; | |
| 2551 return str->codesys; | |
| 2552 #if 0 | |
| 2553 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
| 2554 Lisp_Object us = str->codesys, codesys; | |
| 2555 int i; | |
| 2556 Lisp_Object chain; | |
| 2557 Lisp_Object tail; | |
| 2558 int changed = 0; | |
| 2559 | |
| 2560 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
| 2561 any more. */ | |
| 2562 if (str->direction == CODING_ENCODE || !data->initted) | |
| 2563 return us; | |
| 2564 | |
| 2565 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us), | |
| 2566 XCODING_SYSTEM_CHAIN_CHAIN (us)); | |
| 2567 | |
| 2568 tail = chain; | |
| 2569 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++) | |
| 2570 { | |
| 2571 codesys = (coding_stream_canonicalize_after_coding | |
| 2572 (XLSTREAM (data->lstreams[i]))); | |
| 2573 if (!EQ (codesys, XCAR (tail))) | |
| 2574 changed = 1; | |
| 2575 XCAR (tail) = codesys; | |
| 2576 tail = XCDR (tail); | |
| 2577 } | |
| 2578 | |
| 2579 if (!changed) | |
| 2580 return us; | |
| 2581 | |
| 2582 chain = delq_no_quit (Qnil, chain); | |
| 2583 | |
| 2584 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) && | |
| 2585 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us))) | |
| 2586 { | |
| 2587 if (NILP (chain)) | |
| 2588 return Qnil; | |
| 2589 if (NILP (XCDR (chain))) | |
| 2590 return XCAR (chain); | |
| 2591 } | |
| 2592 | |
| 2593 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us), | |
| 2594 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us), | |
| 2595 chain)), Vchain_canonicalize_hash_table, | |
| 2596 Qnil); | |
| 2597 if (!NILP (codesys)) | |
| 2598 return codesys; | |
| 2599 return make_internal_coding_system | |
| 2600 (us, "internal-chain-canonicalizer-wrapper", | |
| 2601 Qchain, Qunbound, list2 (Qchain, chain)); | |
| 2602 #endif /* 0 */ | |
| 2603 } | |
| 2604 | |
| 2605 static void | |
| 2606 chain_init (Lisp_Object codesys) | |
| 2607 { | |
| 2608 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil; | |
| 2609 } | |
| 2610 | |
| 2611 static void | |
| 2612 chain_mark (Lisp_Object codesys) | |
| 2613 { | |
| 2614 int i; | |
| 2615 | |
| 2616 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++) | |
| 2617 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]); | |
| 2618 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys)); | |
| 2619 } | |
| 2620 | |
| 2621 static void | |
| 2622 chain_mark_coding_stream_1 (struct chain_coding_stream *data) | |
| 2623 { | |
| 2624 int i; | |
| 2625 | |
| 2626 for (i = 0; i < data->lstream_count; i++) | |
| 2627 mark_object (data->lstreams[i]); | |
| 2628 } | |
| 2629 | |
| 2630 static void | |
| 2631 chain_mark_coding_stream (struct coding_stream *str) | |
| 2632 { | |
| 2633 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
| 2634 } | |
| 2635 | |
| 2636 static void | |
| 2637 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
| 2638 { | |
| 2639 int i; | |
| 2640 | |
| 826 | 2641 write_c_string (printcharfun, "("); |
| 771 | 2642 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++) |
| 2643 { | |
| 826 | 2644 write_c_string (printcharfun, i == 0 ? "" : "->"); |
| 771 | 2645 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i], |
| 2646 printcharfun, escapeflag); | |
| 2647 } | |
| 2648 { | |
| 2649 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs); | |
| 2650 if (!NILP (cac)) | |
| 2651 { | |
| 2652 if (i > 0) | |
| 826 | 2653 write_c_string (printcharfun, " "); |
| 2654 write_c_string (printcharfun, "canonicalize-after-coding="); | |
| 771 | 2655 print_coding_system_in_print_method (cac, printcharfun, escapeflag); |
| 2656 } | |
| 2657 } | |
| 2658 | |
| 826 | 2659 write_c_string (printcharfun, ")"); |
| 771 | 2660 } |
| 2661 | |
| 2662 static void | |
| 2663 chain_rewind_coding_stream_1 (struct chain_coding_stream *data) | |
| 2664 { | |
| 2665 /* Each will rewind the next; there is always at least one stream (the | |
| 2666 dynarr stream at the end) if we're initted */ | |
| 2667 if (data->initted) | |
| 2668 Lstream_rewind (XLSTREAM (data->lstreams[0])); | |
| 2669 } | |
| 2670 | |
| 2671 static void | |
| 2672 chain_rewind_coding_stream (struct coding_stream *str) | |
| 2673 { | |
| 2674 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
| 2675 } | |
| 2676 | |
| 2677 static void | |
| 2678 chain_init_coding_streams_1 (struct chain_coding_stream *data, | |
| 2679 unsigned_char_dynarr *dst, | |
| 2680 int ncodesys, Lisp_Object *codesys, | |
| 2681 enum encode_decode direction) | |
| 2682 { | |
| 2683 int i; | |
| 2684 Lisp_Object lstream_out; | |
| 2685 | |
| 2686 data->lstream_count = ncodesys + 1; | |
| 2687 data->lstreams = xnew_array (Lisp_Object, data->lstream_count); | |
| 2688 | |
| 2689 lstream_out = make_dynarr_output_stream (dst); | |
| 2690 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0); | |
| 2691 data->lstreams[data->lstream_count - 1] = lstream_out; | |
| 2692 | |
| 2693 for (i = ncodesys - 1; i >= 0; i--) | |
| 2694 { | |
| 2695 data->lstreams[i] = | |
| 2696 make_coding_output_stream | |
| 2697 (XLSTREAM (lstream_out), | |
| 2698 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i], | |
| 800 | 2699 direction, 0); |
| 771 | 2700 lstream_out = data->lstreams[i]; |
| 2701 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, | |
| 2702 0); | |
| 2703 } | |
| 2704 data->initted = 1; | |
| 2705 } | |
| 2706 | |
| 2707 static Bytecount | |
| 2708 chain_convert (struct coding_stream *str, const UExtbyte *src, | |
| 2709 unsigned_char_dynarr *dst, Bytecount n) | |
| 2710 { | |
| 2711 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
| 2712 | |
| 2713 if (str->eof) | |
| 2714 { | |
| 2715 /* Each will close the next; there is always at least one stream (the | |
| 2716 dynarr stream at the end) if we're initted. We need to close now | |
| 2717 because more data may be generated. */ | |
| 2718 if (data->initted) | |
| 2719 Lstream_close (XLSTREAM (data->lstreams[0])); | |
| 2720 return n; | |
| 2721 } | |
| 2722 | |
| 2723 if (!data->initted) | |
| 2724 chain_init_coding_streams_1 | |
| 2725 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys), | |
| 2726 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction); | |
| 2727 | |
| 2728 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0) | |
| 2729 return -1; | |
| 2730 return n; | |
| 2731 } | |
| 2732 | |
| 2733 static void | |
| 2734 chain_finalize_coding_stream_1 (struct chain_coding_stream *data) | |
| 2735 { | |
| 2736 if (data->lstreams) | |
| 2737 { | |
| 2297 | 2738 /* During GC, these objects are unmarked, and are about to be freed. |
| 2739 We do NOT want them on the free list, and that will cause lots of | |
| 2740 nastiness including crashes. Just let them be freed normally. */ | |
| 771 | 2741 if (!gc_in_progress) |
| 2742 { | |
| 2743 int i; | |
| 2297 | 2744 /* Order of deletion is important here! Delete from the head of |
| 2745 the chain and work your way towards the tail. In general, | |
| 2746 when you delete an object, there should be *NO* pointers to it | |
| 2747 anywhere. Deleting back-to-front would be a problem because | |
| 2748 there are pointers going forward. If there were pointers in | |
| 2749 both directions, you'd have to disconnect the pointers to a | |
| 2750 particular object before deleting it. */ | |
| 771 | 2751 for (i = 0; i < data->lstream_count; i++) |
| 2752 Lstream_delete (XLSTREAM ((data->lstreams)[i])); | |
| 2753 } | |
| 1726 | 2754 xfree (data->lstreams, Lisp_Object *); |
| 771 | 2755 } |
| 2756 } | |
| 2757 | |
| 2758 static void | |
| 2759 chain_finalize_coding_stream (struct coding_stream *str) | |
| 2760 { | |
| 2761 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
| 2762 } | |
| 2763 | |
| 2764 static void | |
| 2765 chain_finalize (Lisp_Object c) | |
| 2766 { | |
| 2767 if (XCODING_SYSTEM_CHAIN_CHAIN (c)) | |
| 1726 | 2768 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c), Lisp_Object *); |
| 771 | 2769 } |
| 2770 | |
| 428 | 2771 static int |
| 771 | 2772 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
| 2773 { | |
| 2774 if (EQ (key, Qchain)) | |
| 2775 { | |
| 2776 Lisp_Object *cslist; | |
| 2777 int count = 0; | |
| 2778 int i; | |
| 2779 | |
| 2367 | 2780 { |
| 2781 EXTERNAL_LIST_LOOP_2 (elt, value) | |
| 2782 { | |
| 2783 Fget_coding_system (elt); | |
| 2784 count++; | |
| 2785 } | |
| 2786 } | |
| 771 | 2787 |
| 2788 cslist = xnew_array (Lisp_Object, count); | |
| 2789 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist; | |
| 2790 | |
| 2791 count = 0; | |
| 2367 | 2792 { |
| 2793 EXTERNAL_LIST_LOOP_2 (elt, value) | |
| 2794 { | |
| 2795 cslist[count] = Fget_coding_system (elt); | |
| 2796 count++; | |
| 2797 } | |
| 2798 } | |
| 771 | 2799 |
| 2800 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count; | |
| 2801 | |
| 2802 for (i = 0; i < count - 1; i++) | |
| 2803 { | |
| 2804 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) != | |
| 2805 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE)) | |
| 2806 invalid_argument_2 ("Sink of first must match source of second", | |
| 2807 cslist[i], cslist[i + 1]); | |
| 2808 } | |
| 2809 } | |
| 2810 else if (EQ (key, Qcanonicalize_after_coding)) | |
| 2811 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = | |
| 2812 Fget_coding_system (value); | |
| 2813 else | |
| 2814 return 0; | |
| 2815 return 1; | |
| 2816 } | |
| 2817 | |
| 2818 static Lisp_Object | |
| 2819 chain_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
| 2820 { | |
| 2821 if (EQ (prop, Qchain)) | |
| 2822 { | |
| 2823 Lisp_Object result = Qnil; | |
| 2824 int i; | |
| 2825 | |
| 2826 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++) | |
| 2827 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i], | |
| 2828 result); | |
| 2829 | |
| 2830 return Fnreverse (result); | |
| 2831 } | |
| 2832 else if (EQ (prop, Qcanonicalize_after_coding)) | |
| 2833 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system); | |
| 2834 else | |
| 2835 return Qunbound; | |
| 2836 } | |
| 2837 | |
| 2838 static enum source_sink_type | |
| 2839 chain_conversion_end_type (Lisp_Object codesys) | |
| 2840 { | |
| 2841 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys); | |
| 2842 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys); | |
| 2843 int charp_source, charp_sink; | |
| 2844 | |
| 2845 if (n == 0) | |
| 2846 return DECODES_BYTE_TO_BYTE; /* arbitrary */ | |
| 2847 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE); | |
| 2848 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK); | |
| 2849 | |
| 2850 switch (charp_source * 2 + charp_sink) | |
| 2851 { | |
| 2852 case 0: return DECODES_BYTE_TO_BYTE; | |
| 2853 case 1: return DECODES_BYTE_TO_CHARACTER; | |
| 2854 case 2: return DECODES_CHARACTER_TO_BYTE; | |
| 2855 case 3: return DECODES_CHARACTER_TO_CHARACTER; | |
| 2856 } | |
| 2857 | |
| 2500 | 2858 ABORT (); |
| 771 | 2859 return DECODES_BYTE_TO_BYTE; |
| 2860 } | |
| 2861 | |
| 2862 | |
| 2863 /************************************************************************/ | |
| 2864 /* No-conversion methods */ | |
| 2865 /************************************************************************/ | |
| 2866 | |
| 2867 /* "No conversion"; used for binary files. We use quotes because there | |
| 2868 really is some conversion being applied (it does byte<->char | |
| 2869 conversion), but it appears to the user as if the text is read in | |
| 2297 | 2870 without conversion. |
| 2871 | |
| 2872 #### Shouldn't we _call_ it that, then? And while we're at it, | |
| 2873 separate it into "to_internal" and "to_external"? */ | |
| 771 | 2874 DEFINE_CODING_SYSTEM_TYPE (no_conversion); |
| 2875 | |
| 2876 /* This is used when reading in "binary" files -- i.e. files that may | |
| 2877 contain all 256 possible byte values and that are not to be | |
| 2878 interpreted as being in any particular encoding. */ | |
| 2879 static Bytecount | |
| 2880 no_conversion_convert (struct coding_stream *str, | |
| 2881 const UExtbyte *src, | |
| 2882 unsigned_char_dynarr *dst, Bytecount n) | |
| 2883 { | |
| 2884 UExtbyte c; | |
| 2885 unsigned int ch = str->ch; | |
| 2886 Bytecount orign = n; | |
| 2887 | |
| 2888 if (str->direction == CODING_DECODE) | |
| 2889 { | |
| 2890 while (n--) | |
| 2891 { | |
| 2892 c = *src++; | |
| 2893 | |
| 2894 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 2895 } | |
| 2896 | |
| 2897 if (str->eof) | |
| 2898 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2899 } | |
| 2900 else | |
| 2901 { | |
| 2902 | |
| 2903 while (n--) | |
| 2904 { | |
| 2905 c = *src++; | |
| 826 | 2906 if (byte_ascii_p (c)) |
| 771 | 2907 { |
| 2908 assert (ch == 0); | |
| 2909 Dynarr_add (dst, c); | |
| 2910 } | |
| 2911 #ifdef MULE | |
| 867 | 2912 else if (ibyte_leading_byte_p (c)) |
| 771 | 2913 { |
| 2914 assert (ch == 0); | |
| 2915 if (c == LEADING_BYTE_LATIN_ISO8859_1 || | |
| 2916 c == LEADING_BYTE_CONTROL_1) | |
| 2917 ch = c; | |
| 2918 else | |
| 2297 | 2919 /* #### This is just plain unacceptable. */ |
| 771 | 2920 Dynarr_add (dst, '~'); /* untranslatable character */ |
| 2921 } | |
| 2922 else | |
| 2923 { | |
| 2924 if (ch == LEADING_BYTE_LATIN_ISO8859_1) | |
| 2925 Dynarr_add (dst, c); | |
| 2926 else if (ch == LEADING_BYTE_CONTROL_1) | |
| 2927 { | |
| 2928 assert (c < 0xC0); | |
| 2929 Dynarr_add (dst, c - 0x20); | |
| 2930 } | |
| 2931 /* else it should be the second or third byte of an | |
| 2932 untranslatable character, so ignore it */ | |
| 2933 ch = 0; | |
| 2934 } | |
| 2935 #endif /* MULE */ | |
| 2936 | |
| 2937 } | |
| 2938 } | |
| 2939 | |
| 2940 str->ch = ch; | |
| 2941 return orign; | |
| 2942 } | |
| 2943 | |
| 2944 DEFINE_DETECTOR (no_conversion); | |
| 2945 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
| 2946 | |
| 2947 struct no_conversion_detector | |
| 2948 { | |
| 2949 int dummy; | |
| 2950 }; | |
| 2951 | |
| 2952 static void | |
| 2286 | 2953 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src), |
| 2954 Bytecount UNUSED (n)) | |
| 771 | 2955 { |
| 2956 /* Hack until we get better handling of this stuff! */ | |
| 2957 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY; | |
| 2958 } | |
| 2959 | |
| 2960 | |
| 2961 /************************************************************************/ | |
| 2962 /* Convert-eol methods */ | |
| 2963 /************************************************************************/ | |
| 2964 | |
| 2965 /* This is used to handle end-of-line (EOL) differences. It is | |
| 2819 | 2966 character-to-character, and works (when encoding) *BEFORE* sending data to |
| 2967 the main encoding routine -- thus, that routine must handle different EOL | |
| 2968 types itself if it does line-oriented type processing. This is unavoidable | |
| 2969 because we don't know whether the output of the main encoding routine is | |
| 2970 ASCII compatible (UTF-16 is definitely not, for example). [[ sjt sez this | |
| 2971 is bogus. There should be _no_ EOL processing (or processing of any kind) | |
| 2972 after conversion to external. ]] | |
| 771 | 2973 |
| 793 | 2974 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil. |
| 771 | 2975 */ |
| 2976 | |
| 2977 struct convert_eol_coding_system | |
| 2978 { | |
| 2979 enum eol_type subtype; | |
| 2132 | 2980 int dummy; /* On some architectures (eg ia64) the portable dumper can |
| 2981 produce unaligned access errors without this field. Probably | |
| 2982 because the combined structure of this structure and | |
| 2983 Lisp_Coding_System is not properly aligned. */ | |
| 771 | 2984 }; |
| 2985 | |
| 2986 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
| 2987 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
| 2988 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
| 2989 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
| 2990 | |
| 2991 struct convert_eol_coding_stream | |
| 2992 { | |
| 2993 enum eol_type actual; | |
| 2994 }; | |
| 2995 | |
| 1204 | 2996 static const struct memory_description |
| 771 | 2997 convert_eol_coding_system_description[] = { |
| 2998 { XD_END } | |
| 2999 }; | |
| 3000 | |
| 1204 | 3001 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol); |
| 3002 | |
| 771 | 3003 static void |
| 2286 | 3004 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun, |
| 3005 int UNUSED (escapeflag)) | |
| 771 | 3006 { |
| 3007 struct convert_eol_coding_system *data = | |
| 3008 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol); | |
| 3009 | |
| 3010 write_fmt_string (printcharfun, "(%s)", | |
| 3011 data->subtype == EOL_LF ? "lf" : | |
| 3012 data->subtype == EOL_CRLF ? "crlf" : | |
| 3013 data->subtype == EOL_CR ? "cr" : | |
| 793 | 3014 data->subtype == EOL_AUTODETECT ? "nil" : |
| 2500 | 3015 (ABORT(), "")); |
| 771 | 3016 } |
| 3017 | |
| 3018 static enum source_sink_type | |
| 2286 | 3019 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys)) |
| 771 | 3020 { |
| 3021 return DECODES_CHARACTER_TO_CHARACTER; | |
| 3022 } | |
| 3023 | |
| 3024 static int | |
| 3025 convert_eol_putprop (Lisp_Object codesys, | |
| 3026 Lisp_Object key, | |
| 3027 Lisp_Object value) | |
| 3028 { | |
| 3029 struct convert_eol_coding_system *data = | |
| 3030 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol); | |
| 3031 | |
| 3032 if (EQ (key, Qsubtype)) | |
| 3033 { | |
| 3034 if (EQ (value, Qlf) /* || EQ (value, Qunix) */) | |
| 3035 data->subtype = EOL_LF; | |
| 3036 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */) | |
| 3037 data->subtype = EOL_CRLF; | |
| 3038 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */) | |
| 3039 data->subtype = EOL_CR; | |
| 793 | 3040 else if (EQ (value, Qnil)) |
| 771 | 3041 data->subtype = EOL_AUTODETECT; |
| 3042 else invalid_constant ("Unrecognized eol type", value); | |
| 3043 } | |
| 3044 else | |
| 3045 return 0; | |
| 3046 return 1; | |
| 3047 } | |
| 3048 | |
| 3049 static Lisp_Object | |
| 3050 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
| 3051 { | |
| 3052 struct convert_eol_coding_system *data = | |
| 3053 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol); | |
| 3054 | |
| 3055 if (EQ (prop, Qsubtype)) | |
| 3056 { | |
| 3057 switch (data->subtype) | |
| 3058 { | |
| 3059 case EOL_LF: return Qlf; | |
| 3060 case EOL_CRLF: return Qcrlf; | |
| 3061 case EOL_CR: return Qcr; | |
| 793 | 3062 case EOL_AUTODETECT: return Qnil; |
| 2500 | 3063 default: ABORT (); |
| 771 | 3064 } |
| 3065 } | |
| 3066 | |
| 3067 return Qunbound; | |
| 3068 } | |
| 3069 | |
| 3070 static void | |
| 3071 convert_eol_init_coding_stream (struct coding_stream *str) | |
| 3072 { | |
| 3073 struct convert_eol_coding_stream *data = | |
| 3074 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
| 3075 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
| 3076 } | |
| 3077 | |
| 3078 static Bytecount | |
| 867 | 3079 convert_eol_convert (struct coding_stream *str, const Ibyte *src, |
| 771 | 3080 unsigned_char_dynarr *dst, Bytecount n) |
| 3081 { | |
| 3082 if (str->direction == CODING_DECODE) | |
| 3083 { | |
| 3084 struct convert_eol_coding_stream *data = | |
| 3085 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
| 3086 | |
| 3087 if (data->actual == EOL_AUTODETECT) | |
| 3088 { | |
| 3089 Bytecount n2 = n; | |
| 867 | 3090 const Ibyte *src2 = src; |
| 771 | 3091 |
| 3092 for (; n2; n2--) | |
| 3093 { | |
| 867 | 3094 Ibyte c = *src2++; |
| 771 | 3095 if (c == '\n') |
| 3096 { | |
| 3097 data->actual = EOL_LF; | |
| 3098 break; | |
| 3099 } | |
| 3100 else if (c == '\r') | |
| 3101 { | |
| 3102 if (n2 == 1) | |
| 3103 { | |
| 3104 /* If we're seeing a '\r' at the end of the data, then | |
| 3105 reject the '\r' right now so it doesn't become an | |
| 3106 issue in the code below -- unless we're at the end of | |
| 3107 the stream, in which case we can't do that (because | |
| 3108 then the '\r' will never get written out), and in any | |
| 3109 case we should be recognizing it at EOL_CR format. */ | |
| 3110 if (str->eof) | |
| 3111 data->actual = EOL_CR; | |
| 3112 else | |
| 3113 n--; | |
| 3114 break; | |
| 3115 } | |
| 3116 else if (*src2 == '\n') | |
| 3117 data->actual = EOL_CRLF; | |
| 3118 else | |
| 3119 data->actual = EOL_CR; | |
| 3120 break; | |
| 3121 } | |
| 3122 } | |
| 3123 } | |
| 3124 | |
| 3125 /* str->eof is set, the caller reached EOF on the other end and has | |
| 3126 no new data to give us. The only data we get is the data we | |
| 3127 rejected from last time. */ | |
| 3128 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT || | |
| 3129 (str->eof)) | |
| 3130 Dynarr_add_many (dst, src, n); | |
| 3131 else | |
| 3132 { | |
| 867 | 3133 const Ibyte *end = src + n; |
| 771 | 3134 while (1) |
| 3135 { | |
| 3136 /* Find the next section with no \r and add it. */ | |
| 867 | 3137 const Ibyte *runstart = src; |
| 3138 src = (Ibyte *) memchr (src, '\r', end - src); | |
| 771 | 3139 if (!src) |
| 3140 src = end; | |
| 3141 Dynarr_add_many (dst, runstart, src - runstart); | |
| 3142 /* Stop if at end ... */ | |
| 3143 if (src == end) | |
| 3144 break; | |
| 3145 /* ... else, translate as necessary. */ | |
| 3146 src++; | |
| 3147 if (data->actual == EOL_CR) | |
| 3148 Dynarr_add (dst, '\n'); | |
| 3149 /* We need to be careful here with CRLF. If we see a CR at the | |
| 3150 end of the data, we don't know if it's part of a CRLF, so we | |
| 3151 reject it. Otherwise: If it's part of a CRLF, eat it and | |
| 3152 loop; the following LF gets added next time around. If it's | |
| 3153 not part of a CRLF, add the CR and loop. The following | |
| 3154 character will be processed in the next loop iteration. This | |
| 3155 correctly handles a sequence like CR+CR+LF. */ | |
| 3156 else if (src == end) | |
| 3157 return n - 1; /* reject the CR at the end; we'll get it again | |
| 3158 next time the convert method is called */ | |
| 3159 else if (*src != '\n') | |
| 3160 Dynarr_add (dst, '\r'); | |
| 3161 } | |
| 3162 } | |
| 3163 | |
| 3164 return n; | |
| 3165 } | |
| 3166 else | |
| 3167 { | |
| 3168 enum eol_type subtype = | |
| 3169 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
| 867 | 3170 const Ibyte *end = src + n; |
| 771 | 3171 |
| 3172 /* We try to be relatively efficient here. */ | |
| 3173 if (subtype == EOL_LF) | |
| 3174 Dynarr_add_many (dst, src, n); | |
| 3175 else | |
| 3176 { | |
| 3177 while (1) | |
| 3178 { | |
| 3179 /* Find the next section with no \n and add it. */ | |
| 867 | 3180 const Ibyte *runstart = src; |
| 3181 src = (Ibyte *) memchr (src, '\n', end - src); | |
| 771 | 3182 if (!src) |
| 3183 src = end; | |
| 3184 Dynarr_add_many (dst, runstart, src - runstart); | |
| 3185 /* Stop if at end ... */ | |
| 3186 if (src == end) | |
| 3187 break; | |
| 3188 /* ... else, skip over \n and add its translation. */ | |
| 3189 src++; | |
| 3190 Dynarr_add (dst, '\r'); | |
| 3191 if (subtype == EOL_CRLF) | |
| 3192 Dynarr_add (dst, '\n'); | |
| 3193 } | |
| 3194 } | |
| 3195 | |
| 3196 return n; | |
| 3197 } | |
| 3198 } | |
| 3199 | |
| 3200 static Lisp_Object | |
| 3201 convert_eol_canonicalize_after_coding (struct coding_stream *str) | |
| 3202 { | |
| 3203 struct convert_eol_coding_stream *data = | |
| 3204 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
| 3205 | |
| 3206 if (str->direction == CODING_ENCODE) | |
| 3207 return str->codesys; | |
| 3208 | |
| 3209 switch (data->actual) | |
| 3210 { | |
| 3211 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf); | |
| 3212 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf); | |
| 3213 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr); | |
| 3214 case EOL_AUTODETECT: return str->codesys; | |
| 2500 | 3215 default: ABORT (); return Qnil; |
| 771 | 3216 } |
| 3217 } | |
| 3218 | |
| 3219 | |
| 3220 /************************************************************************/ | |
| 3221 /* Undecided methods */ | |
| 3222 /************************************************************************/ | |
| 3223 | |
| 3224 /* Do autodetection. We can autodetect the EOL type only, the coding | |
| 3225 system only, or both. We only do autodetection when decoding; when | |
| 3226 encoding, we just pass the data through. | |
| 3227 | |
| 3228 When doing just EOL detection, a coding system can be specified; if so, | |
| 3229 we will decode this data through the coding system before doing EOL | |
| 3230 detection. The reason for specifying this is so that | |
| 3231 canonicalize-after-coding works: We will canonicalize the specified | |
| 3232 coding system into the appropriate EOL type. When doing both coding and | |
| 3233 EOL detection, we do similar canonicalization, and also catch situations | |
| 3234 where the EOL type is overspecified, i.e. the detected coding system | |
| 3235 specifies an EOL type, and either switch to the equivalent | |
| 3236 non-EOL-processing coding system (if possible), or terminate EOL | |
| 3237 detection and use the specified EOL type. This prevents data from being | |
| 3238 EOL-processed twice. | |
| 3239 */ | |
| 3240 | |
| 3241 struct undecided_coding_system | |
| 3242 { | |
| 3243 int do_eol, do_coding; | |
| 3244 Lisp_Object cs; | |
| 3245 }; | |
| 3246 | |
| 3247 struct undecided_coding_stream | |
| 3248 { | |
| 3249 Lisp_Object actual; | |
| 3250 /* Either 2 or 3 lstreams here; see undecided_convert */ | |
| 3251 struct chain_coding_stream c; | |
| 3252 | |
| 3253 struct detection_state *st; | |
| 3254 }; | |
| 3255 | |
| 1204 | 3256 static const struct memory_description undecided_coding_system_description[] = { |
| 3257 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) }, | |
| 771 | 3258 { XD_END } |
| 3259 }; | |
| 3260 | |
| 1204 | 3261 static const struct memory_description undecided_coding_stream_description_1 [] = { |
| 3262 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) }, | |
| 2367 | 3263 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c), |
| 2551 | 3264 1, { &chain_coding_stream_description } }, |
| 1204 | 3265 { XD_END } |
| 3266 }; | |
| 3267 | |
| 3268 const struct sized_memory_description undecided_coding_stream_description = { | |
| 3269 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1 | |
| 3270 }; | |
| 3271 | |
| 3272 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided); | |
| 3273 | |
| 771 | 3274 static void |
| 3275 undecided_init (Lisp_Object codesys) | |
| 3276 { | |
| 3277 struct undecided_coding_system *data = | |
| 3278 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3279 | |
| 3280 data->cs = Qnil; | |
| 3281 } | |
| 3282 | |
| 3283 static void | |
| 3284 undecided_mark (Lisp_Object codesys) | |
| 3285 { | |
| 3286 struct undecided_coding_system *data = | |
| 3287 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3288 | |
| 3289 mark_object (data->cs); | |
| 3290 } | |
| 3291 | |
| 3292 static void | |
| 3293 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
| 3294 { | |
| 3295 struct undecided_coding_system *data = | |
| 3296 XCODING_SYSTEM_TYPE_DATA (cs, undecided); | |
| 3297 int need_space = 0; | |
| 3298 | |
| 826 | 3299 write_c_string (printcharfun, "("); |
| 771 | 3300 if (data->do_eol) |
| 3301 { | |
| 826 | 3302 write_c_string (printcharfun, "do-eol"); |
| 771 | 3303 need_space = 1; |
| 3304 } | |
| 3305 if (data->do_coding) | |
| 3306 { | |
| 3307 if (need_space) | |
| 826 | 3308 write_c_string (printcharfun, " "); |
| 3309 write_c_string (printcharfun, "do-coding"); | |
| 771 | 3310 need_space = 1; |
| 3311 } | |
| 3312 if (!NILP (data->cs)) | |
| 3313 { | |
| 3314 if (need_space) | |
| 826 | 3315 write_c_string (printcharfun, " "); |
| 3316 write_c_string (printcharfun, "coding-system="); | |
| 771 | 3317 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag); |
| 3318 } | |
| 826 | 3319 write_c_string (printcharfun, ")"); |
| 771 | 3320 } |
| 3321 | |
| 3322 static void | |
| 3323 undecided_mark_coding_stream (struct coding_stream *str) | |
| 3324 { | |
| 1204 | 3325 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual); |
| 771 | 3326 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); |
| 3327 } | |
| 3328 | |
| 3329 static int | |
| 3330 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
| 3331 { | |
| 3332 struct undecided_coding_system *data = | |
| 3333 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3334 | |
| 3335 if (EQ (key, Qdo_eol)) | |
| 3336 data->do_eol = 1; | |
| 3337 else if (EQ (key, Qdo_coding)) | |
| 3338 data->do_coding = 1; | |
| 3339 else if (EQ (key, Qcoding_system)) | |
| 3340 data->cs = get_coding_system_for_text_file (value, 0); | |
| 3341 else | |
| 3342 return 0; | |
| 3343 return 1; | |
| 3344 } | |
| 3345 | |
| 3346 static Lisp_Object | |
| 3347 undecided_getprop (Lisp_Object codesys, Lisp_Object prop) | |
| 3348 { | |
| 3349 struct undecided_coding_system *data = | |
| 3350 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3351 | |
| 3352 if (EQ (prop, Qdo_eol)) | |
| 3353 return data->do_eol ? Qt : Qnil; | |
| 3354 if (EQ (prop, Qdo_coding)) | |
| 3355 return data->do_coding ? Qt : Qnil; | |
| 3356 if (EQ (prop, Qcoding_system)) | |
| 3357 return data->cs; | |
| 3358 return Qunbound; | |
| 3359 } | |
| 3360 | |
| 3361 static struct detection_state * | |
| 3362 allocate_detection_state (void) | |
| 3363 { | |
| 3364 int i; | |
| 3365 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
| 3366 struct detection_state *block; | |
| 3367 | |
| 3368 for (i = 0; i < coding_detector_count; i++) | |
| 3369 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
| 3370 | |
| 3371 block = (struct detection_state *) xmalloc_and_zero (size); | |
| 3372 | |
| 3373 size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
| 3374 for (i = 0; i < coding_detector_count; i++) | |
| 3375 { | |
| 3376 block->data_offset[i] = size; | |
| 3377 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
| 3378 } | |
| 3379 | |
| 3380 return block; | |
| 3381 } | |
| 3382 | |
| 3383 static void | |
| 3384 free_detection_state (struct detection_state *st) | |
| 3385 { | |
| 3386 int i; | |
| 3387 | |
| 3388 for (i = 0; i < coding_detector_count; i++) | |
| 3389 { | |
| 3390 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method) | |
| 3391 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method | |
| 3392 (st); | |
| 3393 } | |
| 3394 | |
| 1726 | 3395 xfree (st, struct detection_state *); |
| 771 | 3396 } |
| 3397 | |
| 3398 static int | |
| 3399 coding_category_symbol_to_id (Lisp_Object symbol) | |
| 428 | 3400 { |
| 3401 int i; | |
| 3402 | |
| 3403 CHECK_SYMBOL (symbol); | |
| 771 | 3404 for (i = 0; i < coding_detector_count; i++) |
| 3405 { | |
| 3406 detector_category_dynarr *cats = | |
| 3407 Dynarr_at (all_coding_detectors, i).cats; | |
| 3408 int j; | |
| 3409 | |
| 3410 for (j = 0; j < Dynarr_length (cats); j++) | |
| 3411 if (EQ (Dynarr_at (cats, j).sym, symbol)) | |
| 3412 return Dynarr_at (cats, j).id; | |
| 3413 } | |
| 3414 | |
| 563 | 3415 invalid_constant ("Unrecognized coding category", symbol); |
| 1204 | 3416 RETURN_NOT_REACHED (0); |
| 428 | 3417 } |
| 3418 | |
| 771 | 3419 static Lisp_Object |
| 3420 coding_category_id_to_symbol (int id) | |
| 428 | 3421 { |
| 3422 int i; | |
| 771 | 3423 |
| 3424 for (i = 0; i < coding_detector_count; i++) | |
| 3425 { | |
| 3426 detector_category_dynarr *cats = | |
| 3427 Dynarr_at (all_coding_detectors, i).cats; | |
| 3428 int j; | |
| 3429 | |
| 3430 for (j = 0; j < Dynarr_length (cats); j++) | |
| 3431 if (id == Dynarr_at (cats, j).id) | |
| 3432 return Dynarr_at (cats, j).sym; | |
| 3433 } | |
| 3434 | |
| 2500 | 3435 ABORT (); |
| 771 | 3436 return Qnil; /* (usually) not reached */ |
| 428 | 3437 } |
| 3438 | |
| 771 | 3439 static Lisp_Object |
| 3440 detection_result_number_to_symbol (enum detection_result result) | |
| 428 | 3441 { |
| 1494 | 3442 /* let compiler warn if not all enumerators are handled */ |
| 3443 switch (result) { | |
| 3444 #define FROB(sym, num) case num: return (sym) | |
| 771 | 3445 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); |
| 3446 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
| 3447 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
| 1494 | 3448 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
| 771 | 3449 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
| 3450 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
| 3451 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
| 3452 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
| 3453 #undef FROB | |
| 1494 | 3454 } |
| 771 | 3455 |
| 2500 | 3456 ABORT (); |
| 771 | 3457 return Qnil; /* (usually) not reached */ |
| 3458 } | |
| 3459 | |
| 778 | 3460 #if 0 /* not used */ |
| 771 | 3461 static enum detection_result |
| 3462 detection_result_symbol_to_number (Lisp_Object symbol) | |
| 3463 { | |
| 1494 | 3464 /* using switch here would be bad style, and doesn't help */ |
| 771 | 3465 #define FROB(sym, num) if (EQ (symbol, sym)) return (num) |
| 3466 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); | |
| 3467 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
| 3468 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
| 1494 | 3469 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
| 771 | 3470 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
| 3471 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
| 3472 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
| 3473 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
| 3474 #undef FROB | |
| 3475 | |
| 3476 invalid_constant ("Unrecognized detection result", symbol); | |
| 3477 return ((enum detection_result) 0); /* not reached */ | |
| 3478 } | |
| 778 | 3479 #endif /* 0 */ |
| 771 | 3480 |
| 3481 /* Set all detection results for a given detector to a specified value. */ | |
| 3482 void | |
| 3483 set_detection_results (struct detection_state *st, int detector, int given) | |
| 3484 { | |
| 3485 detector_category_dynarr *cats = | |
| 3486 Dynarr_at (all_coding_detectors, detector).cats; | |
| 3487 int i; | |
| 3488 | |
| 3489 for (i = 0; i < Dynarr_length (cats); i++) | |
| 3490 st->categories[Dynarr_at (cats, i).id] = given; | |
| 3491 } | |
| 428 | 3492 |
| 3493 static int | |
| 3494 acceptable_control_char_p (int c) | |
| 3495 { | |
| 3496 switch (c) | |
| 3497 { | |
| 3498 /* Allow and ignore control characters that you might | |
| 3499 reasonably see in a text file */ | |
| 3500 case '\r': | |
| 3501 case '\n': | |
| 3502 case '\t': | |
| 3503 case 7: /* bell */ | |
| 3504 case 8: /* backspace */ | |
| 3505 case 11: /* vertical tab */ | |
| 3506 case 12: /* form feed */ | |
| 3507 case 26: /* MS-DOS C-z junk */ | |
| 3508 case 31: /* '^_' -- for info */ | |
| 3509 return 1; | |
| 3510 default: | |
| 3511 return 0; | |
| 3512 } | |
| 3513 } | |
| 3514 | |
| 771 | 3515 #ifdef DEBUG_XEMACS |
| 3516 | |
| 3517 static UExtbyte | |
| 3518 hex_digit_to_char (int digit) | |
| 428 | 3519 { |
| 771 | 3520 if (digit < 10) |
| 3521 return digit + '0'; | |
| 3522 else | |
| 3523 return digit - 10 + 'A'; | |
| 428 | 3524 } |
| 3525 | |
| 771 | 3526 static void |
| 3527 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) | |
| 428 | 3528 { |
| 3425 | 3529 Extbyte *ascii = alloca_array (Extbyte, n + 1); |
| 3530 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1); | |
| 771 | 3531 int i; |
| 3413 | 3532 DECLARE_EISTRING (eistr_ascii); |
| 3533 DECLARE_EISTRING (eistr_hex); | |
| 771 | 3534 |
| 3535 for (i = 0; i < n; i++) | |
| 428 | 3536 { |
| 3425 | 3537 Extbyte c = src[i]; |
| 771 | 3538 if (c < 0x20) |
| 3539 ascii[i] = '.'; | |
| 428 | 3540 else |
| 771 | 3541 ascii[i] = c; |
| 3542 hex[3 * i] = hex_digit_to_char (c >> 4); | |
| 3543 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); | |
| 3544 hex[3 * i + 2] = ' '; | |
| 428 | 3545 } |
| 771 | 3546 ascii[i] = '\0'; |
| 3547 hex[3 * i - 1] = '\0'; | |
| 3413 | 3548 |
| 3549 eicpy_ext(eistr_hex, hex, Qbinary); | |
| 3550 eicpy_ext(eistr_ascii, ascii, Qbinary); | |
| 3551 | |
| 3425 | 3552 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex)); |
| 428 | 3553 } |
| 3554 | |
| 771 | 3555 #endif /* DEBUG_XEMACS */ |
| 3556 | |
| 3557 /* Attempt to determine the encoding of the given text. Before calling | |
| 3558 this function for the first time, you must zero out the detection state. | |
| 428 | 3559 |
| 3560 Returns: | |
| 3561 | |
| 771 | 3562 0 == keep going |
| 3563 1 == stop | |
| 428 | 3564 */ |
| 3565 | |
| 3566 static int | |
| 771 | 3567 detect_coding_type (struct detection_state *st, const UExtbyte *src, |
| 3568 Bytecount n) | |
| 428 | 3569 { |
| 771 | 3570 Bytecount n2 = n; |
| 3571 const UExtbyte *src2 = src; | |
| 3572 int i; | |
| 3573 | |
| 3574 #ifdef DEBUG_XEMACS | |
| 3575 if (!NILP (Vdebug_coding_detection)) | |
| 3576 { | |
| 3577 int bytes = min (16, n); | |
| 3578 stderr_out ("detect_coding_type: processing %ld bytes\n", n); | |
| 3579 stderr_out ("First %d: ", bytes); | |
| 3580 output_bytes_in_ascii_and_hex (src, bytes); | |
| 3581 stderr_out ("\nLast %d: ", bytes); | |
| 3582 output_bytes_in_ascii_and_hex (src + n - bytes, bytes); | |
| 3583 stderr_out ("\n"); | |
| 3584 } | |
| 3585 #endif /* DEBUG_XEMACS */ | |
| 428 | 3586 if (!st->seen_non_ascii) |
| 3587 { | |
| 771 | 3588 for (; n2; n2--, src2++) |
| 428 | 3589 { |
| 771 | 3590 UExtbyte c = *src2; |
| 428 | 3591 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) |
| 3592 { | |
| 3593 st->seen_non_ascii = 1; | |
| 3594 break; | |
| 3595 } | |
| 3596 } | |
| 3597 } | |
| 3598 | |
| 771 | 3599 for (i = 0; i < coding_detector_count; i++) |
| 3600 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n); | |
| 3601 | |
| 3602 st->bytes_seen += n; | |
| 3603 | |
| 3604 #ifdef DEBUG_XEMACS | |
| 3605 if (!NILP (Vdebug_coding_detection)) | |
| 3606 { | |
| 3607 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii); | |
| 1494 | 3608 if (coding_detector_category_count <= 0) |
| 3609 stderr_out ("found %d detector categories\n", | |
| 3610 coding_detector_category_count); | |
| 771 | 3611 for (i = 0; i < coding_detector_category_count; i++) |
| 3612 stderr_out_lisp | |
| 3613 ("%s: %s\n", | |
| 3614 2, | |
| 3615 coding_category_id_to_symbol (i), | |
| 3616 detection_result_number_to_symbol ((enum detection_result) | |
| 3617 st->categories[i])); | |
| 3618 } | |
| 3619 #endif /* DEBUG_XEMACS */ | |
| 3620 | |
| 3621 { | |
| 3622 int not_unlikely = 0; | |
| 3623 int retval; | |
| 3624 | |
| 3625 for (i = 0; i < coding_detector_category_count; i++) | |
| 3626 if (st->categories[i] >= 0) | |
| 3627 not_unlikely++; | |
| 3628 | |
| 3629 retval = (not_unlikely <= 1 | |
| 3630 #if 0 /* this is bogus */ | |
| 3631 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION | |
| 428 | 3632 #endif |
| 771 | 3633 ); |
| 3634 | |
| 3635 #ifdef DEBUG_XEMACS | |
| 3636 if (!NILP (Vdebug_coding_detection)) | |
| 3637 stderr_out ("detect_coding_type: returning %d (%s)\n", | |
| 3638 retval, retval ? "stop" : "keep going"); | |
| 3639 #endif /* DEBUG_XEMACS */ | |
| 3640 | |
| 3641 return retval; | |
| 428 | 3642 } |
| 3643 } | |
| 3644 | |
| 3645 static Lisp_Object | |
| 771 | 3646 detected_coding_system (struct detection_state *st) |
| 428 | 3647 { |
| 771 | 3648 int i; |
| 3649 int even = 1; | |
| 3650 | |
| 3651 if (st->seen_non_ascii) | |
| 3652 { | |
| 3653 for (i = 0; i < coding_detector_category_count; i++) | |
| 3654 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY) | |
| 3655 { | |
| 3656 even = 0; | |
| 3657 break; | |
| 3658 } | |
| 3659 } | |
| 3660 | |
| 3661 /* #### Here we are ignoring the results of detection when it's all | |
| 3662 ASCII. This is obviously a bad thing. But we need to fix up the | |
| 3663 existing detection methods somewhat before we can switch. */ | |
| 3664 if (even) | |
| 428 | 3665 { |
| 3666 /* If the file was entirely or basically ASCII, use the | |
| 3667 default value of `buffer-file-coding-system'. */ | |
| 3668 Lisp_Object retval = | |
| 3669 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system; | |
| 3670 if (!NILP (retval)) | |
| 3671 { | |
| 771 | 3672 retval = find_coding_system_for_text_file (retval, 0); |
| 428 | 3673 if (NILP (retval)) |
| 3674 { | |
| 3675 warn_when_safe | |
| 3676 (Qbad_variable, Qwarning, | |
| 3677 "Invalid `default-buffer-file-coding-system', set to nil"); | |
| 3678 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; | |
| 3679 } | |
| 3680 } | |
| 3681 if (NILP (retval)) | |
| 4100 | 3682 retval = Fget_coding_system (Qbinary); |
| 428 | 3683 return retval; |
| 3684 } | |
| 3685 else | |
| 3686 { | |
| 771 | 3687 int likelihood; |
| 3688 Lisp_Object retval = Qnil; | |
| 3689 | |
| 3690 /* Look through the coding categories first by likelihood and then by | |
| 3691 priority and find the first one that is allowed. */ | |
| 3692 | |
| 3693 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--) | |
| 428 | 3694 { |
| 771 | 3695 for (i = 0; i < coding_detector_category_count; i++) |
| 3696 { | |
| 3697 int cat = coding_category_by_priority[i]; | |
| 3698 if (st->categories[cat] == likelihood && | |
| 3699 !NILP (coding_category_system[cat])) | |
| 3700 { | |
| 3701 retval = (get_coding_system_for_text_file | |
| 3702 (coding_category_system[cat], 0)); | |
| 3703 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY) | |
| 3704 warn_when_safe_lispobj | |
| 3705 (intern ("detection"), | |
| 793 | 3706 Qwarning, |
| 771 | 3707 emacs_sprintf_string_lisp |
| 3708 ( | |
| 3709 "Detected coding %s is unlikely to be correct (likelihood == `%s')", | |
| 3710 Qnil, 2, XCODING_SYSTEM_NAME (retval), | |
| 3711 detection_result_number_to_symbol | |
| 3712 ((enum detection_result) likelihood))); | |
| 3713 return retval; | |
| 3714 } | |
| 3715 } | |
| 428 | 3716 } |
| 771 | 3717 |
| 3718 return Fget_coding_system (Qraw_text); | |
| 428 | 3719 } |
| 3720 } | |
| 3721 | |
| 1347 | 3722 /* Look for a coding system in the string (skipping over leading |
| 3723 blanks). If found, return it, otherwise nil. */ | |
| 3724 | |
| 3725 static Lisp_Object | |
| 2531 | 3726 snarf_coding_system (const UExtbyte *p, Bytecount len) |
| 1347 | 3727 { |
| 3728 Bytecount n; | |
| 2531 | 3729 UExtbyte *name; |
| 1347 | 3730 |
| 3731 while (*p == ' ' || *p == '\t') p++, len--; | |
| 3732 len = min (len, 1000); | |
| 3733 name = alloca_ibytes (len + 1); | |
| 3734 memcpy (name, p, len); | |
| 3735 name[len] = '\0'; | |
| 3736 | |
| 3737 /* Get coding system name */ | |
| 3738 /* Characters valid in a MIME charset name (rfc 1521), | |
| 3739 and in a Lisp symbol name. */ | |
| 3740 n = qxestrspn (name, | |
| 3741 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 3742 "abcdefghijklmnopqrstuvwxyz" | |
| 3743 "0123456789" | |
| 3744 "!$%&*+-.^_{|}~"); | |
| 3745 if (n > 0) | |
| 3746 { | |
| 3747 name[n] = '\0'; | |
| 2531 | 3748 /* This call to intern_int() is OK because we already verified that |
| 3749 there are only ASCII characters in the string */ | |
| 3750 return find_coding_system_for_text_file (intern_int ((Ibyte *) name), 0); | |
| 1347 | 3751 } |
| 3752 | |
| 3753 return Qnil; | |
| 3754 } | |
| 3755 | |
| 428 | 3756 /* Given a seekable read stream and potential coding system and EOL type |
| 3757 as specified, do any autodetection that is called for. If the | |
| 3758 coding system and/or EOL type are not `autodetect', they will be left | |
| 3759 alone; but this function will never return an autodetect coding system | |
| 3760 or EOL type. | |
| 3761 | |
| 3762 This function does not automatically fetch subsidiary coding systems; | |
| 3763 that should be unnecessary with the explicit eol-type argument. */ | |
| 3764 | |
| 3765 #define LENGTH(string_constant) (sizeof (string_constant) - 1) | |
| 3766 | |
| 771 | 3767 static Lisp_Object |
| 3768 unwind_free_detection_state (Lisp_Object opaque) | |
| 3769 { | |
| 3770 struct detection_state *st = | |
| 3771 (struct detection_state *) get_opaque_ptr (opaque); | |
| 3772 free_detection_state (st); | |
| 3773 free_opaque_ptr (opaque); | |
| 3774 return Qnil; | |
| 3775 } | |
| 3776 | |
| 1347 | 3777 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file' |
| 3778 in files.el. Look into combining them. */ | |
| 3779 | |
| 771 | 3780 static Lisp_Object |
| 3781 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len) | |
| 428 | 3782 { |
| 771 | 3783 const UExtbyte *p; |
| 3784 const UExtbyte *scan_end; | |
| 2531 | 3785 Bytecount cookie_len; |
| 771 | 3786 |
| 3787 /* Look for initial "-*-"; mode line prefix */ | |
| 3788 for (p = data, | |
| 3789 scan_end = data + len - LENGTH ("-*-coding:?-*-"); | |
| 3790 p <= scan_end | |
| 3791 && *p != '\n' | |
| 3792 && *p != '\r'; | |
| 3793 p++) | |
| 3794 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
| 3795 { | |
| 3796 const UExtbyte *local_vars_beg = p + 3; | |
| 3797 /* Look for final "-*-"; mode line suffix */ | |
| 3798 for (p = local_vars_beg, | |
| 3799 scan_end = data + len - LENGTH ("-*-"); | |
| 3800 p <= scan_end | |
| 428 | 3801 && *p != '\n' |
| 3802 && *p != '\r'; | |
| 771 | 3803 p++) |
| 3804 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
| 3805 { | |
| 3806 const UExtbyte *suffix = p; | |
| 3807 /* Look for "coding:" */ | |
| 3808 for (p = local_vars_beg, | |
| 3809 scan_end = suffix - LENGTH ("coding:?"); | |
| 3810 p <= scan_end; | |
| 3811 p++) | |
| 3812 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0 | |
| 3813 && (p == local_vars_beg | |
| 3814 || (*(p-1) == ' ' || | |
| 3815 *(p-1) == '\t' || | |
| 3816 *(p-1) == ';'))) | |
| 3817 { | |
| 3818 p += LENGTH ("coding:"); | |
| 1347 | 3819 return snarf_coding_system (p, suffix - p); |
| 771 | 3820 break; |
| 3821 } | |
| 3822 break; | |
| 3823 } | |
| 3824 break; | |
| 3825 } | |
| 3826 | |
| 2531 | 3827 /* Look for ;;;###coding system */ |
| 3828 | |
| 3829 cookie_len = LENGTH (";;;###coding system: "); | |
| 3830 | |
| 3831 for (p = data, | |
| 3832 scan_end = data + len - cookie_len; | |
| 3833 p <= scan_end; | |
| 3834 p++) | |
| 1347 | 3835 { |
| 2531 | 3836 if (*p == ';' && !memcmp (p, ";;;###coding system: ", cookie_len)) |
| 3837 { | |
| 3838 const UExtbyte *suffix; | |
| 3839 | |
| 3840 p += cookie_len; | |
| 3841 suffix = p; | |
| 3842 while (suffix < scan_end && !isspace (*suffix)) | |
| 3843 suffix++; | |
| 3844 return snarf_coding_system (p, suffix - p); | |
| 3845 } | |
| 1347 | 3846 } |
| 3847 | |
| 3848 return Qnil; | |
| 771 | 3849 } |
| 3850 | |
| 3851 static Lisp_Object | |
| 3852 determine_real_coding_system (Lstream *stream) | |
| 3853 { | |
| 3854 struct detection_state *st = allocate_detection_state (); | |
| 3855 int depth = record_unwind_protect (unwind_free_detection_state, | |
| 3856 make_opaque_ptr (st)); | |
| 3857 UExtbyte buf[4096]; | |
| 3858 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); | |
| 3859 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread); | |
| 3860 | |
| 3861 if (NILP (coding_system)) | |
| 3862 { | |
| 3863 while (1) | |
| 3864 { | |
| 3865 if (detect_coding_type (st, buf, nread)) | |
| 428 | 3866 break; |
| 771 | 3867 nread = Lstream_read (stream, buf, sizeof (buf)); |
| 3868 if (nread == 0) | |
| 3869 break; | |
| 428 | 3870 } |
| 771 | 3871 |
| 3872 coding_system = detected_coding_system (st); | |
| 428 | 3873 } |
| 3874 | |
| 3875 Lstream_rewind (stream); | |
| 771 | 3876 |
| 3877 unbind_to (depth); | |
| 3878 return coding_system; | |
| 3879 } | |
| 3880 | |
| 3881 static void | |
| 3882 undecided_init_coding_stream (struct coding_stream *str) | |
| 3883 { | |
| 3884 struct undecided_coding_stream *data = | |
| 3885 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 3886 struct undecided_coding_system *csdata = | |
| 3887 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
| 3888 | |
| 3889 data->actual = Qnil; | |
| 3890 | |
| 3891 if (str->direction == CODING_DECODE) | |
| 3892 { | |
| 3893 Lstream *lst = str->other_end; | |
| 3894 | |
| 3895 if ((lst->flags & LSTREAM_FL_READ) && | |
| 3896 Lstream_seekable_p (lst) && | |
| 3897 csdata->do_coding) | |
| 3898 /* We can determine the coding system now. */ | |
| 3899 data->actual = determine_real_coding_system (lst); | |
| 3900 } | |
| 1494 | 3901 |
| 3902 #ifdef DEBUG_XEMACS | |
| 3903 if (!NILP (Vdebug_coding_detection)) | |
| 3904 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual); | |
| 3905 #endif /* DEBUG_XEMACS */ | |
| 771 | 3906 } |
| 3907 | |
| 3908 static void | |
| 3909 undecided_rewind_coding_stream (struct coding_stream *str) | |
| 3910 { | |
| 3911 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
| 3912 } | |
| 3913 | |
| 3914 static void | |
| 3915 undecided_finalize_coding_stream (struct coding_stream *str) | |
| 3916 { | |
| 3917 struct undecided_coding_stream *data = | |
| 3918 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 3919 | |
| 3920 chain_finalize_coding_stream_1 | |
| 3921 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
| 3922 if (data->st) | |
| 3923 free_detection_state (data->st); | |
| 3924 } | |
| 3925 | |
| 3926 static Lisp_Object | |
| 3927 undecided_canonicalize (Lisp_Object codesys) | |
| 3928 { | |
| 3929 struct undecided_coding_system *csdata = | |
| 3930 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3931 if (!csdata->do_eol && !csdata->do_coding) | |
| 3932 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs; | |
| 3933 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs)) | |
| 3934 return Fget_coding_system (Qconvert_eol_autodetect); | |
| 3935 return codesys; | |
| 3936 } | |
| 3937 | |
| 3938 static Bytecount | |
| 3939 undecided_convert (struct coding_stream *str, const UExtbyte *src, | |
| 3940 unsigned_char_dynarr *dst, Bytecount n) | |
| 3941 { | |
| 3942 int first_time = 0; | |
| 3943 | |
| 3944 if (str->direction == CODING_DECODE) | |
| 3945 { | |
| 3946 /* At this point, we have only the following possibilities: | |
| 3947 | |
| 3948 do_eol && do_coding | |
| 3949 do_coding only | |
| 3950 do_eol only and a coding system was specified | |
| 3951 | |
| 3952 Other possibilities are removed during undecided_canonicalize. | |
| 3953 | |
| 3954 Therefore, our substreams are either | |
| 3955 | |
| 3956 lstream_coding -> lstream_dynarr, or | |
| 3957 lstream_coding -> lstream_eol -> lstream_dynarr. | |
| 3958 */ | |
| 3959 struct undecided_coding_system *csdata = | |
| 3960 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
| 3961 struct undecided_coding_stream *data = | |
| 3962 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 3963 | |
| 3964 if (str->eof) | |
| 3965 { | |
| 3966 /* Each will close the next. We need to close now because more | |
| 3967 data may be generated. */ | |
| 3968 if (data->c.initted) | |
| 3969 Lstream_close (XLSTREAM (data->c.lstreams[0])); | |
| 3970 return n; | |
| 3971 } | |
| 3972 | |
| 3973 if (!data->c.initted) | |
| 3974 { | |
| 3975 data->c.lstream_count = csdata->do_eol ? 3 : 2; | |
| 3976 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count); | |
| 3977 | |
| 3978 data->c.lstreams[data->c.lstream_count - 1] = | |
| 3979 make_dynarr_output_stream (dst); | |
| 3980 Lstream_set_buffering | |
| 3981 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
| 3982 LSTREAM_UNBUFFERED, 0); | |
| 3983 if (csdata->do_eol) | |
| 3984 { | |
| 3985 data->c.lstreams[1] = | |
| 3986 make_coding_output_stream | |
| 3987 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
| 3988 Fget_coding_system (Qconvert_eol_autodetect), | |
| 800 | 3989 CODING_DECODE, 0); |
| 771 | 3990 Lstream_set_buffering |
| 3991 (XLSTREAM (data->c.lstreams[1]), | |
| 3992 LSTREAM_UNBUFFERED, 0); | |
| 3993 } | |
| 3994 | |
| 3995 data->c.lstreams[0] = | |
| 3996 make_coding_output_stream | |
| 3997 (XLSTREAM (data->c.lstreams[1]), | |
| 3998 /* Substitute binary if we need to detect the encoding */ | |
| 3999 csdata->do_coding ? Qbinary : csdata->cs, | |
| 800 | 4000 CODING_DECODE, 0); |
| 771 | 4001 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]), |
| 4002 LSTREAM_UNBUFFERED, 0); | |
| 4003 | |
| 4004 first_time = 1; | |
| 4005 data->c.initted = 1; | |
| 4006 } | |
| 4007 | |
| 4008 /* If necessary, do encoding-detection now. We do this when we're a | |
| 4009 writing stream or a non-seekable reading stream, meaning that we | |
| 4010 can't just process the whole input, rewind, and start over. */ | |
| 4011 | |
| 4012 if (csdata->do_coding) | |
| 4013 { | |
| 4014 int actual_was_nil = NILP (data->actual); | |
| 4015 if (NILP (data->actual)) | |
| 4016 { | |
| 4017 if (!data->st) | |
| 4018 data->st = allocate_detection_state (); | |
| 4019 if (first_time) | |
| 4020 /* #### This is cheesy. What we really ought to do is buffer | |
| 4021 up a certain minimum amount of data to get a better result. | |
| 4022 */ | |
| 4023 data->actual = look_for_coding_system_magic_cookie (src, n); | |
| 4024 if (NILP (data->actual)) | |
| 4025 { | |
| 4026 /* #### This is cheesy. What we really ought to do is buffer | |
| 4027 up a certain minimum amount of data so as to get a less | |
| 4028 random result when doing subprocess detection. */ | |
| 4029 detect_coding_type (data->st, src, n); | |
| 4030 data->actual = detected_coding_system (data->st); | |
| 4100 | 4031 /* kludge to prevent infinite recursion */ |
| 4032 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system) | |
| 4033 data->actual = Fget_coding_system (Qbinary); | |
| 771 | 4034 } |
| 4035 } | |
| 4036 /* We need to set the detected coding system if we actually have | |
| 4037 such a coding system but didn't before. That is the case | |
| 4038 either when we just detected it in the previous code or when | |
| 4039 it was detected during undecided_init_coding_stream(). We | |
| 4040 can check for that using first_time. */ | |
| 4041 if (!NILP (data->actual) && (actual_was_nil || first_time)) | |
| 4042 { | |
| 4043 /* If the detected coding system doesn't allow for EOL | |
| 4044 autodetection, try to get the equivalent that does; | |
| 4045 otherwise, disable EOL detection (overriding whatever | |
| 4046 may already have been detected). */ | |
| 4047 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT) | |
| 4048 { | |
| 4049 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual))) | |
| 4050 data->actual = | |
| 4051 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual); | |
| 4052 else if (data->c.lstream_count == 3) | |
| 4053 set_coding_stream_coding_system | |
| 4054 (XLSTREAM (data->c.lstreams[1]), | |
| 4055 Fget_coding_system (Qidentity)); | |
| 4056 } | |
| 4057 set_coding_stream_coding_system | |
| 4058 (XLSTREAM (data->c.lstreams[0]), data->actual); | |
| 4059 } | |
| 4060 } | |
| 4061 | |
| 4062 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0) | |
| 4063 return -1; | |
| 4064 return n; | |
| 4065 } | |
| 4066 else | |
| 4067 return no_conversion_convert (str, src, dst, n); | |
| 4068 } | |
| 4069 | |
| 4070 static Lisp_Object | |
| 4071 undecided_canonicalize_after_coding (struct coding_stream *str) | |
| 4072 { | |
| 4073 struct undecided_coding_stream *data = | |
| 4074 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 4075 Lisp_Object ret, eolret; | |
| 4076 | |
| 4077 if (str->direction == CODING_ENCODE) | |
| 4078 return str->codesys; | |
| 4079 | |
| 4080 if (!data->c.initted) | |
| 4081 return Fget_coding_system (Qundecided); | |
| 4082 | |
| 4083 ret = coding_stream_canonicalize_after_coding | |
| 4084 (XLSTREAM (data->c.lstreams[0])); | |
| 4085 if (NILP (ret)) | |
| 4086 ret = Fget_coding_system (Qundecided); | |
| 4087 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) | |
| 4088 return ret; | |
| 4089 eolret = coding_stream_canonicalize_after_coding | |
| 4090 (XLSTREAM (data->c.lstreams[1])); | |
| 4091 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) | |
| 4092 return ret; | |
| 4093 return | |
| 4094 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret, | |
| 4095 Qsubtype)); | |
| 4096 } | |
| 4097 | |
| 4098 | |
| 4099 /************************************************************************/ | |
| 4100 /* Lisp interface: Coding category functions and detection */ | |
| 4101 /************************************************************************/ | |
| 4102 | |
| 4103 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /* | |
| 4104 Return a list of all recognized coding categories. | |
| 4105 */ | |
| 4106 ()) | |
| 4107 { | |
| 4108 int i; | |
| 4109 Lisp_Object list = Qnil; | |
| 4110 | |
| 4111 for (i = 0; i < coding_detector_count; i++) | |
| 4112 { | |
| 4113 detector_category_dynarr *cats = | |
| 4114 Dynarr_at (all_coding_detectors, i).cats; | |
| 4115 int j; | |
| 4116 | |
| 4117 for (j = 0; j < Dynarr_length (cats); j++) | |
| 4118 list = Fcons (Dynarr_at (cats, j).sym, list); | |
| 4119 } | |
| 4120 | |
| 4121 return Fnreverse (list); | |
| 4122 } | |
| 4123 | |
| 4124 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* | |
| 4125 Change the priority order of the coding categories. | |
| 4126 LIST should be list of coding categories, in descending order of | |
| 4127 priority. Unspecified coding categories will be lower in priority | |
| 4128 than all specified ones, in the same relative order they were in | |
| 4129 previously. | |
| 4130 */ | |
| 4131 (list)) | |
| 4132 { | |
| 4133 int *category_to_priority = | |
| 4134 alloca_array (int, coding_detector_category_count); | |
| 4135 int i, j; | |
| 4136 | |
| 4137 /* First generate a list that maps coding categories to priorities. */ | |
| 4138 | |
| 4139 for (i = 0; i < coding_detector_category_count; i++) | |
| 4140 category_to_priority[i] = -1; | |
| 4141 | |
| 4142 /* Highest priority comes from the specified list. */ | |
| 4143 i = 0; | |
| 2367 | 4144 { |
| 4145 EXTERNAL_LIST_LOOP_2 (elt, list) | |
| 4146 { | |
| 4147 int cat = coding_category_symbol_to_id (elt); | |
| 4148 | |
| 4149 if (category_to_priority[cat] >= 0) | |
| 4150 sferror ("Duplicate coding category in list", elt); | |
| 4151 category_to_priority[cat] = i++; | |
| 4152 } | |
| 4153 } | |
| 771 | 4154 |
| 4155 /* Now go through the existing categories by priority to retrieve | |
| 4156 the categories not yet specified and preserve their priority | |
| 4157 order. */ | |
| 4158 for (j = 0; j < coding_detector_category_count; j++) | |
| 4159 { | |
| 4160 int cat = coding_category_by_priority[j]; | |
| 4161 if (category_to_priority[cat] < 0) | |
| 4162 category_to_priority[cat] = i++; | |
| 4163 } | |
| 4164 | |
| 4165 /* Now we need to construct the inverse of the mapping we just | |
| 4166 constructed. */ | |
| 4167 | |
| 4168 for (i = 0; i < coding_detector_category_count; i++) | |
| 4169 coding_category_by_priority[category_to_priority[i]] = i; | |
| 4170 | |
| 4171 /* Phew! That was confusing. */ | |
| 4172 return Qnil; | |
| 4173 } | |
| 4174 | |
| 4175 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /* | |
| 4176 Return a list of coding categories in descending order of priority. | |
| 4177 */ | |
| 4178 ()) | |
| 4179 { | |
| 4180 int i; | |
| 4181 Lisp_Object list = Qnil; | |
| 4182 | |
| 4183 for (i = 0; i < coding_detector_category_count; i++) | |
| 4184 list = | |
| 4185 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]), | |
| 4186 list); | |
| 4187 return Fnreverse (list); | |
| 4188 } | |
| 4189 | |
| 4190 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* | |
| 4191 Change the coding system associated with a coding category. | |
| 4192 */ | |
| 4193 (coding_category, coding_system)) | |
| 4194 { | |
| 4195 coding_category_system[coding_category_symbol_to_id (coding_category)] = | |
| 4196 Fget_coding_system (coding_system); | |
| 4197 return Qnil; | |
| 4198 } | |
| 4199 | |
| 4200 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* | |
| 4201 Return the coding system associated with a coding category. | |
| 4202 */ | |
| 4203 (coding_category)) | |
| 4204 { | |
| 4205 Lisp_Object sys = | |
| 4206 coding_category_system[coding_category_symbol_to_id (coding_category)]; | |
| 4207 | |
| 4208 if (!NILP (sys)) | |
| 4209 return XCODING_SYSTEM_NAME (sys); | |
| 4210 return Qnil; | |
| 4211 } | |
| 4212 | |
| 800 | 4213 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will |
| 4214 read through to the end of STREAM, leaving it there but open. */ | |
| 4215 | |
| 771 | 4216 Lisp_Object |
| 4217 detect_coding_stream (Lisp_Object stream) | |
| 4218 { | |
| 4219 Lisp_Object val = Qnil; | |
| 4220 struct gcpro gcpro1, gcpro2, gcpro3; | |
| 4221 UExtbyte random_buffer[65536]; | |
| 4222 Lisp_Object binary_instream = | |
| 4223 make_coding_input_stream | |
| 4224 (XLSTREAM (stream), Qbinary, | |
| 814 | 4225 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER); |
| 771 | 4226 Lisp_Object decstream = |
| 4227 make_coding_input_stream | |
| 4228 (XLSTREAM (binary_instream), | |
| 800 | 4229 Qundecided, CODING_DECODE, 0); |
| 771 | 4230 Lstream *decstr = XLSTREAM (decstream); |
| 4231 | |
| 4232 GCPRO3 (decstream, stream, binary_instream); | |
| 4233 /* Read and discard all data; detection happens as a side effect of this, | |
| 4234 and we examine what was detected afterwards. */ | |
| 4235 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0) | |
| 4236 ; | |
| 4237 | |
| 4238 val = coding_stream_detected_coding_system (decstr); | |
| 4239 Lstream_close (decstr); | |
| 4240 Lstream_delete (decstr); | |
| 4241 Lstream_delete (XLSTREAM (binary_instream)); | |
| 4242 UNGCPRO; | |
| 4243 return val; | |
| 428 | 4244 } |
| 4245 | |
| 4246 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | |
| 4247 Detect coding system of the text in the region between START and END. | |
| 444 | 4248 Return a list of possible coding systems ordered by priority. |
| 3025 | 4249 If only ASCII characters are found, return `undecided' or one of |
| 428 | 4250 its subsidiary coding systems according to a detected end-of-line |
| 4251 type. Optional arg BUFFER defaults to the current buffer. | |
| 4252 */ | |
| 4253 (start, end, buffer)) | |
| 4254 { | |
| 4255 Lisp_Object val = Qnil; | |
| 4256 struct buffer *buf = decode_buffer (buffer, 0); | |
| 665 | 4257 Charbpos b, e; |
| 771 | 4258 Lisp_Object lb_instream; |
| 428 | 4259 |
| 4260 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
| 4261 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
| 771 | 4262 |
| 4263 val = detect_coding_stream (lb_instream); | |
| 4264 Lstream_delete (XLSTREAM (lb_instream)); | |
| 428 | 4265 return val; |
| 4266 } | |
| 4267 | |
| 4268 | |
| 771 | 4269 |
| 4270 #ifdef DEBUG_XEMACS | |
| 4271 | |
| 428 | 4272 /************************************************************************/ |
| 771 | 4273 /* Internal methods */ |
| 4274 /************************************************************************/ | |
| 4275 | |
| 4276 /* Raw (internally-formatted) data. */ | |
| 4277 DEFINE_CODING_SYSTEM_TYPE (internal); | |
| 428 | 4278 |
| 665 | 4279 static Bytecount |
| 2286 | 4280 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src, |
| 771 | 4281 unsigned_char_dynarr *dst, Bytecount n) |
| 4282 { | |
| 4283 Bytecount orign = n; | |
| 4284 Dynarr_add_many (dst, src, n); | |
| 4285 return orign; | |
| 4286 } | |
| 4287 | |
| 4288 #endif /* DEBUG_XEMACS */ | |
| 4289 | |
| 4290 | |
| 4291 | |
| 4292 #ifdef HAVE_ZLIB | |
| 4293 | |
| 4294 /************************************************************************/ | |
| 4295 /* Gzip methods */ | |
| 4296 /************************************************************************/ | |
| 4297 | |
| 4298 struct gzip_coding_system | |
| 428 | 4299 { |
| 771 | 4300 int level; /* 0 through 9, or -1 for default */ |
| 4301 }; | |
| 4302 | |
| 4303 #define CODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
| 4304 (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
| 4305 #define XCODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
| 4306 (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
| 4307 | |
| 4308 struct gzip_coding_stream | |
| 428 | 4309 { |
| 771 | 4310 z_stream stream; |
| 4311 int stream_initted; | |
| 4312 int reached_eof; /* #### this should be handled by the caller, once we | |
| 4313 return LSTREAM_EOF */ | |
| 4314 }; | |
| 4315 | |
| 1204 | 4316 static const struct memory_description |
| 771 | 4317 gzip_coding_system_description[] = { |
| 4318 { XD_END } | |
| 4319 }; | |
| 4320 | |
| 1204 | 4321 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip); |
| 4322 | |
| 771 | 4323 enum source_sink_type |
| 4324 gzip_conversion_end_type (Lisp_Object codesys) | |
| 4325 { | |
| 4326 return DECODES_BYTE_TO_BYTE; | |
| 428 | 4327 } |
| 4328 | |
| 4329 static void | |
| 771 | 4330 gzip_init (Lisp_Object codesys) |
| 4331 { | |
| 4332 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); | |
| 4333 data->level = -1; | |
| 4334 } | |
| 4335 | |
| 4336 static void | |
| 4337 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
| 428 | 4338 { |
| 771 | 4339 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip); |
| 4340 | |
| 826 | 4341 write_c_string (printcharfun, "("); |
| 771 | 4342 if (data->level == -1) |
| 826 | 4343 write_c_string (printcharfun, "default"); |
| 771 | 4344 else |
| 4345 print_internal (make_int (data->level), printcharfun, 0); | |
| 826 | 4346 write_c_string (printcharfun, ")"); |
| 428 | 4347 } |
| 4348 | |
| 4349 static int | |
| 771 | 4350 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
| 428 | 4351 { |
| 771 | 4352 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); |
| 4353 | |
| 4354 if (EQ (key, Qlevel)) | |
| 428 | 4355 { |
| 771 | 4356 if (EQ (value, Qdefault)) |
| 4357 data->level = -1; | |
| 4358 else | |
| 428 | 4359 { |
| 771 | 4360 CHECK_INT (value); |
| 4361 check_int_range (XINT (value), 0, 9); | |
| 4362 data->level = XINT (value); | |
| 428 | 4363 } |
| 4364 } | |
| 4365 else | |
| 771 | 4366 return 0; |
| 4367 return 1; | |
| 428 | 4368 } |
| 4369 | |
| 4370 static Lisp_Object | |
| 771 | 4371 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop) |
| 428 | 4372 { |
| 771 | 4373 struct gzip_coding_system *data = |
| 4374 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip); | |
| 4375 | |
| 4376 if (EQ (prop, Qlevel)) | |
| 428 | 4377 { |
| 771 | 4378 if (data->level == -1) |
| 4379 return Qdefault; | |
| 4380 return make_int (data->level); | |
| 428 | 4381 } |
| 771 | 4382 |
| 4383 return Qunbound; | |
| 428 | 4384 } |
| 4385 | |
| 4386 static void | |
| 771 | 4387 gzip_init_coding_stream (struct coding_stream *str) |
| 428 | 4388 { |
| 771 | 4389 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
| 4390 if (data->stream_initted) | |
| 428 | 4391 { |
| 771 | 4392 if (str->direction == CODING_DECODE) |
| 4393 inflateEnd (&data->stream); | |
| 4394 else | |
| 4395 deflateEnd (&data->stream); | |
| 4396 data->stream_initted = 0; | |
| 428 | 4397 } |
| 771 | 4398 data->reached_eof = 0; |
| 428 | 4399 } |
| 4400 | |
| 4401 static void | |
| 771 | 4402 gzip_rewind_coding_stream (struct coding_stream *str) |
| 428 | 4403 { |
| 771 | 4404 gzip_init_coding_stream (str); |
| 428 | 4405 } |
| 4406 | |
| 771 | 4407 static Bytecount |
| 4408 gzip_convert (struct coding_stream *str, | |
| 4409 const UExtbyte *src, | |
| 4410 unsigned_char_dynarr *dst, Bytecount n) | |
| 428 | 4411 { |
| 771 | 4412 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
| 4413 int zerr; | |
| 4414 if (str->direction == CODING_DECODE) | |
| 428 | 4415 { |
| 771 | 4416 if (data->reached_eof) |
| 4417 return n; /* eat the data */ | |
| 4418 | |
| 4419 if (!data->stream_initted) | |
| 428 | 4420 { |
| 771 | 4421 xzero (data->stream); |
| 4422 if (inflateInit (&data->stream) != Z_OK) | |
| 4423 return LSTREAM_ERROR; | |
| 4424 data->stream_initted = 1; | |
| 428 | 4425 } |
| 771 | 4426 |
| 4427 data->stream.next_in = (Bytef *) src; | |
| 4428 data->stream.avail_in = n; | |
| 4429 | |
| 4430 /* Normally we stop when we've fed all data to the decompressor; but | |
| 4431 if we're at the end of the input, and the decompressor hasn't | |
| 4432 reported EOF, we need to keep going, as there might be more output | |
| 4433 to generate. Z_OK from the decompressor means input was processed | |
| 4434 or output was generated; if neither, we break out of the loop. | |
| 4435 Other return values are: | |
| 4436 | |
| 4437 Z_STREAM_END EOF from decompressor | |
| 4438 Z_DATA_ERROR Corrupted data | |
| 4439 Z_BUF_ERROR No progress possible (this should happen if | |
| 4440 we try to feed it an incomplete file) | |
| 4441 Z_MEM_ERROR Out of memory | |
| 4442 Z_STREAM_ERROR (should never happen) | |
| 4443 Z_NEED_DICT (#### when will this happen?) | |
| 4444 */ | |
| 4445 while (data->stream.avail_in > 0 || str->eof) | |
| 4446 { | |
| 4447 /* Reserve an output buffer of the same size as the input buffer; | |
| 4448 if that's not enough, we keep reserving the same size. */ | |
| 4449 Bytecount reserved = n; | |
| 4450 Dynarr_add_many (dst, 0, reserved); | |
| 4451 /* Careful here! Don't retrieve the pointer until after | |
| 4452 reserving the space, or it might be bogus */ | |
| 4453 data->stream.next_out = | |
| 4454 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
| 4455 data->stream.avail_out = reserved; | |
| 4456 zerr = inflate (&data->stream, Z_NO_FLUSH); | |
| 4457 /* Lop off the unused portion */ | |
| 4458 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
| 4459 if (zerr != Z_OK) | |
| 4460 break; | |
| 4461 } | |
| 4462 | |
| 4463 if (zerr == Z_STREAM_END) | |
| 4464 data->reached_eof = 1; | |
| 4465 | |
| 4466 if ((Bytecount) data->stream.avail_in < n) | |
| 4467 return n - data->stream.avail_in; | |
| 4468 | |
| 4469 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
| 4470 return 0; | |
| 4471 | |
| 4472 return LSTREAM_ERROR; | |
| 428 | 4473 } |
| 4474 else | |
| 4475 { | |
| 771 | 4476 if (!data->stream_initted) |
| 4477 { | |
| 4478 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys); | |
| 4479 xzero (data->stream); | |
| 4480 if (deflateInit (&data->stream, | |
| 4481 level == -1 ? Z_DEFAULT_COMPRESSION : level) != | |
| 4482 Z_OK) | |
| 4483 return LSTREAM_ERROR; | |
| 4484 data->stream_initted = 1; | |
| 428 | 4485 } |
| 771 | 4486 |
| 4487 data->stream.next_in = (Bytef *) src; | |
| 4488 data->stream.avail_in = n; | |
| 4489 | |
| 4490 /* Normally we stop when we've fed all data to the compressor; but if | |
| 4491 we're at the end of the input, and the compressor hasn't reported | |
| 4492 EOF, we need to keep going, as there might be more output to | |
| 4493 generate. (To signal EOF on our end, we set the FLUSH parameter | |
| 4494 to Z_FINISH; when all data is output, Z_STREAM_END will be | |
| 4495 returned.) Z_OK from the compressor means input was processed or | |
| 4496 output was generated; if neither, we break out of the loop. Other | |
| 4497 return values are: | |
| 4498 | |
| 4499 Z_STREAM_END EOF from compressor | |
| 4500 Z_BUF_ERROR No progress possible (should never happen) | |
| 4501 Z_STREAM_ERROR (should never happen) | |
| 4502 */ | |
| 4503 while (data->stream.avail_in > 0 || str->eof) | |
| 4504 { | |
| 4505 /* Reserve an output buffer of the same size as the input buffer; | |
| 4506 if that's not enough, we keep reserving the same size. */ | |
| 4507 Bytecount reserved = n; | |
| 4508 Dynarr_add_many (dst, 0, reserved); | |
| 4509 /* Careful here! Don't retrieve the pointer until after | |
| 4510 reserving the space, or it might be bogus */ | |
| 4511 data->stream.next_out = | |
| 4512 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
| 4513 data->stream.avail_out = reserved; | |
| 4514 zerr = | |
| 4515 deflate (&data->stream, | |
| 4516 str->eof ? Z_FINISH : Z_NO_FLUSH); | |
| 4517 /* Lop off the unused portion */ | |
| 4518 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
| 4519 if (zerr != Z_OK) | |
| 4520 break; | |
| 4521 } | |
| 4522 | |
| 4523 if ((Bytecount) data->stream.avail_in < n) | |
| 4524 return n - data->stream.avail_in; | |
| 4525 | |
| 4526 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
| 4527 return 0; | |
| 4528 | |
| 4529 return LSTREAM_ERROR; | |
| 428 | 4530 } |
| 4531 } | |
| 4532 | |
| 771 | 4533 #endif /* HAVE_ZLIB */ |
| 428 | 4534 |
| 4535 | |
| 4536 /************************************************************************/ | |
| 4537 /* Initialization */ | |
| 4538 /************************************************************************/ | |
| 4539 | |
| 4540 void | |
| 4541 syms_of_file_coding (void) | |
| 4542 { | |
| 442 | 4543 INIT_LRECORD_IMPLEMENTATION (coding_system); |
| 4544 | |
| 771 | 4545 DEFSUBR (Fvalid_coding_system_type_p); |
| 4546 DEFSUBR (Fcoding_system_type_list); | |
| 428 | 4547 DEFSUBR (Fcoding_system_p); |
| 4303 | 4548 DEFSUBR (Fautoload_coding_system); |
| 428 | 4549 DEFSUBR (Ffind_coding_system); |
| 4550 DEFSUBR (Fget_coding_system); | |
| 4551 DEFSUBR (Fcoding_system_list); | |
| 4552 DEFSUBR (Fcoding_system_name); | |
| 4553 DEFSUBR (Fmake_coding_system); | |
| 4554 DEFSUBR (Fcopy_coding_system); | |
| 440 | 4555 DEFSUBR (Fcoding_system_canonical_name_p); |
| 4556 DEFSUBR (Fcoding_system_alias_p); | |
| 4557 DEFSUBR (Fcoding_system_aliasee); | |
| 428 | 4558 DEFSUBR (Fdefine_coding_system_alias); |
| 4559 DEFSUBR (Fsubsidiary_coding_system); | |
| 771 | 4560 DEFSUBR (Fcoding_system_base); |
| 4561 DEFSUBR (Fcoding_system_used_for_io); | |
| 428 | 4562 |
| 4563 DEFSUBR (Fcoding_system_type); | |
| 771 | 4564 DEFSUBR (Fcoding_system_description); |
| 428 | 4565 DEFSUBR (Fcoding_system_property); |
| 4566 | |
| 4567 DEFSUBR (Fcoding_category_list); | |
| 4568 DEFSUBR (Fset_coding_priority_list); | |
| 4569 DEFSUBR (Fcoding_priority_list); | |
| 4570 DEFSUBR (Fset_coding_category_system); | |
| 4571 DEFSUBR (Fcoding_category_system); | |
| 4572 | |
| 4573 DEFSUBR (Fdetect_coding_region); | |
| 4574 DEFSUBR (Fdecode_coding_region); | |
| 4575 DEFSUBR (Fencode_coding_region); | |
| 563 | 4576 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
| 4577 DEFSYMBOL (Qno_conversion); | |
| 771 | 4578 DEFSYMBOL (Qconvert_eol); |
| 4579 DEFSYMBOL (Qconvert_eol_autodetect); | |
| 4580 DEFSYMBOL (Qconvert_eol_lf); | |
| 4581 DEFSYMBOL (Qconvert_eol_cr); | |
| 4582 DEFSYMBOL (Qconvert_eol_crlf); | |
| 563 | 4583 DEFSYMBOL (Qraw_text); |
| 771 | 4584 |
| 563 | 4585 DEFSYMBOL (Qmnemonic); |
| 4586 DEFSYMBOL (Qeol_type); | |
| 4587 DEFSYMBOL (Qpost_read_conversion); | |
| 4588 DEFSYMBOL (Qpre_write_conversion); | |
| 4589 | |
| 771 | 4590 DEFSYMBOL (Qtranslation_table_for_decode); |
| 4591 DEFSYMBOL (Qtranslation_table_for_encode); | |
| 4592 DEFSYMBOL (Qsafe_chars); | |
| 4593 DEFSYMBOL (Qsafe_charsets); | |
| 4594 DEFSYMBOL (Qmime_charset); | |
| 4595 DEFSYMBOL (Qvalid_codes); | |
| 4596 | |
| 563 | 4597 DEFSYMBOL (Qcr); |
| 4598 DEFSYMBOL (Qlf); | |
| 4599 DEFSYMBOL (Qcrlf); | |
| 4600 DEFSYMBOL (Qeol_cr); | |
| 4601 DEFSYMBOL (Qeol_lf); | |
| 4602 DEFSYMBOL (Qeol_crlf); | |
| 4603 DEFSYMBOL (Qencode); | |
| 4604 DEFSYMBOL (Qdecode); | |
| 428 | 4605 |
| 771 | 4606 DEFSYMBOL (Qnear_certainty); |
| 4607 DEFSYMBOL (Qquite_probable); | |
| 4608 DEFSYMBOL (Qsomewhat_likely); | |
| 1494 | 4609 DEFSYMBOL (Qslightly_likely); |
| 771 | 4610 DEFSYMBOL (Qas_likely_as_unlikely); |
| 4611 DEFSYMBOL (Qsomewhat_unlikely); | |
| 4612 DEFSYMBOL (Qquite_improbable); | |
| 4613 DEFSYMBOL (Qnearly_impossible); | |
| 4614 | |
| 4615 DEFSYMBOL (Qdo_eol); | |
| 4616 DEFSYMBOL (Qdo_coding); | |
| 4617 | |
| 4618 DEFSYMBOL (Qcanonicalize_after_coding); | |
| 4619 | |
| 4303 | 4620 DEFSYMBOL (Qposix_charset_to_coding_system_hash); |
| 4621 | |
| 771 | 4622 DEFSYMBOL (Qescape_quoted); |
| 4623 | |
| 4624 #ifdef HAVE_ZLIB | |
| 4625 DEFSYMBOL (Qgzip); | |
| 4626 #endif | |
| 4627 | |
| 428 | 4628 } |
| 4629 | |
| 4630 void | |
| 4631 lstream_type_create_file_coding (void) | |
| 4632 { | |
| 771 | 4633 LSTREAM_HAS_METHOD (coding, reader); |
| 4634 LSTREAM_HAS_METHOD (coding, writer); | |
| 4635 LSTREAM_HAS_METHOD (coding, rewinder); | |
| 4636 LSTREAM_HAS_METHOD (coding, seekable_p); | |
| 4637 LSTREAM_HAS_METHOD (coding, marker); | |
| 4638 LSTREAM_HAS_METHOD (coding, flusher); | |
| 4639 LSTREAM_HAS_METHOD (coding, closer); | |
| 4640 LSTREAM_HAS_METHOD (coding, finalizer); | |
| 4641 } | |
| 4642 | |
| 4643 void | |
| 4644 coding_system_type_create (void) | |
| 4645 { | |
| 4646 int i; | |
| 4647 | |
| 4648 staticpro (&Vcoding_system_hash_table); | |
| 4649 Vcoding_system_hash_table = | |
| 4650 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | |
| 4651 | |
| 4652 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry); | |
| 2367 | 4653 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr, |
| 771 | 4654 &csted_description); |
| 4655 | |
| 4656 Vcoding_system_type_list = Qnil; | |
| 4657 staticpro (&Vcoding_system_type_list); | |
| 4658 | |
| 4659 /* Initialize to something reasonable ... */ | |
| 4660 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++) | |
| 4661 { | |
| 4662 coding_category_system[i] = Qnil; | |
| 1204 | 4663 dump_add_root_lisp_object (&coding_category_system[i]); |
| 771 | 4664 coding_category_by_priority[i] = i; |
| 4665 } | |
| 4666 | |
| 4667 dump_add_opaque (coding_category_by_priority, | |
| 4668 sizeof (coding_category_by_priority)); | |
| 4669 | |
| 4670 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector); | |
| 2367 | 4671 dump_add_root_block_ptr (&all_coding_detectors, |
| 771 | 4672 &detector_dynarr_description); |
| 4673 | |
| 4674 dump_add_opaque_int (&coding_system_tick); | |
| 4675 dump_add_opaque_int (&coding_detector_count); | |
| 4676 dump_add_opaque_int (&coding_detector_category_count); | |
| 4677 | |
| 4678 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, | |
| 4679 "no-conversion-coding-system-p"); | |
| 4680 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); | |
| 4681 | |
| 4682 INITIALIZE_DETECTOR (no_conversion); | |
| 4683 DETECTOR_HAS_METHOD (no_conversion, detect); | |
| 4684 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
| 4685 | |
| 4686 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol, | |
| 4687 "convert-eol-coding-system-p"); | |
| 4688 CODING_SYSTEM_HAS_METHOD (convert_eol, print); | |
| 4689 CODING_SYSTEM_HAS_METHOD (convert_eol, convert); | |
| 4690 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop); | |
| 4691 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop); | |
| 4692 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type); | |
| 4693 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding); | |
| 4694 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream); | |
| 4695 | |
| 4696 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided, | |
| 4697 "undecided-coding-system-p"); | |
| 4698 CODING_SYSTEM_HAS_METHOD (undecided, init); | |
| 4699 CODING_SYSTEM_HAS_METHOD (undecided, mark); | |
| 4700 CODING_SYSTEM_HAS_METHOD (undecided, print); | |
| 4701 CODING_SYSTEM_HAS_METHOD (undecided, convert); | |
| 4702 CODING_SYSTEM_HAS_METHOD (undecided, putprop); | |
| 4703 CODING_SYSTEM_HAS_METHOD (undecided, getprop); | |
| 4704 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream); | |
| 4705 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream); | |
| 4706 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream); | |
| 4707 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream); | |
| 4708 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize); | |
| 4709 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding); | |
| 4710 | |
| 4711 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p"); | |
| 4712 | |
| 4713 CODING_SYSTEM_HAS_METHOD (chain, print); | |
| 4714 CODING_SYSTEM_HAS_METHOD (chain, canonicalize); | |
| 4715 CODING_SYSTEM_HAS_METHOD (chain, init); | |
| 4716 CODING_SYSTEM_HAS_METHOD (chain, mark); | |
| 4717 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream); | |
| 4718 CODING_SYSTEM_HAS_METHOD (chain, convert); | |
| 4719 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream); | |
| 4720 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream); | |
| 4721 CODING_SYSTEM_HAS_METHOD (chain, finalize); | |
| 4722 CODING_SYSTEM_HAS_METHOD (chain, putprop); | |
| 4723 CODING_SYSTEM_HAS_METHOD (chain, getprop); | |
| 4724 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type); | |
| 4725 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding); | |
| 4726 | |
| 4727 #ifdef DEBUG_XEMACS | |
| 4728 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p"); | |
| 4729 CODING_SYSTEM_HAS_METHOD (internal, convert); | |
| 4730 #endif | |
| 4731 | |
| 4732 #ifdef HAVE_ZLIB | |
| 4733 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p"); | |
| 4734 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type); | |
| 4735 CODING_SYSTEM_HAS_METHOD (gzip, convert); | |
| 4736 CODING_SYSTEM_HAS_METHOD (gzip, init); | |
| 4737 CODING_SYSTEM_HAS_METHOD (gzip, print); | |
| 4738 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream); | |
| 4739 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream); | |
| 4740 CODING_SYSTEM_HAS_METHOD (gzip, putprop); | |
| 4741 CODING_SYSTEM_HAS_METHOD (gzip, getprop); | |
| 4742 #endif | |
| 4743 } | |
| 4744 | |
| 4745 void | |
| 4746 reinit_coding_system_type_create (void) | |
| 4747 { | |
| 4748 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion); | |
| 4749 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol); | |
| 4750 REINITIALIZE_CODING_SYSTEM_TYPE (undecided); | |
| 4751 REINITIALIZE_CODING_SYSTEM_TYPE (chain); | |
| 4752 #if 0 | |
| 4753 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper); | |
| 4754 #endif /* 0 */ | |
| 4755 #ifdef DEBUG_XEMACS | |
| 4756 REINITIALIZE_CODING_SYSTEM_TYPE (internal); | |
| 4757 #endif | |
| 4758 #ifdef HAVE_ZLIB | |
| 4759 REINITIALIZE_CODING_SYSTEM_TYPE (gzip); | |
| 4760 #endif | |
| 4761 } | |
| 4762 | |
| 4763 void | |
| 4764 reinit_vars_of_file_coding (void) | |
| 4765 { | |
| 428 | 4766 } |
| 4767 | |
| 4768 void | |
| 4769 vars_of_file_coding (void) | |
| 4770 { | |
| 771 | 4771 /* We always have file-coding support */ |
| 428 | 4772 Fprovide (intern ("file-coding")); |
| 4773 | |
| 1347 | 4774 QScoding_system_cookie = build_string (";;;###coding system: "); |
| 4775 staticpro (&QScoding_system_cookie); | |
| 4776 | |
| 1242 | 4777 #ifdef HAVE_DEFAULT_EOL_DETECTION |
| 2297 | 4778 /* #### Find a more appropriate place for this comment. |
| 4779 WARNING: The existing categories are intimately tied to the function | |
| 1242 | 4780 `coding-system-category' in coding.el. If you change a category, or |
| 4781 change the layout of any coding system associated with a category, you | |
| 4782 need to check that function and make sure it's written properly. */ | |
| 4783 | |
| 4784 Fprovide (intern ("unix-default-eol-detection")); | |
| 4785 #endif | |
| 4786 | |
| 428 | 4787 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
| 3142 | 4788 Default coding system used for TTY and X11 keyboard input. |
| 4789 Under X11, used only to interpet the character for a key event when that | |
| 4790 event has a KeySym of NoSymbol but does have an associated string keysym, | |
| 4791 something that's seen with input methods. | |
| 4792 | |
| 4793 If you need to set these things to different coding systems, call the | |
| 4794 function `set-console-tty-coding-system' for the TTY and use this variable | |
| 4795 for X11. | |
| 428 | 4796 */ ); |
| 4797 Vkeyboard_coding_system = Qnil; | |
| 4798 | |
| 4799 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* | |
| 4800 Coding system used for TTY display output. | |
| 4801 Not used under a windowing system. | |
| 4802 */ ); | |
| 4803 Vterminal_coding_system = Qnil; | |
| 4804 | |
| 4805 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* | |
| 440 | 4806 Overriding coding system used when reading from a file or process. |
| 4807 You should bind this variable with `let', but do not set it globally. | |
| 4808 If this is non-nil, it specifies the coding system that will be used | |
| 4809 to decode input on read operations, such as from a file or process. | |
| 4810 It overrides `buffer-file-coding-system-for-read', | |
| 428 | 4811 `insert-file-contents-pre-hook', etc. Use those variables instead of |
| 440 | 4812 this one for permanent changes to the environment. */ ); |
| 428 | 4813 Vcoding_system_for_read = Qnil; |
| 4814 | |
| 4815 DEFVAR_LISP ("coding-system-for-write", | |
| 4816 &Vcoding_system_for_write /* | |
| 440 | 4817 Overriding coding system used when writing to a file or process. |
| 4818 You should bind this variable with `let', but do not set it globally. | |
| 4819 If this is non-nil, it specifies the coding system that will be used | |
| 4820 to encode output for write operations, such as to a file or process. | |
| 4821 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc. | |
| 4822 Use those variables instead of this one for permanent changes to the | |
| 4823 environment. */ ); | |
| 428 | 4824 Vcoding_system_for_write = Qnil; |
| 4825 | |
| 4826 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* | |
| 4827 Coding system used to convert pathnames when accessing files. | |
| 4828 */ ); | |
| 4829 Vfile_name_coding_system = Qnil; | |
| 4830 | |
| 4831 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /* | |
| 771 | 4832 Setting this has no effect. It is purely for FSF compatibility. |
| 428 | 4833 */ ); |
| 4834 enable_multibyte_characters = 1; | |
| 771 | 4835 |
| 4836 Vchain_canonicalize_hash_table = | |
| 4837 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); | |
| 4838 staticpro (&Vchain_canonicalize_hash_table); | |
| 4839 | |
| 4840 #ifdef DEBUG_XEMACS | |
| 4841 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /* | |
| 4842 If non-nil, display debug information about detection operations in progress. | |
| 4843 Information is displayed on stderr. | |
| 4844 */ ); | |
| 4845 Vdebug_coding_detection = Qnil; | |
| 4846 #endif | |
| 428 | 4847 } |
| 4848 | |
| 2297 | 4849 /* #### reformat this for consistent appearance? */ |
| 4850 | |
| 428 | 4851 void |
| 4852 complex_vars_of_file_coding (void) | |
| 4853 { | |
| 771 | 4854 Fmake_coding_system |
| 4855 (Qconvert_eol_cr, Qconvert_eol, | |
| 4856 build_msg_string ("Convert CR to LF"), | |
| 4857 nconc2 (list6 (Qdocumentation, | |
| 4858 build_msg_string ( | |
| 4859 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" | |
| 4860 "(used internally and under Unix to mark the end of a line)."), | |
| 4861 Qmnemonic, build_string ("CR->LF"), | |
| 4862 Qsubtype, Qcr), | |
| 4863 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
| 4864 subsidiaries -- it needs the coding systems we're creating | |
| 4865 to do so! */ | |
| 4866 list2 (Qeol_type, Qlf))); | |
| 4867 | |
| 4868 Fmake_coding_system | |
| 4869 (Qconvert_eol_lf, Qconvert_eol, | |
| 4870 build_msg_string ("Convert LF to LF (do nothing)"), | |
| 4871 nconc2 (list6 (Qdocumentation, | |
| 4872 build_msg_string ( | |
| 4873 "Do nothing."), | |
| 4874 Qmnemonic, build_string ("LF->LF"), | |
| 4875 Qsubtype, Qlf), | |
| 4876 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
| 4877 subsidiaries -- it needs the coding systems we're creating | |
| 4878 to do so! */ | |
| 4879 list2 (Qeol_type, Qlf))); | |
| 4880 | |
| 4881 Fmake_coding_system | |
| 4882 (Qconvert_eol_crlf, Qconvert_eol, | |
| 4883 build_msg_string ("Convert CRLF to LF"), | |
| 4884 nconc2 (list6 (Qdocumentation, | |
| 4885 build_msg_string ( | |
| 4886 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" | |
| 4887 "(used internally and under Unix to mark the end of a line)."), | |
| 4888 Qmnemonic, build_string ("CRLF->LF"), | |
| 4889 Qsubtype, Qcrlf), | |
| 4890 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
| 4891 subsidiaries -- it needs the coding systems we're creating | |
| 4892 to do so! */ | |
| 4893 list2 (Qeol_type, Qlf))); | |
| 4894 | |
| 4895 Fmake_coding_system | |
| 4896 (Qconvert_eol_autodetect, Qconvert_eol, | |
| 4897 build_msg_string ("Autodetect EOL type"), | |
| 4898 nconc2 (list6 (Qdocumentation, | |
| 4899 build_msg_string ( | |
| 4900 "Autodetect the end-of-line type."), | |
| 4901 Qmnemonic, build_string ("Auto-EOL"), | |
| 793 | 4902 Qsubtype, Qnil), |
| 771 | 4903 /* VERY IMPORTANT! Tell make-coding-system not to generate |
| 4904 subsidiaries -- it needs the coding systems we're creating | |
| 4905 to do so! */ | |
| 4906 list2 (Qeol_type, Qlf))); | |
| 4907 | |
| 4908 Fmake_coding_system | |
| 4909 (Qundecided, Qundecided, | |
| 4910 build_msg_string ("Undecided (auto-detect)"), | |
| 4911 nconc2 (list4 (Qdocumentation, | |
| 4912 build_msg_string | |
| 4913 ("Automatically detects the correct encoding."), | |
| 4914 Qmnemonic, build_string ("Auto")), | |
| 4915 list6 (Qdo_eol, Qt, Qdo_coding, Qt, | |
| 4916 /* We do EOL detection ourselves so we don't need to be | |
| 4917 wrapped in an EOL detector. (It doesn't actually hurt, | |
| 4918 though, I don't think.) */ | |
| 4919 Qeol_type, Qlf))); | |
| 4920 | |
| 4921 Fmake_coding_system | |
| 4922 (intern ("undecided-dos"), Qundecided, | |
| 4923 build_msg_string ("Undecided (auto-detect) (CRLF)"), | |
| 4924 nconc2 (list4 (Qdocumentation, | |
| 4925 build_msg_string | |
| 4926 ("Automatically detects the correct encoding; EOL type of CRLF forced."), | |
| 4927 Qmnemonic, build_string ("Auto")), | |
| 4928 list4 (Qdo_coding, Qt, | |
| 4929 Qeol_type, Qcrlf))); | |
| 4930 | |
| 4931 Fmake_coding_system | |
| 4932 (intern ("undecided-unix"), Qundecided, | |
| 4933 build_msg_string ("Undecided (auto-detect) (LF)"), | |
| 4934 nconc2 (list4 (Qdocumentation, | |
| 4935 build_msg_string | |
| 4936 ("Automatically detects the correct encoding; EOL type of LF forced."), | |
| 4937 Qmnemonic, build_string ("Auto")), | |
| 4938 list4 (Qdo_coding, Qt, | |
| 4939 Qeol_type, Qlf))); | |
| 4940 | |
| 4941 Fmake_coding_system | |
| 4942 (intern ("undecided-mac"), Qundecided, | |
| 4943 build_msg_string ("Undecided (auto-detect) (CR)"), | |
| 4944 nconc2 (list4 (Qdocumentation, | |
| 4945 build_msg_string | |
| 4946 ("Automatically detects the correct encoding; EOL type of CR forced."), | |
| 4947 Qmnemonic, build_string ("Auto")), | |
| 4948 list4 (Qdo_coding, Qt, | |
| 4949 Qeol_type, Qcr))); | |
| 4950 | |
| 428 | 4951 /* Need to create this here or we're really screwed. */ |
| 4952 Fmake_coding_system | |
| 4953 (Qraw_text, Qno_conversion, | |
| 771 | 4954 build_msg_string ("Raw Text"), |
| 4955 list4 (Qdocumentation, | |
| 4956 build_msg_string ("Raw text converts only line-break codes, and acts otherwise like `binary'."), | |
| 4957 Qmnemonic, build_string ("Raw"))); | |
| 428 | 4958 |
| 4959 Fmake_coding_system | |
| 4960 (Qbinary, Qno_conversion, | |
| 771 | 4961 build_msg_string ("Binary"), |
| 4962 list6 (Qdocumentation, | |
| 4963 build_msg_string ( | |
| 4964 "This coding system is as close as it comes to doing no conversion.\n" | |
| 4965 "On input, each byte is converted directly into the character\n" | |
| 4966 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" | |
| 4967 "or `latin-1' character sets. On output, these characters are\n" | |
| 4968 "converted back to the corresponding bytes, and other characters\n" | |
| 4969 "are converted to the default character, i.e. `~'."), | |
| 4970 Qeol_type, Qlf, | |
| 428 | 4971 Qmnemonic, build_string ("Binary"))); |
| 4972 | |
| 771 | 4973 /* Formerly aliased to raw-text! Completely bogus and not even the same |
| 4974 as FSF Emacs. */ | |
| 4975 Fdefine_coding_system_alias (Qno_conversion, Qbinary); | |
| 4976 Fdefine_coding_system_alias (intern ("no-conversion-unix"), | |
| 4977 intern ("raw-text-unix")); | |
| 4978 Fdefine_coding_system_alias (intern ("no-conversion-dos"), | |
| 4979 intern ("raw-text-dos")); | |
| 4980 Fdefine_coding_system_alias (intern ("no-conversion-mac"), | |
| 4981 intern ("raw-text-mac")); | |
| 4982 | |
| 1318 | 4983 /* These three below will get their defaults set correctly |
| 4984 in code-init.el. We init them now so we can handle stuff at dump | |
| 771 | 4985 time before we get to code-init.el. */ |
| 1318 | 4986 Fdefine_coding_system_alias (Qnative, Qbinary); |
| 440 | 4987 Fdefine_coding_system_alias (Qterminal, Qbinary); |
| 4988 Fdefine_coding_system_alias (Qkeyboard, Qbinary); | |
| 4989 | |
| 1318 | 4990 Fdefine_coding_system_alias (Qfile_name, Qnative); |
| 771 | 4991 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf); |
| 4992 | |
| 428 | 4993 /* Need this for bootstrapping */ |
| 771 | 4994 coding_category_system[detector_category_no_conversion] = |
| 428 | 4995 Fget_coding_system (Qraw_text); |
| 4996 } |
