Mercurial > hg > xemacs-beta
annotate src/file-coding.c @ 4614:afbfad080ddd
The URLs in our current config.guess and config.sub files are obsolete.
Update to the latest upstream release to get correct URLs, as well as fixes
and enhancements to those scripts.
| author | Jerry James <james@xemacs.org> |
|---|---|
| date | Wed, 11 Feb 2009 11:09:35 -0700 |
| parents | 80e0588fb42f |
| children | e4ed58cb0e5b |
| rev | line source |
|---|---|
| 771 | 1 /* Text encoding conversion functions; coding-system object. |
| 2 #### rename me to coding-system.c or coding.c | |
| 428 | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
| 4 Copyright (C) 1995 Sun Microsystems, Inc. | |
| 3025 | 5 Copyright (C) 2000, 2001, 2002, 2003, 2005 Ben Wing. |
| 428 | 6 |
| 7 This file is part of XEmacs. | |
| 8 | |
| 9 XEmacs is free software; you can redistribute it and/or modify it | |
| 10 under the terms of the GNU General Public License as published by the | |
| 11 Free Software Foundation; either version 2, or (at your option) any | |
| 12 later version. | |
| 13 | |
| 14 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
| 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
| 17 for more details. | |
| 18 | |
| 19 You should have received a copy of the GNU General Public License | |
| 20 along with XEmacs; see the file COPYING. If not, write to | |
| 21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 22 Boston, MA 02111-1307, USA. */ | |
| 23 | |
| 771 | 24 /* Synched up with: Not in FSF. */ |
| 25 | |
| 26 /* Authorship: | |
| 27 | |
| 28 Current primary author: Ben Wing <ben@xemacs.org> | |
| 29 | |
| 30 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c | |
| 31 from Mule 2.? but probably does not share one line of code with that | |
| 32 original source. Rewriting work started around Dec. 1994. or Jan. 1995. | |
| 33 Proceeded in earnest till Nov. 1995. | |
| 34 | |
| 35 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to | |
| 36 file-coding.c, with the intention of using it to do end-of-line conversion | |
| 37 on non-MULE machines (specifically, on Windows machines). He separated | |
| 38 out the MULE stuff from non-MULE using ifdef's, and searched throughout | |
| 39 the rest of the source tree looking for coding-system-related code that | |
| 40 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS. | |
| 41 | |
| 42 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system | |
| 43 types, providing a primitive means of decoding and encoding externally- | |
| 44 formatted Unicode/UCS_4 and Unicode/UTF_8 data. | |
| 45 | |
| 46 January 25, 2000, Martin Buchholz redid and fleshed out the coding | |
| 47 system alias handling that was first added in prototype form by | |
| 48 Hrjove Niksic, April 15, 1999. | |
| 49 | |
| 50 April to May 2000, Ben Wing: More major reorganization. Adding features | |
| 51 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the | |
| 52 "chain" coding system for chaining two together, and doing a lot of | |
| 53 reorganization in preparation for properly abstracting out the different | |
| 54 coding system types. | |
| 55 | |
| 56 June 2001, Ben Wing: Added Unicode support. Eliminated previous | |
| 57 junky Unicode translation support. | |
| 58 | |
| 59 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished | |
| 60 abstracting everything except detection, which is hard to abstract (see | |
| 61 just below). | |
| 62 | |
| 63 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code | |
| 64 to intl-win32.c. Lots more rewriting; very little code is untouched | |
| 65 from before April 2000. Abstracted the detection code, added multiple | |
| 66 levels of likelihood to increase the reliability of the algorithm. | |
| 67 | |
| 68 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined. | |
| 69 Removed the conditionals. | |
| 70 */ | |
| 71 | |
| 428 | 72 #include <config.h> |
| 73 #include "lisp.h" | |
| 74 | |
| 75 #include "buffer.h" | |
| 76 #include "elhash.h" | |
| 77 #include "insdel.h" | |
| 78 #include "lstream.h" | |
| 440 | 79 #include "opaque.h" |
| 771 | 80 #include "file-coding.h" |
| 81 | |
| 82 #ifdef HAVE_ZLIB | |
| 83 #include "zlib.h" | |
| 428 | 84 #endif |
| 85 | |
| 86 Lisp_Object Vkeyboard_coding_system; | |
| 87 Lisp_Object Vterminal_coding_system; | |
| 88 Lisp_Object Vcoding_system_for_read; | |
| 89 Lisp_Object Vcoding_system_for_write; | |
| 90 Lisp_Object Vfile_name_coding_system; | |
| 91 | |
| 771 | 92 #ifdef DEBUG_XEMACS |
| 93 Lisp_Object Vdebug_coding_detection; | |
| 440 | 94 #endif |
| 771 | 95 |
| 96 typedef struct coding_system_type_entry | |
| 97 { | |
| 98 struct coding_system_methods *meths; | |
| 99 } coding_system_type_entry; | |
| 100 | |
| 101 typedef struct | |
| 102 { | |
| 103 Dynarr_declare (coding_system_type_entry); | |
| 104 } coding_system_type_entry_dynarr; | |
| 105 | |
| 106 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr; | |
| 107 | |
| 1204 | 108 static const struct memory_description cste_description_1[] = { |
| 2551 | 109 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1, |
| 110 { &coding_system_methods_description } }, | |
| 771 | 111 { XD_END } |
| 112 }; | |
| 113 | |
| 1204 | 114 static const struct sized_memory_description cste_description = { |
| 771 | 115 sizeof (coding_system_type_entry), |
| 116 cste_description_1 | |
| 117 }; | |
| 118 | |
| 1204 | 119 static const struct memory_description csted_description_1[] = { |
| 771 | 120 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description), |
| 428 | 121 { XD_END } |
| 122 }; | |
| 123 | |
| 1204 | 124 static const struct sized_memory_description csted_description = { |
| 771 | 125 sizeof (coding_system_type_entry_dynarr), |
| 126 csted_description_1 | |
| 127 }; | |
| 128 | |
| 129 static Lisp_Object Vcoding_system_type_list; | |
| 130 | |
| 131 /* Coding system currently associated with each coding category. */ | |
| 132 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES]; | |
| 133 | |
| 134 /* Table of all coding categories in decreasing order of priority. | |
| 135 This describes a permutation of the possible coding categories. */ | |
| 136 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES]; | |
| 137 | |
| 138 /* Value used with to give a unique name to nameless coding systems */ | |
| 139 int coding_system_tick; | |
| 140 | |
| 141 int coding_detector_count; | |
| 142 int coding_detector_category_count; | |
| 143 | |
| 144 detector_dynarr *all_coding_detectors; | |
| 145 | |
| 1204 | 146 static const struct memory_description struct_detector_category_description_1[] |
| 771 | 147 = |
| 148 { | |
| 149 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) }, | |
| 150 { XD_END } | |
| 151 }; | |
| 152 | |
| 1204 | 153 static const struct sized_memory_description struct_detector_category_description = |
| 771 | 154 { |
| 155 sizeof (struct detector_category), | |
| 156 struct_detector_category_description_1 | |
| 428 | 157 }; |
| 158 | |
| 1204 | 159 static const struct memory_description detector_category_dynarr_description_1[] = |
| 771 | 160 { |
| 161 XD_DYNARR_DESC (detector_category_dynarr, | |
| 162 &struct_detector_category_description), | |
| 163 { XD_END } | |
| 164 }; | |
| 165 | |
| 1204 | 166 static const struct sized_memory_description detector_category_dynarr_description = { |
| 771 | 167 sizeof (detector_category_dynarr), |
| 168 detector_category_dynarr_description_1 | |
| 169 }; | |
| 170 | |
| 1204 | 171 static const struct memory_description struct_detector_description_1[] |
| 771 | 172 = |
| 173 { | |
| 2367 | 174 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1, |
| 2551 | 175 { &detector_category_dynarr_description } }, |
| 771 | 176 { XD_END } |
| 177 }; | |
| 178 | |
| 1204 | 179 static const struct sized_memory_description struct_detector_description = |
| 771 | 180 { |
| 181 sizeof (struct detector), | |
| 182 struct_detector_description_1 | |
| 183 }; | |
| 184 | |
| 1204 | 185 static const struct memory_description detector_dynarr_description_1[] = |
| 771 | 186 { |
| 187 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description), | |
| 188 { XD_END } | |
| 189 }; | |
| 190 | |
| 1204 | 191 static const struct sized_memory_description detector_dynarr_description = { |
| 771 | 192 sizeof (detector_dynarr), |
| 193 detector_dynarr_description_1 | |
| 194 }; | |
| 428 | 195 |
| 196 Lisp_Object Qcoding_systemp; | |
| 197 | |
| 771 | 198 Lisp_Object Qraw_text; |
| 428 | 199 |
| 200 Lisp_Object Qmnemonic, Qeol_type; | |
| 201 Lisp_Object Qcr, Qcrlf, Qlf; | |
| 202 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; | |
| 203 Lisp_Object Qpost_read_conversion; | |
| 204 Lisp_Object Qpre_write_conversion; | |
| 205 | |
| 771 | 206 Lisp_Object Qtranslation_table_for_decode; |
| 207 Lisp_Object Qtranslation_table_for_encode; | |
| 208 Lisp_Object Qsafe_chars; | |
| 209 Lisp_Object Qsafe_charsets; | |
| 210 Lisp_Object Qmime_charset; | |
| 211 Lisp_Object Qvalid_codes; | |
| 212 | |
| 213 Lisp_Object Qno_conversion; | |
| 214 Lisp_Object Qconvert_eol; | |
| 440 | 215 Lisp_Object Qescape_quoted; |
| 771 | 216 Lisp_Object Qencode, Qdecode; |
| 217 | |
| 218 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf; | |
| 219 Lisp_Object Qconvert_eol_autodetect; | |
| 220 | |
| 221 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely; | |
| 1494 | 222 Lisp_Object Qslightly_likely; |
| 771 | 223 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable; |
| 224 Lisp_Object Qnearly_impossible; | |
| 225 | |
| 226 Lisp_Object Qdo_eol, Qdo_coding; | |
| 227 | |
| 228 Lisp_Object Qcanonicalize_after_coding; | |
| 229 | |
| 1347 | 230 Lisp_Object QScoding_system_cookie; |
| 231 | |
| 4303 | 232 Lisp_Object Qposix_charset_to_coding_system_hash; |
| 233 | |
| 771 | 234 /* This is used to convert autodetected coding systems into existing |
| 235 systems. For example, the chain undecided->convert-eol-autodetect may | |
| 236 have its separate parts detected as mswindows-multibyte and | |
| 237 convert-eol-crlf, and the result needs to be mapped to | |
| 238 mswindows-multibyte-dos. */ | |
| 239 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
| 240 any more. */ | |
| 241 static Lisp_Object Vchain_canonicalize_hash_table; | |
| 242 | |
| 243 #ifdef HAVE_ZLIB | |
| 244 Lisp_Object Qgzip; | |
| 428 | 245 #endif |
| 771 | 246 |
| 2297 | 247 /* Maps symbols (coding system names) to either coding system objects or |
| 248 (for aliases) other names. */ | |
| 771 | 249 static Lisp_Object Vcoding_system_hash_table; |
| 428 | 250 |
| 251 int enable_multibyte_characters; | |
| 252 | |
| 253 EXFUN (Fcopy_coding_system, 2); | |
| 254 | |
| 255 | |
| 256 /************************************************************************/ | |
| 771 | 257 /* Coding system object methods */ |
| 428 | 258 /************************************************************************/ |
| 259 | |
| 260 static Lisp_Object | |
| 261 mark_coding_system (Lisp_Object obj) | |
| 262 { | |
| 263 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); | |
| 264 | |
| 1204 | 265 #define MARKED_SLOT(x) mark_object (codesys->x); |
| 266 #include "coding-system-slots.h" | |
| 771 | 267 |
| 268 MAYBE_CODESYSMETH (codesys, mark, (obj)); | |
| 428 | 269 |
| 1204 | 270 return Qnil; |
| 428 | 271 } |
| 272 | |
| 273 static void | |
| 771 | 274 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun) |
| 275 { | |
| 276 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
| 277 print_internal (c->methods->type, printcharfun, 1); | |
| 278 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1)); | |
| 279 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT) | |
| 280 write_fmt_string_lisp (printcharfun, " eol-type=%s", | |
| 281 1, Fcoding_system_property (obj, Qeol_type)); | |
| 282 } | |
| 283 | |
| 284 static void | |
| 428 | 285 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, |
| 2286 | 286 int UNUSED (escapeflag)) |
| 428 | 287 { |
| 288 Lisp_Coding_System *c = XCODING_SYSTEM (obj); | |
| 289 if (print_readably) | |
| 771 | 290 printing_unreadable_object |
| 291 ("printing unreadable object #<coding-system 0x%x>", c->header.uid); | |
| 292 | |
| 293 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name); | |
| 294 print_coding_system_properties (obj, printcharfun); | |
| 826 | 295 write_c_string (printcharfun, ">"); |
| 428 | 296 } |
| 297 | |
| 771 | 298 /* Print an abbreviated version of a coding system (but still containing |
| 299 all the information), for use within a coding system print method. */ | |
| 300 | |
| 301 static void | |
| 302 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun, | |
| 2286 | 303 int UNUSED (escapeflag)) |
| 771 | 304 { |
| 800 | 305 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs)); |
| 771 | 306 print_coding_system_properties (cs, printcharfun); |
| 826 | 307 write_c_string (printcharfun, "]"); |
| 771 | 308 } |
| 309 | |
| 3263 | 310 #ifndef NEW_GC |
| 428 | 311 static void |
| 312 finalize_coding_system (void *header, int for_disksave) | |
| 313 { | |
| 771 | 314 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header); |
| 428 | 315 /* Since coding systems never go away, this function is not |
| 316 necessary. But it would be necessary if we changed things | |
| 317 so that coding systems could go away. */ | |
| 318 if (!for_disksave) /* see comment in lstream.c */ | |
| 771 | 319 MAYBE_XCODESYSMETH (cs, finalize, (cs)); |
| 320 } | |
| 3263 | 321 #endif /* not NEW_GC */ |
| 771 | 322 |
| 323 static Bytecount | |
| 324 sizeof_coding_system (const void *header) | |
| 325 { | |
| 326 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header; | |
| 327 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size; | |
| 428 | 328 } |
| 329 | |
| 1204 | 330 static const struct memory_description coding_system_methods_description_1[] |
| 771 | 331 = { |
| 332 { XD_LISP_OBJECT, | |
| 333 offsetof (struct coding_system_methods, type) }, | |
| 334 { XD_LISP_OBJECT, | |
| 335 offsetof (struct coding_system_methods, predicate_symbol) }, | |
| 336 { XD_END } | |
| 337 }; | |
| 338 | |
| 1204 | 339 const struct sized_memory_description coding_system_methods_description = { |
| 771 | 340 sizeof (struct coding_system_methods), |
| 341 coding_system_methods_description_1 | |
| 342 }; | |
| 343 | |
| 1204 | 344 static const struct sized_memory_description coding_system_extra_description_map[] = |
| 345 { | |
| 346 { offsetof (Lisp_Coding_System, methods) }, | |
| 347 { offsetof (struct coding_system_methods, extra_description) }, | |
| 348 { -1 }, | |
| 771 | 349 }; |
| 350 | |
| 1204 | 351 static const struct memory_description coding_system_description[] = |
| 428 | 352 { |
| 2367 | 353 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1, |
| 2551 | 354 { &coding_system_methods_description } }, |
| 1204 | 355 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) }, |
| 356 #define MARKED_SLOT_ARRAY(slot, size) \ | |
| 357 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size }, | |
| 358 #include "coding-system-slots.h" | |
| 2367 | 359 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1, |
| 2551 | 360 { coding_system_extra_description_map } }, |
| 1204 | 361 { XD_END } |
| 771 | 362 }; |
| 363 | |
| 1204 | 364 static const struct memory_description coding_system_empty_extra_description_1[] = |
| 365 { | |
| 366 { XD_END } | |
| 367 }; | |
| 368 | |
| 369 const struct sized_memory_description coding_system_empty_extra_description = { | |
| 370 0, coding_system_empty_extra_description_1 | |
| 371 }; | |
| 372 | |
| 3263 | 373 #ifdef NEW_GC |
| 374 DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION ("coding-system", coding_system, | |
| 375 1, /*dumpable-flag*/ | |
| 376 mark_coding_system, | |
| 377 print_coding_system, | |
| 378 0, 0, 0, coding_system_description, | |
| 379 sizeof_coding_system, | |
| 380 Lisp_Coding_System); | |
| 381 #else /* not NEW_GC */ | |
| 934 | 382 DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION ("coding-system", coding_system, |
| 383 1, /*dumpable-flag*/ | |
| 384 mark_coding_system, | |
| 385 print_coding_system, | |
| 386 finalize_coding_system, | |
| 387 0, 0, coding_system_description, | |
| 388 sizeof_coding_system, | |
| 389 Lisp_Coding_System); | |
| 3263 | 390 #endif /* not NEW_GC */ |
| 771 | 391 |
| 392 /************************************************************************/ | |
| 393 /* Creating coding systems */ | |
| 394 /************************************************************************/ | |
| 395 | |
| 396 static struct coding_system_methods * | |
| 397 decode_coding_system_type (Lisp_Object type, Error_Behavior errb) | |
| 428 | 398 { |
| 771 | 399 int i; |
| 400 | |
| 401 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++) | |
| 428 | 402 { |
| 771 | 403 if (EQ (type, |
| 404 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type)) | |
| 405 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths; | |
| 428 | 406 } |
| 771 | 407 |
| 408 maybe_invalid_constant ("Invalid coding system type", type, | |
| 409 Qcoding_system, errb); | |
| 410 | |
| 411 return 0; | |
| 428 | 412 } |
| 413 | |
| 771 | 414 static int |
| 415 valid_coding_system_type_p (Lisp_Object type) | |
| 428 | 416 { |
| 771 | 417 return decode_coding_system_type (type, ERROR_ME_NOT) != 0; |
| 418 } | |
| 419 | |
| 420 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* | |
| 421 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | |
| 422 Valid types depend on how XEmacs was compiled but may include | |
| 3025 | 423 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
| 424 `utf-16', `ucs-4', `utf-8', etc. | |
| 771 | 425 */ |
| 426 (coding_system_type)) | |
| 427 { | |
| 428 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; | |
| 429 } | |
| 430 | |
| 431 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /* | |
| 432 Return a list of valid coding system types. | |
| 433 */ | |
| 434 ()) | |
| 435 { | |
| 436 return Fcopy_sequence (Vcoding_system_type_list); | |
| 437 } | |
| 438 | |
| 439 void | |
| 440 add_entry_to_coding_system_type_list (struct coding_system_methods *meths) | |
| 441 { | |
| 442 struct coding_system_type_entry entry; | |
| 443 | |
| 444 entry.meths = meths; | |
| 445 Dynarr_add (the_coding_system_type_entry_dynarr, entry); | |
| 446 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list); | |
| 428 | 447 } |
| 448 | |
| 449 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /* | |
| 450 Return t if OBJECT is a coding system. | |
| 451 A coding system is an object that defines how text containing multiple | |
| 452 character sets is encoded into a stream of (typically 8-bit) bytes. | |
| 453 The coding system is used to decode the stream into a series of | |
| 454 characters (which may be from multiple charsets) when the text is read | |
| 455 from a file or process, and is used to encode the text back into the | |
| 456 same format when it is written out to a file or process. | |
| 457 | |
| 458 For example, many ISO2022-compliant coding systems (such as Compound | |
| 459 Text, which is used for inter-client data under the X Window System) | |
| 460 use escape sequences to switch between different charsets -- Japanese | |
| 461 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked | |
| 462 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See | |
| 463 `make-coding-system' for more information. | |
| 464 | |
| 465 Coding systems are normally identified using a symbol, and the | |
| 466 symbol is accepted in place of the actual coding system object whenever | |
| 467 a coding system is called for. (This is similar to how faces work.) | |
| 468 */ | |
| 469 (object)) | |
| 470 { | |
| 471 return CODING_SYSTEMP (object) ? Qt : Qnil; | |
| 472 } | |
| 473 | |
| 4303 | 474 static Lisp_Object |
| 475 find_coding_system (Lisp_Object coding_system_or_name, | |
| 476 int do_autoloads) | |
| 477 { | |
| 478 Lisp_Object lookup; | |
| 479 | |
| 480 if (NILP (coding_system_or_name)) | |
| 481 coding_system_or_name = Qbinary; | |
| 482 else if (CODING_SYSTEMP (coding_system_or_name)) | |
| 483 return coding_system_or_name; | |
| 484 else | |
| 485 CHECK_SYMBOL (coding_system_or_name); | |
| 486 | |
| 487 while (1) | |
| 488 { | |
| 489 lookup = | |
| 490 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
| 491 | |
| 492 if (CONSP (lookup) && do_autoloads) | |
| 493 { | |
| 494 struct gcpro gcpro1; | |
| 495 int length; | |
| 496 DECLARE_EISTRING (desired_base); | |
| 497 DECLARE_EISTRING (warning_info); | |
| 498 | |
| 499 eicpy_lstr (desired_base, XSYMBOL_NAME (coding_system_or_name)); | |
| 500 | |
| 501 /* Work out the name of the base coding system. */ | |
| 502 length = eilen (desired_base); | |
| 503 if (length > (int)(sizeof ("-unix") - 1)) | |
| 504 { | |
| 505 if (0 == qxestrcmp ((UAscbyte *)"-unix", (eidata (desired_base)) | |
| 506 + (length - (sizeof ("-unix") - 1)))) | |
| 507 { | |
| 508 eidel (desired_base, length - (sizeof ("-unix") - 1), | |
| 509 -1, 5, 5); | |
| 510 } | |
| 511 } | |
| 512 else if (length > (int)(sizeof ("-dos") - 1)) | |
| 513 { | |
| 514 if ((0 == qxestrcmp ((UAscbyte *)"-dos", (eidata (desired_base)) | |
| 515 + (length - (sizeof ("-dos") - 1)))) || | |
| 516 (0 == qxestrcmp ((UAscbyte *)"-mac", (eidata (desired_base)) | |
| 517 + (length - (sizeof ("-mac") - 1))))) | |
| 518 { | |
| 519 eidel (desired_base, length - (sizeof ("-dos") - 1), -1, | |
| 520 4, 4); | |
| 521 } | |
| 522 } | |
| 523 | |
| 524 coding_system_or_name = intern_int (eidata (desired_base)); | |
| 525 | |
| 526 /* Remove this coding system and its subsidiary coding | |
| 527 systems from the hash, to avoid calling this code recursively. */ | |
| 528 Fremhash (coding_system_or_name, Vcoding_system_hash_table); | |
| 529 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-unix"), | |
| 530 Vcoding_system_hash_table); | |
| 531 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-dos"), | |
| 532 Vcoding_system_hash_table); | |
| 533 Fremhash (add_suffix_to_symbol(coding_system_or_name, "-mac"), | |
| 534 Vcoding_system_hash_table); | |
| 535 | |
| 536 eicpy_ascii (warning_info, "Error autoloading coding system "); | |
| 537 eicat_lstr (warning_info, XSYMBOL_NAME (coding_system_or_name)); | |
| 538 | |
| 539 /* Keep around the form so it doesn't disappear from under | |
| 540 #'eval's feet. */ | |
| 541 GCPRO1 (lookup); | |
| 542 call1_trapping_problems ((const CIbyte *)eidata (warning_info), | |
| 543 Qeval, lookup, 0); | |
| 544 UNGCPRO; | |
| 545 | |
| 546 lookup = | |
| 547 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); | |
| 548 } | |
| 549 | |
| 550 if (CODING_SYSTEMP (lookup) || NILP (lookup)) | |
| 551 return lookup; | |
| 552 | |
| 553 coding_system_or_name = lookup; | |
| 554 } | |
| 555 } | |
| 556 | |
| 428 | 557 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /* |
| 558 Retrieve the coding system of the given name. | |
| 559 | |
| 560 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply | |
| 561 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol. | |
| 562 If there is no such coding system, nil is returned. Otherwise the | |
| 563 associated coding system object is returned. | |
| 564 */ | |
| 565 (coding_system_or_name)) | |
| 566 { | |
| 4303 | 567 return find_coding_system(coding_system_or_name, 1); |
| 568 } | |
| 569 | |
| 570 DEFUN ("autoload-coding-system", Fautoload_coding_system, 2, 2, 0, /* | |
| 571 Define SYMBOL as a coding-system that is loaded on demand. | |
| 572 | |
| 573 FORM is a form to evaluate to define the coding-system. | |
| 574 */ | |
| 575 (symbol, form)) | |
| 576 { | |
| 577 Lisp_Object lookup; | |
| 578 | |
| 579 CHECK_SYMBOL (symbol); | |
| 580 CHECK_CONS (form); | |
| 581 | |
| 582 lookup = find_coding_system (symbol, 0); | |
| 583 | |
| 584 if (!NILP (lookup) && | |
| 585 /* Allow autoloads to be redefined. */ | |
| 586 !CONSP (lookup)) | |
| 440 | 587 { |
| 4303 | 588 invalid_operation ("Cannot redefine existing coding system", |
| 589 symbol); | |
| 440 | 590 } |
| 4303 | 591 |
| 592 Fputhash (symbol, form, Vcoding_system_hash_table); | |
| 593 Fputhash (add_suffix_to_symbol(symbol, "-unix"), form, | |
| 594 Vcoding_system_hash_table); | |
| 595 Fputhash (add_suffix_to_symbol(symbol, "-dos"), form, | |
| 596 Vcoding_system_hash_table); | |
| 597 Fputhash (add_suffix_to_symbol(symbol, "-mac"), form, | |
| 598 Vcoding_system_hash_table); | |
| 599 | |
| 600 /* Tell the POSIX locale infrastructure about this coding system (though | |
| 601 unfortunately it'll be too late for the startup locale sniffing. */ | |
| 602 if (!UNBOUNDP (Qposix_charset_to_coding_system_hash)) | |
| 603 { | |
| 604 Lisp_Object val = Fsymbol_value (Qposix_charset_to_coding_system_hash); | |
| 605 DECLARE_EISTRING (minimal_name); | |
| 606 Ibyte *full_name; | |
| 607 int len = XSTRING_LENGTH (XSYMBOL_NAME (symbol)), i; | |
| 608 | |
| 609 if (!NILP (val)) | |
| 610 { | |
| 611 full_name = XSTRING_DATA (XSYMBOL_NAME (symbol)); | |
| 612 for (i = 0; i < len; ++i) | |
| 613 { | |
| 614 if (full_name[i] >= '0' && full_name[i] <= '9') | |
| 615 { | |
| 616 eicat_ch (minimal_name, full_name[i]); | |
| 617 } | |
| 618 else if (full_name[i] >= 'a' && full_name[i] <= 'z') | |
| 619 { | |
| 620 eicat_ch (minimal_name, full_name[i]); | |
| 621 } | |
| 622 else if (full_name[i] >= 'A' && full_name[i] <= 'Z') | |
| 623 { | |
| 624 eicat_ch (minimal_name, full_name[i] + | |
| 625 ('a' - 'A')); | |
| 626 } | |
| 627 } | |
| 628 | |
| 629 if (eilen (minimal_name)) | |
| 630 { | |
| 631 CHECK_HASH_TABLE (val); | |
| 632 Fputhash (eimake_string(minimal_name), symbol, val); | |
| 633 } | |
| 634 } | |
| 635 } | |
| 636 | |
| 637 return Qt; | |
| 428 | 638 } |
| 639 | |
| 640 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* | |
| 641 Retrieve the coding system of the given name. | |
| 642 Same as `find-coding-system' except that if there is no such | |
| 643 coding system, an error is signaled instead of returning nil. | |
| 644 */ | |
| 645 (name)) | |
| 646 { | |
| 647 Lisp_Object coding_system = Ffind_coding_system (name); | |
| 648 | |
| 649 if (NILP (coding_system)) | |
| 563 | 650 invalid_argument ("No such coding system", name); |
| 428 | 651 return coding_system; |
| 652 } | |
| 653 | |
| 771 | 654 int |
| 655 coding_system_is_binary (Lisp_Object coding_system) | |
| 656 { | |
| 657 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
| 658 return | |
| 659 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) && | |
| 660 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF && | |
| 661 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) && | |
| 662 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil)); | |
| 663 } | |
| 664 | |
| 665 static Lisp_Object | |
| 666 coding_system_real_canonical (Lisp_Object cs) | |
| 667 { | |
| 668 if (!NILP (XCODING_SYSTEM_CANONICAL (cs))) | |
| 669 return XCODING_SYSTEM_CANONICAL (cs); | |
| 670 return cs; | |
| 671 } | |
| 672 | |
| 673 /* Return true if coding system is of the "standard" type that decodes | |
| 674 bytes into characters (suitable for decoding a text file). */ | |
| 675 int | |
| 676 coding_system_is_for_text_file (Lisp_Object coding_system) | |
| 677 { | |
| 678 return (XCODESYSMETH_OR_GIVEN | |
| 679 (coding_system, conversion_end_type, | |
| 680 (coding_system_real_canonical (coding_system)), | |
| 681 DECODES_BYTE_TO_CHARACTER) == | |
| 682 DECODES_BYTE_TO_CHARACTER); | |
| 683 } | |
| 684 | |
| 685 static int | |
| 686 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
| 687 { | |
| 688 enum source_sink_type type = | |
| 689 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type, | |
| 690 (coding_system_real_canonical (cs)), | |
| 691 DECODES_BYTE_TO_CHARACTER); | |
| 692 if (sex == CODING_SOURCE) | |
| 693 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
| 694 type == DECODES_CHARACTER_TO_BYTE); | |
| 695 else | |
| 696 return (type == DECODES_CHARACTER_TO_CHARACTER || | |
| 697 type == DECODES_BYTE_TO_CHARACTER); | |
| 698 } | |
| 699 | |
| 700 static int | |
| 701 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex) | |
| 702 { | |
| 703 return decoding_source_sink_type_is_char (cs, | |
| 704 /* Sex change */ | |
| 705 sex == CODING_SOURCE ? | |
| 706 CODING_SINK : CODING_SOURCE); | |
| 707 } | |
| 708 | |
| 709 /* Like Ffind_coding_system() but check that the coding system is of the | |
| 710 "standard" type that decodes bytes into characters (suitable for | |
| 711 decoding a text file), and if not, returns an appropriate wrapper that | |
| 712 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
| 713 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
| 714 system to do this. */ | |
| 715 | |
| 716 Lisp_Object | |
| 717 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
| 718 { | |
| 719 Lisp_Object coding_system = Ffind_coding_system (name); | |
| 720 Lisp_Object wrapper = coding_system; | |
| 721 | |
| 722 if (NILP (coding_system)) | |
| 723 return Qnil; | |
| 724 if (!coding_system_is_for_text_file (coding_system)) | |
| 725 { | |
| 726 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system); | |
| 727 if (NILP (wrapper)) | |
| 728 { | |
| 729 Lisp_Object chain; | |
| 730 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK)) | |
| 731 chain = list2 (coding_system, Qbinary); | |
| 732 else | |
| 733 chain = list1 (coding_system); | |
| 734 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE)) | |
| 735 chain = Fcons (Qbinary, chain); | |
| 736 wrapper = | |
| 737 make_internal_coding_system | |
| 738 (coding_system, | |
| 739 "internal-text-file-wrapper", | |
| 740 Qchain, | |
| 741 Qunbound, list4 (Qchain, chain, | |
| 742 Qcanonicalize_after_coding, coding_system)); | |
| 743 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper; | |
| 744 } | |
| 745 } | |
| 746 | |
| 747 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT) | |
| 748 return wrapper; | |
| 749 | |
| 750 coding_system = wrapper; | |
| 751 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system); | |
| 752 if (!NILP (wrapper)) | |
| 753 return wrapper; | |
| 754 wrapper = | |
| 755 make_internal_coding_system | |
| 756 (coding_system, | |
| 757 "internal-auto-eol-wrapper", | |
| 758 Qundecided, Qunbound, | |
| 759 list4 (Qcoding_system, coding_system, | |
| 760 Qdo_eol, Qt)); | |
| 761 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper; | |
| 762 return wrapper; | |
| 763 } | |
| 764 | |
| 765 /* Like Fget_coding_system() but verify that the coding system is of the | |
| 766 "standard" type that decodes bytes into characters (suitable for | |
| 767 decoding a text file), and if not, returns an appropriate wrapper that | |
| 768 does. Also, if EOL_WRAP is non-zero, check whether this coding system | |
| 769 wants EOL auto-detection, and if so, wrap with a convert-eol coding | |
| 770 system to do this. */ | |
| 771 | |
| 772 Lisp_Object | |
| 773 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap) | |
| 774 { | |
| 775 Lisp_Object coding_system = find_coding_system_for_text_file (name, | |
| 776 eol_wrap); | |
| 777 if (NILP (coding_system)) | |
| 778 invalid_argument ("No such coding system", name); | |
| 779 return coding_system; | |
| 780 } | |
| 781 | |
| 782 /* We store the coding systems in hash tables with the names as the | |
| 783 key and the actual coding system object as the value. Occasionally | |
| 784 we need to use them in a list format. These routines provide us | |
| 785 with that. */ | |
| 428 | 786 struct coding_system_list_closure |
| 787 { | |
| 788 Lisp_Object *coding_system_list; | |
| 771 | 789 int normal; |
| 790 int internal; | |
| 428 | 791 }; |
| 792 | |
| 793 static int | |
| 4303 | 794 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object value, |
| 428 | 795 void *coding_system_list_closure) |
| 796 { | |
| 797 /* This function can GC */ | |
| 798 struct coding_system_list_closure *cscl = | |
| 799 (struct coding_system_list_closure *) coding_system_list_closure; | |
| 800 Lisp_Object *coding_system_list = cscl->coding_system_list; | |
| 801 | |
| 771 | 802 /* We can't just use VALUE because KEY might be an alias, and we need |
| 4303 | 803 the real coding system object. |
| 804 | |
| 805 Autoloaded coding systems have conses for their values, and can't be | |
| 806 internal coding systems, or coding system aliases. */ | |
| 807 if (CONSP (value) || | |
| 808 (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ? | |
| 809 cscl->internal : cscl->normal)) | |
| 771 | 810 *coding_system_list = Fcons (key, *coding_system_list); |
| 428 | 811 return 0; |
| 812 } | |
| 813 | |
| 2297 | 814 /* #### should we specify a conventional for "all coding systems"? */ |
| 771 | 815 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /* |
| 428 | 816 Return a list of the names of all defined coding systems. |
| 771 | 817 If INTERNAL is nil, only the normal (non-internal) coding systems are |
| 818 included. (Internal coding systems are created for various internal | |
| 819 purposes, such as implementing EOL types of CRLF and CR; generally, you do | |
| 820 not want to see these.) If it is t, only the internal coding systems are | |
| 821 included. If it is any other non-nil value both normal and internal are | |
| 822 included. | |
| 428 | 823 */ |
| 771 | 824 (internal)) |
| 428 | 825 { |
| 826 Lisp_Object coding_system_list = Qnil; | |
| 827 struct gcpro gcpro1; | |
| 828 struct coding_system_list_closure coding_system_list_closure; | |
| 829 | |
| 830 GCPRO1 (coding_system_list); | |
| 831 coding_system_list_closure.coding_system_list = &coding_system_list; | |
| 771 | 832 coding_system_list_closure.normal = !EQ (internal, Qt); |
| 833 coding_system_list_closure.internal = !NILP (internal); | |
| 428 | 834 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table, |
| 835 &coding_system_list_closure); | |
| 836 UNGCPRO; | |
| 837 | |
| 838 return coding_system_list; | |
| 839 } | |
| 840 | |
| 841 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /* | |
| 842 Return the name of the given coding system. | |
| 843 */ | |
| 844 (coding_system)) | |
| 845 { | |
| 846 coding_system = Fget_coding_system (coding_system); | |
| 847 return XCODING_SYSTEM_NAME (coding_system); | |
| 848 } | |
| 849 | |
| 850 static Lisp_Coding_System * | |
| 771 | 851 allocate_coding_system (struct coding_system_methods *codesys_meths, |
| 852 Bytecount data_size, | |
| 853 Lisp_Object name) | |
| 428 | 854 { |
| 771 | 855 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size; |
| 428 | 856 Lisp_Coding_System *codesys = |
| 3024 | 857 (Lisp_Coding_System *) BASIC_ALLOC_LCRECORD (total_size, |
| 1204 | 858 &lrecord_coding_system); |
| 859 | |
| 771 | 860 codesys->methods = codesys_meths; |
| 1204 | 861 #define MARKED_SLOT(x) codesys->x = Qnil; |
| 862 #include "coding-system-slots.h" | |
| 863 | |
| 771 | 864 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF; |
| 865 CODING_SYSTEM_NAME (codesys) = name; | |
| 866 | |
| 867 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys))); | |
| 428 | 868 |
| 869 return codesys; | |
| 870 } | |
| 871 | |
| 771 | 872 static enum eol_type |
| 873 symbol_to_eol_type (Lisp_Object symbol) | |
| 874 { | |
| 875 CHECK_SYMBOL (symbol); | |
| 876 if (NILP (symbol)) return EOL_AUTODETECT; | |
| 877 if (EQ (symbol, Qlf)) return EOL_LF; | |
| 878 if (EQ (symbol, Qcrlf)) return EOL_CRLF; | |
| 879 if (EQ (symbol, Qcr)) return EOL_CR; | |
| 880 | |
| 881 invalid_constant ("Unrecognized eol type", symbol); | |
| 1204 | 882 RETURN_NOT_REACHED (EOL_AUTODETECT); |
| 771 | 883 } |
| 884 | |
| 885 static Lisp_Object | |
| 886 eol_type_to_symbol (enum eol_type type) | |
| 887 { | |
| 888 switch (type) | |
| 889 { | |
| 2500 | 890 default: ABORT (); |
| 771 | 891 case EOL_LF: return Qlf; |
| 892 case EOL_CRLF: return Qcrlf; | |
| 893 case EOL_CR: return Qcr; | |
| 894 case EOL_AUTODETECT: return Qnil; | |
| 895 } | |
| 896 } | |
| 897 | |
| 898 struct subsidiary_type | |
| 899 { | |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
900 const Ascbyte *extension; |
|
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
901 const Ascbyte *mnemonic_ext; |
| 771 | 902 enum eol_type eol; |
| 903 }; | |
| 904 | |
| 905 static struct subsidiary_type coding_subsidiary_list[] = | |
| 906 { { "-unix", "", EOL_LF }, | |
| 907 { "-dos", ":T", EOL_CRLF }, | |
| 908 { "-mac", ":t", EOL_CR } }; | |
| 909 | |
| 910 /* kludge */ | |
| 428 | 911 static void |
| 771 | 912 setup_eol_coding_systems (Lisp_Object codesys) |
| 428 | 913 { |
| 793 | 914 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name); |
| 2367 | 915 Ibyte *codesys_name = alloca_ibytes (len + 7); |
| 771 | 916 int mlen = -1; |
| 867 | 917 Ibyte *codesys_mnemonic = 0; |
| 771 | 918 Lisp_Object codesys_name_sym, sub_codesys; |
| 919 int i; | |
| 920 | |
| 921 memcpy (codesys_name, | |
| 793 | 922 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len); |
| 771 | 923 |
| 924 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys))) | |
| 428 | 925 { |
| 771 | 926 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys)); |
| 2367 | 927 codesys_mnemonic = alloca_ibytes (mlen + 7); |
| 771 | 928 memcpy (codesys_mnemonic, |
| 929 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen); | |
| 930 } | |
| 931 | |
| 932 /* Create three "subsidiary" coding systems, decoding data encoded using | |
| 933 each of the three EOL types. We do this for each subsidiary by | |
| 934 copying the original coding system, setting the EOL type | |
| 935 appropriately, and setting the CANONICAL member of the new coding | |
| 936 system to be a chain consisting of the original coding system followed | |
| 937 by a convert-eol coding system to do the EOL decoding. For EOL type | |
| 938 LF, however, we don't need any decoding, so we skip creating a | |
| 939 CANONICAL. | |
| 940 | |
| 941 If the original coding system is not a text-type coding system | |
| 942 (decodes byte->char), we need to coerce it to one by the appropriate | |
| 943 wrapping in CANONICAL. */ | |
| 944 | |
| 945 for (i = 0; i < countof (coding_subsidiary_list); i++) | |
| 946 { | |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
947 const Ascbyte *extension = coding_subsidiary_list[i].extension; |
|
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
948 const Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext; |
| 771 | 949 enum eol_type eol = coding_subsidiary_list[i].eol; |
| 950 | |
| 2367 | 951 qxestrcpy_ascii (codesys_name + len, extension); |
| 771 | 952 codesys_name_sym = intern_int (codesys_name); |
| 953 if (mlen != -1) | |
| 2367 | 954 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext); |
| 771 | 955 |
| 956 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym); | |
| 957 if (mlen != -1) | |
| 958 XCODING_SYSTEM_MNEMONIC (sub_codesys) = | |
| 959 build_intstring (codesys_mnemonic); | |
| 960 | |
| 961 if (eol != EOL_LF) | |
| 962 { | |
| 963 Lisp_Object chain = list2 (get_coding_system_for_text_file | |
| 964 (codesys, 0), | |
| 965 eol == EOL_CR ? Qconvert_eol_cr : | |
| 966 Qconvert_eol_crlf); | |
| 967 Lisp_Object canon = | |
| 968 make_internal_coding_system | |
| 969 (sub_codesys, "internal-subsidiary-eol-wrapper", | |
| 970 Qchain, Qunbound, | |
| 971 mlen != -1 ? | |
| 972 list6 (Qmnemonic, build_intstring (codesys_mnemonic), | |
| 973 Qchain, chain, | |
| 974 Qcanonicalize_after_coding, sub_codesys) : | |
| 975 list4 (Qchain, chain, | |
| 976 Qcanonicalize_after_coding, sub_codesys)); | |
| 977 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon; | |
| 978 } | |
| 979 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol; | |
| 980 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys; | |
| 981 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys; | |
| 428 | 982 } |
| 983 } | |
| 984 | |
| 771 | 985 /* Basic function to create new coding systems. For `make-coding-system', |
| 986 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE, | |
| 987 DESCRIPTION, and PROPS are the same. All created coding systems are put | |
| 988 in a hash table indexed by NAME. | |
| 989 | |
| 990 If PREFIX is a string, NAME-OR-EXISTING should specify an existing | |
| 991 coding system (or nil), and an internal coding system will be created. | |
| 992 The name of the coding system will be constructed by combining PREFIX | |
| 993 with the name of the existing coding system (if given), and a number | |
| 994 will be appended to insure uniqueness. In such a case, if Qunbound is | |
| 995 given for DESCRIPTION, the description gets created based on the | |
| 996 generated name. Also, if no mnemonic is given in the properties list, a | |
| 997 mnemonic is created based on the generated name. | |
| 998 | |
| 999 For internal coding systems, the coding system is marked as internal | |
| 1000 (see `coding-system-list'), and no subsidiaries will be created or | |
| 1001 eol-wrapping will happen. Otherwise: | |
| 1002 | |
| 1003 -- if the eol-type property is `lf' or t, the coding system is merely | |
| 1004 created and returned. (For t, the coding system will be wrapped with | |
| 1005 an EOL autodetector when it's used to read a file.) | |
| 1006 | |
| 1007 -- if eol-type is `crlf' or `cr', after the coding system object is | |
| 1008 created, it will be wrapped in a chain with the appropriate | |
| 1009 convert-eol coding system (either `convert-eol-crlf' or | |
| 1010 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at | |
| 1011 decoding time, and the opposite at encoding time. The resulting | |
| 1012 chain becomes the CANONICAL field of the coding system object. | |
| 1013 | |
| 1014 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three | |
| 1015 coding systems where the original coding system (before wrapping with | |
| 1016 convert-eol-autodetect) is either unwrapped or wrapped with | |
| 1017 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems | |
| 1018 to handle LF, CRLF, and CR end-of-line indicators are created. (This | |
| 1019 crazy crap is based on existing behavior in other Mule versions, | |
| 1020 including FSF Emacs.) | |
| 1021 */ | |
| 428 | 1022 |
| 1023 static Lisp_Object | |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1024 make_coding_system_1 (Lisp_Object name_or_existing, const Ascbyte *prefix, |
| 771 | 1025 Lisp_Object type, Lisp_Object description, |
| 1026 Lisp_Object props) | |
| 428 | 1027 { |
| 771 | 1028 Lisp_Coding_System *cs; |
| 1029 int need_to_setup_eol_systems = 1; | |
| 1030 enum eol_type eol_wrapper = EOL_AUTODETECT; | |
| 1031 struct coding_system_methods *meths; | |
| 1032 Lisp_Object csobj; | |
| 1033 Lisp_Object defmnem = Qnil; | |
| 1034 | |
| 1035 if (NILP (type)) | |
| 1036 type = Qundecided; | |
| 1037 meths = decode_coding_system_type (type, ERROR_ME); | |
| 1038 | |
| 1039 if (prefix) | |
| 428 | 1040 { |
| 867 | 1041 Ibyte *newname = |
| 771 | 1042 emacs_sprintf_malloc (NULL, "%s-%s-%d", |
| 1043 prefix, | |
| 867 | 1044 NILP (name_or_existing) ? (Ibyte *) "nil" : |
| 771 | 1045 XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME |
| 1046 (name_or_existing))), | |
| 1047 ++coding_system_tick); | |
| 1048 name_or_existing = intern_int (newname); | |
| 1726 | 1049 xfree (newname, Ibyte *); |
| 771 | 1050 |
| 1051 if (UNBOUNDP (description)) | |
| 1052 { | |
| 1053 newname = | |
| 1054 emacs_sprintf_malloc | |
| 1055 (NULL, "For Internal Use (%s)", | |
| 1056 XSTRING_DATA (Fsymbol_name (name_or_existing))); | |
| 1057 description = build_intstring (newname); | |
| 1726 | 1058 xfree (newname, Ibyte *); |
| 771 | 1059 } |
| 1060 | |
| 1061 newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick); | |
| 1062 defmnem = build_intstring (newname); | |
| 1726 | 1063 xfree (newname, Ibyte *); |
| 428 | 1064 } |
| 771 | 1065 else |
| 1066 CHECK_SYMBOL (name_or_existing); | |
| 1067 | |
| 4303 | 1068 /* See is there an entry for name_or_existing in the defined coding system |
| 1069 hash table. */ | |
| 1070 csobj = find_coding_system (name_or_existing, 0); | |
| 1071 /* Error if it's there and not an autoload form. */ | |
| 1072 if (!NILP (csobj) && !CONSP (csobj)) | |
| 771 | 1073 invalid_operation ("Cannot redefine existing coding system", |
| 4303 | 1074 name_or_existing); |
| 771 | 1075 |
| 1076 cs = allocate_coding_system (meths, meths->extra_data_size, | |
| 1077 name_or_existing); | |
| 793 | 1078 csobj = wrap_coding_system (cs); |
| 771 | 1079 |
| 1080 cs->internal_p = !!prefix; | |
| 1081 | |
| 1082 if (NILP (description)) | |
| 1083 description = build_string (""); | |
| 1084 else | |
| 1085 CHECK_STRING (description); | |
| 1086 CODING_SYSTEM_DESCRIPTION (cs) = description; | |
| 1087 | |
| 1088 if (!NILP (defmnem)) | |
| 1089 CODING_SYSTEM_MNEMONIC (cs) = defmnem; | |
| 1090 | |
| 1091 { | |
| 1092 EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props) | |
| 1093 { | |
| 1094 int recognized = 1; | |
| 1095 | |
| 1096 if (EQ (key, Qmnemonic)) | |
| 1097 { | |
| 1098 if (!NILP (value)) | |
| 1099 CHECK_STRING (value); | |
| 1100 CODING_SYSTEM_MNEMONIC (cs) = value; | |
| 1101 } | |
| 1102 | |
| 1103 else if (EQ (key, Qdocumentation)) | |
| 1104 { | |
| 1105 if (!NILP (value)) | |
| 1106 CHECK_STRING (value); | |
| 1107 CODING_SYSTEM_DOCUMENTATION (cs) = value; | |
| 1108 } | |
| 1109 | |
| 1110 else if (EQ (key, Qeol_type)) | |
| 1111 { | |
| 1112 need_to_setup_eol_systems = NILP (value); | |
| 1113 if (EQ (value, Qt)) | |
| 1114 value = Qnil; | |
| 1115 eol_wrapper = symbol_to_eol_type (value); | |
| 1116 } | |
| 1117 | |
| 1118 else if (EQ (key, Qpost_read_conversion)) | |
| 1119 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value; | |
| 1120 else if (EQ (key, Qpre_write_conversion)) | |
| 1121 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value; | |
| 1122 /* FSF compatibility */ | |
| 1123 else if (EQ (key, Qtranslation_table_for_decode)) | |
| 1124 ; | |
| 1125 else if (EQ (key, Qtranslation_table_for_encode)) | |
| 1126 ; | |
| 1127 else if (EQ (key, Qsafe_chars)) | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1128 CODING_SYSTEM_SAFE_CHARS (cs) = value; |
| 771 | 1129 else if (EQ (key, Qsafe_charsets)) |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1130 CODING_SYSTEM_SAFE_CHARSETS (cs) = value; |
| 771 | 1131 else if (EQ (key, Qmime_charset)) |
| 1132 ; | |
| 1133 else if (EQ (key, Qvalid_codes)) | |
| 1134 ; | |
| 1135 else | |
| 1136 recognized = CODESYSMETH_OR_GIVEN (cs, putprop, | |
| 1137 (csobj, key, value), 0); | |
| 1138 | |
| 1139 if (!recognized) | |
| 1140 invalid_constant ("Unrecognized property", key); | |
| 1141 } | |
| 1142 } | |
| 1143 | |
| 1144 { | |
| 1145 XCODING_SYSTEM_CANONICAL (csobj) = | |
| 1146 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil); | |
| 1147 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system | |
| 1148 below */ | |
| 1149 | |
| 4303 | 1150 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table); |
| 1151 | |
| 771 | 1152 if (need_to_setup_eol_systems && !cs->internal_p) |
| 1153 setup_eol_coding_systems (csobj); | |
| 1154 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF) | |
| 1155 { | |
| 1156 /* If a specific eol-type (other than LF) was specified, we handle | |
| 1157 this by converting the coding system into a chain that wraps the | |
| 1158 coding system along with a convert-eol system after it, in | |
| 1159 exactly that same switcheroo fashion that the normal | |
| 1160 canonicalize method works -- BUT we will run into a problem if | |
| 1161 we do it the obvious way, because when `chain' creates its | |
| 1162 substreams, the substream containing the coding system we're | |
| 1163 creating will have canonicalization expansion done on it, | |
| 1164 leading to infinite recursion. So we have to generate a new, | |
| 1165 internal coding system with the previous value of CANONICAL. */ | |
| 867 | 1166 Ibyte *newname = |
| 771 | 1167 emacs_sprintf_malloc |
| 1168 (NULL, "internal-eol-copy-%s-%d", | |
| 1169 XSTRING_DATA (Fsymbol_name (name_or_existing)), | |
| 1170 ++coding_system_tick); | |
| 1171 Lisp_Object newnamesym = intern_int (newname); | |
| 1172 Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym); | |
| 1726 | 1173 xfree (newname, Ibyte *); |
| 771 | 1174 |
| 1175 XCODING_SYSTEM_CANONICAL (csobj) = | |
| 1176 make_internal_coding_system | |
| 1177 (csobj, | |
| 1178 "internal-eol-wrapper", | |
| 1179 Qchain, Qunbound, | |
| 1180 list4 (Qchain, | |
| 1181 list2 (copied, | |
| 1182 eol_wrapper == EOL_CR ? | |
| 1183 Qconvert_eol_cr : | |
| 1184 Qconvert_eol_crlf), | |
| 1185 Qcanonicalize_after_coding, | |
| 1186 csobj)); | |
| 1187 } | |
| 1188 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper; | |
| 1189 } | |
| 1190 | |
| 1191 return csobj; | |
| 428 | 1192 } |
| 1193 | |
| 771 | 1194 Lisp_Object |
|
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1195 make_internal_coding_system (Lisp_Object existing, const Ascbyte *prefix, |
| 771 | 1196 Lisp_Object type, Lisp_Object description, |
| 1197 Lisp_Object props) | |
| 1198 { | |
| 1199 return make_coding_system_1 (existing, prefix, type, description, props); | |
| 1200 } | |
| 428 | 1201 |
| 1202 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* | |
| 1203 Register symbol NAME as a coding system. | |
| 1204 | |
| 1205 TYPE describes the conversion method used and should be one of | |
| 1206 | |
| 3025 | 1207 nil or `undecided' |
| 428 | 1208 Automatic conversion. XEmacs attempts to detect the coding system |
| 1209 used in the file. | |
| 3025 | 1210 `chain' |
| 771 | 1211 Chain two or more coding systems together to make a combination coding |
| 1212 system. | |
| 3025 | 1213 `no-conversion' |
| 428 | 1214 No conversion. Use this for binary files and such. On output, |
| 1215 graphic characters that are not in ASCII or Latin-1 will be | |
| 1216 replaced by a ?. (For a no-conversion-encoded buffer, these | |
| 1217 characters will only be present if you explicitly insert them.) | |
| 3025 | 1218 `convert-eol' |
| 771 | 1219 Convert CRLF sequences or CR to LF. |
| 3025 | 1220 `shift-jis' |
| 428 | 1221 Shift-JIS (a Japanese encoding commonly used in PC operating systems). |
| 3025 | 1222 `unicode' |
| 771 | 1223 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.). |
| 3025 | 1224 `mswindows-unicode-to-multibyte' |
| 771 | 1225 (MS Windows only) Converts from Windows Unicode to Windows Multibyte |
| 1226 (any code page encoding) upon encoding, and the other way upon decoding. | |
| 3025 | 1227 `mswindows-multibyte' |
| 771 | 1228 Converts to or from Windows Multibyte (any code page encoding). |
| 1229 This is resolved into a chain of `mswindows-unicode' and | |
| 1230 `mswindows-unicode-to-multibyte'. | |
| 3025 | 1231 `iso2022' |
| 428 | 1232 Any ISO2022-compliant encoding. Among other things, this includes |
| 1233 JIS (the Japanese encoding commonly used for e-mail), EUC (the | |
| 1234 standard Unix encoding for Japanese and other languages), and | |
| 1235 Compound Text (the encoding used in X11). You can specify more | |
| 442 | 1236 specific information about the conversion with the PROPS argument. |
| 3025 | 1237 `big5' |
| 2819 | 1238 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan). |
| 3025 | 1239 `ccl' |
| 428 | 1240 The conversion is performed using a user-written pseudo-code |
| 1241 program. CCL (Code Conversion Language) is the name of this | |
| 1242 pseudo-code. | |
| 3025 | 1243 `gzip' |
| 771 | 1244 GZIP compression format. |
| 3025 | 1245 `internal' |
| 428 | 1246 Write out or read in the raw contents of the memory representing |
| 1247 the buffer's text. This is primarily useful for debugging | |
| 1248 purposes, and is only enabled when XEmacs has been compiled with | |
| 1249 DEBUG_XEMACS defined (via the --debug configure option). | |
| 3025 | 1250 WARNING: Reading in a file using `internal' conversion can result |
| 428 | 1251 in an internal inconsistency in the memory representing a |
| 1252 buffer's text, which will produce unpredictable results and may | |
| 1253 cause XEmacs to crash. Under normal circumstances you should | |
| 3025 | 1254 never use `internal' conversion. |
| 428 | 1255 |
| 771 | 1256 DESCRIPTION is a short English phrase describing the coding system, |
| 1257 suitable for use as a menu item. (See also the `documentation' property | |
| 1258 below.) | |
| 428 | 1259 |
| 1260 PROPS is a property list, describing the specific nature of the | |
| 1261 character set. Recognized properties are: | |
| 1262 | |
| 3025 | 1263 `mnemonic' |
| 428 | 1264 String to be displayed in the modeline when this coding system is |
| 1265 active. | |
| 1266 | |
| 3025 | 1267 `documentation' |
| 771 | 1268 Detailed documentation on the coding system. |
| 1269 | |
| 3025 | 1270 `eol-type' |
| 428 | 1271 End-of-line conversion to be used. It should be one of |
| 1272 | |
| 1273 nil | |
| 1274 Automatically detect the end-of-line type (LF, CRLF, | |
| 1275 or CR). Also generate subsidiary coding systems named | |
| 1276 `NAME-unix', `NAME-dos', and `NAME-mac', that are | |
| 1277 identical to this coding system but have an EOL-TYPE | |
| 3025 | 1278 value of `lf', `crlf', and `cr', respectively. |
| 1279 `lf' | |
| 428 | 1280 The end of a line is marked externally using ASCII LF. |
| 1281 Since this is also the way that XEmacs represents an | |
| 1282 end-of-line internally, specifying this option results | |
| 1283 in no end-of-line conversion. This is the standard | |
| 1284 format for Unix text files. | |
| 3025 | 1285 `crlf' |
| 428 | 1286 The end of a line is marked externally using ASCII |
| 1287 CRLF. This is the standard format for MS-DOS text | |
| 1288 files. | |
| 3025 | 1289 `cr' |
| 428 | 1290 The end of a line is marked externally using ASCII CR. |
| 1291 This is the standard format for Macintosh text files. | |
| 1292 t | |
| 1293 Automatically detect the end-of-line type but do not | |
| 1294 generate subsidiary coding systems. (This value is | |
| 1295 converted to nil when stored internally, and | |
| 1296 `coding-system-property' will return nil.) | |
| 1297 | |
| 3025 | 1298 `post-read-conversion' |
| 771 | 1299 The value is a function to call after some text is inserted and |
| 1300 decoded by the coding system itself and before any functions in | |
| 1301 `after-change-functions' are called. (#### Not actually true in | |
| 1302 XEmacs. `after-change-functions' will be called twice if | |
| 1303 `post-read-conversion' changes something.) The argument of this | |
| 1304 function is the same as for a function in | |
| 1305 `after-insert-file-functions', i.e. LENGTH of the text inserted, | |
| 1306 with point at the head of the text to be decoded. | |
| 428 | 1307 |
| 3025 | 1308 `pre-write-conversion' |
| 771 | 1309 The value is a function to call after all functions in |
| 1310 `write-region-annotate-functions' and `buffer-file-format' are | |
| 1311 called, and before the text is encoded by the coding system itself. | |
| 1312 The arguments to this function are the same as those of a function | |
| 1313 in `write-region-annotate-functions', i.e. FROM and TO, specifying | |
| 1314 a region of text. | |
| 1315 | |
| 1316 | |
| 1317 | |
| 1318 The following properties are allowed for FSF compatibility but currently | |
| 1319 ignored: | |
| 1320 | |
| 3025 | 1321 `translation-table-for-decode' |
| 771 | 1322 The value is a translation table to be applied on decoding. See |
| 1323 the function `make-translation-table' for the format of translation | |
| 1324 table. This is not applicable to CCL-based coding systems. | |
| 1325 | |
| 3025 | 1326 `translation-table-for-encode' |
| 771 | 1327 The value is a translation table to be applied on encoding. This is |
| 1328 not applicable to CCL-based coding systems. | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1329 |
| 3025 | 1330 `mime-charset' |
| 771 | 1331 The value is a symbol of which name is `MIME-charset' parameter of |
| 1332 the coding system. | |
| 1333 | |
| 3025 | 1334 `valid-codes' (meaningful only for a coding system based on CCL) |
| 771 | 1335 The value is a list to indicate valid byte ranges of the encoded |
| 1336 file. Each element of the list is an integer or a cons of integer. | |
| 1337 In the former case, the integer value is a valid byte code. In the | |
| 1338 latter case, the integers specifies the range of valid byte codes. | |
| 1339 | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1340 The following properties are used by `default-query-coding-region', |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1341 the default implementation of `query-coding-region'. This |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1342 implementation and these properties are not used by the Unicode coding |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1343 systems, nor by those CCL coding systems created with |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1344 `make-8-bit-coding-system'. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1345 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1346 `safe-chars' |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1347 The value is a char table. If a character has non-nil value in it, |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1348 the character is safely supported by the coding system. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1349 Under XEmacs, for the moment, this is used in addition to the |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1350 `safe-charsets' property. It does not override it as it does |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1351 under GNU Emacs. #### We need to consider if we should keep this |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1352 behaviour. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1353 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1354 `safe-charsets' |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1355 The value is a list of charsets safely supported by the coding |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1356 system. For coding systems based on ISO 2022, XEmacs may try to |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1357 encode characters outside these character sets, but outside of |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1358 East Asia and East Asian coding systems, it is unlikely that |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1359 consumers of the data will understand XEmacs' encoding. |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1360 The value t means that all XEmacs character sets handles are supported. |
| 771 | 1361 |
| 3025 | 1362 The following additional property is recognized if TYPE is `convert-eol': |
| 1363 | |
| 1364 `subtype' | |
| 793 | 1365 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding, |
| 1366 the corresponding sequence will be converted to LF. When encoding, | |
| 1367 the opposite happens. This coding system converts characters to | |
| 771 | 1368 characters. |
| 1369 | |
| 428 | 1370 |
| 1371 | |
| 3025 | 1372 The following additional properties are recognized if TYPE is `iso2022': |
| 1373 | |
| 1374 `charset-g0' | |
| 1375 `charset-g1' | |
| 1376 `charset-g2' | |
| 1377 `charset-g3' | |
| 428 | 1378 The character set initially designated to the G0 - G3 registers. |
| 1379 The value should be one of | |
| 1380 | |
| 1381 -- A charset object (designate that character set) | |
| 1382 -- nil (do not ever use this register) | |
| 1383 -- t (no character set is initially designated to | |
| 1384 the register, but may be later on; this automatically | |
| 1385 sets the corresponding `force-g*-on-output' property) | |
| 1386 | |
| 3025 | 1387 `force-g0-on-output' |
| 1388 `force-g1-on-output' | |
| 1389 `force-g2-on-output' | |
| 1390 `force-g2-on-output' | |
| 428 | 1391 If non-nil, send an explicit designation sequence on output before |
| 1392 using the specified register. | |
| 1393 | |
| 3025 | 1394 `short' |
| 428 | 1395 If non-nil, use the short forms "ESC $ @", "ESC $ A", and |
| 1396 "ESC $ B" on output in place of the full designation sequences | |
| 1397 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B". | |
| 1398 | |
| 3025 | 1399 `no-ascii-eol' |
| 428 | 1400 If non-nil, don't designate ASCII to G0 at each end of line on output. |
| 1401 Setting this to non-nil also suppresses other state-resetting that | |
| 1402 normally happens at the end of a line. | |
| 1403 | |
| 3025 | 1404 `no-ascii-cntl' |
| 428 | 1405 If non-nil, don't designate ASCII to G0 before control chars on output. |
| 1406 | |
| 3025 | 1407 `seven' |
| 428 | 1408 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit |
| 1409 environment. | |
| 1410 | |
| 3025 | 1411 `lock-shift' |
| 428 | 1412 If non-nil, use locking-shift (SO/SI) instead of single-shift |
| 1413 or designation by escape sequence. | |
| 1414 | |
| 3025 | 1415 `no-iso6429' |
| 428 | 1416 If non-nil, don't use ISO6429's direction specification. |
| 1417 | |
| 3025 | 1418 `escape-quoted' |
| 428 | 1419 If non-nil, literal control characters that are the same as |
| 1420 the beginning of a recognized ISO2022 or ISO6429 escape sequence | |
| 1421 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E), | |
| 1422 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character | |
| 1423 so that they can be properly distinguished from an escape sequence. | |
| 1424 (Note that doing this results in a non-portable encoding.) This | |
| 1425 encoding flag is used for byte-compiled files. Note that ESC | |
| 1426 is a good choice for a quoting character because there are no | |
| 1427 escape sequences whose second byte is a character from the Control-0 | |
| 1428 or Control-1 character sets; this is explicitly disallowed by the | |
| 1429 ISO2022 standard. | |
| 1430 | |
| 3025 | 1431 `input-charset-conversion' |
| 428 | 1432 A list of conversion specifications, specifying conversion of |
| 1433 characters in one charset to another when decoding is performed. | |
| 1434 Each specification is a list of two elements: the source charset, | |
| 1435 and the destination charset. | |
| 1436 | |
| 3025 | 1437 `output-charset-conversion' |
| 428 | 1438 A list of conversion specifications, specifying conversion of |
| 1439 characters in one charset to another when encoding is performed. | |
| 1440 The form of each specification is the same as for | |
| 3025 | 1441 `input-charset-conversion'. |
| 428 | 1442 |
| 1443 | |
| 771 | 1444 |
| 428 | 1445 The following additional properties are recognized (and required) |
| 3025 | 1446 if TYPE is `ccl': |
| 1447 | |
| 1448 `decode' | |
| 428 | 1449 CCL program used for decoding (converting to internal format). |
| 1450 | |
| 3025 | 1451 `encode' |
| 428 | 1452 CCL program used for encoding (converting to external format). |
| 771 | 1453 |
| 1454 | |
| 3025 | 1455 The following additional properties are recognized if TYPE is `chain': |
| 1456 | |
| 1457 `chain' | |
| 771 | 1458 List of coding systems to be chained together, in decoding order. |
| 1459 | |
| 3025 | 1460 `canonicalize-after-coding' |
| 771 | 1461 Coding system to be returned by the detector routines in place of |
| 1462 this coding system. | |
| 1463 | |
| 1464 | |
| 1465 | |
| 3025 | 1466 The following additional properties are recognized if TYPE is `unicode': |
| 1467 | |
| 3767 | 1468 `unicode-type' |
| 771 | 1469 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not |
| 1470 yet implemented). `utf-16' is the basic two-byte encoding; | |
| 1471 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible | |
| 1472 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using | |
| 1473 only characters that will safely pass through all mail gateways. | |
| 2297 | 1474 [[ This should be \"transformation format\". There should also be |
| 1475 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]] | |
| 771 | 1476 |
| 3025 | 1477 `little-endian' |
| 771 | 1478 If non-nil, `utf-16' and `ucs-4' will write out the groups of two |
| 1479 or four bytes little-endian instead of big-endian. This is required, | |
| 1480 for example, under Windows. | |
| 1481 | |
| 3025 | 1482 `need-bom' |
| 771 | 1483 If non-nil, a byte order mark (BOM, or Unicode FFFE) should be |
| 1484 written out at the beginning of the data. This serves both to | |
| 1485 identify the endianness of the following data and to mark the | |
| 1486 data as Unicode (at least, this is how Windows uses it). | |
| 2297 | 1487 [[ The correct term is \"signature\", since this technique may also |
| 1488 be used with UTF-8. That is the term used in the standard. ]] | |
| 771 | 1489 |
| 1490 | |
| 1491 The following additional properties are recognized if TYPE is | |
| 3025 | 1492 `mswindows-multibyte': |
| 1493 | |
| 1494 `code-page' | |
| 771 | 1495 Either a number (specifying a particular code page) or one of the |
| 1496 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI, | |
| 1497 OEM, Macintosh, or EBCDIC code page associated with a particular | |
| 1498 locale (given by the `locale' property). NOTE: EBCDIC code pages | |
| 1499 only exist in Windows 2000 and later. | |
| 1500 | |
| 3025 | 1501 `locale' |
| 771 | 1502 If `code-page' is a symbol, this specifies the locale whose code |
| 1503 page of the corresponding type should be used. This should be | |
| 1504 one of the following: A cons of two strings, (LANGUAGE | |
| 1505 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a | |
| 1506 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is | |
| 1507 used); or one of the symbols `current', `user-default', or | |
| 1508 `system-default', corresponding to the values of | |
| 1509 `mswindows-current-locale', `mswindows-user-default-locale', or | |
| 1510 `mswindows-system-default-locale', respectively. | |
| 1511 | |
| 1512 | |
| 1513 | |
| 3025 | 1514 The following additional properties are recognized if TYPE is `undecided': |
| 4072 | 1515 \[[ Doesn't GNU use \"detect-*\" for the following two? ]] |
| 771 | 1516 |
| 3025 | 1517 `do-eol' |
| 771 | 1518 Do EOL detection. |
| 1519 | |
| 3025 | 1520 `do-coding' |
| 771 | 1521 Do encoding detection. |
| 1522 | |
| 3025 | 1523 `coding-system' |
| 771 | 1524 If encoding detection is not done, use the specified coding system |
| 1525 to do decoding. This is used internally when implementing coding | |
| 1526 systems with an EOL type that specifies autodetection (the default), | |
| 1527 so that the detector routines return the proper subsidiary. | |
| 1528 | |
| 1529 | |
| 1530 | |
| 3025 | 1531 The following additional property is recognized if TYPE is `gzip': |
| 1532 | |
| 1533 `level' | |
| 771 | 1534 Compression level: 0 through 9, or `default' (currently 6). |
| 1535 | |
| 428 | 1536 */ |
| 771 | 1537 (name, type, description, props)) |
| 428 | 1538 { |
| 771 | 1539 return make_coding_system_1 (name, 0, type, description, props); |
| 428 | 1540 } |
| 1541 | |
| 1542 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /* | |
| 1543 Copy OLD-CODING-SYSTEM to NEW-NAME. | |
| 1544 If NEW-NAME does not name an existing coding system, a new one will | |
| 1545 be created. | |
| 771 | 1546 If you are using this function to create an alias, think again: |
| 1547 Use `define-coding-system-alias' instead. | |
| 428 | 1548 */ |
| 1549 (old_coding_system, new_name)) | |
| 1550 { | |
| 1551 Lisp_Object new_coding_system; | |
| 1552 old_coding_system = Fget_coding_system (old_coding_system); | |
| 771 | 1553 new_coding_system = |
| 4303 | 1554 UNBOUNDP (new_name) ? Qnil : find_coding_system (new_name, 0); |
| 428 | 1555 if (NILP (new_coding_system)) |
| 1556 { | |
| 793 | 1557 new_coding_system = |
| 1558 wrap_coding_system | |
| 1559 (allocate_coding_system | |
| 1560 (XCODING_SYSTEM (old_coding_system)->methods, | |
| 1561 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size, | |
| 1562 new_name)); | |
| 771 | 1563 if (!UNBOUNDP (new_name)) |
| 1564 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table); | |
| 428 | 1565 } |
| 771 | 1566 else if (XCODING_SYSTEM (old_coding_system)->methods != |
| 1567 XCODING_SYSTEM (new_coding_system)->methods) | |
| 1568 invalid_operation_2 ("Coding systems not same type", | |
| 1569 old_coding_system, new_coding_system); | |
| 428 | 1570 |
| 1571 { | |
| 1572 Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system); | |
| 1573 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system); | |
| 3017 | 1574 COPY_SIZED_LCRECORD (to, from, sizeof_coding_system (from)); |
| 428 | 1575 to->name = new_name; |
| 1576 } | |
| 1577 return new_coding_system; | |
| 1578 } | |
| 1579 | |
| 771 | 1580 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, |
| 1581 1, 1, 0, /* | |
| 440 | 1582 Return t if OBJECT names a coding system, and is not a coding system alias. |
| 428 | 1583 */ |
| 440 | 1584 (object)) |
| 1585 { | |
| 1586 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil)) | |
| 1587 ? Qt : Qnil; | |
| 1588 } | |
| 1589 | |
| 2297 | 1590 /* #### Shouldn't this really be a find/get pair? */ |
| 1591 | |
| 440 | 1592 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /* |
| 1593 Return t if OBJECT is a coding system alias. | |
| 1594 All coding system aliases are created by `define-coding-system-alias'. | |
| 1595 */ | |
| 1596 (object)) | |
| 428 | 1597 { |
| 440 | 1598 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero)) |
| 1599 ? Qt : Qnil; | |
| 1600 } | |
| 1601 | |
| 1602 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /* | |
| 1603 Return the coding-system symbol for which symbol ALIAS is an alias. | |
| 1604 */ | |
| 1605 (alias)) | |
| 1606 { | |
| 1607 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil); | |
| 1608 if (SYMBOLP (aliasee)) | |
| 1609 return aliasee; | |
| 1610 else | |
| 563 | 1611 invalid_argument ("Symbol is not a coding system alias", alias); |
| 1204 | 1612 RETURN_NOT_REACHED (Qnil); |
| 440 | 1613 } |
| 1614 | |
| 1615 /* A maphash function, for removing dangling coding system aliases. */ | |
| 1616 static int | |
| 2286 | 1617 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias), |
| 440 | 1618 Lisp_Object aliasee, |
| 1619 void *dangling_aliases) | |
| 1620 { | |
| 1621 if (SYMBOLP (aliasee) | |
| 1622 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil))) | |
| 428 | 1623 { |
| 440 | 1624 (*(int *) dangling_aliases)++; |
| 1625 return 1; | |
| 428 | 1626 } |
| 440 | 1627 else |
| 1628 return 0; | |
| 1629 } | |
| 1630 | |
| 1631 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /* | |
| 1632 Define symbol ALIAS as an alias for coding system ALIASEE. | |
| 1633 | |
| 1634 You can use this function to redefine an alias that has already been defined, | |
| 1635 but you cannot redefine a name which is the canonical name for a coding system. | |
| 1636 \(a canonical name of a coding system is what is returned when you call | |
| 1637 `coding-system-name' on a coding system). | |
| 1638 | |
| 1639 ALIASEE itself can be an alias, which allows you to define nested aliases. | |
| 1640 | |
| 1641 You are forbidden, however, from creating alias loops or `dangling' aliases. | |
| 1642 These will be detected, and an error will be signaled if you attempt to do so. | |
| 1643 | |
| 1644 If ALIASEE is nil, then ALIAS will simply be undefined. | |
| 1645 | |
| 1646 See also `coding-system-alias-p', `coding-system-aliasee', | |
| 1647 and `coding-system-canonical-name-p'. | |
| 1648 */ | |
| 1649 (alias, aliasee)) | |
| 1650 { | |
| 2286 | 1651 Lisp_Object probe; |
| 440 | 1652 |
| 1653 CHECK_SYMBOL (alias); | |
| 1654 | |
| 1655 if (!NILP (Fcoding_system_canonical_name_p (alias))) | |
| 563 | 1656 invalid_change |
| 440 | 1657 ("Symbol is the canonical name of a coding system and cannot be redefined", |
| 1658 alias); | |
| 1659 | |
| 1660 if (NILP (aliasee)) | |
| 1661 { | |
| 771 | 1662 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix"); |
| 1663 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos"); | |
| 1664 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac"); | |
| 440 | 1665 |
| 1666 Fremhash (alias, Vcoding_system_hash_table); | |
| 1667 | |
| 1668 /* Undefine subsidiary aliases, | |
| 1669 presumably created by a previous call to this function */ | |
| 1670 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) && | |
| 1671 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) && | |
| 1672 ! NILP (Fcoding_system_alias_p (subsidiary_mac))) | |
| 1673 { | |
| 1674 Fdefine_coding_system_alias (subsidiary_unix, Qnil); | |
| 1675 Fdefine_coding_system_alias (subsidiary_dos, Qnil); | |
| 1676 Fdefine_coding_system_alias (subsidiary_mac, Qnil); | |
| 1677 } | |
| 1678 | |
| 1679 /* Undefine dangling coding system aliases. */ | |
| 1680 { | |
| 1681 int dangling_aliases; | |
| 1682 | |
| 1683 do { | |
| 1684 dangling_aliases = 0; | |
| 1685 elisp_map_remhash (dangling_coding_system_alias_p, | |
| 1686 Vcoding_system_hash_table, | |
| 1687 &dangling_aliases); | |
| 1688 } while (dangling_aliases > 0); | |
| 1689 } | |
| 1690 | |
| 1691 return Qnil; | |
| 1692 } | |
| 1693 | |
| 1694 if (CODING_SYSTEMP (aliasee)) | |
| 1695 aliasee = XCODING_SYSTEM_NAME (aliasee); | |
| 1696 | |
| 1697 /* Checks that aliasee names a coding-system */ | |
| 2286 | 1698 (void) Fget_coding_system (aliasee); |
| 440 | 1699 |
| 1700 /* Check for coding system alias loops */ | |
| 1701 if (EQ (alias, aliasee)) | |
| 563 | 1702 alias_loop: invalid_operation_2 |
| 440 | 1703 ("Attempt to create a coding system alias loop", alias, aliasee); |
| 1704 | |
| 1705 for (probe = aliasee; | |
| 1706 SYMBOLP (probe); | |
| 1707 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero)) | |
| 1708 { | |
| 1709 if (EQ (probe, alias)) | |
| 1710 goto alias_loop; | |
| 1711 } | |
| 1712 | |
| 1713 Fputhash (alias, aliasee, Vcoding_system_hash_table); | |
| 1714 | |
| 1715 /* Set up aliases for subsidiaries. | |
| 2297 | 1716 #### There must be a better way to handle subsidiary coding systems. |
| 1717 Inquiring Minds Want To Know: shouldn't they always be chains? */ | |
| 440 | 1718 { |
| 1719 static const char *suffixes[] = { "-unix", "-dos", "-mac" }; | |
| 1720 int i; | |
| 1721 for (i = 0; i < countof (suffixes); i++) | |
| 1722 { | |
| 1723 Lisp_Object alias_subsidiary = | |
| 771 | 1724 add_suffix_to_symbol (alias, suffixes[i]); |
| 440 | 1725 Lisp_Object aliasee_subsidiary = |
| 771 | 1726 add_suffix_to_symbol (aliasee, suffixes[i]); |
| 440 | 1727 |
| 1728 if (! NILP (Ffind_coding_system (aliasee_subsidiary))) | |
| 1729 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary); | |
| 1730 } | |
| 1731 } | |
| 428 | 1732 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac], |
| 1733 but it doesn't look intentional, so I'd rather return something | |
| 1734 meaningful or nothing at all. */ | |
| 1735 return Qnil; | |
| 1736 } | |
| 1737 | |
| 1738 static Lisp_Object | |
| 771 | 1739 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) |
| 428 | 1740 { |
| 1741 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | |
| 1742 Lisp_Object new_coding_system; | |
| 1743 | |
| 1744 switch (type) | |
| 1745 { | |
| 1746 case EOL_AUTODETECT: return coding_system; | |
| 1747 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; | |
| 1748 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; | |
| 1749 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; | |
| 2500 | 1750 default: ABORT (); return Qnil; |
| 428 | 1751 } |
| 1752 | |
| 1753 return NILP (new_coding_system) ? coding_system : new_coding_system; | |
| 1754 } | |
| 1755 | |
| 1756 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /* | |
| 1757 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE. | |
| 771 | 1758 The logically opposite operation is `coding-system-base'. |
| 428 | 1759 */ |
| 1760 (coding_system, eol_type)) | |
| 1761 { | |
| 771 | 1762 coding_system = get_coding_system_for_text_file (coding_system, 0); |
| 428 | 1763 |
| 1764 return subsidiary_coding_system (coding_system, | |
| 1765 symbol_to_eol_type (eol_type)); | |
| 1766 } | |
| 1767 | |
| 771 | 1768 DEFUN ("coding-system-base", Fcoding_system_base, |
| 1769 1, 1, 0, /* | |
| 1770 Return the base coding system of CODING-SYSTEM. | |
| 1771 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it | |
| 1772 returns CODING-SYSTEM. | |
| 1773 The logically opposite operation is `subsidiary-coding-system'. | |
| 1774 */ | |
| 1775 (coding_system)) | |
| 1776 { | |
| 1777 Lisp_Object base; | |
| 1778 | |
| 1779 coding_system = Fget_coding_system (coding_system); | |
| 1780 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary)) | |
| 1781 return Fget_coding_system (Qraw_text); /* hack! */ | |
| 1782 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system); | |
| 1783 if (!NILP (base)) | |
| 1784 return base; | |
| 1785 return coding_system; | |
| 1786 } | |
| 1787 | |
| 1788 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io, | |
| 1789 1, 1, 0, /* | |
| 1790 Return the coding system actually used for I/O. | |
| 1791 In some cases (e.g. when a particular EOL type is specified) this won't be | |
| 2297 | 1792 the coding system itself. This can be useful when trying to determine |
| 1793 precisely how data was decoded. | |
| 771 | 1794 */ |
| 1795 (coding_system)) | |
| 1796 { | |
| 1797 Lisp_Object canon; | |
| 1798 | |
| 1799 coding_system = Fget_coding_system (coding_system); | |
| 1800 canon = XCODING_SYSTEM_CANONICAL (coding_system); | |
| 1801 if (!NILP (canon)) | |
| 1802 return canon; | |
| 1803 return coding_system; | |
| 1804 } | |
| 1805 | |
| 428 | 1806 |
| 1807 /************************************************************************/ | |
| 1808 /* Coding system accessors */ | |
| 1809 /************************************************************************/ | |
| 1810 | |
| 771 | 1811 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /* |
| 1812 Return the description for CODING-SYSTEM. | |
| 1813 The `description' of a coding system is a short English phrase giving the | |
| 1814 name rendered according to English punctuation rules, plus possibly some | |
| 1815 explanatory text (typically in the form of a parenthetical phrase). The | |
| 1816 description is intended to be short enough that it can appear as a menu item, | |
| 1817 and clear enough to be recognizable even to someone who is assumed to have | |
| 1818 some basic familiarity with different encodings but may not know all the | |
| 1819 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and | |
| 1820 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of | |
| 1821 the encoding is given, followed by a note that this is a Chinese encoding, | |
| 1822 because the great majority of people encountering this would have no idea | |
| 1823 what it is, and giving the language indicates whether the encoding should | |
| 1824 just be ignored or (conceivably) investigated more thoroughly. | |
| 428 | 1825 */ |
| 1826 (coding_system)) | |
| 1827 { | |
| 1828 coding_system = Fget_coding_system (coding_system); | |
| 771 | 1829 return XCODING_SYSTEM_DESCRIPTION (coding_system); |
| 428 | 1830 } |
| 1831 | |
| 1832 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /* | |
| 1833 Return the type of CODING-SYSTEM. | |
| 1834 */ | |
| 1835 (coding_system)) | |
| 1836 { | |
| 771 | 1837 coding_system = Fget_coding_system (coding_system); |
| 1838 return XCODING_SYSTEM_TYPE (coding_system); | |
| 428 | 1839 } |
| 1840 | |
| 1841 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /* | |
| 1842 Return the PROP property of CODING-SYSTEM. | |
| 1843 */ | |
| 1844 (coding_system, prop)) | |
| 1845 { | |
| 1846 coding_system = Fget_coding_system (coding_system); | |
| 1847 CHECK_SYMBOL (prop); | |
| 1848 | |
| 1849 if (EQ (prop, Qname)) | |
| 1850 return XCODING_SYSTEM_NAME (coding_system); | |
| 1851 else if (EQ (prop, Qtype)) | |
| 1852 return Fcoding_system_type (coding_system); | |
| 771 | 1853 else if (EQ (prop, Qdescription)) |
| 1854 return XCODING_SYSTEM_DESCRIPTION (coding_system); | |
| 428 | 1855 else if (EQ (prop, Qmnemonic)) |
| 1856 return XCODING_SYSTEM_MNEMONIC (coding_system); | |
| 771 | 1857 else if (EQ (prop, Qdocumentation)) |
| 1858 return XCODING_SYSTEM_DOCUMENTATION (coding_system); | |
| 428 | 1859 else if (EQ (prop, Qeol_type)) |
| 771 | 1860 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE |
| 1861 (coding_system)); | |
| 428 | 1862 else if (EQ (prop, Qeol_lf)) |
| 1863 return XCODING_SYSTEM_EOL_LF (coding_system); | |
| 1864 else if (EQ (prop, Qeol_crlf)) | |
| 1865 return XCODING_SYSTEM_EOL_CRLF (coding_system); | |
| 1866 else if (EQ (prop, Qeol_cr)) | |
| 1867 return XCODING_SYSTEM_EOL_CR (coding_system); | |
| 1868 else if (EQ (prop, Qpost_read_conversion)) | |
| 1869 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system); | |
| 1870 else if (EQ (prop, Qpre_write_conversion)) | |
| 1871 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system); | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1872 else if (EQ (prop, Qsafe_charsets)) |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1873 return XCODING_SYSTEM_SAFE_CHARSETS (coding_system); |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1874 else if (EQ (prop, Qsafe_chars)) |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4303
diff
changeset
|
1875 return XCODING_SYSTEM_SAFE_CHARS (coding_system); |
| 771 | 1876 else |
| 1877 { | |
| 1878 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system), | |
| 1879 getprop, | |
| 1880 (coding_system, prop), | |
| 1881 Qunbound); | |
| 1882 if (UNBOUNDP (value)) | |
| 1883 invalid_constant ("Unrecognized property", prop); | |
| 1884 return value; | |
| 1885 } | |
| 1886 } | |
| 1887 | |
| 1888 | |
| 1889 /************************************************************************/ | |
| 1890 /* Coding stream functions */ | |
| 1891 /************************************************************************/ | |
| 1892 | |
| 1893 /* A coding stream is a stream used for encoding or decoding text. The | |
| 1894 coding-stream object keeps track of the actual coding system, the stream | |
| 1895 that is at the other end, and data that needs to be persistent across | |
| 1896 the lifetime of the stream. */ | |
| 1897 | |
| 1204 | 1898 extern const struct sized_memory_description chain_coding_stream_description; |
| 1899 extern const struct sized_memory_description undecided_coding_stream_description; | |
| 1900 | |
| 1901 static const struct memory_description coding_stream_data_description_1 []= { | |
| 2551 | 1902 { XD_BLOCK_PTR, chain_coding_system, 1, |
| 1903 { &chain_coding_stream_description } }, | |
| 1904 { XD_BLOCK_PTR, undecided_coding_system, 1, | |
| 1905 { &undecided_coding_stream_description } }, | |
| 1204 | 1906 { XD_END } |
| 1907 }; | |
| 1908 | |
| 1909 static const struct sized_memory_description coding_stream_data_description = { | |
| 1910 sizeof (void *), coding_stream_data_description_1 | |
| 1911 }; | |
| 1912 | |
| 1913 static const struct memory_description coding_lstream_description[] = { | |
| 1914 { XD_INT, offsetof (struct coding_stream, type) }, | |
| 1915 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) }, | |
| 1916 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) }, | |
| 1917 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) }, | |
| 1918 { XD_UNION, offsetof (struct coding_stream, data), | |
| 2551 | 1919 XD_INDIRECT (0, 0), { &coding_stream_data_description } }, |
| 1204 | 1920 { XD_END } |
| 1921 }; | |
| 1922 | |
| 1923 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding); | |
| 771 | 1924 |
| 1925 /* Encoding and decoding are parallel operations, so we create just one | |
| 1926 stream for both. "Decoding" may involve the extra step of autodetection | |
| 1927 of the data format, but that's only because of the conventional | |
| 1928 definition of decoding as converting from external- to | |
| 1929 internal-formatted data. | |
| 1930 | |
| 2297 | 1931 [[ REWRITE ME! ]] |
| 1932 | |
| 771 | 1933 #### We really need to abstract out the concept of "data formats" and |
| 1934 define "converters" that convert from and to specified formats, | |
| 1935 eliminating the idea of decoding and encoding. When specifying a | |
| 1936 conversion process, we need to give the data formats themselves, not the | |
| 1937 conversion processes -- e.g. a coding system called "Unicode->multibyte" | |
| 1938 converts in both directions, and we could auto-detect the format of data | |
| 1939 at either end. */ | |
| 1940 | |
| 1941 static Bytecount | |
| 1942 coding_reader (Lstream *stream, unsigned char *data, Bytecount size) | |
| 1943 { | |
| 1944 unsigned char *orig_data = data; | |
| 1945 Bytecount read_size; | |
| 1946 int error_occurred = 0; | |
| 1947 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 1948 | |
| 1949 /* We need to interface to coding_{de,en}code_1(), which expects to take | |
| 1950 some amount of data and store the result into a Dynarr. We have | |
| 1951 coding_{de,en}code_1() store into c->runoff, and take data from there | |
| 1952 as necessary. */ | |
| 1953 | |
| 1954 /* We loop until we have enough data, reading chunks from the other | |
| 1955 end and converting it. */ | |
| 1956 while (1) | |
| 1957 { | |
| 1958 /* Take data from convert_to if we can. Make sure to take at | |
| 1959 most SIZE bytes, and delete the data from convert_to. */ | |
| 1960 if (Dynarr_length (str->convert_to) > 0) | |
| 1961 { | |
| 1962 Bytecount chunk = | |
| 1963 min (size, (Bytecount) Dynarr_length (str->convert_to)); | |
| 1964 memcpy (data, Dynarr_atp (str->convert_to, 0), chunk); | |
| 1965 Dynarr_delete_many (str->convert_to, 0, chunk); | |
| 1966 data += chunk; | |
| 1967 size -= chunk; | |
| 1968 } | |
| 1969 | |
| 1970 if (size == 0) | |
| 1971 break; /* No more room for data */ | |
| 1972 | |
| 1973 if (str->eof) | |
| 1974 break; | |
| 1975 | |
| 1976 { | |
| 1977 /* Exhausted convert_to, so get some more. Read into convert_from, | |
| 1978 after existing "rejected" data from the last conversion. */ | |
| 1979 Bytecount rejected = Dynarr_length (str->convert_from); | |
| 1980 /* #### 1024 is arbitrary; we really need to separate 0 from EOF, | |
| 1981 and when we get 0, keep taking more data until we don't get 0 -- | |
| 1982 we don't know how much data the conversion routine might need | |
| 2297 | 1983 before it can generate any data of its own (eg, bzip2). */ |
| 814 | 1984 Bytecount readmore = |
| 1985 str->one_byte_at_a_time ? (Bytecount) 1 : | |
| 1986 max (size, (Bytecount) 1024); | |
| 771 | 1987 |
| 1988 Dynarr_add_many (str->convert_from, 0, readmore); | |
| 1989 read_size = Lstream_read (str->other_end, | |
| 1990 Dynarr_atp (str->convert_from, rejected), | |
| 1991 readmore); | |
| 1992 /* Trim size down to how much we actually got */ | |
| 1993 Dynarr_set_size (str->convert_from, rejected + max (0, read_size)); | |
| 1994 } | |
| 1995 | |
| 1996 if (read_size < 0) /* LSTREAM_ERROR */ | |
| 1997 { | |
| 1998 error_occurred = 1; | |
| 1999 break; | |
| 2000 } | |
| 2001 if (read_size == 0) /* LSTREAM_EOF */ | |
| 2002 /* There might be some more end data produced in the translation, | |
| 2003 so we set a flag and call the conversion method once more to | |
| 2004 output any final stuff it may be holding, any "go back to a sane | |
| 2005 state" escape sequences, etc. The conversion method is free to | |
| 2006 look at this flag, and we use it above to stop looping. */ | |
| 2007 str->eof = 1; | |
| 2008 { | |
| 2009 Bytecount processed; | |
| 2010 Bytecount to_process = Dynarr_length (str->convert_from); | |
| 2011 | |
| 2012 /* Convert the data, and save any rejected data in convert_from */ | |
| 2013 processed = | |
| 2014 XCODESYSMETH (str->codesys, convert, | |
| 2015 (str, Dynarr_atp (str->convert_from, 0), | |
| 2016 str->convert_to, to_process)); | |
| 2017 if (processed < 0) | |
| 2018 { | |
| 2019 error_occurred = 1; | |
| 2020 break; | |
| 2021 } | |
| 2022 assert (processed <= to_process); | |
| 2023 if (processed < to_process) | |
| 2024 memmove (Dynarr_atp (str->convert_from, 0), | |
| 2025 Dynarr_atp (str->convert_from, processed), | |
| 2026 to_process - processed); | |
| 2027 Dynarr_set_size (str->convert_from, to_process - processed); | |
| 2028 } | |
| 2029 } | |
| 2030 | |
| 2031 if (data - orig_data == 0) | |
| 2032 return error_occurred ? -1 : 0; | |
| 2033 else | |
| 2034 return data - orig_data; | |
| 2035 } | |
| 2036 | |
| 2037 static Bytecount | |
| 2038 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size) | |
| 2039 { | |
| 2040 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2041 | |
| 2042 /* Convert all our data into convert_to, and then attempt to write | |
| 2043 it all out to the other end. */ | |
| 2044 Dynarr_reset (str->convert_to); | |
| 2045 size = XCODESYSMETH (str->codesys, convert, | |
| 2046 (str, data, str->convert_to, size)); | |
| 2047 if (Lstream_write (str->other_end, Dynarr_atp (str->convert_to, 0), | |
| 2048 Dynarr_length (str->convert_to)) < 0) | |
| 2049 return -1; | |
| 2050 else | |
| 2051 /* The return value indicates how much of the incoming data was | |
| 2052 processed, not how many bytes were written. */ | |
| 2053 return size; | |
| 2054 } | |
| 2055 | |
| 2056 static int | |
| 2057 encode_decode_source_sink_type_is_char (Lisp_Object cs, | |
| 2058 enum source_or_sink sex, | |
| 2059 enum encode_decode direction) | |
| 2060 { | |
| 2061 return (direction == CODING_DECODE ? | |
| 2062 decoding_source_sink_type_is_char (cs, sex) : | |
| 2063 encoding_source_sink_type_is_char (cs, sex)); | |
| 2064 } | |
| 2065 | |
| 2066 /* Ensure that the convert methods only get full characters sent to them to | |
| 2067 convert if the source of that conversion is characters; and that no such | |
| 2068 full-character checking happens when the source is bytes. Keep in mind | |
| 2069 that (1) the conversion_end_type return values take the perspective of | |
| 2070 encoding; (2) the source for decoding is the same as the sink for | |
| 2071 encoding; (3) when writing, the data is given to us, and we set our own | |
| 2072 stream to be character mode or not; (4) when reading, the data comes | |
| 2073 from the other_end stream, and we set that one to be character mode or | |
| 2074 not. This is consistent with the comment above the prototype for | |
| 2075 Lstream_set_character_mode(), which lays out rules for who is allowed to | |
| 2076 modify the character type mode on a stream. | |
| 2077 | |
| 814 | 2078 If we're a read stream, we're always setting character mode on the |
| 2079 source, but we also set it on ourselves consistent with the flag that | |
| 2080 can disable this (see again the comment above | |
| 2081 Lstream_set_character_mode()). | |
| 2082 */ | |
| 771 | 2083 |
| 2084 static void | |
| 2085 set_coding_character_mode (Lstream *stream) | |
| 2086 { | |
| 2087 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2088 Lstream *stream_to_set = | |
| 2089 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end; | |
| 2090 if (encode_decode_source_sink_type_is_char | |
| 2091 (str->codesys, CODING_SOURCE, str->direction)) | |
| 2092 Lstream_set_character_mode (stream_to_set); | |
| 2093 else | |
| 2094 Lstream_unset_character_mode (stream_to_set); | |
| 814 | 2095 if (str->set_char_mode_on_us_when_reading && |
| 2096 (stream->flags & LSTREAM_FL_READ)) | |
| 2097 { | |
| 2098 if (encode_decode_source_sink_type_is_char | |
| 2099 (str->codesys, CODING_SINK, str->direction)) | |
| 2100 Lstream_set_character_mode (stream); | |
| 2101 else | |
| 2102 Lstream_unset_character_mode (stream); | |
| 2103 } | |
| 771 | 2104 } |
| 2105 | |
| 2106 static Lisp_Object | |
| 2107 coding_marker (Lisp_Object stream) | |
| 2108 { | |
| 2109 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream)); | |
| 2110 | |
| 2111 mark_object (str->orig_codesys); | |
| 2112 mark_object (str->codesys); | |
| 2113 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str)); | |
| 2114 return wrap_lstream (str->other_end); | |
| 2115 } | |
| 2116 | |
| 2117 static int | |
| 2118 coding_rewinder (Lstream *stream) | |
| 2119 { | |
| 2120 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2121 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str)); | |
| 2122 | |
| 2123 str->ch = 0; | |
| 2124 Dynarr_reset (str->convert_to); | |
| 2125 Dynarr_reset (str->convert_from); | |
| 2126 return Lstream_rewind (str->other_end); | |
| 2127 } | |
| 2128 | |
| 2129 static int | |
| 2130 coding_seekable_p (Lstream *stream) | |
| 2131 { | |
| 2132 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2133 return Lstream_seekable_p (str->other_end); | |
| 2134 } | |
| 2135 | |
| 2136 static int | |
| 2137 coding_flusher (Lstream *stream) | |
| 2138 { | |
| 2139 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2140 return Lstream_flush (str->other_end); | |
| 2141 } | |
| 2142 | |
| 2143 static int | |
| 2144 coding_closer (Lstream *stream) | |
| 2145 { | |
| 2146 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2147 if (stream->flags & LSTREAM_FL_WRITE) | |
| 2148 { | |
| 2149 str->eof = 1; | |
| 2150 coding_writer (stream, 0, 0); | |
| 2151 str->eof = 0; | |
| 2152 } | |
| 2153 /* It's safe to free the runoff dynarrs now because they are used only | |
| 2154 during conversion. We need to keep the type-specific data around, | |
| 2155 though, because of canonicalize_after_coding. */ | |
| 2156 if (str->convert_to) | |
| 2157 { | |
| 2158 Dynarr_free (str->convert_to); | |
| 2159 str->convert_to = 0; | |
| 2160 } | |
| 2161 if (str->convert_from) | |
| 428 | 2162 { |
| 771 | 2163 Dynarr_free (str->convert_from); |
| 2164 str->convert_from = 0; | |
| 2165 } | |
| 2166 | |
| 800 | 2167 if (str->no_close_other) |
| 2168 return Lstream_flush (str->other_end); | |
| 2169 else | |
| 2170 return Lstream_close (str->other_end); | |
| 771 | 2171 } |
| 2172 | |
| 2173 static void | |
| 2174 coding_finalizer (Lstream *stream) | |
| 2175 { | |
| 2176 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2177 | |
| 2178 assert (!str->finalized); | |
| 2179 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
| 2180 if (str->data) | |
| 2181 { | |
| 1726 | 2182 xfree (str->data, void *); |
| 771 | 2183 str->data = 0; |
| 2184 } | |
| 2185 str->finalized = 1; | |
| 2186 } | |
| 2187 | |
| 2188 static Lisp_Object | |
| 2189 coding_stream_canonicalize_after_coding (Lstream *stream) | |
| 2190 { | |
| 2191 struct coding_stream *str = CODING_STREAM_DATA (stream); | |
| 2192 | |
| 2193 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding, | |
| 2194 (str), str->codesys); | |
| 2195 } | |
| 2196 | |
| 2197 Lisp_Object | |
| 2198 coding_stream_detected_coding_system (Lstream *stream) | |
| 2199 { | |
| 2200 Lisp_Object codesys = | |
| 2201 coding_stream_canonicalize_after_coding (stream); | |
| 2202 if (NILP (codesys)) | |
| 2203 return Fget_coding_system (Qidentity); | |
| 2204 return codesys; | |
| 2205 } | |
| 2206 | |
| 2207 Lisp_Object | |
| 2208 coding_stream_coding_system (Lstream *stream) | |
| 2209 { | |
| 2210 return CODING_STREAM_DATA (stream)->codesys; | |
| 2211 } | |
| 2212 | |
| 2213 /* Change the coding system associated with a stream. */ | |
| 2214 | |
| 2215 void | |
| 2216 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys) | |
| 2217 { | |
| 2218 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
| 2219 if (EQ (str->orig_codesys, codesys)) | |
| 2220 return; | |
| 2221 /* We do the equivalent of closing the stream, destroying it, and | |
| 2222 reinitializing it. This includes flushing out the data and signalling | |
| 2223 EOF, if we're a writing stream; we also replace the type-specific data | |
| 2224 with the data appropriate for the new coding system. */ | |
| 2225 if (!NILP (str->codesys)) | |
| 2226 { | |
| 2227 if (lstr->flags & LSTREAM_FL_WRITE) | |
| 2228 { | |
| 2229 Lstream_flush (lstr); | |
| 2230 str->eof = 1; | |
| 2231 coding_writer (lstr, 0, 0); | |
| 2232 str->eof = 0; | |
| 2233 } | |
| 2234 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str)); | |
| 2235 } | |
| 2236 str->orig_codesys = codesys; | |
| 2237 str->codesys = coding_system_real_canonical (codesys); | |
| 2238 | |
| 2239 if (str->data) | |
| 2240 { | |
| 1726 | 2241 xfree (str->data, void *); |
| 771 | 2242 str->data = 0; |
| 428 | 2243 } |
| 771 | 2244 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size) |
| 1204 | 2245 { |
| 2246 str->data = | |
| 2247 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)-> | |
| 2248 coding_data_size); | |
| 2249 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype; | |
| 2250 } | |
| 771 | 2251 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str)); |
| 2252 /* The new coding system may have different ideas regarding whether its | |
| 2253 ends are characters or bytes. */ | |
| 2254 set_coding_character_mode (lstr); | |
| 2255 } | |
| 2256 | |
| 2257 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding | |
| 2258 stream for writing, no automatic code detection will be performed. | |
| 2259 The reason for this is that automatic code detection requires a | |
| 2260 seekable input. Things will also fail if you open a coding | |
| 2261 stream for reading using a non-fully-specified coding system and | |
| 2262 a non-seekable input stream. */ | |
| 2263 | |
| 2264 static Lisp_Object | |
| 2265 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys, | |
| 800 | 2266 const char *mode, enum encode_decode direction, |
| 802 | 2267 int flags) |
| 771 | 2268 { |
| 2269 Lstream *lstr = Lstream_new (lstream_coding, mode); | |
| 2270 struct coding_stream *str = CODING_STREAM_DATA (lstr); | |
| 2271 | |
| 2272 codesys = Fget_coding_system (codesys); | |
| 2273 xzero (*str); | |
| 2274 str->codesys = Qnil; | |
| 2275 str->orig_codesys = Qnil; | |
| 2276 str->us = lstr; | |
| 2277 str->other_end = stream; | |
| 2278 str->convert_to = Dynarr_new (unsigned_char); | |
| 2279 str->convert_from = Dynarr_new (unsigned_char); | |
| 2280 str->direction = direction; | |
| 814 | 2281 if (flags & LSTREAM_FL_NO_CLOSE_OTHER) |
| 802 | 2282 str->no_close_other = 1; |
| 814 | 2283 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME) |
| 802 | 2284 str->one_byte_at_a_time = 1; |
| 814 | 2285 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING)) |
| 2286 str->set_char_mode_on_us_when_reading = 1; | |
| 802 | 2287 |
| 771 | 2288 set_coding_stream_coding_system (lstr, codesys); |
| 793 | 2289 return wrap_lstream (lstr); |
| 771 | 2290 } |
| 2291 | |
| 814 | 2292 /* FLAGS: |
| 2293 | |
| 2294 LSTREAM_FL_NO_CLOSE_OTHER | |
| 2295 Don't close STREAM (the stream at the other end) when this stream is | |
| 2296 closed. | |
| 2297 | |
| 2298 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME | |
| 2299 When reading from STREAM, read and process one byte at a time rather | |
| 2300 than in large chunks. This is for reading from TTY's, so we don't | |
| 2301 block. #### We should instead create a non-blocking filedesc stream | |
| 2302 that emulates the behavior as necessary using select(), when the | |
| 2303 fcntls don't work. (As seems to be the case on Cygwin.) | |
| 2304 | |
| 2305 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING | |
| 2306 When reading from STREAM, read and process one byte at a time rather | |
| 2307 than in large chunks. This is for reading from TTY's, so we don't | |
| 2308 block. #### We should instead create a non-blocking filedesc stream | |
| 2309 that emulates the behavior as necessary using select(), when the | |
| 2310 fcntls don't work. (As seems to be the case on Cygwin.) | |
| 2311 */ | |
| 771 | 2312 Lisp_Object |
| 2313 make_coding_input_stream (Lstream *stream, Lisp_Object codesys, | |
| 802 | 2314 enum encode_decode direction, int flags) |
| 771 | 2315 { |
| 800 | 2316 return make_coding_stream_1 (stream, codesys, "r", direction, |
| 802 | 2317 flags); |
| 771 | 2318 } |
| 2319 | |
| 814 | 2320 /* FLAGS: |
| 2321 | |
| 2322 LSTREAM_FL_NO_CLOSE_OTHER | |
| 2323 Don't close STREAM (the stream at the other end) when this stream is | |
| 2324 closed. | |
| 2325 */ | |
| 771 | 2326 Lisp_Object |
| 2327 make_coding_output_stream (Lstream *stream, Lisp_Object codesys, | |
| 802 | 2328 enum encode_decode direction, int flags) |
| 771 | 2329 { |
| 800 | 2330 return make_coding_stream_1 (stream, codesys, "w", direction, |
| 802 | 2331 flags); |
| 771 | 2332 } |
| 2333 | |
| 2334 static Lisp_Object | |
| 2335 encode_decode_coding_region (Lisp_Object start, Lisp_Object end, | |
| 2336 Lisp_Object coding_system, Lisp_Object buffer, | |
| 2337 enum encode_decode direction) | |
| 2338 { | |
| 2339 Charbpos b, e; | |
| 2340 struct buffer *buf = decode_buffer (buffer, 0); | |
| 2341 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil; | |
| 2342 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil; | |
| 2343 Lisp_Object lb_outstream = Qnil; | |
| 2344 Lisp_Object next; | |
| 2345 Lstream *istr, *ostr; | |
| 2346 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5; | |
| 2347 struct gcpro ngcpro1; | |
| 2348 int source_char, sink_char; | |
| 2349 | |
| 2350 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
| 2351 barf_if_buffer_read_only (buf, b, e); | |
| 2352 | |
| 2353 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream); | |
| 2354 NGCPRO1 (auto_outstream); | |
| 2355 | |
| 2356 coding_system = Fget_coding_system (coding_system); | |
| 2357 source_char = encode_decode_source_sink_type_is_char (coding_system, | |
| 2358 CODING_SOURCE, | |
| 2359 direction); | |
| 2360 sink_char = encode_decode_source_sink_type_is_char (coding_system, | |
| 2361 CODING_SINK, | |
| 2362 direction); | |
| 2363 | |
| 2364 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */ | |
| 2365 instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
| 2366 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0); | |
| 2367 | |
| 2368 if (direction == CODING_DECODE && | |
| 2369 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT) | |
| 2370 next = auto_outstream = | |
| 2371 make_coding_output_stream | |
| 800 | 2372 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect), |
| 2373 CODING_DECODE, 0); | |
| 771 | 2374 |
| 2375 if (!sink_char) | |
| 2376 next = from_outstream = | |
| 800 | 2377 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0); |
| 771 | 2378 outstream = make_coding_output_stream (XLSTREAM (next), coding_system, |
| 800 | 2379 direction, 0); |
| 771 | 2380 if (!source_char) |
| 428 | 2381 { |
| 771 | 2382 to_outstream = |
| 2383 make_coding_output_stream (XLSTREAM (outstream), | |
| 800 | 2384 Qbinary, CODING_ENCODE, 0); |
| 771 | 2385 ostr = XLSTREAM (to_outstream); |
| 2386 } | |
| 2387 else | |
| 2388 ostr = XLSTREAM (outstream); | |
| 2389 istr = XLSTREAM (instream); | |
| 2390 | |
| 2391 /* The chain of streams looks like this: | |
| 2392 | |
| 2297 | 2393 [BUFFER] <----- (( read from/send to loop )) |
| 771 | 2394 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is |
| 2395 in bytes] | |
| 2396 ------> [ENCODE/DECODE AS SPECIFIED] | |
| 2397 ------> [BYTE->CHAR i.e. DECODE AS BINARY | |
| 2398 if sink is in bytes] | |
| 2399 ------> [AUTODETECT EOL if | |
| 2400 we're decoding and | |
| 2401 coding system calls | |
| 2402 for this] | |
| 2403 ------> [BUFFER] | |
| 2404 */ | |
| 2367 | 2405 |
| 2406 /* #### See comment | |
| 2407 | |
| 2408 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS | |
| 2409 | |
| 2410 in text.c. | |
| 2411 */ | |
| 2412 | |
| 771 | 2413 while (1) |
| 2414 { | |
| 2415 char tempbuf[1024]; /* some random amount */ | |
| 2416 Charbpos newpos, even_newer_pos; | |
| 2417 Charbpos oldpos = lisp_buffer_stream_startpos (istr); | |
| 2418 Bytecount size_in_bytes = | |
| 2419 Lstream_read (istr, tempbuf, sizeof (tempbuf)); | |
| 2420 | |
| 2421 if (!size_in_bytes) | |
| 2422 break; | |
| 2423 newpos = lisp_buffer_stream_startpos (istr); | |
| 2424 Lstream_write (ostr, tempbuf, size_in_bytes); | |
| 2425 even_newer_pos = lisp_buffer_stream_startpos (istr); | |
| 2426 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos), | |
| 2427 even_newer_pos, 0); | |
| 428 | 2428 } |
| 771 | 2429 |
| 2430 { | |
| 2431 Charcount retlen = | |
| 2432 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b; | |
| 2433 Lstream_close (istr); | |
| 2434 Lstream_close (ostr); | |
| 2435 NUNGCPRO; | |
| 2436 UNGCPRO; | |
| 2437 Lstream_delete (istr); | |
| 2438 if (!NILP (from_outstream)) | |
| 2439 Lstream_delete (XLSTREAM (from_outstream)); | |
| 2440 Lstream_delete (XLSTREAM (outstream)); | |
| 2441 if (!NILP (to_outstream)) | |
| 2442 Lstream_delete (XLSTREAM (to_outstream)); | |
| 2443 if (!NILP (auto_outstream)) | |
| 2444 Lstream_delete (XLSTREAM (auto_outstream)); | |
| 2445 Lstream_delete (XLSTREAM (lb_outstream)); | |
| 2446 return make_int (retlen); | |
| 2447 } | |
| 2448 } | |
| 2449 | |
| 3302 | 2450 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, |
| 2451 "*r\nzDecode from coding system: \ni", /* | |
| 771 | 2452 Decode the text between START and END which is encoded in CODING-SYSTEM. |
| 2453 This is useful if you've read in encoded text from a file without decoding | |
| 2454 it (e.g. you read in a JIS-formatted file but used the `binary' or | |
| 2455 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B"). | |
| 2456 Return length of decoded text. | |
| 3302 | 2457 BUFFER defaults to the current buffer if unspecified, and when interactive. |
| 771 | 2458 */ |
| 2459 (start, end, coding_system, buffer)) | |
| 2460 { | |
| 2461 return encode_decode_coding_region (start, end, coding_system, buffer, | |
| 2462 CODING_DECODE); | |
| 2463 } | |
| 2464 | |
| 3302 | 2465 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, |
| 2466 "*r\nzEncode to coding system: \ni", /* | |
| 771 | 2467 Encode the text between START and END using CODING-SYSTEM. |
| 2468 This will, for example, convert Japanese characters into stuff such as | |
| 3302 | 2469 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded text. |
| 2470 BUFFER defaults to the current buffer if unspecified, and when interactive. | |
| 771 | 2471 */ |
| 2472 (start, end, coding_system, buffer)) | |
| 2473 { | |
| 2474 return encode_decode_coding_region (start, end, coding_system, buffer, | |
| 2475 CODING_ENCODE); | |
| 428 | 2476 } |
| 2477 | |
| 2478 | |
| 2479 /************************************************************************/ | |
| 771 | 2480 /* Chain methods */ |
| 428 | 2481 /************************************************************************/ |
| 2482 | |
| 771 | 2483 /* #### Need a way to create "opposite-direction" coding systems. */ |
| 2484 | |
| 2485 /* Chain two or more coding systems together to make a combination coding | |
| 2486 system. */ | |
| 2487 | |
| 2488 struct chain_coding_system | |
| 2489 { | |
| 2490 /* List of coding systems, in decode order */ | |
| 2491 Lisp_Object *chain; | |
| 2492 /* Number of coding systems in list */ | |
| 2493 int count; | |
| 2494 /* Coding system to return as a result of canonicalize-after-coding */ | |
| 2495 Lisp_Object canonicalize_after_coding; | |
| 2496 }; | |
| 2497 | |
| 2498 struct chain_coding_stream | |
| 2499 { | |
| 2500 int initted; | |
| 2501 /* Lstreams for chain coding system */ | |
| 2502 Lisp_Object *lstreams; | |
| 2503 int lstream_count; | |
| 2504 }; | |
| 2505 | |
| 1204 | 2506 static const struct memory_description chain_coding_system_description[] = { |
| 2507 { XD_INT, offsetof (struct chain_coding_system, count) }, | |
| 2367 | 2508 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain), |
| 2551 | 2509 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
| 1204 | 2510 { XD_LISP_OBJECT, offsetof (struct chain_coding_system, |
| 2511 canonicalize_after_coding) }, | |
| 771 | 2512 { XD_END } |
| 2513 }; | |
| 2514 | |
| 1204 | 2515 static const struct memory_description chain_coding_stream_description_1 [] = { |
| 2516 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) }, | |
| 2367 | 2517 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams), |
| 2551 | 2518 XD_INDIRECT (0, 0), { &lisp_object_description } }, |
| 771 | 2519 { XD_END } |
| 2520 }; | |
| 2521 | |
| 1204 | 2522 const struct sized_memory_description chain_coding_stream_description = { |
| 2523 sizeof (struct chain_coding_stream), chain_coding_stream_description_1 | |
| 2524 }; | |
| 2525 | |
| 2526 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain); | |
| 2527 | |
| 771 | 2528 static Lisp_Object |
| 2529 chain_canonicalize (Lisp_Object codesys) | |
| 2530 { | |
| 2531 /* We make use of the fact that this method is called at init time, after | |
| 2532 properties have been parsed. init_method is called too early. */ | |
| 2533 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
| 2534 any more. */ | |
| 2535 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys), | |
| 2536 XCODING_SYSTEM_CHAIN_CHAIN (codesys)); | |
| 2537 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys), | |
| 2538 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys), | |
| 2539 chain)); | |
| 2540 Fputhash (chain, codesys, Vchain_canonicalize_hash_table); | |
| 2541 return codesys; | |
| 2542 } | |
| 2543 | |
| 2544 static Lisp_Object | |
| 2545 chain_canonicalize_after_coding (struct coding_stream *str) | |
| 2546 { | |
| 2547 Lisp_Object cac = | |
| 2548 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys); | |
| 2549 if (!NILP (cac)) | |
| 2550 return cac; | |
| 2551 return str->codesys; | |
| 2552 #if 0 | |
| 2553 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
| 2554 Lisp_Object us = str->codesys, codesys; | |
| 2555 int i; | |
| 2556 Lisp_Object chain; | |
| 2557 Lisp_Object tail; | |
| 2558 int changed = 0; | |
| 2559 | |
| 2560 /* #### It's not clear we need this whole chain-canonicalize mechanism | |
| 2561 any more. */ | |
| 2562 if (str->direction == CODING_ENCODE || !data->initted) | |
| 2563 return us; | |
| 2564 | |
| 2565 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us), | |
| 2566 XCODING_SYSTEM_CHAIN_CHAIN (us)); | |
| 2567 | |
| 2568 tail = chain; | |
| 2569 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++) | |
| 2570 { | |
| 2571 codesys = (coding_stream_canonicalize_after_coding | |
| 2572 (XLSTREAM (data->lstreams[i]))); | |
| 2573 if (!EQ (codesys, XCAR (tail))) | |
| 2574 changed = 1; | |
| 2575 XCAR (tail) = codesys; | |
| 2576 tail = XCDR (tail); | |
| 2577 } | |
| 2578 | |
| 2579 if (!changed) | |
| 2580 return us; | |
| 2581 | |
| 2582 chain = delq_no_quit (Qnil, chain); | |
| 2583 | |
| 2584 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) && | |
| 2585 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us))) | |
| 2586 { | |
| 2587 if (NILP (chain)) | |
| 2588 return Qnil; | |
| 2589 if (NILP (XCDR (chain))) | |
| 2590 return XCAR (chain); | |
| 2591 } | |
| 2592 | |
| 2593 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us), | |
| 2594 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us), | |
| 2595 chain)), Vchain_canonicalize_hash_table, | |
| 2596 Qnil); | |
| 2597 if (!NILP (codesys)) | |
| 2598 return codesys; | |
| 2599 return make_internal_coding_system | |
| 2600 (us, "internal-chain-canonicalizer-wrapper", | |
| 2601 Qchain, Qunbound, list2 (Qchain, chain)); | |
| 2602 #endif /* 0 */ | |
| 2603 } | |
| 2604 | |
| 2605 static void | |
| 2606 chain_init (Lisp_Object codesys) | |
| 2607 { | |
| 2608 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil; | |
| 2609 } | |
| 2610 | |
| 2611 static void | |
| 2612 chain_mark (Lisp_Object codesys) | |
| 2613 { | |
| 2614 int i; | |
| 2615 | |
| 2616 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++) | |
| 2617 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]); | |
| 2618 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys)); | |
| 2619 } | |
| 2620 | |
| 2621 static void | |
| 2622 chain_mark_coding_stream_1 (struct chain_coding_stream *data) | |
| 2623 { | |
| 2624 int i; | |
| 2625 | |
| 2626 for (i = 0; i < data->lstream_count; i++) | |
| 2627 mark_object (data->lstreams[i]); | |
| 2628 } | |
| 2629 | |
| 2630 static void | |
| 2631 chain_mark_coding_stream (struct coding_stream *str) | |
| 2632 { | |
| 2633 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
| 2634 } | |
| 2635 | |
| 2636 static void | |
| 2637 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
| 2638 { | |
| 2639 int i; | |
| 2640 | |
| 826 | 2641 write_c_string (printcharfun, "("); |
| 771 | 2642 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++) |
| 2643 { | |
| 826 | 2644 write_c_string (printcharfun, i == 0 ? "" : "->"); |
| 771 | 2645 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i], |
| 2646 printcharfun, escapeflag); | |
| 2647 } | |
| 2648 { | |
| 2649 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs); | |
| 2650 if (!NILP (cac)) | |
| 2651 { | |
| 2652 if (i > 0) | |
| 826 | 2653 write_c_string (printcharfun, " "); |
| 2654 write_c_string (printcharfun, "canonicalize-after-coding="); | |
| 771 | 2655 print_coding_system_in_print_method (cac, printcharfun, escapeflag); |
| 2656 } | |
| 2657 } | |
| 2658 | |
| 826 | 2659 write_c_string (printcharfun, ")"); |
| 771 | 2660 } |
| 2661 | |
| 2662 static void | |
| 2663 chain_rewind_coding_stream_1 (struct chain_coding_stream *data) | |
| 2664 { | |
| 2665 /* Each will rewind the next; there is always at least one stream (the | |
| 2666 dynarr stream at the end) if we're initted */ | |
| 2667 if (data->initted) | |
| 2668 Lstream_rewind (XLSTREAM (data->lstreams[0])); | |
| 2669 } | |
| 2670 | |
| 2671 static void | |
| 2672 chain_rewind_coding_stream (struct coding_stream *str) | |
| 2673 { | |
| 2674 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
| 2675 } | |
| 2676 | |
| 2677 static void | |
| 2678 chain_init_coding_streams_1 (struct chain_coding_stream *data, | |
| 2679 unsigned_char_dynarr *dst, | |
| 2680 int ncodesys, Lisp_Object *codesys, | |
| 2681 enum encode_decode direction) | |
| 2682 { | |
| 2683 int i; | |
| 2684 Lisp_Object lstream_out; | |
| 2685 | |
| 2686 data->lstream_count = ncodesys + 1; | |
| 2687 data->lstreams = xnew_array (Lisp_Object, data->lstream_count); | |
| 2688 | |
| 2689 lstream_out = make_dynarr_output_stream (dst); | |
| 2690 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0); | |
| 2691 data->lstreams[data->lstream_count - 1] = lstream_out; | |
| 2692 | |
| 2693 for (i = ncodesys - 1; i >= 0; i--) | |
| 2694 { | |
| 2695 data->lstreams[i] = | |
| 2696 make_coding_output_stream | |
| 2697 (XLSTREAM (lstream_out), | |
| 2698 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i], | |
| 800 | 2699 direction, 0); |
| 771 | 2700 lstream_out = data->lstreams[i]; |
| 2701 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, | |
| 2702 0); | |
| 2703 } | |
| 2704 data->initted = 1; | |
| 2705 } | |
| 2706 | |
| 2707 static Bytecount | |
| 2708 chain_convert (struct coding_stream *str, const UExtbyte *src, | |
| 2709 unsigned_char_dynarr *dst, Bytecount n) | |
| 2710 { | |
| 2711 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain); | |
| 2712 | |
| 2713 if (str->eof) | |
| 2714 { | |
| 2715 /* Each will close the next; there is always at least one stream (the | |
| 2716 dynarr stream at the end) if we're initted. We need to close now | |
| 2717 because more data may be generated. */ | |
| 2718 if (data->initted) | |
| 2719 Lstream_close (XLSTREAM (data->lstreams[0])); | |
| 2720 return n; | |
| 2721 } | |
| 2722 | |
| 2723 if (!data->initted) | |
| 2724 chain_init_coding_streams_1 | |
| 2725 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys), | |
| 2726 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction); | |
| 2727 | |
| 2728 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0) | |
| 2729 return -1; | |
| 2730 return n; | |
| 2731 } | |
| 2732 | |
| 2733 static void | |
| 2734 chain_finalize_coding_stream_1 (struct chain_coding_stream *data) | |
| 2735 { | |
| 2736 if (data->lstreams) | |
| 2737 { | |
| 2297 | 2738 /* During GC, these objects are unmarked, and are about to be freed. |
| 2739 We do NOT want them on the free list, and that will cause lots of | |
| 2740 nastiness including crashes. Just let them be freed normally. */ | |
| 771 | 2741 if (!gc_in_progress) |
| 2742 { | |
| 2743 int i; | |
| 2297 | 2744 /* Order of deletion is important here! Delete from the head of |
| 2745 the chain and work your way towards the tail. In general, | |
| 2746 when you delete an object, there should be *NO* pointers to it | |
| 2747 anywhere. Deleting back-to-front would be a problem because | |
| 2748 there are pointers going forward. If there were pointers in | |
| 2749 both directions, you'd have to disconnect the pointers to a | |
| 2750 particular object before deleting it. */ | |
| 771 | 2751 for (i = 0; i < data->lstream_count; i++) |
| 2752 Lstream_delete (XLSTREAM ((data->lstreams)[i])); | |
| 2753 } | |
| 1726 | 2754 xfree (data->lstreams, Lisp_Object *); |
| 771 | 2755 } |
| 2756 } | |
| 2757 | |
| 2758 static void | |
| 2759 chain_finalize_coding_stream (struct coding_stream *str) | |
| 2760 { | |
| 2761 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain)); | |
| 2762 } | |
| 2763 | |
| 2764 static void | |
| 2765 chain_finalize (Lisp_Object c) | |
| 2766 { | |
| 2767 if (XCODING_SYSTEM_CHAIN_CHAIN (c)) | |
| 1726 | 2768 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c), Lisp_Object *); |
| 771 | 2769 } |
| 2770 | |
| 428 | 2771 static int |
| 771 | 2772 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
| 2773 { | |
| 2774 if (EQ (key, Qchain)) | |
| 2775 { | |
| 2776 Lisp_Object *cslist; | |
| 2777 int count = 0; | |
| 2778 int i; | |
| 2779 | |
| 2367 | 2780 { |
| 2781 EXTERNAL_LIST_LOOP_2 (elt, value) | |
| 2782 { | |
| 2783 Fget_coding_system (elt); | |
| 2784 count++; | |
| 2785 } | |
| 2786 } | |
| 771 | 2787 |
| 2788 cslist = xnew_array (Lisp_Object, count); | |
| 2789 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist; | |
| 2790 | |
| 2791 count = 0; | |
| 2367 | 2792 { |
| 2793 EXTERNAL_LIST_LOOP_2 (elt, value) | |
| 2794 { | |
| 2795 cslist[count] = Fget_coding_system (elt); | |
| 2796 count++; | |
| 2797 } | |
| 2798 } | |
| 771 | 2799 |
| 2800 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count; | |
| 2801 | |
| 2802 for (i = 0; i < count - 1; i++) | |
| 2803 { | |
| 2804 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) != | |
| 2805 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE)) | |
| 2806 invalid_argument_2 ("Sink of first must match source of second", | |
| 2807 cslist[i], cslist[i + 1]); | |
| 2808 } | |
| 2809 } | |
| 2810 else if (EQ (key, Qcanonicalize_after_coding)) | |
| 2811 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = | |
| 2812 Fget_coding_system (value); | |
| 2813 else | |
| 2814 return 0; | |
| 2815 return 1; | |
| 2816 } | |
| 2817 | |
| 2818 static Lisp_Object | |
| 2819 chain_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
| 2820 { | |
| 2821 if (EQ (prop, Qchain)) | |
| 2822 { | |
| 2823 Lisp_Object result = Qnil; | |
| 2824 int i; | |
| 2825 | |
| 2826 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++) | |
| 2827 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i], | |
| 2828 result); | |
| 2829 | |
| 2830 return Fnreverse (result); | |
| 2831 } | |
| 2832 else if (EQ (prop, Qcanonicalize_after_coding)) | |
| 2833 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system); | |
| 2834 else | |
| 2835 return Qunbound; | |
| 2836 } | |
| 2837 | |
| 2838 static enum source_sink_type | |
| 2839 chain_conversion_end_type (Lisp_Object codesys) | |
| 2840 { | |
| 2841 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys); | |
| 2842 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys); | |
| 2843 int charp_source, charp_sink; | |
| 2844 | |
| 2845 if (n == 0) | |
| 2846 return DECODES_BYTE_TO_BYTE; /* arbitrary */ | |
| 2847 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE); | |
| 2848 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK); | |
| 2849 | |
| 2850 switch (charp_source * 2 + charp_sink) | |
| 2851 { | |
| 2852 case 0: return DECODES_BYTE_TO_BYTE; | |
| 2853 case 1: return DECODES_BYTE_TO_CHARACTER; | |
| 2854 case 2: return DECODES_CHARACTER_TO_BYTE; | |
| 2855 case 3: return DECODES_CHARACTER_TO_CHARACTER; | |
| 2856 } | |
| 2857 | |
| 2500 | 2858 ABORT (); |
| 771 | 2859 return DECODES_BYTE_TO_BYTE; |
| 2860 } | |
| 2861 | |
| 2862 | |
| 2863 /************************************************************************/ | |
| 2864 /* No-conversion methods */ | |
| 2865 /************************************************************************/ | |
| 2866 | |
| 2867 /* "No conversion"; used for binary files. We use quotes because there | |
| 2868 really is some conversion being applied (it does byte<->char | |
| 2869 conversion), but it appears to the user as if the text is read in | |
| 2297 | 2870 without conversion. |
| 2871 | |
| 2872 #### Shouldn't we _call_ it that, then? And while we're at it, | |
| 2873 separate it into "to_internal" and "to_external"? */ | |
| 771 | 2874 DEFINE_CODING_SYSTEM_TYPE (no_conversion); |
| 2875 | |
| 2876 /* This is used when reading in "binary" files -- i.e. files that may | |
| 2877 contain all 256 possible byte values and that are not to be | |
| 2878 interpreted as being in any particular encoding. */ | |
| 2879 static Bytecount | |
| 2880 no_conversion_convert (struct coding_stream *str, | |
| 2881 const UExtbyte *src, | |
| 2882 unsigned_char_dynarr *dst, Bytecount n) | |
| 2883 { | |
| 2884 UExtbyte c; | |
| 2885 unsigned int ch = str->ch; | |
| 2886 Bytecount orign = n; | |
| 2887 | |
| 2888 if (str->direction == CODING_DECODE) | |
| 2889 { | |
| 2890 while (n--) | |
| 2891 { | |
| 2892 c = *src++; | |
| 2893 | |
| 2894 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 2895 } | |
| 2896 | |
| 2897 if (str->eof) | |
| 2898 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2899 } | |
| 2900 else | |
| 2901 { | |
| 2902 | |
| 2903 while (n--) | |
| 2904 { | |
| 2905 c = *src++; | |
| 826 | 2906 if (byte_ascii_p (c)) |
| 771 | 2907 { |
| 2908 assert (ch == 0); | |
| 2909 Dynarr_add (dst, c); | |
| 2910 } | |
| 2911 #ifdef MULE | |
| 867 | 2912 else if (ibyte_leading_byte_p (c)) |
| 771 | 2913 { |
| 2914 assert (ch == 0); | |
| 2915 if (c == LEADING_BYTE_LATIN_ISO8859_1 || | |
| 2916 c == LEADING_BYTE_CONTROL_1) | |
| 2917 ch = c; | |
| 2918 else | |
| 2297 | 2919 /* #### This is just plain unacceptable. */ |
| 771 | 2920 Dynarr_add (dst, '~'); /* untranslatable character */ |
| 2921 } | |
| 2922 else | |
| 2923 { | |
| 2924 if (ch == LEADING_BYTE_LATIN_ISO8859_1) | |
| 2925 Dynarr_add (dst, c); | |
| 2926 else if (ch == LEADING_BYTE_CONTROL_1) | |
| 2927 { | |
| 2928 assert (c < 0xC0); | |
| 2929 Dynarr_add (dst, c - 0x20); | |
| 2930 } | |
| 2931 /* else it should be the second or third byte of an | |
| 2932 untranslatable character, so ignore it */ | |
| 2933 ch = 0; | |
| 2934 } | |
| 2935 #endif /* MULE */ | |
| 2936 | |
| 2937 } | |
| 2938 } | |
| 2939 | |
| 2940 str->ch = ch; | |
| 2941 return orign; | |
| 2942 } | |
| 2943 | |
| 2944 DEFINE_DETECTOR (no_conversion); | |
| 2945 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
| 2946 | |
| 2947 struct no_conversion_detector | |
| 2948 { | |
| 2949 int dummy; | |
| 2950 }; | |
| 2951 | |
| 2952 static void | |
| 2286 | 2953 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src), |
| 2954 Bytecount UNUSED (n)) | |
| 771 | 2955 { |
| 2956 /* Hack until we get better handling of this stuff! */ | |
| 2957 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY; | |
| 2958 } | |
| 2959 | |
| 2960 | |
| 2961 /************************************************************************/ | |
| 2962 /* Convert-eol methods */ | |
| 2963 /************************************************************************/ | |
| 2964 | |
| 2965 /* This is used to handle end-of-line (EOL) differences. It is | |
| 2819 | 2966 character-to-character, and works (when encoding) *BEFORE* sending data to |
| 2967 the main encoding routine -- thus, that routine must handle different EOL | |
| 2968 types itself if it does line-oriented type processing. This is unavoidable | |
| 2969 because we don't know whether the output of the main encoding routine is | |
| 2970 ASCII compatible (UTF-16 is definitely not, for example). [[ sjt sez this | |
| 2971 is bogus. There should be _no_ EOL processing (or processing of any kind) | |
| 2972 after conversion to external. ]] | |
| 771 | 2973 |
| 793 | 2974 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil. |
| 771 | 2975 */ |
| 2976 | |
| 2977 struct convert_eol_coding_system | |
| 2978 { | |
| 2979 enum eol_type subtype; | |
| 2132 | 2980 int dummy; /* On some architectures (eg ia64) the portable dumper can |
| 2981 produce unaligned access errors without this field. Probably | |
| 2982 because the combined structure of this structure and | |
| 2983 Lisp_Coding_System is not properly aligned. */ | |
| 771 | 2984 }; |
| 2985 | |
| 2986 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
| 2987 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
| 2988 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \ | |
| 2989 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype) | |
| 2990 | |
| 2991 struct convert_eol_coding_stream | |
| 2992 { | |
| 2993 enum eol_type actual; | |
| 2994 }; | |
| 2995 | |
| 1204 | 2996 static const struct memory_description |
| 771 | 2997 convert_eol_coding_system_description[] = { |
| 2998 { XD_END } | |
| 2999 }; | |
| 3000 | |
| 1204 | 3001 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol); |
| 3002 | |
| 771 | 3003 static void |
| 2286 | 3004 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun, |
| 3005 int UNUSED (escapeflag)) | |
| 771 | 3006 { |
| 3007 struct convert_eol_coding_system *data = | |
| 3008 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol); | |
| 3009 | |
| 3010 write_fmt_string (printcharfun, "(%s)", | |
| 3011 data->subtype == EOL_LF ? "lf" : | |
| 3012 data->subtype == EOL_CRLF ? "crlf" : | |
| 3013 data->subtype == EOL_CR ? "cr" : | |
| 793 | 3014 data->subtype == EOL_AUTODETECT ? "nil" : |
| 2500 | 3015 (ABORT(), "")); |
| 771 | 3016 } |
| 3017 | |
| 3018 static enum source_sink_type | |
| 2286 | 3019 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys)) |
| 771 | 3020 { |
| 3021 return DECODES_CHARACTER_TO_CHARACTER; | |
| 3022 } | |
| 3023 | |
| 3024 static int | |
| 3025 convert_eol_putprop (Lisp_Object codesys, | |
| 3026 Lisp_Object key, | |
| 3027 Lisp_Object value) | |
| 3028 { | |
| 3029 struct convert_eol_coding_system *data = | |
| 3030 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol); | |
| 3031 | |
| 3032 if (EQ (key, Qsubtype)) | |
| 3033 { | |
| 3034 if (EQ (value, Qlf) /* || EQ (value, Qunix) */) | |
| 3035 data->subtype = EOL_LF; | |
| 3036 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */) | |
| 3037 data->subtype = EOL_CRLF; | |
| 3038 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */) | |
| 3039 data->subtype = EOL_CR; | |
| 793 | 3040 else if (EQ (value, Qnil)) |
| 771 | 3041 data->subtype = EOL_AUTODETECT; |
| 3042 else invalid_constant ("Unrecognized eol type", value); | |
| 3043 } | |
| 3044 else | |
| 3045 return 0; | |
| 3046 return 1; | |
| 3047 } | |
| 3048 | |
| 3049 static Lisp_Object | |
| 3050 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
| 3051 { | |
| 3052 struct convert_eol_coding_system *data = | |
| 3053 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol); | |
| 3054 | |
| 3055 if (EQ (prop, Qsubtype)) | |
| 3056 { | |
| 3057 switch (data->subtype) | |
| 3058 { | |
| 3059 case EOL_LF: return Qlf; | |
| 3060 case EOL_CRLF: return Qcrlf; | |
| 3061 case EOL_CR: return Qcr; | |
| 793 | 3062 case EOL_AUTODETECT: return Qnil; |
| 2500 | 3063 default: ABORT (); |
| 771 | 3064 } |
| 3065 } | |
| 3066 | |
| 3067 return Qunbound; | |
| 3068 } | |
| 3069 | |
| 3070 static void | |
| 3071 convert_eol_init_coding_stream (struct coding_stream *str) | |
| 3072 { | |
| 3073 struct convert_eol_coding_stream *data = | |
| 3074 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
| 3075 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
| 3076 } | |
| 3077 | |
| 3078 static Bytecount | |
| 867 | 3079 convert_eol_convert (struct coding_stream *str, const Ibyte *src, |
| 771 | 3080 unsigned_char_dynarr *dst, Bytecount n) |
| 3081 { | |
| 3082 if (str->direction == CODING_DECODE) | |
| 3083 { | |
| 3084 struct convert_eol_coding_stream *data = | |
| 3085 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
| 3086 | |
| 3087 if (data->actual == EOL_AUTODETECT) | |
| 3088 { | |
| 3089 Bytecount n2 = n; | |
| 867 | 3090 const Ibyte *src2 = src; |
| 771 | 3091 |
| 3092 for (; n2; n2--) | |
| 3093 { | |
| 867 | 3094 Ibyte c = *src2++; |
| 771 | 3095 if (c == '\n') |
| 3096 { | |
| 3097 data->actual = EOL_LF; | |
| 3098 break; | |
| 3099 } | |
| 3100 else if (c == '\r') | |
| 3101 { | |
| 3102 if (n2 == 1) | |
| 3103 { | |
| 3104 /* If we're seeing a '\r' at the end of the data, then | |
| 3105 reject the '\r' right now so it doesn't become an | |
| 3106 issue in the code below -- unless we're at the end of | |
| 3107 the stream, in which case we can't do that (because | |
| 3108 then the '\r' will never get written out), and in any | |
| 3109 case we should be recognizing it at EOL_CR format. */ | |
| 3110 if (str->eof) | |
| 3111 data->actual = EOL_CR; | |
| 3112 else | |
| 3113 n--; | |
| 3114 break; | |
| 3115 } | |
| 3116 else if (*src2 == '\n') | |
| 3117 data->actual = EOL_CRLF; | |
| 3118 else | |
| 3119 data->actual = EOL_CR; | |
| 3120 break; | |
| 3121 } | |
| 3122 } | |
| 3123 } | |
| 3124 | |
| 3125 /* str->eof is set, the caller reached EOF on the other end and has | |
| 3126 no new data to give us. The only data we get is the data we | |
| 3127 rejected from last time. */ | |
| 3128 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT || | |
| 3129 (str->eof)) | |
| 3130 Dynarr_add_many (dst, src, n); | |
| 3131 else | |
| 3132 { | |
| 867 | 3133 const Ibyte *end = src + n; |
| 771 | 3134 while (1) |
| 3135 { | |
| 3136 /* Find the next section with no \r and add it. */ | |
| 867 | 3137 const Ibyte *runstart = src; |
| 3138 src = (Ibyte *) memchr (src, '\r', end - src); | |
| 771 | 3139 if (!src) |
| 3140 src = end; | |
| 3141 Dynarr_add_many (dst, runstart, src - runstart); | |
| 3142 /* Stop if at end ... */ | |
| 3143 if (src == end) | |
| 3144 break; | |
| 3145 /* ... else, translate as necessary. */ | |
| 3146 src++; | |
| 3147 if (data->actual == EOL_CR) | |
| 3148 Dynarr_add (dst, '\n'); | |
| 3149 /* We need to be careful here with CRLF. If we see a CR at the | |
| 3150 end of the data, we don't know if it's part of a CRLF, so we | |
| 3151 reject it. Otherwise: If it's part of a CRLF, eat it and | |
| 3152 loop; the following LF gets added next time around. If it's | |
| 3153 not part of a CRLF, add the CR and loop. The following | |
| 3154 character will be processed in the next loop iteration. This | |
| 3155 correctly handles a sequence like CR+CR+LF. */ | |
| 3156 else if (src == end) | |
| 3157 return n - 1; /* reject the CR at the end; we'll get it again | |
| 3158 next time the convert method is called */ | |
| 3159 else if (*src != '\n') | |
| 3160 Dynarr_add (dst, '\r'); | |
| 3161 } | |
| 3162 } | |
| 3163 | |
| 3164 return n; | |
| 3165 } | |
| 3166 else | |
| 3167 { | |
| 3168 enum eol_type subtype = | |
| 3169 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys); | |
| 867 | 3170 const Ibyte *end = src + n; |
| 771 | 3171 |
| 3172 /* We try to be relatively efficient here. */ | |
| 3173 if (subtype == EOL_LF) | |
| 3174 Dynarr_add_many (dst, src, n); | |
| 3175 else | |
| 3176 { | |
| 3177 while (1) | |
| 3178 { | |
| 3179 /* Find the next section with no \n and add it. */ | |
| 867 | 3180 const Ibyte *runstart = src; |
| 3181 src = (Ibyte *) memchr (src, '\n', end - src); | |
| 771 | 3182 if (!src) |
| 3183 src = end; | |
| 3184 Dynarr_add_many (dst, runstart, src - runstart); | |
| 3185 /* Stop if at end ... */ | |
| 3186 if (src == end) | |
| 3187 break; | |
| 3188 /* ... else, skip over \n and add its translation. */ | |
| 3189 src++; | |
| 3190 Dynarr_add (dst, '\r'); | |
| 3191 if (subtype == EOL_CRLF) | |
| 3192 Dynarr_add (dst, '\n'); | |
| 3193 } | |
| 3194 } | |
| 3195 | |
| 3196 return n; | |
| 3197 } | |
| 3198 } | |
| 3199 | |
| 3200 static Lisp_Object | |
| 3201 convert_eol_canonicalize_after_coding (struct coding_stream *str) | |
| 3202 { | |
| 3203 struct convert_eol_coding_stream *data = | |
| 3204 CODING_STREAM_TYPE_DATA (str, convert_eol); | |
| 3205 | |
| 3206 if (str->direction == CODING_ENCODE) | |
| 3207 return str->codesys; | |
| 3208 | |
| 3209 switch (data->actual) | |
| 3210 { | |
| 3211 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf); | |
| 3212 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf); | |
| 3213 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr); | |
| 3214 case EOL_AUTODETECT: return str->codesys; | |
| 2500 | 3215 default: ABORT (); return Qnil; |
| 771 | 3216 } |
| 3217 } | |
| 3218 | |
| 3219 | |
| 3220 /************************************************************************/ | |
| 3221 /* Undecided methods */ | |
| 3222 /************************************************************************/ | |
| 3223 | |
| 3224 /* Do autodetection. We can autodetect the EOL type only, the coding | |
| 3225 system only, or both. We only do autodetection when decoding; when | |
| 3226 encoding, we just pass the data through. | |
| 3227 | |
| 3228 When doing just EOL detection, a coding system can be specified; if so, | |
| 3229 we will decode this data through the coding system before doing EOL | |
| 3230 detection. The reason for specifying this is so that | |
| 3231 canonicalize-after-coding works: We will canonicalize the specified | |
| 3232 coding system into the appropriate EOL type. When doing both coding and | |
| 3233 EOL detection, we do similar canonicalization, and also catch situations | |
| 3234 where the EOL type is overspecified, i.e. the detected coding system | |
| 3235 specifies an EOL type, and either switch to the equivalent | |
| 3236 non-EOL-processing coding system (if possible), or terminate EOL | |
| 3237 detection and use the specified EOL type. This prevents data from being | |
| 3238 EOL-processed twice. | |
| 3239 */ | |
| 3240 | |
| 3241 struct undecided_coding_system | |
| 3242 { | |
| 3243 int do_eol, do_coding; | |
| 3244 Lisp_Object cs; | |
| 3245 }; | |
| 3246 | |
| 3247 struct undecided_coding_stream | |
| 3248 { | |
| 3249 Lisp_Object actual; | |
| 3250 /* Either 2 or 3 lstreams here; see undecided_convert */ | |
| 3251 struct chain_coding_stream c; | |
| 3252 | |
| 3253 struct detection_state *st; | |
| 3254 }; | |
| 3255 | |
| 1204 | 3256 static const struct memory_description undecided_coding_system_description[] = { |
| 3257 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) }, | |
| 771 | 3258 { XD_END } |
| 3259 }; | |
| 3260 | |
| 1204 | 3261 static const struct memory_description undecided_coding_stream_description_1 [] = { |
| 3262 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) }, | |
| 2367 | 3263 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c), |
| 2551 | 3264 1, { &chain_coding_stream_description } }, |
| 1204 | 3265 { XD_END } |
| 3266 }; | |
| 3267 | |
| 3268 const struct sized_memory_description undecided_coding_stream_description = { | |
| 3269 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1 | |
| 3270 }; | |
| 3271 | |
| 3272 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided); | |
| 3273 | |
| 771 | 3274 static void |
| 3275 undecided_init (Lisp_Object codesys) | |
| 3276 { | |
| 3277 struct undecided_coding_system *data = | |
| 3278 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3279 | |
| 3280 data->cs = Qnil; | |
| 3281 } | |
| 3282 | |
| 3283 static void | |
| 3284 undecided_mark (Lisp_Object codesys) | |
| 3285 { | |
| 3286 struct undecided_coding_system *data = | |
| 3287 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3288 | |
| 3289 mark_object (data->cs); | |
| 3290 } | |
| 3291 | |
| 3292 static void | |
| 3293 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
| 3294 { | |
| 3295 struct undecided_coding_system *data = | |
| 3296 XCODING_SYSTEM_TYPE_DATA (cs, undecided); | |
| 3297 int need_space = 0; | |
| 3298 | |
| 826 | 3299 write_c_string (printcharfun, "("); |
| 771 | 3300 if (data->do_eol) |
| 3301 { | |
| 826 | 3302 write_c_string (printcharfun, "do-eol"); |
| 771 | 3303 need_space = 1; |
| 3304 } | |
| 3305 if (data->do_coding) | |
| 3306 { | |
| 3307 if (need_space) | |
| 826 | 3308 write_c_string (printcharfun, " "); |
| 3309 write_c_string (printcharfun, "do-coding"); | |
| 771 | 3310 need_space = 1; |
| 3311 } | |
| 3312 if (!NILP (data->cs)) | |
| 3313 { | |
| 3314 if (need_space) | |
| 826 | 3315 write_c_string (printcharfun, " "); |
| 3316 write_c_string (printcharfun, "coding-system="); | |
| 771 | 3317 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag); |
| 3318 } | |
| 826 | 3319 write_c_string (printcharfun, ")"); |
| 771 | 3320 } |
| 3321 | |
| 3322 static void | |
| 3323 undecided_mark_coding_stream (struct coding_stream *str) | |
| 3324 { | |
| 1204 | 3325 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual); |
| 771 | 3326 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); |
| 3327 } | |
| 3328 | |
| 3329 static int | |
| 3330 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
| 3331 { | |
| 3332 struct undecided_coding_system *data = | |
| 3333 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3334 | |
| 3335 if (EQ (key, Qdo_eol)) | |
| 3336 data->do_eol = 1; | |
| 3337 else if (EQ (key, Qdo_coding)) | |
| 3338 data->do_coding = 1; | |
| 3339 else if (EQ (key, Qcoding_system)) | |
| 3340 data->cs = get_coding_system_for_text_file (value, 0); | |
| 3341 else | |
| 3342 return 0; | |
| 3343 return 1; | |
| 3344 } | |
| 3345 | |
| 3346 static Lisp_Object | |
| 3347 undecided_getprop (Lisp_Object codesys, Lisp_Object prop) | |
| 3348 { | |
| 3349 struct undecided_coding_system *data = | |
| 3350 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3351 | |
| 3352 if (EQ (prop, Qdo_eol)) | |
| 3353 return data->do_eol ? Qt : Qnil; | |
| 3354 if (EQ (prop, Qdo_coding)) | |
| 3355 return data->do_coding ? Qt : Qnil; | |
| 3356 if (EQ (prop, Qcoding_system)) | |
| 3357 return data->cs; | |
| 3358 return Qunbound; | |
| 3359 } | |
| 3360 | |
| 3361 static struct detection_state * | |
| 3362 allocate_detection_state (void) | |
| 3363 { | |
| 3364 int i; | |
| 3365 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
| 3366 struct detection_state *block; | |
| 3367 | |
| 3368 for (i = 0; i < coding_detector_count; i++) | |
| 3369 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
| 3370 | |
| 3371 block = (struct detection_state *) xmalloc_and_zero (size); | |
| 3372 | |
| 3373 size = MAX_ALIGN_SIZE (sizeof (struct detection_state)); | |
| 3374 for (i = 0; i < coding_detector_count; i++) | |
| 3375 { | |
| 3376 block->data_offset[i] = size; | |
| 3377 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size); | |
| 3378 } | |
| 3379 | |
| 3380 return block; | |
| 3381 } | |
| 3382 | |
| 3383 static void | |
| 3384 free_detection_state (struct detection_state *st) | |
| 3385 { | |
| 3386 int i; | |
| 3387 | |
| 3388 for (i = 0; i < coding_detector_count; i++) | |
| 3389 { | |
| 3390 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method) | |
| 3391 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method | |
| 3392 (st); | |
| 3393 } | |
| 3394 | |
| 1726 | 3395 xfree (st, struct detection_state *); |
| 771 | 3396 } |
| 3397 | |
| 3398 static int | |
| 3399 coding_category_symbol_to_id (Lisp_Object symbol) | |
| 428 | 3400 { |
| 3401 int i; | |
| 3402 | |
| 3403 CHECK_SYMBOL (symbol); | |
| 771 | 3404 for (i = 0; i < coding_detector_count; i++) |
| 3405 { | |
| 3406 detector_category_dynarr *cats = | |
| 3407 Dynarr_at (all_coding_detectors, i).cats; | |
| 3408 int j; | |
| 3409 | |
| 3410 for (j = 0; j < Dynarr_length (cats); j++) | |
| 3411 if (EQ (Dynarr_at (cats, j).sym, symbol)) | |
| 3412 return Dynarr_at (cats, j).id; | |
| 3413 } | |
| 3414 | |
| 563 | 3415 invalid_constant ("Unrecognized coding category", symbol); |
| 1204 | 3416 RETURN_NOT_REACHED (0); |
| 428 | 3417 } |
| 3418 | |
| 771 | 3419 static Lisp_Object |
| 3420 coding_category_id_to_symbol (int id) | |
| 428 | 3421 { |
| 3422 int i; | |
| 771 | 3423 |
| 3424 for (i = 0; i < coding_detector_count; i++) | |
| 3425 { | |
| 3426 detector_category_dynarr *cats = | |
| 3427 Dynarr_at (all_coding_detectors, i).cats; | |
| 3428 int j; | |
| 3429 | |
| 3430 for (j = 0; j < Dynarr_length (cats); j++) | |
| 3431 if (id == Dynarr_at (cats, j).id) | |
| 3432 return Dynarr_at (cats, j).sym; | |
| 3433 } | |
| 3434 | |
| 2500 | 3435 ABORT (); |
| 771 | 3436 return Qnil; /* (usually) not reached */ |
| 428 | 3437 } |
| 3438 | |
| 771 | 3439 static Lisp_Object |
| 3440 detection_result_number_to_symbol (enum detection_result result) | |
| 428 | 3441 { |
| 1494 | 3442 /* let compiler warn if not all enumerators are handled */ |
| 3443 switch (result) { | |
| 3444 #define FROB(sym, num) case num: return (sym) | |
| 771 | 3445 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); |
| 3446 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
| 3447 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
| 1494 | 3448 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
| 771 | 3449 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
| 3450 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
| 3451 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
| 3452 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
| 3453 #undef FROB | |
| 1494 | 3454 } |
| 771 | 3455 |
| 2500 | 3456 ABORT (); |
| 771 | 3457 return Qnil; /* (usually) not reached */ |
| 3458 } | |
| 3459 | |
| 778 | 3460 #if 0 /* not used */ |
| 771 | 3461 static enum detection_result |
| 3462 detection_result_symbol_to_number (Lisp_Object symbol) | |
| 3463 { | |
| 1494 | 3464 /* using switch here would be bad style, and doesn't help */ |
| 771 | 3465 #define FROB(sym, num) if (EQ (symbol, sym)) return (num) |
| 3466 FROB (Qnear_certainty, DET_NEAR_CERTAINTY); | |
| 3467 FROB (Qquite_probable, DET_QUITE_PROBABLE); | |
| 3468 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY); | |
| 1494 | 3469 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY); |
| 771 | 3470 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY); |
| 3471 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY); | |
| 3472 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE); | |
| 3473 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE); | |
| 3474 #undef FROB | |
| 3475 | |
| 3476 invalid_constant ("Unrecognized detection result", symbol); | |
| 3477 return ((enum detection_result) 0); /* not reached */ | |
| 3478 } | |
| 778 | 3479 #endif /* 0 */ |
| 771 | 3480 |
| 3481 /* Set all detection results for a given detector to a specified value. */ | |
| 3482 void | |
| 3483 set_detection_results (struct detection_state *st, int detector, int given) | |
| 3484 { | |
| 3485 detector_category_dynarr *cats = | |
| 3486 Dynarr_at (all_coding_detectors, detector).cats; | |
| 3487 int i; | |
| 3488 | |
| 3489 for (i = 0; i < Dynarr_length (cats); i++) | |
| 3490 st->categories[Dynarr_at (cats, i).id] = given; | |
| 3491 } | |
| 428 | 3492 |
| 3493 static int | |
| 3494 acceptable_control_char_p (int c) | |
| 3495 { | |
| 3496 switch (c) | |
| 3497 { | |
| 3498 /* Allow and ignore control characters that you might | |
| 3499 reasonably see in a text file */ | |
| 3500 case '\r': | |
| 3501 case '\n': | |
| 3502 case '\t': | |
| 3503 case 7: /* bell */ | |
| 3504 case 8: /* backspace */ | |
| 3505 case 11: /* vertical tab */ | |
| 3506 case 12: /* form feed */ | |
| 3507 case 26: /* MS-DOS C-z junk */ | |
| 3508 case 31: /* '^_' -- for info */ | |
| 3509 return 1; | |
| 3510 default: | |
| 3511 return 0; | |
| 3512 } | |
| 3513 } | |
| 3514 | |
| 771 | 3515 #ifdef DEBUG_XEMACS |
| 3516 | |
| 3517 static UExtbyte | |
| 3518 hex_digit_to_char (int digit) | |
| 428 | 3519 { |
| 771 | 3520 if (digit < 10) |
| 3521 return digit + '0'; | |
| 3522 else | |
| 3523 return digit - 10 + 'A'; | |
| 428 | 3524 } |
| 3525 | |
| 771 | 3526 static void |
| 3527 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n) | |
| 428 | 3528 { |
| 3425 | 3529 Extbyte *ascii = alloca_array (Extbyte, n + 1); |
| 3530 Extbyte *hex = alloca_array (Extbyte, 3 * n + 1); | |
| 771 | 3531 int i; |
| 3413 | 3532 DECLARE_EISTRING (eistr_ascii); |
| 3533 DECLARE_EISTRING (eistr_hex); | |
| 771 | 3534 |
| 3535 for (i = 0; i < n; i++) | |
| 428 | 3536 { |
| 3425 | 3537 Extbyte c = src[i]; |
| 771 | 3538 if (c < 0x20) |
| 3539 ascii[i] = '.'; | |
| 428 | 3540 else |
| 771 | 3541 ascii[i] = c; |
| 3542 hex[3 * i] = hex_digit_to_char (c >> 4); | |
| 3543 hex[3 * i + 1] = hex_digit_to_char (c & 0xF); | |
| 3544 hex[3 * i + 2] = ' '; | |
| 428 | 3545 } |
| 771 | 3546 ascii[i] = '\0'; |
| 3547 hex[3 * i - 1] = '\0'; | |
| 3413 | 3548 |
| 3549 eicpy_ext(eistr_hex, hex, Qbinary); | |
| 3550 eicpy_ext(eistr_ascii, ascii, Qbinary); | |
| 3551 | |
| 3425 | 3552 stderr_out ("%s %s", eidata(eistr_ascii), eidata(eistr_hex)); |
| 428 | 3553 } |
| 3554 | |
| 771 | 3555 #endif /* DEBUG_XEMACS */ |
| 3556 | |
| 3557 /* Attempt to determine the encoding of the given text. Before calling | |
| 3558 this function for the first time, you must zero out the detection state. | |
| 428 | 3559 |
| 3560 Returns: | |
| 3561 | |
| 771 | 3562 0 == keep going |
| 3563 1 == stop | |
| 428 | 3564 */ |
| 3565 | |
| 3566 static int | |
| 771 | 3567 detect_coding_type (struct detection_state *st, const UExtbyte *src, |
| 3568 Bytecount n) | |
| 428 | 3569 { |
| 771 | 3570 Bytecount n2 = n; |
| 3571 const UExtbyte *src2 = src; | |
| 3572 int i; | |
| 3573 | |
| 3574 #ifdef DEBUG_XEMACS | |
| 3575 if (!NILP (Vdebug_coding_detection)) | |
| 3576 { | |
| 3577 int bytes = min (16, n); | |
| 3578 stderr_out ("detect_coding_type: processing %ld bytes\n", n); | |
| 3579 stderr_out ("First %d: ", bytes); | |
| 3580 output_bytes_in_ascii_and_hex (src, bytes); | |
| 3581 stderr_out ("\nLast %d: ", bytes); | |
| 3582 output_bytes_in_ascii_and_hex (src + n - bytes, bytes); | |
| 3583 stderr_out ("\n"); | |
| 3584 } | |
| 3585 #endif /* DEBUG_XEMACS */ | |
| 428 | 3586 if (!st->seen_non_ascii) |
| 3587 { | |
| 771 | 3588 for (; n2; n2--, src2++) |
| 428 | 3589 { |
| 771 | 3590 UExtbyte c = *src2; |
| 428 | 3591 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80) |
| 3592 { | |
| 3593 st->seen_non_ascii = 1; | |
| 3594 break; | |
| 3595 } | |
| 3596 } | |
| 3597 } | |
| 3598 | |
| 771 | 3599 for (i = 0; i < coding_detector_count; i++) |
| 3600 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n); | |
| 3601 | |
| 3602 st->bytes_seen += n; | |
| 3603 | |
| 3604 #ifdef DEBUG_XEMACS | |
| 3605 if (!NILP (Vdebug_coding_detection)) | |
| 3606 { | |
| 3607 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii); | |
| 1494 | 3608 if (coding_detector_category_count <= 0) |
| 3609 stderr_out ("found %d detector categories\n", | |
| 3610 coding_detector_category_count); | |
| 771 | 3611 for (i = 0; i < coding_detector_category_count; i++) |
| 3612 stderr_out_lisp | |
| 3613 ("%s: %s\n", | |
| 3614 2, | |
| 3615 coding_category_id_to_symbol (i), | |
| 3616 detection_result_number_to_symbol ((enum detection_result) | |
| 3617 st->categories[i])); | |
| 3618 } | |
| 3619 #endif /* DEBUG_XEMACS */ | |
| 3620 | |
| 3621 { | |
| 3622 int not_unlikely = 0; | |
| 3623 int retval; | |
| 3624 | |
| 3625 for (i = 0; i < coding_detector_category_count; i++) | |
| 3626 if (st->categories[i] >= 0) | |
| 3627 not_unlikely++; | |
| 3628 | |
| 3629 retval = (not_unlikely <= 1 | |
| 3630 #if 0 /* this is bogus */ | |
| 3631 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION | |
| 428 | 3632 #endif |
| 771 | 3633 ); |
| 3634 | |
| 3635 #ifdef DEBUG_XEMACS | |
| 3636 if (!NILP (Vdebug_coding_detection)) | |
| 3637 stderr_out ("detect_coding_type: returning %d (%s)\n", | |
| 3638 retval, retval ? "stop" : "keep going"); | |
| 3639 #endif /* DEBUG_XEMACS */ | |
| 3640 | |
| 3641 return retval; | |
| 428 | 3642 } |
| 3643 } | |
| 3644 | |
| 3645 static Lisp_Object | |
| 771 | 3646 detected_coding_system (struct detection_state *st) |
| 428 | 3647 { |
| 771 | 3648 int i; |
| 3649 int even = 1; | |
| 3650 | |
| 3651 if (st->seen_non_ascii) | |
| 3652 { | |
| 3653 for (i = 0; i < coding_detector_category_count; i++) | |
| 3654 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY) | |
| 3655 { | |
| 3656 even = 0; | |
| 3657 break; | |
| 3658 } | |
| 3659 } | |
| 3660 | |
| 3661 /* #### Here we are ignoring the results of detection when it's all | |
| 3662 ASCII. This is obviously a bad thing. But we need to fix up the | |
| 3663 existing detection methods somewhat before we can switch. */ | |
| 3664 if (even) | |
| 428 | 3665 { |
| 3666 /* If the file was entirely or basically ASCII, use the | |
| 3667 default value of `buffer-file-coding-system'. */ | |
| 3668 Lisp_Object retval = | |
| 3669 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system; | |
| 3670 if (!NILP (retval)) | |
| 3671 { | |
| 771 | 3672 retval = find_coding_system_for_text_file (retval, 0); |
| 428 | 3673 if (NILP (retval)) |
| 3674 { | |
| 3675 warn_when_safe | |
| 3676 (Qbad_variable, Qwarning, | |
| 3677 "Invalid `default-buffer-file-coding-system', set to nil"); | |
| 3678 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; | |
| 3679 } | |
| 3680 } | |
| 3681 if (NILP (retval)) | |
| 4100 | 3682 retval = Fget_coding_system (Qbinary); |
| 428 | 3683 return retval; |
| 3684 } | |
| 3685 else | |
| 3686 { | |
| 771 | 3687 int likelihood; |
| 3688 Lisp_Object retval = Qnil; | |
| 3689 | |
| 3690 /* Look through the coding categories first by likelihood and then by | |
| 3691 priority and find the first one that is allowed. */ | |
| 3692 | |
| 3693 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--) | |
| 428 | 3694 { |
| 771 | 3695 for (i = 0; i < coding_detector_category_count; i++) |
| 3696 { | |
| 3697 int cat = coding_category_by_priority[i]; | |
| 3698 if (st->categories[cat] == likelihood && | |
| 3699 !NILP (coding_category_system[cat])) | |
| 3700 { | |
| 3701 retval = (get_coding_system_for_text_file | |
| 3702 (coding_category_system[cat], 0)); | |
| 3703 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY) | |
| 3704 warn_when_safe_lispobj | |
| 3705 (intern ("detection"), | |
| 793 | 3706 Qwarning, |
| 771 | 3707 emacs_sprintf_string_lisp |
| 3708 ( | |
| 3709 "Detected coding %s is unlikely to be correct (likelihood == `%s')", | |
| 3710 Qnil, 2, XCODING_SYSTEM_NAME (retval), | |
| 3711 detection_result_number_to_symbol | |
| 3712 ((enum detection_result) likelihood))); | |
| 3713 return retval; | |
| 3714 } | |
| 3715 } | |
| 428 | 3716 } |
| 771 | 3717 |
| 3718 return Fget_coding_system (Qraw_text); | |
| 428 | 3719 } |
| 3720 } | |
| 3721 | |
| 1347 | 3722 /* Look for a coding system in the string (skipping over leading |
| 3723 blanks). If found, return it, otherwise nil. */ | |
| 3724 | |
| 3725 static Lisp_Object | |
| 2531 | 3726 snarf_coding_system (const UExtbyte *p, Bytecount len) |
| 1347 | 3727 { |
| 3728 Bytecount n; | |
| 2531 | 3729 UExtbyte *name; |
| 1347 | 3730 |
| 3731 while (*p == ' ' || *p == '\t') p++, len--; | |
| 3732 len = min (len, 1000); | |
| 3733 name = alloca_ibytes (len + 1); | |
| 3734 memcpy (name, p, len); | |
| 3735 name[len] = '\0'; | |
| 3736 | |
| 3737 /* Get coding system name */ | |
| 3738 /* Characters valid in a MIME charset name (rfc 1521), | |
| 3739 and in a Lisp symbol name. */ | |
| 3740 n = qxestrspn (name, | |
| 3741 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" | |
| 3742 "abcdefghijklmnopqrstuvwxyz" | |
| 3743 "0123456789" | |
| 3744 "!$%&*+-.^_{|}~"); | |
| 3745 if (n > 0) | |
| 3746 { | |
| 3747 name[n] = '\0'; | |
| 2531 | 3748 /* This call to intern_int() is OK because we already verified that |
| 3749 there are only ASCII characters in the string */ | |
| 3750 return find_coding_system_for_text_file (intern_int ((Ibyte *) name), 0); | |
| 1347 | 3751 } |
| 3752 | |
| 3753 return Qnil; | |
| 3754 } | |
| 3755 | |
| 428 | 3756 /* Given a seekable read stream and potential coding system and EOL type |
| 3757 as specified, do any autodetection that is called for. If the | |
| 3758 coding system and/or EOL type are not `autodetect', they will be left | |
| 3759 alone; but this function will never return an autodetect coding system | |
| 3760 or EOL type. | |
| 3761 | |
| 3762 This function does not automatically fetch subsidiary coding systems; | |
| 3763 that should be unnecessary with the explicit eol-type argument. */ | |
| 3764 | |
| 3765 #define LENGTH(string_constant) (sizeof (string_constant) - 1) | |
| 3766 | |
| 771 | 3767 static Lisp_Object |
| 3768 unwind_free_detection_state (Lisp_Object opaque) | |
| 3769 { | |
| 3770 struct detection_state *st = | |
| 3771 (struct detection_state *) get_opaque_ptr (opaque); | |
| 3772 free_detection_state (st); | |
| 3773 free_opaque_ptr (opaque); | |
| 3774 return Qnil; | |
| 3775 } | |
| 3776 | |
| 1347 | 3777 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file' |
| 3778 in files.el. Look into combining them. */ | |
| 3779 | |
| 771 | 3780 static Lisp_Object |
| 3781 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len) | |
| 428 | 3782 { |
| 771 | 3783 const UExtbyte *p; |
| 3784 const UExtbyte *scan_end; | |
| 2531 | 3785 Bytecount cookie_len; |
| 771 | 3786 |
| 3787 /* Look for initial "-*-"; mode line prefix */ | |
| 3788 for (p = data, | |
| 3789 scan_end = data + len - LENGTH ("-*-coding:?-*-"); | |
| 3790 p <= scan_end | |
| 3791 && *p != '\n' | |
| 3792 && *p != '\r'; | |
| 3793 p++) | |
| 3794 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
| 3795 { | |
| 3796 const UExtbyte *local_vars_beg = p + 3; | |
| 3797 /* Look for final "-*-"; mode line suffix */ | |
| 3798 for (p = local_vars_beg, | |
| 3799 scan_end = data + len - LENGTH ("-*-"); | |
| 3800 p <= scan_end | |
| 428 | 3801 && *p != '\n' |
| 3802 && *p != '\r'; | |
| 771 | 3803 p++) |
| 3804 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-') | |
| 3805 { | |
| 3806 const UExtbyte *suffix = p; | |
| 3807 /* Look for "coding:" */ | |
| 3808 for (p = local_vars_beg, | |
| 3809 scan_end = suffix - LENGTH ("coding:?"); | |
| 3810 p <= scan_end; | |
| 3811 p++) | |
| 3812 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0 | |
| 3813 && (p == local_vars_beg | |
| 3814 || (*(p-1) == ' ' || | |
| 3815 *(p-1) == '\t' || | |
| 3816 *(p-1) == ';'))) | |
| 3817 { | |
| 3818 p += LENGTH ("coding:"); | |
| 1347 | 3819 return snarf_coding_system (p, suffix - p); |
| 771 | 3820 break; |
| 3821 } | |
| 3822 break; | |
| 3823 } | |
| 3824 break; | |
| 3825 } | |
| 3826 | |
| 2531 | 3827 /* Look for ;;;###coding system */ |
| 3828 | |
| 3829 cookie_len = LENGTH (";;;###coding system: "); | |
| 3830 | |
| 3831 for (p = data, | |
| 3832 scan_end = data + len - cookie_len; | |
| 3833 p <= scan_end; | |
| 3834 p++) | |
| 1347 | 3835 { |
| 2531 | 3836 if (*p == ';' && !memcmp (p, ";;;###coding system: ", cookie_len)) |
| 3837 { | |
| 3838 const UExtbyte *suffix; | |
| 3839 | |
| 3840 p += cookie_len; | |
| 3841 suffix = p; | |
| 3842 while (suffix < scan_end && !isspace (*suffix)) | |
| 3843 suffix++; | |
| 3844 return snarf_coding_system (p, suffix - p); | |
| 3845 } | |
| 1347 | 3846 } |
| 3847 | |
| 3848 return Qnil; | |
| 771 | 3849 } |
| 3850 | |
| 3851 static Lisp_Object | |
| 3852 determine_real_coding_system (Lstream *stream) | |
| 3853 { | |
| 3854 struct detection_state *st = allocate_detection_state (); | |
| 3855 int depth = record_unwind_protect (unwind_free_detection_state, | |
| 3856 make_opaque_ptr (st)); | |
| 3857 UExtbyte buf[4096]; | |
| 3858 Bytecount nread = Lstream_read (stream, buf, sizeof (buf)); | |
| 3859 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread); | |
| 3860 | |
| 3861 if (NILP (coding_system)) | |
| 3862 { | |
| 3863 while (1) | |
| 3864 { | |
| 3865 if (detect_coding_type (st, buf, nread)) | |
| 428 | 3866 break; |
| 771 | 3867 nread = Lstream_read (stream, buf, sizeof (buf)); |
| 3868 if (nread == 0) | |
| 3869 break; | |
| 428 | 3870 } |
| 771 | 3871 |
| 3872 coding_system = detected_coding_system (st); | |
| 428 | 3873 } |
| 3874 | |
| 3875 Lstream_rewind (stream); | |
| 771 | 3876 |
| 3877 unbind_to (depth); | |
| 3878 return coding_system; | |
| 3879 } | |
| 3880 | |
| 3881 static void | |
| 3882 undecided_init_coding_stream (struct coding_stream *str) | |
| 3883 { | |
| 3884 struct undecided_coding_stream *data = | |
| 3885 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 3886 struct undecided_coding_system *csdata = | |
| 3887 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
| 3888 | |
| 3889 data->actual = Qnil; | |
| 3890 | |
| 3891 if (str->direction == CODING_DECODE) | |
| 3892 { | |
| 3893 Lstream *lst = str->other_end; | |
| 3894 | |
| 3895 if ((lst->flags & LSTREAM_FL_READ) && | |
| 3896 Lstream_seekable_p (lst) && | |
| 3897 csdata->do_coding) | |
| 3898 /* We can determine the coding system now. */ | |
| 3899 data->actual = determine_real_coding_system (lst); | |
| 3900 } | |
| 1494 | 3901 |
| 3902 #ifdef DEBUG_XEMACS | |
| 3903 if (!NILP (Vdebug_coding_detection)) | |
| 3904 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual); | |
| 3905 #endif /* DEBUG_XEMACS */ | |
| 771 | 3906 } |
| 3907 | |
| 3908 static void | |
| 3909 undecided_rewind_coding_stream (struct coding_stream *str) | |
| 3910 { | |
| 3911 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
| 3912 } | |
| 3913 | |
| 3914 static void | |
| 3915 undecided_finalize_coding_stream (struct coding_stream *str) | |
| 3916 { | |
| 3917 struct undecided_coding_stream *data = | |
| 3918 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 3919 | |
| 3920 chain_finalize_coding_stream_1 | |
| 3921 (&CODING_STREAM_TYPE_DATA (str, undecided)->c); | |
| 3922 if (data->st) | |
| 3923 free_detection_state (data->st); | |
| 3924 } | |
| 3925 | |
| 3926 static Lisp_Object | |
| 3927 undecided_canonicalize (Lisp_Object codesys) | |
| 3928 { | |
| 3929 struct undecided_coding_system *csdata = | |
| 3930 XCODING_SYSTEM_TYPE_DATA (codesys, undecided); | |
| 3931 if (!csdata->do_eol && !csdata->do_coding) | |
| 3932 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs; | |
| 3933 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs)) | |
| 3934 return Fget_coding_system (Qconvert_eol_autodetect); | |
| 3935 return codesys; | |
| 3936 } | |
| 3937 | |
| 3938 static Bytecount | |
| 3939 undecided_convert (struct coding_stream *str, const UExtbyte *src, | |
| 3940 unsigned_char_dynarr *dst, Bytecount n) | |
| 3941 { | |
| 3942 int first_time = 0; | |
| 3943 | |
| 3944 if (str->direction == CODING_DECODE) | |
| 3945 { | |
| 3946 /* At this point, we have only the following possibilities: | |
| 3947 | |
| 3948 do_eol && do_coding | |
| 3949 do_coding only | |
| 3950 do_eol only and a coding system was specified | |
| 3951 | |
| 3952 Other possibilities are removed during undecided_canonicalize. | |
| 3953 | |
| 3954 Therefore, our substreams are either | |
| 3955 | |
| 3956 lstream_coding -> lstream_dynarr, or | |
| 3957 lstream_coding -> lstream_eol -> lstream_dynarr. | |
| 3958 */ | |
| 3959 struct undecided_coding_system *csdata = | |
| 3960 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided); | |
| 3961 struct undecided_coding_stream *data = | |
| 3962 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 3963 | |
| 3964 if (str->eof) | |
| 3965 { | |
| 3966 /* Each will close the next. We need to close now because more | |
| 3967 data may be generated. */ | |
| 3968 if (data->c.initted) | |
| 3969 Lstream_close (XLSTREAM (data->c.lstreams[0])); | |
| 3970 return n; | |
| 3971 } | |
| 3972 | |
| 3973 if (!data->c.initted) | |
| 3974 { | |
| 3975 data->c.lstream_count = csdata->do_eol ? 3 : 2; | |
| 3976 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count); | |
| 3977 | |
| 3978 data->c.lstreams[data->c.lstream_count - 1] = | |
| 3979 make_dynarr_output_stream (dst); | |
| 3980 Lstream_set_buffering | |
| 3981 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
| 3982 LSTREAM_UNBUFFERED, 0); | |
| 3983 if (csdata->do_eol) | |
| 3984 { | |
| 3985 data->c.lstreams[1] = | |
| 3986 make_coding_output_stream | |
| 3987 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]), | |
| 3988 Fget_coding_system (Qconvert_eol_autodetect), | |
| 800 | 3989 CODING_DECODE, 0); |
| 771 | 3990 Lstream_set_buffering |
| 3991 (XLSTREAM (data->c.lstreams[1]), | |
| 3992 LSTREAM_UNBUFFERED, 0); | |
| 3993 } | |
| 3994 | |
| 3995 data->c.lstreams[0] = | |
| 3996 make_coding_output_stream | |
| 3997 (XLSTREAM (data->c.lstreams[1]), | |
| 3998 /* Substitute binary if we need to detect the encoding */ | |
| 3999 csdata->do_coding ? Qbinary : csdata->cs, | |
| 800 | 4000 CODING_DECODE, 0); |
| 771 | 4001 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]), |
| 4002 LSTREAM_UNBUFFERED, 0); | |
| 4003 | |
| 4004 first_time = 1; | |
| 4005 data->c.initted = 1; | |
| 4006 } | |
| 4007 | |
| 4008 /* If necessary, do encoding-detection now. We do this when we're a | |
| 4009 writing stream or a non-seekable reading stream, meaning that we | |
| 4010 can't just process the whole input, rewind, and start over. */ | |
| 4011 | |
| 4012 if (csdata->do_coding) | |
| 4013 { | |
| 4014 int actual_was_nil = NILP (data->actual); | |
| 4015 if (NILP (data->actual)) | |
| 4016 { | |
| 4017 if (!data->st) | |
| 4018 data->st = allocate_detection_state (); | |
| 4019 if (first_time) | |
| 4020 /* #### This is cheesy. What we really ought to do is buffer | |
| 4021 up a certain minimum amount of data to get a better result. | |
| 4022 */ | |
| 4023 data->actual = look_for_coding_system_magic_cookie (src, n); | |
| 4024 if (NILP (data->actual)) | |
| 4025 { | |
| 4026 /* #### This is cheesy. What we really ought to do is buffer | |
| 4027 up a certain minimum amount of data so as to get a less | |
| 4028 random result when doing subprocess detection. */ | |
| 4029 detect_coding_type (data->st, src, n); | |
| 4030 data->actual = detected_coding_system (data->st); | |
| 4100 | 4031 /* kludge to prevent infinite recursion */ |
| 4032 if (XCODING_SYSTEM(data->actual)->methods->enumtype == undecided_coding_system) | |
| 4033 data->actual = Fget_coding_system (Qbinary); | |
| 771 | 4034 } |
| 4035 } | |
| 4036 /* We need to set the detected coding system if we actually have | |
| 4037 such a coding system but didn't before. That is the case | |
| 4038 either when we just detected it in the previous code or when | |
| 4039 it was detected during undecided_init_coding_stream(). We | |
| 4040 can check for that using first_time. */ | |
| 4041 if (!NILP (data->actual) && (actual_was_nil || first_time)) | |
| 4042 { | |
| 4043 /* If the detected coding system doesn't allow for EOL | |
| 4044 autodetection, try to get the equivalent that does; | |
| 4045 otherwise, disable EOL detection (overriding whatever | |
| 4046 may already have been detected). */ | |
| 4047 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT) | |
| 4048 { | |
| 4049 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual))) | |
| 4050 data->actual = | |
| 4051 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual); | |
| 4052 else if (data->c.lstream_count == 3) | |
| 4053 set_coding_stream_coding_system | |
| 4054 (XLSTREAM (data->c.lstreams[1]), | |
| 4055 Fget_coding_system (Qidentity)); | |
| 4056 } | |
| 4057 set_coding_stream_coding_system | |
| 4058 (XLSTREAM (data->c.lstreams[0]), data->actual); | |
| 4059 } | |
| 4060 } | |
| 4061 | |
| 4062 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0) | |
| 4063 return -1; | |
| 4064 return n; | |
| 4065 } | |
| 4066 else | |
| 4067 return no_conversion_convert (str, src, dst, n); | |
| 4068 } | |
| 4069 | |
| 4070 static Lisp_Object | |
| 4071 undecided_canonicalize_after_coding (struct coding_stream *str) | |
| 4072 { | |
| 4073 struct undecided_coding_stream *data = | |
| 4074 CODING_STREAM_TYPE_DATA (str, undecided); | |
| 4075 Lisp_Object ret, eolret; | |
| 4076 | |
| 4077 if (str->direction == CODING_ENCODE) | |
| 4078 return str->codesys; | |
| 4079 | |
| 4080 if (!data->c.initted) | |
| 4081 return Fget_coding_system (Qundecided); | |
| 4082 | |
| 4083 ret = coding_stream_canonicalize_after_coding | |
| 4084 (XLSTREAM (data->c.lstreams[0])); | |
| 4085 if (NILP (ret)) | |
| 4086 ret = Fget_coding_system (Qundecided); | |
| 4087 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT) | |
| 4088 return ret; | |
| 4089 eolret = coding_stream_canonicalize_after_coding | |
| 4090 (XLSTREAM (data->c.lstreams[1])); | |
| 4091 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol)) | |
| 4092 return ret; | |
| 4093 return | |
| 4094 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret, | |
| 4095 Qsubtype)); | |
| 4096 } | |
| 4097 | |
| 4098 | |
| 4099 /************************************************************************/ | |
| 4100 /* Lisp interface: Coding category functions and detection */ | |
| 4101 /************************************************************************/ | |
| 4102 | |
| 4103 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /* | |
| 4104 Return a list of all recognized coding categories. | |
| 4105 */ | |
| 4106 ()) | |
| 4107 { | |
| 4108 int i; | |
| 4109 Lisp_Object list = Qnil; | |
| 4110 | |
| 4111 for (i = 0; i < coding_detector_count; i++) | |
| 4112 { | |
| 4113 detector_category_dynarr *cats = | |
| 4114 Dynarr_at (all_coding_detectors, i).cats; | |
| 4115 int j; | |
| 4116 | |
| 4117 for (j = 0; j < Dynarr_length (cats); j++) | |
| 4118 list = Fcons (Dynarr_at (cats, j).sym, list); | |
| 4119 } | |
| 4120 | |
| 4121 return Fnreverse (list); | |
| 4122 } | |
| 4123 | |
| 4124 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* | |
| 4125 Change the priority order of the coding categories. | |
| 4126 LIST should be list of coding categories, in descending order of | |
| 4127 priority. Unspecified coding categories will be lower in priority | |
| 4128 than all specified ones, in the same relative order they were in | |
| 4129 previously. | |
| 4130 */ | |
| 4131 (list)) | |
| 4132 { | |
| 4133 int *category_to_priority = | |
| 4134 alloca_array (int, coding_detector_category_count); | |
| 4135 int i, j; | |
| 4136 | |
| 4137 /* First generate a list that maps coding categories to priorities. */ | |
| 4138 | |
| 4139 for (i = 0; i < coding_detector_category_count; i++) | |
| 4140 category_to_priority[i] = -1; | |
| 4141 | |
| 4142 /* Highest priority comes from the specified list. */ | |
| 4143 i = 0; | |
| 2367 | 4144 { |
| 4145 EXTERNAL_LIST_LOOP_2 (elt, list) | |
| 4146 { | |
| 4147 int cat = coding_category_symbol_to_id (elt); | |
| 4148 | |
| 4149 if (category_to_priority[cat] >= 0) | |
| 4150 sferror ("Duplicate coding category in list", elt); | |
| 4151 category_to_priority[cat] = i++; | |
| 4152 } | |
| 4153 } | |
| 771 | 4154 |
| 4155 /* Now go through the existing categories by priority to retrieve | |
| 4156 the categories not yet specified and preserve their priority | |
| 4157 order. */ | |
| 4158 for (j = 0; j < coding_detector_category_count; j++) | |
| 4159 { | |
| 4160 int cat = coding_category_by_priority[j]; | |
| 4161 if (category_to_priority[cat] < 0) | |
| 4162 category_to_priority[cat] = i++; | |
| 4163 } | |
| 4164 | |
| 4165 /* Now we need to construct the inverse of the mapping we just | |
| 4166 constructed. */ | |
| 4167 | |
| 4168 for (i = 0; i < coding_detector_category_count; i++) | |
| 4169 coding_category_by_priority[category_to_priority[i]] = i; | |
| 4170 | |
| 4171 /* Phew! That was confusing. */ | |
| 4172 return Qnil; | |
| 4173 } | |
| 4174 | |
| 4175 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /* | |
| 4176 Return a list of coding categories in descending order of priority. | |
| 4177 */ | |
| 4178 ()) | |
| 4179 { | |
| 4180 int i; | |
| 4181 Lisp_Object list = Qnil; | |
| 4182 | |
| 4183 for (i = 0; i < coding_detector_category_count; i++) | |
| 4184 list = | |
| 4185 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]), | |
| 4186 list); | |
| 4187 return Fnreverse (list); | |
| 4188 } | |
| 4189 | |
| 4190 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* | |
| 4191 Change the coding system associated with a coding category. | |
| 4192 */ | |
| 4193 (coding_category, coding_system)) | |
| 4194 { | |
| 4195 coding_category_system[coding_category_symbol_to_id (coding_category)] = | |
| 4196 Fget_coding_system (coding_system); | |
| 4197 return Qnil; | |
| 4198 } | |
| 4199 | |
| 4200 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* | |
| 4201 Return the coding system associated with a coding category. | |
| 4202 */ | |
| 4203 (coding_category)) | |
| 4204 { | |
| 4205 Lisp_Object sys = | |
| 4206 coding_category_system[coding_category_symbol_to_id (coding_category)]; | |
| 4207 | |
| 4208 if (!NILP (sys)) | |
| 4209 return XCODING_SYSTEM_NAME (sys); | |
| 4210 return Qnil; | |
| 4211 } | |
| 4212 | |
| 800 | 4213 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will |
| 4214 read through to the end of STREAM, leaving it there but open. */ | |
| 4215 | |
| 771 | 4216 Lisp_Object |
| 4217 detect_coding_stream (Lisp_Object stream) | |
| 4218 { | |
| 4219 Lisp_Object val = Qnil; | |
| 4220 struct gcpro gcpro1, gcpro2, gcpro3; | |
| 4221 UExtbyte random_buffer[65536]; | |
| 4222 Lisp_Object binary_instream = | |
| 4223 make_coding_input_stream | |
| 4224 (XLSTREAM (stream), Qbinary, | |
| 814 | 4225 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER); |
| 771 | 4226 Lisp_Object decstream = |
| 4227 make_coding_input_stream | |
| 4228 (XLSTREAM (binary_instream), | |
| 800 | 4229 Qundecided, CODING_DECODE, 0); |
| 771 | 4230 Lstream *decstr = XLSTREAM (decstream); |
| 4231 | |
| 4232 GCPRO3 (decstream, stream, binary_instream); | |
| 4233 /* Read and discard all data; detection happens as a side effect of this, | |
| 4234 and we examine what was detected afterwards. */ | |
| 4235 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0) | |
| 4236 ; | |
| 4237 | |
| 4238 val = coding_stream_detected_coding_system (decstr); | |
| 4239 Lstream_close (decstr); | |
| 4240 Lstream_delete (decstr); | |
| 4241 Lstream_delete (XLSTREAM (binary_instream)); | |
| 4242 UNGCPRO; | |
| 4243 return val; | |
| 428 | 4244 } |
| 4245 | |
| 4246 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | |
| 4247 Detect coding system of the text in the region between START and END. | |
| 444 | 4248 Return a list of possible coding systems ordered by priority. |
| 3025 | 4249 If only ASCII characters are found, return `undecided' or one of |
| 428 | 4250 its subsidiary coding systems according to a detected end-of-line |
| 4251 type. Optional arg BUFFER defaults to the current buffer. | |
| 4252 */ | |
| 4253 (start, end, buffer)) | |
| 4254 { | |
| 4255 Lisp_Object val = Qnil; | |
| 4256 struct buffer *buf = decode_buffer (buffer, 0); | |
| 665 | 4257 Charbpos b, e; |
| 771 | 4258 Lisp_Object lb_instream; |
| 428 | 4259 |
| 4260 get_buffer_range_char (buf, start, end, &b, &e, 0); | |
| 4261 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0); | |
| 771 | 4262 |
| 4263 val = detect_coding_stream (lb_instream); | |
| 4264 Lstream_delete (XLSTREAM (lb_instream)); | |
| 428 | 4265 return val; |
| 4266 } | |
| 4267 | |
| 4268 | |
| 771 | 4269 |
| 4270 #ifdef DEBUG_XEMACS | |
| 4271 | |
| 428 | 4272 /************************************************************************/ |
| 771 | 4273 /* Internal methods */ |
| 4274 /************************************************************************/ | |
| 4275 | |
| 4276 /* Raw (internally-formatted) data. */ | |
| 4277 DEFINE_CODING_SYSTEM_TYPE (internal); | |
| 428 | 4278 |
| 665 | 4279 static Bytecount |
| 2286 | 4280 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src, |
| 771 | 4281 unsigned_char_dynarr *dst, Bytecount n) |
| 4282 { | |
| 4283 Bytecount orign = n; | |
| 4284 Dynarr_add_many (dst, src, n); | |
| 4285 return orign; | |
| 4286 } | |
| 4287 | |
| 4288 #endif /* DEBUG_XEMACS */ | |
| 4289 | |
| 4290 | |
| 4291 | |
| 4292 #ifdef HAVE_ZLIB | |
| 4293 | |
| 4294 /************************************************************************/ | |
| 4295 /* Gzip methods */ | |
| 4296 /************************************************************************/ | |
| 4297 | |
| 4298 struct gzip_coding_system | |
| 428 | 4299 { |
| 771 | 4300 int level; /* 0 through 9, or -1 for default */ |
| 4301 }; | |
| 4302 | |
| 4303 #define CODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
| 4304 (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
| 4305 #define XCODING_SYSTEM_GZIP_LEVEL(codesys) \ | |
| 4306 (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level) | |
| 4307 | |
| 4308 struct gzip_coding_stream | |
| 428 | 4309 { |
| 771 | 4310 z_stream stream; |
| 4311 int stream_initted; | |
| 4312 int reached_eof; /* #### this should be handled by the caller, once we | |
| 4313 return LSTREAM_EOF */ | |
| 4314 }; | |
| 4315 | |
| 1204 | 4316 static const struct memory_description |
| 771 | 4317 gzip_coding_system_description[] = { |
| 4318 { XD_END } | |
| 4319 }; | |
| 4320 | |
| 1204 | 4321 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip); |
| 4322 | |
| 771 | 4323 enum source_sink_type |
| 4324 gzip_conversion_end_type (Lisp_Object codesys) | |
| 4325 { | |
| 4326 return DECODES_BYTE_TO_BYTE; | |
| 428 | 4327 } |
| 4328 | |
| 4329 static void | |
| 771 | 4330 gzip_init (Lisp_Object codesys) |
| 4331 { | |
| 4332 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); | |
| 4333 data->level = -1; | |
| 4334 } | |
| 4335 | |
| 4336 static void | |
| 4337 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag) | |
| 428 | 4338 { |
| 771 | 4339 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip); |
| 4340 | |
| 826 | 4341 write_c_string (printcharfun, "("); |
| 771 | 4342 if (data->level == -1) |
| 826 | 4343 write_c_string (printcharfun, "default"); |
| 771 | 4344 else |
| 4345 print_internal (make_int (data->level), printcharfun, 0); | |
| 826 | 4346 write_c_string (printcharfun, ")"); |
| 428 | 4347 } |
| 4348 | |
| 4349 static int | |
| 771 | 4350 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) |
| 428 | 4351 { |
| 771 | 4352 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip); |
| 4353 | |
| 4354 if (EQ (key, Qlevel)) | |
| 428 | 4355 { |
| 771 | 4356 if (EQ (value, Qdefault)) |
| 4357 data->level = -1; | |
| 4358 else | |
| 428 | 4359 { |
| 771 | 4360 CHECK_INT (value); |
| 4361 check_int_range (XINT (value), 0, 9); | |
| 4362 data->level = XINT (value); | |
| 428 | 4363 } |
| 4364 } | |
| 4365 else | |
| 771 | 4366 return 0; |
| 4367 return 1; | |
| 428 | 4368 } |
| 4369 | |
| 4370 static Lisp_Object | |
| 771 | 4371 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop) |
| 428 | 4372 { |
| 771 | 4373 struct gzip_coding_system *data = |
| 4374 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip); | |
| 4375 | |
| 4376 if (EQ (prop, Qlevel)) | |
| 428 | 4377 { |
| 771 | 4378 if (data->level == -1) |
| 4379 return Qdefault; | |
| 4380 return make_int (data->level); | |
| 428 | 4381 } |
| 771 | 4382 |
| 4383 return Qunbound; | |
| 428 | 4384 } |
| 4385 | |
| 4386 static void | |
| 771 | 4387 gzip_init_coding_stream (struct coding_stream *str) |
| 428 | 4388 { |
| 771 | 4389 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
| 4390 if (data->stream_initted) | |
| 428 | 4391 { |
| 771 | 4392 if (str->direction == CODING_DECODE) |
| 4393 inflateEnd (&data->stream); | |
| 4394 else | |
| 4395 deflateEnd (&data->stream); | |
| 4396 data->stream_initted = 0; | |
| 428 | 4397 } |
| 771 | 4398 data->reached_eof = 0; |
| 428 | 4399 } |
| 4400 | |
| 4401 static void | |
| 771 | 4402 gzip_rewind_coding_stream (struct coding_stream *str) |
| 428 | 4403 { |
| 771 | 4404 gzip_init_coding_stream (str); |
| 428 | 4405 } |
| 4406 | |
| 771 | 4407 static Bytecount |
| 4408 gzip_convert (struct coding_stream *str, | |
| 4409 const UExtbyte *src, | |
| 4410 unsigned_char_dynarr *dst, Bytecount n) | |
| 428 | 4411 { |
| 771 | 4412 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip); |
| 4413 int zerr; | |
| 4414 if (str->direction == CODING_DECODE) | |
| 428 | 4415 { |
| 771 | 4416 if (data->reached_eof) |
| 4417 return n; /* eat the data */ | |
| 4418 | |
| 4419 if (!data->stream_initted) | |
| 428 | 4420 { |
| 771 | 4421 xzero (data->stream); |
| 4422 if (inflateInit (&data->stream) != Z_OK) | |
| 4423 return LSTREAM_ERROR; | |
| 4424 data->stream_initted = 1; | |
| 428 | 4425 } |
| 771 | 4426 |
| 4427 data->stream.next_in = (Bytef *) src; | |
| 4428 data->stream.avail_in = n; | |
| 4429 | |
| 4430 /* Normally we stop when we've fed all data to the decompressor; but | |
| 4431 if we're at the end of the input, and the decompressor hasn't | |
| 4432 reported EOF, we need to keep going, as there might be more output | |
| 4433 to generate. Z_OK from the decompressor means input was processed | |
| 4434 or output was generated; if neither, we break out of the loop. | |
| 4435 Other return values are: | |
| 4436 | |
| 4437 Z_STREAM_END EOF from decompressor | |
| 4438 Z_DATA_ERROR Corrupted data | |
| 4439 Z_BUF_ERROR No progress possible (this should happen if | |
| 4440 we try to feed it an incomplete file) | |
| 4441 Z_MEM_ERROR Out of memory | |
| 4442 Z_STREAM_ERROR (should never happen) | |
| 4443 Z_NEED_DICT (#### when will this happen?) | |
| 4444 */ | |
| 4445 while (data->stream.avail_in > 0 || str->eof) | |
| 4446 { | |
| 4447 /* Reserve an output buffer of the same size as the input buffer; | |
| 4448 if that's not enough, we keep reserving the same size. */ | |
| 4449 Bytecount reserved = n; | |
| 4450 Dynarr_add_many (dst, 0, reserved); | |
| 4451 /* Careful here! Don't retrieve the pointer until after | |
| 4452 reserving the space, or it might be bogus */ | |
| 4453 data->stream.next_out = | |
| 4454 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
| 4455 data->stream.avail_out = reserved; | |
| 4456 zerr = inflate (&data->stream, Z_NO_FLUSH); | |
| 4457 /* Lop off the unused portion */ | |
| 4458 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
| 4459 if (zerr != Z_OK) | |
| 4460 break; | |
| 4461 } | |
| 4462 | |
| 4463 if (zerr == Z_STREAM_END) | |
| 4464 data->reached_eof = 1; | |
| 4465 | |
| 4466 if ((Bytecount) data->stream.avail_in < n) | |
| 4467 return n - data->stream.avail_in; | |
| 4468 | |
| 4469 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
| 4470 return 0; | |
| 4471 | |
| 4472 return LSTREAM_ERROR; | |
| 428 | 4473 } |
| 4474 else | |
| 4475 { | |
| 771 | 4476 if (!data->stream_initted) |
| 4477 { | |
| 4478 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys); | |
| 4479 xzero (data->stream); | |
| 4480 if (deflateInit (&data->stream, | |
| 4481 level == -1 ? Z_DEFAULT_COMPRESSION : level) != | |
| 4482 Z_OK) | |
| 4483 return LSTREAM_ERROR; | |
| 4484 data->stream_initted = 1; | |
| 428 | 4485 } |
| 771 | 4486 |
| 4487 data->stream.next_in = (Bytef *) src; | |
| 4488 data->stream.avail_in = n; | |
| 4489 | |
| 4490 /* Normally we stop when we've fed all data to the compressor; but if | |
| 4491 we're at the end of the input, and the compressor hasn't reported | |
| 4492 EOF, we need to keep going, as there might be more output to | |
| 4493 generate. (To signal EOF on our end, we set the FLUSH parameter | |
| 4494 to Z_FINISH; when all data is output, Z_STREAM_END will be | |
| 4495 returned.) Z_OK from the compressor means input was processed or | |
| 4496 output was generated; if neither, we break out of the loop. Other | |
| 4497 return values are: | |
| 4498 | |
| 4499 Z_STREAM_END EOF from compressor | |
| 4500 Z_BUF_ERROR No progress possible (should never happen) | |
| 4501 Z_STREAM_ERROR (should never happen) | |
| 4502 */ | |
| 4503 while (data->stream.avail_in > 0 || str->eof) | |
| 4504 { | |
| 4505 /* Reserve an output buffer of the same size as the input buffer; | |
| 4506 if that's not enough, we keep reserving the same size. */ | |
| 4507 Bytecount reserved = n; | |
| 4508 Dynarr_add_many (dst, 0, reserved); | |
| 4509 /* Careful here! Don't retrieve the pointer until after | |
| 4510 reserving the space, or it might be bogus */ | |
| 4511 data->stream.next_out = | |
| 4512 Dynarr_atp (dst, Dynarr_length (dst) - reserved); | |
| 4513 data->stream.avail_out = reserved; | |
| 4514 zerr = | |
| 4515 deflate (&data->stream, | |
| 4516 str->eof ? Z_FINISH : Z_NO_FLUSH); | |
| 4517 /* Lop off the unused portion */ | |
| 4518 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out); | |
| 4519 if (zerr != Z_OK) | |
| 4520 break; | |
| 4521 } | |
| 4522 | |
| 4523 if ((Bytecount) data->stream.avail_in < n) | |
| 4524 return n - data->stream.avail_in; | |
| 4525 | |
| 4526 if (zerr == Z_OK || zerr == Z_STREAM_END) | |
| 4527 return 0; | |
| 4528 | |
| 4529 return LSTREAM_ERROR; | |
| 428 | 4530 } |
| 4531 } | |
| 4532 | |
| 771 | 4533 #endif /* HAVE_ZLIB */ |
| 428 | 4534 |
| 4535 | |
| 4536 /************************************************************************/ | |
| 4537 /* Initialization */ | |
| 4538 /************************************************************************/ | |
| 4539 | |
| 4540 void | |
| 4541 syms_of_file_coding (void) | |
| 4542 { | |
| 442 | 4543 INIT_LRECORD_IMPLEMENTATION (coding_system); |
| 4544 | |
| 771 | 4545 DEFSUBR (Fvalid_coding_system_type_p); |
| 4546 DEFSUBR (Fcoding_system_type_list); | |
| 428 | 4547 DEFSUBR (Fcoding_system_p); |
| 4303 | 4548 DEFSUBR (Fautoload_coding_system); |
| 428 | 4549 DEFSUBR (Ffind_coding_system); |
| 4550 DEFSUBR (Fget_coding_system); | |
| 4551 DEFSUBR (Fcoding_system_list); | |
| 4552 DEFSUBR (Fcoding_system_name); | |
| 4553 DEFSUBR (Fmake_coding_system); | |
| 4554 DEFSUBR (Fcopy_coding_system); | |
| 440 | 4555 DEFSUBR (Fcoding_system_canonical_name_p); |
| 4556 DEFSUBR (Fcoding_system_alias_p); | |
| 4557 DEFSUBR (Fcoding_system_aliasee); | |
| 428 | 4558 DEFSUBR (Fdefine_coding_system_alias); |
| 4559 DEFSUBR (Fsubsidiary_coding_system); | |
| 771 | 4560 DEFSUBR (Fcoding_system_base); |
| 4561 DEFSUBR (Fcoding_system_used_for_io); | |
| 428 | 4562 |
| 4563 DEFSUBR (Fcoding_system_type); | |
| 771 | 4564 DEFSUBR (Fcoding_system_description); |
| 428 | 4565 DEFSUBR (Fcoding_system_property); |
| 4566 | |
| 4567 DEFSUBR (Fcoding_category_list); | |
| 4568 DEFSUBR (Fset_coding_priority_list); | |
| 4569 DEFSUBR (Fcoding_priority_list); | |
| 4570 DEFSUBR (Fset_coding_category_system); | |
| 4571 DEFSUBR (Fcoding_category_system); | |
| 4572 | |
| 4573 DEFSUBR (Fdetect_coding_region); | |
| 4574 DEFSUBR (Fdecode_coding_region); | |
| 4575 DEFSUBR (Fencode_coding_region); | |
| 563 | 4576 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp); |
| 4577 DEFSYMBOL (Qno_conversion); | |
| 771 | 4578 DEFSYMBOL (Qconvert_eol); |
| 4579 DEFSYMBOL (Qconvert_eol_autodetect); | |
| 4580 DEFSYMBOL (Qconvert_eol_lf); | |
| 4581 DEFSYMBOL (Qconvert_eol_cr); | |
| 4582 DEFSYMBOL (Qconvert_eol_crlf); | |
| 563 | 4583 DEFSYMBOL (Qraw_text); |
| 771 | 4584 |
| 563 | 4585 DEFSYMBOL (Qmnemonic); |
| 4586 DEFSYMBOL (Qeol_type); | |
| 4587 DEFSYMBOL (Qpost_read_conversion); | |
| 4588 DEFSYMBOL (Qpre_write_conversion); | |
| 4589 | |
| 771 | 4590 DEFSYMBOL (Qtranslation_table_for_decode); |
| 4591 DEFSYMBOL (Qtranslation_table_for_encode); | |
| 4592 DEFSYMBOL (Qsafe_chars); | |
| 4593 DEFSYMBOL (Qsafe_charsets); | |
| 4594 DEFSYMBOL (Qmime_charset); | |
| 4595 DEFSYMBOL (Qvalid_codes); | |
| 4596 | |
| 563 | 4597 DEFSYMBOL (Qcr); |
| 4598 DEFSYMBOL (Qlf); | |
| 4599 DEFSYMBOL (Qcrlf); | |
| 4600 DEFSYMBOL (Qeol_cr); | |
| 4601 DEFSYMBOL (Qeol_lf); | |
| 4602 DEFSYMBOL (Qeol_crlf); | |
| 4603 DEFSYMBOL (Qencode); | |
| 4604 DEFSYMBOL (Qdecode); | |
| 428 | 4605 |
| 771 | 4606 DEFSYMBOL (Qnear_certainty); |
| 4607 DEFSYMBOL (Qquite_probable); | |
| 4608 DEFSYMBOL (Qsomewhat_likely); | |
| 1494 | 4609 DEFSYMBOL (Qslightly_likely); |
| 771 | 4610 DEFSYMBOL (Qas_likely_as_unlikely); |
| 4611 DEFSYMBOL (Qsomewhat_unlikely); | |
| 4612 DEFSYMBOL (Qquite_improbable); | |
| 4613 DEFSYMBOL (Qnearly_impossible); | |
| 4614 | |
| 4615 DEFSYMBOL (Qdo_eol); | |
| 4616 DEFSYMBOL (Qdo_coding); | |
| 4617 | |
| 4618 DEFSYMBOL (Qcanonicalize_after_coding); | |
| 4619 | |
| 4303 | 4620 DEFSYMBOL (Qposix_charset_to_coding_system_hash); |
| 4621 | |
| 771 | 4622 DEFSYMBOL (Qescape_quoted); |
| 4623 | |
| 4624 #ifdef HAVE_ZLIB | |
| 4625 DEFSYMBOL (Qgzip); | |
| 4626 #endif | |
| 4627 | |
| 428 | 4628 } |
| 4629 | |
| 4630 void | |
| 4631 lstream_type_create_file_coding (void) | |
| 4632 { | |
| 771 | 4633 LSTREAM_HAS_METHOD (coding, reader); |
| 4634 LSTREAM_HAS_METHOD (coding, writer); | |
| 4635 LSTREAM_HAS_METHOD (coding, rewinder); | |
| 4636 LSTREAM_HAS_METHOD (coding, seekable_p); | |
| 4637 LSTREAM_HAS_METHOD (coding, marker); | |
| 4638 LSTREAM_HAS_METHOD (coding, flusher); | |
| 4639 LSTREAM_HAS_METHOD (coding, closer); | |
| 4640 LSTREAM_HAS_METHOD (coding, finalizer); | |
| 4641 } | |
| 4642 | |
| 4643 void | |
| 4644 coding_system_type_create (void) | |
| 4645 { | |
| 4646 int i; | |
| 4647 | |
| 4648 staticpro (&Vcoding_system_hash_table); | |
| 4649 Vcoding_system_hash_table = | |
| 4650 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | |
| 4651 | |
| 4652 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry); | |
| 2367 | 4653 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr, |
| 771 | 4654 &csted_description); |
| 4655 | |
| 4656 Vcoding_system_type_list = Qnil; | |
| 4657 staticpro (&Vcoding_system_type_list); | |
| 4658 | |
| 4659 /* Initialize to something reasonable ... */ | |
| 4660 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++) | |
| 4661 { | |
| 4662 coding_category_system[i] = Qnil; | |
| 1204 | 4663 dump_add_root_lisp_object (&coding_category_system[i]); |
| 771 | 4664 coding_category_by_priority[i] = i; |
| 4665 } | |
| 4666 | |
| 4667 dump_add_opaque (coding_category_by_priority, | |
| 4668 sizeof (coding_category_by_priority)); | |
| 4669 | |
| 4670 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector); | |
| 2367 | 4671 dump_add_root_block_ptr (&all_coding_detectors, |
| 771 | 4672 &detector_dynarr_description); |
| 4673 | |
| 4674 dump_add_opaque_int (&coding_system_tick); | |
| 4675 dump_add_opaque_int (&coding_detector_count); | |
| 4676 dump_add_opaque_int (&coding_detector_category_count); | |
| 4677 | |
| 4678 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion, | |
| 4679 "no-conversion-coding-system-p"); | |
| 4680 CODING_SYSTEM_HAS_METHOD (no_conversion, convert); | |
| 4681 | |
| 4682 INITIALIZE_DETECTOR (no_conversion); | |
| 4683 DETECTOR_HAS_METHOD (no_conversion, detect); | |
| 4684 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion); | |
| 4685 | |
| 4686 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol, | |
| 4687 "convert-eol-coding-system-p"); | |
| 4688 CODING_SYSTEM_HAS_METHOD (convert_eol, print); | |
| 4689 CODING_SYSTEM_HAS_METHOD (convert_eol, convert); | |
| 4690 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop); | |
| 4691 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop); | |
| 4692 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type); | |
| 4693 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding); | |
| 4694 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream); | |
| 4695 | |
| 4696 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided, | |
| 4697 "undecided-coding-system-p"); | |
| 4698 CODING_SYSTEM_HAS_METHOD (undecided, init); | |
| 4699 CODING_SYSTEM_HAS_METHOD (undecided, mark); | |
| 4700 CODING_SYSTEM_HAS_METHOD (undecided, print); | |
| 4701 CODING_SYSTEM_HAS_METHOD (undecided, convert); | |
| 4702 CODING_SYSTEM_HAS_METHOD (undecided, putprop); | |
| 4703 CODING_SYSTEM_HAS_METHOD (undecided, getprop); | |
| 4704 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream); | |
| 4705 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream); | |
| 4706 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream); | |
| 4707 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream); | |
| 4708 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize); | |
| 4709 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding); | |
| 4710 | |
| 4711 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p"); | |
| 4712 | |
| 4713 CODING_SYSTEM_HAS_METHOD (chain, print); | |
| 4714 CODING_SYSTEM_HAS_METHOD (chain, canonicalize); | |
| 4715 CODING_SYSTEM_HAS_METHOD (chain, init); | |
| 4716 CODING_SYSTEM_HAS_METHOD (chain, mark); | |
| 4717 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream); | |
| 4718 CODING_SYSTEM_HAS_METHOD (chain, convert); | |
| 4719 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream); | |
| 4720 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream); | |
| 4721 CODING_SYSTEM_HAS_METHOD (chain, finalize); | |
| 4722 CODING_SYSTEM_HAS_METHOD (chain, putprop); | |
| 4723 CODING_SYSTEM_HAS_METHOD (chain, getprop); | |
| 4724 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type); | |
| 4725 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding); | |
| 4726 | |
| 4727 #ifdef DEBUG_XEMACS | |
| 4728 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p"); | |
| 4729 CODING_SYSTEM_HAS_METHOD (internal, convert); | |
| 4730 #endif | |
| 4731 | |
| 4732 #ifdef HAVE_ZLIB | |
| 4733 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p"); | |
| 4734 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type); | |
| 4735 CODING_SYSTEM_HAS_METHOD (gzip, convert); | |
| 4736 CODING_SYSTEM_HAS_METHOD (gzip, init); | |
| 4737 CODING_SYSTEM_HAS_METHOD (gzip, print); | |
| 4738 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream); | |
| 4739 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream); | |
| 4740 CODING_SYSTEM_HAS_METHOD (gzip, putprop); | |
| 4741 CODING_SYSTEM_HAS_METHOD (gzip, getprop); | |
| 4742 #endif | |
| 4743 } | |
| 4744 | |
| 4745 void | |
| 4746 reinit_coding_system_type_create (void) | |
| 4747 { | |
| 4748 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion); | |
| 4749 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol); | |
| 4750 REINITIALIZE_CODING_SYSTEM_TYPE (undecided); | |
| 4751 REINITIALIZE_CODING_SYSTEM_TYPE (chain); | |
| 4752 #if 0 | |
| 4753 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper); | |
| 4754 #endif /* 0 */ | |
| 4755 #ifdef DEBUG_XEMACS | |
| 4756 REINITIALIZE_CODING_SYSTEM_TYPE (internal); | |
| 4757 #endif | |
| 4758 #ifdef HAVE_ZLIB | |
| 4759 REINITIALIZE_CODING_SYSTEM_TYPE (gzip); | |
| 4760 #endif | |
| 4761 } | |
| 4762 | |
| 4763 void | |
| 4764 reinit_vars_of_file_coding (void) | |
| 4765 { | |
| 428 | 4766 } |
| 4767 | |
| 4768 void | |
| 4769 vars_of_file_coding (void) | |
| 4770 { | |
| 771 | 4771 /* We always have file-coding support */ |
| 428 | 4772 Fprovide (intern ("file-coding")); |
| 4773 | |
| 1347 | 4774 QScoding_system_cookie = build_string (";;;###coding system: "); |
| 4775 staticpro (&QScoding_system_cookie); | |
| 4776 | |
| 1242 | 4777 #ifdef HAVE_DEFAULT_EOL_DETECTION |
| 2297 | 4778 /* #### Find a more appropriate place for this comment. |
| 4779 WARNING: The existing categories are intimately tied to the function | |
| 1242 | 4780 `coding-system-category' in coding.el. If you change a category, or |
| 4781 change the layout of any coding system associated with a category, you | |
| 4782 need to check that function and make sure it's written properly. */ | |
| 4783 | |
| 4784 Fprovide (intern ("unix-default-eol-detection")); | |
| 4785 #endif | |
| 4786 | |
| 428 | 4787 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
| 3142 | 4788 Default coding system used for TTY and X11 keyboard input. |
| 4789 Under X11, used only to interpet the character for a key event when that | |
| 4790 event has a KeySym of NoSymbol but does have an associated string keysym, | |
| 4791 something that's seen with input methods. | |
| 4792 | |
| 4793 If you need to set these things to different coding systems, call the | |
| 4794 function `set-console-tty-coding-system' for the TTY and use this variable | |
| 4795 for X11. | |
| 428 | 4796 */ ); |
| 4797 Vkeyboard_coding_system = Qnil; | |
| 4798 | |
| 4799 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /* | |
| 4800 Coding system used for TTY display output. | |
| 4801 Not used under a windowing system. | |
| 4802 */ ); | |
| 4803 Vterminal_coding_system = Qnil; | |
| 4804 | |
| 4805 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* | |
| 440 | 4806 Overriding coding system used when reading from a file or process. |
| 4807 You should bind this variable with `let', but do not set it globally. | |
| 4808 If this is non-nil, it specifies the coding system that will be used | |
| 4809 to decode input on read operations, such as from a file or process. | |
| 4810 It overrides `buffer-file-coding-system-for-read', | |
| 428 | 4811 `insert-file-contents-pre-hook', etc. Use those variables instead of |
| 440 | 4812 this one for permanent changes to the environment. */ ); |
| 428 | 4813 Vcoding_system_for_read = Qnil; |
| 4814 | |
| 4815 DEFVAR_LISP ("coding-system-for-write", | |
| 4816 &Vcoding_system_for_write /* | |
| 440 | 4817 Overriding coding system used when writing to a file or process. |
| 4818 You should bind this variable with `let', but do not set it globally. | |
| 4819 If this is non-nil, it specifies the coding system that will be used | |
| 4820 to encode output for write operations, such as to a file or process. | |
| 4821 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc. | |
| 4822 Use those variables instead of this one for permanent changes to the | |
| 4823 environment. */ ); | |
| 428 | 4824 Vcoding_system_for_write = Qnil; |
| 4825 | |
| 4826 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* | |
| 4827 Coding system used to convert pathnames when accessing files. | |
| 4828 */ ); | |
| 4829 Vfile_name_coding_system = Qnil; | |
| 4830 | |
| 4831 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /* | |
| 771 | 4832 Setting this has no effect. It is purely for FSF compatibility. |
| 428 | 4833 */ ); |
| 4834 enable_multibyte_characters = 1; | |
| 771 | 4835 |
| 4836 Vchain_canonicalize_hash_table = | |
| 4837 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL); | |
| 4838 staticpro (&Vchain_canonicalize_hash_table); | |
| 4839 | |
| 4840 #ifdef DEBUG_XEMACS | |
| 4841 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /* | |
| 4842 If non-nil, display debug information about detection operations in progress. | |
| 4843 Information is displayed on stderr. | |
| 4844 */ ); | |
| 4845 Vdebug_coding_detection = Qnil; | |
| 4846 #endif | |
| 428 | 4847 } |
| 4848 | |
| 2297 | 4849 /* #### reformat this for consistent appearance? */ |
| 4850 | |
| 428 | 4851 void |
| 4852 complex_vars_of_file_coding (void) | |
| 4853 { | |
| 771 | 4854 Fmake_coding_system |
| 4855 (Qconvert_eol_cr, Qconvert_eol, | |
| 4856 build_msg_string ("Convert CR to LF"), | |
| 4857 nconc2 (list6 (Qdocumentation, | |
| 4858 build_msg_string ( | |
| 4859 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n" | |
| 4860 "(used internally and under Unix to mark the end of a line)."), | |
| 4861 Qmnemonic, build_string ("CR->LF"), | |
| 4862 Qsubtype, Qcr), | |
| 4863 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
| 4864 subsidiaries -- it needs the coding systems we're creating | |
| 4865 to do so! */ | |
| 4866 list2 (Qeol_type, Qlf))); | |
| 4867 | |
| 4868 Fmake_coding_system | |
| 4869 (Qconvert_eol_lf, Qconvert_eol, | |
| 4870 build_msg_string ("Convert LF to LF (do nothing)"), | |
| 4871 nconc2 (list6 (Qdocumentation, | |
| 4872 build_msg_string ( | |
| 4873 "Do nothing."), | |
| 4874 Qmnemonic, build_string ("LF->LF"), | |
| 4875 Qsubtype, Qlf), | |
| 4876 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
| 4877 subsidiaries -- it needs the coding systems we're creating | |
| 4878 to do so! */ | |
| 4879 list2 (Qeol_type, Qlf))); | |
| 4880 | |
| 4881 Fmake_coding_system | |
| 4882 (Qconvert_eol_crlf, Qconvert_eol, | |
| 4883 build_msg_string ("Convert CRLF to LF"), | |
| 4884 nconc2 (list6 (Qdocumentation, | |
| 4885 build_msg_string ( | |
| 4886 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n" | |
| 4887 "(used internally and under Unix to mark the end of a line)."), | |
| 4888 Qmnemonic, build_string ("CRLF->LF"), | |
| 4889 Qsubtype, Qcrlf), | |
| 4890 /* VERY IMPORTANT! Tell make-coding-system not to generate | |
| 4891 subsidiaries -- it needs the coding systems we're creating | |
| 4892 to do so! */ | |
| 4893 list2 (Qeol_type, Qlf))); | |
| 4894 | |
| 4895 Fmake_coding_system | |
| 4896 (Qconvert_eol_autodetect, Qconvert_eol, | |
| 4897 build_msg_string ("Autodetect EOL type"), | |
| 4898 nconc2 (list6 (Qdocumentation, | |
| 4899 build_msg_string ( | |
| 4900 "Autodetect the end-of-line type."), | |
| 4901 Qmnemonic, build_string ("Auto-EOL"), | |
| 793 | 4902 Qsubtype, Qnil), |
| 771 | 4903 /* VERY IMPORTANT! Tell make-coding-system not to generate |
| 4904 subsidiaries -- it needs the coding systems we're creating | |
| 4905 to do so! */ | |
| 4906 list2 (Qeol_type, Qlf))); | |
| 4907 | |
| 4908 Fmake_coding_system | |
| 4909 (Qundecided, Qundecided, | |
| 4910 build_msg_string ("Undecided (auto-detect)"), | |
| 4911 nconc2 (list4 (Qdocumentation, | |
| 4912 build_msg_string | |
| 4913 ("Automatically detects the correct encoding."), | |
| 4914 Qmnemonic, build_string ("Auto")), | |
| 4915 list6 (Qdo_eol, Qt, Qdo_coding, Qt, | |
| 4916 /* We do EOL detection ourselves so we don't need to be | |
| 4917 wrapped in an EOL detector. (It doesn't actually hurt, | |
| 4918 though, I don't think.) */ | |
| 4919 Qeol_type, Qlf))); | |
| 4920 | |
| 4921 Fmake_coding_system | |
| 4922 (intern ("undecided-dos"), Qundecided, | |
| 4923 build_msg_string ("Undecided (auto-detect) (CRLF)"), | |
| 4924 nconc2 (list4 (Qdocumentation, | |
| 4925 build_msg_string | |
| 4926 ("Automatically detects the correct encoding; EOL type of CRLF forced."), | |
| 4927 Qmnemonic, build_string ("Auto")), | |
| 4928 list4 (Qdo_coding, Qt, | |
| 4929 Qeol_type, Qcrlf))); | |
| 4930 | |
| 4931 Fmake_coding_system | |
| 4932 (intern ("undecided-unix"), Qundecided, | |
| 4933 build_msg_string ("Undecided (auto-detect) (LF)"), | |
| 4934 nconc2 (list4 (Qdocumentation, | |
| 4935 build_msg_string | |
| 4936 ("Automatically detects the correct encoding; EOL type of LF forced."), | |
| 4937 Qmnemonic, build_string ("Auto")), | |
| 4938 list4 (Qdo_coding, Qt, | |
| 4939 Qeol_type, Qlf))); | |
| 4940 | |
| 4941 Fmake_coding_system | |
| 4942 (intern ("undecided-mac"), Qundecided, | |
| 4943 build_msg_string ("Undecided (auto-detect) (CR)"), | |
| 4944 nconc2 (list4 (Qdocumentation, | |
| 4945 build_msg_string | |
| 4946 ("Automatically detects the correct encoding; EOL type of CR forced."), | |
| 4947 Qmnemonic, build_string ("Auto")), | |
| 4948 list4 (Qdo_coding, Qt, | |
| 4949 Qeol_type, Qcr))); | |
| 4950 | |
| 428 | 4951 /* Need to create this here or we're really screwed. */ |
| 4952 Fmake_coding_system | |
| 4953 (Qraw_text, Qno_conversion, | |
| 771 | 4954 build_msg_string ("Raw Text"), |
| 4955 list4 (Qdocumentation, | |
| 4956 build_msg_string ("Raw text converts only line-break codes, and acts otherwise like `binary'."), | |
| 4957 Qmnemonic, build_string ("Raw"))); | |
| 428 | 4958 |
| 4959 Fmake_coding_system | |
| 4960 (Qbinary, Qno_conversion, | |
| 771 | 4961 build_msg_string ("Binary"), |
| 4962 list6 (Qdocumentation, | |
| 4963 build_msg_string ( | |
| 4964 "This coding system is as close as it comes to doing no conversion.\n" | |
| 4965 "On input, each byte is converted directly into the character\n" | |
| 4966 "with the corresponding code -- i.e. from the `ascii', `control-1',\n" | |
| 4967 "or `latin-1' character sets. On output, these characters are\n" | |
| 4968 "converted back to the corresponding bytes, and other characters\n" | |
| 4969 "are converted to the default character, i.e. `~'."), | |
| 4970 Qeol_type, Qlf, | |
| 428 | 4971 Qmnemonic, build_string ("Binary"))); |
| 4972 | |
| 771 | 4973 /* Formerly aliased to raw-text! Completely bogus and not even the same |
| 4974 as FSF Emacs. */ | |
| 4975 Fdefine_coding_system_alias (Qno_conversion, Qbinary); | |
| 4976 Fdefine_coding_system_alias (intern ("no-conversion-unix"), | |
| 4977 intern ("raw-text-unix")); | |
| 4978 Fdefine_coding_system_alias (intern ("no-conversion-dos"), | |
| 4979 intern ("raw-text-dos")); | |
| 4980 Fdefine_coding_system_alias (intern ("no-conversion-mac"), | |
| 4981 intern ("raw-text-mac")); | |
| 4982 | |
| 1318 | 4983 /* These three below will get their defaults set correctly |
| 4984 in code-init.el. We init them now so we can handle stuff at dump | |
| 771 | 4985 time before we get to code-init.el. */ |
| 1318 | 4986 Fdefine_coding_system_alias (Qnative, Qbinary); |
| 440 | 4987 Fdefine_coding_system_alias (Qterminal, Qbinary); |
| 4988 Fdefine_coding_system_alias (Qkeyboard, Qbinary); | |
| 4989 | |
| 1318 | 4990 Fdefine_coding_system_alias (Qfile_name, Qnative); |
| 771 | 4991 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf); |
| 4992 | |
| 428 | 4993 /* Need this for bootstrapping */ |
| 771 | 4994 coding_category_system[detector_category_no_conversion] = |
| 428 | 4995 Fget_coding_system (Qraw_text); |
| 4996 } |
