771
+ − 1 /* Text encoding conversion functions; coding-system object.
+ − 2 #### rename me to coding-system.c or coding.c
428
+ − 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
+ − 4 Copyright (C) 1995 Sun Microsystems, Inc.
1318
+ − 5 Copyright (C) 2000, 2001, 2002, 2003 Ben Wing.
428
+ − 6
+ − 7 This file is part of XEmacs.
+ − 8
+ − 9 XEmacs is free software; you can redistribute it and/or modify it
+ − 10 under the terms of the GNU General Public License as published by the
+ − 11 Free Software Foundation; either version 2, or (at your option) any
+ − 12 later version.
+ − 13
+ − 14 XEmacs is distributed in the hope that it will be useful, but WITHOUT
+ − 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ − 16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ − 17 for more details.
+ − 18
+ − 19 You should have received a copy of the GNU General Public License
+ − 20 along with XEmacs; see the file COPYING. If not, write to
+ − 21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ − 22 Boston, MA 02111-1307, USA. */
+ − 23
771
+ − 24 /* Synched up with: Not in FSF. */
+ − 25
+ − 26 /* Authorship:
+ − 27
+ − 28 Current primary author: Ben Wing <ben@xemacs.org>
+ − 29
+ − 30 Rewritten by Ben Wing <ben@xemacs.org>, based originally on coding.c
+ − 31 from Mule 2.? but probably does not share one line of code with that
+ − 32 original source. Rewriting work started around Dec. 1994. or Jan. 1995.
+ − 33 Proceeded in earnest till Nov. 1995.
+ − 34
+ − 35 Around Feb. 17, 1998, Andy Piper renamed what was then mule-coding.c to
+ − 36 file-coding.c, with the intention of using it to do end-of-line conversion
+ − 37 on non-MULE machines (specifically, on Windows machines). He separated
+ − 38 out the MULE stuff from non-MULE using ifdef's, and searched throughout
+ − 39 the rest of the source tree looking for coding-system-related code that
+ − 40 was ifdef MULE but should be ifdef HAVE_CODING_SYSTEMS.
+ − 41
+ − 42 Sept. 4 - 8, 1998, Tomohiko Morioka added the UCS_4 and UTF_8 coding system
+ − 43 types, providing a primitive means of decoding and encoding externally-
+ − 44 formatted Unicode/UCS_4 and Unicode/UTF_8 data.
+ − 45
+ − 46 January 25, 2000, Martin Buchholz redid and fleshed out the coding
+ − 47 system alias handling that was first added in prototype form by
+ − 48 Hrjove Niksic, April 15, 1999.
+ − 49
+ − 50 April to May 2000, Ben Wing: More major reorganization. Adding features
+ − 51 needed for MS Windows (multibyte, unicode, unicode-to-multibyte), the
+ − 52 "chain" coding system for chaining two together, and doing a lot of
+ − 53 reorganization in preparation for properly abstracting out the different
+ − 54 coding system types.
+ − 55
+ − 56 June 2001, Ben Wing: Added Unicode support. Eliminated previous
+ − 57 junky Unicode translation support.
+ − 58
+ − 59 August 2001, Ben Wing: Moved Unicode support to unicode.c. Finished
+ − 60 abstracting everything except detection, which is hard to abstract (see
+ − 61 just below).
+ − 62
+ − 63 September 2001, Ben Wing: Moved Mule code to mule-coding.c, Windows code
+ − 64 to intl-win32.c. Lots more rewriting; very little code is untouched
+ − 65 from before April 2000. Abstracted the detection code, added multiple
+ − 66 levels of likelihood to increase the reliability of the algorithm.
+ − 67
+ − 68 October 2001, Ben Wing: HAVE_CODING_SYSTEMS is always now defined.
+ − 69 Removed the conditionals.
+ − 70 */
+ − 71
428
+ − 72 #include <config.h>
+ − 73 #include "lisp.h"
+ − 74
+ − 75 #include "buffer.h"
+ − 76 #include "elhash.h"
+ − 77 #include "insdel.h"
+ − 78 #include "lstream.h"
440
+ − 79 #include "opaque.h"
771
+ − 80 #include "file-coding.h"
+ − 81
+ − 82 #ifdef HAVE_ZLIB
+ − 83 #include "zlib.h"
428
+ − 84 #endif
+ − 85
+ − 86 Lisp_Object Vkeyboard_coding_system;
+ − 87 Lisp_Object Vterminal_coding_system;
+ − 88 Lisp_Object Vcoding_system_for_read;
+ − 89 Lisp_Object Vcoding_system_for_write;
+ − 90 Lisp_Object Vfile_name_coding_system;
+ − 91
771
+ − 92 #ifdef DEBUG_XEMACS
+ − 93 Lisp_Object Vdebug_coding_detection;
440
+ − 94 #endif
771
+ − 95
+ − 96 typedef struct coding_system_type_entry
+ − 97 {
+ − 98 struct coding_system_methods *meths;
+ − 99 } coding_system_type_entry;
+ − 100
+ − 101 typedef struct
+ − 102 {
+ − 103 Dynarr_declare (coding_system_type_entry);
+ − 104 } coding_system_type_entry_dynarr;
+ − 105
+ − 106 static coding_system_type_entry_dynarr *the_coding_system_type_entry_dynarr;
+ − 107
1204
+ − 108 static const struct memory_description cste_description_1[] = {
2367
+ − 109 { XD_BLOCK_PTR, offsetof (coding_system_type_entry, meths), 1, &coding_system_methods_description },
771
+ − 110 { XD_END }
+ − 111 };
+ − 112
1204
+ − 113 static const struct sized_memory_description cste_description = {
771
+ − 114 sizeof (coding_system_type_entry),
+ − 115 cste_description_1
+ − 116 };
+ − 117
1204
+ − 118 static const struct memory_description csted_description_1[] = {
771
+ − 119 XD_DYNARR_DESC (coding_system_type_entry_dynarr, &cste_description),
428
+ − 120 { XD_END }
+ − 121 };
+ − 122
1204
+ − 123 static const struct sized_memory_description csted_description = {
771
+ − 124 sizeof (coding_system_type_entry_dynarr),
+ − 125 csted_description_1
+ − 126 };
+ − 127
+ − 128 static Lisp_Object Vcoding_system_type_list;
+ − 129
+ − 130 /* Coding system currently associated with each coding category. */
+ − 131 Lisp_Object coding_category_system[MAX_DETECTOR_CATEGORIES];
+ − 132
+ − 133 /* Table of all coding categories in decreasing order of priority.
+ − 134 This describes a permutation of the possible coding categories. */
+ − 135 int coding_category_by_priority[MAX_DETECTOR_CATEGORIES];
+ − 136
+ − 137 /* Value used with to give a unique name to nameless coding systems */
+ − 138 int coding_system_tick;
+ − 139
+ − 140 int coding_detector_count;
+ − 141 int coding_detector_category_count;
+ − 142
+ − 143 detector_dynarr *all_coding_detectors;
+ − 144
1204
+ − 145 static const struct memory_description struct_detector_category_description_1[]
771
+ − 146 =
+ − 147 {
+ − 148 { XD_LISP_OBJECT, offsetof (struct detector_category, sym) },
+ − 149 { XD_END }
+ − 150 };
+ − 151
1204
+ − 152 static const struct sized_memory_description struct_detector_category_description =
771
+ − 153 {
+ − 154 sizeof (struct detector_category),
+ − 155 struct_detector_category_description_1
428
+ − 156 };
+ − 157
1204
+ − 158 static const struct memory_description detector_category_dynarr_description_1[] =
771
+ − 159 {
+ − 160 XD_DYNARR_DESC (detector_category_dynarr,
+ − 161 &struct_detector_category_description),
+ − 162 { XD_END }
+ − 163 };
+ − 164
1204
+ − 165 static const struct sized_memory_description detector_category_dynarr_description = {
771
+ − 166 sizeof (detector_category_dynarr),
+ − 167 detector_category_dynarr_description_1
+ − 168 };
+ − 169
1204
+ − 170 static const struct memory_description struct_detector_description_1[]
771
+ − 171 =
+ − 172 {
2367
+ − 173 { XD_BLOCK_PTR, offsetof (struct detector, cats), 1,
771
+ − 174 &detector_category_dynarr_description },
+ − 175 { XD_END }
+ − 176 };
+ − 177
1204
+ − 178 static const struct sized_memory_description struct_detector_description =
771
+ − 179 {
+ − 180 sizeof (struct detector),
+ − 181 struct_detector_description_1
+ − 182 };
+ − 183
1204
+ − 184 static const struct memory_description detector_dynarr_description_1[] =
771
+ − 185 {
+ − 186 XD_DYNARR_DESC (detector_dynarr, &struct_detector_description),
+ − 187 { XD_END }
+ − 188 };
+ − 189
1204
+ − 190 static const struct sized_memory_description detector_dynarr_description = {
771
+ − 191 sizeof (detector_dynarr),
+ − 192 detector_dynarr_description_1
+ − 193 };
428
+ − 194
+ − 195 Lisp_Object Qcoding_systemp;
+ − 196
771
+ − 197 Lisp_Object Qraw_text;
428
+ − 198
+ − 199 Lisp_Object Qmnemonic, Qeol_type;
+ − 200 Lisp_Object Qcr, Qcrlf, Qlf;
+ − 201 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf;
+ − 202 Lisp_Object Qpost_read_conversion;
+ − 203 Lisp_Object Qpre_write_conversion;
+ − 204
771
+ − 205 Lisp_Object Qtranslation_table_for_decode;
+ − 206 Lisp_Object Qtranslation_table_for_encode;
+ − 207 Lisp_Object Qsafe_chars;
+ − 208 Lisp_Object Qsafe_charsets;
+ − 209 Lisp_Object Qmime_charset;
+ − 210 Lisp_Object Qvalid_codes;
+ − 211
+ − 212 Lisp_Object Qno_conversion;
+ − 213 Lisp_Object Qconvert_eol;
440
+ − 214 Lisp_Object Qescape_quoted;
771
+ − 215 Lisp_Object Qencode, Qdecode;
+ − 216
+ − 217 Lisp_Object Qconvert_eol_lf, Qconvert_eol_cr, Qconvert_eol_crlf;
+ − 218 Lisp_Object Qconvert_eol_autodetect;
+ − 219
+ − 220 Lisp_Object Qnear_certainty, Qquite_probable, Qsomewhat_likely;
1494
+ − 221 Lisp_Object Qslightly_likely;
771
+ − 222 Lisp_Object Qas_likely_as_unlikely, Qsomewhat_unlikely, Qquite_improbable;
+ − 223 Lisp_Object Qnearly_impossible;
+ − 224
+ − 225 Lisp_Object Qdo_eol, Qdo_coding;
+ − 226
+ − 227 Lisp_Object Qcanonicalize_after_coding;
+ − 228
1347
+ − 229 Lisp_Object QScoding_system_cookie;
+ − 230
771
+ − 231 /* This is used to convert autodetected coding systems into existing
+ − 232 systems. For example, the chain undecided->convert-eol-autodetect may
+ − 233 have its separate parts detected as mswindows-multibyte and
+ − 234 convert-eol-crlf, and the result needs to be mapped to
+ − 235 mswindows-multibyte-dos. */
+ − 236 /* #### It's not clear we need this whole chain-canonicalize mechanism
+ − 237 any more. */
+ − 238 static Lisp_Object Vchain_canonicalize_hash_table;
+ − 239
+ − 240 #ifdef HAVE_ZLIB
+ − 241 Lisp_Object Qgzip;
428
+ − 242 #endif
771
+ − 243
2297
+ − 244 /* Maps symbols (coding system names) to either coding system objects or
+ − 245 (for aliases) other names. */
771
+ − 246 static Lisp_Object Vcoding_system_hash_table;
428
+ − 247
+ − 248 int enable_multibyte_characters;
+ − 249
+ − 250 EXFUN (Fcopy_coding_system, 2);
+ − 251
+ − 252
+ − 253 /************************************************************************/
771
+ − 254 /* Coding system object methods */
428
+ − 255 /************************************************************************/
+ − 256
+ − 257 static Lisp_Object
+ − 258 mark_coding_system (Lisp_Object obj)
+ − 259 {
+ − 260 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
+ − 261
1204
+ − 262 #define MARKED_SLOT(x) mark_object (codesys->x);
+ − 263 #include "coding-system-slots.h"
771
+ − 264
+ − 265 MAYBE_CODESYSMETH (codesys, mark, (obj));
428
+ − 266
1204
+ − 267 return Qnil;
428
+ − 268 }
+ − 269
+ − 270 static void
771
+ − 271 print_coding_system_properties (Lisp_Object obj, Lisp_Object printcharfun)
+ − 272 {
+ − 273 Lisp_Coding_System *c = XCODING_SYSTEM (obj);
+ − 274 print_internal (c->methods->type, printcharfun, 1);
+ − 275 MAYBE_CODESYSMETH (c, print, (obj, printcharfun, 1));
+ − 276 if (CODING_SYSTEM_EOL_TYPE (c) != EOL_AUTODETECT)
+ − 277 write_fmt_string_lisp (printcharfun, " eol-type=%s",
+ − 278 1, Fcoding_system_property (obj, Qeol_type));
+ − 279 }
+ − 280
+ − 281 static void
428
+ − 282 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
2286
+ − 283 int UNUSED (escapeflag))
428
+ − 284 {
+ − 285 Lisp_Coding_System *c = XCODING_SYSTEM (obj);
+ − 286 if (print_readably)
771
+ − 287 printing_unreadable_object
+ − 288 ("printing unreadable object #<coding-system 0x%x>", c->header.uid);
+ − 289
+ − 290 write_fmt_string_lisp (printcharfun, "#<coding-system %s ", 1, c->name);
+ − 291 print_coding_system_properties (obj, printcharfun);
826
+ − 292 write_c_string (printcharfun, ">");
428
+ − 293 }
+ − 294
771
+ − 295 /* Print an abbreviated version of a coding system (but still containing
+ − 296 all the information), for use within a coding system print method. */
+ − 297
+ − 298 static void
+ − 299 print_coding_system_in_print_method (Lisp_Object cs, Lisp_Object printcharfun,
2286
+ − 300 int UNUSED (escapeflag))
771
+ − 301 {
800
+ − 302 write_fmt_string_lisp (printcharfun, "%s[", 1, XCODING_SYSTEM_NAME (cs));
771
+ − 303 print_coding_system_properties (cs, printcharfun);
826
+ − 304 write_c_string (printcharfun, "]");
771
+ − 305 }
+ − 306
428
+ − 307 static void
+ − 308 finalize_coding_system (void *header, int for_disksave)
+ − 309 {
771
+ − 310 Lisp_Object cs = wrap_coding_system ((Lisp_Coding_System *) header);
428
+ − 311 /* Since coding systems never go away, this function is not
+ − 312 necessary. But it would be necessary if we changed things
+ − 313 so that coding systems could go away. */
+ − 314 if (!for_disksave) /* see comment in lstream.c */
771
+ − 315 MAYBE_XCODESYSMETH (cs, finalize, (cs));
+ − 316 }
+ − 317
+ − 318 static Bytecount
+ − 319 sizeof_coding_system (const void *header)
+ − 320 {
+ − 321 const Lisp_Coding_System *p = (const Lisp_Coding_System *) header;
+ − 322 return offsetof (Lisp_Coding_System, data) + p->methods->extra_data_size;
428
+ − 323 }
+ − 324
1204
+ − 325 static const struct memory_description coding_system_methods_description_1[]
771
+ − 326 = {
+ − 327 { XD_LISP_OBJECT,
+ − 328 offsetof (struct coding_system_methods, type) },
+ − 329 { XD_LISP_OBJECT,
+ − 330 offsetof (struct coding_system_methods, predicate_symbol) },
+ − 331 { XD_END }
+ − 332 };
+ − 333
1204
+ − 334 const struct sized_memory_description coding_system_methods_description = {
771
+ − 335 sizeof (struct coding_system_methods),
+ − 336 coding_system_methods_description_1
+ − 337 };
+ − 338
1204
+ − 339 static const struct sized_memory_description coding_system_extra_description_map[] =
+ − 340 {
+ − 341 { offsetof (Lisp_Coding_System, methods) },
+ − 342 { offsetof (struct coding_system_methods, extra_description) },
+ − 343 { -1 },
771
+ − 344 };
+ − 345
1204
+ − 346 static const struct memory_description coding_system_description[] =
428
+ − 347 {
2367
+ − 348 { XD_BLOCK_PTR, offsetof (Lisp_Coding_System, methods), 1,
771
+ − 349 &coding_system_methods_description },
1204
+ − 350 #define MARKED_SLOT(x) { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, x) },
+ − 351 #define MARKED_SLOT_ARRAY(slot, size) \
+ − 352 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, slot), size },
+ − 353 #include "coding-system-slots.h"
2367
+ − 354 { XD_BLOCK_ARRAY, offsetof (Lisp_Coding_System, data), 1,
1204
+ − 355 coding_system_extra_description_map },
+ − 356 { XD_END }
771
+ − 357 };
+ − 358
1204
+ − 359 static const struct memory_description coding_system_empty_extra_description_1[] =
+ − 360 {
+ − 361 { XD_END }
+ − 362 };
+ − 363
+ − 364 const struct sized_memory_description coding_system_empty_extra_description = {
+ − 365 0, coding_system_empty_extra_description_1
+ − 366 };
+ − 367
934
+ − 368 DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION ("coding-system", coding_system,
+ − 369 1, /*dumpable-flag*/
+ − 370 mark_coding_system,
+ − 371 print_coding_system,
+ − 372 finalize_coding_system,
+ − 373 0, 0, coding_system_description,
+ − 374 sizeof_coding_system,
+ − 375 Lisp_Coding_System);
771
+ − 376
+ − 377 /************************************************************************/
+ − 378 /* Creating coding systems */
+ − 379 /************************************************************************/
+ − 380
+ − 381 static struct coding_system_methods *
+ − 382 decode_coding_system_type (Lisp_Object type, Error_Behavior errb)
428
+ − 383 {
771
+ − 384 int i;
+ − 385
+ − 386 for (i = 0; i < Dynarr_length (the_coding_system_type_entry_dynarr); i++)
428
+ − 387 {
771
+ − 388 if (EQ (type,
+ − 389 Dynarr_at (the_coding_system_type_entry_dynarr, i).meths->type))
+ − 390 return Dynarr_at (the_coding_system_type_entry_dynarr, i).meths;
428
+ − 391 }
771
+ − 392
+ − 393 maybe_invalid_constant ("Invalid coding system type", type,
+ − 394 Qcoding_system, errb);
+ − 395
+ − 396 return 0;
428
+ − 397 }
+ − 398
771
+ − 399 static int
+ − 400 valid_coding_system_type_p (Lisp_Object type)
428
+ − 401 {
771
+ − 402 return decode_coding_system_type (type, ERROR_ME_NOT) != 0;
+ − 403 }
+ − 404
+ − 405 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /*
+ − 406 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid.
+ − 407 Valid types depend on how XEmacs was compiled but may include
+ − 408 'undecided, 'chain, 'integer, 'ccl, 'iso2022, 'big5, 'shift-jis,
+ − 409 'utf-16, 'ucs-4, 'utf-8, etc.
+ − 410 */
+ − 411 (coding_system_type))
+ − 412 {
+ − 413 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil;
+ − 414 }
+ − 415
+ − 416 DEFUN ("coding-system-type-list", Fcoding_system_type_list, 0, 0, 0, /*
+ − 417 Return a list of valid coding system types.
+ − 418 */
+ − 419 ())
+ − 420 {
+ − 421 return Fcopy_sequence (Vcoding_system_type_list);
+ − 422 }
+ − 423
+ − 424 void
+ − 425 add_entry_to_coding_system_type_list (struct coding_system_methods *meths)
+ − 426 {
+ − 427 struct coding_system_type_entry entry;
+ − 428
+ − 429 entry.meths = meths;
+ − 430 Dynarr_add (the_coding_system_type_entry_dynarr, entry);
+ − 431 Vcoding_system_type_list = Fcons (meths->type, Vcoding_system_type_list);
428
+ − 432 }
+ − 433
+ − 434 DEFUN ("coding-system-p", Fcoding_system_p, 1, 1, 0, /*
+ − 435 Return t if OBJECT is a coding system.
+ − 436 A coding system is an object that defines how text containing multiple
+ − 437 character sets is encoded into a stream of (typically 8-bit) bytes.
+ − 438 The coding system is used to decode the stream into a series of
+ − 439 characters (which may be from multiple charsets) when the text is read
+ − 440 from a file or process, and is used to encode the text back into the
+ − 441 same format when it is written out to a file or process.
+ − 442
+ − 443 For example, many ISO2022-compliant coding systems (such as Compound
+ − 444 Text, which is used for inter-client data under the X Window System)
+ − 445 use escape sequences to switch between different charsets -- Japanese
+ − 446 Kanji, for example, is invoked with "ESC $ ( B"; ASCII is invoked
+ − 447 with "ESC ( B"; and Cyrillic is invoked with "ESC - L". See
+ − 448 `make-coding-system' for more information.
+ − 449
+ − 450 Coding systems are normally identified using a symbol, and the
+ − 451 symbol is accepted in place of the actual coding system object whenever
+ − 452 a coding system is called for. (This is similar to how faces work.)
+ − 453 */
+ − 454 (object))
+ − 455 {
+ − 456 return CODING_SYSTEMP (object) ? Qt : Qnil;
+ − 457 }
+ − 458
+ − 459 DEFUN ("find-coding-system", Ffind_coding_system, 1, 1, 0, /*
+ − 460 Retrieve the coding system of the given name.
+ − 461
+ − 462 If CODING-SYSTEM-OR-NAME is a coding-system object, it is simply
+ − 463 returned. Otherwise, CODING-SYSTEM-OR-NAME should be a symbol.
+ − 464 If there is no such coding system, nil is returned. Otherwise the
+ − 465 associated coding system object is returned.
+ − 466 */
+ − 467 (coding_system_or_name))
+ − 468 {
+ − 469 if (NILP (coding_system_or_name))
+ − 470 coding_system_or_name = Qbinary;
440
+ − 471 else if (CODING_SYSTEMP (coding_system_or_name))
+ − 472 return coding_system_or_name;
428
+ − 473 else
+ − 474 CHECK_SYMBOL (coding_system_or_name);
+ − 475
440
+ − 476 while (1)
+ − 477 {
+ − 478 coding_system_or_name =
+ − 479 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
+ − 480
771
+ − 481 if (CODING_SYSTEMP (coding_system_or_name)
+ − 482 || NILP (coding_system_or_name))
440
+ − 483 return coding_system_or_name;
+ − 484 }
428
+ − 485 }
+ − 486
+ − 487 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
+ − 488 Retrieve the coding system of the given name.
+ − 489 Same as `find-coding-system' except that if there is no such
+ − 490 coding system, an error is signaled instead of returning nil.
+ − 491 */
+ − 492 (name))
+ − 493 {
+ − 494 Lisp_Object coding_system = Ffind_coding_system (name);
+ − 495
+ − 496 if (NILP (coding_system))
563
+ − 497 invalid_argument ("No such coding system", name);
428
+ − 498 return coding_system;
+ − 499 }
+ − 500
771
+ − 501 int
+ − 502 coding_system_is_binary (Lisp_Object coding_system)
+ − 503 {
+ − 504 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
+ − 505 return
+ − 506 (EQ (CODING_SYSTEM_TYPE (cs), Qno_conversion) &&
+ − 507 CODING_SYSTEM_EOL_TYPE (cs) == EOL_LF &&
+ − 508 EQ (CODING_SYSTEM_POST_READ_CONVERSION (cs), Qnil) &&
+ − 509 EQ (CODING_SYSTEM_PRE_WRITE_CONVERSION (cs), Qnil));
+ − 510 }
+ − 511
+ − 512 static Lisp_Object
+ − 513 coding_system_real_canonical (Lisp_Object cs)
+ − 514 {
+ − 515 if (!NILP (XCODING_SYSTEM_CANONICAL (cs)))
+ − 516 return XCODING_SYSTEM_CANONICAL (cs);
+ − 517 return cs;
+ − 518 }
+ − 519
+ − 520 /* Return true if coding system is of the "standard" type that decodes
+ − 521 bytes into characters (suitable for decoding a text file). */
+ − 522 int
+ − 523 coding_system_is_for_text_file (Lisp_Object coding_system)
+ − 524 {
+ − 525 return (XCODESYSMETH_OR_GIVEN
+ − 526 (coding_system, conversion_end_type,
+ − 527 (coding_system_real_canonical (coding_system)),
+ − 528 DECODES_BYTE_TO_CHARACTER) ==
+ − 529 DECODES_BYTE_TO_CHARACTER);
+ − 530 }
+ − 531
+ − 532 static int
+ − 533 decoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex)
+ − 534 {
+ − 535 enum source_sink_type type =
+ − 536 XCODESYSMETH_OR_GIVEN (cs, conversion_end_type,
+ − 537 (coding_system_real_canonical (cs)),
+ − 538 DECODES_BYTE_TO_CHARACTER);
+ − 539 if (sex == CODING_SOURCE)
+ − 540 return (type == DECODES_CHARACTER_TO_CHARACTER ||
+ − 541 type == DECODES_CHARACTER_TO_BYTE);
+ − 542 else
+ − 543 return (type == DECODES_CHARACTER_TO_CHARACTER ||
+ − 544 type == DECODES_BYTE_TO_CHARACTER);
+ − 545 }
+ − 546
+ − 547 static int
+ − 548 encoding_source_sink_type_is_char (Lisp_Object cs, enum source_or_sink sex)
+ − 549 {
+ − 550 return decoding_source_sink_type_is_char (cs,
+ − 551 /* Sex change */
+ − 552 sex == CODING_SOURCE ?
+ − 553 CODING_SINK : CODING_SOURCE);
+ − 554 }
+ − 555
+ − 556 /* Like Ffind_coding_system() but check that the coding system is of the
+ − 557 "standard" type that decodes bytes into characters (suitable for
+ − 558 decoding a text file), and if not, returns an appropriate wrapper that
+ − 559 does. Also, if EOL_WRAP is non-zero, check whether this coding system
+ − 560 wants EOL auto-detection, and if so, wrap with a convert-eol coding
+ − 561 system to do this. */
+ − 562
+ − 563 Lisp_Object
+ − 564 find_coding_system_for_text_file (Lisp_Object name, int eol_wrap)
+ − 565 {
+ − 566 Lisp_Object coding_system = Ffind_coding_system (name);
+ − 567 Lisp_Object wrapper = coding_system;
+ − 568
+ − 569 if (NILP (coding_system))
+ − 570 return Qnil;
+ − 571 if (!coding_system_is_for_text_file (coding_system))
+ − 572 {
+ − 573 wrapper = XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system);
+ − 574 if (NILP (wrapper))
+ − 575 {
+ − 576 Lisp_Object chain;
+ − 577 if (!decoding_source_sink_type_is_char (coding_system, CODING_SINK))
+ − 578 chain = list2 (coding_system, Qbinary);
+ − 579 else
+ − 580 chain = list1 (coding_system);
+ − 581 if (decoding_source_sink_type_is_char (coding_system, CODING_SOURCE))
+ − 582 chain = Fcons (Qbinary, chain);
+ − 583 wrapper =
+ − 584 make_internal_coding_system
+ − 585 (coding_system,
+ − 586 "internal-text-file-wrapper",
+ − 587 Qchain,
+ − 588 Qunbound, list4 (Qchain, chain,
+ − 589 Qcanonicalize_after_coding, coding_system));
+ − 590 XCODING_SYSTEM_TEXT_FILE_WRAPPER (coding_system) = wrapper;
+ − 591 }
+ − 592 }
+ − 593
+ − 594 if (!eol_wrap || XCODING_SYSTEM_EOL_TYPE (coding_system) != EOL_AUTODETECT)
+ − 595 return wrapper;
+ − 596
+ − 597 coding_system = wrapper;
+ − 598 wrapper = XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system);
+ − 599 if (!NILP (wrapper))
+ − 600 return wrapper;
+ − 601 wrapper =
+ − 602 make_internal_coding_system
+ − 603 (coding_system,
+ − 604 "internal-auto-eol-wrapper",
+ − 605 Qundecided, Qunbound,
+ − 606 list4 (Qcoding_system, coding_system,
+ − 607 Qdo_eol, Qt));
+ − 608 XCODING_SYSTEM_AUTO_EOL_WRAPPER (coding_system) = wrapper;
+ − 609 return wrapper;
+ − 610 }
+ − 611
+ − 612 /* Like Fget_coding_system() but verify that the coding system is of the
+ − 613 "standard" type that decodes bytes into characters (suitable for
+ − 614 decoding a text file), and if not, returns an appropriate wrapper that
+ − 615 does. Also, if EOL_WRAP is non-zero, check whether this coding system
+ − 616 wants EOL auto-detection, and if so, wrap with a convert-eol coding
+ − 617 system to do this. */
+ − 618
+ − 619 Lisp_Object
+ − 620 get_coding_system_for_text_file (Lisp_Object name, int eol_wrap)
+ − 621 {
+ − 622 Lisp_Object coding_system = find_coding_system_for_text_file (name,
+ − 623 eol_wrap);
+ − 624 if (NILP (coding_system))
+ − 625 invalid_argument ("No such coding system", name);
+ − 626 return coding_system;
+ − 627 }
+ − 628
+ − 629 /* We store the coding systems in hash tables with the names as the
+ − 630 key and the actual coding system object as the value. Occasionally
+ − 631 we need to use them in a list format. These routines provide us
+ − 632 with that. */
428
+ − 633 struct coding_system_list_closure
+ − 634 {
+ − 635 Lisp_Object *coding_system_list;
771
+ − 636 int normal;
+ − 637 int internal;
428
+ − 638 };
+ − 639
+ − 640 static int
2286
+ − 641 add_coding_system_to_list_mapper (Lisp_Object key, Lisp_Object UNUSED (value),
428
+ − 642 void *coding_system_list_closure)
+ − 643 {
+ − 644 /* This function can GC */
+ − 645 struct coding_system_list_closure *cscl =
+ − 646 (struct coding_system_list_closure *) coding_system_list_closure;
+ − 647 Lisp_Object *coding_system_list = cscl->coding_system_list;
+ − 648
771
+ − 649 /* We can't just use VALUE because KEY might be an alias, and we need
+ − 650 the real coding system object. */
+ − 651 if (XCODING_SYSTEM (Ffind_coding_system (key))->internal_p ?
+ − 652 cscl->internal : cscl->normal)
+ − 653 *coding_system_list = Fcons (key, *coding_system_list);
428
+ − 654 return 0;
+ − 655 }
+ − 656
2297
+ − 657 /* #### should we specify a conventional for "all coding systems"? */
771
+ − 658 DEFUN ("coding-system-list", Fcoding_system_list, 0, 1, 0, /*
428
+ − 659 Return a list of the names of all defined coding systems.
771
+ − 660 If INTERNAL is nil, only the normal (non-internal) coding systems are
+ − 661 included. (Internal coding systems are created for various internal
+ − 662 purposes, such as implementing EOL types of CRLF and CR; generally, you do
+ − 663 not want to see these.) If it is t, only the internal coding systems are
+ − 664 included. If it is any other non-nil value both normal and internal are
+ − 665 included.
428
+ − 666 */
771
+ − 667 (internal))
428
+ − 668 {
+ − 669 Lisp_Object coding_system_list = Qnil;
+ − 670 struct gcpro gcpro1;
+ − 671 struct coding_system_list_closure coding_system_list_closure;
+ − 672
+ − 673 GCPRO1 (coding_system_list);
+ − 674 coding_system_list_closure.coding_system_list = &coding_system_list;
771
+ − 675 coding_system_list_closure.normal = !EQ (internal, Qt);
+ − 676 coding_system_list_closure.internal = !NILP (internal);
428
+ − 677 elisp_maphash (add_coding_system_to_list_mapper, Vcoding_system_hash_table,
+ − 678 &coding_system_list_closure);
+ − 679 UNGCPRO;
+ − 680
+ − 681 return coding_system_list;
+ − 682 }
+ − 683
+ − 684 DEFUN ("coding-system-name", Fcoding_system_name, 1, 1, 0, /*
+ − 685 Return the name of the given coding system.
+ − 686 */
+ − 687 (coding_system))
+ − 688 {
+ − 689 coding_system = Fget_coding_system (coding_system);
+ − 690 return XCODING_SYSTEM_NAME (coding_system);
+ − 691 }
+ − 692
+ − 693 static Lisp_Coding_System *
771
+ − 694 allocate_coding_system (struct coding_system_methods *codesys_meths,
+ − 695 Bytecount data_size,
+ − 696 Lisp_Object name)
428
+ − 697 {
771
+ − 698 Bytecount total_size = offsetof (Lisp_Coding_System, data) + data_size;
428
+ − 699 Lisp_Coding_System *codesys =
1204
+ − 700 (Lisp_Coding_System *) basic_alloc_lcrecord (total_size,
+ − 701 &lrecord_coding_system);
+ − 702
771
+ − 703 codesys->methods = codesys_meths;
1204
+ − 704 #define MARKED_SLOT(x) codesys->x = Qnil;
+ − 705 #include "coding-system-slots.h"
+ − 706
771
+ − 707 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_LF;
+ − 708 CODING_SYSTEM_NAME (codesys) = name;
+ − 709
+ − 710 MAYBE_CODESYSMETH (codesys, init, (wrap_coding_system (codesys)));
428
+ − 711
+ − 712 return codesys;
+ − 713 }
+ − 714
771
+ − 715 static enum eol_type
+ − 716 symbol_to_eol_type (Lisp_Object symbol)
+ − 717 {
+ − 718 CHECK_SYMBOL (symbol);
+ − 719 if (NILP (symbol)) return EOL_AUTODETECT;
+ − 720 if (EQ (symbol, Qlf)) return EOL_LF;
+ − 721 if (EQ (symbol, Qcrlf)) return EOL_CRLF;
+ − 722 if (EQ (symbol, Qcr)) return EOL_CR;
+ − 723
+ − 724 invalid_constant ("Unrecognized eol type", symbol);
1204
+ − 725 RETURN_NOT_REACHED (EOL_AUTODETECT);
771
+ − 726 }
+ − 727
+ − 728 static Lisp_Object
+ − 729 eol_type_to_symbol (enum eol_type type)
+ − 730 {
+ − 731 switch (type)
+ − 732 {
+ − 733 default: abort ();
+ − 734 case EOL_LF: return Qlf;
+ − 735 case EOL_CRLF: return Qcrlf;
+ − 736 case EOL_CR: return Qcr;
+ − 737 case EOL_AUTODETECT: return Qnil;
+ − 738 }
+ − 739 }
+ − 740
+ − 741 struct subsidiary_type
+ − 742 {
2367
+ − 743 Ascbyte *extension;
+ − 744 Ascbyte *mnemonic_ext;
771
+ − 745 enum eol_type eol;
+ − 746 };
+ − 747
+ − 748 static struct subsidiary_type coding_subsidiary_list[] =
+ − 749 { { "-unix", "", EOL_LF },
+ − 750 { "-dos", ":T", EOL_CRLF },
+ − 751 { "-mac", ":t", EOL_CR } };
+ − 752
+ − 753 /* kludge */
428
+ − 754 static void
771
+ − 755 setup_eol_coding_systems (Lisp_Object codesys)
428
+ − 756 {
793
+ − 757 int len = XSTRING_LENGTH (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name);
2367
+ − 758 Ibyte *codesys_name = alloca_ibytes (len + 7);
771
+ − 759 int mlen = -1;
867
+ − 760 Ibyte *codesys_mnemonic = 0;
771
+ − 761 Lisp_Object codesys_name_sym, sub_codesys;
+ − 762 int i;
+ − 763
+ − 764 memcpy (codesys_name,
793
+ − 765 XSTRING_DATA (XSYMBOL (XCODING_SYSTEM_NAME (codesys))->name), len);
771
+ − 766
+ − 767 if (STRINGP (XCODING_SYSTEM_MNEMONIC (codesys)))
428
+ − 768 {
771
+ − 769 mlen = XSTRING_LENGTH (XCODING_SYSTEM_MNEMONIC (codesys));
2367
+ − 770 codesys_mnemonic = alloca_ibytes (mlen + 7);
771
+ − 771 memcpy (codesys_mnemonic,
+ − 772 XSTRING_DATA (XCODING_SYSTEM_MNEMONIC (codesys)), mlen);
+ − 773 }
+ − 774
+ − 775 /* Create three "subsidiary" coding systems, decoding data encoded using
+ − 776 each of the three EOL types. We do this for each subsidiary by
+ − 777 copying the original coding system, setting the EOL type
+ − 778 appropriately, and setting the CANONICAL member of the new coding
+ − 779 system to be a chain consisting of the original coding system followed
+ − 780 by a convert-eol coding system to do the EOL decoding. For EOL type
+ − 781 LF, however, we don't need any decoding, so we skip creating a
+ − 782 CANONICAL.
+ − 783
+ − 784 If the original coding system is not a text-type coding system
+ − 785 (decodes byte->char), we need to coerce it to one by the appropriate
+ − 786 wrapping in CANONICAL. */
+ − 787
+ − 788 for (i = 0; i < countof (coding_subsidiary_list); i++)
+ − 789 {
2367
+ − 790 Ascbyte *extension = coding_subsidiary_list[i].extension;
+ − 791 Ascbyte *mnemonic_ext = coding_subsidiary_list[i].mnemonic_ext;
771
+ − 792 enum eol_type eol = coding_subsidiary_list[i].eol;
+ − 793
2367
+ − 794 qxestrcpy_ascii (codesys_name + len, extension);
771
+ − 795 codesys_name_sym = intern_int (codesys_name);
+ − 796 if (mlen != -1)
2367
+ − 797 qxestrcpy_ascii (codesys_mnemonic + mlen, mnemonic_ext);
771
+ − 798
+ − 799 sub_codesys = Fcopy_coding_system (codesys, codesys_name_sym);
+ − 800 if (mlen != -1)
+ − 801 XCODING_SYSTEM_MNEMONIC (sub_codesys) =
+ − 802 build_intstring (codesys_mnemonic);
+ − 803
+ − 804 if (eol != EOL_LF)
+ − 805 {
+ − 806 Lisp_Object chain = list2 (get_coding_system_for_text_file
+ − 807 (codesys, 0),
+ − 808 eol == EOL_CR ? Qconvert_eol_cr :
+ − 809 Qconvert_eol_crlf);
+ − 810 Lisp_Object canon =
+ − 811 make_internal_coding_system
+ − 812 (sub_codesys, "internal-subsidiary-eol-wrapper",
+ − 813 Qchain, Qunbound,
+ − 814 mlen != -1 ?
+ − 815 list6 (Qmnemonic, build_intstring (codesys_mnemonic),
+ − 816 Qchain, chain,
+ − 817 Qcanonicalize_after_coding, sub_codesys) :
+ − 818 list4 (Qchain, chain,
+ − 819 Qcanonicalize_after_coding, sub_codesys));
+ − 820 XCODING_SYSTEM_CANONICAL (sub_codesys) = canon;
+ − 821 }
+ − 822 XCODING_SYSTEM_EOL_TYPE (sub_codesys) = eol;
+ − 823 XCODING_SYSTEM_SUBSIDIARY_PARENT (sub_codesys) = codesys;
+ − 824 XCODING_SYSTEM (codesys)->eol[eol] = sub_codesys;
428
+ − 825 }
+ − 826 }
+ − 827
771
+ − 828 /* Basic function to create new coding systems. For `make-coding-system',
+ − 829 NAME-OR-EXISTING is the NAME argument, PREFIX is null, and TYPE,
+ − 830 DESCRIPTION, and PROPS are the same. All created coding systems are put
+ − 831 in a hash table indexed by NAME.
+ − 832
+ − 833 If PREFIX is a string, NAME-OR-EXISTING should specify an existing
+ − 834 coding system (or nil), and an internal coding system will be created.
+ − 835 The name of the coding system will be constructed by combining PREFIX
+ − 836 with the name of the existing coding system (if given), and a number
+ − 837 will be appended to insure uniqueness. In such a case, if Qunbound is
+ − 838 given for DESCRIPTION, the description gets created based on the
+ − 839 generated name. Also, if no mnemonic is given in the properties list, a
+ − 840 mnemonic is created based on the generated name.
+ − 841
+ − 842 For internal coding systems, the coding system is marked as internal
+ − 843 (see `coding-system-list'), and no subsidiaries will be created or
+ − 844 eol-wrapping will happen. Otherwise:
+ − 845
+ − 846 -- if the eol-type property is `lf' or t, the coding system is merely
+ − 847 created and returned. (For t, the coding system will be wrapped with
+ − 848 an EOL autodetector when it's used to read a file.)
+ − 849
+ − 850 -- if eol-type is `crlf' or `cr', after the coding system object is
+ − 851 created, it will be wrapped in a chain with the appropriate
+ − 852 convert-eol coding system (either `convert-eol-crlf' or
+ − 853 `convert-eol-cr'), so that CRLF->LF or CR->LF conversion is done at
+ − 854 decoding time, and the opposite at encoding time. The resulting
+ − 855 chain becomes the CANONICAL field of the coding system object.
+ − 856
+ − 857 -- if eol-type is nil or omitted, "subsidiaries" are generated: Three
+ − 858 coding systems where the original coding system (before wrapping with
+ − 859 convert-eol-autodetect) is either unwrapped or wrapped with
+ − 860 convert-eol-crlf or convert-eol-cr, respectively, so that coding systems
+ − 861 to handle LF, CRLF, and CR end-of-line indicators are created. (This
+ − 862 crazy crap is based on existing behavior in other Mule versions,
+ − 863 including FSF Emacs.)
+ − 864 */
428
+ − 865
+ − 866 static Lisp_Object
2367
+ − 867 make_coding_system_1 (Lisp_Object name_or_existing, Ascbyte *prefix,
771
+ − 868 Lisp_Object type, Lisp_Object description,
+ − 869 Lisp_Object props)
428
+ − 870 {
771
+ − 871 Lisp_Coding_System *cs;
+ − 872 int need_to_setup_eol_systems = 1;
+ − 873 enum eol_type eol_wrapper = EOL_AUTODETECT;
+ − 874 struct coding_system_methods *meths;
+ − 875 Lisp_Object csobj;
+ − 876 Lisp_Object defmnem = Qnil;
+ − 877
+ − 878 if (NILP (type))
+ − 879 type = Qundecided;
+ − 880 meths = decode_coding_system_type (type, ERROR_ME);
+ − 881
+ − 882 if (prefix)
428
+ − 883 {
867
+ − 884 Ibyte *newname =
771
+ − 885 emacs_sprintf_malloc (NULL, "%s-%s-%d",
+ − 886 prefix,
867
+ − 887 NILP (name_or_existing) ? (Ibyte *) "nil" :
771
+ − 888 XSTRING_DATA (Fsymbol_name (XCODING_SYSTEM_NAME
+ − 889 (name_or_existing))),
+ − 890 ++coding_system_tick);
+ − 891 name_or_existing = intern_int (newname);
1726
+ − 892 xfree (newname, Ibyte *);
771
+ − 893
+ − 894 if (UNBOUNDP (description))
+ − 895 {
+ − 896 newname =
+ − 897 emacs_sprintf_malloc
+ − 898 (NULL, "For Internal Use (%s)",
+ − 899 XSTRING_DATA (Fsymbol_name (name_or_existing)));
+ − 900 description = build_intstring (newname);
1726
+ − 901 xfree (newname, Ibyte *);
771
+ − 902 }
+ − 903
+ − 904 newname = emacs_sprintf_malloc (NULL, "Int%d", coding_system_tick);
+ − 905 defmnem = build_intstring (newname);
1726
+ − 906 xfree (newname, Ibyte *);
428
+ − 907 }
771
+ − 908 else
+ − 909 CHECK_SYMBOL (name_or_existing);
+ − 910
+ − 911 if (!NILP (Ffind_coding_system (name_or_existing)))
+ − 912 invalid_operation ("Cannot redefine existing coding system",
+ − 913 name_or_existing);
+ − 914
+ − 915 cs = allocate_coding_system (meths, meths->extra_data_size,
+ − 916 name_or_existing);
793
+ − 917 csobj = wrap_coding_system (cs);
771
+ − 918
+ − 919 cs->internal_p = !!prefix;
+ − 920
+ − 921 if (NILP (description))
+ − 922 description = build_string ("");
+ − 923 else
+ − 924 CHECK_STRING (description);
+ − 925 CODING_SYSTEM_DESCRIPTION (cs) = description;
+ − 926
+ − 927 if (!NILP (defmnem))
+ − 928 CODING_SYSTEM_MNEMONIC (cs) = defmnem;
+ − 929
+ − 930 {
+ − 931 EXTERNAL_PROPERTY_LIST_LOOP_3 (key, value, props)
+ − 932 {
+ − 933 int recognized = 1;
+ − 934
+ − 935 if (EQ (key, Qmnemonic))
+ − 936 {
+ − 937 if (!NILP (value))
+ − 938 CHECK_STRING (value);
+ − 939 CODING_SYSTEM_MNEMONIC (cs) = value;
+ − 940 }
+ − 941
+ − 942 else if (EQ (key, Qdocumentation))
+ − 943 {
+ − 944 if (!NILP (value))
+ − 945 CHECK_STRING (value);
+ − 946 CODING_SYSTEM_DOCUMENTATION (cs) = value;
+ − 947 }
+ − 948
+ − 949 else if (EQ (key, Qeol_type))
+ − 950 {
+ − 951 need_to_setup_eol_systems = NILP (value);
+ − 952 if (EQ (value, Qt))
+ − 953 value = Qnil;
+ − 954 eol_wrapper = symbol_to_eol_type (value);
+ − 955 }
+ − 956
+ − 957 else if (EQ (key, Qpost_read_conversion))
+ − 958 CODING_SYSTEM_POST_READ_CONVERSION (cs) = value;
+ − 959 else if (EQ (key, Qpre_write_conversion))
+ − 960 CODING_SYSTEM_PRE_WRITE_CONVERSION (cs) = value;
+ − 961 /* FSF compatibility */
+ − 962 else if (EQ (key, Qtranslation_table_for_decode))
+ − 963 ;
+ − 964 else if (EQ (key, Qtranslation_table_for_encode))
+ − 965 ;
+ − 966 else if (EQ (key, Qsafe_chars))
+ − 967 ;
+ − 968 else if (EQ (key, Qsafe_charsets))
+ − 969 ;
+ − 970 else if (EQ (key, Qmime_charset))
+ − 971 ;
+ − 972 else if (EQ (key, Qvalid_codes))
+ − 973 ;
+ − 974 else
+ − 975 recognized = CODESYSMETH_OR_GIVEN (cs, putprop,
+ − 976 (csobj, key, value), 0);
+ − 977
+ − 978 if (!recognized)
+ − 979 invalid_constant ("Unrecognized property", key);
+ − 980 }
+ − 981 }
+ − 982
+ − 983 {
+ − 984 XCODING_SYSTEM_CANONICAL (csobj) =
+ − 985 CODESYSMETH_OR_GIVEN (cs, canonicalize, (csobj), Qnil);
+ − 986 XCODING_SYSTEM_EOL_TYPE (csobj) = EOL_AUTODETECT; /* for copy-coding-system
+ − 987 below */
+ − 988
+ − 989 if (need_to_setup_eol_systems && !cs->internal_p)
+ − 990 setup_eol_coding_systems (csobj);
+ − 991 else if (eol_wrapper == EOL_CR || eol_wrapper == EOL_CRLF)
+ − 992 {
+ − 993 /* If a specific eol-type (other than LF) was specified, we handle
+ − 994 this by converting the coding system into a chain that wraps the
+ − 995 coding system along with a convert-eol system after it, in
+ − 996 exactly that same switcheroo fashion that the normal
+ − 997 canonicalize method works -- BUT we will run into a problem if
+ − 998 we do it the obvious way, because when `chain' creates its
+ − 999 substreams, the substream containing the coding system we're
+ − 1000 creating will have canonicalization expansion done on it,
+ − 1001 leading to infinite recursion. So we have to generate a new,
+ − 1002 internal coding system with the previous value of CANONICAL. */
867
+ − 1003 Ibyte *newname =
771
+ − 1004 emacs_sprintf_malloc
+ − 1005 (NULL, "internal-eol-copy-%s-%d",
+ − 1006 XSTRING_DATA (Fsymbol_name (name_or_existing)),
+ − 1007 ++coding_system_tick);
+ − 1008 Lisp_Object newnamesym = intern_int (newname);
+ − 1009 Lisp_Object copied = Fcopy_coding_system (csobj, newnamesym);
1726
+ − 1010 xfree (newname, Ibyte *);
771
+ − 1011
+ − 1012 XCODING_SYSTEM_CANONICAL (csobj) =
+ − 1013 make_internal_coding_system
+ − 1014 (csobj,
+ − 1015 "internal-eol-wrapper",
+ − 1016 Qchain, Qunbound,
+ − 1017 list4 (Qchain,
+ − 1018 list2 (copied,
+ − 1019 eol_wrapper == EOL_CR ?
+ − 1020 Qconvert_eol_cr :
+ − 1021 Qconvert_eol_crlf),
+ − 1022 Qcanonicalize_after_coding,
+ − 1023 csobj));
+ − 1024 }
+ − 1025 XCODING_SYSTEM_EOL_TYPE (csobj) = eol_wrapper;
+ − 1026 }
+ − 1027
+ − 1028 Fputhash (name_or_existing, csobj, Vcoding_system_hash_table);
+ − 1029
+ − 1030 return csobj;
428
+ − 1031 }
+ − 1032
771
+ − 1033 Lisp_Object
2367
+ − 1034 make_internal_coding_system (Lisp_Object existing, Ascbyte *prefix,
771
+ − 1035 Lisp_Object type, Lisp_Object description,
+ − 1036 Lisp_Object props)
+ − 1037 {
+ − 1038 return make_coding_system_1 (existing, prefix, type, description, props);
+ − 1039 }
428
+ − 1040
+ − 1041 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /*
+ − 1042 Register symbol NAME as a coding system.
+ − 1043
+ − 1044 TYPE describes the conversion method used and should be one of
+ − 1045
+ − 1046 nil or 'undecided
+ − 1047 Automatic conversion. XEmacs attempts to detect the coding system
+ − 1048 used in the file.
771
+ − 1049 'chain
+ − 1050 Chain two or more coding systems together to make a combination coding
+ − 1051 system.
428
+ − 1052 'no-conversion
+ − 1053 No conversion. Use this for binary files and such. On output,
+ − 1054 graphic characters that are not in ASCII or Latin-1 will be
+ − 1055 replaced by a ?. (For a no-conversion-encoded buffer, these
+ − 1056 characters will only be present if you explicitly insert them.)
771
+ − 1057 'convert-eol
+ − 1058 Convert CRLF sequences or CR to LF.
428
+ − 1059 'shift-jis
+ − 1060 Shift-JIS (a Japanese encoding commonly used in PC operating systems).
771
+ − 1061 'unicode
+ − 1062 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.).
+ − 1063 'mswindows-unicode-to-multibyte
+ − 1064 (MS Windows only) Converts from Windows Unicode to Windows Multibyte
+ − 1065 (any code page encoding) upon encoding, and the other way upon decoding.
+ − 1066 'mswindows-multibyte
+ − 1067 Converts to or from Windows Multibyte (any code page encoding).
+ − 1068 This is resolved into a chain of `mswindows-unicode' and
+ − 1069 `mswindows-unicode-to-multibyte'.
428
+ − 1070 'iso2022
+ − 1071 Any ISO2022-compliant encoding. Among other things, this includes
+ − 1072 JIS (the Japanese encoding commonly used for e-mail), EUC (the
+ − 1073 standard Unix encoding for Japanese and other languages), and
+ − 1074 Compound Text (the encoding used in X11). You can specify more
442
+ − 1075 specific information about the conversion with the PROPS argument.
428
+ − 1076 'big5
+ − 1077 Big5 (the encoding commonly used for Taiwanese).
+ − 1078 'ccl
+ − 1079 The conversion is performed using a user-written pseudo-code
+ − 1080 program. CCL (Code Conversion Language) is the name of this
+ − 1081 pseudo-code.
771
+ − 1082 'gzip
+ − 1083 GZIP compression format.
428
+ − 1084 'internal
+ − 1085 Write out or read in the raw contents of the memory representing
+ − 1086 the buffer's text. This is primarily useful for debugging
+ − 1087 purposes, and is only enabled when XEmacs has been compiled with
+ − 1088 DEBUG_XEMACS defined (via the --debug configure option).
+ − 1089 WARNING: Reading in a file using 'internal conversion can result
+ − 1090 in an internal inconsistency in the memory representing a
+ − 1091 buffer's text, which will produce unpredictable results and may
+ − 1092 cause XEmacs to crash. Under normal circumstances you should
+ − 1093 never use 'internal conversion.
+ − 1094
771
+ − 1095 DESCRIPTION is a short English phrase describing the coding system,
+ − 1096 suitable for use as a menu item. (See also the `documentation' property
+ − 1097 below.)
428
+ − 1098
+ − 1099 PROPS is a property list, describing the specific nature of the
+ − 1100 character set. Recognized properties are:
+ − 1101
+ − 1102 'mnemonic
+ − 1103 String to be displayed in the modeline when this coding system is
+ − 1104 active.
+ − 1105
771
+ − 1106 'documentation
+ − 1107 Detailed documentation on the coding system.
+ − 1108
428
+ − 1109 'eol-type
+ − 1110 End-of-line conversion to be used. It should be one of
+ − 1111
+ − 1112 nil
+ − 1113 Automatically detect the end-of-line type (LF, CRLF,
+ − 1114 or CR). Also generate subsidiary coding systems named
+ − 1115 `NAME-unix', `NAME-dos', and `NAME-mac', that are
+ − 1116 identical to this coding system but have an EOL-TYPE
+ − 1117 value of 'lf, 'crlf, and 'cr, respectively.
+ − 1118 'lf
+ − 1119 The end of a line is marked externally using ASCII LF.
+ − 1120 Since this is also the way that XEmacs represents an
+ − 1121 end-of-line internally, specifying this option results
+ − 1122 in no end-of-line conversion. This is the standard
+ − 1123 format for Unix text files.
+ − 1124 'crlf
+ − 1125 The end of a line is marked externally using ASCII
+ − 1126 CRLF. This is the standard format for MS-DOS text
+ − 1127 files.
+ − 1128 'cr
+ − 1129 The end of a line is marked externally using ASCII CR.
+ − 1130 This is the standard format for Macintosh text files.
+ − 1131 t
+ − 1132 Automatically detect the end-of-line type but do not
+ − 1133 generate subsidiary coding systems. (This value is
+ − 1134 converted to nil when stored internally, and
+ − 1135 `coding-system-property' will return nil.)
+ − 1136
+ − 1137 'post-read-conversion
771
+ − 1138 The value is a function to call after some text is inserted and
+ − 1139 decoded by the coding system itself and before any functions in
+ − 1140 `after-change-functions' are called. (#### Not actually true in
+ − 1141 XEmacs. `after-change-functions' will be called twice if
+ − 1142 `post-read-conversion' changes something.) The argument of this
+ − 1143 function is the same as for a function in
+ − 1144 `after-insert-file-functions', i.e. LENGTH of the text inserted,
+ − 1145 with point at the head of the text to be decoded.
428
+ − 1146
+ − 1147 'pre-write-conversion
771
+ − 1148 The value is a function to call after all functions in
+ − 1149 `write-region-annotate-functions' and `buffer-file-format' are
+ − 1150 called, and before the text is encoded by the coding system itself.
+ − 1151 The arguments to this function are the same as those of a function
+ − 1152 in `write-region-annotate-functions', i.e. FROM and TO, specifying
+ − 1153 a region of text.
+ − 1154
+ − 1155
+ − 1156
+ − 1157 The following properties are allowed for FSF compatibility but currently
+ − 1158 ignored:
+ − 1159
+ − 1160 'translation-table-for-decode
+ − 1161 The value is a translation table to be applied on decoding. See
+ − 1162 the function `make-translation-table' for the format of translation
+ − 1163 table. This is not applicable to CCL-based coding systems.
+ − 1164
+ − 1165 'translation-table-for-encode
+ − 1166 The value is a translation table to be applied on encoding. This is
+ − 1167 not applicable to CCL-based coding systems.
+ − 1168
+ − 1169 'safe-chars
+ − 1170 The value is a char table. If a character has non-nil value in it,
+ − 1171 the character is safely supported by the coding system. This
+ − 1172 overrides the specification of safe-charsets.
+ − 1173
+ − 1174 'safe-charsets
+ − 1175 The value is a list of charsets safely supported by the coding
+ − 1176 system. The value t means that all charsets Emacs handles are
+ − 1177 supported. Even if some charset is not in this list, it doesn't
+ − 1178 mean that the charset can't be encoded in the coding system;
+ − 1179 it just means that some other receiver of text encoded
+ − 1180 in the coding system won't be able to handle that charset.
+ − 1181
+ − 1182 'mime-charset
+ − 1183 The value is a symbol of which name is `MIME-charset' parameter of
+ − 1184 the coding system.
+ − 1185
+ − 1186 'valid-codes (meaningful only for a coding system based on CCL)
+ − 1187 The value is a list to indicate valid byte ranges of the encoded
+ − 1188 file. Each element of the list is an integer or a cons of integer.
+ − 1189 In the former case, the integer value is a valid byte code. In the
+ − 1190 latter case, the integers specifies the range of valid byte codes.
+ − 1191
+ − 1192
+ − 1193
+ − 1194 The following additional property is recognized if TYPE is 'convert-eol:
+ − 1195
+ − 1196 'subtype
793
+ − 1197 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding,
+ − 1198 the corresponding sequence will be converted to LF. When encoding,
+ − 1199 the opposite happens. This coding system converts characters to
771
+ − 1200 characters.
+ − 1201
428
+ − 1202
+ − 1203
+ − 1204 The following additional properties are recognized if TYPE is 'iso2022:
+ − 1205
+ − 1206 'charset-g0
+ − 1207 'charset-g1
+ − 1208 'charset-g2
+ − 1209 'charset-g3
+ − 1210 The character set initially designated to the G0 - G3 registers.
+ − 1211 The value should be one of
+ − 1212
+ − 1213 -- A charset object (designate that character set)
+ − 1214 -- nil (do not ever use this register)
+ − 1215 -- t (no character set is initially designated to
+ − 1216 the register, but may be later on; this automatically
+ − 1217 sets the corresponding `force-g*-on-output' property)
+ − 1218
+ − 1219 'force-g0-on-output
+ − 1220 'force-g1-on-output
+ − 1221 'force-g2-on-output
+ − 1222 'force-g2-on-output
+ − 1223 If non-nil, send an explicit designation sequence on output before
+ − 1224 using the specified register.
+ − 1225
+ − 1226 'short
+ − 1227 If non-nil, use the short forms "ESC $ @", "ESC $ A", and
+ − 1228 "ESC $ B" on output in place of the full designation sequences
+ − 1229 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B".
+ − 1230
+ − 1231 'no-ascii-eol
+ − 1232 If non-nil, don't designate ASCII to G0 at each end of line on output.
+ − 1233 Setting this to non-nil also suppresses other state-resetting that
+ − 1234 normally happens at the end of a line.
+ − 1235
+ − 1236 'no-ascii-cntl
+ − 1237 If non-nil, don't designate ASCII to G0 before control chars on output.
+ − 1238
+ − 1239 'seven
+ − 1240 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit
+ − 1241 environment.
+ − 1242
+ − 1243 'lock-shift
+ − 1244 If non-nil, use locking-shift (SO/SI) instead of single-shift
+ − 1245 or designation by escape sequence.
+ − 1246
+ − 1247 'no-iso6429
+ − 1248 If non-nil, don't use ISO6429's direction specification.
+ − 1249
+ − 1250 'escape-quoted
+ − 1251 If non-nil, literal control characters that are the same as
+ − 1252 the beginning of a recognized ISO2022 or ISO6429 escape sequence
+ − 1253 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
+ − 1254 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character
+ − 1255 so that they can be properly distinguished from an escape sequence.
+ − 1256 (Note that doing this results in a non-portable encoding.) This
+ − 1257 encoding flag is used for byte-compiled files. Note that ESC
+ − 1258 is a good choice for a quoting character because there are no
+ − 1259 escape sequences whose second byte is a character from the Control-0
+ − 1260 or Control-1 character sets; this is explicitly disallowed by the
+ − 1261 ISO2022 standard.
+ − 1262
+ − 1263 'input-charset-conversion
+ − 1264 A list of conversion specifications, specifying conversion of
+ − 1265 characters in one charset to another when decoding is performed.
+ − 1266 Each specification is a list of two elements: the source charset,
+ − 1267 and the destination charset.
+ − 1268
+ − 1269 'output-charset-conversion
+ − 1270 A list of conversion specifications, specifying conversion of
+ − 1271 characters in one charset to another when encoding is performed.
+ − 1272 The form of each specification is the same as for
+ − 1273 'input-charset-conversion.
+ − 1274
+ − 1275
771
+ − 1276
428
+ − 1277 The following additional properties are recognized (and required)
+ − 1278 if TYPE is 'ccl:
+ − 1279
+ − 1280 'decode
+ − 1281 CCL program used for decoding (converting to internal format).
+ − 1282
+ − 1283 'encode
+ − 1284 CCL program used for encoding (converting to external format).
771
+ − 1285
+ − 1286
+ − 1287 The following additional properties are recognized if TYPE is 'chain:
+ − 1288
+ − 1289 'chain
+ − 1290 List of coding systems to be chained together, in decoding order.
+ − 1291
+ − 1292 'canonicalize-after-coding
+ − 1293 Coding system to be returned by the detector routines in place of
+ − 1294 this coding system.
+ − 1295
+ − 1296
+ − 1297
+ − 1298 The following additional properties are recognized if TYPE is 'unicode:
+ − 1299
+ − 1300 'type
+ − 1301 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not
+ − 1302 yet implemented). `utf-16' is the basic two-byte encoding;
+ − 1303 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible
+ − 1304 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using
+ − 1305 only characters that will safely pass through all mail gateways.
2297
+ − 1306 [[ This should be \"transformation format\". There should also be
+ − 1307 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]]
771
+ − 1308
+ − 1309 'little-endian
+ − 1310 If non-nil, `utf-16' and `ucs-4' will write out the groups of two
+ − 1311 or four bytes little-endian instead of big-endian. This is required,
+ − 1312 for example, under Windows.
+ − 1313
+ − 1314 'need-bom
+ − 1315 If non-nil, a byte order mark (BOM, or Unicode FFFE) should be
+ − 1316 written out at the beginning of the data. This serves both to
+ − 1317 identify the endianness of the following data and to mark the
+ − 1318 data as Unicode (at least, this is how Windows uses it).
2297
+ − 1319 [[ The correct term is \"signature\", since this technique may also
+ − 1320 be used with UTF-8. That is the term used in the standard. ]]
771
+ − 1321
+ − 1322
+ − 1323 The following additional properties are recognized if TYPE is
+ − 1324 'mswindows-multibyte:
+ − 1325
+ − 1326 'code-page
+ − 1327 Either a number (specifying a particular code page) or one of the
+ − 1328 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI,
+ − 1329 OEM, Macintosh, or EBCDIC code page associated with a particular
+ − 1330 locale (given by the `locale' property). NOTE: EBCDIC code pages
+ − 1331 only exist in Windows 2000 and later.
+ − 1332
+ − 1333 'locale
+ − 1334 If `code-page' is a symbol, this specifies the locale whose code
+ − 1335 page of the corresponding type should be used. This should be
+ − 1336 one of the following: A cons of two strings, (LANGUAGE
+ − 1337 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a
+ − 1338 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is
+ − 1339 used); or one of the symbols `current', `user-default', or
+ − 1340 `system-default', corresponding to the values of
+ − 1341 `mswindows-current-locale', `mswindows-user-default-locale', or
+ − 1342 `mswindows-system-default-locale', respectively.
+ − 1343
+ − 1344
+ − 1345
+ − 1346 The following additional properties are recognized if TYPE is 'undecided:
2297
+ − 1347 [[ Doesn't GNU use \"detect-*\" for the following two? ]]
771
+ − 1348
+ − 1349 'do-eol
+ − 1350 Do EOL detection.
+ − 1351
+ − 1352 'do-coding
+ − 1353 Do encoding detection.
+ − 1354
+ − 1355 'coding-system
+ − 1356 If encoding detection is not done, use the specified coding system
+ − 1357 to do decoding. This is used internally when implementing coding
+ − 1358 systems with an EOL type that specifies autodetection (the default),
+ − 1359 so that the detector routines return the proper subsidiary.
+ − 1360
+ − 1361
+ − 1362
+ − 1363 The following additional property is recognized if TYPE is 'gzip:
+ − 1364
+ − 1365 'level
+ − 1366 Compression level: 0 through 9, or `default' (currently 6).
+ − 1367
428
+ − 1368 */
771
+ − 1369 (name, type, description, props))
428
+ − 1370 {
771
+ − 1371 return make_coding_system_1 (name, 0, type, description, props);
428
+ − 1372 }
+ − 1373
+ − 1374 DEFUN ("copy-coding-system", Fcopy_coding_system, 2, 2, 0, /*
+ − 1375 Copy OLD-CODING-SYSTEM to NEW-NAME.
+ − 1376 If NEW-NAME does not name an existing coding system, a new one will
+ − 1377 be created.
771
+ − 1378 If you are using this function to create an alias, think again:
+ − 1379 Use `define-coding-system-alias' instead.
428
+ − 1380 */
+ − 1381 (old_coding_system, new_name))
+ − 1382 {
+ − 1383 Lisp_Object new_coding_system;
+ − 1384 old_coding_system = Fget_coding_system (old_coding_system);
771
+ − 1385 new_coding_system =
+ − 1386 UNBOUNDP (new_name) ? Qnil : Ffind_coding_system (new_name);
428
+ − 1387 if (NILP (new_coding_system))
+ − 1388 {
793
+ − 1389 new_coding_system =
+ − 1390 wrap_coding_system
+ − 1391 (allocate_coding_system
+ − 1392 (XCODING_SYSTEM (old_coding_system)->methods,
+ − 1393 XCODING_SYSTEM (old_coding_system)->methods->extra_data_size,
+ − 1394 new_name));
771
+ − 1395 if (!UNBOUNDP (new_name))
+ − 1396 Fputhash (new_name, new_coding_system, Vcoding_system_hash_table);
428
+ − 1397 }
771
+ − 1398 else if (XCODING_SYSTEM (old_coding_system)->methods !=
+ − 1399 XCODING_SYSTEM (new_coding_system)->methods)
+ − 1400 invalid_operation_2 ("Coding systems not same type",
+ − 1401 old_coding_system, new_coding_system);
428
+ − 1402
+ − 1403 {
+ − 1404 Lisp_Coding_System *to = XCODING_SYSTEM (new_coding_system);
+ − 1405 Lisp_Coding_System *from = XCODING_SYSTEM (old_coding_system);
771
+ − 1406 copy_sized_lcrecord (to, from, sizeof_coding_system (from));
428
+ − 1407 to->name = new_name;
+ − 1408 }
+ − 1409 return new_coding_system;
+ − 1410 }
+ − 1411
771
+ − 1412 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p,
+ − 1413 1, 1, 0, /*
440
+ − 1414 Return t if OBJECT names a coding system, and is not a coding system alias.
428
+ − 1415 */
440
+ − 1416 (object))
+ − 1417 {
+ − 1418 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil))
+ − 1419 ? Qt : Qnil;
+ − 1420 }
+ − 1421
2297
+ − 1422 /* #### Shouldn't this really be a find/get pair? */
+ − 1423
440
+ − 1424 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /*
+ − 1425 Return t if OBJECT is a coding system alias.
+ − 1426 All coding system aliases are created by `define-coding-system-alias'.
+ − 1427 */
+ − 1428 (object))
428
+ − 1429 {
440
+ − 1430 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero))
+ − 1431 ? Qt : Qnil;
+ − 1432 }
+ − 1433
+ − 1434 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /*
+ − 1435 Return the coding-system symbol for which symbol ALIAS is an alias.
+ − 1436 */
+ − 1437 (alias))
+ − 1438 {
+ − 1439 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil);
+ − 1440 if (SYMBOLP (aliasee))
+ − 1441 return aliasee;
+ − 1442 else
563
+ − 1443 invalid_argument ("Symbol is not a coding system alias", alias);
1204
+ − 1444 RETURN_NOT_REACHED (Qnil);
440
+ − 1445 }
+ − 1446
+ − 1447 /* A maphash function, for removing dangling coding system aliases. */
+ − 1448 static int
2286
+ − 1449 dangling_coding_system_alias_p (Lisp_Object UNUSED (alias),
440
+ − 1450 Lisp_Object aliasee,
+ − 1451 void *dangling_aliases)
+ − 1452 {
+ − 1453 if (SYMBOLP (aliasee)
+ − 1454 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil)))
428
+ − 1455 {
440
+ − 1456 (*(int *) dangling_aliases)++;
+ − 1457 return 1;
428
+ − 1458 }
440
+ − 1459 else
+ − 1460 return 0;
+ − 1461 }
+ − 1462
+ − 1463 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
+ − 1464 Define symbol ALIAS as an alias for coding system ALIASEE.
+ − 1465
+ − 1466 You can use this function to redefine an alias that has already been defined,
+ − 1467 but you cannot redefine a name which is the canonical name for a coding system.
+ − 1468 \(a canonical name of a coding system is what is returned when you call
+ − 1469 `coding-system-name' on a coding system).
+ − 1470
+ − 1471 ALIASEE itself can be an alias, which allows you to define nested aliases.
+ − 1472
+ − 1473 You are forbidden, however, from creating alias loops or `dangling' aliases.
+ − 1474 These will be detected, and an error will be signaled if you attempt to do so.
+ − 1475
+ − 1476 If ALIASEE is nil, then ALIAS will simply be undefined.
+ − 1477
+ − 1478 See also `coding-system-alias-p', `coding-system-aliasee',
+ − 1479 and `coding-system-canonical-name-p'.
+ − 1480 */
+ − 1481 (alias, aliasee))
+ − 1482 {
2286
+ − 1483 Lisp_Object probe;
440
+ − 1484
+ − 1485 CHECK_SYMBOL (alias);
+ − 1486
+ − 1487 if (!NILP (Fcoding_system_canonical_name_p (alias)))
563
+ − 1488 invalid_change
440
+ − 1489 ("Symbol is the canonical name of a coding system and cannot be redefined",
+ − 1490 alias);
+ − 1491
+ − 1492 if (NILP (aliasee))
+ − 1493 {
771
+ − 1494 Lisp_Object subsidiary_unix = add_suffix_to_symbol (alias, "-unix");
+ − 1495 Lisp_Object subsidiary_dos = add_suffix_to_symbol (alias, "-dos");
+ − 1496 Lisp_Object subsidiary_mac = add_suffix_to_symbol (alias, "-mac");
440
+ − 1497
+ − 1498 Fremhash (alias, Vcoding_system_hash_table);
+ − 1499
+ − 1500 /* Undefine subsidiary aliases,
+ − 1501 presumably created by a previous call to this function */
+ − 1502 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) &&
+ − 1503 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) &&
+ − 1504 ! NILP (Fcoding_system_alias_p (subsidiary_mac)))
+ − 1505 {
+ − 1506 Fdefine_coding_system_alias (subsidiary_unix, Qnil);
+ − 1507 Fdefine_coding_system_alias (subsidiary_dos, Qnil);
+ − 1508 Fdefine_coding_system_alias (subsidiary_mac, Qnil);
+ − 1509 }
+ − 1510
+ − 1511 /* Undefine dangling coding system aliases. */
+ − 1512 {
+ − 1513 int dangling_aliases;
+ − 1514
+ − 1515 do {
+ − 1516 dangling_aliases = 0;
+ − 1517 elisp_map_remhash (dangling_coding_system_alias_p,
+ − 1518 Vcoding_system_hash_table,
+ − 1519 &dangling_aliases);
+ − 1520 } while (dangling_aliases > 0);
+ − 1521 }
+ − 1522
+ − 1523 return Qnil;
+ − 1524 }
+ − 1525
+ − 1526 if (CODING_SYSTEMP (aliasee))
+ − 1527 aliasee = XCODING_SYSTEM_NAME (aliasee);
+ − 1528
+ − 1529 /* Checks that aliasee names a coding-system */
2286
+ − 1530 (void) Fget_coding_system (aliasee);
440
+ − 1531
+ − 1532 /* Check for coding system alias loops */
+ − 1533 if (EQ (alias, aliasee))
563
+ − 1534 alias_loop: invalid_operation_2
440
+ − 1535 ("Attempt to create a coding system alias loop", alias, aliasee);
+ − 1536
+ − 1537 for (probe = aliasee;
+ − 1538 SYMBOLP (probe);
+ − 1539 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero))
+ − 1540 {
+ − 1541 if (EQ (probe, alias))
+ − 1542 goto alias_loop;
+ − 1543 }
+ − 1544
+ − 1545 Fputhash (alias, aliasee, Vcoding_system_hash_table);
+ − 1546
+ − 1547 /* Set up aliases for subsidiaries.
2297
+ − 1548 #### There must be a better way to handle subsidiary coding systems.
+ − 1549 Inquiring Minds Want To Know: shouldn't they always be chains? */
440
+ − 1550 {
+ − 1551 static const char *suffixes[] = { "-unix", "-dos", "-mac" };
+ − 1552 int i;
+ − 1553 for (i = 0; i < countof (suffixes); i++)
+ − 1554 {
+ − 1555 Lisp_Object alias_subsidiary =
771
+ − 1556 add_suffix_to_symbol (alias, suffixes[i]);
440
+ − 1557 Lisp_Object aliasee_subsidiary =
771
+ − 1558 add_suffix_to_symbol (aliasee, suffixes[i]);
440
+ − 1559
+ − 1560 if (! NILP (Ffind_coding_system (aliasee_subsidiary)))
+ − 1561 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary);
+ − 1562 }
+ − 1563 }
428
+ − 1564 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
+ − 1565 but it doesn't look intentional, so I'd rather return something
+ − 1566 meaningful or nothing at all. */
+ − 1567 return Qnil;
+ − 1568 }
+ − 1569
+ − 1570 static Lisp_Object
771
+ − 1571 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
428
+ − 1572 {
+ − 1573 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
+ − 1574 Lisp_Object new_coding_system;
+ − 1575
+ − 1576 switch (type)
+ − 1577 {
+ − 1578 case EOL_AUTODETECT: return coding_system;
+ − 1579 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break;
+ − 1580 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break;
+ − 1581 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break;
442
+ − 1582 default: abort (); return Qnil;
428
+ − 1583 }
+ − 1584
+ − 1585 return NILP (new_coding_system) ? coding_system : new_coding_system;
+ − 1586 }
+ − 1587
+ − 1588 DEFUN ("subsidiary-coding-system", Fsubsidiary_coding_system, 2, 2, 0, /*
+ − 1589 Return the subsidiary coding system of CODING-SYSTEM with eol type EOL-TYPE.
771
+ − 1590 The logically opposite operation is `coding-system-base'.
428
+ − 1591 */
+ − 1592 (coding_system, eol_type))
+ − 1593 {
771
+ − 1594 coding_system = get_coding_system_for_text_file (coding_system, 0);
428
+ − 1595
+ − 1596 return subsidiary_coding_system (coding_system,
+ − 1597 symbol_to_eol_type (eol_type));
+ − 1598 }
+ − 1599
771
+ − 1600 DEFUN ("coding-system-base", Fcoding_system_base,
+ − 1601 1, 1, 0, /*
+ − 1602 Return the base coding system of CODING-SYSTEM.
+ − 1603 If CODING-SYSTEM is a subsidiary, this returns its parent; otherwise, it
+ − 1604 returns CODING-SYSTEM.
+ − 1605 The logically opposite operation is `subsidiary-coding-system'.
+ − 1606 */
+ − 1607 (coding_system))
+ − 1608 {
+ − 1609 Lisp_Object base;
+ − 1610
+ − 1611 coding_system = Fget_coding_system (coding_system);
+ − 1612 if (EQ (XCODING_SYSTEM_NAME (coding_system), Qbinary))
+ − 1613 return Fget_coding_system (Qraw_text); /* hack! */
+ − 1614 base = XCODING_SYSTEM_SUBSIDIARY_PARENT (coding_system);
+ − 1615 if (!NILP (base))
+ − 1616 return base;
+ − 1617 return coding_system;
+ − 1618 }
+ − 1619
+ − 1620 DEFUN ("coding-system-used-for-io", Fcoding_system_used_for_io,
+ − 1621 1, 1, 0, /*
+ − 1622 Return the coding system actually used for I/O.
+ − 1623 In some cases (e.g. when a particular EOL type is specified) this won't be
2297
+ − 1624 the coding system itself. This can be useful when trying to determine
+ − 1625 precisely how data was decoded.
771
+ − 1626 */
+ − 1627 (coding_system))
+ − 1628 {
+ − 1629 Lisp_Object canon;
+ − 1630
+ − 1631 coding_system = Fget_coding_system (coding_system);
+ − 1632 canon = XCODING_SYSTEM_CANONICAL (coding_system);
+ − 1633 if (!NILP (canon))
+ − 1634 return canon;
+ − 1635 return coding_system;
+ − 1636 }
+ − 1637
428
+ − 1638
+ − 1639 /************************************************************************/
+ − 1640 /* Coding system accessors */
+ − 1641 /************************************************************************/
+ − 1642
771
+ − 1643 DEFUN ("coding-system-description", Fcoding_system_description, 1, 1, 0, /*
+ − 1644 Return the description for CODING-SYSTEM.
+ − 1645 The `description' of a coding system is a short English phrase giving the
+ − 1646 name rendered according to English punctuation rules, plus possibly some
+ − 1647 explanatory text (typically in the form of a parenthetical phrase). The
+ − 1648 description is intended to be short enough that it can appear as a menu item,
+ − 1649 and clear enough to be recognizable even to someone who is assumed to have
+ − 1650 some basic familiarity with different encodings but may not know all the
+ − 1651 technical names; thus, for `cn-gb-2312' is described as "Chinese EUC" and
+ − 1652 `hz-gb-2312' is described as "Hz/ZW (Chinese)", where the actual name of
+ − 1653 the encoding is given, followed by a note that this is a Chinese encoding,
+ − 1654 because the great majority of people encountering this would have no idea
+ − 1655 what it is, and giving the language indicates whether the encoding should
+ − 1656 just be ignored or (conceivably) investigated more thoroughly.
428
+ − 1657 */
+ − 1658 (coding_system))
+ − 1659 {
+ − 1660 coding_system = Fget_coding_system (coding_system);
771
+ − 1661 return XCODING_SYSTEM_DESCRIPTION (coding_system);
428
+ − 1662 }
+ − 1663
+ − 1664 DEFUN ("coding-system-type", Fcoding_system_type, 1, 1, 0, /*
+ − 1665 Return the type of CODING-SYSTEM.
+ − 1666 */
+ − 1667 (coding_system))
+ − 1668 {
771
+ − 1669 coding_system = Fget_coding_system (coding_system);
+ − 1670 return XCODING_SYSTEM_TYPE (coding_system);
428
+ − 1671 }
+ − 1672
+ − 1673 DEFUN ("coding-system-property", Fcoding_system_property, 2, 2, 0, /*
+ − 1674 Return the PROP property of CODING-SYSTEM.
+ − 1675 */
+ − 1676 (coding_system, prop))
+ − 1677 {
+ − 1678 coding_system = Fget_coding_system (coding_system);
+ − 1679 CHECK_SYMBOL (prop);
+ − 1680
+ − 1681 if (EQ (prop, Qname))
+ − 1682 return XCODING_SYSTEM_NAME (coding_system);
+ − 1683 else if (EQ (prop, Qtype))
+ − 1684 return Fcoding_system_type (coding_system);
771
+ − 1685 else if (EQ (prop, Qdescription))
+ − 1686 return XCODING_SYSTEM_DESCRIPTION (coding_system);
428
+ − 1687 else if (EQ (prop, Qmnemonic))
+ − 1688 return XCODING_SYSTEM_MNEMONIC (coding_system);
771
+ − 1689 else if (EQ (prop, Qdocumentation))
+ − 1690 return XCODING_SYSTEM_DOCUMENTATION (coding_system);
428
+ − 1691 else if (EQ (prop, Qeol_type))
771
+ − 1692 return eol_type_to_symbol (XCODING_SYSTEM_EOL_TYPE
+ − 1693 (coding_system));
428
+ − 1694 else if (EQ (prop, Qeol_lf))
+ − 1695 return XCODING_SYSTEM_EOL_LF (coding_system);
+ − 1696 else if (EQ (prop, Qeol_crlf))
+ − 1697 return XCODING_SYSTEM_EOL_CRLF (coding_system);
+ − 1698 else if (EQ (prop, Qeol_cr))
+ − 1699 return XCODING_SYSTEM_EOL_CR (coding_system);
+ − 1700 else if (EQ (prop, Qpost_read_conversion))
+ − 1701 return XCODING_SYSTEM_POST_READ_CONVERSION (coding_system);
+ − 1702 else if (EQ (prop, Qpre_write_conversion))
+ − 1703 return XCODING_SYSTEM_PRE_WRITE_CONVERSION (coding_system);
771
+ − 1704 else
+ − 1705 {
+ − 1706 Lisp_Object value = CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (coding_system),
+ − 1707 getprop,
+ − 1708 (coding_system, prop),
+ − 1709 Qunbound);
+ − 1710 if (UNBOUNDP (value))
+ − 1711 invalid_constant ("Unrecognized property", prop);
+ − 1712 return value;
+ − 1713 }
+ − 1714 }
+ − 1715
+ − 1716
+ − 1717 /************************************************************************/
+ − 1718 /* Coding stream functions */
+ − 1719 /************************************************************************/
+ − 1720
+ − 1721 /* A coding stream is a stream used for encoding or decoding text. The
+ − 1722 coding-stream object keeps track of the actual coding system, the stream
+ − 1723 that is at the other end, and data that needs to be persistent across
+ − 1724 the lifetime of the stream. */
+ − 1725
1204
+ − 1726 extern const struct sized_memory_description chain_coding_stream_description;
+ − 1727 extern const struct sized_memory_description undecided_coding_stream_description;
+ − 1728
+ − 1729 static const struct memory_description coding_stream_data_description_1 []= {
2367
+ − 1730 { XD_BLOCK_PTR, chain_coding_system, 1, &chain_coding_stream_description},
+ − 1731 { XD_BLOCK_PTR, undecided_coding_system, 1, &undecided_coding_stream_description},
1204
+ − 1732 { XD_END }
+ − 1733 };
+ − 1734
+ − 1735 static const struct sized_memory_description coding_stream_data_description = {
+ − 1736 sizeof (void *), coding_stream_data_description_1
+ − 1737 };
+ − 1738
+ − 1739 static const struct memory_description coding_lstream_description[] = {
+ − 1740 { XD_INT, offsetof (struct coding_stream, type) },
+ − 1741 { XD_LISP_OBJECT, offsetof (struct coding_stream, orig_codesys) },
+ − 1742 { XD_LISP_OBJECT, offsetof (struct coding_stream, codesys) },
+ − 1743 { XD_LISP_OBJECT, offsetof (struct coding_stream, other_end) },
+ − 1744 { XD_UNION, offsetof (struct coding_stream, data),
+ − 1745 XD_INDIRECT (0, 0), &coding_stream_data_description },
+ − 1746 { XD_END }
+ − 1747 };
+ − 1748
+ − 1749 DEFINE_LSTREAM_IMPLEMENTATION_WITH_DATA ("coding", coding);
771
+ − 1750
+ − 1751 /* Encoding and decoding are parallel operations, so we create just one
+ − 1752 stream for both. "Decoding" may involve the extra step of autodetection
+ − 1753 of the data format, but that's only because of the conventional
+ − 1754 definition of decoding as converting from external- to
+ − 1755 internal-formatted data.
+ − 1756
2297
+ − 1757 [[ REWRITE ME! ]]
+ − 1758
771
+ − 1759 #### We really need to abstract out the concept of "data formats" and
+ − 1760 define "converters" that convert from and to specified formats,
+ − 1761 eliminating the idea of decoding and encoding. When specifying a
+ − 1762 conversion process, we need to give the data formats themselves, not the
+ − 1763 conversion processes -- e.g. a coding system called "Unicode->multibyte"
+ − 1764 converts in both directions, and we could auto-detect the format of data
+ − 1765 at either end. */
+ − 1766
+ − 1767 static Bytecount
+ − 1768 coding_reader (Lstream *stream, unsigned char *data, Bytecount size)
+ − 1769 {
+ − 1770 unsigned char *orig_data = data;
+ − 1771 Bytecount read_size;
+ − 1772 int error_occurred = 0;
+ − 1773 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 1774
+ − 1775 /* We need to interface to coding_{de,en}code_1(), which expects to take
+ − 1776 some amount of data and store the result into a Dynarr. We have
+ − 1777 coding_{de,en}code_1() store into c->runoff, and take data from there
+ − 1778 as necessary. */
+ − 1779
+ − 1780 /* We loop until we have enough data, reading chunks from the other
+ − 1781 end and converting it. */
+ − 1782 while (1)
+ − 1783 {
+ − 1784 /* Take data from convert_to if we can. Make sure to take at
+ − 1785 most SIZE bytes, and delete the data from convert_to. */
+ − 1786 if (Dynarr_length (str->convert_to) > 0)
+ − 1787 {
+ − 1788 Bytecount chunk =
+ − 1789 min (size, (Bytecount) Dynarr_length (str->convert_to));
+ − 1790 memcpy (data, Dynarr_atp (str->convert_to, 0), chunk);
+ − 1791 Dynarr_delete_many (str->convert_to, 0, chunk);
+ − 1792 data += chunk;
+ − 1793 size -= chunk;
+ − 1794 }
+ − 1795
+ − 1796 if (size == 0)
+ − 1797 break; /* No more room for data */
+ − 1798
+ − 1799 if (str->eof)
+ − 1800 break;
+ − 1801
+ − 1802 {
+ − 1803 /* Exhausted convert_to, so get some more. Read into convert_from,
+ − 1804 after existing "rejected" data from the last conversion. */
+ − 1805 Bytecount rejected = Dynarr_length (str->convert_from);
+ − 1806 /* #### 1024 is arbitrary; we really need to separate 0 from EOF,
+ − 1807 and when we get 0, keep taking more data until we don't get 0 --
+ − 1808 we don't know how much data the conversion routine might need
2297
+ − 1809 before it can generate any data of its own (eg, bzip2). */
814
+ − 1810 Bytecount readmore =
+ − 1811 str->one_byte_at_a_time ? (Bytecount) 1 :
+ − 1812 max (size, (Bytecount) 1024);
771
+ − 1813
+ − 1814 Dynarr_add_many (str->convert_from, 0, readmore);
+ − 1815 read_size = Lstream_read (str->other_end,
+ − 1816 Dynarr_atp (str->convert_from, rejected),
+ − 1817 readmore);
+ − 1818 /* Trim size down to how much we actually got */
+ − 1819 Dynarr_set_size (str->convert_from, rejected + max (0, read_size));
+ − 1820 }
+ − 1821
+ − 1822 if (read_size < 0) /* LSTREAM_ERROR */
+ − 1823 {
+ − 1824 error_occurred = 1;
+ − 1825 break;
+ − 1826 }
+ − 1827 if (read_size == 0) /* LSTREAM_EOF */
+ − 1828 /* There might be some more end data produced in the translation,
+ − 1829 so we set a flag and call the conversion method once more to
+ − 1830 output any final stuff it may be holding, any "go back to a sane
+ − 1831 state" escape sequences, etc. The conversion method is free to
+ − 1832 look at this flag, and we use it above to stop looping. */
+ − 1833 str->eof = 1;
+ − 1834 {
+ − 1835 Bytecount processed;
+ − 1836 Bytecount to_process = Dynarr_length (str->convert_from);
+ − 1837
+ − 1838 /* Convert the data, and save any rejected data in convert_from */
+ − 1839 processed =
+ − 1840 XCODESYSMETH (str->codesys, convert,
+ − 1841 (str, Dynarr_atp (str->convert_from, 0),
+ − 1842 str->convert_to, to_process));
+ − 1843 if (processed < 0)
+ − 1844 {
+ − 1845 error_occurred = 1;
+ − 1846 break;
+ − 1847 }
+ − 1848 assert (processed <= to_process);
+ − 1849 if (processed < to_process)
+ − 1850 memmove (Dynarr_atp (str->convert_from, 0),
+ − 1851 Dynarr_atp (str->convert_from, processed),
+ − 1852 to_process - processed);
+ − 1853 Dynarr_set_size (str->convert_from, to_process - processed);
+ − 1854 }
+ − 1855 }
+ − 1856
+ − 1857 if (data - orig_data == 0)
+ − 1858 return error_occurred ? -1 : 0;
+ − 1859 else
+ − 1860 return data - orig_data;
+ − 1861 }
+ − 1862
+ − 1863 static Bytecount
+ − 1864 coding_writer (Lstream *stream, const unsigned char *data, Bytecount size)
+ − 1865 {
+ − 1866 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 1867
+ − 1868 /* Convert all our data into convert_to, and then attempt to write
+ − 1869 it all out to the other end. */
+ − 1870 Dynarr_reset (str->convert_to);
+ − 1871 size = XCODESYSMETH (str->codesys, convert,
+ − 1872 (str, data, str->convert_to, size));
+ − 1873 if (Lstream_write (str->other_end, Dynarr_atp (str->convert_to, 0),
+ − 1874 Dynarr_length (str->convert_to)) < 0)
+ − 1875 return -1;
+ − 1876 else
+ − 1877 /* The return value indicates how much of the incoming data was
+ − 1878 processed, not how many bytes were written. */
+ − 1879 return size;
+ − 1880 }
+ − 1881
+ − 1882 static int
+ − 1883 encode_decode_source_sink_type_is_char (Lisp_Object cs,
+ − 1884 enum source_or_sink sex,
+ − 1885 enum encode_decode direction)
+ − 1886 {
+ − 1887 return (direction == CODING_DECODE ?
+ − 1888 decoding_source_sink_type_is_char (cs, sex) :
+ − 1889 encoding_source_sink_type_is_char (cs, sex));
+ − 1890 }
+ − 1891
+ − 1892 /* Ensure that the convert methods only get full characters sent to them to
+ − 1893 convert if the source of that conversion is characters; and that no such
+ − 1894 full-character checking happens when the source is bytes. Keep in mind
+ − 1895 that (1) the conversion_end_type return values take the perspective of
+ − 1896 encoding; (2) the source for decoding is the same as the sink for
+ − 1897 encoding; (3) when writing, the data is given to us, and we set our own
+ − 1898 stream to be character mode or not; (4) when reading, the data comes
+ − 1899 from the other_end stream, and we set that one to be character mode or
+ − 1900 not. This is consistent with the comment above the prototype for
+ − 1901 Lstream_set_character_mode(), which lays out rules for who is allowed to
+ − 1902 modify the character type mode on a stream.
+ − 1903
814
+ − 1904 If we're a read stream, we're always setting character mode on the
+ − 1905 source, but we also set it on ourselves consistent with the flag that
+ − 1906 can disable this (see again the comment above
+ − 1907 Lstream_set_character_mode()).
+ − 1908 */
771
+ − 1909
+ − 1910 static void
+ − 1911 set_coding_character_mode (Lstream *stream)
+ − 1912 {
+ − 1913 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 1914 Lstream *stream_to_set =
+ − 1915 stream->flags & LSTREAM_FL_WRITE ? stream : str->other_end;
+ − 1916 if (encode_decode_source_sink_type_is_char
+ − 1917 (str->codesys, CODING_SOURCE, str->direction))
+ − 1918 Lstream_set_character_mode (stream_to_set);
+ − 1919 else
+ − 1920 Lstream_unset_character_mode (stream_to_set);
814
+ − 1921 if (str->set_char_mode_on_us_when_reading &&
+ − 1922 (stream->flags & LSTREAM_FL_READ))
+ − 1923 {
+ − 1924 if (encode_decode_source_sink_type_is_char
+ − 1925 (str->codesys, CODING_SINK, str->direction))
+ − 1926 Lstream_set_character_mode (stream);
+ − 1927 else
+ − 1928 Lstream_unset_character_mode (stream);
+ − 1929 }
771
+ − 1930 }
+ − 1931
+ − 1932 static Lisp_Object
+ − 1933 coding_marker (Lisp_Object stream)
+ − 1934 {
+ − 1935 struct coding_stream *str = CODING_STREAM_DATA (XLSTREAM (stream));
+ − 1936
+ − 1937 mark_object (str->orig_codesys);
+ − 1938 mark_object (str->codesys);
+ − 1939 MAYBE_XCODESYSMETH (str->codesys, mark_coding_stream, (str));
+ − 1940 return wrap_lstream (str->other_end);
+ − 1941 }
+ − 1942
+ − 1943 static int
+ − 1944 coding_rewinder (Lstream *stream)
+ − 1945 {
+ − 1946 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 1947 MAYBE_XCODESYSMETH (str->codesys, rewind_coding_stream, (str));
+ − 1948
+ − 1949 str->ch = 0;
+ − 1950 Dynarr_reset (str->convert_to);
+ − 1951 Dynarr_reset (str->convert_from);
+ − 1952 return Lstream_rewind (str->other_end);
+ − 1953 }
+ − 1954
+ − 1955 static int
+ − 1956 coding_seekable_p (Lstream *stream)
+ − 1957 {
+ − 1958 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 1959 return Lstream_seekable_p (str->other_end);
+ − 1960 }
+ − 1961
+ − 1962 static int
+ − 1963 coding_flusher (Lstream *stream)
+ − 1964 {
+ − 1965 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 1966 return Lstream_flush (str->other_end);
+ − 1967 }
+ − 1968
+ − 1969 static int
+ − 1970 coding_closer (Lstream *stream)
+ − 1971 {
+ − 1972 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 1973 if (stream->flags & LSTREAM_FL_WRITE)
+ − 1974 {
+ − 1975 str->eof = 1;
+ − 1976 coding_writer (stream, 0, 0);
+ − 1977 str->eof = 0;
+ − 1978 }
+ − 1979 /* It's safe to free the runoff dynarrs now because they are used only
+ − 1980 during conversion. We need to keep the type-specific data around,
+ − 1981 though, because of canonicalize_after_coding. */
+ − 1982 if (str->convert_to)
+ − 1983 {
+ − 1984 Dynarr_free (str->convert_to);
+ − 1985 str->convert_to = 0;
+ − 1986 }
+ − 1987 if (str->convert_from)
428
+ − 1988 {
771
+ − 1989 Dynarr_free (str->convert_from);
+ − 1990 str->convert_from = 0;
+ − 1991 }
+ − 1992
800
+ − 1993 if (str->no_close_other)
+ − 1994 return Lstream_flush (str->other_end);
+ − 1995 else
+ − 1996 return Lstream_close (str->other_end);
771
+ − 1997 }
+ − 1998
+ − 1999 static void
+ − 2000 coding_finalizer (Lstream *stream)
+ − 2001 {
+ − 2002 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 2003
+ − 2004 assert (!str->finalized);
+ − 2005 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str));
+ − 2006 if (str->data)
+ − 2007 {
1726
+ − 2008 xfree (str->data, void *);
771
+ − 2009 str->data = 0;
+ − 2010 }
+ − 2011 str->finalized = 1;
+ − 2012 }
+ − 2013
+ − 2014 static Lisp_Object
+ − 2015 coding_stream_canonicalize_after_coding (Lstream *stream)
+ − 2016 {
+ − 2017 struct coding_stream *str = CODING_STREAM_DATA (stream);
+ − 2018
+ − 2019 return XCODESYSMETH_OR_GIVEN (str->codesys, canonicalize_after_coding,
+ − 2020 (str), str->codesys);
+ − 2021 }
+ − 2022
+ − 2023 Lisp_Object
+ − 2024 coding_stream_detected_coding_system (Lstream *stream)
+ − 2025 {
+ − 2026 Lisp_Object codesys =
+ − 2027 coding_stream_canonicalize_after_coding (stream);
+ − 2028 if (NILP (codesys))
+ − 2029 return Fget_coding_system (Qidentity);
+ − 2030 return codesys;
+ − 2031 }
+ − 2032
+ − 2033 Lisp_Object
+ − 2034 coding_stream_coding_system (Lstream *stream)
+ − 2035 {
+ − 2036 return CODING_STREAM_DATA (stream)->codesys;
+ − 2037 }
+ − 2038
+ − 2039 /* Change the coding system associated with a stream. */
+ − 2040
+ − 2041 void
+ − 2042 set_coding_stream_coding_system (Lstream *lstr, Lisp_Object codesys)
+ − 2043 {
+ − 2044 struct coding_stream *str = CODING_STREAM_DATA (lstr);
+ − 2045 if (EQ (str->orig_codesys, codesys))
+ − 2046 return;
+ − 2047 /* We do the equivalent of closing the stream, destroying it, and
+ − 2048 reinitializing it. This includes flushing out the data and signalling
+ − 2049 EOF, if we're a writing stream; we also replace the type-specific data
+ − 2050 with the data appropriate for the new coding system. */
+ − 2051 if (!NILP (str->codesys))
+ − 2052 {
+ − 2053 if (lstr->flags & LSTREAM_FL_WRITE)
+ − 2054 {
+ − 2055 Lstream_flush (lstr);
+ − 2056 str->eof = 1;
+ − 2057 coding_writer (lstr, 0, 0);
+ − 2058 str->eof = 0;
+ − 2059 }
+ − 2060 MAYBE_XCODESYSMETH (str->codesys, finalize_coding_stream, (str));
+ − 2061 }
+ − 2062 str->orig_codesys = codesys;
+ − 2063 str->codesys = coding_system_real_canonical (codesys);
+ − 2064
+ − 2065 if (str->data)
+ − 2066 {
1726
+ − 2067 xfree (str->data, void *);
771
+ − 2068 str->data = 0;
428
+ − 2069 }
771
+ − 2070 if (XCODING_SYSTEM_METHODS (str->codesys)->coding_data_size)
1204
+ − 2071 {
+ − 2072 str->data =
+ − 2073 xmalloc_and_zero (XCODING_SYSTEM_METHODS (str->codesys)->
+ − 2074 coding_data_size);
+ − 2075 str->type = XCODING_SYSTEM_METHODS (str->codesys)->enumtype;
+ − 2076 }
771
+ − 2077 MAYBE_XCODESYSMETH (str->codesys, init_coding_stream, (str));
+ − 2078 /* The new coding system may have different ideas regarding whether its
+ − 2079 ends are characters or bytes. */
+ − 2080 set_coding_character_mode (lstr);
+ − 2081 }
+ − 2082
+ − 2083 /* WARNING WARNING WARNING WARNING!!!!! If you open up a coding
+ − 2084 stream for writing, no automatic code detection will be performed.
+ − 2085 The reason for this is that automatic code detection requires a
+ − 2086 seekable input. Things will also fail if you open a coding
+ − 2087 stream for reading using a non-fully-specified coding system and
+ − 2088 a non-seekable input stream. */
+ − 2089
+ − 2090 static Lisp_Object
+ − 2091 make_coding_stream_1 (Lstream *stream, Lisp_Object codesys,
800
+ − 2092 const char *mode, enum encode_decode direction,
802
+ − 2093 int flags)
771
+ − 2094 {
+ − 2095 Lstream *lstr = Lstream_new (lstream_coding, mode);
+ − 2096 struct coding_stream *str = CODING_STREAM_DATA (lstr);
+ − 2097
+ − 2098 codesys = Fget_coding_system (codesys);
+ − 2099 xzero (*str);
+ − 2100 str->codesys = Qnil;
+ − 2101 str->orig_codesys = Qnil;
+ − 2102 str->us = lstr;
+ − 2103 str->other_end = stream;
+ − 2104 str->convert_to = Dynarr_new (unsigned_char);
+ − 2105 str->convert_from = Dynarr_new (unsigned_char);
+ − 2106 str->direction = direction;
814
+ − 2107 if (flags & LSTREAM_FL_NO_CLOSE_OTHER)
802
+ − 2108 str->no_close_other = 1;
814
+ − 2109 if (flags & LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME)
802
+ − 2110 str->one_byte_at_a_time = 1;
814
+ − 2111 if (!(flags & LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING))
+ − 2112 str->set_char_mode_on_us_when_reading = 1;
802
+ − 2113
771
+ − 2114 set_coding_stream_coding_system (lstr, codesys);
793
+ − 2115 return wrap_lstream (lstr);
771
+ − 2116 }
+ − 2117
814
+ − 2118 /* FLAGS:
+ − 2119
+ − 2120 LSTREAM_FL_NO_CLOSE_OTHER
+ − 2121 Don't close STREAM (the stream at the other end) when this stream is
+ − 2122 closed.
+ − 2123
+ − 2124 LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME
+ − 2125 When reading from STREAM, read and process one byte at a time rather
+ − 2126 than in large chunks. This is for reading from TTY's, so we don't
+ − 2127 block. #### We should instead create a non-blocking filedesc stream
+ − 2128 that emulates the behavior as necessary using select(), when the
+ − 2129 fcntls don't work. (As seems to be the case on Cygwin.)
+ − 2130
+ − 2131 LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING
+ − 2132 When reading from STREAM, read and process one byte at a time rather
+ − 2133 than in large chunks. This is for reading from TTY's, so we don't
+ − 2134 block. #### We should instead create a non-blocking filedesc stream
+ − 2135 that emulates the behavior as necessary using select(), when the
+ − 2136 fcntls don't work. (As seems to be the case on Cygwin.)
+ − 2137 */
771
+ − 2138 Lisp_Object
+ − 2139 make_coding_input_stream (Lstream *stream, Lisp_Object codesys,
802
+ − 2140 enum encode_decode direction, int flags)
771
+ − 2141 {
800
+ − 2142 return make_coding_stream_1 (stream, codesys, "r", direction,
802
+ − 2143 flags);
771
+ − 2144 }
+ − 2145
814
+ − 2146 /* FLAGS:
+ − 2147
+ − 2148 LSTREAM_FL_NO_CLOSE_OTHER
+ − 2149 Don't close STREAM (the stream at the other end) when this stream is
+ − 2150 closed.
+ − 2151 */
771
+ − 2152 Lisp_Object
+ − 2153 make_coding_output_stream (Lstream *stream, Lisp_Object codesys,
802
+ − 2154 enum encode_decode direction, int flags)
771
+ − 2155 {
800
+ − 2156 return make_coding_stream_1 (stream, codesys, "w", direction,
802
+ − 2157 flags);
771
+ − 2158 }
+ − 2159
+ − 2160 static Lisp_Object
+ − 2161 encode_decode_coding_region (Lisp_Object start, Lisp_Object end,
+ − 2162 Lisp_Object coding_system, Lisp_Object buffer,
+ − 2163 enum encode_decode direction)
+ − 2164 {
+ − 2165 Charbpos b, e;
+ − 2166 struct buffer *buf = decode_buffer (buffer, 0);
+ − 2167 Lisp_Object instream = Qnil, to_outstream = Qnil, outstream = Qnil;
+ − 2168 Lisp_Object from_outstream = Qnil, auto_outstream = Qnil;
+ − 2169 Lisp_Object lb_outstream = Qnil;
+ − 2170 Lisp_Object next;
+ − 2171 Lstream *istr, *ostr;
+ − 2172 struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
+ − 2173 struct gcpro ngcpro1;
+ − 2174 int source_char, sink_char;
+ − 2175
+ − 2176 get_buffer_range_char (buf, start, end, &b, &e, 0);
+ − 2177 barf_if_buffer_read_only (buf, b, e);
+ − 2178
+ − 2179 GCPRO5 (instream, to_outstream, outstream, from_outstream, lb_outstream);
+ − 2180 NGCPRO1 (auto_outstream);
+ − 2181
+ − 2182 coding_system = Fget_coding_system (coding_system);
+ − 2183 source_char = encode_decode_source_sink_type_is_char (coding_system,
+ − 2184 CODING_SOURCE,
+ − 2185 direction);
+ − 2186 sink_char = encode_decode_source_sink_type_is_char (coding_system,
+ − 2187 CODING_SINK,
+ − 2188 direction);
+ − 2189
+ − 2190 /* Order is IN <---> [TO] -> OUT -> [FROM] -> [AUTODETECT-EOL] -> LB */
+ − 2191 instream = make_lisp_buffer_input_stream (buf, b, e, 0);
+ − 2192 next = lb_outstream = make_lisp_buffer_output_stream (buf, b, 0);
+ − 2193
+ − 2194 if (direction == CODING_DECODE &&
+ − 2195 XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
+ − 2196 next = auto_outstream =
+ − 2197 make_coding_output_stream
800
+ − 2198 (XLSTREAM (next), Fget_coding_system (Qconvert_eol_autodetect),
+ − 2199 CODING_DECODE, 0);
771
+ − 2200
+ − 2201 if (!sink_char)
+ − 2202 next = from_outstream =
800
+ − 2203 make_coding_output_stream (XLSTREAM (next), Qbinary, CODING_DECODE, 0);
771
+ − 2204 outstream = make_coding_output_stream (XLSTREAM (next), coding_system,
800
+ − 2205 direction, 0);
771
+ − 2206 if (!source_char)
428
+ − 2207 {
771
+ − 2208 to_outstream =
+ − 2209 make_coding_output_stream (XLSTREAM (outstream),
800
+ − 2210 Qbinary, CODING_ENCODE, 0);
771
+ − 2211 ostr = XLSTREAM (to_outstream);
+ − 2212 }
+ − 2213 else
+ − 2214 ostr = XLSTREAM (outstream);
+ − 2215 istr = XLSTREAM (instream);
+ − 2216
+ − 2217 /* The chain of streams looks like this:
+ − 2218
2297
+ − 2219 [BUFFER] <----- (( read from/send to loop ))
771
+ − 2220 ------> [CHAR->BYTE i.e. ENCODE AS BINARY if source is
+ − 2221 in bytes]
+ − 2222 ------> [ENCODE/DECODE AS SPECIFIED]
+ − 2223 ------> [BYTE->CHAR i.e. DECODE AS BINARY
+ − 2224 if sink is in bytes]
+ − 2225 ------> [AUTODETECT EOL if
+ − 2226 we're decoding and
+ − 2227 coding system calls
+ − 2228 for this]
+ − 2229 ------> [BUFFER]
+ − 2230 */
2367
+ − 2231
+ − 2232 /* #### See comment
+ − 2233
+ − 2234 EFFICIENCY OF CODING CONVERSION WITH MULTIPLE COPIES/CHAINS
+ − 2235
+ − 2236 in text.c.
+ − 2237 */
+ − 2238
771
+ − 2239 while (1)
+ − 2240 {
+ − 2241 char tempbuf[1024]; /* some random amount */
+ − 2242 Charbpos newpos, even_newer_pos;
+ − 2243 Charbpos oldpos = lisp_buffer_stream_startpos (istr);
+ − 2244 Bytecount size_in_bytes =
+ − 2245 Lstream_read (istr, tempbuf, sizeof (tempbuf));
+ − 2246
+ − 2247 if (!size_in_bytes)
+ − 2248 break;
+ − 2249 newpos = lisp_buffer_stream_startpos (istr);
+ − 2250 Lstream_write (ostr, tempbuf, size_in_bytes);
+ − 2251 even_newer_pos = lisp_buffer_stream_startpos (istr);
+ − 2252 buffer_delete_range (buf, even_newer_pos - (newpos - oldpos),
+ − 2253 even_newer_pos, 0);
428
+ − 2254 }
771
+ − 2255
+ − 2256 {
+ − 2257 Charcount retlen =
+ − 2258 lisp_buffer_stream_startpos (XLSTREAM (instream)) - b;
+ − 2259 Lstream_close (istr);
+ − 2260 Lstream_close (ostr);
+ − 2261 NUNGCPRO;
+ − 2262 UNGCPRO;
+ − 2263 Lstream_delete (istr);
+ − 2264 if (!NILP (from_outstream))
+ − 2265 Lstream_delete (XLSTREAM (from_outstream));
+ − 2266 Lstream_delete (XLSTREAM (outstream));
+ − 2267 if (!NILP (to_outstream))
+ − 2268 Lstream_delete (XLSTREAM (to_outstream));
+ − 2269 if (!NILP (auto_outstream))
+ − 2270 Lstream_delete (XLSTREAM (auto_outstream));
+ − 2271 Lstream_delete (XLSTREAM (lb_outstream));
+ − 2272 return make_int (retlen);
+ − 2273 }
+ − 2274 }
+ − 2275
+ − 2276 DEFUN ("decode-coding-region", Fdecode_coding_region, 3, 4, 0, /*
+ − 2277 Decode the text between START and END which is encoded in CODING-SYSTEM.
+ − 2278 This is useful if you've read in encoded text from a file without decoding
+ − 2279 it (e.g. you read in a JIS-formatted file but used the `binary' or
+ − 2280 `no-conversion' coding system, so that it shows up as "^[$B!<!+^[(B").
+ − 2281 Return length of decoded text.
+ − 2282 BUFFER defaults to the current buffer if unspecified.
+ − 2283 */
+ − 2284 (start, end, coding_system, buffer))
+ − 2285 {
+ − 2286 return encode_decode_coding_region (start, end, coding_system, buffer,
+ − 2287 CODING_DECODE);
+ − 2288 }
+ − 2289
+ − 2290 DEFUN ("encode-coding-region", Fencode_coding_region, 3, 4, 0, /*
+ − 2291 Encode the text between START and END using CODING-SYSTEM.
+ − 2292 This will, for example, convert Japanese characters into stuff such as
+ − 2293 "^[$B!<!+^[(B" if you use the JIS encoding. Return length of encoded
+ − 2294 text. BUFFER defaults to the current buffer if unspecified.
+ − 2295 */
+ − 2296 (start, end, coding_system, buffer))
+ − 2297 {
+ − 2298 return encode_decode_coding_region (start, end, coding_system, buffer,
+ − 2299 CODING_ENCODE);
428
+ − 2300 }
+ − 2301
+ − 2302
+ − 2303 /************************************************************************/
771
+ − 2304 /* Chain methods */
428
+ − 2305 /************************************************************************/
+ − 2306
771
+ − 2307 /* #### Need a way to create "opposite-direction" coding systems. */
+ − 2308
+ − 2309 /* Chain two or more coding systems together to make a combination coding
+ − 2310 system. */
+ − 2311
+ − 2312 struct chain_coding_system
+ − 2313 {
+ − 2314 /* List of coding systems, in decode order */
+ − 2315 Lisp_Object *chain;
+ − 2316 /* Number of coding systems in list */
+ − 2317 int count;
+ − 2318 /* Coding system to return as a result of canonicalize-after-coding */
+ − 2319 Lisp_Object canonicalize_after_coding;
+ − 2320 };
+ − 2321
+ − 2322 struct chain_coding_stream
+ − 2323 {
+ − 2324 int initted;
+ − 2325 /* Lstreams for chain coding system */
+ − 2326 Lisp_Object *lstreams;
+ − 2327 int lstream_count;
+ − 2328 };
+ − 2329
1204
+ − 2330 static const struct memory_description chain_coding_system_description[] = {
+ − 2331 { XD_INT, offsetof (struct chain_coding_system, count) },
2367
+ − 2332 { XD_BLOCK_PTR, offsetof (struct chain_coding_system, chain),
1204
+ − 2333 XD_INDIRECT (0, 0), &lisp_object_description },
+ − 2334 { XD_LISP_OBJECT, offsetof (struct chain_coding_system,
+ − 2335 canonicalize_after_coding) },
771
+ − 2336 { XD_END }
+ − 2337 };
+ − 2338
1204
+ − 2339 static const struct memory_description chain_coding_stream_description_1 [] = {
+ − 2340 { XD_INT, offsetof (struct chain_coding_stream, lstream_count) },
2367
+ − 2341 { XD_BLOCK_PTR, offsetof (struct chain_coding_stream, lstreams),
1204
+ − 2342 XD_INDIRECT (0, 0), &lisp_object_description },
771
+ − 2343 { XD_END }
+ − 2344 };
+ − 2345
1204
+ − 2346 const struct sized_memory_description chain_coding_stream_description = {
+ − 2347 sizeof (struct chain_coding_stream), chain_coding_stream_description_1
+ − 2348 };
+ − 2349
+ − 2350 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (chain);
+ − 2351
771
+ − 2352 static Lisp_Object
+ − 2353 chain_canonicalize (Lisp_Object codesys)
+ − 2354 {
+ − 2355 /* We make use of the fact that this method is called at init time, after
+ − 2356 properties have been parsed. init_method is called too early. */
+ − 2357 /* #### It's not clear we need this whole chain-canonicalize mechanism
+ − 2358 any more. */
+ − 2359 Lisp_Object chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (codesys),
+ − 2360 XCODING_SYSTEM_CHAIN_CHAIN (codesys));
+ − 2361 chain = Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (codesys),
+ − 2362 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (codesys),
+ − 2363 chain));
+ − 2364 Fputhash (chain, codesys, Vchain_canonicalize_hash_table);
+ − 2365 return codesys;
+ − 2366 }
+ − 2367
+ − 2368 static Lisp_Object
+ − 2369 chain_canonicalize_after_coding (struct coding_stream *str)
+ − 2370 {
+ − 2371 Lisp_Object cac =
+ − 2372 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (str->codesys);
+ − 2373 if (!NILP (cac))
+ − 2374 return cac;
+ − 2375 return str->codesys;
+ − 2376 #if 0
+ − 2377 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain);
+ − 2378 Lisp_Object us = str->codesys, codesys;
+ − 2379 int i;
+ − 2380 Lisp_Object chain;
+ − 2381 Lisp_Object tail;
+ − 2382 int changed = 0;
+ − 2383
+ − 2384 /* #### It's not clear we need this whole chain-canonicalize mechanism
+ − 2385 any more. */
+ − 2386 if (str->direction == CODING_ENCODE || !data->initted)
+ − 2387 return us;
+ − 2388
+ − 2389 chain = Flist (XCODING_SYSTEM_CHAIN_COUNT (us),
+ − 2390 XCODING_SYSTEM_CHAIN_CHAIN (us));
+ − 2391
+ − 2392 tail = chain;
+ − 2393 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (us); i++)
+ − 2394 {
+ − 2395 codesys = (coding_stream_canonicalize_after_coding
+ − 2396 (XLSTREAM (data->lstreams[i])));
+ − 2397 if (!EQ (codesys, XCAR (tail)))
+ − 2398 changed = 1;
+ − 2399 XCAR (tail) = codesys;
+ − 2400 tail = XCDR (tail);
+ − 2401 }
+ − 2402
+ − 2403 if (!changed)
+ − 2404 return us;
+ − 2405
+ − 2406 chain = delq_no_quit (Qnil, chain);
+ − 2407
+ − 2408 if (NILP (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us)) &&
+ − 2409 NILP (XCODING_SYSTEM_POST_READ_CONVERSION (us)))
+ − 2410 {
+ − 2411 if (NILP (chain))
+ − 2412 return Qnil;
+ − 2413 if (NILP (XCDR (chain)))
+ − 2414 return XCAR (chain);
+ − 2415 }
+ − 2416
+ − 2417 codesys = Fgethash (Fcons (XCODING_SYSTEM_PRE_WRITE_CONVERSION (us),
+ − 2418 Fcons (XCODING_SYSTEM_POST_READ_CONVERSION (us),
+ − 2419 chain)), Vchain_canonicalize_hash_table,
+ − 2420 Qnil);
+ − 2421 if (!NILP (codesys))
+ − 2422 return codesys;
+ − 2423 return make_internal_coding_system
+ − 2424 (us, "internal-chain-canonicalizer-wrapper",
+ − 2425 Qchain, Qunbound, list2 (Qchain, chain));
+ − 2426 #endif /* 0 */
+ − 2427 }
+ − 2428
+ − 2429 static void
+ − 2430 chain_init (Lisp_Object codesys)
+ − 2431 {
+ − 2432 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) = Qnil;
+ − 2433 }
+ − 2434
+ − 2435 static void
+ − 2436 chain_mark (Lisp_Object codesys)
+ − 2437 {
+ − 2438 int i;
+ − 2439
+ − 2440 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (codesys); i++)
+ − 2441 mark_object (XCODING_SYSTEM_CHAIN_CHAIN (codesys)[i]);
+ − 2442 mark_object (XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys));
+ − 2443 }
+ − 2444
+ − 2445 static void
+ − 2446 chain_mark_coding_stream_1 (struct chain_coding_stream *data)
+ − 2447 {
+ − 2448 int i;
+ − 2449
+ − 2450 for (i = 0; i < data->lstream_count; i++)
+ − 2451 mark_object (data->lstreams[i]);
+ − 2452 }
+ − 2453
+ − 2454 static void
+ − 2455 chain_mark_coding_stream (struct coding_stream *str)
+ − 2456 {
+ − 2457 chain_mark_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain));
+ − 2458 }
+ − 2459
+ − 2460 static void
+ − 2461 chain_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag)
+ − 2462 {
+ − 2463 int i;
+ − 2464
826
+ − 2465 write_c_string (printcharfun, "(");
771
+ − 2466 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (cs); i++)
+ − 2467 {
826
+ − 2468 write_c_string (printcharfun, i == 0 ? "" : "->");
771
+ − 2469 print_coding_system_in_print_method (XCODING_SYSTEM_CHAIN_CHAIN (cs)[i],
+ − 2470 printcharfun, escapeflag);
+ − 2471 }
+ − 2472 {
+ − 2473 Lisp_Object cac = XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (cs);
+ − 2474 if (!NILP (cac))
+ − 2475 {
+ − 2476 if (i > 0)
826
+ − 2477 write_c_string (printcharfun, " ");
+ − 2478 write_c_string (printcharfun, "canonicalize-after-coding=");
771
+ − 2479 print_coding_system_in_print_method (cac, printcharfun, escapeflag);
+ − 2480 }
+ − 2481 }
+ − 2482
826
+ − 2483 write_c_string (printcharfun, ")");
771
+ − 2484 }
+ − 2485
+ − 2486 static void
+ − 2487 chain_rewind_coding_stream_1 (struct chain_coding_stream *data)
+ − 2488 {
+ − 2489 /* Each will rewind the next; there is always at least one stream (the
+ − 2490 dynarr stream at the end) if we're initted */
+ − 2491 if (data->initted)
+ − 2492 Lstream_rewind (XLSTREAM (data->lstreams[0]));
+ − 2493 }
+ − 2494
+ − 2495 static void
+ − 2496 chain_rewind_coding_stream (struct coding_stream *str)
+ − 2497 {
+ − 2498 chain_rewind_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain));
+ − 2499 }
+ − 2500
+ − 2501 static void
+ − 2502 chain_init_coding_streams_1 (struct chain_coding_stream *data,
+ − 2503 unsigned_char_dynarr *dst,
+ − 2504 int ncodesys, Lisp_Object *codesys,
+ − 2505 enum encode_decode direction)
+ − 2506 {
+ − 2507 int i;
+ − 2508 Lisp_Object lstream_out;
+ − 2509
+ − 2510 data->lstream_count = ncodesys + 1;
+ − 2511 data->lstreams = xnew_array (Lisp_Object, data->lstream_count);
+ − 2512
+ − 2513 lstream_out = make_dynarr_output_stream (dst);
+ − 2514 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED, 0);
+ − 2515 data->lstreams[data->lstream_count - 1] = lstream_out;
+ − 2516
+ − 2517 for (i = ncodesys - 1; i >= 0; i--)
+ − 2518 {
+ − 2519 data->lstreams[i] =
+ − 2520 make_coding_output_stream
+ − 2521 (XLSTREAM (lstream_out),
+ − 2522 codesys[direction == CODING_ENCODE ? ncodesys - (i + 1) : i],
800
+ − 2523 direction, 0);
771
+ − 2524 lstream_out = data->lstreams[i];
+ − 2525 Lstream_set_buffering (XLSTREAM (lstream_out), LSTREAM_UNBUFFERED,
+ − 2526 0);
+ − 2527 }
+ − 2528 data->initted = 1;
+ − 2529 }
+ − 2530
+ − 2531 static Bytecount
+ − 2532 chain_convert (struct coding_stream *str, const UExtbyte *src,
+ − 2533 unsigned_char_dynarr *dst, Bytecount n)
+ − 2534 {
+ − 2535 struct chain_coding_stream *data = CODING_STREAM_TYPE_DATA (str, chain);
+ − 2536
+ − 2537 if (str->eof)
+ − 2538 {
+ − 2539 /* Each will close the next; there is always at least one stream (the
+ − 2540 dynarr stream at the end) if we're initted. We need to close now
+ − 2541 because more data may be generated. */
+ − 2542 if (data->initted)
+ − 2543 Lstream_close (XLSTREAM (data->lstreams[0]));
+ − 2544 return n;
+ − 2545 }
+ − 2546
+ − 2547 if (!data->initted)
+ − 2548 chain_init_coding_streams_1
+ − 2549 (data, dst, XCODING_SYSTEM_CHAIN_COUNT (str->codesys),
+ − 2550 XCODING_SYSTEM_CHAIN_CHAIN (str->codesys), str->direction);
+ − 2551
+ − 2552 if (Lstream_write (XLSTREAM (data->lstreams[0]), src, n) < 0)
+ − 2553 return -1;
+ − 2554 return n;
+ − 2555 }
+ − 2556
+ − 2557 static void
+ − 2558 chain_finalize_coding_stream_1 (struct chain_coding_stream *data)
+ − 2559 {
+ − 2560 if (data->lstreams)
+ − 2561 {
2297
+ − 2562 /* During GC, these objects are unmarked, and are about to be freed.
+ − 2563 We do NOT want them on the free list, and that will cause lots of
+ − 2564 nastiness including crashes. Just let them be freed normally. */
771
+ − 2565 if (!gc_in_progress)
+ − 2566 {
+ − 2567 int i;
2297
+ − 2568 /* Order of deletion is important here! Delete from the head of
+ − 2569 the chain and work your way towards the tail. In general,
+ − 2570 when you delete an object, there should be *NO* pointers to it
+ − 2571 anywhere. Deleting back-to-front would be a problem because
+ − 2572 there are pointers going forward. If there were pointers in
+ − 2573 both directions, you'd have to disconnect the pointers to a
+ − 2574 particular object before deleting it. */
771
+ − 2575 for (i = 0; i < data->lstream_count; i++)
+ − 2576 Lstream_delete (XLSTREAM ((data->lstreams)[i]));
+ − 2577 }
1726
+ − 2578 xfree (data->lstreams, Lisp_Object *);
771
+ − 2579 }
+ − 2580 }
+ − 2581
+ − 2582 static void
+ − 2583 chain_finalize_coding_stream (struct coding_stream *str)
+ − 2584 {
+ − 2585 chain_finalize_coding_stream_1 (CODING_STREAM_TYPE_DATA (str, chain));
+ − 2586 }
+ − 2587
+ − 2588 static void
+ − 2589 chain_finalize (Lisp_Object c)
+ − 2590 {
+ − 2591 if (XCODING_SYSTEM_CHAIN_CHAIN (c))
1726
+ − 2592 xfree (XCODING_SYSTEM_CHAIN_CHAIN (c), Lisp_Object *);
771
+ − 2593 }
+ − 2594
428
+ − 2595 static int
771
+ − 2596 chain_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value)
+ − 2597 {
+ − 2598 if (EQ (key, Qchain))
+ − 2599 {
+ − 2600 Lisp_Object *cslist;
+ − 2601 int count = 0;
+ − 2602 int i;
+ − 2603
2367
+ − 2604 {
+ − 2605 EXTERNAL_LIST_LOOP_2 (elt, value)
+ − 2606 {
+ − 2607 Fget_coding_system (elt);
+ − 2608 count++;
+ − 2609 }
+ − 2610 }
771
+ − 2611
+ − 2612 cslist = xnew_array (Lisp_Object, count);
+ − 2613 XCODING_SYSTEM_CHAIN_CHAIN (codesys) = cslist;
+ − 2614
+ − 2615 count = 0;
2367
+ − 2616 {
+ − 2617 EXTERNAL_LIST_LOOP_2 (elt, value)
+ − 2618 {
+ − 2619 cslist[count] = Fget_coding_system (elt);
+ − 2620 count++;
+ − 2621 }
+ − 2622 }
771
+ − 2623
+ − 2624 XCODING_SYSTEM_CHAIN_COUNT (codesys) = count;
+ − 2625
+ − 2626 for (i = 0; i < count - 1; i++)
+ − 2627 {
+ − 2628 if (decoding_source_sink_type_is_char (cslist[i], CODING_SINK) !=
+ − 2629 decoding_source_sink_type_is_char (cslist[i + 1], CODING_SOURCE))
+ − 2630 invalid_argument_2 ("Sink of first must match source of second",
+ − 2631 cslist[i], cslist[i + 1]);
+ − 2632 }
+ − 2633 }
+ − 2634 else if (EQ (key, Qcanonicalize_after_coding))
+ − 2635 XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (codesys) =
+ − 2636 Fget_coding_system (value);
+ − 2637 else
+ − 2638 return 0;
+ − 2639 return 1;
+ − 2640 }
+ − 2641
+ − 2642 static Lisp_Object
+ − 2643 chain_getprop (Lisp_Object coding_system, Lisp_Object prop)
+ − 2644 {
+ − 2645 if (EQ (prop, Qchain))
+ − 2646 {
+ − 2647 Lisp_Object result = Qnil;
+ − 2648 int i;
+ − 2649
+ − 2650 for (i = 0; i < XCODING_SYSTEM_CHAIN_COUNT (coding_system); i++)
+ − 2651 result = Fcons (XCODING_SYSTEM_CHAIN_CHAIN (coding_system)[i],
+ − 2652 result);
+ − 2653
+ − 2654 return Fnreverse (result);
+ − 2655 }
+ − 2656 else if (EQ (prop, Qcanonicalize_after_coding))
+ − 2657 return XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (coding_system);
+ − 2658 else
+ − 2659 return Qunbound;
+ − 2660 }
+ − 2661
+ − 2662 static enum source_sink_type
+ − 2663 chain_conversion_end_type (Lisp_Object codesys)
+ − 2664 {
+ − 2665 Lisp_Object *cslist = XCODING_SYSTEM_CHAIN_CHAIN (codesys);
+ − 2666 int n = XCODING_SYSTEM_CHAIN_COUNT (codesys);
+ − 2667 int charp_source, charp_sink;
+ − 2668
+ − 2669 if (n == 0)
+ − 2670 return DECODES_BYTE_TO_BYTE; /* arbitrary */
+ − 2671 charp_source = decoding_source_sink_type_is_char (cslist[0], CODING_SOURCE);
+ − 2672 charp_sink = decoding_source_sink_type_is_char (cslist[n - 1], CODING_SINK);
+ − 2673
+ − 2674 switch (charp_source * 2 + charp_sink)
+ − 2675 {
+ − 2676 case 0: return DECODES_BYTE_TO_BYTE;
+ − 2677 case 1: return DECODES_BYTE_TO_CHARACTER;
+ − 2678 case 2: return DECODES_CHARACTER_TO_BYTE;
+ − 2679 case 3: return DECODES_CHARACTER_TO_CHARACTER;
+ − 2680 }
+ − 2681
+ − 2682 abort ();
+ − 2683 return DECODES_BYTE_TO_BYTE;
+ − 2684 }
+ − 2685
+ − 2686
+ − 2687 /************************************************************************/
+ − 2688 /* No-conversion methods */
+ − 2689 /************************************************************************/
+ − 2690
+ − 2691 /* "No conversion"; used for binary files. We use quotes because there
+ − 2692 really is some conversion being applied (it does byte<->char
+ − 2693 conversion), but it appears to the user as if the text is read in
2297
+ − 2694 without conversion.
+ − 2695
+ − 2696 #### Shouldn't we _call_ it that, then? And while we're at it,
+ − 2697 separate it into "to_internal" and "to_external"? */
771
+ − 2698 DEFINE_CODING_SYSTEM_TYPE (no_conversion);
+ − 2699
+ − 2700 /* This is used when reading in "binary" files -- i.e. files that may
+ − 2701 contain all 256 possible byte values and that are not to be
+ − 2702 interpreted as being in any particular encoding. */
+ − 2703 static Bytecount
+ − 2704 no_conversion_convert (struct coding_stream *str,
+ − 2705 const UExtbyte *src,
+ − 2706 unsigned_char_dynarr *dst, Bytecount n)
+ − 2707 {
+ − 2708 UExtbyte c;
+ − 2709 unsigned int ch = str->ch;
+ − 2710 Bytecount orign = n;
+ − 2711
+ − 2712 if (str->direction == CODING_DECODE)
+ − 2713 {
+ − 2714 while (n--)
+ − 2715 {
+ − 2716 c = *src++;
+ − 2717
+ − 2718 DECODE_ADD_BINARY_CHAR (c, dst);
+ − 2719 }
+ − 2720
+ − 2721 if (str->eof)
+ − 2722 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst);
+ − 2723 }
+ − 2724 else
+ − 2725 {
+ − 2726
+ − 2727 while (n--)
+ − 2728 {
+ − 2729 c = *src++;
826
+ − 2730 if (byte_ascii_p (c))
771
+ − 2731 {
+ − 2732 assert (ch == 0);
+ − 2733 Dynarr_add (dst, c);
+ − 2734 }
+ − 2735 #ifdef MULE
867
+ − 2736 else if (ibyte_leading_byte_p (c))
771
+ − 2737 {
+ − 2738 assert (ch == 0);
+ − 2739 if (c == LEADING_BYTE_LATIN_ISO8859_1 ||
+ − 2740 c == LEADING_BYTE_CONTROL_1)
+ − 2741 ch = c;
+ − 2742 else
2297
+ − 2743 /* #### This is just plain unacceptable. */
771
+ − 2744 Dynarr_add (dst, '~'); /* untranslatable character */
+ − 2745 }
+ − 2746 else
+ − 2747 {
+ − 2748 if (ch == LEADING_BYTE_LATIN_ISO8859_1)
+ − 2749 Dynarr_add (dst, c);
+ − 2750 else if (ch == LEADING_BYTE_CONTROL_1)
+ − 2751 {
+ − 2752 assert (c < 0xC0);
+ − 2753 Dynarr_add (dst, c - 0x20);
+ − 2754 }
+ − 2755 /* else it should be the second or third byte of an
+ − 2756 untranslatable character, so ignore it */
+ − 2757 ch = 0;
+ − 2758 }
+ − 2759 #endif /* MULE */
+ − 2760
+ − 2761 }
+ − 2762 }
+ − 2763
+ − 2764 str->ch = ch;
+ − 2765 return orign;
+ − 2766 }
+ − 2767
+ − 2768 DEFINE_DETECTOR (no_conversion);
+ − 2769 DEFINE_DETECTOR_CATEGORY (no_conversion, no_conversion);
+ − 2770
+ − 2771 struct no_conversion_detector
+ − 2772 {
+ − 2773 int dummy;
+ − 2774 };
+ − 2775
+ − 2776 static void
2286
+ − 2777 no_conversion_detect (struct detection_state *st, const UExtbyte *UNUSED (src),
+ − 2778 Bytecount UNUSED (n))
771
+ − 2779 {
+ − 2780 /* Hack until we get better handling of this stuff! */
+ − 2781 DET_RESULT (st, no_conversion) = DET_SLIGHTLY_LIKELY;
+ − 2782 }
+ − 2783
+ − 2784
+ − 2785 /************************************************************************/
+ − 2786 /* Convert-eol methods */
+ − 2787 /************************************************************************/
+ − 2788
+ − 2789 /* This is used to handle end-of-line (EOL) differences. It is
+ − 2790 character-to-character, and works (when encoding) *BEFORE* sending
+ − 2791 data to the main encoding routine -- thus, that routine must handle
+ − 2792 different EOL types itself if it does line-oriented type processing.
+ − 2793 This is unavoidable because we don't know whether the output of the
+ − 2794 main encoding routine is ASCII compatible (Unicode is definitely not,
2297
+ − 2795 for example). [[ sjt sez this is bogus. There should be _no_ EOL
+ − 2796 processing (or processing of any kind) after conversion to external. ]]
771
+ − 2797
793
+ − 2798 There is one parameter: `subtype', either `cr', `lf', `crlf', or nil.
771
+ − 2799 */
+ − 2800
+ − 2801 struct convert_eol_coding_system
+ − 2802 {
+ − 2803 enum eol_type subtype;
2132
+ − 2804 int dummy; /* On some architectures (eg ia64) the portable dumper can
+ − 2805 produce unaligned access errors without this field. Probably
+ − 2806 because the combined structure of this structure and
+ − 2807 Lisp_Coding_System is not properly aligned. */
771
+ − 2808 };
+ − 2809
+ − 2810 #define CODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \
+ − 2811 (CODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype)
+ − 2812 #define XCODING_SYSTEM_CONVERT_EOL_SUBTYPE(codesys) \
+ − 2813 (XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol)->subtype)
+ − 2814
+ − 2815 struct convert_eol_coding_stream
+ − 2816 {
+ − 2817 enum eol_type actual;
+ − 2818 };
+ − 2819
1204
+ − 2820 static const struct memory_description
771
+ − 2821 convert_eol_coding_system_description[] = {
+ − 2822 { XD_END }
+ − 2823 };
+ − 2824
1204
+ − 2825 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol);
+ − 2826
771
+ − 2827 static void
2286
+ − 2828 convert_eol_print (Lisp_Object cs, Lisp_Object printcharfun,
+ − 2829 int UNUSED (escapeflag))
771
+ − 2830 {
+ − 2831 struct convert_eol_coding_system *data =
+ − 2832 XCODING_SYSTEM_TYPE_DATA (cs, convert_eol);
+ − 2833
+ − 2834 write_fmt_string (printcharfun, "(%s)",
+ − 2835 data->subtype == EOL_LF ? "lf" :
+ − 2836 data->subtype == EOL_CRLF ? "crlf" :
+ − 2837 data->subtype == EOL_CR ? "cr" :
793
+ − 2838 data->subtype == EOL_AUTODETECT ? "nil" :
771
+ − 2839 (abort(), ""));
+ − 2840 }
+ − 2841
+ − 2842 static enum source_sink_type
2286
+ − 2843 convert_eol_conversion_end_type (Lisp_Object UNUSED (codesys))
771
+ − 2844 {
+ − 2845 return DECODES_CHARACTER_TO_CHARACTER;
+ − 2846 }
+ − 2847
+ − 2848 static int
+ − 2849 convert_eol_putprop (Lisp_Object codesys,
+ − 2850 Lisp_Object key,
+ − 2851 Lisp_Object value)
+ − 2852 {
+ − 2853 struct convert_eol_coding_system *data =
+ − 2854 XCODING_SYSTEM_TYPE_DATA (codesys, convert_eol);
+ − 2855
+ − 2856 if (EQ (key, Qsubtype))
+ − 2857 {
+ − 2858 if (EQ (value, Qlf) /* || EQ (value, Qunix) */)
+ − 2859 data->subtype = EOL_LF;
+ − 2860 else if (EQ (value, Qcrlf) /* || EQ (value, Qdos) */)
+ − 2861 data->subtype = EOL_CRLF;
+ − 2862 else if (EQ (value, Qcr) /* || EQ (value, Qmac) */)
+ − 2863 data->subtype = EOL_CR;
793
+ − 2864 else if (EQ (value, Qnil))
771
+ − 2865 data->subtype = EOL_AUTODETECT;
+ − 2866 else invalid_constant ("Unrecognized eol type", value);
+ − 2867 }
+ − 2868 else
+ − 2869 return 0;
+ − 2870 return 1;
+ − 2871 }
+ − 2872
+ − 2873 static Lisp_Object
+ − 2874 convert_eol_getprop (Lisp_Object coding_system, Lisp_Object prop)
+ − 2875 {
+ − 2876 struct convert_eol_coding_system *data =
+ − 2877 XCODING_SYSTEM_TYPE_DATA (coding_system, convert_eol);
+ − 2878
+ − 2879 if (EQ (prop, Qsubtype))
+ − 2880 {
+ − 2881 switch (data->subtype)
+ − 2882 {
+ − 2883 case EOL_LF: return Qlf;
+ − 2884 case EOL_CRLF: return Qcrlf;
+ − 2885 case EOL_CR: return Qcr;
793
+ − 2886 case EOL_AUTODETECT: return Qnil;
771
+ − 2887 default: abort ();
+ − 2888 }
+ − 2889 }
+ − 2890
+ − 2891 return Qunbound;
+ − 2892 }
+ − 2893
+ − 2894 static void
+ − 2895 convert_eol_init_coding_stream (struct coding_stream *str)
+ − 2896 {
+ − 2897 struct convert_eol_coding_stream *data =
+ − 2898 CODING_STREAM_TYPE_DATA (str, convert_eol);
+ − 2899 data->actual = XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys);
+ − 2900 }
+ − 2901
+ − 2902 static Bytecount
867
+ − 2903 convert_eol_convert (struct coding_stream *str, const Ibyte *src,
771
+ − 2904 unsigned_char_dynarr *dst, Bytecount n)
+ − 2905 {
+ − 2906 if (str->direction == CODING_DECODE)
+ − 2907 {
+ − 2908 struct convert_eol_coding_stream *data =
+ − 2909 CODING_STREAM_TYPE_DATA (str, convert_eol);
+ − 2910
+ − 2911 if (data->actual == EOL_AUTODETECT)
+ − 2912 {
+ − 2913 Bytecount n2 = n;
867
+ − 2914 const Ibyte *src2 = src;
771
+ − 2915
+ − 2916 for (; n2; n2--)
+ − 2917 {
867
+ − 2918 Ibyte c = *src2++;
771
+ − 2919 if (c == '\n')
+ − 2920 {
+ − 2921 data->actual = EOL_LF;
+ − 2922 break;
+ − 2923 }
+ − 2924 else if (c == '\r')
+ − 2925 {
+ − 2926 if (n2 == 1)
+ − 2927 {
+ − 2928 /* If we're seeing a '\r' at the end of the data, then
+ − 2929 reject the '\r' right now so it doesn't become an
+ − 2930 issue in the code below -- unless we're at the end of
+ − 2931 the stream, in which case we can't do that (because
+ − 2932 then the '\r' will never get written out), and in any
+ − 2933 case we should be recognizing it at EOL_CR format. */
+ − 2934 if (str->eof)
+ − 2935 data->actual = EOL_CR;
+ − 2936 else
+ − 2937 n--;
+ − 2938 break;
+ − 2939 }
+ − 2940 else if (*src2 == '\n')
+ − 2941 data->actual = EOL_CRLF;
+ − 2942 else
+ − 2943 data->actual = EOL_CR;
+ − 2944 break;
+ − 2945 }
+ − 2946 }
+ − 2947 }
+ − 2948
+ − 2949 /* str->eof is set, the caller reached EOF on the other end and has
+ − 2950 no new data to give us. The only data we get is the data we
+ − 2951 rejected from last time. */
+ − 2952 if (data->actual == EOL_LF || data->actual == EOL_AUTODETECT ||
+ − 2953 (str->eof))
+ − 2954 Dynarr_add_many (dst, src, n);
+ − 2955 else
+ − 2956 {
867
+ − 2957 const Ibyte *end = src + n;
771
+ − 2958 while (1)
+ − 2959 {
+ − 2960 /* Find the next section with no \r and add it. */
867
+ − 2961 const Ibyte *runstart = src;
+ − 2962 src = (Ibyte *) memchr (src, '\r', end - src);
771
+ − 2963 if (!src)
+ − 2964 src = end;
+ − 2965 Dynarr_add_many (dst, runstart, src - runstart);
+ − 2966 /* Stop if at end ... */
+ − 2967 if (src == end)
+ − 2968 break;
+ − 2969 /* ... else, translate as necessary. */
+ − 2970 src++;
+ − 2971 if (data->actual == EOL_CR)
+ − 2972 Dynarr_add (dst, '\n');
+ − 2973 /* We need to be careful here with CRLF. If we see a CR at the
+ − 2974 end of the data, we don't know if it's part of a CRLF, so we
+ − 2975 reject it. Otherwise: If it's part of a CRLF, eat it and
+ − 2976 loop; the following LF gets added next time around. If it's
+ − 2977 not part of a CRLF, add the CR and loop. The following
+ − 2978 character will be processed in the next loop iteration. This
+ − 2979 correctly handles a sequence like CR+CR+LF. */
+ − 2980 else if (src == end)
+ − 2981 return n - 1; /* reject the CR at the end; we'll get it again
+ − 2982 next time the convert method is called */
+ − 2983 else if (*src != '\n')
+ − 2984 Dynarr_add (dst, '\r');
+ − 2985 }
+ − 2986 }
+ − 2987
+ − 2988 return n;
+ − 2989 }
+ − 2990 else
+ − 2991 {
+ − 2992 enum eol_type subtype =
+ − 2993 XCODING_SYSTEM_CONVERT_EOL_SUBTYPE (str->codesys);
867
+ − 2994 const Ibyte *end = src + n;
771
+ − 2995
+ − 2996 /* We try to be relatively efficient here. */
+ − 2997 if (subtype == EOL_LF)
+ − 2998 Dynarr_add_many (dst, src, n);
+ − 2999 else
+ − 3000 {
+ − 3001 while (1)
+ − 3002 {
+ − 3003 /* Find the next section with no \n and add it. */
867
+ − 3004 const Ibyte *runstart = src;
+ − 3005 src = (Ibyte *) memchr (src, '\n', end - src);
771
+ − 3006 if (!src)
+ − 3007 src = end;
+ − 3008 Dynarr_add_many (dst, runstart, src - runstart);
+ − 3009 /* Stop if at end ... */
+ − 3010 if (src == end)
+ − 3011 break;
+ − 3012 /* ... else, skip over \n and add its translation. */
+ − 3013 src++;
+ − 3014 Dynarr_add (dst, '\r');
+ − 3015 if (subtype == EOL_CRLF)
+ − 3016 Dynarr_add (dst, '\n');
+ − 3017 }
+ − 3018 }
+ − 3019
+ − 3020 return n;
+ − 3021 }
+ − 3022 }
+ − 3023
+ − 3024 static Lisp_Object
+ − 3025 convert_eol_canonicalize_after_coding (struct coding_stream *str)
+ − 3026 {
+ − 3027 struct convert_eol_coding_stream *data =
+ − 3028 CODING_STREAM_TYPE_DATA (str, convert_eol);
+ − 3029
+ − 3030 if (str->direction == CODING_ENCODE)
+ − 3031 return str->codesys;
+ − 3032
+ − 3033 switch (data->actual)
+ − 3034 {
+ − 3035 case EOL_LF: return Fget_coding_system (Qconvert_eol_lf);
+ − 3036 case EOL_CRLF: return Fget_coding_system (Qconvert_eol_crlf);
+ − 3037 case EOL_CR: return Fget_coding_system (Qconvert_eol_cr);
+ − 3038 case EOL_AUTODETECT: return str->codesys;
+ − 3039 default: abort (); return Qnil;
+ − 3040 }
+ − 3041 }
+ − 3042
+ − 3043
+ − 3044 /************************************************************************/
+ − 3045 /* Undecided methods */
+ − 3046 /************************************************************************/
+ − 3047
+ − 3048 /* Do autodetection. We can autodetect the EOL type only, the coding
+ − 3049 system only, or both. We only do autodetection when decoding; when
+ − 3050 encoding, we just pass the data through.
+ − 3051
+ − 3052 When doing just EOL detection, a coding system can be specified; if so,
+ − 3053 we will decode this data through the coding system before doing EOL
+ − 3054 detection. The reason for specifying this is so that
+ − 3055 canonicalize-after-coding works: We will canonicalize the specified
+ − 3056 coding system into the appropriate EOL type. When doing both coding and
+ − 3057 EOL detection, we do similar canonicalization, and also catch situations
+ − 3058 where the EOL type is overspecified, i.e. the detected coding system
+ − 3059 specifies an EOL type, and either switch to the equivalent
+ − 3060 non-EOL-processing coding system (if possible), or terminate EOL
+ − 3061 detection and use the specified EOL type. This prevents data from being
+ − 3062 EOL-processed twice.
+ − 3063 */
+ − 3064
+ − 3065 struct undecided_coding_system
+ − 3066 {
+ − 3067 int do_eol, do_coding;
+ − 3068 Lisp_Object cs;
+ − 3069 };
+ − 3070
+ − 3071 struct undecided_coding_stream
+ − 3072 {
+ − 3073 Lisp_Object actual;
+ − 3074 /* Either 2 or 3 lstreams here; see undecided_convert */
+ − 3075 struct chain_coding_stream c;
+ − 3076
+ − 3077 struct detection_state *st;
+ − 3078 };
+ − 3079
1204
+ − 3080 static const struct memory_description undecided_coding_system_description[] = {
+ − 3081 { XD_LISP_OBJECT, offsetof (struct undecided_coding_system, cs) },
771
+ − 3082 { XD_END }
+ − 3083 };
+ − 3084
1204
+ − 3085 static const struct memory_description undecided_coding_stream_description_1 [] = {
+ − 3086 { XD_LISP_OBJECT, offsetof (struct undecided_coding_stream, actual) },
2367
+ − 3087 { XD_BLOCK_ARRAY, offsetof (struct undecided_coding_stream, c),
1204
+ − 3088 1, &chain_coding_stream_description },
+ − 3089 { XD_END }
+ − 3090 };
+ − 3091
+ − 3092 const struct sized_memory_description undecided_coding_stream_description = {
+ − 3093 sizeof (struct undecided_coding_stream), undecided_coding_stream_description_1
+ − 3094 };
+ − 3095
+ − 3096 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (undecided);
+ − 3097
771
+ − 3098 static void
+ − 3099 undecided_init (Lisp_Object codesys)
+ − 3100 {
+ − 3101 struct undecided_coding_system *data =
+ − 3102 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
+ − 3103
+ − 3104 data->cs = Qnil;
+ − 3105 }
+ − 3106
+ − 3107 static void
+ − 3108 undecided_mark (Lisp_Object codesys)
+ − 3109 {
+ − 3110 struct undecided_coding_system *data =
+ − 3111 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
+ − 3112
+ − 3113 mark_object (data->cs);
+ − 3114 }
+ − 3115
+ − 3116 static void
+ − 3117 undecided_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag)
+ − 3118 {
+ − 3119 struct undecided_coding_system *data =
+ − 3120 XCODING_SYSTEM_TYPE_DATA (cs, undecided);
+ − 3121 int need_space = 0;
+ − 3122
826
+ − 3123 write_c_string (printcharfun, "(");
771
+ − 3124 if (data->do_eol)
+ − 3125 {
826
+ − 3126 write_c_string (printcharfun, "do-eol");
771
+ − 3127 need_space = 1;
+ − 3128 }
+ − 3129 if (data->do_coding)
+ − 3130 {
+ − 3131 if (need_space)
826
+ − 3132 write_c_string (printcharfun, " ");
+ − 3133 write_c_string (printcharfun, "do-coding");
771
+ − 3134 need_space = 1;
+ − 3135 }
+ − 3136 if (!NILP (data->cs))
+ − 3137 {
+ − 3138 if (need_space)
826
+ − 3139 write_c_string (printcharfun, " ");
+ − 3140 write_c_string (printcharfun, "coding-system=");
771
+ − 3141 print_coding_system_in_print_method (data->cs, printcharfun, escapeflag);
+ − 3142 }
826
+ − 3143 write_c_string (printcharfun, ")");
771
+ − 3144 }
+ − 3145
+ − 3146 static void
+ − 3147 undecided_mark_coding_stream (struct coding_stream *str)
+ − 3148 {
1204
+ − 3149 mark_object (CODING_STREAM_TYPE_DATA (str, undecided)->actual);
771
+ − 3150 chain_mark_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c);
+ − 3151 }
+ − 3152
+ − 3153 static int
+ − 3154 undecided_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value)
+ − 3155 {
+ − 3156 struct undecided_coding_system *data =
+ − 3157 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
+ − 3158
+ − 3159 if (EQ (key, Qdo_eol))
+ − 3160 data->do_eol = 1;
+ − 3161 else if (EQ (key, Qdo_coding))
+ − 3162 data->do_coding = 1;
+ − 3163 else if (EQ (key, Qcoding_system))
+ − 3164 data->cs = get_coding_system_for_text_file (value, 0);
+ − 3165 else
+ − 3166 return 0;
+ − 3167 return 1;
+ − 3168 }
+ − 3169
+ − 3170 static Lisp_Object
+ − 3171 undecided_getprop (Lisp_Object codesys, Lisp_Object prop)
+ − 3172 {
+ − 3173 struct undecided_coding_system *data =
+ − 3174 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
+ − 3175
+ − 3176 if (EQ (prop, Qdo_eol))
+ − 3177 return data->do_eol ? Qt : Qnil;
+ − 3178 if (EQ (prop, Qdo_coding))
+ − 3179 return data->do_coding ? Qt : Qnil;
+ − 3180 if (EQ (prop, Qcoding_system))
+ − 3181 return data->cs;
+ − 3182 return Qunbound;
+ − 3183 }
+ − 3184
+ − 3185 static struct detection_state *
+ − 3186 allocate_detection_state (void)
+ − 3187 {
+ − 3188 int i;
+ − 3189 Bytecount size = MAX_ALIGN_SIZE (sizeof (struct detection_state));
+ − 3190 struct detection_state *block;
+ − 3191
+ − 3192 for (i = 0; i < coding_detector_count; i++)
+ − 3193 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size);
+ − 3194
+ − 3195 block = (struct detection_state *) xmalloc_and_zero (size);
+ − 3196
+ − 3197 size = MAX_ALIGN_SIZE (sizeof (struct detection_state));
+ − 3198 for (i = 0; i < coding_detector_count; i++)
+ − 3199 {
+ − 3200 block->data_offset[i] = size;
+ − 3201 size += MAX_ALIGN_SIZE (Dynarr_at (all_coding_detectors, i).data_size);
+ − 3202 }
+ − 3203
+ − 3204 return block;
+ − 3205 }
+ − 3206
+ − 3207 static void
+ − 3208 free_detection_state (struct detection_state *st)
+ − 3209 {
+ − 3210 int i;
+ − 3211
+ − 3212 for (i = 0; i < coding_detector_count; i++)
+ − 3213 {
+ − 3214 if (Dynarr_at (all_coding_detectors, i).finalize_detection_state_method)
+ − 3215 Dynarr_at (all_coding_detectors, i).finalize_detection_state_method
+ − 3216 (st);
+ − 3217 }
+ − 3218
1726
+ − 3219 xfree (st, struct detection_state *);
771
+ − 3220 }
+ − 3221
+ − 3222 static int
+ − 3223 coding_category_symbol_to_id (Lisp_Object symbol)
428
+ − 3224 {
+ − 3225 int i;
+ − 3226
+ − 3227 CHECK_SYMBOL (symbol);
771
+ − 3228 for (i = 0; i < coding_detector_count; i++)
+ − 3229 {
+ − 3230 detector_category_dynarr *cats =
+ − 3231 Dynarr_at (all_coding_detectors, i).cats;
+ − 3232 int j;
+ − 3233
+ − 3234 for (j = 0; j < Dynarr_length (cats); j++)
+ − 3235 if (EQ (Dynarr_at (cats, j).sym, symbol))
+ − 3236 return Dynarr_at (cats, j).id;
+ − 3237 }
+ − 3238
563
+ − 3239 invalid_constant ("Unrecognized coding category", symbol);
1204
+ − 3240 RETURN_NOT_REACHED (0);
428
+ − 3241 }
+ − 3242
771
+ − 3243 static Lisp_Object
+ − 3244 coding_category_id_to_symbol (int id)
428
+ − 3245 {
+ − 3246 int i;
771
+ − 3247
+ − 3248 for (i = 0; i < coding_detector_count; i++)
+ − 3249 {
+ − 3250 detector_category_dynarr *cats =
+ − 3251 Dynarr_at (all_coding_detectors, i).cats;
+ − 3252 int j;
+ − 3253
+ − 3254 for (j = 0; j < Dynarr_length (cats); j++)
+ − 3255 if (id == Dynarr_at (cats, j).id)
+ − 3256 return Dynarr_at (cats, j).sym;
+ − 3257 }
+ − 3258
+ − 3259 abort ();
+ − 3260 return Qnil; /* (usually) not reached */
428
+ − 3261 }
+ − 3262
771
+ − 3263 static Lisp_Object
+ − 3264 detection_result_number_to_symbol (enum detection_result result)
428
+ − 3265 {
1494
+ − 3266 /* let compiler warn if not all enumerators are handled */
+ − 3267 switch (result) {
+ − 3268 #define FROB(sym, num) case num: return (sym)
771
+ − 3269 FROB (Qnear_certainty, DET_NEAR_CERTAINTY);
+ − 3270 FROB (Qquite_probable, DET_QUITE_PROBABLE);
+ − 3271 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY);
1494
+ − 3272 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY);
771
+ − 3273 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY);
+ − 3274 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY);
+ − 3275 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE);
+ − 3276 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE);
+ − 3277 #undef FROB
1494
+ − 3278 }
771
+ − 3279
+ − 3280 abort ();
+ − 3281 return Qnil; /* (usually) not reached */
+ − 3282 }
+ − 3283
778
+ − 3284 #if 0 /* not used */
771
+ − 3285 static enum detection_result
+ − 3286 detection_result_symbol_to_number (Lisp_Object symbol)
+ − 3287 {
1494
+ − 3288 /* using switch here would be bad style, and doesn't help */
771
+ − 3289 #define FROB(sym, num) if (EQ (symbol, sym)) return (num)
+ − 3290 FROB (Qnear_certainty, DET_NEAR_CERTAINTY);
+ − 3291 FROB (Qquite_probable, DET_QUITE_PROBABLE);
+ − 3292 FROB (Qsomewhat_likely, DET_SOMEWHAT_LIKELY);
1494
+ − 3293 FROB (Qslightly_likely, DET_SLIGHTLY_LIKELY);
771
+ − 3294 FROB (Qas_likely_as_unlikely, DET_AS_LIKELY_AS_UNLIKELY);
+ − 3295 FROB (Qsomewhat_unlikely, DET_SOMEWHAT_UNLIKELY);
+ − 3296 FROB (Qquite_improbable, DET_QUITE_IMPROBABLE);
+ − 3297 FROB (Qnearly_impossible, DET_NEARLY_IMPOSSIBLE);
+ − 3298 #undef FROB
+ − 3299
+ − 3300 invalid_constant ("Unrecognized detection result", symbol);
+ − 3301 return ((enum detection_result) 0); /* not reached */
+ − 3302 }
778
+ − 3303 #endif /* 0 */
771
+ − 3304
+ − 3305 /* Set all detection results for a given detector to a specified value. */
+ − 3306 void
+ − 3307 set_detection_results (struct detection_state *st, int detector, int given)
+ − 3308 {
+ − 3309 detector_category_dynarr *cats =
+ − 3310 Dynarr_at (all_coding_detectors, detector).cats;
+ − 3311 int i;
+ − 3312
+ − 3313 for (i = 0; i < Dynarr_length (cats); i++)
+ − 3314 st->categories[Dynarr_at (cats, i).id] = given;
+ − 3315 }
428
+ − 3316
+ − 3317 static int
+ − 3318 acceptable_control_char_p (int c)
+ − 3319 {
+ − 3320 switch (c)
+ − 3321 {
+ − 3322 /* Allow and ignore control characters that you might
+ − 3323 reasonably see in a text file */
+ − 3324 case '\r':
+ − 3325 case '\n':
+ − 3326 case '\t':
+ − 3327 case 7: /* bell */
+ − 3328 case 8: /* backspace */
+ − 3329 case 11: /* vertical tab */
+ − 3330 case 12: /* form feed */
+ − 3331 case 26: /* MS-DOS C-z junk */
+ − 3332 case 31: /* '^_' -- for info */
+ − 3333 return 1;
+ − 3334 default:
+ − 3335 return 0;
+ − 3336 }
+ − 3337 }
+ − 3338
771
+ − 3339 #ifdef DEBUG_XEMACS
+ − 3340
+ − 3341 static UExtbyte
+ − 3342 hex_digit_to_char (int digit)
428
+ − 3343 {
771
+ − 3344 if (digit < 10)
+ − 3345 return digit + '0';
+ − 3346 else
+ − 3347 return digit - 10 + 'A';
428
+ − 3348 }
+ − 3349
771
+ − 3350 static void
+ − 3351 output_bytes_in_ascii_and_hex (const UExtbyte *src, Bytecount n)
428
+ − 3352 {
771
+ − 3353 UExtbyte *ascii = alloca_array (UExtbyte, n + 1);
+ − 3354 UExtbyte *hex = alloca_array (UExtbyte, 3 * n + 1);
+ − 3355 int i;
+ − 3356
+ − 3357 for (i = 0; i < n; i++)
428
+ − 3358 {
771
+ − 3359 UExtbyte c = src[i];
+ − 3360 if (c < 0x20)
+ − 3361 ascii[i] = '.';
428
+ − 3362 else
771
+ − 3363 ascii[i] = c;
+ − 3364 hex[3 * i] = hex_digit_to_char (c >> 4);
+ − 3365 hex[3 * i + 1] = hex_digit_to_char (c & 0xF);
+ − 3366 hex[3 * i + 2] = ' ';
428
+ − 3367 }
771
+ − 3368 ascii[i] = '\0';
+ − 3369 hex[3 * i - 1] = '\0';
+ − 3370 stderr_out ("%s %s", ascii, hex);
428
+ − 3371 }
+ − 3372
771
+ − 3373 #endif /* DEBUG_XEMACS */
+ − 3374
+ − 3375 /* Attempt to determine the encoding of the given text. Before calling
+ − 3376 this function for the first time, you must zero out the detection state.
428
+ − 3377
+ − 3378 Returns:
+ − 3379
771
+ − 3380 0 == keep going
+ − 3381 1 == stop
428
+ − 3382 */
+ − 3383
+ − 3384 static int
771
+ − 3385 detect_coding_type (struct detection_state *st, const UExtbyte *src,
+ − 3386 Bytecount n)
428
+ − 3387 {
771
+ − 3388 Bytecount n2 = n;
+ − 3389 const UExtbyte *src2 = src;
+ − 3390 int i;
+ − 3391
+ − 3392 #ifdef DEBUG_XEMACS
+ − 3393 if (!NILP (Vdebug_coding_detection))
+ − 3394 {
+ − 3395 int bytes = min (16, n);
+ − 3396 stderr_out ("detect_coding_type: processing %ld bytes\n", n);
+ − 3397 stderr_out ("First %d: ", bytes);
+ − 3398 output_bytes_in_ascii_and_hex (src, bytes);
+ − 3399 stderr_out ("\nLast %d: ", bytes);
+ − 3400 output_bytes_in_ascii_and_hex (src + n - bytes, bytes);
+ − 3401 stderr_out ("\n");
+ − 3402 }
+ − 3403 #endif /* DEBUG_XEMACS */
428
+ − 3404 if (!st->seen_non_ascii)
+ − 3405 {
771
+ − 3406 for (; n2; n2--, src2++)
428
+ − 3407 {
771
+ − 3408 UExtbyte c = *src2;
428
+ − 3409 if ((c < 0x20 && !acceptable_control_char_p (c)) || c >= 0x80)
+ − 3410 {
+ − 3411 st->seen_non_ascii = 1;
+ − 3412 break;
+ − 3413 }
+ − 3414 }
+ − 3415 }
+ − 3416
771
+ − 3417 for (i = 0; i < coding_detector_count; i++)
+ − 3418 Dynarr_at (all_coding_detectors, i).detect_method (st, src, n);
+ − 3419
+ − 3420 st->bytes_seen += n;
+ − 3421
+ − 3422 #ifdef DEBUG_XEMACS
+ − 3423 if (!NILP (Vdebug_coding_detection))
+ − 3424 {
+ − 3425 stderr_out ("seen_non_ascii: %d\n", st->seen_non_ascii);
1494
+ − 3426 if (coding_detector_category_count <= 0)
+ − 3427 stderr_out ("found %d detector categories\n",
+ − 3428 coding_detector_category_count);
771
+ − 3429 for (i = 0; i < coding_detector_category_count; i++)
+ − 3430 stderr_out_lisp
+ − 3431 ("%s: %s\n",
+ − 3432 2,
+ − 3433 coding_category_id_to_symbol (i),
+ − 3434 detection_result_number_to_symbol ((enum detection_result)
+ − 3435 st->categories[i]));
+ − 3436 }
+ − 3437 #endif /* DEBUG_XEMACS */
+ − 3438
+ − 3439 {
+ − 3440 int not_unlikely = 0;
+ − 3441 int retval;
+ − 3442
+ − 3443 for (i = 0; i < coding_detector_category_count; i++)
+ − 3444 if (st->categories[i] >= 0)
+ − 3445 not_unlikely++;
+ − 3446
+ − 3447 retval = (not_unlikely <= 1
+ − 3448 #if 0 /* this is bogus */
+ − 3449 || st->bytes_seen >= MAX_BYTES_PROCESSED_FOR_DETECTION
428
+ − 3450 #endif
771
+ − 3451 );
+ − 3452
+ − 3453 #ifdef DEBUG_XEMACS
+ − 3454 if (!NILP (Vdebug_coding_detection))
+ − 3455 stderr_out ("detect_coding_type: returning %d (%s)\n",
+ − 3456 retval, retval ? "stop" : "keep going");
+ − 3457 #endif /* DEBUG_XEMACS */
+ − 3458
+ − 3459 return retval;
428
+ − 3460 }
+ − 3461 }
+ − 3462
+ − 3463 static Lisp_Object
771
+ − 3464 detected_coding_system (struct detection_state *st)
428
+ − 3465 {
771
+ − 3466 int i;
+ − 3467 int even = 1;
+ − 3468
+ − 3469 if (st->seen_non_ascii)
+ − 3470 {
+ − 3471 for (i = 0; i < coding_detector_category_count; i++)
+ − 3472 if (st->categories[i] != DET_AS_LIKELY_AS_UNLIKELY)
+ − 3473 {
+ − 3474 even = 0;
+ − 3475 break;
+ − 3476 }
+ − 3477 }
+ − 3478
+ − 3479 /* #### Here we are ignoring the results of detection when it's all
+ − 3480 ASCII. This is obviously a bad thing. But we need to fix up the
+ − 3481 existing detection methods somewhat before we can switch. */
+ − 3482 if (even)
428
+ − 3483 {
+ − 3484 /* If the file was entirely or basically ASCII, use the
+ − 3485 default value of `buffer-file-coding-system'. */
+ − 3486 Lisp_Object retval =
+ − 3487 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system;
+ − 3488 if (!NILP (retval))
+ − 3489 {
771
+ − 3490 retval = find_coding_system_for_text_file (retval, 0);
428
+ − 3491 if (NILP (retval))
+ − 3492 {
+ − 3493 warn_when_safe
+ − 3494 (Qbad_variable, Qwarning,
+ − 3495 "Invalid `default-buffer-file-coding-system', set to nil");
+ − 3496 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil;
+ − 3497 }
+ − 3498 }
+ − 3499 if (NILP (retval))
+ − 3500 retval = Fget_coding_system (Qraw_text);
+ − 3501 return retval;
+ − 3502 }
+ − 3503 else
+ − 3504 {
771
+ − 3505 int likelihood;
+ − 3506 Lisp_Object retval = Qnil;
+ − 3507
+ − 3508 /* Look through the coding categories first by likelihood and then by
+ − 3509 priority and find the first one that is allowed. */
+ − 3510
+ − 3511 for (likelihood = DET_HIGHEST; likelihood >= DET_LOWEST; likelihood--)
428
+ − 3512 {
771
+ − 3513 for (i = 0; i < coding_detector_category_count; i++)
+ − 3514 {
+ − 3515 int cat = coding_category_by_priority[i];
+ − 3516 if (st->categories[cat] == likelihood &&
+ − 3517 !NILP (coding_category_system[cat]))
+ − 3518 {
+ − 3519 retval = (get_coding_system_for_text_file
+ − 3520 (coding_category_system[cat], 0));
+ − 3521 if (likelihood < DET_AS_LIKELY_AS_UNLIKELY)
+ − 3522 warn_when_safe_lispobj
+ − 3523 (intern ("detection"),
793
+ − 3524 Qwarning,
771
+ − 3525 emacs_sprintf_string_lisp
+ − 3526 (
+ − 3527 "Detected coding %s is unlikely to be correct (likelihood == `%s')",
+ − 3528 Qnil, 2, XCODING_SYSTEM_NAME (retval),
+ − 3529 detection_result_number_to_symbol
+ − 3530 ((enum detection_result) likelihood)));
+ − 3531 return retval;
+ − 3532 }
+ − 3533 }
428
+ − 3534 }
771
+ − 3535
+ − 3536 return Fget_coding_system (Qraw_text);
428
+ − 3537 }
+ − 3538 }
+ − 3539
1347
+ − 3540 /* Look for a coding system in the string (skipping over leading
+ − 3541 blanks). If found, return it, otherwise nil. */
+ − 3542
+ − 3543 static Lisp_Object
+ − 3544 snarf_coding_system (const Ibyte *p, Bytecount len)
+ − 3545 {
+ − 3546 Bytecount n;
+ − 3547 Ibyte *name;
+ − 3548
+ − 3549 while (*p == ' ' || *p == '\t') p++, len--;
+ − 3550 len = min (len, 1000);
+ − 3551 name = alloca_ibytes (len + 1);
+ − 3552 memcpy (name, p, len);
+ − 3553 name[len] = '\0';
+ − 3554
+ − 3555 /* Get coding system name */
+ − 3556 /* Characters valid in a MIME charset name (rfc 1521),
+ − 3557 and in a Lisp symbol name. */
+ − 3558 n = qxestrspn (name,
+ − 3559 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ − 3560 "abcdefghijklmnopqrstuvwxyz"
+ − 3561 "0123456789"
+ − 3562 "!$%&*+-.^_{|}~");
+ − 3563 if (n > 0)
+ − 3564 {
+ − 3565 name[n] = '\0';
+ − 3566 return find_coding_system_for_text_file (intern_int (name), 0);
+ − 3567 }
+ − 3568
+ − 3569 return Qnil;
+ − 3570 }
+ − 3571
428
+ − 3572 /* Given a seekable read stream and potential coding system and EOL type
+ − 3573 as specified, do any autodetection that is called for. If the
+ − 3574 coding system and/or EOL type are not `autodetect', they will be left
+ − 3575 alone; but this function will never return an autodetect coding system
+ − 3576 or EOL type.
+ − 3577
+ − 3578 This function does not automatically fetch subsidiary coding systems;
+ − 3579 that should be unnecessary with the explicit eol-type argument. */
+ − 3580
+ − 3581 #define LENGTH(string_constant) (sizeof (string_constant) - 1)
+ − 3582
771
+ − 3583 static Lisp_Object
+ − 3584 unwind_free_detection_state (Lisp_Object opaque)
+ − 3585 {
+ − 3586 struct detection_state *st =
+ − 3587 (struct detection_state *) get_opaque_ptr (opaque);
+ − 3588 free_detection_state (st);
+ − 3589 free_opaque_ptr (opaque);
+ − 3590 return Qnil;
+ − 3591 }
+ − 3592
1347
+ − 3593 /* #### This duplicates code in `find-coding-system-magic-cookie-in-file'
+ − 3594 in files.el. Look into combining them. */
+ − 3595
771
+ − 3596 static Lisp_Object
+ − 3597 look_for_coding_system_magic_cookie (const UExtbyte *data, Bytecount len)
428
+ − 3598 {
771
+ − 3599 const UExtbyte *p;
+ − 3600 const UExtbyte *scan_end;
+ − 3601
+ − 3602 /* Look for initial "-*-"; mode line prefix */
+ − 3603 for (p = data,
+ − 3604 scan_end = data + len - LENGTH ("-*-coding:?-*-");
+ − 3605 p <= scan_end
+ − 3606 && *p != '\n'
+ − 3607 && *p != '\r';
+ − 3608 p++)
+ − 3609 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
+ − 3610 {
+ − 3611 const UExtbyte *local_vars_beg = p + 3;
+ − 3612 /* Look for final "-*-"; mode line suffix */
+ − 3613 for (p = local_vars_beg,
+ − 3614 scan_end = data + len - LENGTH ("-*-");
+ − 3615 p <= scan_end
428
+ − 3616 && *p != '\n'
+ − 3617 && *p != '\r';
771
+ − 3618 p++)
+ − 3619 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
+ − 3620 {
+ − 3621 const UExtbyte *suffix = p;
+ − 3622 /* Look for "coding:" */
+ − 3623 for (p = local_vars_beg,
+ − 3624 scan_end = suffix - LENGTH ("coding:?");
+ − 3625 p <= scan_end;
+ − 3626 p++)
+ − 3627 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0
+ − 3628 && (p == local_vars_beg
+ − 3629 || (*(p-1) == ' ' ||
+ − 3630 *(p-1) == '\t' ||
+ − 3631 *(p-1) == ';')))
+ − 3632 {
+ − 3633 p += LENGTH ("coding:");
1347
+ − 3634 return snarf_coding_system (p, suffix - p);
771
+ − 3635 break;
+ − 3636 }
+ − 3637 break;
+ − 3638 }
+ − 3639 break;
+ − 3640 }
+ − 3641
1348
+ − 3642 #if 0
+ − 3643 /* #### Totally wrong as is, rewrite */
1347
+ − 3644 /* Look for initial ;;;###coding system */
+ − 3645
+ − 3646 {
+ − 3647 Bytecount ind = fast_string_match (QScoding_system_cookie,
+ − 3648 data, Qnil, 0, len, 0, ERROR_ME_NOT,
+ − 3649 1);
+ − 3650 if (ind >= 0)
+ − 3651 return
+ − 3652 snarf_coding_system (data + ind + LENGTH (";;;###coding system: "),
+ − 3653 len - ind - LENGTH (";;;###coding system: "));
+ − 3654 }
1348
+ − 3655 #endif /* 0 */
1347
+ − 3656
+ − 3657 return Qnil;
771
+ − 3658 }
+ − 3659
+ − 3660 static Lisp_Object
+ − 3661 determine_real_coding_system (Lstream *stream)
+ − 3662 {
+ − 3663 struct detection_state *st = allocate_detection_state ();
+ − 3664 int depth = record_unwind_protect (unwind_free_detection_state,
+ − 3665 make_opaque_ptr (st));
+ − 3666 UExtbyte buf[4096];
+ − 3667 Bytecount nread = Lstream_read (stream, buf, sizeof (buf));
+ − 3668 Lisp_Object coding_system = look_for_coding_system_magic_cookie (buf, nread);
+ − 3669
+ − 3670 if (NILP (coding_system))
+ − 3671 {
+ − 3672 while (1)
+ − 3673 {
+ − 3674 if (detect_coding_type (st, buf, nread))
428
+ − 3675 break;
771
+ − 3676 nread = Lstream_read (stream, buf, sizeof (buf));
+ − 3677 if (nread == 0)
+ − 3678 break;
428
+ − 3679 }
771
+ − 3680
+ − 3681 coding_system = detected_coding_system (st);
428
+ − 3682 }
+ − 3683
+ − 3684 Lstream_rewind (stream);
771
+ − 3685
+ − 3686 unbind_to (depth);
+ − 3687 return coding_system;
+ − 3688 }
+ − 3689
+ − 3690 static void
+ − 3691 undecided_init_coding_stream (struct coding_stream *str)
+ − 3692 {
+ − 3693 struct undecided_coding_stream *data =
+ − 3694 CODING_STREAM_TYPE_DATA (str, undecided);
+ − 3695 struct undecided_coding_system *csdata =
+ − 3696 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided);
+ − 3697
+ − 3698 data->actual = Qnil;
+ − 3699
+ − 3700 if (str->direction == CODING_DECODE)
+ − 3701 {
+ − 3702 Lstream *lst = str->other_end;
+ − 3703
+ − 3704 if ((lst->flags & LSTREAM_FL_READ) &&
+ − 3705 Lstream_seekable_p (lst) &&
+ − 3706 csdata->do_coding)
+ − 3707 /* We can determine the coding system now. */
+ − 3708 data->actual = determine_real_coding_system (lst);
+ − 3709 }
1494
+ − 3710
+ − 3711 #ifdef DEBUG_XEMACS
+ − 3712 if (!NILP (Vdebug_coding_detection))
+ − 3713 stderr_out_lisp ("detected coding system: %s\n", 1, data->actual);
+ − 3714 #endif /* DEBUG_XEMACS */
771
+ − 3715 }
+ − 3716
+ − 3717 static void
+ − 3718 undecided_rewind_coding_stream (struct coding_stream *str)
+ − 3719 {
+ − 3720 chain_rewind_coding_stream_1 (&CODING_STREAM_TYPE_DATA (str, undecided)->c);
+ − 3721 }
+ − 3722
+ − 3723 static void
+ − 3724 undecided_finalize_coding_stream (struct coding_stream *str)
+ − 3725 {
+ − 3726 struct undecided_coding_stream *data =
+ − 3727 CODING_STREAM_TYPE_DATA (str, undecided);
+ − 3728
+ − 3729 chain_finalize_coding_stream_1
+ − 3730 (&CODING_STREAM_TYPE_DATA (str, undecided)->c);
+ − 3731 if (data->st)
+ − 3732 free_detection_state (data->st);
+ − 3733 }
+ − 3734
+ − 3735 static Lisp_Object
+ − 3736 undecided_canonicalize (Lisp_Object codesys)
+ − 3737 {
+ − 3738 struct undecided_coding_system *csdata =
+ − 3739 XCODING_SYSTEM_TYPE_DATA (codesys, undecided);
+ − 3740 if (!csdata->do_eol && !csdata->do_coding)
+ − 3741 return NILP (csdata->cs) ? Fget_coding_system (Qbinary) : csdata->cs;
+ − 3742 if (csdata->do_eol && !csdata->do_coding && NILP (csdata->cs))
+ − 3743 return Fget_coding_system (Qconvert_eol_autodetect);
+ − 3744 return codesys;
+ − 3745 }
+ − 3746
+ − 3747 static Bytecount
+ − 3748 undecided_convert (struct coding_stream *str, const UExtbyte *src,
+ − 3749 unsigned_char_dynarr *dst, Bytecount n)
+ − 3750 {
+ − 3751 int first_time = 0;
+ − 3752
+ − 3753 if (str->direction == CODING_DECODE)
+ − 3754 {
+ − 3755 /* At this point, we have only the following possibilities:
+ − 3756
+ − 3757 do_eol && do_coding
+ − 3758 do_coding only
+ − 3759 do_eol only and a coding system was specified
+ − 3760
+ − 3761 Other possibilities are removed during undecided_canonicalize.
+ − 3762
+ − 3763 Therefore, our substreams are either
+ − 3764
+ − 3765 lstream_coding -> lstream_dynarr, or
+ − 3766 lstream_coding -> lstream_eol -> lstream_dynarr.
+ − 3767 */
+ − 3768 struct undecided_coding_system *csdata =
+ − 3769 XCODING_SYSTEM_TYPE_DATA (str->codesys, undecided);
+ − 3770 struct undecided_coding_stream *data =
+ − 3771 CODING_STREAM_TYPE_DATA (str, undecided);
+ − 3772
+ − 3773 if (str->eof)
+ − 3774 {
+ − 3775 /* Each will close the next. We need to close now because more
+ − 3776 data may be generated. */
+ − 3777 if (data->c.initted)
+ − 3778 Lstream_close (XLSTREAM (data->c.lstreams[0]));
+ − 3779 return n;
+ − 3780 }
+ − 3781
+ − 3782 if (!data->c.initted)
+ − 3783 {
+ − 3784 data->c.lstream_count = csdata->do_eol ? 3 : 2;
+ − 3785 data->c.lstreams = xnew_array (Lisp_Object, data->c.lstream_count);
+ − 3786
+ − 3787 data->c.lstreams[data->c.lstream_count - 1] =
+ − 3788 make_dynarr_output_stream (dst);
+ − 3789 Lstream_set_buffering
+ − 3790 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]),
+ − 3791 LSTREAM_UNBUFFERED, 0);
+ − 3792 if (csdata->do_eol)
+ − 3793 {
+ − 3794 data->c.lstreams[1] =
+ − 3795 make_coding_output_stream
+ − 3796 (XLSTREAM (data->c.lstreams[data->c.lstream_count - 1]),
+ − 3797 Fget_coding_system (Qconvert_eol_autodetect),
800
+ − 3798 CODING_DECODE, 0);
771
+ − 3799 Lstream_set_buffering
+ − 3800 (XLSTREAM (data->c.lstreams[1]),
+ − 3801 LSTREAM_UNBUFFERED, 0);
+ − 3802 }
+ − 3803
+ − 3804 data->c.lstreams[0] =
+ − 3805 make_coding_output_stream
+ − 3806 (XLSTREAM (data->c.lstreams[1]),
+ − 3807 /* Substitute binary if we need to detect the encoding */
+ − 3808 csdata->do_coding ? Qbinary : csdata->cs,
800
+ − 3809 CODING_DECODE, 0);
771
+ − 3810 Lstream_set_buffering (XLSTREAM (data->c.lstreams[0]),
+ − 3811 LSTREAM_UNBUFFERED, 0);
+ − 3812
+ − 3813 first_time = 1;
+ − 3814 data->c.initted = 1;
+ − 3815 }
+ − 3816
+ − 3817 /* If necessary, do encoding-detection now. We do this when we're a
+ − 3818 writing stream or a non-seekable reading stream, meaning that we
+ − 3819 can't just process the whole input, rewind, and start over. */
+ − 3820
+ − 3821 if (csdata->do_coding)
+ − 3822 {
+ − 3823 int actual_was_nil = NILP (data->actual);
+ − 3824 if (NILP (data->actual))
+ − 3825 {
+ − 3826 if (!data->st)
+ − 3827 data->st = allocate_detection_state ();
+ − 3828 if (first_time)
+ − 3829 /* #### This is cheesy. What we really ought to do is buffer
+ − 3830 up a certain minimum amount of data to get a better result.
+ − 3831 */
+ − 3832 data->actual = look_for_coding_system_magic_cookie (src, n);
+ − 3833 if (NILP (data->actual))
+ − 3834 {
+ − 3835 /* #### This is cheesy. What we really ought to do is buffer
+ − 3836 up a certain minimum amount of data so as to get a less
+ − 3837 random result when doing subprocess detection. */
+ − 3838 detect_coding_type (data->st, src, n);
+ − 3839 data->actual = detected_coding_system (data->st);
+ − 3840 }
+ − 3841 }
+ − 3842 /* We need to set the detected coding system if we actually have
+ − 3843 such a coding system but didn't before. That is the case
+ − 3844 either when we just detected it in the previous code or when
+ − 3845 it was detected during undecided_init_coding_stream(). We
+ − 3846 can check for that using first_time. */
+ − 3847 if (!NILP (data->actual) && (actual_was_nil || first_time))
+ − 3848 {
+ − 3849 /* If the detected coding system doesn't allow for EOL
+ − 3850 autodetection, try to get the equivalent that does;
+ − 3851 otherwise, disable EOL detection (overriding whatever
+ − 3852 may already have been detected). */
+ − 3853 if (XCODING_SYSTEM_EOL_TYPE (data->actual) != EOL_AUTODETECT)
+ − 3854 {
+ − 3855 if (!NILP (XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual)))
+ − 3856 data->actual =
+ − 3857 XCODING_SYSTEM_SUBSIDIARY_PARENT (data->actual);
+ − 3858 else if (data->c.lstream_count == 3)
+ − 3859 set_coding_stream_coding_system
+ − 3860 (XLSTREAM (data->c.lstreams[1]),
+ − 3861 Fget_coding_system (Qidentity));
+ − 3862 }
+ − 3863 set_coding_stream_coding_system
+ − 3864 (XLSTREAM (data->c.lstreams[0]), data->actual);
+ − 3865 }
+ − 3866 }
+ − 3867
+ − 3868 if (Lstream_write (XLSTREAM (data->c.lstreams[0]), src, n) < 0)
+ − 3869 return -1;
+ − 3870 return n;
+ − 3871 }
+ − 3872 else
+ − 3873 return no_conversion_convert (str, src, dst, n);
+ − 3874 }
+ − 3875
+ − 3876 static Lisp_Object
+ − 3877 undecided_canonicalize_after_coding (struct coding_stream *str)
+ − 3878 {
+ − 3879 struct undecided_coding_stream *data =
+ − 3880 CODING_STREAM_TYPE_DATA (str, undecided);
+ − 3881 Lisp_Object ret, eolret;
+ − 3882
+ − 3883 if (str->direction == CODING_ENCODE)
+ − 3884 return str->codesys;
+ − 3885
+ − 3886 if (!data->c.initted)
+ − 3887 return Fget_coding_system (Qundecided);
+ − 3888
+ − 3889 ret = coding_stream_canonicalize_after_coding
+ − 3890 (XLSTREAM (data->c.lstreams[0]));
+ − 3891 if (NILP (ret))
+ − 3892 ret = Fget_coding_system (Qundecided);
+ − 3893 if (XCODING_SYSTEM_EOL_TYPE (ret) != EOL_AUTODETECT)
+ − 3894 return ret;
+ − 3895 eolret = coding_stream_canonicalize_after_coding
+ − 3896 (XLSTREAM (data->c.lstreams[1]));
+ − 3897 if (!EQ (XCODING_SYSTEM_TYPE (eolret), Qconvert_eol))
+ − 3898 return ret;
+ − 3899 return
+ − 3900 Fsubsidiary_coding_system (ret, Fcoding_system_property (eolret,
+ − 3901 Qsubtype));
+ − 3902 }
+ − 3903
+ − 3904
+ − 3905 /************************************************************************/
+ − 3906 /* Lisp interface: Coding category functions and detection */
+ − 3907 /************************************************************************/
+ − 3908
+ − 3909 DEFUN ("coding-category-list", Fcoding_category_list, 0, 0, 0, /*
+ − 3910 Return a list of all recognized coding categories.
+ − 3911 */
+ − 3912 ())
+ − 3913 {
+ − 3914 int i;
+ − 3915 Lisp_Object list = Qnil;
+ − 3916
+ − 3917 for (i = 0; i < coding_detector_count; i++)
+ − 3918 {
+ − 3919 detector_category_dynarr *cats =
+ − 3920 Dynarr_at (all_coding_detectors, i).cats;
+ − 3921 int j;
+ − 3922
+ − 3923 for (j = 0; j < Dynarr_length (cats); j++)
+ − 3924 list = Fcons (Dynarr_at (cats, j).sym, list);
+ − 3925 }
+ − 3926
+ − 3927 return Fnreverse (list);
+ − 3928 }
+ − 3929
+ − 3930 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /*
+ − 3931 Change the priority order of the coding categories.
+ − 3932 LIST should be list of coding categories, in descending order of
+ − 3933 priority. Unspecified coding categories will be lower in priority
+ − 3934 than all specified ones, in the same relative order they were in
+ − 3935 previously.
+ − 3936 */
+ − 3937 (list))
+ − 3938 {
+ − 3939 int *category_to_priority =
+ − 3940 alloca_array (int, coding_detector_category_count);
+ − 3941 int i, j;
+ − 3942
+ − 3943 /* First generate a list that maps coding categories to priorities. */
+ − 3944
+ − 3945 for (i = 0; i < coding_detector_category_count; i++)
+ − 3946 category_to_priority[i] = -1;
+ − 3947
+ − 3948 /* Highest priority comes from the specified list. */
+ − 3949 i = 0;
2367
+ − 3950 {
+ − 3951 EXTERNAL_LIST_LOOP_2 (elt, list)
+ − 3952 {
+ − 3953 int cat = coding_category_symbol_to_id (elt);
+ − 3954
+ − 3955 if (category_to_priority[cat] >= 0)
+ − 3956 sferror ("Duplicate coding category in list", elt);
+ − 3957 category_to_priority[cat] = i++;
+ − 3958 }
+ − 3959 }
771
+ − 3960
+ − 3961 /* Now go through the existing categories by priority to retrieve
+ − 3962 the categories not yet specified and preserve their priority
+ − 3963 order. */
+ − 3964 for (j = 0; j < coding_detector_category_count; j++)
+ − 3965 {
+ − 3966 int cat = coding_category_by_priority[j];
+ − 3967 if (category_to_priority[cat] < 0)
+ − 3968 category_to_priority[cat] = i++;
+ − 3969 }
+ − 3970
+ − 3971 /* Now we need to construct the inverse of the mapping we just
+ − 3972 constructed. */
+ − 3973
+ − 3974 for (i = 0; i < coding_detector_category_count; i++)
+ − 3975 coding_category_by_priority[category_to_priority[i]] = i;
+ − 3976
+ − 3977 /* Phew! That was confusing. */
+ − 3978 return Qnil;
+ − 3979 }
+ − 3980
+ − 3981 DEFUN ("coding-priority-list", Fcoding_priority_list, 0, 0, 0, /*
+ − 3982 Return a list of coding categories in descending order of priority.
+ − 3983 */
+ − 3984 ())
+ − 3985 {
+ − 3986 int i;
+ − 3987 Lisp_Object list = Qnil;
+ − 3988
+ − 3989 for (i = 0; i < coding_detector_category_count; i++)
+ − 3990 list =
+ − 3991 Fcons (coding_category_id_to_symbol (coding_category_by_priority[i]),
+ − 3992 list);
+ − 3993 return Fnreverse (list);
+ − 3994 }
+ − 3995
+ − 3996 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /*
+ − 3997 Change the coding system associated with a coding category.
+ − 3998 */
+ − 3999 (coding_category, coding_system))
+ − 4000 {
+ − 4001 coding_category_system[coding_category_symbol_to_id (coding_category)] =
+ − 4002 Fget_coding_system (coding_system);
+ − 4003 return Qnil;
+ − 4004 }
+ − 4005
+ − 4006 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /*
+ − 4007 Return the coding system associated with a coding category.
+ − 4008 */
+ − 4009 (coding_category))
+ − 4010 {
+ − 4011 Lisp_Object sys =
+ − 4012 coding_category_system[coding_category_symbol_to_id (coding_category)];
+ − 4013
+ − 4014 if (!NILP (sys))
+ − 4015 return XCODING_SYSTEM_NAME (sys);
+ − 4016 return Qnil;
+ − 4017 }
+ − 4018
800
+ − 4019 /* Detect the encoding of STREAM. Assumes stream is at the begnning and will
+ − 4020 read through to the end of STREAM, leaving it there but open. */
+ − 4021
771
+ − 4022 Lisp_Object
+ − 4023 detect_coding_stream (Lisp_Object stream)
+ − 4024 {
+ − 4025 Lisp_Object val = Qnil;
+ − 4026 struct gcpro gcpro1, gcpro2, gcpro3;
+ − 4027 UExtbyte random_buffer[65536];
+ − 4028 Lisp_Object binary_instream =
+ − 4029 make_coding_input_stream
+ − 4030 (XLSTREAM (stream), Qbinary,
814
+ − 4031 CODING_ENCODE, LSTREAM_FL_NO_CLOSE_OTHER);
771
+ − 4032 Lisp_Object decstream =
+ − 4033 make_coding_input_stream
+ − 4034 (XLSTREAM (binary_instream),
800
+ − 4035 Qundecided, CODING_DECODE, 0);
771
+ − 4036 Lstream *decstr = XLSTREAM (decstream);
+ − 4037
+ − 4038 GCPRO3 (decstream, stream, binary_instream);
+ − 4039 /* Read and discard all data; detection happens as a side effect of this,
+ − 4040 and we examine what was detected afterwards. */
+ − 4041 while (Lstream_read (decstr, random_buffer, sizeof (random_buffer)) > 0)
+ − 4042 ;
+ − 4043
+ − 4044 val = coding_stream_detected_coding_system (decstr);
+ − 4045 Lstream_close (decstr);
+ − 4046 Lstream_delete (decstr);
+ − 4047 Lstream_delete (XLSTREAM (binary_instream));
+ − 4048 UNGCPRO;
+ − 4049 return val;
428
+ − 4050 }
+ − 4051
+ − 4052 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
+ − 4053 Detect coding system of the text in the region between START and END.
444
+ − 4054 Return a list of possible coding systems ordered by priority.
+ − 4055 If only ASCII characters are found, return 'undecided or one of
428
+ − 4056 its subsidiary coding systems according to a detected end-of-line
+ − 4057 type. Optional arg BUFFER defaults to the current buffer.
+ − 4058 */
+ − 4059 (start, end, buffer))
+ − 4060 {
+ − 4061 Lisp_Object val = Qnil;
+ − 4062 struct buffer *buf = decode_buffer (buffer, 0);
665
+ − 4063 Charbpos b, e;
771
+ − 4064 Lisp_Object lb_instream;
428
+ − 4065
+ − 4066 get_buffer_range_char (buf, start, end, &b, &e, 0);
+ − 4067 lb_instream = make_lisp_buffer_input_stream (buf, b, e, 0);
771
+ − 4068
+ − 4069 val = detect_coding_stream (lb_instream);
+ − 4070 Lstream_delete (XLSTREAM (lb_instream));
428
+ − 4071 return val;
+ − 4072 }
+ − 4073
+ − 4074
771
+ − 4075
+ − 4076 #ifdef DEBUG_XEMACS
+ − 4077
428
+ − 4078 /************************************************************************/
771
+ − 4079 /* Internal methods */
+ − 4080 /************************************************************************/
+ − 4081
+ − 4082 /* Raw (internally-formatted) data. */
+ − 4083 DEFINE_CODING_SYSTEM_TYPE (internal);
428
+ − 4084
665
+ − 4085 static Bytecount
2286
+ − 4086 internal_convert (struct coding_stream *UNUSED (str), const UExtbyte *src,
771
+ − 4087 unsigned_char_dynarr *dst, Bytecount n)
+ − 4088 {
+ − 4089 Bytecount orign = n;
+ − 4090 Dynarr_add_many (dst, src, n);
+ − 4091 return orign;
+ − 4092 }
+ − 4093
+ − 4094 #endif /* DEBUG_XEMACS */
+ − 4095
+ − 4096
+ − 4097
+ − 4098 #ifdef HAVE_ZLIB
+ − 4099
+ − 4100 /************************************************************************/
+ − 4101 /* Gzip methods */
+ − 4102 /************************************************************************/
+ − 4103
+ − 4104 struct gzip_coding_system
428
+ − 4105 {
771
+ − 4106 int level; /* 0 through 9, or -1 for default */
+ − 4107 };
+ − 4108
+ − 4109 #define CODING_SYSTEM_GZIP_LEVEL(codesys) \
+ − 4110 (CODING_SYSTEM_TYPE_DATA (codesys, gzip)->level)
+ − 4111 #define XCODING_SYSTEM_GZIP_LEVEL(codesys) \
+ − 4112 (XCODING_SYSTEM_TYPE_DATA (codesys, gzip)->level)
+ − 4113
+ − 4114 struct gzip_coding_stream
428
+ − 4115 {
771
+ − 4116 z_stream stream;
+ − 4117 int stream_initted;
+ − 4118 int reached_eof; /* #### this should be handled by the caller, once we
+ − 4119 return LSTREAM_EOF */
+ − 4120 };
+ − 4121
1204
+ − 4122 static const struct memory_description
771
+ − 4123 gzip_coding_system_description[] = {
+ − 4124 { XD_END }
+ − 4125 };
+ − 4126
1204
+ − 4127 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (gzip);
+ − 4128
771
+ − 4129 enum source_sink_type
+ − 4130 gzip_conversion_end_type (Lisp_Object codesys)
+ − 4131 {
+ − 4132 return DECODES_BYTE_TO_BYTE;
428
+ − 4133 }
+ − 4134
+ − 4135 static void
771
+ − 4136 gzip_init (Lisp_Object codesys)
+ − 4137 {
+ − 4138 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip);
+ − 4139 data->level = -1;
+ − 4140 }
+ − 4141
+ − 4142 static void
+ − 4143 gzip_print (Lisp_Object cs, Lisp_Object printcharfun, int escapeflag)
428
+ − 4144 {
771
+ − 4145 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (cs, gzip);
+ − 4146
826
+ − 4147 write_c_string (printcharfun, "(");
771
+ − 4148 if (data->level == -1)
826
+ − 4149 write_c_string (printcharfun, "default");
771
+ − 4150 else
+ − 4151 print_internal (make_int (data->level), printcharfun, 0);
826
+ − 4152 write_c_string (printcharfun, ")");
428
+ − 4153 }
+ − 4154
+ − 4155 static int
771
+ − 4156 gzip_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value)
428
+ − 4157 {
771
+ − 4158 struct gzip_coding_system *data = XCODING_SYSTEM_TYPE_DATA (codesys, gzip);
+ − 4159
+ − 4160 if (EQ (key, Qlevel))
428
+ − 4161 {
771
+ − 4162 if (EQ (value, Qdefault))
+ − 4163 data->level = -1;
+ − 4164 else
428
+ − 4165 {
771
+ − 4166 CHECK_INT (value);
+ − 4167 check_int_range (XINT (value), 0, 9);
+ − 4168 data->level = XINT (value);
428
+ − 4169 }
+ − 4170 }
+ − 4171 else
771
+ − 4172 return 0;
+ − 4173 return 1;
428
+ − 4174 }
+ − 4175
+ − 4176 static Lisp_Object
771
+ − 4177 gzip_getprop (Lisp_Object coding_system, Lisp_Object prop)
428
+ − 4178 {
771
+ − 4179 struct gzip_coding_system *data =
+ − 4180 XCODING_SYSTEM_TYPE_DATA (coding_system, gzip);
+ − 4181
+ − 4182 if (EQ (prop, Qlevel))
428
+ − 4183 {
771
+ − 4184 if (data->level == -1)
+ − 4185 return Qdefault;
+ − 4186 return make_int (data->level);
428
+ − 4187 }
771
+ − 4188
+ − 4189 return Qunbound;
428
+ − 4190 }
+ − 4191
+ − 4192 static void
771
+ − 4193 gzip_init_coding_stream (struct coding_stream *str)
428
+ − 4194 {
771
+ − 4195 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip);
+ − 4196 if (data->stream_initted)
428
+ − 4197 {
771
+ − 4198 if (str->direction == CODING_DECODE)
+ − 4199 inflateEnd (&data->stream);
+ − 4200 else
+ − 4201 deflateEnd (&data->stream);
+ − 4202 data->stream_initted = 0;
428
+ − 4203 }
771
+ − 4204 data->reached_eof = 0;
428
+ − 4205 }
+ − 4206
+ − 4207 static void
771
+ − 4208 gzip_rewind_coding_stream (struct coding_stream *str)
428
+ − 4209 {
771
+ − 4210 gzip_init_coding_stream (str);
428
+ − 4211 }
+ − 4212
771
+ − 4213 static Bytecount
+ − 4214 gzip_convert (struct coding_stream *str,
+ − 4215 const UExtbyte *src,
+ − 4216 unsigned_char_dynarr *dst, Bytecount n)
428
+ − 4217 {
771
+ − 4218 struct gzip_coding_stream *data = CODING_STREAM_TYPE_DATA (str, gzip);
+ − 4219 int zerr;
+ − 4220 if (str->direction == CODING_DECODE)
428
+ − 4221 {
771
+ − 4222 if (data->reached_eof)
+ − 4223 return n; /* eat the data */
+ − 4224
+ − 4225 if (!data->stream_initted)
428
+ − 4226 {
771
+ − 4227 xzero (data->stream);
+ − 4228 if (inflateInit (&data->stream) != Z_OK)
+ − 4229 return LSTREAM_ERROR;
+ − 4230 data->stream_initted = 1;
428
+ − 4231 }
771
+ − 4232
+ − 4233 data->stream.next_in = (Bytef *) src;
+ − 4234 data->stream.avail_in = n;
+ − 4235
+ − 4236 /* Normally we stop when we've fed all data to the decompressor; but
+ − 4237 if we're at the end of the input, and the decompressor hasn't
+ − 4238 reported EOF, we need to keep going, as there might be more output
+ − 4239 to generate. Z_OK from the decompressor means input was processed
+ − 4240 or output was generated; if neither, we break out of the loop.
+ − 4241 Other return values are:
+ − 4242
+ − 4243 Z_STREAM_END EOF from decompressor
+ − 4244 Z_DATA_ERROR Corrupted data
+ − 4245 Z_BUF_ERROR No progress possible (this should happen if
+ − 4246 we try to feed it an incomplete file)
+ − 4247 Z_MEM_ERROR Out of memory
+ − 4248 Z_STREAM_ERROR (should never happen)
+ − 4249 Z_NEED_DICT (#### when will this happen?)
+ − 4250 */
+ − 4251 while (data->stream.avail_in > 0 || str->eof)
+ − 4252 {
+ − 4253 /* Reserve an output buffer of the same size as the input buffer;
+ − 4254 if that's not enough, we keep reserving the same size. */
+ − 4255 Bytecount reserved = n;
+ − 4256 Dynarr_add_many (dst, 0, reserved);
+ − 4257 /* Careful here! Don't retrieve the pointer until after
+ − 4258 reserving the space, or it might be bogus */
+ − 4259 data->stream.next_out =
+ − 4260 Dynarr_atp (dst, Dynarr_length (dst) - reserved);
+ − 4261 data->stream.avail_out = reserved;
+ − 4262 zerr = inflate (&data->stream, Z_NO_FLUSH);
+ − 4263 /* Lop off the unused portion */
+ − 4264 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out);
+ − 4265 if (zerr != Z_OK)
+ − 4266 break;
+ − 4267 }
+ − 4268
+ − 4269 if (zerr == Z_STREAM_END)
+ − 4270 data->reached_eof = 1;
+ − 4271
+ − 4272 if ((Bytecount) data->stream.avail_in < n)
+ − 4273 return n - data->stream.avail_in;
+ − 4274
+ − 4275 if (zerr == Z_OK || zerr == Z_STREAM_END)
+ − 4276 return 0;
+ − 4277
+ − 4278 return LSTREAM_ERROR;
428
+ − 4279 }
+ − 4280 else
+ − 4281 {
771
+ − 4282 if (!data->stream_initted)
+ − 4283 {
+ − 4284 int level = XCODING_SYSTEM_GZIP_LEVEL (str->codesys);
+ − 4285 xzero (data->stream);
+ − 4286 if (deflateInit (&data->stream,
+ − 4287 level == -1 ? Z_DEFAULT_COMPRESSION : level) !=
+ − 4288 Z_OK)
+ − 4289 return LSTREAM_ERROR;
+ − 4290 data->stream_initted = 1;
428
+ − 4291 }
771
+ − 4292
+ − 4293 data->stream.next_in = (Bytef *) src;
+ − 4294 data->stream.avail_in = n;
+ − 4295
+ − 4296 /* Normally we stop when we've fed all data to the compressor; but if
+ − 4297 we're at the end of the input, and the compressor hasn't reported
+ − 4298 EOF, we need to keep going, as there might be more output to
+ − 4299 generate. (To signal EOF on our end, we set the FLUSH parameter
+ − 4300 to Z_FINISH; when all data is output, Z_STREAM_END will be
+ − 4301 returned.) Z_OK from the compressor means input was processed or
+ − 4302 output was generated; if neither, we break out of the loop. Other
+ − 4303 return values are:
+ − 4304
+ − 4305 Z_STREAM_END EOF from compressor
+ − 4306 Z_BUF_ERROR No progress possible (should never happen)
+ − 4307 Z_STREAM_ERROR (should never happen)
+ − 4308 */
+ − 4309 while (data->stream.avail_in > 0 || str->eof)
+ − 4310 {
+ − 4311 /* Reserve an output buffer of the same size as the input buffer;
+ − 4312 if that's not enough, we keep reserving the same size. */
+ − 4313 Bytecount reserved = n;
+ − 4314 Dynarr_add_many (dst, 0, reserved);
+ − 4315 /* Careful here! Don't retrieve the pointer until after
+ − 4316 reserving the space, or it might be bogus */
+ − 4317 data->stream.next_out =
+ − 4318 Dynarr_atp (dst, Dynarr_length (dst) - reserved);
+ − 4319 data->stream.avail_out = reserved;
+ − 4320 zerr =
+ − 4321 deflate (&data->stream,
+ − 4322 str->eof ? Z_FINISH : Z_NO_FLUSH);
+ − 4323 /* Lop off the unused portion */
+ − 4324 Dynarr_set_size (dst, Dynarr_length (dst) - data->stream.avail_out);
+ − 4325 if (zerr != Z_OK)
+ − 4326 break;
+ − 4327 }
+ − 4328
+ − 4329 if ((Bytecount) data->stream.avail_in < n)
+ − 4330 return n - data->stream.avail_in;
+ − 4331
+ − 4332 if (zerr == Z_OK || zerr == Z_STREAM_END)
+ − 4333 return 0;
+ − 4334
+ − 4335 return LSTREAM_ERROR;
428
+ − 4336 }
+ − 4337 }
+ − 4338
771
+ − 4339 #endif /* HAVE_ZLIB */
428
+ − 4340
+ − 4341
+ − 4342 /************************************************************************/
+ − 4343 /* Initialization */
+ − 4344 /************************************************************************/
+ − 4345
+ − 4346 void
+ − 4347 syms_of_file_coding (void)
+ − 4348 {
442
+ − 4349 INIT_LRECORD_IMPLEMENTATION (coding_system);
+ − 4350
771
+ − 4351 DEFSUBR (Fvalid_coding_system_type_p);
+ − 4352 DEFSUBR (Fcoding_system_type_list);
428
+ − 4353 DEFSUBR (Fcoding_system_p);
+ − 4354 DEFSUBR (Ffind_coding_system);
+ − 4355 DEFSUBR (Fget_coding_system);
+ − 4356 DEFSUBR (Fcoding_system_list);
+ − 4357 DEFSUBR (Fcoding_system_name);
+ − 4358 DEFSUBR (Fmake_coding_system);
+ − 4359 DEFSUBR (Fcopy_coding_system);
440
+ − 4360 DEFSUBR (Fcoding_system_canonical_name_p);
+ − 4361 DEFSUBR (Fcoding_system_alias_p);
+ − 4362 DEFSUBR (Fcoding_system_aliasee);
428
+ − 4363 DEFSUBR (Fdefine_coding_system_alias);
+ − 4364 DEFSUBR (Fsubsidiary_coding_system);
771
+ − 4365 DEFSUBR (Fcoding_system_base);
+ − 4366 DEFSUBR (Fcoding_system_used_for_io);
428
+ − 4367
+ − 4368 DEFSUBR (Fcoding_system_type);
771
+ − 4369 DEFSUBR (Fcoding_system_description);
428
+ − 4370 DEFSUBR (Fcoding_system_property);
+ − 4371
+ − 4372 DEFSUBR (Fcoding_category_list);
+ − 4373 DEFSUBR (Fset_coding_priority_list);
+ − 4374 DEFSUBR (Fcoding_priority_list);
+ − 4375 DEFSUBR (Fset_coding_category_system);
+ − 4376 DEFSUBR (Fcoding_category_system);
+ − 4377
+ − 4378 DEFSUBR (Fdetect_coding_region);
+ − 4379 DEFSUBR (Fdecode_coding_region);
+ − 4380 DEFSUBR (Fencode_coding_region);
563
+ − 4381 DEFSYMBOL_MULTIWORD_PREDICATE (Qcoding_systemp);
+ − 4382 DEFSYMBOL (Qno_conversion);
771
+ − 4383 DEFSYMBOL (Qconvert_eol);
+ − 4384 DEFSYMBOL (Qconvert_eol_autodetect);
+ − 4385 DEFSYMBOL (Qconvert_eol_lf);
+ − 4386 DEFSYMBOL (Qconvert_eol_cr);
+ − 4387 DEFSYMBOL (Qconvert_eol_crlf);
563
+ − 4388 DEFSYMBOL (Qraw_text);
771
+ − 4389
563
+ − 4390 DEFSYMBOL (Qmnemonic);
+ − 4391 DEFSYMBOL (Qeol_type);
+ − 4392 DEFSYMBOL (Qpost_read_conversion);
+ − 4393 DEFSYMBOL (Qpre_write_conversion);
+ − 4394
771
+ − 4395 DEFSYMBOL (Qtranslation_table_for_decode);
+ − 4396 DEFSYMBOL (Qtranslation_table_for_encode);
+ − 4397 DEFSYMBOL (Qsafe_chars);
+ − 4398 DEFSYMBOL (Qsafe_charsets);
+ − 4399 DEFSYMBOL (Qmime_charset);
+ − 4400 DEFSYMBOL (Qvalid_codes);
+ − 4401
563
+ − 4402 DEFSYMBOL (Qcr);
+ − 4403 DEFSYMBOL (Qlf);
+ − 4404 DEFSYMBOL (Qcrlf);
+ − 4405 DEFSYMBOL (Qeol_cr);
+ − 4406 DEFSYMBOL (Qeol_lf);
+ − 4407 DEFSYMBOL (Qeol_crlf);
+ − 4408 DEFSYMBOL (Qencode);
+ − 4409 DEFSYMBOL (Qdecode);
428
+ − 4410
771
+ − 4411 DEFSYMBOL (Qnear_certainty);
+ − 4412 DEFSYMBOL (Qquite_probable);
+ − 4413 DEFSYMBOL (Qsomewhat_likely);
1494
+ − 4414 DEFSYMBOL (Qslightly_likely);
771
+ − 4415 DEFSYMBOL (Qas_likely_as_unlikely);
+ − 4416 DEFSYMBOL (Qsomewhat_unlikely);
+ − 4417 DEFSYMBOL (Qquite_improbable);
+ − 4418 DEFSYMBOL (Qnearly_impossible);
+ − 4419
+ − 4420 DEFSYMBOL (Qdo_eol);
+ − 4421 DEFSYMBOL (Qdo_coding);
+ − 4422
+ − 4423 DEFSYMBOL (Qcanonicalize_after_coding);
+ − 4424
+ − 4425 DEFSYMBOL (Qescape_quoted);
+ − 4426
+ − 4427 #ifdef HAVE_ZLIB
+ − 4428 DEFSYMBOL (Qgzip);
+ − 4429 #endif
+ − 4430
428
+ − 4431 }
+ − 4432
+ − 4433 void
+ − 4434 lstream_type_create_file_coding (void)
+ − 4435 {
771
+ − 4436 LSTREAM_HAS_METHOD (coding, reader);
+ − 4437 LSTREAM_HAS_METHOD (coding, writer);
+ − 4438 LSTREAM_HAS_METHOD (coding, rewinder);
+ − 4439 LSTREAM_HAS_METHOD (coding, seekable_p);
+ − 4440 LSTREAM_HAS_METHOD (coding, marker);
+ − 4441 LSTREAM_HAS_METHOD (coding, flusher);
+ − 4442 LSTREAM_HAS_METHOD (coding, closer);
+ − 4443 LSTREAM_HAS_METHOD (coding, finalizer);
+ − 4444 }
+ − 4445
+ − 4446 void
+ − 4447 coding_system_type_create (void)
+ − 4448 {
+ − 4449 int i;
+ − 4450
+ − 4451 staticpro (&Vcoding_system_hash_table);
+ − 4452 Vcoding_system_hash_table =
+ − 4453 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
+ − 4454
+ − 4455 the_coding_system_type_entry_dynarr = Dynarr_new (coding_system_type_entry);
2367
+ − 4456 dump_add_root_block_ptr (&the_coding_system_type_entry_dynarr,
771
+ − 4457 &csted_description);
+ − 4458
+ − 4459 Vcoding_system_type_list = Qnil;
+ − 4460 staticpro (&Vcoding_system_type_list);
+ − 4461
+ − 4462 /* Initialize to something reasonable ... */
+ − 4463 for (i = 0; i < MAX_DETECTOR_CATEGORIES; i++)
+ − 4464 {
+ − 4465 coding_category_system[i] = Qnil;
1204
+ − 4466 dump_add_root_lisp_object (&coding_category_system[i]);
771
+ − 4467 coding_category_by_priority[i] = i;
+ − 4468 }
+ − 4469
+ − 4470 dump_add_opaque (coding_category_by_priority,
+ − 4471 sizeof (coding_category_by_priority));
+ − 4472
+ − 4473 all_coding_detectors = Dynarr_new2 (detector_dynarr, struct detector);
2367
+ − 4474 dump_add_root_block_ptr (&all_coding_detectors,
771
+ − 4475 &detector_dynarr_description);
+ − 4476
+ − 4477 dump_add_opaque_int (&coding_system_tick);
+ − 4478 dump_add_opaque_int (&coding_detector_count);
+ − 4479 dump_add_opaque_int (&coding_detector_category_count);
+ − 4480
+ − 4481 INITIALIZE_CODING_SYSTEM_TYPE (no_conversion,
+ − 4482 "no-conversion-coding-system-p");
+ − 4483 CODING_SYSTEM_HAS_METHOD (no_conversion, convert);
+ − 4484
+ − 4485 INITIALIZE_DETECTOR (no_conversion);
+ − 4486 DETECTOR_HAS_METHOD (no_conversion, detect);
+ − 4487 INITIALIZE_DETECTOR_CATEGORY (no_conversion, no_conversion);
+ − 4488
+ − 4489 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (convert_eol,
+ − 4490 "convert-eol-coding-system-p");
+ − 4491 CODING_SYSTEM_HAS_METHOD (convert_eol, print);
+ − 4492 CODING_SYSTEM_HAS_METHOD (convert_eol, convert);
+ − 4493 CODING_SYSTEM_HAS_METHOD (convert_eol, getprop);
+ − 4494 CODING_SYSTEM_HAS_METHOD (convert_eol, putprop);
+ − 4495 CODING_SYSTEM_HAS_METHOD (convert_eol, conversion_end_type);
+ − 4496 CODING_SYSTEM_HAS_METHOD (convert_eol, canonicalize_after_coding);
+ − 4497 CODING_SYSTEM_HAS_METHOD (convert_eol, init_coding_stream);
+ − 4498
+ − 4499 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (undecided,
+ − 4500 "undecided-coding-system-p");
+ − 4501 CODING_SYSTEM_HAS_METHOD (undecided, init);
+ − 4502 CODING_SYSTEM_HAS_METHOD (undecided, mark);
+ − 4503 CODING_SYSTEM_HAS_METHOD (undecided, print);
+ − 4504 CODING_SYSTEM_HAS_METHOD (undecided, convert);
+ − 4505 CODING_SYSTEM_HAS_METHOD (undecided, putprop);
+ − 4506 CODING_SYSTEM_HAS_METHOD (undecided, getprop);
+ − 4507 CODING_SYSTEM_HAS_METHOD (undecided, init_coding_stream);
+ − 4508 CODING_SYSTEM_HAS_METHOD (undecided, rewind_coding_stream);
+ − 4509 CODING_SYSTEM_HAS_METHOD (undecided, finalize_coding_stream);
+ − 4510 CODING_SYSTEM_HAS_METHOD (undecided, mark_coding_stream);
+ − 4511 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize);
+ − 4512 CODING_SYSTEM_HAS_METHOD (undecided, canonicalize_after_coding);
+ − 4513
+ − 4514 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (chain, "chain-coding-system-p");
+ − 4515
+ − 4516 CODING_SYSTEM_HAS_METHOD (chain, print);
+ − 4517 CODING_SYSTEM_HAS_METHOD (chain, canonicalize);
+ − 4518 CODING_SYSTEM_HAS_METHOD (chain, init);
+ − 4519 CODING_SYSTEM_HAS_METHOD (chain, mark);
+ − 4520 CODING_SYSTEM_HAS_METHOD (chain, mark_coding_stream);
+ − 4521 CODING_SYSTEM_HAS_METHOD (chain, convert);
+ − 4522 CODING_SYSTEM_HAS_METHOD (chain, rewind_coding_stream);
+ − 4523 CODING_SYSTEM_HAS_METHOD (chain, finalize_coding_stream);
+ − 4524 CODING_SYSTEM_HAS_METHOD (chain, finalize);
+ − 4525 CODING_SYSTEM_HAS_METHOD (chain, putprop);
+ − 4526 CODING_SYSTEM_HAS_METHOD (chain, getprop);
+ − 4527 CODING_SYSTEM_HAS_METHOD (chain, conversion_end_type);
+ − 4528 CODING_SYSTEM_HAS_METHOD (chain, canonicalize_after_coding);
+ − 4529
+ − 4530 #ifdef DEBUG_XEMACS
+ − 4531 INITIALIZE_CODING_SYSTEM_TYPE (internal, "internal-coding-system-p");
+ − 4532 CODING_SYSTEM_HAS_METHOD (internal, convert);
+ − 4533 #endif
+ − 4534
+ − 4535 #ifdef HAVE_ZLIB
+ − 4536 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (gzip, "gzip-coding-system-p");
+ − 4537 CODING_SYSTEM_HAS_METHOD (gzip, conversion_end_type);
+ − 4538 CODING_SYSTEM_HAS_METHOD (gzip, convert);
+ − 4539 CODING_SYSTEM_HAS_METHOD (gzip, init);
+ − 4540 CODING_SYSTEM_HAS_METHOD (gzip, print);
+ − 4541 CODING_SYSTEM_HAS_METHOD (gzip, init_coding_stream);
+ − 4542 CODING_SYSTEM_HAS_METHOD (gzip, rewind_coding_stream);
+ − 4543 CODING_SYSTEM_HAS_METHOD (gzip, putprop);
+ − 4544 CODING_SYSTEM_HAS_METHOD (gzip, getprop);
+ − 4545 #endif
+ − 4546 }
+ − 4547
+ − 4548 void
+ − 4549 reinit_coding_system_type_create (void)
+ − 4550 {
+ − 4551 REINITIALIZE_CODING_SYSTEM_TYPE (no_conversion);
+ − 4552 REINITIALIZE_CODING_SYSTEM_TYPE (convert_eol);
+ − 4553 REINITIALIZE_CODING_SYSTEM_TYPE (undecided);
+ − 4554 REINITIALIZE_CODING_SYSTEM_TYPE (chain);
+ − 4555 #if 0
+ − 4556 REINITIALIZE_CODING_SYSTEM_TYPE (text_file_wrapper);
+ − 4557 #endif /* 0 */
+ − 4558 #ifdef DEBUG_XEMACS
+ − 4559 REINITIALIZE_CODING_SYSTEM_TYPE (internal);
+ − 4560 #endif
+ − 4561 #ifdef HAVE_ZLIB
+ − 4562 REINITIALIZE_CODING_SYSTEM_TYPE (gzip);
+ − 4563 #endif
+ − 4564 }
+ − 4565
+ − 4566 void
+ − 4567 reinit_vars_of_file_coding (void)
+ − 4568 {
428
+ − 4569 }
+ − 4570
+ − 4571 void
+ − 4572 vars_of_file_coding (void)
+ − 4573 {
771
+ − 4574 /* We always have file-coding support */
428
+ − 4575 Fprovide (intern ("file-coding"));
+ − 4576
1347
+ − 4577 QScoding_system_cookie = build_string (";;;###coding system: ");
+ − 4578 staticpro (&QScoding_system_cookie);
+ − 4579
1242
+ − 4580 #ifdef HAVE_DEFAULT_EOL_DETECTION
2297
+ − 4581 /* #### Find a more appropriate place for this comment.
+ − 4582 WARNING: The existing categories are intimately tied to the function
1242
+ − 4583 `coding-system-category' in coding.el. If you change a category, or
+ − 4584 change the layout of any coding system associated with a category, you
+ − 4585 need to check that function and make sure it's written properly. */
+ − 4586
+ − 4587 Fprovide (intern ("unix-default-eol-detection"));
+ − 4588 #endif
+ − 4589
428
+ − 4590 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
+ − 4591 Coding system used for TTY keyboard input.
+ − 4592 Not used under a windowing system.
+ − 4593 */ );
+ − 4594 Vkeyboard_coding_system = Qnil;
+ − 4595
+ − 4596 DEFVAR_LISP ("terminal-coding-system", &Vterminal_coding_system /*
+ − 4597 Coding system used for TTY display output.
+ − 4598 Not used under a windowing system.
+ − 4599 */ );
+ − 4600 Vterminal_coding_system = Qnil;
+ − 4601
+ − 4602 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /*
440
+ − 4603 Overriding coding system used when reading from a file or process.
+ − 4604 You should bind this variable with `let', but do not set it globally.
+ − 4605 If this is non-nil, it specifies the coding system that will be used
+ − 4606 to decode input on read operations, such as from a file or process.
+ − 4607 It overrides `buffer-file-coding-system-for-read',
428
+ − 4608 `insert-file-contents-pre-hook', etc. Use those variables instead of
440
+ − 4609 this one for permanent changes to the environment. */ );
428
+ − 4610 Vcoding_system_for_read = Qnil;
+ − 4611
+ − 4612 DEFVAR_LISP ("coding-system-for-write",
+ − 4613 &Vcoding_system_for_write /*
440
+ − 4614 Overriding coding system used when writing to a file or process.
+ − 4615 You should bind this variable with `let', but do not set it globally.
+ − 4616 If this is non-nil, it specifies the coding system that will be used
+ − 4617 to encode output for write operations, such as to a file or process.
+ − 4618 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc.
+ − 4619 Use those variables instead of this one for permanent changes to the
+ − 4620 environment. */ );
428
+ − 4621 Vcoding_system_for_write = Qnil;
+ − 4622
+ − 4623 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /*
+ − 4624 Coding system used to convert pathnames when accessing files.
+ − 4625 */ );
+ − 4626 Vfile_name_coding_system = Qnil;
+ − 4627
+ − 4628 DEFVAR_BOOL ("enable-multibyte-characters", &enable_multibyte_characters /*
771
+ − 4629 Setting this has no effect. It is purely for FSF compatibility.
428
+ − 4630 */ );
+ − 4631 enable_multibyte_characters = 1;
771
+ − 4632
+ − 4633 Vchain_canonicalize_hash_table =
+ − 4634 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
+ − 4635 staticpro (&Vchain_canonicalize_hash_table);
+ − 4636
+ − 4637 #ifdef DEBUG_XEMACS
+ − 4638 DEFVAR_LISP ("debug-coding-detection", &Vdebug_coding_detection /*
+ − 4639 If non-nil, display debug information about detection operations in progress.
+ − 4640 Information is displayed on stderr.
+ − 4641 */ );
+ − 4642 Vdebug_coding_detection = Qnil;
+ − 4643 #endif
428
+ − 4644 }
+ − 4645
2297
+ − 4646 /* #### reformat this for consistent appearance? */
+ − 4647
428
+ − 4648 void
+ − 4649 complex_vars_of_file_coding (void)
+ − 4650 {
771
+ − 4651 Fmake_coding_system
+ − 4652 (Qconvert_eol_cr, Qconvert_eol,
+ − 4653 build_msg_string ("Convert CR to LF"),
+ − 4654 nconc2 (list6 (Qdocumentation,
+ − 4655 build_msg_string (
+ − 4656 "Converts CR (used to mark the end of a line on Macintosh systems) to LF\n"
+ − 4657 "(used internally and under Unix to mark the end of a line)."),
+ − 4658 Qmnemonic, build_string ("CR->LF"),
+ − 4659 Qsubtype, Qcr),
+ − 4660 /* VERY IMPORTANT! Tell make-coding-system not to generate
+ − 4661 subsidiaries -- it needs the coding systems we're creating
+ − 4662 to do so! */
+ − 4663 list2 (Qeol_type, Qlf)));
+ − 4664
+ − 4665 Fmake_coding_system
+ − 4666 (Qconvert_eol_lf, Qconvert_eol,
+ − 4667 build_msg_string ("Convert LF to LF (do nothing)"),
+ − 4668 nconc2 (list6 (Qdocumentation,
+ − 4669 build_msg_string (
+ − 4670 "Do nothing."),
+ − 4671 Qmnemonic, build_string ("LF->LF"),
+ − 4672 Qsubtype, Qlf),
+ − 4673 /* VERY IMPORTANT! Tell make-coding-system not to generate
+ − 4674 subsidiaries -- it needs the coding systems we're creating
+ − 4675 to do so! */
+ − 4676 list2 (Qeol_type, Qlf)));
+ − 4677
+ − 4678 Fmake_coding_system
+ − 4679 (Qconvert_eol_crlf, Qconvert_eol,
+ − 4680 build_msg_string ("Convert CRLF to LF"),
+ − 4681 nconc2 (list6 (Qdocumentation,
+ − 4682 build_msg_string (
+ − 4683 "Converts CR+LF (used to mark the end of a line on Macintosh systems) to LF\n"
+ − 4684 "(used internally and under Unix to mark the end of a line)."),
+ − 4685 Qmnemonic, build_string ("CRLF->LF"),
+ − 4686 Qsubtype, Qcrlf),
+ − 4687 /* VERY IMPORTANT! Tell make-coding-system not to generate
+ − 4688 subsidiaries -- it needs the coding systems we're creating
+ − 4689 to do so! */
+ − 4690 list2 (Qeol_type, Qlf)));
+ − 4691
+ − 4692 Fmake_coding_system
+ − 4693 (Qconvert_eol_autodetect, Qconvert_eol,
+ − 4694 build_msg_string ("Autodetect EOL type"),
+ − 4695 nconc2 (list6 (Qdocumentation,
+ − 4696 build_msg_string (
+ − 4697 "Autodetect the end-of-line type."),
+ − 4698 Qmnemonic, build_string ("Auto-EOL"),
793
+ − 4699 Qsubtype, Qnil),
771
+ − 4700 /* VERY IMPORTANT! Tell make-coding-system not to generate
+ − 4701 subsidiaries -- it needs the coding systems we're creating
+ − 4702 to do so! */
+ − 4703 list2 (Qeol_type, Qlf)));
+ − 4704
+ − 4705 Fmake_coding_system
+ − 4706 (Qundecided, Qundecided,
+ − 4707 build_msg_string ("Undecided (auto-detect)"),
+ − 4708 nconc2 (list4 (Qdocumentation,
+ − 4709 build_msg_string
+ − 4710 ("Automatically detects the correct encoding."),
+ − 4711 Qmnemonic, build_string ("Auto")),
+ − 4712 list6 (Qdo_eol, Qt, Qdo_coding, Qt,
+ − 4713 /* We do EOL detection ourselves so we don't need to be
+ − 4714 wrapped in an EOL detector. (It doesn't actually hurt,
+ − 4715 though, I don't think.) */
+ − 4716 Qeol_type, Qlf)));
+ − 4717
+ − 4718 Fmake_coding_system
+ − 4719 (intern ("undecided-dos"), Qundecided,
+ − 4720 build_msg_string ("Undecided (auto-detect) (CRLF)"),
+ − 4721 nconc2 (list4 (Qdocumentation,
+ − 4722 build_msg_string
+ − 4723 ("Automatically detects the correct encoding; EOL type of CRLF forced."),
+ − 4724 Qmnemonic, build_string ("Auto")),
+ − 4725 list4 (Qdo_coding, Qt,
+ − 4726 Qeol_type, Qcrlf)));
+ − 4727
+ − 4728 Fmake_coding_system
+ − 4729 (intern ("undecided-unix"), Qundecided,
+ − 4730 build_msg_string ("Undecided (auto-detect) (LF)"),
+ − 4731 nconc2 (list4 (Qdocumentation,
+ − 4732 build_msg_string
+ − 4733 ("Automatically detects the correct encoding; EOL type of LF forced."),
+ − 4734 Qmnemonic, build_string ("Auto")),
+ − 4735 list4 (Qdo_coding, Qt,
+ − 4736 Qeol_type, Qlf)));
+ − 4737
+ − 4738 Fmake_coding_system
+ − 4739 (intern ("undecided-mac"), Qundecided,
+ − 4740 build_msg_string ("Undecided (auto-detect) (CR)"),
+ − 4741 nconc2 (list4 (Qdocumentation,
+ − 4742 build_msg_string
+ − 4743 ("Automatically detects the correct encoding; EOL type of CR forced."),
+ − 4744 Qmnemonic, build_string ("Auto")),
+ − 4745 list4 (Qdo_coding, Qt,
+ − 4746 Qeol_type, Qcr)));
+ − 4747
428
+ − 4748 /* Need to create this here or we're really screwed. */
+ − 4749 Fmake_coding_system
+ − 4750 (Qraw_text, Qno_conversion,
771
+ − 4751 build_msg_string ("Raw Text"),
+ − 4752 list4 (Qdocumentation,
+ − 4753 build_msg_string ("Raw text converts only line-break codes, and acts otherwise like `binary'."),
+ − 4754 Qmnemonic, build_string ("Raw")));
428
+ − 4755
+ − 4756 Fmake_coding_system
+ − 4757 (Qbinary, Qno_conversion,
771
+ − 4758 build_msg_string ("Binary"),
+ − 4759 list6 (Qdocumentation,
+ − 4760 build_msg_string (
+ − 4761 "This coding system is as close as it comes to doing no conversion.\n"
+ − 4762 "On input, each byte is converted directly into the character\n"
+ − 4763 "with the corresponding code -- i.e. from the `ascii', `control-1',\n"
+ − 4764 "or `latin-1' character sets. On output, these characters are\n"
+ − 4765 "converted back to the corresponding bytes, and other characters\n"
+ − 4766 "are converted to the default character, i.e. `~'."),
+ − 4767 Qeol_type, Qlf,
428
+ − 4768 Qmnemonic, build_string ("Binary")));
+ − 4769
771
+ − 4770 /* Formerly aliased to raw-text! Completely bogus and not even the same
+ − 4771 as FSF Emacs. */
+ − 4772 Fdefine_coding_system_alias (Qno_conversion, Qbinary);
+ − 4773 Fdefine_coding_system_alias (intern ("no-conversion-unix"),
+ − 4774 intern ("raw-text-unix"));
+ − 4775 Fdefine_coding_system_alias (intern ("no-conversion-dos"),
+ − 4776 intern ("raw-text-dos"));
+ − 4777 Fdefine_coding_system_alias (intern ("no-conversion-mac"),
+ − 4778 intern ("raw-text-mac"));
+ − 4779
1318
+ − 4780 /* These three below will get their defaults set correctly
+ − 4781 in code-init.el. We init them now so we can handle stuff at dump
771
+ − 4782 time before we get to code-init.el. */
1318
+ − 4783 Fdefine_coding_system_alias (Qnative, Qbinary);
440
+ − 4784 Fdefine_coding_system_alias (Qterminal, Qbinary);
+ − 4785 Fdefine_coding_system_alias (Qkeyboard, Qbinary);
+ − 4786
1318
+ − 4787 Fdefine_coding_system_alias (Qfile_name, Qnative);
771
+ − 4788 Fdefine_coding_system_alias (Qidentity, Qconvert_eol_lf);
+ − 4789
428
+ − 4790 /* Need this for bootstrapping */
771
+ − 4791 coding_category_system[detector_category_no_conversion] =
428
+ − 4792 Fget_coding_system (Qraw_text);
+ − 4793 }