comparison src/file-coding.c @ 398:74fd4e045ea6 r21-2-29

Import from CVS: tag r21-2-29
author cvs
date Mon, 13 Aug 2007 11:13:30 +0200
parents 6719134a07c2
children a86b2b5e0111
comparison
equal deleted inserted replaced
397:f4aeb21a5bad 398:74fd4e045ea6
23 23
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */ 24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 25
26 #include <config.h> 26 #include <config.h>
27 #include "lisp.h" 27 #include "lisp.h"
28
28 #include "buffer.h" 29 #include "buffer.h"
29 #include "elhash.h" 30 #include "elhash.h"
30 #include "insdel.h" 31 #include "insdel.h"
31 #include "lstream.h" 32 #include "lstream.h"
33 #include "opaque.h"
32 #ifdef MULE 34 #ifdef MULE
33 #include "mule-ccl.h" 35 #include "mule-ccl.h"
34 #include "chartab.h" 36 #include "chartab.h"
35 #endif 37 #endif
36 #include "file-coding.h" 38 #include "file-coding.h"
37 39
38 Lisp_Object Qbuffer_file_coding_system, Qcoding_system_error; 40 Lisp_Object Qcoding_system_error;
39 41
40 Lisp_Object Vkeyboard_coding_system; 42 Lisp_Object Vkeyboard_coding_system;
41 Lisp_Object Vterminal_coding_system; 43 Lisp_Object Vterminal_coding_system;
42 Lisp_Object Vcoding_system_for_read; 44 Lisp_Object Vcoding_system_for_read;
43 Lisp_Object Vcoding_system_for_write; 45 Lisp_Object Vcoding_system_for_write;
44 Lisp_Object Vfile_name_coding_system; 46 Lisp_Object Vfile_name_coding_system;
45 47
46 /* Table of symbols identifying each coding category. */ 48 /* Table of symbols identifying each coding category. */
47 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1]; 49 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1];
48 50
49 /* Coding system currently associated with each coding category. */ 51
50 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1]; 52
51 53 struct file_coding_dump {
52 /* Table of all coding categories in decreasing order of priority. 54 /* Coding system currently associated with each coding category. */
53 This describes a permutation of the possible coding categories. */ 55 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1];
54 int coding_category_by_priority[CODING_CATEGORY_LAST + 1]; 56
55 57 /* Table of all coding categories in decreasing order of priority.
56 Lisp_Object Qcoding_system_p; 58 This describes a permutation of the possible coding categories. */
57 59 int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
58 Lisp_Object Qno_conversion, Qccl, Qiso2022; 60
61 #ifdef MULE
62 Lisp_Object ucs_to_mule_table[65536];
63 #endif
64 } *fcd;
65
66 static const struct lrecord_description fcd_description_1[] = {
67 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 },
68 #ifdef MULE
69 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), 65536 },
70 #endif
71 { XD_END }
72 };
73
74 static const struct struct_description fcd_description = {
75 sizeof (struct file_coding_dump),
76 fcd_description_1
77 };
78
79 Lisp_Object mule_to_ucs_table;
80
81 Lisp_Object Qcoding_systemp;
82
83 Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022;
59 /* Qinternal in general.c */ 84 /* Qinternal in general.c */
60 85
61 Lisp_Object Qmnemonic, Qeol_type; 86 Lisp_Object Qmnemonic, Qeol_type;
62 Lisp_Object Qcr, Qcrlf, Qlf; 87 Lisp_Object Qcr, Qcrlf, Qlf;
63 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; 88 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf;
70 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3; 95 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
71 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; 96 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
72 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; 97 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
73 Lisp_Object Qno_iso6429; 98 Lisp_Object Qno_iso6429;
74 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion; 99 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion;
75 Lisp_Object Qctext, Qescape_quoted; 100 Lisp_Object Qescape_quoted;
76 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; 101 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
77 #endif 102 #endif
78 Lisp_Object Qencode, Qdecode; 103 Lisp_Object Qencode, Qdecode;
79 104
80 Lisp_Object Vcoding_system_hash_table; 105 Lisp_Object Vcoding_system_hash_table;
149 #endif /* MULE */ 174 #endif /* MULE */
150 EXFUN (Fcopy_coding_system, 2); 175 EXFUN (Fcopy_coding_system, 2);
151 #ifdef MULE 176 #ifdef MULE
152 struct detection_state; 177 struct detection_state;
153 static int detect_coding_sjis (struct detection_state *st, 178 static int detect_coding_sjis (struct detection_state *st,
154 CONST unsigned char *src, 179 const unsigned char *src,
155 unsigned int n); 180 unsigned int n);
156 static void decode_coding_sjis (Lstream *decoding, 181 static void decode_coding_sjis (Lstream *decoding,
157 CONST unsigned char *src, 182 const unsigned char *src,
158 unsigned_char_dynarr *dst, 183 unsigned_char_dynarr *dst,
159 unsigned int n); 184 unsigned int n);
160 static void encode_coding_sjis (Lstream *encoding, 185 static void encode_coding_sjis (Lstream *encoding,
161 CONST unsigned char *src, 186 const unsigned char *src,
162 unsigned_char_dynarr *dst, 187 unsigned_char_dynarr *dst,
163 unsigned int n); 188 unsigned int n);
164 static int detect_coding_big5 (struct detection_state *st, 189 static int detect_coding_big5 (struct detection_state *st,
165 CONST unsigned char *src, 190 const unsigned char *src,
166 unsigned int n); 191 unsigned int n);
167 static void decode_coding_big5 (Lstream *decoding, 192 static void decode_coding_big5 (Lstream *decoding,
168 CONST unsigned char *src, 193 const unsigned char *src,
169 unsigned_char_dynarr *dst, unsigned int n); 194 unsigned_char_dynarr *dst, unsigned int n);
170 static void encode_coding_big5 (Lstream *encoding, 195 static void encode_coding_big5 (Lstream *encoding,
171 CONST unsigned char *src, 196 const unsigned char *src,
172 unsigned_char_dynarr *dst, unsigned int n); 197 unsigned_char_dynarr *dst, unsigned int n);
173 static int detect_coding_ucs4 (struct detection_state *st, 198 static int detect_coding_ucs4 (struct detection_state *st,
174 CONST unsigned char *src, 199 const unsigned char *src,
175 unsigned int n); 200 unsigned int n);
176 static void decode_coding_ucs4 (Lstream *decoding, 201 static void decode_coding_ucs4 (Lstream *decoding,
177 CONST unsigned char *src, 202 const unsigned char *src,
178 unsigned_char_dynarr *dst, unsigned int n); 203 unsigned_char_dynarr *dst, unsigned int n);
179 static void encode_coding_ucs4 (Lstream *encoding, 204 static void encode_coding_ucs4 (Lstream *encoding,
180 CONST unsigned char *src, 205 const unsigned char *src,
181 unsigned_char_dynarr *dst, unsigned int n); 206 unsigned_char_dynarr *dst, unsigned int n);
182 static int detect_coding_utf8 (struct detection_state *st, 207 static int detect_coding_utf8 (struct detection_state *st,
183 CONST unsigned char *src, 208 const unsigned char *src,
184 unsigned int n); 209 unsigned int n);
185 static void decode_coding_utf8 (Lstream *decoding, 210 static void decode_coding_utf8 (Lstream *decoding,
186 CONST unsigned char *src, 211 const unsigned char *src,
187 unsigned_char_dynarr *dst, unsigned int n); 212 unsigned_char_dynarr *dst, unsigned int n);
188 static void encode_coding_utf8 (Lstream *encoding, 213 static void encode_coding_utf8 (Lstream *encoding,
189 CONST unsigned char *src, 214 const unsigned char *src,
190 unsigned_char_dynarr *dst, unsigned int n); 215 unsigned_char_dynarr *dst, unsigned int n);
191 static int postprocess_iso2022_mask (int mask); 216 static int postprocess_iso2022_mask (int mask);
192 static void reset_iso2022 (Lisp_Object coding_system, 217 static void reset_iso2022 (Lisp_Object coding_system,
193 struct iso2022_decoder *iso); 218 struct iso2022_decoder *iso);
194 static int detect_coding_iso2022 (struct detection_state *st, 219 static int detect_coding_iso2022 (struct detection_state *st,
195 CONST unsigned char *src, 220 const unsigned char *src,
196 unsigned int n); 221 unsigned int n);
197 static void decode_coding_iso2022 (Lstream *decoding, 222 static void decode_coding_iso2022 (Lstream *decoding,
198 CONST unsigned char *src, 223 const unsigned char *src,
199 unsigned_char_dynarr *dst, unsigned int n); 224 unsigned_char_dynarr *dst, unsigned int n);
200 static void encode_coding_iso2022 (Lstream *encoding, 225 static void encode_coding_iso2022 (Lstream *encoding,
201 CONST unsigned char *src, 226 const unsigned char *src,
202 unsigned_char_dynarr *dst, unsigned int n); 227 unsigned_char_dynarr *dst, unsigned int n);
203 #endif /* MULE */ 228 #endif /* MULE */
204 static void decode_coding_no_conversion (Lstream *decoding, 229 static void decode_coding_no_conversion (Lstream *decoding,
205 CONST unsigned char *src, 230 const unsigned char *src,
206 unsigned_char_dynarr *dst, 231 unsigned_char_dynarr *dst,
207 unsigned int n); 232 unsigned int n);
208 static void encode_coding_no_conversion (Lstream *encoding, 233 static void encode_coding_no_conversion (Lstream *encoding,
209 CONST unsigned char *src, 234 const unsigned char *src,
210 unsigned_char_dynarr *dst, 235 unsigned_char_dynarr *dst,
211 unsigned int n); 236 unsigned int n);
212 static void mule_decode (Lstream *decoding, CONST unsigned char *src, 237 static void mule_decode (Lstream *decoding, const unsigned char *src,
213 unsigned_char_dynarr *dst, unsigned int n); 238 unsigned_char_dynarr *dst, unsigned int n);
214 static void mule_encode (Lstream *encoding, CONST unsigned char *src, 239 static void mule_encode (Lstream *encoding, const unsigned char *src,
215 unsigned_char_dynarr *dst, unsigned int n); 240 unsigned_char_dynarr *dst, unsigned int n);
216 241
217 typedef struct codesys_prop codesys_prop; 242 typedef struct codesys_prop codesys_prop;
218 struct codesys_prop 243 struct codesys_prop
219 { 244 {
223 248
224 typedef struct 249 typedef struct
225 { 250 {
226 Dynarr_declare (codesys_prop); 251 Dynarr_declare (codesys_prop);
227 } codesys_prop_dynarr; 252 } codesys_prop_dynarr;
253
254 static const struct lrecord_description codesys_prop_description_1[] = {
255 { XD_LISP_OBJECT, offsetof (codesys_prop, sym) },
256 { XD_END }
257 };
258
259 static const struct struct_description codesys_prop_description = {
260 sizeof (codesys_prop),
261 codesys_prop_description_1
262 };
263
264 static const struct lrecord_description codesys_prop_dynarr_description_1[] = {
265 XD_DYNARR_DESC (codesys_prop_dynarr, &codesys_prop_description),
266 { XD_END }
267 };
268
269 static const struct struct_description codesys_prop_dynarr_description = {
270 sizeof (codesys_prop_dynarr),
271 codesys_prop_dynarr_description_1
272 };
228 273
229 codesys_prop_dynarr *the_codesys_prop_dynarr; 274 codesys_prop_dynarr *the_codesys_prop_dynarr;
230 275
231 enum codesys_prop_enum 276 enum codesys_prop_enum
232 { 277 {
238 283
239 /************************************************************************/ 284 /************************************************************************/
240 /* Coding system functions */ 285 /* Coding system functions */
241 /************************************************************************/ 286 /************************************************************************/
242 287
243 static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object)); 288 static Lisp_Object mark_coding_system (Lisp_Object);
244 static void print_coding_system (Lisp_Object, Lisp_Object, int); 289 static void print_coding_system (Lisp_Object, Lisp_Object, int);
245 static void finalize_coding_system (void *header, int for_disksave); 290 static void finalize_coding_system (void *header, int for_disksave);
291
292 #ifdef MULE
293 static const struct lrecord_description ccs_description_1[] = {
294 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, from_charset) },
295 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, to_charset) },
296 { XD_END }
297 };
298
299 static const struct struct_description ccs_description = {
300 sizeof (charset_conversion_spec),
301 ccs_description_1
302 };
303
304 static const struct lrecord_description ccsd_description_1[] = {
305 XD_DYNARR_DESC (charset_conversion_spec_dynarr, &ccs_description),
306 { XD_END }
307 };
308
309 static const struct struct_description ccsd_description = {
310 sizeof (charset_conversion_spec_dynarr),
311 ccsd_description_1
312 };
313 #endif
314
315 static const struct lrecord_description coding_system_description[] = {
316 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, name) },
317 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, doc_string) },
318 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, mnemonic) },
319 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, post_read_conversion) },
320 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, pre_write_conversion) },
321 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_lf) },
322 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_crlf) },
323 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_cr) },
324 #ifdef MULE
325 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, iso2022.initial_charset), 4 },
326 { XD_STRUCT_PTR, offsetof (Lisp_Coding_System, iso2022.input_conv), 1, &ccsd_description },
327 { XD_STRUCT_PTR, offsetof (Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description },
328 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, ccl.decode) },
329 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, ccl.encode) },
330 #endif
331 { XD_END }
332 };
246 333
247 DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system, 334 DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
248 mark_coding_system, print_coding_system, 335 mark_coding_system, print_coding_system,
249 finalize_coding_system, 336 finalize_coding_system,
250 0, 0, struct Lisp_Coding_System); 337 0, 0, coding_system_description,
338 Lisp_Coding_System);
251 339
252 static Lisp_Object 340 static Lisp_Object
253 mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object)) 341 mark_coding_system (Lisp_Object obj)
254 { 342 {
255 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); 343 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
256 344
257 markobj (CODING_SYSTEM_NAME (codesys)); 345 mark_object (CODING_SYSTEM_NAME (codesys));
258 markobj (CODING_SYSTEM_DOC_STRING (codesys)); 346 mark_object (CODING_SYSTEM_DOC_STRING (codesys));
259 markobj (CODING_SYSTEM_MNEMONIC (codesys)); 347 mark_object (CODING_SYSTEM_MNEMONIC (codesys));
260 markobj (CODING_SYSTEM_EOL_LF (codesys)); 348 mark_object (CODING_SYSTEM_EOL_LF (codesys));
261 markobj (CODING_SYSTEM_EOL_CRLF (codesys)); 349 mark_object (CODING_SYSTEM_EOL_CRLF (codesys));
262 markobj (CODING_SYSTEM_EOL_CR (codesys)); 350 mark_object (CODING_SYSTEM_EOL_CR (codesys));
263 351
264 switch (CODING_SYSTEM_TYPE (codesys)) 352 switch (CODING_SYSTEM_TYPE (codesys))
265 { 353 {
266 #ifdef MULE 354 #ifdef MULE
267 int i; 355 int i;
268 case CODESYS_ISO2022: 356 case CODESYS_ISO2022:
269 for (i = 0; i < 4; i++) 357 for (i = 0; i < 4; i++)
270 markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); 358 mark_object (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
271 if (codesys->iso2022.input_conv) 359 if (codesys->iso2022.input_conv)
272 { 360 {
273 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++) 361 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
274 { 362 {
275 struct charset_conversion_spec *ccs = 363 struct charset_conversion_spec *ccs =
276 Dynarr_atp (codesys->iso2022.input_conv, i); 364 Dynarr_atp (codesys->iso2022.input_conv, i);
277 markobj (ccs->from_charset); 365 mark_object (ccs->from_charset);
278 markobj (ccs->to_charset); 366 mark_object (ccs->to_charset);
279 } 367 }
280 } 368 }
281 if (codesys->iso2022.output_conv) 369 if (codesys->iso2022.output_conv)
282 { 370 {
283 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++) 371 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++)
284 { 372 {
285 struct charset_conversion_spec *ccs = 373 struct charset_conversion_spec *ccs =
286 Dynarr_atp (codesys->iso2022.output_conv, i); 374 Dynarr_atp (codesys->iso2022.output_conv, i);
287 markobj (ccs->from_charset); 375 mark_object (ccs->from_charset);
288 markobj (ccs->to_charset); 376 mark_object (ccs->to_charset);
289 } 377 }
290 } 378 }
291 break; 379 break;
292 380
293 case CODESYS_CCL: 381 case CODESYS_CCL:
294 markobj (CODING_SYSTEM_CCL_DECODE (codesys)); 382 mark_object (CODING_SYSTEM_CCL_DECODE (codesys));
295 markobj (CODING_SYSTEM_CCL_ENCODE (codesys)); 383 mark_object (CODING_SYSTEM_CCL_ENCODE (codesys));
296 break; 384 break;
297 #endif /* MULE */ 385 #endif /* MULE */
298 default: 386 default:
299 break; 387 break;
300 } 388 }
301 389
302 markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys)); 390 mark_object (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
303 return CODING_SYSTEM_POST_READ_CONVERSION (codesys); 391 return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
304 } 392 }
305 393
306 static void 394 static void
307 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, 395 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
346 break; 434 break;
347 } 435 }
348 } 436 }
349 } 437 }
350 438
351 static enum eol_type 439 static eol_type_t
352 symbol_to_eol_type (Lisp_Object symbol) 440 symbol_to_eol_type (Lisp_Object symbol)
353 { 441 {
354 CHECK_SYMBOL (symbol); 442 CHECK_SYMBOL (symbol);
355 if (NILP (symbol)) return EOL_AUTODETECT; 443 if (NILP (symbol)) return EOL_AUTODETECT;
356 if (EQ (symbol, Qlf)) return EOL_LF; 444 if (EQ (symbol, Qlf)) return EOL_LF;
360 signal_simple_error ("Unrecognized eol type", symbol); 448 signal_simple_error ("Unrecognized eol type", symbol);
361 return EOL_AUTODETECT; /* not reached */ 449 return EOL_AUTODETECT; /* not reached */
362 } 450 }
363 451
364 static Lisp_Object 452 static Lisp_Object
365 eol_type_to_symbol (enum eol_type type) 453 eol_type_to_symbol (eol_type_t type)
366 { 454 {
367 switch (type) 455 switch (type)
368 { 456 {
369 default: abort (); 457 default: abort ();
370 case EOL_LF: return Qlf; 458 case EOL_LF: return Qlf;
451 If there is no such coding system, nil is returned. Otherwise the 539 If there is no such coding system, nil is returned. Otherwise the
452 associated coding system object is returned. 540 associated coding system object is returned.
453 */ 541 */
454 (coding_system_or_name)) 542 (coding_system_or_name))
455 { 543 {
456 if (CODING_SYSTEMP (coding_system_or_name))
457 return coding_system_or_name;
458
459 if (NILP (coding_system_or_name)) 544 if (NILP (coding_system_or_name))
460 coding_system_or_name = Qbinary; 545 coding_system_or_name = Qbinary;
546 else if (CODING_SYSTEMP (coding_system_or_name))
547 return coding_system_or_name;
461 else 548 else
462 CHECK_SYMBOL (coding_system_or_name); 549 CHECK_SYMBOL (coding_system_or_name);
463 550
464 return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil); 551 while (1)
552 {
553 coding_system_or_name =
554 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
555
556 if (CODING_SYSTEMP (coding_system_or_name) || NILP (coding_system_or_name))
557 return coding_system_or_name;
558 }
465 } 559 }
466 560
467 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* 561 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
468 Retrieve the coding system of the given name. 562 Retrieve the coding system of the given name.
469 Same as `find-coding-system' except that if there is no such 563 Same as `find-coding-system' except that if there is no such
493 /* This function can GC */ 587 /* This function can GC */
494 struct coding_system_list_closure *cscl = 588 struct coding_system_list_closure *cscl =
495 (struct coding_system_list_closure *) coding_system_list_closure; 589 (struct coding_system_list_closure *) coding_system_list_closure;
496 Lisp_Object *coding_system_list = cscl->coding_system_list; 590 Lisp_Object *coding_system_list = cscl->coding_system_list;
497 591
498 *coding_system_list = Fcons (XCODING_SYSTEM (value)->name, 592 *coding_system_list = Fcons (key, *coding_system_list);
499 *coding_system_list);
500 return 0; 593 return 0;
501 } 594 }
502 595
503 DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /* 596 DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /*
504 Return a list of the names of all defined coding systems. 597 Return a list of the names of all defined coding systems.
529 622
530 static Lisp_Coding_System * 623 static Lisp_Coding_System *
531 allocate_coding_system (enum coding_system_type type, Lisp_Object name) 624 allocate_coding_system (enum coding_system_type type, Lisp_Object name)
532 { 625 {
533 Lisp_Coding_System *codesys = 626 Lisp_Coding_System *codesys =
534 alloc_lcrecord_type (Lisp_Coding_System, lrecord_coding_system); 627 alloc_lcrecord_type (Lisp_Coding_System, &lrecord_coding_system);
535 628
536 zero_lcrecord (codesys); 629 zero_lcrecord (codesys);
537 CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil; 630 CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys) = Qnil;
538 CODING_SYSTEM_POST_READ_CONVERSION (codesys) = Qnil; 631 CODING_SYSTEM_POST_READ_CONVERSION (codesys) = Qnil;
539 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_AUTODETECT; 632 CODING_SYSTEM_EOL_TYPE (codesys) = EOL_AUTODETECT;
947 to->name = new_name; 1040 to->name = new_name;
948 } 1041 }
949 return new_coding_system; 1042 return new_coding_system;
950 } 1043 }
951 1044
1045 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, 1, 1, 0, /*
1046 Return t if OBJECT names a coding system, and is not a coding system alias.
1047 */
1048 (object))
1049 {
1050 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil))
1051 ? Qt : Qnil;
1052 }
1053
1054 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /*
1055 Return t if OBJECT is a coding system alias.
1056 All coding system aliases are created by `define-coding-system-alias'.
1057 */
1058 (object))
1059 {
1060 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero))
1061 ? Qt : Qnil;
1062 }
1063
1064 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /*
1065 Return the coding-system symbol for which symbol ALIAS is an alias.
1066 */
1067 (alias))
1068 {
1069 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil);
1070 if (SYMBOLP (aliasee))
1071 return aliasee;
1072 else
1073 signal_simple_error ("Symbol is not a coding system alias", alias);
1074 }
1075
952 static Lisp_Object 1076 static Lisp_Object
953 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) 1077 append_suffix_to_symbol (Lisp_Object symbol, const char *ascii_string)
1078 {
1079 return Fintern (concat2 (Fsymbol_name (symbol), build_string (ascii_string)),
1080 Qnil);
1081 }
1082
1083 /* A maphash function, for removing dangling coding system aliases. */
1084 static int
1085 dangling_coding_system_alias_p (Lisp_Object alias,
1086 Lisp_Object aliasee,
1087 void *dangling_aliases)
1088 {
1089 if (SYMBOLP (aliasee)
1090 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil)))
1091 {
1092 (*(int *) dangling_aliases)++;
1093 return 1;
1094 }
1095 else
1096 return 0;
1097 }
1098
1099 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
1100 Define symbol ALIAS as an alias for coding system ALIASEE.
1101
1102 You can use this function to redefine an alias that has already been defined,
1103 but you cannot redefine a name which is the canonical name for a coding system.
1104 \(a canonical name of a coding system is what is returned when you call
1105 `coding-system-name' on a coding system).
1106
1107 ALIASEE itself can be an alias, which allows you to define nested aliases.
1108
1109 You are forbidden, however, from creating alias loops or `dangling' aliases.
1110 These will be detected, and an error will be signaled if you attempt to do so.
1111
1112 If ALIASEE is nil, then ALIAS will simply be undefined.
1113
1114 See also `coding-system-alias-p', `coding-system-aliasee',
1115 and `coding-system-canonical-name-p'.
1116 */
1117 (alias, aliasee))
1118 {
1119 Lisp_Object real_coding_system, probe;
1120
1121 CHECK_SYMBOL (alias);
1122
1123 if (!NILP (Fcoding_system_canonical_name_p (alias)))
1124 signal_simple_error
1125 ("Symbol is the canonical name of a coding system and cannot be redefined",
1126 alias);
1127
1128 if (NILP (aliasee))
1129 {
1130 Lisp_Object subsidiary_unix = append_suffix_to_symbol (alias, "-unix");
1131 Lisp_Object subsidiary_dos = append_suffix_to_symbol (alias, "-dos");
1132 Lisp_Object subsidiary_mac = append_suffix_to_symbol (alias, "-mac");
1133
1134 Fremhash (alias, Vcoding_system_hash_table);
1135
1136 /* Undefine subsidiary aliases,
1137 presumably created by a previous call to this function */
1138 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) &&
1139 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) &&
1140 ! NILP (Fcoding_system_alias_p (subsidiary_mac)))
1141 {
1142 Fdefine_coding_system_alias (subsidiary_unix, Qnil);
1143 Fdefine_coding_system_alias (subsidiary_dos, Qnil);
1144 Fdefine_coding_system_alias (subsidiary_mac, Qnil);
1145 }
1146
1147 /* Undefine dangling coding system aliases. */
1148 {
1149 int dangling_aliases;
1150
1151 do {
1152 dangling_aliases = 0;
1153 elisp_map_remhash (dangling_coding_system_alias_p,
1154 Vcoding_system_hash_table,
1155 &dangling_aliases);
1156 } while (dangling_aliases > 0);
1157 }
1158
1159 return Qnil;
1160 }
1161
1162 if (CODING_SYSTEMP (aliasee))
1163 aliasee = XCODING_SYSTEM_NAME (aliasee);
1164
1165 /* Checks that aliasee names a coding-system */
1166 real_coding_system = Fget_coding_system (aliasee);
1167
1168 /* Check for coding system alias loops */
1169 if (EQ (alias, aliasee))
1170 alias_loop: signal_simple_error_2
1171 ("Attempt to create a coding system alias loop", alias, aliasee);
1172
1173 for (probe = aliasee;
1174 SYMBOLP (probe);
1175 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero))
1176 {
1177 if (EQ (probe, alias))
1178 goto alias_loop;
1179 }
1180
1181 Fputhash (alias, aliasee, Vcoding_system_hash_table);
1182
1183 /* Set up aliases for subsidiaries.
1184 #### There must be a better way to handle subsidiary coding systems. */
1185 {
1186 static const char *suffixes[] = { "-unix", "-dos", "-mac" };
1187 int i;
1188 for (i = 0; i < countof (suffixes); i++)
1189 {
1190 Lisp_Object alias_subsidiary =
1191 append_suffix_to_symbol (alias, suffixes[i]);
1192 Lisp_Object aliasee_subsidiary =
1193 append_suffix_to_symbol (aliasee, suffixes[i]);
1194
1195 if (! NILP (Ffind_coding_system (aliasee_subsidiary)))
1196 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary);
1197 }
1198 }
1199 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
1200 but it doesn't look intentional, so I'd rather return something
1201 meaningful or nothing at all. */
1202 return Qnil;
1203 }
1204
1205 static Lisp_Object
1206 subsidiary_coding_system (Lisp_Object coding_system, eol_type_t type)
954 { 1207 {
955 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); 1208 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
956 Lisp_Object new_coding_system; 1209 Lisp_Object new_coding_system;
957 1210
958 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT) 1211 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
1228 /* Now go through the existing categories by priority to retrieve 1481 /* Now go through the existing categories by priority to retrieve
1229 the categories not yet specified and preserve their priority 1482 the categories not yet specified and preserve their priority
1230 order. */ 1483 order. */
1231 for (j = 0; j <= CODING_CATEGORY_LAST; j++) 1484 for (j = 0; j <= CODING_CATEGORY_LAST; j++)
1232 { 1485 {
1233 int cat = coding_category_by_priority[j]; 1486 int cat = fcd->coding_category_by_priority[j];
1234 if (category_to_priority[cat] < 0) 1487 if (category_to_priority[cat] < 0)
1235 category_to_priority[cat] = i++; 1488 category_to_priority[cat] = i++;
1236 } 1489 }
1237 1490
1238 /* Now we need to construct the inverse of the mapping we just 1491 /* Now we need to construct the inverse of the mapping we just
1239 constructed. */ 1492 constructed. */
1240 1493
1241 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1494 for (i = 0; i <= CODING_CATEGORY_LAST; i++)
1242 coding_category_by_priority[category_to_priority[i]] = i; 1495 fcd->coding_category_by_priority[category_to_priority[i]] = i;
1243 1496
1244 /* Phew! That was confusing. */ 1497 /* Phew! That was confusing. */
1245 return Qnil; 1498 return Qnil;
1246 } 1499 }
1247 1500
1252 { 1505 {
1253 int i; 1506 int i;
1254 Lisp_Object list = Qnil; 1507 Lisp_Object list = Qnil;
1255 1508
1256 for (i = CODING_CATEGORY_LAST; i >= 0; i--) 1509 for (i = CODING_CATEGORY_LAST; i >= 0; i--)
1257 list = Fcons (coding_category_symbol[coding_category_by_priority[i]], 1510 list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]],
1258 list); 1511 list);
1259 return list; 1512 return list;
1260 } 1513 }
1261 1514
1262 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* 1515 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /*
1265 (coding_category, coding_system)) 1518 (coding_category, coding_system))
1266 { 1519 {
1267 int cat = decode_coding_category (coding_category); 1520 int cat = decode_coding_category (coding_category);
1268 1521
1269 coding_system = Fget_coding_system (coding_system); 1522 coding_system = Fget_coding_system (coding_system);
1270 coding_category_system[cat] = coding_system; 1523 fcd->coding_category_system[cat] = coding_system;
1271 return Qnil; 1524 return Qnil;
1272 } 1525 }
1273 1526
1274 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* 1527 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /*
1275 Return the coding system associated with a coding category. 1528 Return the coding system associated with a coding category.
1276 */ 1529 */
1277 (coding_category)) 1530 (coding_category))
1278 { 1531 {
1279 int cat = decode_coding_category (coding_category); 1532 int cat = decode_coding_category (coding_category);
1280 Lisp_Object sys = coding_category_system[cat]; 1533 Lisp_Object sys = fcd->coding_category_system[cat];
1281 1534
1282 if (!NILP (sys)) 1535 if (!NILP (sys))
1283 return XCODING_SYSTEM_NAME (sys); 1536 return XCODING_SYSTEM_NAME (sys);
1284 return Qnil; 1537 return Qnil;
1285 } 1538 }
1289 /* Detecting the encoding of data */ 1542 /* Detecting the encoding of data */
1290 /************************************************************************/ 1543 /************************************************************************/
1291 1544
1292 struct detection_state 1545 struct detection_state
1293 { 1546 {
1294 enum eol_type eol_type; 1547 eol_type_t eol_type;
1295 int seen_non_ascii; 1548 int seen_non_ascii;
1296 int mask; 1549 int mask;
1297 #ifdef MULE 1550 #ifdef MULE
1298 struct 1551 struct
1299 { 1552 {
1370 /* Perhaps the only thing useful you learn from intensive Microsoft 1623 /* Perhaps the only thing useful you learn from intensive Microsoft
1371 technical interviews */ 1624 technical interviews */
1372 return (mask & (mask - 1)) == 0; 1625 return (mask & (mask - 1)) == 0;
1373 } 1626 }
1374 1627
1375 static enum eol_type 1628 static eol_type_t
1376 detect_eol_type (struct detection_state *st, CONST unsigned char *src, 1629 detect_eol_type (struct detection_state *st, const unsigned char *src,
1377 unsigned int n) 1630 unsigned int n)
1378 { 1631 {
1379 int c; 1632 int c;
1380 1633
1381 while (n--) 1634 while (n--)
1382 { 1635 {
1383 c = *src++; 1636 c = *src++;
1384 if (c == '\r') 1637 if (c == '\n')
1638 {
1639 if (st->eol.just_saw_cr)
1640 return EOL_CRLF;
1641 else if (st->eol.seen_anything)
1642 return EOL_LF;
1643 }
1644 else if (st->eol.just_saw_cr)
1645 return EOL_CR;
1646 else if (c == '\r')
1385 st->eol.just_saw_cr = 1; 1647 st->eol.just_saw_cr = 1;
1386 else 1648 else
1387 { 1649 st->eol.just_saw_cr = 0;
1388 if (c == '\n')
1389 {
1390 if (st->eol.just_saw_cr)
1391 return EOL_CRLF;
1392 else if (st->eol.seen_anything)
1393 return EOL_LF;
1394 }
1395 else if (st->eol.just_saw_cr)
1396 return EOL_CR;
1397 st->eol.just_saw_cr = 0;
1398 }
1399 st->eol.seen_anything = 1; 1650 st->eol.seen_anything = 1;
1400 } 1651 }
1401 1652
1402 return EOL_AUTODETECT; 1653 return EOL_AUTODETECT;
1403 } 1654 }
1418 is present in st->mask 1669 is present in st->mask
1419 1 == definitive answers are here for both st->eol_type and st->mask 1670 1 == definitive answers are here for both st->eol_type and st->mask
1420 */ 1671 */
1421 1672
1422 static int 1673 static int
1423 detect_coding_type (struct detection_state *st, CONST unsigned char *src, 1674 detect_coding_type (struct detection_state *st, const Extbyte *src,
1424 unsigned int n, int just_do_eol) 1675 unsigned int n, int just_do_eol)
1425 { 1676 {
1426 int c; 1677 int c;
1427 1678
1428 if (st->eol_type == EOL_AUTODETECT) 1679 if (st->eol_type == EOL_AUTODETECT)
1495 "Invalid `default-buffer-file-coding-system', set to nil"); 1746 "Invalid `default-buffer-file-coding-system', set to nil");
1496 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; 1747 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil;
1497 } 1748 }
1498 } 1749 }
1499 if (NILP (retval)) 1750 if (NILP (retval))
1500 retval = Fget_coding_system (Qno_conversion); 1751 retval = Fget_coding_system (Qraw_text);
1501 return retval; 1752 return retval;
1502 } 1753 }
1503 else 1754 else
1504 { 1755 {
1505 int i; 1756 int i;
1509 #endif 1760 #endif
1510 /* Look through the coding categories by priority and find 1761 /* Look through the coding categories by priority and find
1511 the first one that is allowed. */ 1762 the first one that is allowed. */
1512 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1763 for (i = 0; i <= CODING_CATEGORY_LAST; i++)
1513 { 1764 {
1514 cat = coding_category_by_priority[i]; 1765 cat = fcd->coding_category_by_priority[i];
1515 if ((mask & (1 << cat)) && 1766 if ((mask & (1 << cat)) &&
1516 !NILP (coding_category_system[cat])) 1767 !NILP (fcd->coding_category_system[cat]))
1517 break; 1768 break;
1518 } 1769 }
1519 if (cat >= 0) 1770 if (cat >= 0)
1520 return coding_category_system[cat]; 1771 return fcd->coding_category_system[cat];
1521 else 1772 else
1522 return Fget_coding_system (Qno_conversion); 1773 return Fget_coding_system (Qraw_text);
1523 } 1774 }
1524 } 1775 }
1525 1776
1526 /* Given a seekable read stream and potential coding system and EOL type 1777 /* Given a seekable read stream and potential coding system and EOL type
1527 as specified, do any autodetection that is called for. If the 1778 as specified, do any autodetection that is called for. If the
1528 coding system and/or EOL type are not autodetect, they will be left 1779 coding system and/or EOL type are not `autodetect', they will be left
1529 alone; but this function will never return an autodetect coding system 1780 alone; but this function will never return an autodetect coding system
1530 or EOL type. 1781 or EOL type.
1531 1782
1532 This function does not automatically fetch subsidiary coding systems; 1783 This function does not automatically fetch subsidiary coding systems;
1533 that should be unnecessary with the explicit eol-type argument. */ 1784 that should be unnecessary with the explicit eol-type argument. */
1534 1785
1786 #define LENGTH(string_constant) (sizeof (string_constant) - 1)
1787
1535 void 1788 void
1536 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out, 1789 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
1537 enum eol_type *eol_type_in_out) 1790 eol_type_t *eol_type_in_out)
1538 { 1791 {
1539 struct detection_state decst; 1792 struct detection_state decst;
1540 1793
1541 if (*eol_type_in_out == EOL_AUTODETECT) 1794 if (*eol_type_in_out == EOL_AUTODETECT)
1542 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out); 1795 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out);
1544 xzero (decst); 1797 xzero (decst);
1545 decst.eol_type = *eol_type_in_out; 1798 decst.eol_type = *eol_type_in_out;
1546 decst.mask = ~0; 1799 decst.mask = ~0;
1547 1800
1548 /* If autodetection is called for, do it now. */ 1801 /* If autodetection is called for, do it now. */
1549 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT || 1802 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT
1550 *eol_type_in_out == EOL_AUTODETECT) 1803 || *eol_type_in_out == EOL_AUTODETECT)
1551 { 1804 {
1552 1805 Extbyte buf[4096];
1553 while (1) 1806 Lisp_Object coding_system = Qnil;
1554 { 1807 Extbyte *p;
1555 unsigned char random_buffer[4096]; 1808 ssize_t nread = Lstream_read (stream, buf, sizeof (buf));
1556 int nread; 1809 Extbyte *scan_end;
1557 1810
1558 nread = Lstream_read (stream, random_buffer, sizeof (random_buffer)); 1811 /* Look for initial "-*-"; mode line prefix */
1559 if (!nread) 1812 for (p = buf,
1813 scan_end = buf + nread - LENGTH ("-*-coding:?-*-");
1814 p <= scan_end
1815 && *p != '\n'
1816 && *p != '\r';
1817 p++)
1818 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
1819 {
1820 Extbyte *local_vars_beg = p + 3;
1821 /* Look for final "-*-"; mode line suffix */
1822 for (p = local_vars_beg,
1823 scan_end = buf + nread - LENGTH ("-*-");
1824 p <= scan_end
1825 && *p != '\n'
1826 && *p != '\r';
1827 p++)
1828 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
1829 {
1830 Extbyte *suffix = p;
1831 /* Look for "coding:" */
1832 for (p = local_vars_beg,
1833 scan_end = suffix - LENGTH ("coding:?");
1834 p <= scan_end;
1835 p++)
1836 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0
1837 && (p == local_vars_beg
1838 || (*(p-1) == ' ' ||
1839 *(p-1) == '\t' ||
1840 *(p-1) == ';')))
1841 {
1842 Extbyte save;
1843 int n;
1844 p += LENGTH ("coding:");
1845 while (*p == ' ' || *p == '\t') p++;
1846
1847 /* Get coding system name */
1848 save = *suffix; *suffix = '\0';
1849 /* Characters valid in a MIME charset name (rfc 1521),
1850 and in a Lisp symbol name. */
1851 n = strspn ( (char *) p,
1852 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1853 "abcdefghijklmnopqrstuvwxyz"
1854 "0123456789"
1855 "!$%&*+-.^_{|}~");
1856 *suffix = save;
1857 if (n > 0)
1858 {
1859 save = p[n]; p[n] = '\0';
1860 coding_system =
1861 Ffind_coding_system (intern ((char *) p));
1862 p[n] = save;
1863 }
1864 break;
1865 }
1866 break;
1867 }
1560 break; 1868 break;
1561 if (detect_coding_type (&decst, random_buffer, nread, 1869 }
1562 XCODING_SYSTEM_TYPE (*codesys_in_out) != 1870
1563 CODESYS_AUTODETECT)) 1871 if (NILP (coding_system))
1564 break; 1872 do
1565 } 1873 {
1874 if (detect_coding_type (&decst, buf, nread,
1875 XCODING_SYSTEM_TYPE (*codesys_in_out)
1876 != CODESYS_AUTODETECT))
1877 break;
1878 nread = Lstream_read (stream, buf, sizeof (buf));
1879 if (nread == 0)
1880 break;
1881 }
1882 while (1);
1883
1884 else if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT
1885 && XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
1886 do
1887 {
1888 if (detect_coding_type (&decst, buf, nread, 1))
1889 break;
1890 nread = Lstream_read (stream, buf, sizeof (buf));
1891 if (!nread)
1892 break;
1893 }
1894 while (1);
1566 1895
1567 *eol_type_in_out = decst.eol_type; 1896 *eol_type_in_out = decst.eol_type;
1568 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT) 1897 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
1569 *codesys_in_out = coding_system_from_mask (decst.mask); 1898 {
1899 if (NILP (coding_system))
1900 *codesys_in_out = coding_system_from_mask (decst.mask);
1901 else
1902 *codesys_in_out = coding_system;
1903 }
1570 } 1904 }
1571 1905
1572 /* If we absolutely can't determine the EOL type, just assume LF. */ 1906 /* If we absolutely can't determine the EOL type, just assume LF. */
1573 if (*eol_type_in_out == EOL_AUTODETECT) 1907 if (*eol_type_in_out == EOL_AUTODETECT)
1574 *eol_type_in_out = EOL_LF; 1908 *eol_type_in_out = EOL_LF;
1603 decst.eol_type = EOL_AUTODETECT; 1937 decst.eol_type = EOL_AUTODETECT;
1604 decst.mask = ~0; 1938 decst.mask = ~0;
1605 while (1) 1939 while (1)
1606 { 1940 {
1607 unsigned char random_buffer[4096]; 1941 unsigned char random_buffer[4096];
1608 int nread = Lstream_read (istr, random_buffer, sizeof (random_buffer)); 1942 ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));
1609 1943
1610 if (!nread) 1944 if (!nread)
1611 break; 1945 break;
1612 if (detect_coding_type (&decst, random_buffer, nread, 0)) 1946 if (detect_coding_type (&decst, random_buffer, nread, 0))
1613 break; 1947 break;
1624 #ifdef MULE 1958 #ifdef MULE
1625 decst.mask = postprocess_iso2022_mask (decst.mask); 1959 decst.mask = postprocess_iso2022_mask (decst.mask);
1626 #endif 1960 #endif
1627 for (i = CODING_CATEGORY_LAST; i >= 0; i--) 1961 for (i = CODING_CATEGORY_LAST; i >= 0; i--)
1628 { 1962 {
1629 int sys = coding_category_by_priority[i]; 1963 int sys = fcd->coding_category_by_priority[i];
1630 if (decst.mask & (1 << sys)) 1964 if (decst.mask & (1 << sys))
1631 { 1965 {
1632 Lisp_Object codesys = coding_category_system[sys]; 1966 Lisp_Object codesys = fcd->coding_category_system[sys];
1633 if (!NILP (codesys)) 1967 if (!NILP (codesys))
1634 codesys = subsidiary_coding_system (codesys, decst.eol_type); 1968 codesys = subsidiary_coding_system (codesys, decst.eol_type);
1635 val = Fcons (codesys, val); 1969 val = Fcons (codesys, val);
1636 } 1970 }
1637 } 1971 }
1716 } \ 2050 } \
1717 } while (0) 2051 } while (0)
1718 2052
1719 #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst) \ 2053 #define DECODE_HANDLE_END_OF_CONVERSION(flags, ch, dst) \
1720 do { \ 2054 do { \
1721 DECODE_OUTPUT_PARTIAL_CHAR (ch); \ 2055 if (flags & CODING_STATE_END) \
1722 if ((flags & CODING_STATE_END) && \ 2056 { \
1723 (flags & CODING_STATE_CR)) \ 2057 DECODE_OUTPUT_PARTIAL_CHAR (ch); \
1724 Dynarr_add (dst, '\r'); \ 2058 if (flags & CODING_STATE_CR) \
2059 Dynarr_add (dst, '\r'); \
2060 } \
1725 } while (0) 2061 } while (0)
1726 2062
1727 #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding) 2063 #define DECODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, decoding)
1728 2064
1729 struct decoding_stream 2065 struct decoding_stream
1752 /* EOL_TYPE specifies the type of end-of-line conversion that 2088 /* EOL_TYPE specifies the type of end-of-line conversion that
1753 currently applies. We need to keep this separate from the 2089 currently applies. We need to keep this separate from the
1754 EOL type stored in CODESYS because the latter might indicate 2090 EOL type stored in CODESYS because the latter might indicate
1755 automatic EOL-type detection while the former will always 2091 automatic EOL-type detection while the former will always
1756 indicate a particular EOL type. */ 2092 indicate a particular EOL type. */
1757 enum eol_type eol_type; 2093 eol_type_t eol_type;
1758 #ifdef MULE 2094 #ifdef MULE
1759 /* Additional ISO2022 information. We define the structure above 2095 /* Additional ISO2022 information. We define the structure above
1760 because it's also needed by the detection routines. */ 2096 because it's also needed by the detection routines. */
1761 struct iso2022_decoder iso2022; 2097 struct iso2022_decoder iso2022;
1762 2098
1763 /* Additional information (the state of the running CCL program) 2099 /* Additional information (the state of the running CCL program)
1764 used by the CCL decoder. */ 2100 used by the CCL decoder. */
1765 struct ccl_program ccl; 2101 struct ccl_program ccl;
2102
2103 /* counter for UTF-8 or UCS-4 */
2104 unsigned char counter;
1766 #endif 2105 #endif
1767 struct detection_state decst; 2106 struct detection_state decst;
1768 }; 2107 };
1769 2108
1770 static int decoding_reader (Lstream *stream, unsigned char *data, size_t size); 2109 static ssize_t decoding_reader (Lstream *stream,
1771 static int decoding_writer (Lstream *stream, CONST unsigned char *data, size_t size); 2110 unsigned char *data, size_t size);
2111 static ssize_t decoding_writer (Lstream *stream,
2112 const unsigned char *data, size_t size);
1772 static int decoding_rewinder (Lstream *stream); 2113 static int decoding_rewinder (Lstream *stream);
1773 static int decoding_seekable_p (Lstream *stream); 2114 static int decoding_seekable_p (Lstream *stream);
1774 static int decoding_flusher (Lstream *stream); 2115 static int decoding_flusher (Lstream *stream);
1775 static int decoding_closer (Lstream *stream); 2116 static int decoding_closer (Lstream *stream);
1776 2117
1777 static Lisp_Object decoding_marker (Lisp_Object stream, 2118 static Lisp_Object decoding_marker (Lisp_Object stream);
1778 void (*markobj) (Lisp_Object));
1779 2119
1780 DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding, 2120 DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding,
1781 sizeof (struct decoding_stream)); 2121 sizeof (struct decoding_stream));
1782 2122
1783 static Lisp_Object 2123 static Lisp_Object
1784 decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object)) 2124 decoding_marker (Lisp_Object stream)
1785 { 2125 {
1786 Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end; 2126 Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end;
1787 Lisp_Object str_obj; 2127 Lisp_Object str_obj;
1788 2128
1789 /* We do not need to mark the coding systems or charsets stored 2129 /* We do not need to mark the coding systems or charsets stored
1790 within the stream because they are stored in a global list 2130 within the stream because they are stored in a global list
1791 and automatically marked. */ 2131 and automatically marked. */
1792 2132
1793 XSETLSTREAM (str_obj, str); 2133 XSETLSTREAM (str_obj, str);
1794 markobj (str_obj); 2134 mark_object (str_obj);
1795 if (str->imp->marker) 2135 if (str->imp->marker)
1796 return (str->imp->marker) (str_obj, markobj); 2136 return (str->imp->marker) (str_obj);
1797 else 2137 else
1798 return Qnil; 2138 return Qnil;
1799 } 2139 }
1800 2140
1801 /* Read SIZE bytes of data and store it into DATA. We are a decoding stream 2141 /* Read SIZE bytes of data and store it into DATA. We are a decoding stream
1802 so we read data from the other end, decode it, and store it into DATA. */ 2142 so we read data from the other end, decode it, and store it into DATA. */
1803 2143
1804 static int 2144 static ssize_t
1805 decoding_reader (Lstream *stream, unsigned char *data, size_t size) 2145 decoding_reader (Lstream *stream, unsigned char *data, size_t size)
1806 { 2146 {
1807 struct decoding_stream *str = DECODING_STREAM_DATA (stream); 2147 struct decoding_stream *str = DECODING_STREAM_DATA (stream);
1808 unsigned char *orig_data = data; 2148 unsigned char *orig_data = data;
1809 int read_size; 2149 ssize_t read_size;
1810 int error_occurred = 0; 2150 int error_occurred = 0;
1811 2151
1812 /* We need to interface to mule_decode(), which expects to take some 2152 /* We need to interface to mule_decode(), which expects to take some
1813 amount of data and store the result into a Dynarr. We have 2153 amount of data and store the result into a Dynarr. We have
1814 mule_decode() store into str->runoff, and take data from there 2154 mule_decode() store into str->runoff, and take data from there
1861 return error_occurred ? -1 : 0; 2201 return error_occurred ? -1 : 0;
1862 else 2202 else
1863 return data - orig_data; 2203 return data - orig_data;
1864 } 2204 }
1865 2205
1866 static int 2206 static ssize_t
1867 decoding_writer (Lstream *stream, CONST unsigned char *data, size_t size) 2207 decoding_writer (Lstream *stream, const unsigned char *data, size_t size)
1868 { 2208 {
1869 struct decoding_stream *str = DECODING_STREAM_DATA (stream); 2209 struct decoding_stream *str = DECODING_STREAM_DATA (stream);
1870 int retval; 2210 ssize_t retval;
1871 2211
1872 /* Decode all our data into the runoff, and then attempt to write 2212 /* Decode all our data into the runoff, and then attempt to write
1873 it all out to the other end. Remove whatever chunk we succeeded 2213 it all out to the other end. Remove whatever chunk we succeeded
1874 in writing. */ 2214 in writing. */
1875 mule_decode (stream, data, str->runoff, size); 2215 mule_decode (stream, data, str->runoff, size);
1895 } 2235 }
1896 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL) 2236 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL)
1897 { 2237 {
1898 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys)); 2238 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
1899 } 2239 }
2240 str->counter = 0;
1900 #endif /* MULE */ 2241 #endif /* MULE */
1901 str->flags = str->ch = 0; 2242 str->flags = str->ch = 0;
1902 } 2243 }
1903 2244
1904 static int 2245 static int
1971 stream for reading using a non-fully-specified coding system and 2312 stream for reading using a non-fully-specified coding system and
1972 a non-seekable input stream. */ 2313 a non-seekable input stream. */
1973 2314
1974 static Lisp_Object 2315 static Lisp_Object
1975 make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys, 2316 make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys,
1976 CONST char *mode) 2317 const char *mode)
1977 { 2318 {
1978 Lstream *lstr = Lstream_new (lstream_decoding, mode); 2319 Lstream *lstr = Lstream_new (lstream_decoding, mode);
1979 struct decoding_stream *str = DECODING_STREAM_DATA (lstr); 2320 struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
1980 Lisp_Object obj; 2321 Lisp_Object obj;
1981 2322
2014 written to that stream; that is handled in decoding_reader() 2355 written to that stream; that is handled in decoding_reader()
2015 or decoding_writer(). This allows the same functions to 2356 or decoding_writer(). This allows the same functions to
2016 be used for both reading and writing. */ 2357 be used for both reading and writing. */
2017 2358
2018 static void 2359 static void
2019 mule_decode (Lstream *decoding, CONST unsigned char *src, 2360 mule_decode (Lstream *decoding, const unsigned char *src,
2020 unsigned_char_dynarr *dst, unsigned int n) 2361 unsigned_char_dynarr *dst, unsigned int n)
2021 { 2362 {
2022 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 2363 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
2023 2364
2024 /* If necessary, do encoding-detection now. We do this when 2365 /* If necessary, do encoding-detection now. We do this when
2078 break; 2419 break;
2079 case CODESYS_UTF8: 2420 case CODESYS_UTF8:
2080 decode_coding_utf8 (decoding, src, dst, n); 2421 decode_coding_utf8 (decoding, src, dst, n);
2081 break; 2422 break;
2082 case CODESYS_CCL: 2423 case CODESYS_CCL:
2083 ccl_driver (&str->ccl, src, dst, n, 0); 2424 str->ccl.last_block = str->flags & CODING_STATE_END;
2425 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING);
2084 break; 2426 break;
2085 case CODESYS_ISO2022: 2427 case CODESYS_ISO2022:
2086 decode_coding_iso2022 (decoding, src, dst, n); 2428 decode_coding_iso2022 (decoding, src, dst, n);
2087 break; 2429 break;
2088 #endif /* MULE */ 2430 #endif /* MULE */
2133 while (1) 2475 while (1)
2134 { 2476 {
2135 char tempbuf[1024]; /* some random amount */ 2477 char tempbuf[1024]; /* some random amount */
2136 Bufpos newpos, even_newer_pos; 2478 Bufpos newpos, even_newer_pos;
2137 Bufpos oldpos = lisp_buffer_stream_startpos (istr); 2479 Bufpos oldpos = lisp_buffer_stream_startpos (istr);
2138 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf)); 2480 ssize_t size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
2139 2481
2140 if (!size_in_bytes) 2482 if (!size_in_bytes)
2141 break; 2483 break;
2142 newpos = lisp_buffer_stream_startpos (istr); 2484 newpos = lisp_buffer_stream_startpos (istr);
2143 Lstream_write (ostr, tempbuf, size_in_bytes); 2485 Lstream_write (ostr, tempbuf, size_in_bytes);
2218 used by the CCL encoder. */ 2560 used by the CCL encoder. */
2219 struct ccl_program ccl; 2561 struct ccl_program ccl;
2220 #endif /* MULE */ 2562 #endif /* MULE */
2221 }; 2563 };
2222 2564
2223 static int encoding_reader (Lstream *stream, unsigned char *data, size_t size); 2565 static ssize_t encoding_reader (Lstream *stream, unsigned char *data, size_t size);
2224 static int encoding_writer (Lstream *stream, CONST unsigned char *data, 2566 static ssize_t encoding_writer (Lstream *stream, const unsigned char *data,
2225 size_t size); 2567 size_t size);
2226 static int encoding_rewinder (Lstream *stream); 2568 static int encoding_rewinder (Lstream *stream);
2227 static int encoding_seekable_p (Lstream *stream); 2569 static int encoding_seekable_p (Lstream *stream);
2228 static int encoding_flusher (Lstream *stream); 2570 static int encoding_flusher (Lstream *stream);
2229 static int encoding_closer (Lstream *stream); 2571 static int encoding_closer (Lstream *stream);
2230 2572
2231 static Lisp_Object encoding_marker (Lisp_Object stream, 2573 static Lisp_Object encoding_marker (Lisp_Object stream);
2232 void (*markobj) (Lisp_Object));
2233 2574
2234 DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding, 2575 DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding,
2235 sizeof (struct encoding_stream)); 2576 sizeof (struct encoding_stream));
2236 2577
2237 static Lisp_Object 2578 static Lisp_Object
2238 encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object)) 2579 encoding_marker (Lisp_Object stream)
2239 { 2580 {
2240 Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end; 2581 Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end;
2241 Lisp_Object str_obj; 2582 Lisp_Object str_obj;
2242 2583
2243 /* We do not need to mark the coding systems or charsets stored 2584 /* We do not need to mark the coding systems or charsets stored
2244 within the stream because they are stored in a global list 2585 within the stream because they are stored in a global list
2245 and automatically marked. */ 2586 and automatically marked. */
2246 2587
2247 XSETLSTREAM (str_obj, str); 2588 XSETLSTREAM (str_obj, str);
2248 markobj (str_obj); 2589 mark_object (str_obj);
2249 if (str->imp->marker) 2590 if (str->imp->marker)
2250 return (str->imp->marker) (str_obj, markobj); 2591 return (str->imp->marker) (str_obj);
2251 else 2592 else
2252 return Qnil; 2593 return Qnil;
2253 } 2594 }
2254 2595
2255 /* Read SIZE bytes of data and store it into DATA. We are a encoding stream 2596 /* Read SIZE bytes of data and store it into DATA. We are a encoding stream
2256 so we read data from the other end, encode it, and store it into DATA. */ 2597 so we read data from the other end, encode it, and store it into DATA. */
2257 2598
2258 static int 2599 static ssize_t
2259 encoding_reader (Lstream *stream, unsigned char *data, size_t size) 2600 encoding_reader (Lstream *stream, unsigned char *data, size_t size)
2260 { 2601 {
2261 struct encoding_stream *str = ENCODING_STREAM_DATA (stream); 2602 struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
2262 unsigned char *orig_data = data; 2603 unsigned char *orig_data = data;
2263 int read_size; 2604 ssize_t read_size;
2264 int error_occurred = 0; 2605 int error_occurred = 0;
2265 2606
2266 /* We need to interface to mule_encode(), which expects to take some 2607 /* We need to interface to mule_encode(), which expects to take some
2267 amount of data and store the result into a Dynarr. We have 2608 amount of data and store the result into a Dynarr. We have
2268 mule_encode() store into str->runoff, and take data from there 2609 mule_encode() store into str->runoff, and take data from there
2315 return error_occurred ? -1 : 0; 2656 return error_occurred ? -1 : 0;
2316 else 2657 else
2317 return data - orig_data; 2658 return data - orig_data;
2318 } 2659 }
2319 2660
2320 static int 2661 static ssize_t
2321 encoding_writer (Lstream *stream, CONST unsigned char *data, size_t size) 2662 encoding_writer (Lstream *stream, const unsigned char *data, size_t size)
2322 { 2663 {
2323 struct encoding_stream *str = ENCODING_STREAM_DATA (stream); 2664 struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
2324 int retval; 2665 ssize_t retval;
2325 2666
2326 /* Encode all our data into the runoff, and then attempt to write 2667 /* Encode all our data into the runoff, and then attempt to write
2327 it all out to the other end. Remove whatever chunk we succeeded 2668 it all out to the other end. Remove whatever chunk we succeeded
2328 in writing. */ 2669 in writing. */
2329 mule_encode (stream, data, str->runoff, size); 2670 mule_encode (stream, data, str->runoff, size);
2427 reset_encoding_stream (str); 2768 reset_encoding_stream (str);
2428 } 2769 }
2429 2770
2430 static Lisp_Object 2771 static Lisp_Object
2431 make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys, 2772 make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys,
2432 CONST char *mode) 2773 const char *mode)
2433 { 2774 {
2434 Lstream *lstr = Lstream_new (lstream_encoding, mode); 2775 Lstream *lstr = Lstream_new (lstream_encoding, mode);
2435 struct encoding_stream *str = ENCODING_STREAM_DATA (lstr); 2776 struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
2436 Lisp_Object obj; 2777 Lisp_Object obj;
2437 2778
2458 /* Convert N bytes of internally-formatted data stored in SRC to an 2799 /* Convert N bytes of internally-formatted data stored in SRC to an
2459 external format, according to the encoding stream ENCODING. 2800 external format, according to the encoding stream ENCODING.
2460 Store the encoded data into DST. */ 2801 Store the encoded data into DST. */
2461 2802
2462 static void 2803 static void
2463 mule_encode (Lstream *encoding, CONST unsigned char *src, 2804 mule_encode (Lstream *encoding, const unsigned char *src,
2464 unsigned_char_dynarr *dst, unsigned int n) 2805 unsigned_char_dynarr *dst, unsigned int n)
2465 { 2806 {
2466 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 2807 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
2467 2808
2468 switch (CODING_SYSTEM_TYPE (str->codesys)) 2809 switch (CODING_SYSTEM_TYPE (str->codesys))
2490 break; 2831 break;
2491 case CODESYS_UTF8: 2832 case CODESYS_UTF8:
2492 encode_coding_utf8 (encoding, src, dst, n); 2833 encode_coding_utf8 (encoding, src, dst, n);
2493 break; 2834 break;
2494 case CODESYS_CCL: 2835 case CODESYS_CCL:
2495 ccl_driver (&str->ccl, src, dst, n, 0); 2836 str->ccl.last_block = str->flags & CODING_STATE_END;
2837 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING);
2496 break; 2838 break;
2497 case CODESYS_ISO2022: 2839 case CODESYS_ISO2022:
2498 encode_coding_iso2022 (encoding, src, dst, n); 2840 encode_coding_iso2022 (encoding, src, dst, n);
2499 break; 2841 break;
2500 #endif /* MULE */ 2842 #endif /* MULE */
2541 while (1) 2883 while (1)
2542 { 2884 {
2543 char tempbuf[1024]; /* some random amount */ 2885 char tempbuf[1024]; /* some random amount */
2544 Bufpos newpos, even_newer_pos; 2886 Bufpos newpos, even_newer_pos;
2545 Bufpos oldpos = lisp_buffer_stream_startpos (istr); 2887 Bufpos oldpos = lisp_buffer_stream_startpos (istr);
2546 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf)); 2888 ssize_t size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
2547 2889
2548 if (!size_in_bytes) 2890 if (!size_in_bytes)
2549 break; 2891 break;
2550 newpos = lisp_buffer_stream_startpos (istr); 2892 newpos = lisp_buffer_stream_startpos (istr);
2551 Lstream_write (ostr, tempbuf, size_in_bytes); 2893 Lstream_write (ostr, tempbuf, size_in_bytes);
2604 2946
2605 #define BYTE_SJIS_KATAKANA_P(c) \ 2947 #define BYTE_SJIS_KATAKANA_P(c) \
2606 ((c) >= 0xA1 && (c) <= 0xDF) 2948 ((c) >= 0xA1 && (c) <= 0xDF)
2607 2949
2608 static int 2950 static int
2609 detect_coding_sjis (struct detection_state *st, CONST unsigned char *src, 2951 detect_coding_sjis (struct detection_state *st, const unsigned char *src,
2610 unsigned int n) 2952 unsigned int n)
2611 { 2953 {
2612 int c; 2954 int c;
2613 2955
2614 while (n--) 2956 while (n--)
2629 } 2971 }
2630 2972
2631 /* Convert Shift-JIS data to internal format. */ 2973 /* Convert Shift-JIS data to internal format. */
2632 2974
2633 static void 2975 static void
2634 decode_coding_sjis (Lstream *decoding, CONST unsigned char *src, 2976 decode_coding_sjis (Lstream *decoding, const unsigned char *src,
2635 unsigned_char_dynarr *dst, unsigned int n) 2977 unsigned_char_dynarr *dst, unsigned int n)
2636 { 2978 {
2637 unsigned char c; 2979 unsigned char c;
2638 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 2980 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
2639 unsigned int flags = str->flags; 2981 unsigned int flags = str->flags;
2686 } 3028 }
2687 3029
2688 /* Convert internally-formatted data to Shift-JIS. */ 3030 /* Convert internally-formatted data to Shift-JIS. */
2689 3031
2690 static void 3032 static void
2691 encode_coding_sjis (Lstream *encoding, CONST unsigned char *src, 3033 encode_coding_sjis (Lstream *encoding, const unsigned char *src,
2692 unsigned_char_dynarr *dst, unsigned int n) 3034 unsigned_char_dynarr *dst, unsigned int n)
2693 { 3035 {
2694 unsigned char c; 3036 unsigned char c;
2695 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3037 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
2696 unsigned int flags = str->flags; 3038 unsigned int flags = str->flags;
2890 b2 = I % BIG5_SAME_ROW; \ 3232 b2 = I % BIG5_SAME_ROW; \
2891 b2 += b2 < 0x3F ? 0x40 : 0x62; \ 3233 b2 += b2 < 0x3F ? 0x40 : 0x62; \
2892 } while (0) 3234 } while (0)
2893 3235
2894 static int 3236 static int
2895 detect_coding_big5 (struct detection_state *st, CONST unsigned char *src, 3237 detect_coding_big5 (struct detection_state *st, const unsigned char *src,
2896 unsigned int n) 3238 unsigned int n)
2897 { 3239 {
2898 int c; 3240 int c;
2899 3241
2900 while (n--) 3242 while (n--)
2916 } 3258 }
2917 3259
2918 /* Convert Big5 data to internal format. */ 3260 /* Convert Big5 data to internal format. */
2919 3261
2920 static void 3262 static void
2921 decode_coding_big5 (Lstream *decoding, CONST unsigned char *src, 3263 decode_coding_big5 (Lstream *decoding, const unsigned char *src,
2922 unsigned_char_dynarr *dst, unsigned int n) 3264 unsigned_char_dynarr *dst, unsigned int n)
2923 { 3265 {
2924 unsigned char c; 3266 unsigned char c;
2925 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3267 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
2926 unsigned int flags = str->flags; 3268 unsigned int flags = str->flags;
2966 } 3308 }
2967 3309
2968 /* Convert internally-formatted data to Big5. */ 3310 /* Convert internally-formatted data to Big5. */
2969 3311
2970 static void 3312 static void
2971 encode_coding_big5 (Lstream *encoding, CONST unsigned char *src, 3313 encode_coding_big5 (Lstream *encoding, const unsigned char *src,
2972 unsigned_char_dynarr *dst, unsigned int n) 3314 unsigned_char_dynarr *dst, unsigned int n)
2973 { 3315 {
2974 unsigned char c; 3316 unsigned char c;
2975 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3317 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
2976 unsigned int flags = str->flags; 3318 unsigned int flags = str->flags;
3083 /* */ 3425 /* */
3084 /* UCS-4 character codes are implemented as nonnegative integers. */ 3426 /* UCS-4 character codes are implemented as nonnegative integers. */
3085 /* */ 3427 /* */
3086 /************************************************************************/ 3428 /************************************************************************/
3087 3429
3088 Lisp_Object ucs_to_mule_table[65536];
3089 Lisp_Object mule_to_ucs_table;
3090 3430
3091 DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /* 3431 DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
3092 Map UCS-4 code CODE to Mule character CHARACTER. 3432 Map UCS-4 code CODE to Mule character CHARACTER.
3093 3433
3094 Return T on success, NIL on failure. 3434 Return T on success, NIL on failure.
3099 3439
3100 CHECK_CHAR (character); 3440 CHECK_CHAR (character);
3101 CHECK_INT (code); 3441 CHECK_INT (code);
3102 c = XINT (code); 3442 c = XINT (code);
3103 3443
3104 if (c < sizeof (ucs_to_mule_table)) 3444 if (c < sizeof (fcd->ucs_to_mule_table))
3105 { 3445 {
3106 ucs_to_mule_table[c] = character; 3446 fcd->ucs_to_mule_table[c] = character;
3107 return Qt; 3447 return Qt;
3108 } 3448 }
3109 else 3449 else
3110 return Qnil; 3450 return Qnil;
3111 } 3451 }
3112 3452
3113 static Lisp_Object 3453 static Lisp_Object
3114 ucs_to_char (unsigned long code) 3454 ucs_to_char (unsigned long code)
3115 { 3455 {
3116 if (code < sizeof (ucs_to_mule_table)) 3456 if (code < sizeof (fcd->ucs_to_mule_table))
3117 { 3457 {
3118 return ucs_to_mule_table[code]; 3458 return fcd->ucs_to_mule_table[code];
3119 } 3459 }
3120 else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14)) 3460 else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14))
3121 { 3461 {
3122 unsigned int c; 3462 unsigned int c;
3123 3463
3162 { 3502 {
3163 return Fget_char_table (character, mule_to_ucs_table); 3503 return Fget_char_table (character, mule_to_ucs_table);
3164 } 3504 }
3165 3505
3166 /* Decode a UCS-4 character into a buffer. If the lookup fails, use 3506 /* Decode a UCS-4 character into a buffer. If the lookup fails, use
3167 JIS X 0208 double-width `=' instead. 3507 <GETA MARK> (U+3013) of JIS X 0208, which means correct character
3508 is not found, instead.
3168 #### do something more appropriate (use blob?) 3509 #### do something more appropriate (use blob?)
3169 Danger, Will Robinson! Data loss. Should we signal user? */ 3510 Danger, Will Robinson! Data loss. Should we signal user? */
3170 static void 3511 static void
3171 decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst) 3512 decode_ucs4 (unsigned long ch, unsigned_char_dynarr *dst)
3172 { 3513 {
3234 Dynarr_add (dst, (code >> 8) & 255); 3575 Dynarr_add (dst, (code >> 8) & 255);
3235 Dynarr_add (dst, code & 255); 3576 Dynarr_add (dst, code & 255);
3236 } 3577 }
3237 3578
3238 static int 3579 static int
3239 detect_coding_ucs4 (struct detection_state *st, CONST unsigned char *src, 3580 detect_coding_ucs4 (struct detection_state *st, const unsigned char *src,
3240 unsigned int n) 3581 unsigned int n)
3241 { 3582 {
3242 while (n--) 3583 while (n--)
3243 { 3584 {
3244 int c = *src++; 3585 int c = *src++;
3259 } 3600 }
3260 return CODING_CATEGORY_UCS4_MASK; 3601 return CODING_CATEGORY_UCS4_MASK;
3261 } 3602 }
3262 3603
3263 static void 3604 static void
3264 decode_coding_ucs4 (Lstream *decoding, CONST unsigned char *src, 3605 decode_coding_ucs4 (Lstream *decoding, const unsigned char *src,
3265 unsigned_char_dynarr *dst, unsigned int n) 3606 unsigned_char_dynarr *dst, unsigned int n)
3266 { 3607 {
3267 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3608 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3268 unsigned int flags = str->flags; 3609 unsigned int flags = str->flags;
3269 unsigned int ch = str->ch; 3610 unsigned int ch = str->ch;
3611 unsigned char counter = str->counter;
3270 3612
3271 while (n--) 3613 while (n--)
3272 { 3614 {
3273 unsigned char c = *src++; 3615 unsigned char c = *src++;
3274 switch (flags) 3616 switch (counter)
3275 { 3617 {
3276 case 0: 3618 case 0:
3277 ch = c; 3619 ch = c;
3278 flags = 3; 3620 counter = 3;
3279 break; 3621 break;
3280 case 1: 3622 case 1:
3281 decode_ucs4 ( ( ch << 8 ) | c, dst); 3623 decode_ucs4 ( ( ch << 8 ) | c, dst);
3282 ch = 0; 3624 ch = 0;
3283 flags = 0; 3625 counter = 0;
3284 break; 3626 break;
3285 default: 3627 default:
3286 ch = ( ch << 8 ) | c; 3628 ch = ( ch << 8 ) | c;
3287 flags--; 3629 counter--;
3288 } 3630 }
3289 } 3631 }
3290 if (flags & CODING_STATE_END) 3632 if (counter & CODING_STATE_END)
3291 DECODE_OUTPUT_PARTIAL_CHAR (ch); 3633 DECODE_OUTPUT_PARTIAL_CHAR (ch);
3292 3634
3293 str->flags = flags; 3635 str->flags = flags;
3294 str->ch = ch; 3636 str->ch = ch;
3637 str->counter = counter;
3295 } 3638 }
3296 3639
3297 static void 3640 static void
3298 encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src, 3641 encode_coding_ucs4 (Lstream *encoding, const unsigned char *src,
3299 unsigned_char_dynarr *dst, unsigned int n) 3642 unsigned_char_dynarr *dst, unsigned int n)
3300 { 3643 {
3301 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3644 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3302 unsigned int flags = str->flags; 3645 unsigned int flags = str->flags;
3303 unsigned int ch = str->ch; 3646 unsigned int ch = str->ch;
3306 3649
3307 #ifdef ENABLE_COMPOSITE_CHARS 3650 #ifdef ENABLE_COMPOSITE_CHARS
3308 /* flags for handling composite chars. We do a little switcharoo 3651 /* flags for handling composite chars. We do a little switcharoo
3309 on the source while we're outputting the composite char. */ 3652 on the source while we're outputting the composite char. */
3310 unsigned int saved_n = 0; 3653 unsigned int saved_n = 0;
3311 CONST unsigned char *saved_src = NULL; 3654 const unsigned char *saved_src = NULL;
3312 int in_composite = 0; 3655 int in_composite = 0;
3313 3656
3314 back_to_square_n: 3657 back_to_square_n:
3315 #endif 3658 #endif
3316 3659
3432 /************************************************************************/ 3775 /************************************************************************/
3433 /* UTF-8 methods */ 3776 /* UTF-8 methods */
3434 /************************************************************************/ 3777 /************************************************************************/
3435 3778
3436 static int 3779 static int
3437 detect_coding_utf8 (struct detection_state *st, CONST unsigned char *src, 3780 detect_coding_utf8 (struct detection_state *st, const unsigned char *src,
3438 unsigned int n) 3781 unsigned int n)
3439 { 3782 {
3440 while (n--) 3783 while (n--)
3441 { 3784 {
3442 unsigned char c = *src++; 3785 unsigned char c = *src++;
3467 } 3810 }
3468 return CODING_CATEGORY_UTF8_MASK; 3811 return CODING_CATEGORY_UTF8_MASK;
3469 } 3812 }
3470 3813
3471 static void 3814 static void
3472 decode_coding_utf8 (Lstream *decoding, CONST unsigned char *src, 3815 decode_coding_utf8 (Lstream *decoding, const unsigned char *src,
3473 unsigned_char_dynarr *dst, unsigned int n) 3816 unsigned_char_dynarr *dst, unsigned int n)
3474 { 3817 {
3475 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3818 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3476 unsigned int flags = str->flags; 3819 unsigned int flags = str->flags;
3477 unsigned int ch = str->ch; 3820 unsigned int ch = str->ch;
3478 eol_type_t eol_type = str->eol_type; 3821 eol_type_t eol_type = str->eol_type;
3822 unsigned char counter = str->counter;
3479 3823
3480 while (n--) 3824 while (n--)
3481 { 3825 {
3482 unsigned char c = *src++; 3826 unsigned char c = *src++;
3483 switch (flags) 3827 switch (counter)
3484 { 3828 {
3485 case 0: 3829 case 0:
3486 if ( c >= 0xfc ) 3830 if ( c >= 0xfc )
3487 { 3831 {
3488 ch = c & 0x01; 3832 ch = c & 0x01;
3489 flags = 5; 3833 counter = 5;
3490 } 3834 }
3491 else if ( c >= 0xf8 ) 3835 else if ( c >= 0xf8 )
3492 { 3836 {
3493 ch = c & 0x03; 3837 ch = c & 0x03;
3494 flags = 4; 3838 counter = 4;
3495 } 3839 }
3496 else if ( c >= 0xf0 ) 3840 else if ( c >= 0xf0 )
3497 { 3841 {
3498 ch = c & 0x07; 3842 ch = c & 0x07;
3499 flags = 3; 3843 counter = 3;
3500 } 3844 }
3501 else if ( c >= 0xe0 ) 3845 else if ( c >= 0xe0 )
3502 { 3846 {
3503 ch = c & 0x0f; 3847 ch = c & 0x0f;
3504 flags = 2; 3848 counter = 2;
3505 } 3849 }
3506 else if ( c >= 0xc0 ) 3850 else if ( c >= 0xc0 )
3507 { 3851 {
3508 ch = c & 0x1f; 3852 ch = c & 0x1f;
3509 flags = 1; 3853 counter = 1;
3510 } 3854 }
3511 else 3855 else
3512 { 3856 {
3513 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); 3857 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
3514 decode_ucs4 (c, dst); 3858 decode_ucs4 (c, dst);
3516 break; 3860 break;
3517 case 1: 3861 case 1:
3518 ch = ( ch << 6 ) | ( c & 0x3f ); 3862 ch = ( ch << 6 ) | ( c & 0x3f );
3519 decode_ucs4 (ch, dst); 3863 decode_ucs4 (ch, dst);
3520 ch = 0; 3864 ch = 0;
3521 flags = 0; 3865 counter = 0;
3522 break; 3866 break;
3523 default: 3867 default:
3524 ch = ( ch << 6 ) | ( c & 0x3f ); 3868 ch = ( ch << 6 ) | ( c & 0x3f );
3525 flags--; 3869 counter--;
3526 } 3870 }
3527 label_continue_loop:; 3871 label_continue_loop:;
3528 } 3872 }
3529 3873
3530 if (flags & CODING_STATE_END) 3874 if (flags & CODING_STATE_END)
3531 DECODE_OUTPUT_PARTIAL_CHAR (ch); 3875 DECODE_OUTPUT_PARTIAL_CHAR (ch);
3532 3876
3533 str->flags = flags; 3877 str->flags = flags;
3534 str->ch = ch; 3878 str->ch = ch;
3879 str->counter = counter;
3535 } 3880 }
3536 3881
3537 static void 3882 static void
3538 encode_utf8 (Lisp_Object charset, 3883 encode_utf8 (Lisp_Object charset,
3539 unsigned char h, unsigned char l, unsigned_char_dynarr *dst) 3884 unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
3579 Dynarr_add (dst, (code & 0x3f) | 0x80); 3924 Dynarr_add (dst, (code & 0x3f) | 0x80);
3580 } 3925 }
3581 } 3926 }
3582 3927
3583 static void 3928 static void
3584 encode_coding_utf8 (Lstream *encoding, CONST unsigned char *src, 3929 encode_coding_utf8 (Lstream *encoding, const unsigned char *src,
3585 unsigned_char_dynarr *dst, unsigned int n) 3930 unsigned_char_dynarr *dst, unsigned int n)
3586 { 3931 {
3587 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3932 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3588 unsigned int flags = str->flags; 3933 unsigned int flags = str->flags;
3589 unsigned int ch = str->ch; 3934 unsigned int ch = str->ch;
3593 3938
3594 #ifdef ENABLE_COMPOSITE_CHARS 3939 #ifdef ENABLE_COMPOSITE_CHARS
3595 /* flags for handling composite chars. We do a little switcharoo 3940 /* flags for handling composite chars. We do a little switcharoo
3596 on the source while we're outputting the composite char. */ 3941 on the source while we're outputting the composite char. */
3597 unsigned int saved_n = 0; 3942 unsigned int saved_n = 0;
3598 CONST unsigned char *saved_src = NULL; 3943 const unsigned char *saved_src = NULL;
3599 int in_composite = 0; 3944 int in_composite = 0;
3600 3945
3601 back_to_square_n: 3946 back_to_square_n:
3602 #endif /* ENABLE_COMPOSITE_CHARS */ 3947 #endif /* ENABLE_COMPOSITE_CHARS */
3603 3948
3604 while (n--) 3949 while (n--)
3605 { 3950 {
3606 unsigned char c = *src++; 3951 unsigned char c = *src++;
3607 3952
3608 if (BYTE_ASCII_P (c)) 3953 if (BYTE_ASCII_P (c))
4297 iso->switched_dir_and_no_valid_charset_yet = 0; 4642 iso->switched_dir_and_no_valid_charset_yet = 0;
4298 return 1; 4643 return 1;
4299 } 4644 }
4300 4645
4301 static int 4646 static int
4302 detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src, 4647 detect_coding_iso2022 (struct detection_state *st, const unsigned char *src,
4303 unsigned int n) 4648 unsigned int n)
4304 { 4649 {
4305 int mask; 4650 int mask;
4306 4651
4307 /* #### There are serious deficiencies in the recognition mechanism 4652 /* #### There are serious deficiencies in the recognition mechanism
4488 } 4833 }
4489 4834
4490 /* Convert ISO2022-format data to internal format. */ 4835 /* Convert ISO2022-format data to internal format. */
4491 4836
4492 static void 4837 static void
4493 decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src, 4838 decode_coding_iso2022 (Lstream *decoding, const unsigned char *src,
4494 unsigned_char_dynarr *dst, unsigned int n) 4839 unsigned_char_dynarr *dst, unsigned int n)
4495 { 4840 {
4496 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 4841 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
4497 unsigned int flags = str->flags; 4842 unsigned int flags = str->flags;
4498 unsigned int ch = str->ch; 4843 unsigned int ch = str->ch;
4729 5074
4730 static void 5075 static void
4731 iso2022_designate (Lisp_Object charset, unsigned char reg, 5076 iso2022_designate (Lisp_Object charset, unsigned char reg,
4732 struct encoding_stream *str, unsigned_char_dynarr *dst) 5077 struct encoding_stream *str, unsigned_char_dynarr *dst)
4733 { 5078 {
4734 static CONST char inter94[] = "()*+"; 5079 static const char inter94[] = "()*+";
4735 static CONST char inter96[] = ",-./"; 5080 static const char inter96[] = ",-./";
4736 unsigned int type; 5081 unsigned int type;
4737 unsigned char final; 5082 unsigned char final;
4738 Lisp_Object old_charset = str->iso2022.charset[reg]; 5083 Lisp_Object old_charset = str->iso2022.charset[reg];
4739 5084
4740 str->iso2022.charset[reg] = charset; 5085 str->iso2022.charset[reg] = charset;
4814 } 5159 }
4815 5160
4816 /* Convert internally-formatted data to ISO2022 format. */ 5161 /* Convert internally-formatted data to ISO2022 format. */
4817 5162
4818 static void 5163 static void
4819 encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src, 5164 encode_coding_iso2022 (Lstream *encoding, const unsigned char *src,
4820 unsigned_char_dynarr *dst, unsigned int n) 5165 unsigned_char_dynarr *dst, unsigned int n)
4821 { 5166 {
4822 unsigned char charmask, c; 5167 unsigned char charmask, c;
4823 unsigned char char_boundary; 5168 unsigned char char_boundary;
4824 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 5169 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
4832 5177
4833 #ifdef ENABLE_COMPOSITE_CHARS 5178 #ifdef ENABLE_COMPOSITE_CHARS
4834 /* flags for handling composite chars. We do a little switcharoo 5179 /* flags for handling composite chars. We do a little switcharoo
4835 on the source while we're outputting the composite char. */ 5180 on the source while we're outputting the composite char. */
4836 unsigned int saved_n = 0; 5181 unsigned int saved_n = 0;
4837 CONST unsigned char *saved_src = NULL; 5182 const unsigned char *saved_src = NULL;
4838 int in_composite = 0; 5183 int in_composite = 0;
4839 #endif /* ENABLE_COMPOSITE_CHARS */ 5184 #endif /* ENABLE_COMPOSITE_CHARS */
4840 5185
4841 char_boundary = str->iso2022.current_char_boundary; 5186 char_boundary = str->iso2022.current_char_boundary;
4842 charset = str->iso2022.current_charset; 5187 charset = str->iso2022.current_charset;
5123 5468
5124 /* This is used when reading in "binary" files -- i.e. files that may 5469 /* This is used when reading in "binary" files -- i.e. files that may
5125 contain all 256 possible byte values and that are not to be 5470 contain all 256 possible byte values and that are not to be
5126 interpreted as being in any particular decoding. */ 5471 interpreted as being in any particular decoding. */
5127 static void 5472 static void
5128 decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src, 5473 decode_coding_no_conversion (Lstream *decoding, const unsigned char *src,
5129 unsigned_char_dynarr *dst, unsigned int n) 5474 unsigned_char_dynarr *dst, unsigned int n)
5130 { 5475 {
5131 unsigned char c; 5476 unsigned char c;
5132 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 5477 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
5133 unsigned int flags = str->flags; 5478 unsigned int flags = str->flags;
5148 str->flags = flags; 5493 str->flags = flags;
5149 str->ch = ch; 5494 str->ch = ch;
5150 } 5495 }
5151 5496
5152 static void 5497 static void
5153 encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src, 5498 encode_coding_no_conversion (Lstream *encoding, const unsigned char *src,
5154 unsigned_char_dynarr *dst, unsigned int n) 5499 unsigned_char_dynarr *dst, unsigned int n)
5155 { 5500 {
5156 unsigned char c; 5501 unsigned char c;
5157 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 5502 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
5158 unsigned int flags = str->flags; 5503 unsigned int flags = str->flags;
5202 str->flags = flags; 5547 str->flags = flags;
5203 str->ch = ch; 5548 str->ch = ch;
5204 } 5549 }
5205 5550
5206 5551
5207 /************************************************************************/ 5552
5208 /* Simple internal/external functions */
5209 /************************************************************************/
5210
5211 static Extbyte_dynarr *conversion_out_dynarr;
5212 static Bufbyte_dynarr *conversion_in_dynarr;
5213
5214 /* Determine coding system from coding format */
5215
5216 /* #### not correct for all values of `fmt'! */
5217 static Lisp_Object
5218 external_data_format_to_coding_system (enum external_data_format fmt)
5219 {
5220 switch (fmt)
5221 {
5222 case FORMAT_FILENAME:
5223 case FORMAT_TERMINAL:
5224 if (EQ (Vfile_name_coding_system, Qnil) ||
5225 EQ (Vfile_name_coding_system, Qbinary))
5226 return Qnil;
5227 else
5228 return Fget_coding_system (Vfile_name_coding_system);
5229 #ifdef MULE
5230 case FORMAT_CTEXT:
5231 return Fget_coding_system (Qctext);
5232 #endif
5233 default:
5234 return Qnil;
5235 }
5236 }
5237
5238 Extbyte *
5239 convert_to_external_format (CONST Bufbyte *ptr,
5240 Bytecount len,
5241 Extcount *len_out,
5242 enum external_data_format fmt)
5243 {
5244 Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
5245
5246 if (!conversion_out_dynarr)
5247 conversion_out_dynarr = Dynarr_new (Extbyte);
5248 else
5249 Dynarr_reset (conversion_out_dynarr);
5250
5251 if (NILP (coding_system))
5252 {
5253 CONST Bufbyte *end = ptr + len;
5254
5255 for (; ptr < end;)
5256 {
5257 Bufbyte c =
5258 (BYTE_ASCII_P (*ptr)) ? *ptr :
5259 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) :
5260 (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
5261 '~';
5262
5263 Dynarr_add (conversion_out_dynarr, (Extbyte) c);
5264 INC_CHARPTR (ptr);
5265 }
5266
5267 #ifdef ERROR_CHECK_BUFPOS
5268 assert (ptr == end);
5269 #endif
5270 }
5271 else
5272 {
5273 Lisp_Object instream, outstream, da_outstream;
5274 Lstream *istr, *ostr;
5275 struct gcpro gcpro1, gcpro2, gcpro3;
5276 char tempbuf[1024]; /* some random amount */
5277
5278 instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
5279 da_outstream = make_dynarr_output_stream
5280 ((unsigned_char_dynarr *) conversion_out_dynarr);
5281 outstream =
5282 make_encoding_output_stream (XLSTREAM (da_outstream), coding_system);
5283 istr = XLSTREAM (instream);
5284 ostr = XLSTREAM (outstream);
5285 GCPRO3 (instream, outstream, da_outstream);
5286 while (1)
5287 {
5288 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
5289 if (!size_in_bytes)
5290 break;
5291 Lstream_write (ostr, tempbuf, size_in_bytes);
5292 }
5293 Lstream_close (istr);
5294 Lstream_close (ostr);
5295 UNGCPRO;
5296 Lstream_delete (istr);
5297 Lstream_delete (ostr);
5298 Lstream_delete (XLSTREAM (da_outstream));
5299 }
5300
5301 *len_out = Dynarr_length (conversion_out_dynarr);
5302 Dynarr_add (conversion_out_dynarr, 0); /* remember to zero-terminate! */
5303 return Dynarr_atp (conversion_out_dynarr, 0);
5304 }
5305
5306 Bufbyte *
5307 convert_from_external_format (CONST Extbyte *ptr,
5308 Extcount len,
5309 Bytecount *len_out,
5310 enum external_data_format fmt)
5311 {
5312 Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
5313
5314 if (!conversion_in_dynarr)
5315 conversion_in_dynarr = Dynarr_new (Bufbyte);
5316 else
5317 Dynarr_reset (conversion_in_dynarr);
5318
5319 if (NILP (coding_system))
5320 {
5321 CONST Extbyte *end = ptr + len;
5322 for (; ptr < end; ptr++)
5323 {
5324 Extbyte c = *ptr;
5325 DECODE_ADD_BINARY_CHAR (c, conversion_in_dynarr);
5326 }
5327 }
5328 else
5329 {
5330 Lisp_Object instream, outstream, da_outstream;
5331 Lstream *istr, *ostr;
5332 struct gcpro gcpro1, gcpro2, gcpro3;
5333 char tempbuf[1024]; /* some random amount */
5334
5335 instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
5336 da_outstream = make_dynarr_output_stream
5337 ((unsigned_char_dynarr *) conversion_in_dynarr);
5338 outstream =
5339 make_decoding_output_stream (XLSTREAM (da_outstream), coding_system);
5340 istr = XLSTREAM (instream);
5341 ostr = XLSTREAM (outstream);
5342 GCPRO3 (instream, outstream, da_outstream);
5343 while (1)
5344 {
5345 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
5346 if (!size_in_bytes)
5347 break;
5348 Lstream_write (ostr, tempbuf, size_in_bytes);
5349 }
5350 Lstream_close (istr);
5351 Lstream_close (ostr);
5352 UNGCPRO;
5353 Lstream_delete (istr);
5354 Lstream_delete (ostr);
5355 Lstream_delete (XLSTREAM (da_outstream));
5356 }
5357
5358 *len_out = Dynarr_length (conversion_in_dynarr);
5359 Dynarr_add (conversion_in_dynarr, 0); /* remember to zero-terminate! */
5360 return Dynarr_atp (conversion_in_dynarr, 0);
5361 }
5362
5363
5364 /************************************************************************/ 5553 /************************************************************************/
5365 /* Initialization */ 5554 /* Initialization */
5366 /************************************************************************/ 5555 /************************************************************************/
5367 5556
5368 void 5557 void
5369 syms_of_mule_coding (void) 5558 syms_of_file_coding (void)
5370 { 5559 {
5371 defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
5372 deferror (&Qcoding_system_error, "coding-system-error", 5560 deferror (&Qcoding_system_error, "coding-system-error",
5373 "Coding-system error", Qio_error); 5561 "Coding-system error", Qio_error);
5374 5562
5375 DEFSUBR (Fcoding_system_p); 5563 DEFSUBR (Fcoding_system_p);
5376 DEFSUBR (Ffind_coding_system); 5564 DEFSUBR (Ffind_coding_system);
5377 DEFSUBR (Fget_coding_system); 5565 DEFSUBR (Fget_coding_system);
5378 DEFSUBR (Fcoding_system_list); 5566 DEFSUBR (Fcoding_system_list);
5379 DEFSUBR (Fcoding_system_name); 5567 DEFSUBR (Fcoding_system_name);
5380 DEFSUBR (Fmake_coding_system); 5568 DEFSUBR (Fmake_coding_system);
5381 DEFSUBR (Fcopy_coding_system); 5569 DEFSUBR (Fcopy_coding_system);
5570 DEFSUBR (Fcoding_system_canonical_name_p);
5571 DEFSUBR (Fcoding_system_alias_p);
5572 DEFSUBR (Fcoding_system_aliasee);
5573 DEFSUBR (Fdefine_coding_system_alias);
5382 DEFSUBR (Fsubsidiary_coding_system); 5574 DEFSUBR (Fsubsidiary_coding_system);
5383 5575
5384 DEFSUBR (Fcoding_system_type); 5576 DEFSUBR (Fcoding_system_type);
5385 DEFSUBR (Fcoding_system_doc_string); 5577 DEFSUBR (Fcoding_system_doc_string);
5386 #ifdef MULE 5578 #ifdef MULE
5405 DEFSUBR (Fset_ucs_char); 5597 DEFSUBR (Fset_ucs_char);
5406 DEFSUBR (Fucs_char); 5598 DEFSUBR (Fucs_char);
5407 DEFSUBR (Fset_char_ucs); 5599 DEFSUBR (Fset_char_ucs);
5408 DEFSUBR (Fchar_ucs); 5600 DEFSUBR (Fchar_ucs);
5409 #endif /* MULE */ 5601 #endif /* MULE */
5410 defsymbol (&Qcoding_system_p, "coding-system-p"); 5602 defsymbol (&Qcoding_systemp, "coding-system-p");
5411 defsymbol (&Qno_conversion, "no-conversion"); 5603 defsymbol (&Qno_conversion, "no-conversion");
5604 defsymbol (&Qraw_text, "raw-text");
5412 #ifdef MULE 5605 #ifdef MULE
5413 defsymbol (&Qbig5, "big5"); 5606 defsymbol (&Qbig5, "big5");
5414 defsymbol (&Qshift_jis, "shift-jis"); 5607 defsymbol (&Qshift_jis, "shift-jis");
5415 defsymbol (&Qucs4, "ucs-4"); 5608 defsymbol (&Qucs4, "ucs-4");
5416 defsymbol (&Qutf8, "utf-8"); 5609 defsymbol (&Qutf8, "utf-8");
5450 #endif /* MULE */ 5643 #endif /* MULE */
5451 defsymbol (&Qencode, "encode"); 5644 defsymbol (&Qencode, "encode");
5452 defsymbol (&Qdecode, "decode"); 5645 defsymbol (&Qdecode, "decode");
5453 5646
5454 #ifdef MULE 5647 #ifdef MULE
5455 defsymbol (&Qctext, "ctext");
5456 defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS], 5648 defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS],
5457 "shift-jis"); 5649 "shift-jis");
5458 defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5], 5650 defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
5459 "big5"); 5651 "big5");
5460 defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4], 5652 defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4],
5475 defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION], 5667 defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION],
5476 "no-conversion"); 5668 "no-conversion");
5477 } 5669 }
5478 5670
5479 void 5671 void
5480 lstream_type_create_mule_coding (void) 5672 lstream_type_create_file_coding (void)
5481 { 5673 {
5482 LSTREAM_HAS_METHOD (decoding, reader); 5674 LSTREAM_HAS_METHOD (decoding, reader);
5483 LSTREAM_HAS_METHOD (decoding, writer); 5675 LSTREAM_HAS_METHOD (decoding, writer);
5484 LSTREAM_HAS_METHOD (decoding, rewinder); 5676 LSTREAM_HAS_METHOD (decoding, rewinder);
5485 LSTREAM_HAS_METHOD (decoding, seekable_p); 5677 LSTREAM_HAS_METHOD (decoding, seekable_p);
5495 LSTREAM_HAS_METHOD (encoding, closer); 5687 LSTREAM_HAS_METHOD (encoding, closer);
5496 LSTREAM_HAS_METHOD (encoding, marker); 5688 LSTREAM_HAS_METHOD (encoding, marker);
5497 } 5689 }
5498 5690
5499 void 5691 void
5500 vars_of_mule_coding (void) 5692 vars_of_file_coding (void)
5501 { 5693 {
5502 int i; 5694 int i;
5695
5696 fcd = xnew (struct file_coding_dump);
5697 dumpstruct (&fcd, &fcd_description);
5503 5698
5504 /* Initialize to something reasonable ... */ 5699 /* Initialize to something reasonable ... */
5505 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 5700 for (i = 0; i <= CODING_CATEGORY_LAST; i++)
5506 { 5701 {
5507 coding_category_system[i] = Qnil; 5702 fcd->coding_category_system[i] = Qnil;
5508 coding_category_by_priority[i] = i; 5703 fcd->coding_category_by_priority[i] = i;
5509 } 5704 }
5510 5705
5511 Fprovide (intern ("file-coding")); 5706 Fprovide (intern ("file-coding"));
5512 5707
5513 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* 5708 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
5521 Not used under a windowing system. 5716 Not used under a windowing system.
5522 */ ); 5717 */ );
5523 Vterminal_coding_system = Qnil; 5718 Vterminal_coding_system = Qnil;
5524 5719
5525 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* 5720 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /*
5526 Overriding coding system used when writing a file or process. 5721 Overriding coding system used when reading from a file or process.
5527 You should *bind* this, not set it. If this is non-nil, it specifies 5722 You should bind this variable with `let', but do not set it globally.
5528 the coding system that will be used when a file or process is read 5723 If this is non-nil, it specifies the coding system that will be used
5529 in, and overrides `buffer-file-coding-system-for-read', 5724 to decode input on read operations, such as from a file or process.
5725 It overrides `buffer-file-coding-system-for-read',
5530 `insert-file-contents-pre-hook', etc. Use those variables instead of 5726 `insert-file-contents-pre-hook', etc. Use those variables instead of
5531 this one for permanent changes to the environment. 5727 this one for permanent changes to the environment. */ );
5532 */ );
5533 Vcoding_system_for_read = Qnil; 5728 Vcoding_system_for_read = Qnil;
5534 5729
5535 DEFVAR_LISP ("coding-system-for-write", 5730 DEFVAR_LISP ("coding-system-for-write",
5536 &Vcoding_system_for_write /* 5731 &Vcoding_system_for_write /*
5537 Overriding coding system used when writing a file or process. 5732 Overriding coding system used when writing to a file or process.
5538 You should *bind* this, not set it. If this is non-nil, it specifies 5733 You should bind this variable with `let', but do not set it globally.
5539 the coding system that will be used when a file or process is wrote 5734 If this is non-nil, it specifies the coding system that will be used
5540 in, and overrides `buffer-file-coding-system', 5735 to encode output for write operations, such as to a file or process.
5541 `write-region-pre-hook', etc. Use those variables instead of this one 5736 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc.
5542 for permanent changes to the environment. 5737 Use those variables instead of this one for permanent changes to the
5543 */ ); 5738 environment. */ );
5544 Vcoding_system_for_write = Qnil; 5739 Vcoding_system_for_write = Qnil;
5545 5740
5546 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* 5741 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /*
5547 Coding system used to convert pathnames when accessing files. 5742 Coding system used to convert pathnames when accessing files.
5548 */ ); 5743 */ );
5557 */ ); 5752 */ );
5558 enable_multibyte_characters = 1; 5753 enable_multibyte_characters = 1;
5559 } 5754 }
5560 5755
5561 void 5756 void
5562 complex_vars_of_mule_coding (void) 5757 complex_vars_of_file_coding (void)
5563 { 5758 {
5564 staticpro (&Vcoding_system_hash_table); 5759 staticpro (&Vcoding_system_hash_table);
5565 Vcoding_system_hash_table = 5760 Vcoding_system_hash_table =
5566 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); 5761 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
5567 5762
5568 the_codesys_prop_dynarr = Dynarr_new (codesys_prop); 5763 the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
5764 dumpstruct (&the_codesys_prop_dynarr, &codesys_prop_dynarr_description);
5569 5765
5570 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \ 5766 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \
5571 { \ 5767 { \
5572 struct codesys_prop csp; \ 5768 struct codesys_prop csp; \
5573 csp.sym = (Sym); \ 5769 csp.sym = (Sym); \
5603 5799
5604 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode); 5800 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode);
5605 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode); 5801 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode);
5606 #endif /* MULE */ 5802 #endif /* MULE */
5607 /* Need to create this here or we're really screwed. */ 5803 /* Need to create this here or we're really screwed. */
5608 Fmake_coding_system (Qno_conversion, Qno_conversion, build_string ("No conversion"), 5804 Fmake_coding_system
5609 list2 (Qmnemonic, build_string ("Noconv"))); 5805 (Qraw_text, Qno_conversion,
5610 5806 build_string ("Raw text, which means it converts only line-break-codes."),
5611 Fcopy_coding_system (Fcoding_system_property (Qno_conversion, Qeol_lf), 5807 list2 (Qmnemonic, build_string ("Raw")));
5612 Qbinary); 5808
5809 Fmake_coding_system
5810 (Qbinary, Qno_conversion,
5811 build_string ("Binary, which means it does not convert anything."),
5812 list4 (Qeol_type, Qlf,
5813 Qmnemonic, build_string ("Binary")));
5814
5815 Fdefine_coding_system_alias (Qno_conversion, Qraw_text);
5816
5817 Fdefine_coding_system_alias (Qfile_name, Qbinary);
5818
5819 Fdefine_coding_system_alias (Qterminal, Qbinary);
5820 Fdefine_coding_system_alias (Qkeyboard, Qbinary);
5613 5821
5614 /* Need this for bootstrapping */ 5822 /* Need this for bootstrapping */
5615 coding_category_system[CODING_CATEGORY_NO_CONVERSION] = 5823 fcd->coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
5616 Fget_coding_system (Qno_conversion); 5824 Fget_coding_system (Qraw_text);
5617 5825
5618 #ifdef MULE 5826 #ifdef MULE
5619 { 5827 {
5620 unsigned int i; 5828 unsigned int i;
5621 5829
5622 for (i = 0; i < 65536; i++) 5830 for (i = 0; i < 65536; i++)
5623 ucs_to_mule_table[i] = Qnil; 5831 fcd->ucs_to_mule_table[i] = Qnil;
5624 } 5832 }
5625 staticpro (&mule_to_ucs_table); 5833 staticpro (&mule_to_ucs_table);
5626 mule_to_ucs_table = Fmake_char_table(Qgeneric); 5834 mule_to_ucs_table = Fmake_char_table(Qgeneric);
5627 #endif /* MULE */ 5835 #endif /* MULE */
5628 } 5836 }