comparison src/file-coding.c @ 412:697ef44129c6 r21-2-14

Import from CVS: tag r21-2-14
author cvs
date Mon, 13 Aug 2007 11:20:41 +0200
parents b8cc9ab3f761
children da8ed4261e83
comparison
equal deleted inserted replaced
411:12e008d41344 412:697ef44129c6
23 23
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */ 24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
25 25
26 #include <config.h> 26 #include <config.h>
27 #include "lisp.h" 27 #include "lisp.h"
28
29 #include "buffer.h" 28 #include "buffer.h"
30 #include "elhash.h" 29 #include "elhash.h"
31 #include "insdel.h" 30 #include "insdel.h"
32 #include "lstream.h" 31 #include "lstream.h"
33 #include "opaque.h"
34 #ifdef MULE 32 #ifdef MULE
35 #include "mule-ccl.h" 33 #include "mule-ccl.h"
36 #include "chartab.h" 34 #include "chartab.h"
37 #endif 35 #endif
38 #include "file-coding.h" 36 #include "file-coding.h"
39 37
40 Lisp_Object Qcoding_system_error; 38 Lisp_Object Qbuffer_file_coding_system, Qcoding_system_error;
41 39
42 Lisp_Object Vkeyboard_coding_system; 40 Lisp_Object Vkeyboard_coding_system;
43 Lisp_Object Vterminal_coding_system; 41 Lisp_Object Vterminal_coding_system;
44 Lisp_Object Vcoding_system_for_read; 42 Lisp_Object Vcoding_system_for_read;
45 Lisp_Object Vcoding_system_for_write; 43 Lisp_Object Vcoding_system_for_write;
46 Lisp_Object Vfile_name_coding_system; 44 Lisp_Object Vfile_name_coding_system;
47 45
48 /* Table of symbols identifying each coding category. */ 46 /* Table of symbols identifying each coding category. */
49 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1]; 47 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1];
50 48
51 49 /* Coding system currently associated with each coding category. */
52 50 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1];
53 struct file_coding_dump { 51
54 /* Coding system currently associated with each coding category. */ 52 /* Table of all coding categories in decreasing order of priority.
55 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1]; 53 This describes a permutation of the possible coding categories. */
56 54 int coding_category_by_priority[CODING_CATEGORY_LAST + 1];
57 /* Table of all coding categories in decreasing order of priority. 55
58 This describes a permutation of the possible coding categories. */ 56 Lisp_Object Qcoding_system_p;
59 int coding_category_by_priority[CODING_CATEGORY_LAST + 1]; 57
60 58 Lisp_Object Qno_conversion, Qccl, Qiso2022;
61 #ifdef MULE
62 Lisp_Object ucs_to_mule_table[65536];
63 #endif
64 } *fcd;
65
66 static const struct lrecord_description fcd_description_1[] = {
67 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 },
68 #ifdef MULE
69 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), 65536 },
70 #endif
71 { XD_END }
72 };
73
74 static const struct struct_description fcd_description = {
75 sizeof (struct file_coding_dump),
76 fcd_description_1
77 };
78
79 Lisp_Object mule_to_ucs_table;
80
81 Lisp_Object Qcoding_systemp;
82
83 Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022;
84 /* Qinternal in general.c */ 59 /* Qinternal in general.c */
85 60
86 Lisp_Object Qmnemonic, Qeol_type; 61 Lisp_Object Qmnemonic, Qeol_type;
87 Lisp_Object Qcr, Qcrlf, Qlf; 62 Lisp_Object Qcr, Qcrlf, Qlf;
88 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf; 63 Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf;
95 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3; 70 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3;
96 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; 71 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
97 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; 72 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
98 Lisp_Object Qno_iso6429; 73 Lisp_Object Qno_iso6429;
99 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion; 74 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion;
100 Lisp_Object Qescape_quoted; 75 Lisp_Object Qctext, Qescape_quoted;
101 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; 76 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift;
102 #endif 77 #endif
103 Lisp_Object Qencode, Qdecode; 78 Lisp_Object Qencode, Qdecode;
104 79
105 Lisp_Object Vcoding_system_hash_table; 80 Lisp_Object Vcoding_system_hash_table;
174 #endif /* MULE */ 149 #endif /* MULE */
175 EXFUN (Fcopy_coding_system, 2); 150 EXFUN (Fcopy_coding_system, 2);
176 #ifdef MULE 151 #ifdef MULE
177 struct detection_state; 152 struct detection_state;
178 static int detect_coding_sjis (struct detection_state *st, 153 static int detect_coding_sjis (struct detection_state *st,
179 const unsigned char *src, 154 CONST unsigned char *src,
180 unsigned int n); 155 unsigned int n);
181 static void decode_coding_sjis (Lstream *decoding, 156 static void decode_coding_sjis (Lstream *decoding,
182 const unsigned char *src, 157 CONST unsigned char *src,
183 unsigned_char_dynarr *dst, 158 unsigned_char_dynarr *dst,
184 unsigned int n); 159 unsigned int n);
185 static void encode_coding_sjis (Lstream *encoding, 160 static void encode_coding_sjis (Lstream *encoding,
186 const unsigned char *src, 161 CONST unsigned char *src,
187 unsigned_char_dynarr *dst, 162 unsigned_char_dynarr *dst,
188 unsigned int n); 163 unsigned int n);
189 static int detect_coding_big5 (struct detection_state *st, 164 static int detect_coding_big5 (struct detection_state *st,
190 const unsigned char *src, 165 CONST unsigned char *src,
191 unsigned int n); 166 unsigned int n);
192 static void decode_coding_big5 (Lstream *decoding, 167 static void decode_coding_big5 (Lstream *decoding,
193 const unsigned char *src, 168 CONST unsigned char *src,
194 unsigned_char_dynarr *dst, unsigned int n); 169 unsigned_char_dynarr *dst, unsigned int n);
195 static void encode_coding_big5 (Lstream *encoding, 170 static void encode_coding_big5 (Lstream *encoding,
196 const unsigned char *src, 171 CONST unsigned char *src,
197 unsigned_char_dynarr *dst, unsigned int n); 172 unsigned_char_dynarr *dst, unsigned int n);
198 static int detect_coding_ucs4 (struct detection_state *st, 173 static int detect_coding_ucs4 (struct detection_state *st,
199 const unsigned char *src, 174 CONST unsigned char *src,
200 unsigned int n); 175 unsigned int n);
201 static void decode_coding_ucs4 (Lstream *decoding, 176 static void decode_coding_ucs4 (Lstream *decoding,
202 const unsigned char *src, 177 CONST unsigned char *src,
203 unsigned_char_dynarr *dst, unsigned int n); 178 unsigned_char_dynarr *dst, unsigned int n);
204 static void encode_coding_ucs4 (Lstream *encoding, 179 static void encode_coding_ucs4 (Lstream *encoding,
205 const unsigned char *src, 180 CONST unsigned char *src,
206 unsigned_char_dynarr *dst, unsigned int n); 181 unsigned_char_dynarr *dst, unsigned int n);
207 static int detect_coding_utf8 (struct detection_state *st, 182 static int detect_coding_utf8 (struct detection_state *st,
208 const unsigned char *src, 183 CONST unsigned char *src,
209 unsigned int n); 184 unsigned int n);
210 static void decode_coding_utf8 (Lstream *decoding, 185 static void decode_coding_utf8 (Lstream *decoding,
211 const unsigned char *src, 186 CONST unsigned char *src,
212 unsigned_char_dynarr *dst, unsigned int n); 187 unsigned_char_dynarr *dst, unsigned int n);
213 static void encode_coding_utf8 (Lstream *encoding, 188 static void encode_coding_utf8 (Lstream *encoding,
214 const unsigned char *src, 189 CONST unsigned char *src,
215 unsigned_char_dynarr *dst, unsigned int n); 190 unsigned_char_dynarr *dst, unsigned int n);
216 static int postprocess_iso2022_mask (int mask); 191 static int postprocess_iso2022_mask (int mask);
217 static void reset_iso2022 (Lisp_Object coding_system, 192 static void reset_iso2022 (Lisp_Object coding_system,
218 struct iso2022_decoder *iso); 193 struct iso2022_decoder *iso);
219 static int detect_coding_iso2022 (struct detection_state *st, 194 static int detect_coding_iso2022 (struct detection_state *st,
220 const unsigned char *src, 195 CONST unsigned char *src,
221 unsigned int n); 196 unsigned int n);
222 static void decode_coding_iso2022 (Lstream *decoding, 197 static void decode_coding_iso2022 (Lstream *decoding,
223 const unsigned char *src, 198 CONST unsigned char *src,
224 unsigned_char_dynarr *dst, unsigned int n); 199 unsigned_char_dynarr *dst, unsigned int n);
225 static void encode_coding_iso2022 (Lstream *encoding, 200 static void encode_coding_iso2022 (Lstream *encoding,
226 const unsigned char *src, 201 CONST unsigned char *src,
227 unsigned_char_dynarr *dst, unsigned int n); 202 unsigned_char_dynarr *dst, unsigned int n);
228 #endif /* MULE */ 203 #endif /* MULE */
229 static void decode_coding_no_conversion (Lstream *decoding, 204 static void decode_coding_no_conversion (Lstream *decoding,
230 const unsigned char *src, 205 CONST unsigned char *src,
231 unsigned_char_dynarr *dst, 206 unsigned_char_dynarr *dst,
232 unsigned int n); 207 unsigned int n);
233 static void encode_coding_no_conversion (Lstream *encoding, 208 static void encode_coding_no_conversion (Lstream *encoding,
234 const unsigned char *src, 209 CONST unsigned char *src,
235 unsigned_char_dynarr *dst, 210 unsigned_char_dynarr *dst,
236 unsigned int n); 211 unsigned int n);
237 static void mule_decode (Lstream *decoding, const unsigned char *src, 212 static void mule_decode (Lstream *decoding, CONST unsigned char *src,
238 unsigned_char_dynarr *dst, unsigned int n); 213 unsigned_char_dynarr *dst, unsigned int n);
239 static void mule_encode (Lstream *encoding, const unsigned char *src, 214 static void mule_encode (Lstream *encoding, CONST unsigned char *src,
240 unsigned_char_dynarr *dst, unsigned int n); 215 unsigned_char_dynarr *dst, unsigned int n);
241 216
242 typedef struct codesys_prop codesys_prop; 217 typedef struct codesys_prop codesys_prop;
243 struct codesys_prop 218 struct codesys_prop
244 { 219 {
248 223
249 typedef struct 224 typedef struct
250 { 225 {
251 Dynarr_declare (codesys_prop); 226 Dynarr_declare (codesys_prop);
252 } codesys_prop_dynarr; 227 } codesys_prop_dynarr;
253
254 static const struct lrecord_description codesys_prop_description_1[] = {
255 { XD_LISP_OBJECT, offsetof (codesys_prop, sym) },
256 { XD_END }
257 };
258
259 static const struct struct_description codesys_prop_description = {
260 sizeof (codesys_prop),
261 codesys_prop_description_1
262 };
263
264 static const struct lrecord_description codesys_prop_dynarr_description_1[] = {
265 XD_DYNARR_DESC (codesys_prop_dynarr, &codesys_prop_description),
266 { XD_END }
267 };
268
269 static const struct struct_description codesys_prop_dynarr_description = {
270 sizeof (codesys_prop_dynarr),
271 codesys_prop_dynarr_description_1
272 };
273 228
274 codesys_prop_dynarr *the_codesys_prop_dynarr; 229 codesys_prop_dynarr *the_codesys_prop_dynarr;
275 230
276 enum codesys_prop_enum 231 enum codesys_prop_enum
277 { 232 {
283 238
284 /************************************************************************/ 239 /************************************************************************/
285 /* Coding system functions */ 240 /* Coding system functions */
286 /************************************************************************/ 241 /************************************************************************/
287 242
288 static Lisp_Object mark_coding_system (Lisp_Object); 243 static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object));
289 static void print_coding_system (Lisp_Object, Lisp_Object, int); 244 static void print_coding_system (Lisp_Object, Lisp_Object, int);
290 static void finalize_coding_system (void *header, int for_disksave); 245 static void finalize_coding_system (void *header, int for_disksave);
291
292 #ifdef MULE
293 static const struct lrecord_description ccs_description_1[] = {
294 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, from_charset) },
295 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, to_charset) },
296 { XD_END }
297 };
298
299 static const struct struct_description ccs_description = {
300 sizeof (charset_conversion_spec),
301 ccs_description_1
302 };
303
304 static const struct lrecord_description ccsd_description_1[] = {
305 XD_DYNARR_DESC (charset_conversion_spec_dynarr, &ccs_description),
306 { XD_END }
307 };
308
309 static const struct struct_description ccsd_description = {
310 sizeof (charset_conversion_spec_dynarr),
311 ccsd_description_1
312 };
313 #endif
314
315 static const struct lrecord_description coding_system_description[] = {
316 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, name) },
317 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, doc_string) },
318 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, mnemonic) },
319 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, post_read_conversion) },
320 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, pre_write_conversion) },
321 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_lf) },
322 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_crlf) },
323 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, eol_cr) },
324 #ifdef MULE
325 { XD_LISP_OBJECT_ARRAY, offsetof (Lisp_Coding_System, iso2022.initial_charset), 4 },
326 { XD_STRUCT_PTR, offsetof (Lisp_Coding_System, iso2022.input_conv), 1, &ccsd_description },
327 { XD_STRUCT_PTR, offsetof (Lisp_Coding_System, iso2022.output_conv), 1, &ccsd_description },
328 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, ccl.decode) },
329 { XD_LISP_OBJECT, offsetof (Lisp_Coding_System, ccl.encode) },
330 #endif
331 { XD_END }
332 };
333 246
334 DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system, 247 DEFINE_LRECORD_IMPLEMENTATION ("coding-system", coding_system,
335 mark_coding_system, print_coding_system, 248 mark_coding_system, print_coding_system,
336 finalize_coding_system, 249 finalize_coding_system,
337 0, 0, coding_system_description, 250 0, 0, struct Lisp_Coding_System);
338 Lisp_Coding_System);
339 251
340 static Lisp_Object 252 static Lisp_Object
341 mark_coding_system (Lisp_Object obj) 253 mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object))
342 { 254 {
343 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); 255 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj);
344 256
345 mark_object (CODING_SYSTEM_NAME (codesys)); 257 markobj (CODING_SYSTEM_NAME (codesys));
346 mark_object (CODING_SYSTEM_DOC_STRING (codesys)); 258 markobj (CODING_SYSTEM_DOC_STRING (codesys));
347 mark_object (CODING_SYSTEM_MNEMONIC (codesys)); 259 markobj (CODING_SYSTEM_MNEMONIC (codesys));
348 mark_object (CODING_SYSTEM_EOL_LF (codesys)); 260 markobj (CODING_SYSTEM_EOL_LF (codesys));
349 mark_object (CODING_SYSTEM_EOL_CRLF (codesys)); 261 markobj (CODING_SYSTEM_EOL_CRLF (codesys));
350 mark_object (CODING_SYSTEM_EOL_CR (codesys)); 262 markobj (CODING_SYSTEM_EOL_CR (codesys));
351 263
352 switch (CODING_SYSTEM_TYPE (codesys)) 264 switch (CODING_SYSTEM_TYPE (codesys))
353 { 265 {
354 #ifdef MULE 266 #ifdef MULE
355 int i; 267 int i;
356 case CODESYS_ISO2022: 268 case CODESYS_ISO2022:
357 for (i = 0; i < 4; i++) 269 for (i = 0; i < 4; i++)
358 mark_object (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); 270 markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i));
359 if (codesys->iso2022.input_conv) 271 if (codesys->iso2022.input_conv)
360 { 272 {
361 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++) 273 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++)
362 { 274 {
363 struct charset_conversion_spec *ccs = 275 struct charset_conversion_spec *ccs =
364 Dynarr_atp (codesys->iso2022.input_conv, i); 276 Dynarr_atp (codesys->iso2022.input_conv, i);
365 mark_object (ccs->from_charset); 277 markobj (ccs->from_charset);
366 mark_object (ccs->to_charset); 278 markobj (ccs->to_charset);
367 } 279 }
368 } 280 }
369 if (codesys->iso2022.output_conv) 281 if (codesys->iso2022.output_conv)
370 { 282 {
371 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++) 283 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++)
372 { 284 {
373 struct charset_conversion_spec *ccs = 285 struct charset_conversion_spec *ccs =
374 Dynarr_atp (codesys->iso2022.output_conv, i); 286 Dynarr_atp (codesys->iso2022.output_conv, i);
375 mark_object (ccs->from_charset); 287 markobj (ccs->from_charset);
376 mark_object (ccs->to_charset); 288 markobj (ccs->to_charset);
377 } 289 }
378 } 290 }
379 break; 291 break;
380 292
381 case CODESYS_CCL: 293 case CODESYS_CCL:
382 mark_object (CODING_SYSTEM_CCL_DECODE (codesys)); 294 markobj (CODING_SYSTEM_CCL_DECODE (codesys));
383 mark_object (CODING_SYSTEM_CCL_ENCODE (codesys)); 295 markobj (CODING_SYSTEM_CCL_ENCODE (codesys));
384 break; 296 break;
385 #endif /* MULE */ 297 #endif /* MULE */
386 default: 298 default:
387 break; 299 break;
388 } 300 }
389 301
390 mark_object (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys)); 302 markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys));
391 return CODING_SYSTEM_POST_READ_CONVERSION (codesys); 303 return CODING_SYSTEM_POST_READ_CONVERSION (codesys);
392 } 304 }
393 305
394 static void 306 static void
395 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, 307 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun,
434 break; 346 break;
435 } 347 }
436 } 348 }
437 } 349 }
438 350
439 static eol_type_t 351 static enum eol_type
440 symbol_to_eol_type (Lisp_Object symbol) 352 symbol_to_eol_type (Lisp_Object symbol)
441 { 353 {
442 CHECK_SYMBOL (symbol); 354 CHECK_SYMBOL (symbol);
443 if (NILP (symbol)) return EOL_AUTODETECT; 355 if (NILP (symbol)) return EOL_AUTODETECT;
444 if (EQ (symbol, Qlf)) return EOL_LF; 356 if (EQ (symbol, Qlf)) return EOL_LF;
448 signal_simple_error ("Unrecognized eol type", symbol); 360 signal_simple_error ("Unrecognized eol type", symbol);
449 return EOL_AUTODETECT; /* not reached */ 361 return EOL_AUTODETECT; /* not reached */
450 } 362 }
451 363
452 static Lisp_Object 364 static Lisp_Object
453 eol_type_to_symbol (eol_type_t type) 365 eol_type_to_symbol (enum eol_type type)
454 { 366 {
455 switch (type) 367 switch (type)
456 { 368 {
457 default: abort (); 369 default: abort ();
458 case EOL_LF: return Qlf; 370 case EOL_LF: return Qlf;
539 If there is no such coding system, nil is returned. Otherwise the 451 If there is no such coding system, nil is returned. Otherwise the
540 associated coding system object is returned. 452 associated coding system object is returned.
541 */ 453 */
542 (coding_system_or_name)) 454 (coding_system_or_name))
543 { 455 {
456 if (CODING_SYSTEMP (coding_system_or_name))
457 return coding_system_or_name;
458
544 if (NILP (coding_system_or_name)) 459 if (NILP (coding_system_or_name))
545 coding_system_or_name = Qbinary; 460 coding_system_or_name = Qbinary;
546 else if (CODING_SYSTEMP (coding_system_or_name))
547 return coding_system_or_name;
548 else 461 else
549 CHECK_SYMBOL (coding_system_or_name); 462 CHECK_SYMBOL (coding_system_or_name);
550 463
551 while (1) 464 return Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
552 {
553 coding_system_or_name =
554 Fgethash (coding_system_or_name, Vcoding_system_hash_table, Qnil);
555
556 if (CODING_SYSTEMP (coding_system_or_name) || NILP (coding_system_or_name))
557 return coding_system_or_name;
558 }
559 } 465 }
560 466
561 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /* 467 DEFUN ("get-coding-system", Fget_coding_system, 1, 1, 0, /*
562 Retrieve the coding system of the given name. 468 Retrieve the coding system of the given name.
563 Same as `find-coding-system' except that if there is no such 469 Same as `find-coding-system' except that if there is no such
587 /* This function can GC */ 493 /* This function can GC */
588 struct coding_system_list_closure *cscl = 494 struct coding_system_list_closure *cscl =
589 (struct coding_system_list_closure *) coding_system_list_closure; 495 (struct coding_system_list_closure *) coding_system_list_closure;
590 Lisp_Object *coding_system_list = cscl->coding_system_list; 496 Lisp_Object *coding_system_list = cscl->coding_system_list;
591 497
592 *coding_system_list = Fcons (key, *coding_system_list); 498 *coding_system_list = Fcons (XCODING_SYSTEM (value)->name,
499 *coding_system_list);
593 return 0; 500 return 0;
594 } 501 }
595 502
596 DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /* 503 DEFUN ("coding-system-list", Fcoding_system_list, 0, 0, 0, /*
597 Return a list of the names of all defined coding systems. 504 Return a list of the names of all defined coding systems.
1040 to->name = new_name; 947 to->name = new_name;
1041 } 948 }
1042 return new_coding_system; 949 return new_coding_system;
1043 } 950 }
1044 951
1045 DEFUN ("coding-system-canonical-name-p", Fcoding_system_canonical_name_p, 1, 1, 0, /* 952 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
1046 Return t if OBJECT names a coding system, and is not a coding system alias. 953 Define symbol ALIAS as an alias for coding system CODING-SYSTEM.
1047 */ 954 */
1048 (object)) 955 (alias, coding_system))
1049 { 956 {
1050 return CODING_SYSTEMP (Fgethash (object, Vcoding_system_hash_table, Qnil))
1051 ? Qt : Qnil;
1052 }
1053
1054 DEFUN ("coding-system-alias-p", Fcoding_system_alias_p, 1, 1, 0, /*
1055 Return t if OBJECT is a coding system alias.
1056 All coding system aliases are created by `define-coding-system-alias'.
1057 */
1058 (object))
1059 {
1060 return SYMBOLP (Fgethash (object, Vcoding_system_hash_table, Qzero))
1061 ? Qt : Qnil;
1062 }
1063
1064 DEFUN ("coding-system-aliasee", Fcoding_system_aliasee, 1, 1, 0, /*
1065 Return the coding-system symbol for which symbol ALIAS is an alias.
1066 */
1067 (alias))
1068 {
1069 Lisp_Object aliasee = Fgethash (alias, Vcoding_system_hash_table, Qnil);
1070 if (SYMBOLP (aliasee))
1071 return aliasee;
1072 else
1073 signal_simple_error ("Symbol is not a coding system alias", alias);
1074 return Qnil; /* To keep the compiler happy */
1075 }
1076
1077 static Lisp_Object
1078 append_suffix_to_symbol (Lisp_Object symbol, const char *ascii_string)
1079 {
1080 return Fintern (concat2 (Fsymbol_name (symbol), build_string (ascii_string)),
1081 Qnil);
1082 }
1083
1084 /* A maphash function, for removing dangling coding system aliases. */
1085 static int
1086 dangling_coding_system_alias_p (Lisp_Object alias,
1087 Lisp_Object aliasee,
1088 void *dangling_aliases)
1089 {
1090 if (SYMBOLP (aliasee)
1091 && NILP (Fgethash (aliasee, Vcoding_system_hash_table, Qnil)))
1092 {
1093 (*(int *) dangling_aliases)++;
1094 return 1;
1095 }
1096 else
1097 return 0;
1098 }
1099
1100 DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, 2, 2, 0, /*
1101 Define symbol ALIAS as an alias for coding system ALIASEE.
1102
1103 You can use this function to redefine an alias that has already been defined,
1104 but you cannot redefine a name which is the canonical name for a coding system.
1105 \(a canonical name of a coding system is what is returned when you call
1106 `coding-system-name' on a coding system).
1107
1108 ALIASEE itself can be an alias, which allows you to define nested aliases.
1109
1110 You are forbidden, however, from creating alias loops or `dangling' aliases.
1111 These will be detected, and an error will be signaled if you attempt to do so.
1112
1113 If ALIASEE is nil, then ALIAS will simply be undefined.
1114
1115 See also `coding-system-alias-p', `coding-system-aliasee',
1116 and `coding-system-canonical-name-p'.
1117 */
1118 (alias, aliasee))
1119 {
1120 Lisp_Object real_coding_system, probe;
1121
1122 CHECK_SYMBOL (alias); 957 CHECK_SYMBOL (alias);
1123 958 if (!NILP (Ffind_coding_system (alias)))
1124 if (!NILP (Fcoding_system_canonical_name_p (alias))) 959 signal_simple_error ("Symbol already names a coding system", alias);
1125 signal_simple_error 960 coding_system = Fget_coding_system (coding_system);
1126 ("Symbol is the canonical name of a coding system and cannot be redefined", 961 Fputhash (alias, coding_system, Vcoding_system_hash_table);
1127 alias); 962
1128 963 /* Set up aliases for subsidiaries. */
1129 if (NILP (aliasee)) 964 if (XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
1130 { 965 {
1131 Lisp_Object subsidiary_unix = append_suffix_to_symbol (alias, "-unix"); 966 Lisp_Object str;
1132 Lisp_Object subsidiary_dos = append_suffix_to_symbol (alias, "-dos"); 967 XSETSTRING (str, symbol_name (XSYMBOL (alias)));
1133 Lisp_Object subsidiary_mac = append_suffix_to_symbol (alias, "-mac"); 968 #define FROB(type, name) \
1134 969 do { \
1135 Fremhash (alias, Vcoding_system_hash_table); 970 Lisp_Object subsidiary = XCODING_SYSTEM_EOL_##type (coding_system); \
1136 971 if (!NILP (subsidiary)) \
1137 /* Undefine subsidiary aliases, 972 Fdefine_coding_system_alias \
1138 presumably created by a previous call to this function */ 973 (Fintern (concat2 (str, build_string (name)), Qnil), subsidiary); \
1139 if (! NILP (Fcoding_system_alias_p (subsidiary_unix)) && 974 } while (0)
1140 ! NILP (Fcoding_system_alias_p (subsidiary_dos)) && 975 FROB (LF, "-unix");
1141 ! NILP (Fcoding_system_alias_p (subsidiary_mac))) 976 FROB (CRLF, "-dos");
1142 { 977 FROB (CR, "-mac");
1143 Fdefine_coding_system_alias (subsidiary_unix, Qnil); 978 #undef FROB
1144 Fdefine_coding_system_alias (subsidiary_dos, Qnil); 979 }
1145 Fdefine_coding_system_alias (subsidiary_mac, Qnil); 980 /* FSF return value is a vector of [ALIAS-unix ALIAS-doc ALIAS-mac],
1146 }
1147
1148 /* Undefine dangling coding system aliases. */
1149 {
1150 int dangling_aliases;
1151
1152 do {
1153 dangling_aliases = 0;
1154 elisp_map_remhash (dangling_coding_system_alias_p,
1155 Vcoding_system_hash_table,
1156 &dangling_aliases);
1157 } while (dangling_aliases > 0);
1158 }
1159
1160 return Qnil;
1161 }
1162
1163 if (CODING_SYSTEMP (aliasee))
1164 aliasee = XCODING_SYSTEM_NAME (aliasee);
1165
1166 /* Checks that aliasee names a coding-system */
1167 real_coding_system = Fget_coding_system (aliasee);
1168
1169 /* Check for coding system alias loops */
1170 if (EQ (alias, aliasee))
1171 alias_loop: signal_simple_error_2
1172 ("Attempt to create a coding system alias loop", alias, aliasee);
1173
1174 for (probe = aliasee;
1175 SYMBOLP (probe);
1176 probe = Fgethash (probe, Vcoding_system_hash_table, Qzero))
1177 {
1178 if (EQ (probe, alias))
1179 goto alias_loop;
1180 }
1181
1182 Fputhash (alias, aliasee, Vcoding_system_hash_table);
1183
1184 /* Set up aliases for subsidiaries.
1185 #### There must be a better way to handle subsidiary coding systems. */
1186 {
1187 static const char *suffixes[] = { "-unix", "-dos", "-mac" };
1188 int i;
1189 for (i = 0; i < countof (suffixes); i++)
1190 {
1191 Lisp_Object alias_subsidiary =
1192 append_suffix_to_symbol (alias, suffixes[i]);
1193 Lisp_Object aliasee_subsidiary =
1194 append_suffix_to_symbol (aliasee, suffixes[i]);
1195
1196 if (! NILP (Ffind_coding_system (aliasee_subsidiary)))
1197 Fdefine_coding_system_alias (alias_subsidiary, aliasee_subsidiary);
1198 }
1199 }
1200 /* FSF return value is a vector of [ALIAS-unix ALIAS-dos ALIAS-mac],
1201 but it doesn't look intentional, so I'd rather return something 981 but it doesn't look intentional, so I'd rather return something
1202 meaningful or nothing at all. */ 982 meaningful or nothing at all. */
1203 return Qnil; 983 return Qnil;
1204 } 984 }
1205 985
1206 static Lisp_Object 986 static Lisp_Object
1207 subsidiary_coding_system (Lisp_Object coding_system, eol_type_t type) 987 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
1208 { 988 {
1209 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); 989 Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
1210 Lisp_Object new_coding_system; 990 Lisp_Object new_coding_system;
1211 991
1212 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT) 992 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
1482 /* Now go through the existing categories by priority to retrieve 1262 /* Now go through the existing categories by priority to retrieve
1483 the categories not yet specified and preserve their priority 1263 the categories not yet specified and preserve their priority
1484 order. */ 1264 order. */
1485 for (j = 0; j <= CODING_CATEGORY_LAST; j++) 1265 for (j = 0; j <= CODING_CATEGORY_LAST; j++)
1486 { 1266 {
1487 int cat = fcd->coding_category_by_priority[j]; 1267 int cat = coding_category_by_priority[j];
1488 if (category_to_priority[cat] < 0) 1268 if (category_to_priority[cat] < 0)
1489 category_to_priority[cat] = i++; 1269 category_to_priority[cat] = i++;
1490 } 1270 }
1491 1271
1492 /* Now we need to construct the inverse of the mapping we just 1272 /* Now we need to construct the inverse of the mapping we just
1493 constructed. */ 1273 constructed. */
1494 1274
1495 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1275 for (i = 0; i <= CODING_CATEGORY_LAST; i++)
1496 fcd->coding_category_by_priority[category_to_priority[i]] = i; 1276 coding_category_by_priority[category_to_priority[i]] = i;
1497 1277
1498 /* Phew! That was confusing. */ 1278 /* Phew! That was confusing. */
1499 return Qnil; 1279 return Qnil;
1500 } 1280 }
1501 1281
1506 { 1286 {
1507 int i; 1287 int i;
1508 Lisp_Object list = Qnil; 1288 Lisp_Object list = Qnil;
1509 1289
1510 for (i = CODING_CATEGORY_LAST; i >= 0; i--) 1290 for (i = CODING_CATEGORY_LAST; i >= 0; i--)
1511 list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]], 1291 list = Fcons (coding_category_symbol[coding_category_by_priority[i]],
1512 list); 1292 list);
1513 return list; 1293 return list;
1514 } 1294 }
1515 1295
1516 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* 1296 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /*
1519 (coding_category, coding_system)) 1299 (coding_category, coding_system))
1520 { 1300 {
1521 int cat = decode_coding_category (coding_category); 1301 int cat = decode_coding_category (coding_category);
1522 1302
1523 coding_system = Fget_coding_system (coding_system); 1303 coding_system = Fget_coding_system (coding_system);
1524 fcd->coding_category_system[cat] = coding_system; 1304 coding_category_system[cat] = coding_system;
1525 return Qnil; 1305 return Qnil;
1526 } 1306 }
1527 1307
1528 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* 1308 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /*
1529 Return the coding system associated with a coding category. 1309 Return the coding system associated with a coding category.
1530 */ 1310 */
1531 (coding_category)) 1311 (coding_category))
1532 { 1312 {
1533 int cat = decode_coding_category (coding_category); 1313 int cat = decode_coding_category (coding_category);
1534 Lisp_Object sys = fcd->coding_category_system[cat]; 1314 Lisp_Object sys = coding_category_system[cat];
1535 1315
1536 if (!NILP (sys)) 1316 if (!NILP (sys))
1537 return XCODING_SYSTEM_NAME (sys); 1317 return XCODING_SYSTEM_NAME (sys);
1538 return Qnil; 1318 return Qnil;
1539 } 1319 }
1543 /* Detecting the encoding of data */ 1323 /* Detecting the encoding of data */
1544 /************************************************************************/ 1324 /************************************************************************/
1545 1325
1546 struct detection_state 1326 struct detection_state
1547 { 1327 {
1548 eol_type_t eol_type; 1328 enum eol_type eol_type;
1549 int seen_non_ascii; 1329 int seen_non_ascii;
1550 int mask; 1330 int mask;
1551 #ifdef MULE 1331 #ifdef MULE
1552 struct 1332 struct
1553 { 1333 {
1624 /* Perhaps the only thing useful you learn from intensive Microsoft 1404 /* Perhaps the only thing useful you learn from intensive Microsoft
1625 technical interviews */ 1405 technical interviews */
1626 return (mask & (mask - 1)) == 0; 1406 return (mask & (mask - 1)) == 0;
1627 } 1407 }
1628 1408
1629 static eol_type_t 1409 static enum eol_type
1630 detect_eol_type (struct detection_state *st, const unsigned char *src, 1410 detect_eol_type (struct detection_state *st, CONST unsigned char *src,
1631 unsigned int n) 1411 unsigned int n)
1632 { 1412 {
1633 int c; 1413 int c;
1634 1414
1635 while (n--) 1415 while (n--)
1636 { 1416 {
1637 c = *src++; 1417 c = *src++;
1638 if (c == '\n') 1418 if (c == '\r')
1639 {
1640 if (st->eol.just_saw_cr)
1641 return EOL_CRLF;
1642 else if (st->eol.seen_anything)
1643 return EOL_LF;
1644 }
1645 else if (st->eol.just_saw_cr)
1646 return EOL_CR;
1647 else if (c == '\r')
1648 st->eol.just_saw_cr = 1; 1419 st->eol.just_saw_cr = 1;
1649 else 1420 else
1650 st->eol.just_saw_cr = 0; 1421 {
1422 if (c == '\n')
1423 {
1424 if (st->eol.just_saw_cr)
1425 return EOL_CRLF;
1426 else if (st->eol.seen_anything)
1427 return EOL_LF;
1428 }
1429 else if (st->eol.just_saw_cr)
1430 return EOL_CR;
1431 st->eol.just_saw_cr = 0;
1432 }
1651 st->eol.seen_anything = 1; 1433 st->eol.seen_anything = 1;
1652 } 1434 }
1653 1435
1654 return EOL_AUTODETECT; 1436 return EOL_AUTODETECT;
1655 } 1437 }
1670 is present in st->mask 1452 is present in st->mask
1671 1 == definitive answers are here for both st->eol_type and st->mask 1453 1 == definitive answers are here for both st->eol_type and st->mask
1672 */ 1454 */
1673 1455
1674 static int 1456 static int
1675 detect_coding_type (struct detection_state *st, const Extbyte *src, 1457 detect_coding_type (struct detection_state *st, CONST unsigned char *src,
1676 unsigned int n, int just_do_eol) 1458 unsigned int n, int just_do_eol)
1677 { 1459 {
1678 int c; 1460 int c;
1679 1461
1680 if (st->eol_type == EOL_AUTODETECT) 1462 if (st->eol_type == EOL_AUTODETECT)
1747 "Invalid `default-buffer-file-coding-system', set to nil"); 1529 "Invalid `default-buffer-file-coding-system', set to nil");
1748 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil; 1530 XBUFFER (Vbuffer_defaults)->buffer_file_coding_system = Qnil;
1749 } 1531 }
1750 } 1532 }
1751 if (NILP (retval)) 1533 if (NILP (retval))
1752 retval = Fget_coding_system (Qraw_text); 1534 retval = Fget_coding_system (Qno_conversion);
1753 return retval; 1535 return retval;
1754 } 1536 }
1755 else 1537 else
1756 { 1538 {
1757 int i; 1539 int i;
1761 #endif 1543 #endif
1762 /* Look through the coding categories by priority and find 1544 /* Look through the coding categories by priority and find
1763 the first one that is allowed. */ 1545 the first one that is allowed. */
1764 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1546 for (i = 0; i <= CODING_CATEGORY_LAST; i++)
1765 { 1547 {
1766 cat = fcd->coding_category_by_priority[i]; 1548 cat = coding_category_by_priority[i];
1767 if ((mask & (1 << cat)) && 1549 if ((mask & (1 << cat)) &&
1768 !NILP (fcd->coding_category_system[cat])) 1550 !NILP (coding_category_system[cat]))
1769 break; 1551 break;
1770 } 1552 }
1771 if (cat >= 0) 1553 if (cat >= 0)
1772 return fcd->coding_category_system[cat]; 1554 return coding_category_system[cat];
1773 else 1555 else
1774 return Fget_coding_system (Qraw_text); 1556 return Fget_coding_system (Qno_conversion);
1775 } 1557 }
1776 } 1558 }
1777 1559
1778 /* Given a seekable read stream and potential coding system and EOL type 1560 /* Given a seekable read stream and potential coding system and EOL type
1779 as specified, do any autodetection that is called for. If the 1561 as specified, do any autodetection that is called for. If the
1780 coding system and/or EOL type are not `autodetect', they will be left 1562 coding system and/or EOL type are not autodetect, they will be left
1781 alone; but this function will never return an autodetect coding system 1563 alone; but this function will never return an autodetect coding system
1782 or EOL type. 1564 or EOL type.
1783 1565
1784 This function does not automatically fetch subsidiary coding systems; 1566 This function does not automatically fetch subsidiary coding systems;
1785 that should be unnecessary with the explicit eol-type argument. */ 1567 that should be unnecessary with the explicit eol-type argument. */
1786 1568
1787 #define LENGTH(string_constant) (sizeof (string_constant) - 1)
1788
1789 void 1569 void
1790 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out, 1570 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
1791 eol_type_t *eol_type_in_out) 1571 enum eol_type *eol_type_in_out)
1792 { 1572 {
1793 struct detection_state decst; 1573 struct detection_state decst;
1794 1574
1795 if (*eol_type_in_out == EOL_AUTODETECT) 1575 if (*eol_type_in_out == EOL_AUTODETECT)
1796 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out); 1576 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out);
1798 xzero (decst); 1578 xzero (decst);
1799 decst.eol_type = *eol_type_in_out; 1579 decst.eol_type = *eol_type_in_out;
1800 decst.mask = ~0; 1580 decst.mask = ~0;
1801 1581
1802 /* If autodetection is called for, do it now. */ 1582 /* If autodetection is called for, do it now. */
1803 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT 1583 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT ||
1804 || *eol_type_in_out == EOL_AUTODETECT) 1584 *eol_type_in_out == EOL_AUTODETECT)
1805 { 1585 {
1806 Extbyte buf[4096]; 1586
1807 Lisp_Object coding_system = Qnil; 1587 while (1)
1808 Extbyte *p; 1588 {
1809 ssize_t nread = Lstream_read (stream, buf, sizeof (buf)); 1589 unsigned char random_buffer[4096];
1810 Extbyte *scan_end; 1590 int nread;
1811 1591
1812 /* Look for initial "-*-"; mode line prefix */ 1592 nread = Lstream_read (stream, random_buffer, sizeof (random_buffer));
1813 for (p = buf, 1593 if (!nread)
1814 scan_end = buf + nread - LENGTH ("-*-coding:?-*-");
1815 p <= scan_end
1816 && *p != '\n'
1817 && *p != '\r';
1818 p++)
1819 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
1820 {
1821 Extbyte *local_vars_beg = p + 3;
1822 /* Look for final "-*-"; mode line suffix */
1823 for (p = local_vars_beg,
1824 scan_end = buf + nread - LENGTH ("-*-");
1825 p <= scan_end
1826 && *p != '\n'
1827 && *p != '\r';
1828 p++)
1829 if (*p == '-' && *(p+1) == '*' && *(p+2) == '-')
1830 {
1831 Extbyte *suffix = p;
1832 /* Look for "coding:" */
1833 for (p = local_vars_beg,
1834 scan_end = suffix - LENGTH ("coding:?");
1835 p <= scan_end;
1836 p++)
1837 if (memcmp ("coding:", p, LENGTH ("coding:")) == 0
1838 && (p == local_vars_beg
1839 || (*(p-1) == ' ' ||
1840 *(p-1) == '\t' ||
1841 *(p-1) == ';')))
1842 {
1843 Extbyte save;
1844 int n;
1845 p += LENGTH ("coding:");
1846 while (*p == ' ' || *p == '\t') p++;
1847
1848 /* Get coding system name */
1849 save = *suffix; *suffix = '\0';
1850 /* Characters valid in a MIME charset name (rfc 1521),
1851 and in a Lisp symbol name. */
1852 n = strspn ( (char *) p,
1853 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
1854 "abcdefghijklmnopqrstuvwxyz"
1855 "0123456789"
1856 "!$%&*+-.^_{|}~");
1857 *suffix = save;
1858 if (n > 0)
1859 {
1860 save = p[n]; p[n] = '\0';
1861 coding_system =
1862 Ffind_coding_system (intern ((char *) p));
1863 p[n] = save;
1864 }
1865 break;
1866 }
1867 break;
1868 }
1869 break; 1594 break;
1870 } 1595 if (detect_coding_type (&decst, random_buffer, nread,
1871 1596 XCODING_SYSTEM_TYPE (*codesys_in_out) !=
1872 if (NILP (coding_system)) 1597 CODESYS_AUTODETECT))
1873 do 1598 break;
1874 { 1599 }
1875 if (detect_coding_type (&decst, buf, nread,
1876 XCODING_SYSTEM_TYPE (*codesys_in_out)
1877 != CODESYS_AUTODETECT))
1878 break;
1879 nread = Lstream_read (stream, buf, sizeof (buf));
1880 if (nread == 0)
1881 break;
1882 }
1883 while (1);
1884
1885 else if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT
1886 && XCODING_SYSTEM_EOL_TYPE (coding_system) == EOL_AUTODETECT)
1887 do
1888 {
1889 if (detect_coding_type (&decst, buf, nread, 1))
1890 break;
1891 nread = Lstream_read (stream, buf, sizeof (buf));
1892 if (!nread)
1893 break;
1894 }
1895 while (1);
1896 1600
1897 *eol_type_in_out = decst.eol_type; 1601 *eol_type_in_out = decst.eol_type;
1898 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT) 1602 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT)
1899 { 1603 *codesys_in_out = coding_system_from_mask (decst.mask);
1900 if (NILP (coding_system))
1901 *codesys_in_out = coding_system_from_mask (decst.mask);
1902 else
1903 *codesys_in_out = coding_system;
1904 }
1905 } 1604 }
1906 1605
1907 /* If we absolutely can't determine the EOL type, just assume LF. */ 1606 /* If we absolutely can't determine the EOL type, just assume LF. */
1908 if (*eol_type_in_out == EOL_AUTODETECT) 1607 if (*eol_type_in_out == EOL_AUTODETECT)
1909 *eol_type_in_out = EOL_LF; 1608 *eol_type_in_out = EOL_LF;
1938 decst.eol_type = EOL_AUTODETECT; 1637 decst.eol_type = EOL_AUTODETECT;
1939 decst.mask = ~0; 1638 decst.mask = ~0;
1940 while (1) 1639 while (1)
1941 { 1640 {
1942 unsigned char random_buffer[4096]; 1641 unsigned char random_buffer[4096];
1943 ssize_t nread = Lstream_read (istr, random_buffer, sizeof (random_buffer)); 1642 int nread = Lstream_read (istr, random_buffer, sizeof (random_buffer));
1944 1643
1945 if (!nread) 1644 if (!nread)
1946 break; 1645 break;
1947 if (detect_coding_type (&decst, random_buffer, nread, 0)) 1646 if (detect_coding_type (&decst, random_buffer, nread, 0))
1948 break; 1647 break;
1959 #ifdef MULE 1658 #ifdef MULE
1960 decst.mask = postprocess_iso2022_mask (decst.mask); 1659 decst.mask = postprocess_iso2022_mask (decst.mask);
1961 #endif 1660 #endif
1962 for (i = CODING_CATEGORY_LAST; i >= 0; i--) 1661 for (i = CODING_CATEGORY_LAST; i >= 0; i--)
1963 { 1662 {
1964 int sys = fcd->coding_category_by_priority[i]; 1663 int sys = coding_category_by_priority[i];
1965 if (decst.mask & (1 << sys)) 1664 if (decst.mask & (1 << sys))
1966 { 1665 {
1967 Lisp_Object codesys = fcd->coding_category_system[sys]; 1666 Lisp_Object codesys = coding_category_system[sys];
1968 if (!NILP (codesys)) 1667 if (!NILP (codesys))
1969 codesys = subsidiary_coding_system (codesys, decst.eol_type); 1668 codesys = subsidiary_coding_system (codesys, decst.eol_type);
1970 val = Fcons (codesys, val); 1669 val = Fcons (codesys, val);
1971 } 1670 }
1972 } 1671 }
2089 /* EOL_TYPE specifies the type of end-of-line conversion that 1788 /* EOL_TYPE specifies the type of end-of-line conversion that
2090 currently applies. We need to keep this separate from the 1789 currently applies. We need to keep this separate from the
2091 EOL type stored in CODESYS because the latter might indicate 1790 EOL type stored in CODESYS because the latter might indicate
2092 automatic EOL-type detection while the former will always 1791 automatic EOL-type detection while the former will always
2093 indicate a particular EOL type. */ 1792 indicate a particular EOL type. */
2094 eol_type_t eol_type; 1793 enum eol_type eol_type;
2095 #ifdef MULE 1794 #ifdef MULE
2096 /* Additional ISO2022 information. We define the structure above 1795 /* Additional ISO2022 information. We define the structure above
2097 because it's also needed by the detection routines. */ 1796 because it's also needed by the detection routines. */
2098 struct iso2022_decoder iso2022; 1797 struct iso2022_decoder iso2022;
2099 1798
2100 /* Additional information (the state of the running CCL program) 1799 /* Additional information (the state of the running CCL program)
2101 used by the CCL decoder. */ 1800 used by the CCL decoder. */
2102 struct ccl_program ccl; 1801 struct ccl_program ccl;
2103
2104 /* counter for UTF-8 or UCS-4 */
2105 unsigned char counter;
2106 #endif 1802 #endif
2107 struct detection_state decst; 1803 struct detection_state decst;
2108 }; 1804 };
2109 1805
2110 static ssize_t decoding_reader (Lstream *stream, 1806 static int decoding_reader (Lstream *stream, unsigned char *data, size_t size);
2111 unsigned char *data, size_t size); 1807 static int decoding_writer (Lstream *stream, CONST unsigned char *data, size_t size);
2112 static ssize_t decoding_writer (Lstream *stream,
2113 const unsigned char *data, size_t size);
2114 static int decoding_rewinder (Lstream *stream); 1808 static int decoding_rewinder (Lstream *stream);
2115 static int decoding_seekable_p (Lstream *stream); 1809 static int decoding_seekable_p (Lstream *stream);
2116 static int decoding_flusher (Lstream *stream); 1810 static int decoding_flusher (Lstream *stream);
2117 static int decoding_closer (Lstream *stream); 1811 static int decoding_closer (Lstream *stream);
2118 1812
2119 static Lisp_Object decoding_marker (Lisp_Object stream); 1813 static Lisp_Object decoding_marker (Lisp_Object stream,
1814 void (*markobj) (Lisp_Object));
2120 1815
2121 DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding, 1816 DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding,
2122 sizeof (struct decoding_stream)); 1817 sizeof (struct decoding_stream));
2123 1818
2124 static Lisp_Object 1819 static Lisp_Object
2125 decoding_marker (Lisp_Object stream) 1820 decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
2126 { 1821 {
2127 Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end; 1822 Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end;
2128 Lisp_Object str_obj; 1823 Lisp_Object str_obj;
2129 1824
2130 /* We do not need to mark the coding systems or charsets stored 1825 /* We do not need to mark the coding systems or charsets stored
2131 within the stream because they are stored in a global list 1826 within the stream because they are stored in a global list
2132 and automatically marked. */ 1827 and automatically marked. */
2133 1828
2134 XSETLSTREAM (str_obj, str); 1829 XSETLSTREAM (str_obj, str);
2135 mark_object (str_obj); 1830 markobj (str_obj);
2136 if (str->imp->marker) 1831 if (str->imp->marker)
2137 return (str->imp->marker) (str_obj); 1832 return (str->imp->marker) (str_obj, markobj);
2138 else 1833 else
2139 return Qnil; 1834 return Qnil;
2140 } 1835 }
2141 1836
2142 /* Read SIZE bytes of data and store it into DATA. We are a decoding stream 1837 /* Read SIZE bytes of data and store it into DATA. We are a decoding stream
2143 so we read data from the other end, decode it, and store it into DATA. */ 1838 so we read data from the other end, decode it, and store it into DATA. */
2144 1839
2145 static ssize_t 1840 static int
2146 decoding_reader (Lstream *stream, unsigned char *data, size_t size) 1841 decoding_reader (Lstream *stream, unsigned char *data, size_t size)
2147 { 1842 {
2148 struct decoding_stream *str = DECODING_STREAM_DATA (stream); 1843 struct decoding_stream *str = DECODING_STREAM_DATA (stream);
2149 unsigned char *orig_data = data; 1844 unsigned char *orig_data = data;
2150 ssize_t read_size; 1845 int read_size;
2151 int error_occurred = 0; 1846 int error_occurred = 0;
2152 1847
2153 /* We need to interface to mule_decode(), which expects to take some 1848 /* We need to interface to mule_decode(), which expects to take some
2154 amount of data and store the result into a Dynarr. We have 1849 amount of data and store the result into a Dynarr. We have
2155 mule_decode() store into str->runoff, and take data from there 1850 mule_decode() store into str->runoff, and take data from there
2202 return error_occurred ? -1 : 0; 1897 return error_occurred ? -1 : 0;
2203 else 1898 else
2204 return data - orig_data; 1899 return data - orig_data;
2205 } 1900 }
2206 1901
2207 static ssize_t 1902 static int
2208 decoding_writer (Lstream *stream, const unsigned char *data, size_t size) 1903 decoding_writer (Lstream *stream, CONST unsigned char *data, size_t size)
2209 { 1904 {
2210 struct decoding_stream *str = DECODING_STREAM_DATA (stream); 1905 struct decoding_stream *str = DECODING_STREAM_DATA (stream);
2211 ssize_t retval; 1906 int retval;
2212 1907
2213 /* Decode all our data into the runoff, and then attempt to write 1908 /* Decode all our data into the runoff, and then attempt to write
2214 it all out to the other end. Remove whatever chunk we succeeded 1909 it all out to the other end. Remove whatever chunk we succeeded
2215 in writing. */ 1910 in writing. */
2216 mule_decode (stream, data, str->runoff, size); 1911 mule_decode (stream, data, str->runoff, size);
2236 } 1931 }
2237 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL) 1932 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL)
2238 { 1933 {
2239 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys)); 1934 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys));
2240 } 1935 }
2241 str->counter = 0;
2242 #endif /* MULE */ 1936 #endif /* MULE */
2243 if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_AUTODETECT
2244 || CODING_SYSTEM_EOL_TYPE (str->codesys) == EOL_AUTODETECT)
2245 {
2246 xzero (str->decst);
2247 str->decst.eol_type = EOL_AUTODETECT;
2248 str->decst.mask = ~0;
2249 }
2250 str->flags = str->ch = 0; 1937 str->flags = str->ch = 0;
2251 } 1938 }
2252 1939
2253 static int 1940 static int
2254 decoding_rewinder (Lstream *stream) 1941 decoding_rewinder (Lstream *stream)
2320 stream for reading using a non-fully-specified coding system and 2007 stream for reading using a non-fully-specified coding system and
2321 a non-seekable input stream. */ 2008 a non-seekable input stream. */
2322 2009
2323 static Lisp_Object 2010 static Lisp_Object
2324 make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys, 2011 make_decoding_stream_1 (Lstream *stream, Lisp_Object codesys,
2325 const char *mode) 2012 CONST char *mode)
2326 { 2013 {
2327 Lstream *lstr = Lstream_new (lstream_decoding, mode); 2014 Lstream *lstr = Lstream_new (lstream_decoding, mode);
2328 struct decoding_stream *str = DECODING_STREAM_DATA (lstr); 2015 struct decoding_stream *str = DECODING_STREAM_DATA (lstr);
2329 Lisp_Object obj; 2016 Lisp_Object obj;
2330 2017
2363 written to that stream; that is handled in decoding_reader() 2050 written to that stream; that is handled in decoding_reader()
2364 or decoding_writer(). This allows the same functions to 2051 or decoding_writer(). This allows the same functions to
2365 be used for both reading and writing. */ 2052 be used for both reading and writing. */
2366 2053
2367 static void 2054 static void
2368 mule_decode (Lstream *decoding, const unsigned char *src, 2055 mule_decode (Lstream *decoding, CONST unsigned char *src,
2369 unsigned_char_dynarr *dst, unsigned int n) 2056 unsigned_char_dynarr *dst, unsigned int n)
2370 { 2057 {
2371 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 2058 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
2372 2059
2373 /* If necessary, do encoding-detection now. We do this when 2060 /* If necessary, do encoding-detection now. We do this when
2427 break; 2114 break;
2428 case CODESYS_UTF8: 2115 case CODESYS_UTF8:
2429 decode_coding_utf8 (decoding, src, dst, n); 2116 decode_coding_utf8 (decoding, src, dst, n);
2430 break; 2117 break;
2431 case CODESYS_CCL: 2118 case CODESYS_CCL:
2432 str->ccl.last_block = str->flags & CODING_STATE_END; 2119 ccl_driver (&str->ccl, src, dst, n, 0);
2433 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_DECODING);
2434 break; 2120 break;
2435 case CODESYS_ISO2022: 2121 case CODESYS_ISO2022:
2436 decode_coding_iso2022 (decoding, src, dst, n); 2122 decode_coding_iso2022 (decoding, src, dst, n);
2437 break; 2123 break;
2438 #endif /* MULE */ 2124 #endif /* MULE */
2483 while (1) 2169 while (1)
2484 { 2170 {
2485 char tempbuf[1024]; /* some random amount */ 2171 char tempbuf[1024]; /* some random amount */
2486 Bufpos newpos, even_newer_pos; 2172 Bufpos newpos, even_newer_pos;
2487 Bufpos oldpos = lisp_buffer_stream_startpos (istr); 2173 Bufpos oldpos = lisp_buffer_stream_startpos (istr);
2488 ssize_t size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf)); 2174 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
2489 2175
2490 if (!size_in_bytes) 2176 if (!size_in_bytes)
2491 break; 2177 break;
2492 newpos = lisp_buffer_stream_startpos (istr); 2178 newpos = lisp_buffer_stream_startpos (istr);
2493 Lstream_write (ostr, tempbuf, size_in_bytes); 2179 Lstream_write (ostr, tempbuf, size_in_bytes);
2568 used by the CCL encoder. */ 2254 used by the CCL encoder. */
2569 struct ccl_program ccl; 2255 struct ccl_program ccl;
2570 #endif /* MULE */ 2256 #endif /* MULE */
2571 }; 2257 };
2572 2258
2573 static ssize_t encoding_reader (Lstream *stream, unsigned char *data, size_t size); 2259 static int encoding_reader (Lstream *stream, unsigned char *data, size_t size);
2574 static ssize_t encoding_writer (Lstream *stream, const unsigned char *data, 2260 static int encoding_writer (Lstream *stream, CONST unsigned char *data,
2575 size_t size); 2261 size_t size);
2576 static int encoding_rewinder (Lstream *stream); 2262 static int encoding_rewinder (Lstream *stream);
2577 static int encoding_seekable_p (Lstream *stream); 2263 static int encoding_seekable_p (Lstream *stream);
2578 static int encoding_flusher (Lstream *stream); 2264 static int encoding_flusher (Lstream *stream);
2579 static int encoding_closer (Lstream *stream); 2265 static int encoding_closer (Lstream *stream);
2580 2266
2581 static Lisp_Object encoding_marker (Lisp_Object stream); 2267 static Lisp_Object encoding_marker (Lisp_Object stream,
2268 void (*markobj) (Lisp_Object));
2582 2269
2583 DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding, 2270 DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding,
2584 sizeof (struct encoding_stream)); 2271 sizeof (struct encoding_stream));
2585 2272
2586 static Lisp_Object 2273 static Lisp_Object
2587 encoding_marker (Lisp_Object stream) 2274 encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object))
2588 { 2275 {
2589 Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end; 2276 Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end;
2590 Lisp_Object str_obj; 2277 Lisp_Object str_obj;
2591 2278
2592 /* We do not need to mark the coding systems or charsets stored 2279 /* We do not need to mark the coding systems or charsets stored
2593 within the stream because they are stored in a global list 2280 within the stream because they are stored in a global list
2594 and automatically marked. */ 2281 and automatically marked. */
2595 2282
2596 XSETLSTREAM (str_obj, str); 2283 XSETLSTREAM (str_obj, str);
2597 mark_object (str_obj); 2284 markobj (str_obj);
2598 if (str->imp->marker) 2285 if (str->imp->marker)
2599 return (str->imp->marker) (str_obj); 2286 return (str->imp->marker) (str_obj, markobj);
2600 else 2287 else
2601 return Qnil; 2288 return Qnil;
2602 } 2289 }
2603 2290
2604 /* Read SIZE bytes of data and store it into DATA. We are a encoding stream 2291 /* Read SIZE bytes of data and store it into DATA. We are a encoding stream
2605 so we read data from the other end, encode it, and store it into DATA. */ 2292 so we read data from the other end, encode it, and store it into DATA. */
2606 2293
2607 static ssize_t 2294 static int
2608 encoding_reader (Lstream *stream, unsigned char *data, size_t size) 2295 encoding_reader (Lstream *stream, unsigned char *data, size_t size)
2609 { 2296 {
2610 struct encoding_stream *str = ENCODING_STREAM_DATA (stream); 2297 struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
2611 unsigned char *orig_data = data; 2298 unsigned char *orig_data = data;
2612 ssize_t read_size; 2299 int read_size;
2613 int error_occurred = 0; 2300 int error_occurred = 0;
2614 2301
2615 /* We need to interface to mule_encode(), which expects to take some 2302 /* We need to interface to mule_encode(), which expects to take some
2616 amount of data and store the result into a Dynarr. We have 2303 amount of data and store the result into a Dynarr. We have
2617 mule_encode() store into str->runoff, and take data from there 2304 mule_encode() store into str->runoff, and take data from there
2664 return error_occurred ? -1 : 0; 2351 return error_occurred ? -1 : 0;
2665 else 2352 else
2666 return data - orig_data; 2353 return data - orig_data;
2667 } 2354 }
2668 2355
2669 static ssize_t 2356 static int
2670 encoding_writer (Lstream *stream, const unsigned char *data, size_t size) 2357 encoding_writer (Lstream *stream, CONST unsigned char *data, size_t size)
2671 { 2358 {
2672 struct encoding_stream *str = ENCODING_STREAM_DATA (stream); 2359 struct encoding_stream *str = ENCODING_STREAM_DATA (stream);
2673 ssize_t retval; 2360 int retval;
2674 2361
2675 /* Encode all our data into the runoff, and then attempt to write 2362 /* Encode all our data into the runoff, and then attempt to write
2676 it all out to the other end. Remove whatever chunk we succeeded 2363 it all out to the other end. Remove whatever chunk we succeeded
2677 in writing. */ 2364 in writing. */
2678 mule_encode (stream, data, str->runoff, size); 2365 mule_encode (stream, data, str->runoff, size);
2776 reset_encoding_stream (str); 2463 reset_encoding_stream (str);
2777 } 2464 }
2778 2465
2779 static Lisp_Object 2466 static Lisp_Object
2780 make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys, 2467 make_encoding_stream_1 (Lstream *stream, Lisp_Object codesys,
2781 const char *mode) 2468 CONST char *mode)
2782 { 2469 {
2783 Lstream *lstr = Lstream_new (lstream_encoding, mode); 2470 Lstream *lstr = Lstream_new (lstream_encoding, mode);
2784 struct encoding_stream *str = ENCODING_STREAM_DATA (lstr); 2471 struct encoding_stream *str = ENCODING_STREAM_DATA (lstr);
2785 Lisp_Object obj; 2472 Lisp_Object obj;
2786 2473
2807 /* Convert N bytes of internally-formatted data stored in SRC to an 2494 /* Convert N bytes of internally-formatted data stored in SRC to an
2808 external format, according to the encoding stream ENCODING. 2495 external format, according to the encoding stream ENCODING.
2809 Store the encoded data into DST. */ 2496 Store the encoded data into DST. */
2810 2497
2811 static void 2498 static void
2812 mule_encode (Lstream *encoding, const unsigned char *src, 2499 mule_encode (Lstream *encoding, CONST unsigned char *src,
2813 unsigned_char_dynarr *dst, unsigned int n) 2500 unsigned_char_dynarr *dst, unsigned int n)
2814 { 2501 {
2815 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 2502 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
2816 2503
2817 switch (CODING_SYSTEM_TYPE (str->codesys)) 2504 switch (CODING_SYSTEM_TYPE (str->codesys))
2839 break; 2526 break;
2840 case CODESYS_UTF8: 2527 case CODESYS_UTF8:
2841 encode_coding_utf8 (encoding, src, dst, n); 2528 encode_coding_utf8 (encoding, src, dst, n);
2842 break; 2529 break;
2843 case CODESYS_CCL: 2530 case CODESYS_CCL:
2844 str->ccl.last_block = str->flags & CODING_STATE_END; 2531 ccl_driver (&str->ccl, src, dst, n, 0);
2845 ccl_driver (&str->ccl, src, dst, n, 0, CCL_MODE_ENCODING);
2846 break; 2532 break;
2847 case CODESYS_ISO2022: 2533 case CODESYS_ISO2022:
2848 encode_coding_iso2022 (encoding, src, dst, n); 2534 encode_coding_iso2022 (encoding, src, dst, n);
2849 break; 2535 break;
2850 #endif /* MULE */ 2536 #endif /* MULE */
2891 while (1) 2577 while (1)
2892 { 2578 {
2893 char tempbuf[1024]; /* some random amount */ 2579 char tempbuf[1024]; /* some random amount */
2894 Bufpos newpos, even_newer_pos; 2580 Bufpos newpos, even_newer_pos;
2895 Bufpos oldpos = lisp_buffer_stream_startpos (istr); 2581 Bufpos oldpos = lisp_buffer_stream_startpos (istr);
2896 ssize_t size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf)); 2582 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
2897 2583
2898 if (!size_in_bytes) 2584 if (!size_in_bytes)
2899 break; 2585 break;
2900 newpos = lisp_buffer_stream_startpos (istr); 2586 newpos = lisp_buffer_stream_startpos (istr);
2901 Lstream_write (ostr, tempbuf, size_in_bytes); 2587 Lstream_write (ostr, tempbuf, size_in_bytes);
2954 2640
2955 #define BYTE_SJIS_KATAKANA_P(c) \ 2641 #define BYTE_SJIS_KATAKANA_P(c) \
2956 ((c) >= 0xA1 && (c) <= 0xDF) 2642 ((c) >= 0xA1 && (c) <= 0xDF)
2957 2643
2958 static int 2644 static int
2959 detect_coding_sjis (struct detection_state *st, const unsigned char *src, 2645 detect_coding_sjis (struct detection_state *st, CONST unsigned char *src,
2960 unsigned int n) 2646 unsigned int n)
2961 { 2647 {
2962 int c; 2648 int c;
2963 2649
2964 while (n--) 2650 while (n--)
2979 } 2665 }
2980 2666
2981 /* Convert Shift-JIS data to internal format. */ 2667 /* Convert Shift-JIS data to internal format. */
2982 2668
2983 static void 2669 static void
2984 decode_coding_sjis (Lstream *decoding, const unsigned char *src, 2670 decode_coding_sjis (Lstream *decoding, CONST unsigned char *src,
2985 unsigned_char_dynarr *dst, unsigned int n) 2671 unsigned_char_dynarr *dst, unsigned int n)
2986 { 2672 {
2987 unsigned char c; 2673 unsigned char c;
2988 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 2674 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
2989 unsigned int flags = str->flags; 2675 unsigned int flags = str->flags;
3036 } 2722 }
3037 2723
3038 /* Convert internally-formatted data to Shift-JIS. */ 2724 /* Convert internally-formatted data to Shift-JIS. */
3039 2725
3040 static void 2726 static void
3041 encode_coding_sjis (Lstream *encoding, const unsigned char *src, 2727 encode_coding_sjis (Lstream *encoding, CONST unsigned char *src,
3042 unsigned_char_dynarr *dst, unsigned int n) 2728 unsigned_char_dynarr *dst, unsigned int n)
3043 { 2729 {
3044 unsigned char c; 2730 unsigned char c;
3045 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 2731 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3046 unsigned int flags = str->flags; 2732 unsigned int flags = str->flags;
3240 b2 = I % BIG5_SAME_ROW; \ 2926 b2 = I % BIG5_SAME_ROW; \
3241 b2 += b2 < 0x3F ? 0x40 : 0x62; \ 2927 b2 += b2 < 0x3F ? 0x40 : 0x62; \
3242 } while (0) 2928 } while (0)
3243 2929
3244 static int 2930 static int
3245 detect_coding_big5 (struct detection_state *st, const unsigned char *src, 2931 detect_coding_big5 (struct detection_state *st, CONST unsigned char *src,
3246 unsigned int n) 2932 unsigned int n)
3247 { 2933 {
3248 int c; 2934 int c;
3249 2935
3250 while (n--) 2936 while (n--)
3266 } 2952 }
3267 2953
3268 /* Convert Big5 data to internal format. */ 2954 /* Convert Big5 data to internal format. */
3269 2955
3270 static void 2956 static void
3271 decode_coding_big5 (Lstream *decoding, const unsigned char *src, 2957 decode_coding_big5 (Lstream *decoding, CONST unsigned char *src,
3272 unsigned_char_dynarr *dst, unsigned int n) 2958 unsigned_char_dynarr *dst, unsigned int n)
3273 { 2959 {
3274 unsigned char c; 2960 unsigned char c;
3275 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 2961 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3276 unsigned int flags = str->flags; 2962 unsigned int flags = str->flags;
3316 } 3002 }
3317 3003
3318 /* Convert internally-formatted data to Big5. */ 3004 /* Convert internally-formatted data to Big5. */
3319 3005
3320 static void 3006 static void
3321 encode_coding_big5 (Lstream *encoding, const unsigned char *src, 3007 encode_coding_big5 (Lstream *encoding, CONST unsigned char *src,
3322 unsigned_char_dynarr *dst, unsigned int n) 3008 unsigned_char_dynarr *dst, unsigned int n)
3323 { 3009 {
3324 unsigned char c; 3010 unsigned char c;
3325 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3011 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3326 unsigned int flags = str->flags; 3012 unsigned int flags = str->flags;
3433 /* */ 3119 /* */
3434 /* UCS-4 character codes are implemented as nonnegative integers. */ 3120 /* UCS-4 character codes are implemented as nonnegative integers. */
3435 /* */ 3121 /* */
3436 /************************************************************************/ 3122 /************************************************************************/
3437 3123
3124 Lisp_Object ucs_to_mule_table[65536];
3125 Lisp_Object mule_to_ucs_table;
3438 3126
3439 DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /* 3127 DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /*
3440 Map UCS-4 code CODE to Mule character CHARACTER. 3128 Map UCS-4 code CODE to Mule character CHARACTER.
3441 3129
3442 Return T on success, NIL on failure. 3130 Return T on success, NIL on failure.
3447 3135
3448 CHECK_CHAR (character); 3136 CHECK_CHAR (character);
3449 CHECK_INT (code); 3137 CHECK_INT (code);
3450 c = XINT (code); 3138 c = XINT (code);
3451 3139
3452 if (c < sizeof (fcd->ucs_to_mule_table)) 3140 if (c < sizeof (ucs_to_mule_table))
3453 { 3141 {
3454 fcd->ucs_to_mule_table[c] = character; 3142 ucs_to_mule_table[c] = character;
3455 return Qt; 3143 return Qt;
3456 } 3144 }
3457 else 3145 else
3458 return Qnil; 3146 return Qnil;
3459 } 3147 }
3460 3148
3461 static Lisp_Object 3149 static Lisp_Object
3462 ucs_to_char (unsigned long code) 3150 ucs_to_char (unsigned long code)
3463 { 3151 {
3464 if (code < sizeof (fcd->ucs_to_mule_table)) 3152 if (code < sizeof (ucs_to_mule_table))
3465 { 3153 {
3466 return fcd->ucs_to_mule_table[code]; 3154 return ucs_to_mule_table[code];
3467 } 3155 }
3468 else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14)) 3156 else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14))
3469 { 3157 {
3470 unsigned int c; 3158 unsigned int c;
3471 3159
3583 Dynarr_add (dst, (code >> 8) & 255); 3271 Dynarr_add (dst, (code >> 8) & 255);
3584 Dynarr_add (dst, code & 255); 3272 Dynarr_add (dst, code & 255);
3585 } 3273 }
3586 3274
3587 static int 3275 static int
3588 detect_coding_ucs4 (struct detection_state *st, const unsigned char *src, 3276 detect_coding_ucs4 (struct detection_state *st, CONST unsigned char *src,
3589 unsigned int n) 3277 unsigned int n)
3590 { 3278 {
3591 while (n--) 3279 while (n--)
3592 { 3280 {
3593 int c = *src++; 3281 int c = *src++;
3608 } 3296 }
3609 return CODING_CATEGORY_UCS4_MASK; 3297 return CODING_CATEGORY_UCS4_MASK;
3610 } 3298 }
3611 3299
3612 static void 3300 static void
3613 decode_coding_ucs4 (Lstream *decoding, const unsigned char *src, 3301 decode_coding_ucs4 (Lstream *decoding, CONST unsigned char *src,
3614 unsigned_char_dynarr *dst, unsigned int n) 3302 unsigned_char_dynarr *dst, unsigned int n)
3615 { 3303 {
3616 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3304 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3617 unsigned int flags = str->flags; 3305 unsigned int flags = str->flags;
3618 unsigned int ch = str->ch; 3306 unsigned int ch = str->ch;
3619 unsigned char counter = str->counter;
3620 3307
3621 while (n--) 3308 while (n--)
3622 { 3309 {
3623 unsigned char c = *src++; 3310 unsigned char c = *src++;
3624 switch (counter) 3311 switch (flags)
3625 { 3312 {
3626 case 0: 3313 case 0:
3627 ch = c; 3314 ch = c;
3628 counter = 3; 3315 flags = 3;
3629 break; 3316 break;
3630 case 1: 3317 case 1:
3631 decode_ucs4 ( ( ch << 8 ) | c, dst); 3318 decode_ucs4 ( ( ch << 8 ) | c, dst);
3632 ch = 0; 3319 ch = 0;
3633 counter = 0; 3320 flags = 0;
3634 break; 3321 break;
3635 default: 3322 default:
3636 ch = ( ch << 8 ) | c; 3323 ch = ( ch << 8 ) | c;
3637 counter--; 3324 flags--;
3638 } 3325 }
3639 } 3326 }
3640 if (counter & CODING_STATE_END) 3327 if (flags & CODING_STATE_END)
3641 DECODE_OUTPUT_PARTIAL_CHAR (ch); 3328 DECODE_OUTPUT_PARTIAL_CHAR (ch);
3642 3329
3643 str->flags = flags; 3330 str->flags = flags;
3644 str->ch = ch; 3331 str->ch = ch;
3645 str->counter = counter;
3646 } 3332 }
3647 3333
3648 static void 3334 static void
3649 encode_coding_ucs4 (Lstream *encoding, const unsigned char *src, 3335 encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src,
3650 unsigned_char_dynarr *dst, unsigned int n) 3336 unsigned_char_dynarr *dst, unsigned int n)
3651 { 3337 {
3652 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3338 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3653 unsigned int flags = str->flags; 3339 unsigned int flags = str->flags;
3654 unsigned int ch = str->ch; 3340 unsigned int ch = str->ch;
3657 3343
3658 #ifdef ENABLE_COMPOSITE_CHARS 3344 #ifdef ENABLE_COMPOSITE_CHARS
3659 /* flags for handling composite chars. We do a little switcharoo 3345 /* flags for handling composite chars. We do a little switcharoo
3660 on the source while we're outputting the composite char. */ 3346 on the source while we're outputting the composite char. */
3661 unsigned int saved_n = 0; 3347 unsigned int saved_n = 0;
3662 const unsigned char *saved_src = NULL; 3348 CONST unsigned char *saved_src = NULL;
3663 int in_composite = 0; 3349 int in_composite = 0;
3664 3350
3665 back_to_square_n: 3351 back_to_square_n:
3666 #endif 3352 #endif
3667 3353
3783 /************************************************************************/ 3469 /************************************************************************/
3784 /* UTF-8 methods */ 3470 /* UTF-8 methods */
3785 /************************************************************************/ 3471 /************************************************************************/
3786 3472
3787 static int 3473 static int
3788 detect_coding_utf8 (struct detection_state *st, const unsigned char *src, 3474 detect_coding_utf8 (struct detection_state *st, CONST unsigned char *src,
3789 unsigned int n) 3475 unsigned int n)
3790 { 3476 {
3791 while (n--) 3477 while (n--)
3792 { 3478 {
3793 unsigned char c = *src++; 3479 unsigned char c = *src++;
3818 } 3504 }
3819 return CODING_CATEGORY_UTF8_MASK; 3505 return CODING_CATEGORY_UTF8_MASK;
3820 } 3506 }
3821 3507
3822 static void 3508 static void
3823 decode_coding_utf8 (Lstream *decoding, const unsigned char *src, 3509 decode_coding_utf8 (Lstream *decoding, CONST unsigned char *src,
3824 unsigned_char_dynarr *dst, unsigned int n) 3510 unsigned_char_dynarr *dst, unsigned int n)
3825 { 3511 {
3826 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 3512 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
3827 unsigned int flags = str->flags; 3513 unsigned int flags = str->flags;
3828 unsigned int ch = str->ch; 3514 unsigned int ch = str->ch;
3829 eol_type_t eol_type = str->eol_type; 3515 eol_type_t eol_type = str->eol_type;
3830 unsigned char counter = str->counter;
3831 3516
3832 while (n--) 3517 while (n--)
3833 { 3518 {
3834 unsigned char c = *src++; 3519 unsigned char c = *src++;
3835 switch (counter) 3520 switch (flags)
3836 { 3521 {
3837 case 0: 3522 case 0:
3838 if ( c >= 0xfc ) 3523 if ( c >= 0xfc )
3839 { 3524 {
3840 ch = c & 0x01; 3525 ch = c & 0x01;
3841 counter = 5; 3526 flags = 5;
3842 } 3527 }
3843 else if ( c >= 0xf8 ) 3528 else if ( c >= 0xf8 )
3844 { 3529 {
3845 ch = c & 0x03; 3530 ch = c & 0x03;
3846 counter = 4; 3531 flags = 4;
3847 } 3532 }
3848 else if ( c >= 0xf0 ) 3533 else if ( c >= 0xf0 )
3849 { 3534 {
3850 ch = c & 0x07; 3535 ch = c & 0x07;
3851 counter = 3; 3536 flags = 3;
3852 } 3537 }
3853 else if ( c >= 0xe0 ) 3538 else if ( c >= 0xe0 )
3854 { 3539 {
3855 ch = c & 0x0f; 3540 ch = c & 0x0f;
3856 counter = 2; 3541 flags = 2;
3857 } 3542 }
3858 else if ( c >= 0xc0 ) 3543 else if ( c >= 0xc0 )
3859 { 3544 {
3860 ch = c & 0x1f; 3545 ch = c & 0x1f;
3861 counter = 1; 3546 flags = 1;
3862 } 3547 }
3863 else 3548 else
3864 { 3549 {
3865 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); 3550 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst);
3866 decode_ucs4 (c, dst); 3551 decode_ucs4 (c, dst);
3868 break; 3553 break;
3869 case 1: 3554 case 1:
3870 ch = ( ch << 6 ) | ( c & 0x3f ); 3555 ch = ( ch << 6 ) | ( c & 0x3f );
3871 decode_ucs4 (ch, dst); 3556 decode_ucs4 (ch, dst);
3872 ch = 0; 3557 ch = 0;
3873 counter = 0; 3558 flags = 0;
3874 break; 3559 break;
3875 default: 3560 default:
3876 ch = ( ch << 6 ) | ( c & 0x3f ); 3561 ch = ( ch << 6 ) | ( c & 0x3f );
3877 counter--; 3562 flags--;
3878 } 3563 }
3879 label_continue_loop:; 3564 label_continue_loop:;
3880 } 3565 }
3881 3566
3882 if (flags & CODING_STATE_END) 3567 if (flags & CODING_STATE_END)
3883 DECODE_OUTPUT_PARTIAL_CHAR (ch); 3568 DECODE_OUTPUT_PARTIAL_CHAR (ch);
3884 3569
3885 str->flags = flags; 3570 str->flags = flags;
3886 str->ch = ch; 3571 str->ch = ch;
3887 str->counter = counter;
3888 } 3572 }
3889 3573
3890 static void 3574 static void
3891 encode_utf8 (Lisp_Object charset, 3575 encode_utf8 (Lisp_Object charset,
3892 unsigned char h, unsigned char l, unsigned_char_dynarr *dst) 3576 unsigned char h, unsigned char l, unsigned_char_dynarr *dst)
3932 Dynarr_add (dst, (code & 0x3f) | 0x80); 3616 Dynarr_add (dst, (code & 0x3f) | 0x80);
3933 } 3617 }
3934 } 3618 }
3935 3619
3936 static void 3620 static void
3937 encode_coding_utf8 (Lstream *encoding, const unsigned char *src, 3621 encode_coding_utf8 (Lstream *encoding, CONST unsigned char *src,
3938 unsigned_char_dynarr *dst, unsigned int n) 3622 unsigned_char_dynarr *dst, unsigned int n)
3939 { 3623 {
3940 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 3624 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
3941 unsigned int flags = str->flags; 3625 unsigned int flags = str->flags;
3942 unsigned int ch = str->ch; 3626 unsigned int ch = str->ch;
3946 3630
3947 #ifdef ENABLE_COMPOSITE_CHARS 3631 #ifdef ENABLE_COMPOSITE_CHARS
3948 /* flags for handling composite chars. We do a little switcharoo 3632 /* flags for handling composite chars. We do a little switcharoo
3949 on the source while we're outputting the composite char. */ 3633 on the source while we're outputting the composite char. */
3950 unsigned int saved_n = 0; 3634 unsigned int saved_n = 0;
3951 const unsigned char *saved_src = NULL; 3635 CONST unsigned char *saved_src = NULL;
3952 int in_composite = 0; 3636 int in_composite = 0;
3953 3637
3954 back_to_square_n: 3638 back_to_square_n:
3955 #endif /* ENABLE_COMPOSITE_CHARS */ 3639 #endif /* ENABLE_COMPOSITE_CHARS */
3956 3640
3957 while (n--) 3641 while (n--)
3958 { 3642 {
3959 unsigned char c = *src++; 3643 unsigned char c = *src++;
3960 3644
3961 if (BYTE_ASCII_P (c)) 3645 if (BYTE_ASCII_P (c))
4650 iso->switched_dir_and_no_valid_charset_yet = 0; 4334 iso->switched_dir_and_no_valid_charset_yet = 0;
4651 return 1; 4335 return 1;
4652 } 4336 }
4653 4337
4654 static int 4338 static int
4655 detect_coding_iso2022 (struct detection_state *st, const unsigned char *src, 4339 detect_coding_iso2022 (struct detection_state *st, CONST unsigned char *src,
4656 unsigned int n) 4340 unsigned int n)
4657 { 4341 {
4658 int mask; 4342 int mask;
4659 4343
4660 /* #### There are serious deficiencies in the recognition mechanism 4344 /* #### There are serious deficiencies in the recognition mechanism
4841 } 4525 }
4842 4526
4843 /* Convert ISO2022-format data to internal format. */ 4527 /* Convert ISO2022-format data to internal format. */
4844 4528
4845 static void 4529 static void
4846 decode_coding_iso2022 (Lstream *decoding, const unsigned char *src, 4530 decode_coding_iso2022 (Lstream *decoding, CONST unsigned char *src,
4847 unsigned_char_dynarr *dst, unsigned int n) 4531 unsigned_char_dynarr *dst, unsigned int n)
4848 { 4532 {
4849 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 4533 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
4850 unsigned int flags = str->flags; 4534 unsigned int flags = str->flags;
4851 unsigned int ch = str->ch; 4535 unsigned int ch = str->ch;
5082 4766
5083 static void 4767 static void
5084 iso2022_designate (Lisp_Object charset, unsigned char reg, 4768 iso2022_designate (Lisp_Object charset, unsigned char reg,
5085 struct encoding_stream *str, unsigned_char_dynarr *dst) 4769 struct encoding_stream *str, unsigned_char_dynarr *dst)
5086 { 4770 {
5087 static const char inter94[] = "()*+"; 4771 static CONST char inter94[] = "()*+";
5088 static const char inter96[] = ",-./"; 4772 static CONST char inter96[] = ",-./";
5089 unsigned int type; 4773 unsigned int type;
5090 unsigned char final; 4774 unsigned char final;
5091 Lisp_Object old_charset = str->iso2022.charset[reg]; 4775 Lisp_Object old_charset = str->iso2022.charset[reg];
5092 4776
5093 str->iso2022.charset[reg] = charset; 4777 str->iso2022.charset[reg] = charset;
5167 } 4851 }
5168 4852
5169 /* Convert internally-formatted data to ISO2022 format. */ 4853 /* Convert internally-formatted data to ISO2022 format. */
5170 4854
5171 static void 4855 static void
5172 encode_coding_iso2022 (Lstream *encoding, const unsigned char *src, 4856 encode_coding_iso2022 (Lstream *encoding, CONST unsigned char *src,
5173 unsigned_char_dynarr *dst, unsigned int n) 4857 unsigned_char_dynarr *dst, unsigned int n)
5174 { 4858 {
5175 unsigned char charmask, c; 4859 unsigned char charmask, c;
5176 unsigned char char_boundary; 4860 unsigned char char_boundary;
5177 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 4861 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
5185 4869
5186 #ifdef ENABLE_COMPOSITE_CHARS 4870 #ifdef ENABLE_COMPOSITE_CHARS
5187 /* flags for handling composite chars. We do a little switcharoo 4871 /* flags for handling composite chars. We do a little switcharoo
5188 on the source while we're outputting the composite char. */ 4872 on the source while we're outputting the composite char. */
5189 unsigned int saved_n = 0; 4873 unsigned int saved_n = 0;
5190 const unsigned char *saved_src = NULL; 4874 CONST unsigned char *saved_src = NULL;
5191 int in_composite = 0; 4875 int in_composite = 0;
5192 #endif /* ENABLE_COMPOSITE_CHARS */ 4876 #endif /* ENABLE_COMPOSITE_CHARS */
5193 4877
5194 char_boundary = str->iso2022.current_char_boundary; 4878 char_boundary = str->iso2022.current_char_boundary;
5195 charset = str->iso2022.current_charset; 4879 charset = str->iso2022.current_charset;
5476 5160
5477 /* This is used when reading in "binary" files -- i.e. files that may 5161 /* This is used when reading in "binary" files -- i.e. files that may
5478 contain all 256 possible byte values and that are not to be 5162 contain all 256 possible byte values and that are not to be
5479 interpreted as being in any particular decoding. */ 5163 interpreted as being in any particular decoding. */
5480 static void 5164 static void
5481 decode_coding_no_conversion (Lstream *decoding, const unsigned char *src, 5165 decode_coding_no_conversion (Lstream *decoding, CONST unsigned char *src,
5482 unsigned_char_dynarr *dst, unsigned int n) 5166 unsigned_char_dynarr *dst, unsigned int n)
5483 { 5167 {
5484 unsigned char c; 5168 unsigned char c;
5485 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); 5169 struct decoding_stream *str = DECODING_STREAM_DATA (decoding);
5486 unsigned int flags = str->flags; 5170 unsigned int flags = str->flags;
5501 str->flags = flags; 5185 str->flags = flags;
5502 str->ch = ch; 5186 str->ch = ch;
5503 } 5187 }
5504 5188
5505 static void 5189 static void
5506 encode_coding_no_conversion (Lstream *encoding, const unsigned char *src, 5190 encode_coding_no_conversion (Lstream *encoding, CONST unsigned char *src,
5507 unsigned_char_dynarr *dst, unsigned int n) 5191 unsigned_char_dynarr *dst, unsigned int n)
5508 { 5192 {
5509 unsigned char c; 5193 unsigned char c;
5510 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding); 5194 struct encoding_stream *str = ENCODING_STREAM_DATA (encoding);
5511 unsigned int flags = str->flags; 5195 unsigned int flags = str->flags;
5555 str->flags = flags; 5239 str->flags = flags;
5556 str->ch = ch; 5240 str->ch = ch;
5557 } 5241 }
5558 5242
5559 5243
5560 5244 /************************************************************************/
5245 /* Simple internal/external functions */
5246 /************************************************************************/
5247
5248 static Extbyte_dynarr *conversion_out_dynarr;
5249 static Bufbyte_dynarr *conversion_in_dynarr;
5250
5251 /* Determine coding system from coding format */
5252
5253 /* #### not correct for all values of `fmt'! */
5254 static Lisp_Object
5255 external_data_format_to_coding_system (enum external_data_format fmt)
5256 {
5257 switch (fmt)
5258 {
5259 case FORMAT_FILENAME:
5260 case FORMAT_TERMINAL:
5261 if (EQ (Vfile_name_coding_system, Qnil) ||
5262 EQ (Vfile_name_coding_system, Qbinary))
5263 return Qnil;
5264 else
5265 return Fget_coding_system (Vfile_name_coding_system);
5266 #ifdef MULE
5267 case FORMAT_CTEXT:
5268 return Fget_coding_system (Qctext);
5269 #endif
5270 default:
5271 return Qnil;
5272 }
5273 }
5274
5275 Extbyte *
5276 convert_to_external_format (CONST Bufbyte *ptr,
5277 Bytecount len,
5278 Extcount *len_out,
5279 enum external_data_format fmt)
5280 {
5281 Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
5282
5283 if (!conversion_out_dynarr)
5284 conversion_out_dynarr = Dynarr_new (Extbyte);
5285 else
5286 Dynarr_reset (conversion_out_dynarr);
5287
5288 if (NILP (coding_system))
5289 {
5290 CONST Bufbyte *end = ptr + len;
5291
5292 for (; ptr < end;)
5293 {
5294 Bufbyte c =
5295 (BYTE_ASCII_P (*ptr)) ? *ptr :
5296 (*ptr == LEADING_BYTE_CONTROL_1) ? (*(ptr+1) - 0x20) :
5297 (*ptr == LEADING_BYTE_LATIN_ISO8859_1) ? (*(ptr+1)) :
5298 '~';
5299
5300 Dynarr_add (conversion_out_dynarr, (Extbyte) c);
5301 INC_CHARPTR (ptr);
5302 }
5303
5304 #ifdef ERROR_CHECK_BUFPOS
5305 assert (ptr == end);
5306 #endif
5307 }
5308 else
5309 {
5310 Lisp_Object instream, outstream, da_outstream;
5311 Lstream *istr, *ostr;
5312 struct gcpro gcpro1, gcpro2, gcpro3;
5313 char tempbuf[1024]; /* some random amount */
5314
5315 instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
5316 da_outstream = make_dynarr_output_stream
5317 ((unsigned_char_dynarr *) conversion_out_dynarr);
5318 outstream =
5319 make_encoding_output_stream (XLSTREAM (da_outstream), coding_system);
5320 istr = XLSTREAM (instream);
5321 ostr = XLSTREAM (outstream);
5322 GCPRO3 (instream, outstream, da_outstream);
5323 while (1)
5324 {
5325 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
5326 if (!size_in_bytes)
5327 break;
5328 Lstream_write (ostr, tempbuf, size_in_bytes);
5329 }
5330 Lstream_close (istr);
5331 Lstream_close (ostr);
5332 UNGCPRO;
5333 Lstream_delete (istr);
5334 Lstream_delete (ostr);
5335 Lstream_delete (XLSTREAM (da_outstream));
5336 }
5337
5338 *len_out = Dynarr_length (conversion_out_dynarr);
5339 Dynarr_add (conversion_out_dynarr, 0); /* remember to zero-terminate! */
5340 return Dynarr_atp (conversion_out_dynarr, 0);
5341 }
5342
5343 Bufbyte *
5344 convert_from_external_format (CONST Extbyte *ptr,
5345 Extcount len,
5346 Bytecount *len_out,
5347 enum external_data_format fmt)
5348 {
5349 Lisp_Object coding_system = external_data_format_to_coding_system (fmt);
5350
5351 if (!conversion_in_dynarr)
5352 conversion_in_dynarr = Dynarr_new (Bufbyte);
5353 else
5354 Dynarr_reset (conversion_in_dynarr);
5355
5356 if (NILP (coding_system))
5357 {
5358 CONST Extbyte *end = ptr + len;
5359 for (; ptr < end; ptr++)
5360 {
5361 Extbyte c = *ptr;
5362 DECODE_ADD_BINARY_CHAR (c, conversion_in_dynarr);
5363 }
5364 }
5365 else
5366 {
5367 Lisp_Object instream, outstream, da_outstream;
5368 Lstream *istr, *ostr;
5369 struct gcpro gcpro1, gcpro2, gcpro3;
5370 char tempbuf[1024]; /* some random amount */
5371
5372 instream = make_fixed_buffer_input_stream ((unsigned char *) ptr, len);
5373 da_outstream = make_dynarr_output_stream
5374 ((unsigned_char_dynarr *) conversion_in_dynarr);
5375 outstream =
5376 make_decoding_output_stream (XLSTREAM (da_outstream), coding_system);
5377 istr = XLSTREAM (instream);
5378 ostr = XLSTREAM (outstream);
5379 GCPRO3 (instream, outstream, da_outstream);
5380 while (1)
5381 {
5382 int size_in_bytes = Lstream_read (istr, tempbuf, sizeof (tempbuf));
5383 if (!size_in_bytes)
5384 break;
5385 Lstream_write (ostr, tempbuf, size_in_bytes);
5386 }
5387 Lstream_close (istr);
5388 Lstream_close (ostr);
5389 UNGCPRO;
5390 Lstream_delete (istr);
5391 Lstream_delete (ostr);
5392 Lstream_delete (XLSTREAM (da_outstream));
5393 }
5394
5395 *len_out = Dynarr_length (conversion_in_dynarr);
5396 Dynarr_add (conversion_in_dynarr, 0); /* remember to zero-terminate! */
5397 return Dynarr_atp (conversion_in_dynarr, 0);
5398 }
5399
5400
5561 /************************************************************************/ 5401 /************************************************************************/
5562 /* Initialization */ 5402 /* Initialization */
5563 /************************************************************************/ 5403 /************************************************************************/
5564 5404
5565 void 5405 void
5566 syms_of_file_coding (void) 5406 syms_of_mule_coding (void)
5567 { 5407 {
5568 INIT_LRECORD_IMPLEMENTATION (coding_system); 5408 defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system");
5569
5570 deferror (&Qcoding_system_error, "coding-system-error", 5409 deferror (&Qcoding_system_error, "coding-system-error",
5571 "Coding-system error", Qio_error); 5410 "Coding-system error", Qio_error);
5572 5411
5573 DEFSUBR (Fcoding_system_p); 5412 DEFSUBR (Fcoding_system_p);
5574 DEFSUBR (Ffind_coding_system); 5413 DEFSUBR (Ffind_coding_system);
5575 DEFSUBR (Fget_coding_system); 5414 DEFSUBR (Fget_coding_system);
5576 DEFSUBR (Fcoding_system_list); 5415 DEFSUBR (Fcoding_system_list);
5577 DEFSUBR (Fcoding_system_name); 5416 DEFSUBR (Fcoding_system_name);
5578 DEFSUBR (Fmake_coding_system); 5417 DEFSUBR (Fmake_coding_system);
5579 DEFSUBR (Fcopy_coding_system); 5418 DEFSUBR (Fcopy_coding_system);
5580 DEFSUBR (Fcoding_system_canonical_name_p);
5581 DEFSUBR (Fcoding_system_alias_p);
5582 DEFSUBR (Fcoding_system_aliasee);
5583 DEFSUBR (Fdefine_coding_system_alias); 5419 DEFSUBR (Fdefine_coding_system_alias);
5584 DEFSUBR (Fsubsidiary_coding_system); 5420 DEFSUBR (Fsubsidiary_coding_system);
5585 5421
5586 DEFSUBR (Fcoding_system_type); 5422 DEFSUBR (Fcoding_system_type);
5587 DEFSUBR (Fcoding_system_doc_string); 5423 DEFSUBR (Fcoding_system_doc_string);
5607 DEFSUBR (Fset_ucs_char); 5443 DEFSUBR (Fset_ucs_char);
5608 DEFSUBR (Fucs_char); 5444 DEFSUBR (Fucs_char);
5609 DEFSUBR (Fset_char_ucs); 5445 DEFSUBR (Fset_char_ucs);
5610 DEFSUBR (Fchar_ucs); 5446 DEFSUBR (Fchar_ucs);
5611 #endif /* MULE */ 5447 #endif /* MULE */
5612 defsymbol (&Qcoding_systemp, "coding-system-p"); 5448 defsymbol (&Qcoding_system_p, "coding-system-p");
5613 defsymbol (&Qno_conversion, "no-conversion"); 5449 defsymbol (&Qno_conversion, "no-conversion");
5614 defsymbol (&Qraw_text, "raw-text");
5615 #ifdef MULE 5450 #ifdef MULE
5616 defsymbol (&Qbig5, "big5"); 5451 defsymbol (&Qbig5, "big5");
5617 defsymbol (&Qshift_jis, "shift-jis"); 5452 defsymbol (&Qshift_jis, "shift-jis");
5618 defsymbol (&Qucs4, "ucs-4"); 5453 defsymbol (&Qucs4, "ucs-4");
5619 defsymbol (&Qutf8, "utf-8"); 5454 defsymbol (&Qutf8, "utf-8");
5653 #endif /* MULE */ 5488 #endif /* MULE */
5654 defsymbol (&Qencode, "encode"); 5489 defsymbol (&Qencode, "encode");
5655 defsymbol (&Qdecode, "decode"); 5490 defsymbol (&Qdecode, "decode");
5656 5491
5657 #ifdef MULE 5492 #ifdef MULE
5493 defsymbol (&Qctext, "ctext");
5658 defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS], 5494 defsymbol (&coding_category_symbol[CODING_CATEGORY_SHIFT_JIS],
5659 "shift-jis"); 5495 "shift-jis");
5660 defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5], 5496 defsymbol (&coding_category_symbol[CODING_CATEGORY_BIG5],
5661 "big5"); 5497 "big5");
5662 defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4], 5498 defsymbol (&coding_category_symbol[CODING_CATEGORY_UCS4],
5677 defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION], 5513 defsymbol (&coding_category_symbol[CODING_CATEGORY_NO_CONVERSION],
5678 "no-conversion"); 5514 "no-conversion");
5679 } 5515 }
5680 5516
5681 void 5517 void
5682 lstream_type_create_file_coding (void) 5518 lstream_type_create_mule_coding (void)
5683 { 5519 {
5684 LSTREAM_HAS_METHOD (decoding, reader); 5520 LSTREAM_HAS_METHOD (decoding, reader);
5685 LSTREAM_HAS_METHOD (decoding, writer); 5521 LSTREAM_HAS_METHOD (decoding, writer);
5686 LSTREAM_HAS_METHOD (decoding, rewinder); 5522 LSTREAM_HAS_METHOD (decoding, rewinder);
5687 LSTREAM_HAS_METHOD (decoding, seekable_p); 5523 LSTREAM_HAS_METHOD (decoding, seekable_p);
5697 LSTREAM_HAS_METHOD (encoding, closer); 5533 LSTREAM_HAS_METHOD (encoding, closer);
5698 LSTREAM_HAS_METHOD (encoding, marker); 5534 LSTREAM_HAS_METHOD (encoding, marker);
5699 } 5535 }
5700 5536
5701 void 5537 void
5702 vars_of_file_coding (void) 5538 vars_of_mule_coding (void)
5703 { 5539 {
5704 int i; 5540 int i;
5705
5706 fcd = xnew (struct file_coding_dump);
5707 dumpstruct (&fcd, &fcd_description);
5708 5541
5709 /* Initialize to something reasonable ... */ 5542 /* Initialize to something reasonable ... */
5710 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 5543 for (i = 0; i <= CODING_CATEGORY_LAST; i++)
5711 { 5544 {
5712 fcd->coding_category_system[i] = Qnil; 5545 coding_category_system[i] = Qnil;
5713 fcd->coding_category_by_priority[i] = i; 5546 coding_category_by_priority[i] = i;
5714 } 5547 }
5715 5548
5716 Fprovide (intern ("file-coding")); 5549 Fprovide (intern ("file-coding"));
5717 5550
5718 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* 5551 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /*
5726 Not used under a windowing system. 5559 Not used under a windowing system.
5727 */ ); 5560 */ );
5728 Vterminal_coding_system = Qnil; 5561 Vterminal_coding_system = Qnil;
5729 5562
5730 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /* 5563 DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read /*
5731 Overriding coding system used when reading from a file or process. 5564 Overriding coding system used when writing a file or process.
5732 You should bind this variable with `let', but do not set it globally. 5565 You should *bind* this, not set it. If this is non-nil, it specifies
5733 If this is non-nil, it specifies the coding system that will be used 5566 the coding system that will be used when a file or process is read
5734 to decode input on read operations, such as from a file or process. 5567 in, and overrides `buffer-file-coding-system-for-read',
5735 It overrides `buffer-file-coding-system-for-read',
5736 `insert-file-contents-pre-hook', etc. Use those variables instead of 5568 `insert-file-contents-pre-hook', etc. Use those variables instead of
5737 this one for permanent changes to the environment. */ ); 5569 this one for permanent changes to the environment.
5570 */ );
5738 Vcoding_system_for_read = Qnil; 5571 Vcoding_system_for_read = Qnil;
5739 5572
5740 DEFVAR_LISP ("coding-system-for-write", 5573 DEFVAR_LISP ("coding-system-for-write",
5741 &Vcoding_system_for_write /* 5574 &Vcoding_system_for_write /*
5742 Overriding coding system used when writing to a file or process. 5575 Overriding coding system used when writing a file or process.
5743 You should bind this variable with `let', but do not set it globally. 5576 You should *bind* this, not set it. If this is non-nil, it specifies
5744 If this is non-nil, it specifies the coding system that will be used 5577 the coding system that will be used when a file or process is wrote
5745 to encode output for write operations, such as to a file or process. 5578 in, and overrides `buffer-file-coding-system',
5746 It overrides `buffer-file-coding-system', `write-region-pre-hook', etc. 5579 `write-region-pre-hook', etc. Use those variables instead of this one
5747 Use those variables instead of this one for permanent changes to the 5580 for permanent changes to the environment.
5748 environment. */ ); 5581 */ );
5749 Vcoding_system_for_write = Qnil; 5582 Vcoding_system_for_write = Qnil;
5750 5583
5751 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /* 5584 DEFVAR_LISP ("file-name-coding-system", &Vfile_name_coding_system /*
5752 Coding system used to convert pathnames when accessing files. 5585 Coding system used to convert pathnames when accessing files.
5753 */ ); 5586 */ );
5762 */ ); 5595 */ );
5763 enable_multibyte_characters = 1; 5596 enable_multibyte_characters = 1;
5764 } 5597 }
5765 5598
5766 void 5599 void
5767 complex_vars_of_file_coding (void) 5600 complex_vars_of_mule_coding (void)
5768 { 5601 {
5769 staticpro (&Vcoding_system_hash_table); 5602 staticpro (&Vcoding_system_hash_table);
5770 Vcoding_system_hash_table = 5603 Vcoding_system_hash_table =
5771 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); 5604 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
5772 5605
5773 the_codesys_prop_dynarr = Dynarr_new (codesys_prop); 5606 the_codesys_prop_dynarr = Dynarr_new (codesys_prop);
5774 dumpstruct (&the_codesys_prop_dynarr, &codesys_prop_dynarr_description);
5775 5607
5776 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \ 5608 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \
5777 { \ 5609 { \
5778 struct codesys_prop csp; \ 5610 struct codesys_prop csp; \
5779 csp.sym = (Sym); \ 5611 csp.sym = (Sym); \
5809 5641
5810 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode); 5642 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qencode);
5811 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode); 5643 DEFINE_CODESYS_PROP (CODESYS_PROP_CCL, Qdecode);
5812 #endif /* MULE */ 5644 #endif /* MULE */
5813 /* Need to create this here or we're really screwed. */ 5645 /* Need to create this here or we're really screwed. */
5814 Fmake_coding_system 5646 Fmake_coding_system (Qno_conversion, Qno_conversion, build_string ("No conversion"),
5815 (Qraw_text, Qno_conversion, 5647 list2 (Qmnemonic, build_string ("Noconv")));
5816 build_string ("Raw text, which means it converts only line-break-codes."), 5648
5817 list2 (Qmnemonic, build_string ("Raw"))); 5649 Fcopy_coding_system (Fcoding_system_property (Qno_conversion, Qeol_lf),
5818 5650 Qbinary);
5819 Fmake_coding_system
5820 (Qbinary, Qno_conversion,
5821 build_string ("Binary, which means it does not convert anything."),
5822 list4 (Qeol_type, Qlf,
5823 Qmnemonic, build_string ("Binary")));
5824
5825 Fdefine_coding_system_alias (Qno_conversion, Qraw_text);
5826
5827 Fdefine_coding_system_alias (Qfile_name, Qbinary);
5828
5829 Fdefine_coding_system_alias (Qterminal, Qbinary);
5830 Fdefine_coding_system_alias (Qkeyboard, Qbinary);
5831 5651
5832 /* Need this for bootstrapping */ 5652 /* Need this for bootstrapping */
5833 fcd->coding_category_system[CODING_CATEGORY_NO_CONVERSION] = 5653 coding_category_system[CODING_CATEGORY_NO_CONVERSION] =
5834 Fget_coding_system (Qraw_text); 5654 Fget_coding_system (Qno_conversion);
5835 5655
5836 #ifdef MULE 5656 #ifdef MULE
5837 { 5657 {
5838 unsigned int i; 5658 unsigned int i;
5839 5659
5840 for (i = 0; i < 65536; i++) 5660 for (i = 0; i < 65536; i++)
5841 fcd->ucs_to_mule_table[i] = Qnil; 5661 ucs_to_mule_table[i] = Qnil;
5842 } 5662 }
5843 staticpro (&mule_to_ucs_table); 5663 staticpro (&mule_to_ucs_table);
5844 mule_to_ucs_table = Fmake_char_table(Qgeneric); 5664 mule_to_ucs_table = Fmake_char_table(Qgeneric);
5845 #endif /* MULE */ 5665 #endif /* MULE */
5846 } 5666 }