comparison src/mule-coding.c @ 179:9ad43877534d r20-3b16

Import from CVS: tag r20-3b16
author cvs
date Mon, 13 Aug 2007 09:52:19 +0200
parents 8eaf7971accc
children bfd6434d15b3
comparison
equal deleted inserted replaced
178:e703507b8a00 179:9ad43877534d
314 break; 314 break;
315 } 315 }
316 } 316 }
317 } 317 }
318 318
319 static int 319 static enum eol_type
320 symbol_to_eol_type (Lisp_Object symbol) 320 symbol_to_eol_type (Lisp_Object symbol)
321 { 321 {
322 CHECK_SYMBOL (symbol); 322 CHECK_SYMBOL (symbol);
323 if (NILP (symbol)) return EOL_AUTODETECT; 323 if (NILP (symbol)) return EOL_AUTODETECT;
324 else if (EQ (symbol, Qlf)) return EOL_LF; 324 if (EQ (symbol, Qlf)) return EOL_LF;
325 else if (EQ (symbol, Qcrlf)) return EOL_CRLF; 325 if (EQ (symbol, Qcrlf)) return EOL_CRLF;
326 else if (EQ (symbol, Qcr)) return EOL_CR; 326 if (EQ (symbol, Qcr)) return EOL_CR;
327 else 327
328 signal_simple_error ("Unrecognized eol type", symbol); 328 signal_simple_error ("Unrecognized eol type", symbol);
329 329 return EOL_AUTODETECT; /* not reached */
330 return 0; /* not reached */
331 } 330 }
332 331
333 static Lisp_Object 332 static Lisp_Object
334 eol_type_to_symbol (int eol_type) 333 eol_type_to_symbol (enum eol_type type)
335 { 334 {
336 switch (eol_type) 335 switch (type)
337 { 336 {
338 case EOL_LF: return Qlf; 337 case EOL_LF: return Qlf;
339 case EOL_CRLF: return Qcrlf; 338 case EOL_CRLF: return Qcrlf;
340 case EOL_CR: return Qcr; 339 case EOL_CR: return Qcr;
341 case EOL_AUTODETECT: return Qnil; 340 case EOL_AUTODETECT: return Qnil;
342 default: abort (); 341 default: abort (); return Qnil; /* not reached */
343 } 342 }
344
345 return Qnil; /* not reached */
346 } 343 }
347 344
348 static void 345 static void
349 setup_eol_coding_systems (struct Lisp_Coding_System *codesys) 346 setup_eol_coding_systems (struct Lisp_Coding_System *codesys)
350 { 347 {
481 coding_system = Fget_coding_system (coding_system); 478 coding_system = Fget_coding_system (coding_system);
482 return XCODING_SYSTEM_NAME (coding_system); 479 return XCODING_SYSTEM_NAME (coding_system);
483 } 480 }
484 481
485 static struct Lisp_Coding_System * 482 static struct Lisp_Coding_System *
486 allocate_coding_system (int type, Lisp_Object name) 483 allocate_coding_system (enum coding_system_type type, Lisp_Object name)
487 { 484 {
488 struct Lisp_Coding_System *codesys; 485 struct Lisp_Coding_System *codesys;
489 486
490 codesys = (struct Lisp_Coding_System *) 487 codesys = (struct Lisp_Coding_System *)
491 alloc_lcrecord (sizeof (struct Lisp_Coding_System), lrecord_coding_system); 488 alloc_lcrecord (sizeof (struct Lisp_Coding_System), lrecord_coding_system);
892 } 889 }
893 return new_coding_system; 890 return new_coding_system;
894 } 891 }
895 892
896 static Lisp_Object 893 static Lisp_Object
897 subsidiary_coding_system (Lisp_Object coding_system, int eol_type) 894 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type)
898 { 895 {
899 struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); 896 struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system);
900 Lisp_Object new_coding_system; 897 Lisp_Object new_coding_system;
901 898
902 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT) 899 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT)
903 return coding_system; 900 return coding_system;
904 if (eol_type == EOL_AUTODETECT) 901
905 return coding_system; 902 switch (type)
906 903 {
907 switch (eol_type) 904 case EOL_AUTODETECT: return coding_system;
908 {
909 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; 905 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break;
910 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; 906 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break;
911 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; 907 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break;
912 default: abort (); 908 default: abort ();
913 } 909 }
1208 /* Detecting the encoding of data */ 1204 /* Detecting the encoding of data */
1209 /************************************************************************/ 1205 /************************************************************************/
1210 1206
1211 struct detection_state 1207 struct detection_state
1212 { 1208 {
1213 int eol_type; 1209 enum eol_type eol_type;
1214 int seen_non_ascii; 1210 int seen_non_ascii;
1215 int mask; 1211 int mask;
1216 1212
1217 struct 1213 struct
1218 { 1214 {
1274 /* Perhaps the only thing useful you learn from intensive Microsoft 1270 /* Perhaps the only thing useful you learn from intensive Microsoft
1275 technical interviews */ 1271 technical interviews */
1276 return (mask & (mask - 1)) == 0; 1272 return (mask & (mask - 1)) == 0;
1277 } 1273 }
1278 1274
1279 static int 1275 static enum eol_type
1280 detect_eol_type (struct detection_state *st, CONST unsigned char *src, 1276 detect_eol_type (struct detection_state *st, CONST unsigned char *src,
1281 unsigned int n) 1277 unsigned int n)
1282 { 1278 {
1283 int c; 1279 int c;
1284 1280
1426 This function does not automatically fetch subsidiary coding systems; 1422 This function does not automatically fetch subsidiary coding systems;
1427 that should be unnecessary with the explicit eol-type argument. */ 1423 that should be unnecessary with the explicit eol-type argument. */
1428 1424
1429 static void 1425 static void
1430 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out, 1426 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
1431 int *eol_type_in_out) 1427 enum eol_type *eol_type_in_out)
1432 { 1428 {
1433 struct detection_state decst; 1429 struct detection_state decst;
1434 1430
1435 if (*eol_type_in_out == EOL_AUTODETECT) 1431 if (*eol_type_in_out == EOL_AUTODETECT)
1436 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out); 1432 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out);
1505 if (detect_coding_type (&decst, random_buffer, nread, 0)) 1501 if (detect_coding_type (&decst, random_buffer, nread, 0))
1506 break; 1502 break;
1507 } 1503 }
1508 1504
1509 if (decst.mask == ~0) 1505 if (decst.mask == ~0)
1510 { 1506 val = subsidiary_coding_system (Fget_coding_system (Qautomatic_conversion),
1511 val = subsidiary_coding_system (Fget_coding_system 1507 decst.eol_type);
1512 (Qautomatic_conversion),
1513 decst.eol_type);
1514 }
1515 else 1508 else
1516 { 1509 {
1517 int i; 1510 int i;
1518 1511
1519 val = Qnil; 1512 val = Qnil;
1643 /* EOL_TYPE specifies the type of end-of-line conversion that 1636 /* EOL_TYPE specifies the type of end-of-line conversion that
1644 currently applies. We need to keep this separate from the 1637 currently applies. We need to keep this separate from the
1645 EOL type stored in CODESYS because the latter might indicate 1638 EOL type stored in CODESYS because the latter might indicate
1646 automatic EOL-type detection while the former will always 1639 automatic EOL-type detection while the former will always
1647 indicate a particular EOL type. */ 1640 indicate a particular EOL type. */
1648 int eol_type; 1641 enum eol_type eol_type;
1649 1642
1650 /* Additional ISO2022 information. We define the structure above 1643 /* Additional ISO2022 information. We define the structure above
1651 because it's also needed by the detection routines. */ 1644 because it's also needed by the detection routines. */
1652 struct iso2022_decoder iso2022; 1645 struct iso2022_decoder iso2022;
1653 1646
3309 goto not_done; 3302 goto not_done;
3310 3303
3311 default: 3304 default:
3312 if (0x28 <= c && c <= 0x2F) 3305 if (0x28 <= c && c <= 0x2F)
3313 { 3306 {
3314 iso->esc = c - 0x28 + ISO_ESC_2_8; 3307 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8);
3315 goto not_done; 3308 goto not_done;
3316 } 3309 }
3317 3310
3318 /* This function is called with CODESYS equal to nil when 3311 /* This function is called with CODESYS equal to nil when
3319 doing coding-system detection. */ 3312 doing coding-system detection. */
3412 /**** designation ****/ 3405 /**** designation ****/
3413 3406
3414 case ISO_ESC_2_4: 3407 case ISO_ESC_2_4:
3415 if (0x28 <= c && c <= 0x2F) 3408 if (0x28 <= c && c <= 0x2F)
3416 { 3409 {
3417 iso->esc = c - 0x28 + ISO_ESC_2_4_8; 3410 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8);
3418 goto not_done; 3411 goto not_done;
3419 } 3412 }
3420 if (0x40 <= c && c <= 0x42) 3413 if (0x40 <= c && c <= 0x42)
3421 { 3414 {
3422 cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c, 3415 cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c,
3428 } 3421 }
3429 return 0; 3422 return 0;
3430 3423
3431 default: 3424 default:
3432 { 3425 {
3433 int type; 3426 int type =-1;
3434 3427
3435 if (c < '0' || c > '~') 3428 if (c < '0' || c > '~')
3436 return 0; /* bad final byte */ 3429 return 0; /* bad final byte */
3437 3430
3438 if (iso->esc >= ISO_ESC_2_8 && 3431 if (iso->esc >= ISO_ESC_2_8 &&
3446 iso->esc <= ISO_ESC_2_4_15) 3439 iso->esc <= ISO_ESC_2_4_15)
3447 { 3440 {
3448 type = ((iso->esc >= ISO_ESC_2_4_12) ? 3441 type = ((iso->esc >= ISO_ESC_2_4_12) ?
3449 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94); 3442 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94);
3450 reg = (iso->esc - ISO_ESC_2_4_8) & 3; 3443 reg = (iso->esc - ISO_ESC_2_4_8) & 3;
3444 }
3445 else
3446 {
3447 /* Can this ever be reached? -slb */
3448 abort();
3451 } 3449 }
3452 3450
3453 cs = CHARSET_BY_ATTRIBUTES (type, c, 3451 cs = CHARSET_BY_ATTRIBUTES (type, c,
3454 *flags & CODING_STATE_R2L ? 3452 *flags & CODING_STATE_R2L ?
3455 CHARSET_RIGHT_TO_LEFT : 3453 CHARSET_RIGHT_TO_LEFT :