Mercurial > hg > xemacs-beta
comparison src/mule-coding.c @ 179:9ad43877534d r20-3b16
Import from CVS: tag r20-3b16
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:52:19 +0200 |
parents | 8eaf7971accc |
children | bfd6434d15b3 |
Comparison legend: equal, deleted, inserted, replaced
178:e703507b8a00 | 179:9ad43877534d |
---|---|
314 break; | 314 break; |
315 } | 315 } |
316 } | 316 } |
317 } | 317 } |
318 | 318 |
319 static int | 319 static enum eol_type |
320 symbol_to_eol_type (Lisp_Object symbol) | 320 symbol_to_eol_type (Lisp_Object symbol) |
321 { | 321 { |
322 CHECK_SYMBOL (symbol); | 322 CHECK_SYMBOL (symbol); |
323 if (NILP (symbol)) return EOL_AUTODETECT; | 323 if (NILP (symbol)) return EOL_AUTODETECT; |
324 else if (EQ (symbol, Qlf)) return EOL_LF; | 324 if (EQ (symbol, Qlf)) return EOL_LF; |
325 else if (EQ (symbol, Qcrlf)) return EOL_CRLF; | 325 if (EQ (symbol, Qcrlf)) return EOL_CRLF; |
326 else if (EQ (symbol, Qcr)) return EOL_CR; | 326 if (EQ (symbol, Qcr)) return EOL_CR; |
327 else | 327 |
328 signal_simple_error ("Unrecognized eol type", symbol); | 328 signal_simple_error ("Unrecognized eol type", symbol); |
329 | 329 return EOL_AUTODETECT; /* not reached */ |
330 return 0; /* not reached */ | |
331 } | 330 } |
332 | 331 |
333 static Lisp_Object | 332 static Lisp_Object |
334 eol_type_to_symbol (int eol_type) | 333 eol_type_to_symbol (enum eol_type type) |
335 { | 334 { |
336 switch (eol_type) | 335 switch (type) |
337 { | 336 { |
338 case EOL_LF: return Qlf; | 337 case EOL_LF: return Qlf; |
339 case EOL_CRLF: return Qcrlf; | 338 case EOL_CRLF: return Qcrlf; |
340 case EOL_CR: return Qcr; | 339 case EOL_CR: return Qcr; |
341 case EOL_AUTODETECT: return Qnil; | 340 case EOL_AUTODETECT: return Qnil; |
342 default: abort (); | 341 default: abort (); return Qnil; /* not reached */ |
343 } | 342 } |
344 | |
345 return Qnil; /* not reached */ | |
346 } | 343 } |
347 | 344 |
348 static void | 345 static void |
349 setup_eol_coding_systems (struct Lisp_Coding_System *codesys) | 346 setup_eol_coding_systems (struct Lisp_Coding_System *codesys) |
350 { | 347 { |
481 coding_system = Fget_coding_system (coding_system); | 478 coding_system = Fget_coding_system (coding_system); |
482 return XCODING_SYSTEM_NAME (coding_system); | 479 return XCODING_SYSTEM_NAME (coding_system); |
483 } | 480 } |
484 | 481 |
485 static struct Lisp_Coding_System * | 482 static struct Lisp_Coding_System * |
486 allocate_coding_system (int type, Lisp_Object name) | 483 allocate_coding_system (enum coding_system_type type, Lisp_Object name) |
487 { | 484 { |
488 struct Lisp_Coding_System *codesys; | 485 struct Lisp_Coding_System *codesys; |
489 | 486 |
490 codesys = (struct Lisp_Coding_System *) | 487 codesys = (struct Lisp_Coding_System *) |
491 alloc_lcrecord (sizeof (struct Lisp_Coding_System), lrecord_coding_system); | 488 alloc_lcrecord (sizeof (struct Lisp_Coding_System), lrecord_coding_system); |
892 } | 889 } |
893 return new_coding_system; | 890 return new_coding_system; |
894 } | 891 } |
895 | 892 |
896 static Lisp_Object | 893 static Lisp_Object |
897 subsidiary_coding_system (Lisp_Object coding_system, int eol_type) | 894 subsidiary_coding_system (Lisp_Object coding_system, enum eol_type type) |
898 { | 895 { |
899 struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); | 896 struct Lisp_Coding_System *cs = XCODING_SYSTEM (coding_system); |
900 Lisp_Object new_coding_system; | 897 Lisp_Object new_coding_system; |
901 | 898 |
902 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT) | 899 if (CODING_SYSTEM_EOL_TYPE (cs) != EOL_AUTODETECT) |
903 return coding_system; | 900 return coding_system; |
904 if (eol_type == EOL_AUTODETECT) | 901 |
905 return coding_system; | 902 switch (type) |
906 | 903 { |
907 switch (eol_type) | 904 case EOL_AUTODETECT: return coding_system; |
908 { | |
909 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; | 905 case EOL_LF: new_coding_system = CODING_SYSTEM_EOL_LF (cs); break; |
910 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; | 906 case EOL_CR: new_coding_system = CODING_SYSTEM_EOL_CR (cs); break; |
911 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; | 907 case EOL_CRLF: new_coding_system = CODING_SYSTEM_EOL_CRLF (cs); break; |
912 default: abort (); | 908 default: abort (); |
913 } | 909 } |
1208 /* Detecting the encoding of data */ | 1204 /* Detecting the encoding of data */ |
1209 /************************************************************************/ | 1205 /************************************************************************/ |
1210 | 1206 |
1211 struct detection_state | 1207 struct detection_state |
1212 { | 1208 { |
1213 int eol_type; | 1209 enum eol_type eol_type; |
1214 int seen_non_ascii; | 1210 int seen_non_ascii; |
1215 int mask; | 1211 int mask; |
1216 | 1212 |
1217 struct | 1213 struct |
1218 { | 1214 { |
1274 /* Perhaps the only thing useful you learn from intensive Microsoft | 1270 /* Perhaps the only thing useful you learn from intensive Microsoft |
1275 technical interviews */ | 1271 technical interviews */ |
1276 return (mask & (mask - 1)) == 0; | 1272 return (mask & (mask - 1)) == 0; |
1277 } | 1273 } |
1278 | 1274 |
1279 static int | 1275 static enum eol_type |
1280 detect_eol_type (struct detection_state *st, CONST unsigned char *src, | 1276 detect_eol_type (struct detection_state *st, CONST unsigned char *src, |
1281 unsigned int n) | 1277 unsigned int n) |
1282 { | 1278 { |
1283 int c; | 1279 int c; |
1284 | 1280 |
1426 This function does not automatically fetch subsidiary coding systems; | 1422 This function does not automatically fetch subsidiary coding systems; |
1427 that should be unnecessary with the explicit eol-type argument. */ | 1423 that should be unnecessary with the explicit eol-type argument. */ |
1428 | 1424 |
1429 static void | 1425 static void |
1430 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out, | 1426 determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out, |
1431 int *eol_type_in_out) | 1427 enum eol_type *eol_type_in_out) |
1432 { | 1428 { |
1433 struct detection_state decst; | 1429 struct detection_state decst; |
1434 | 1430 |
1435 if (*eol_type_in_out == EOL_AUTODETECT) | 1431 if (*eol_type_in_out == EOL_AUTODETECT) |
1436 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out); | 1432 *eol_type_in_out = XCODING_SYSTEM_EOL_TYPE (*codesys_in_out); |
1505 if (detect_coding_type (&decst, random_buffer, nread, 0)) | 1501 if (detect_coding_type (&decst, random_buffer, nread, 0)) |
1506 break; | 1502 break; |
1507 } | 1503 } |
1508 | 1504 |
1509 if (decst.mask == ~0) | 1505 if (decst.mask == ~0) |
1510 { | 1506 val = subsidiary_coding_system (Fget_coding_system (Qautomatic_conversion), |
1511 val = subsidiary_coding_system (Fget_coding_system | 1507 decst.eol_type); |
1512 (Qautomatic_conversion), | |
1513 decst.eol_type); | |
1514 } | |
1515 else | 1508 else |
1516 { | 1509 { |
1517 int i; | 1510 int i; |
1518 | 1511 |
1519 val = Qnil; | 1512 val = Qnil; |
1643 /* EOL_TYPE specifies the type of end-of-line conversion that | 1636 /* EOL_TYPE specifies the type of end-of-line conversion that |
1644 currently applies. We need to keep this separate from the | 1637 currently applies. We need to keep this separate from the |
1645 EOL type stored in CODESYS because the latter might indicate | 1638 EOL type stored in CODESYS because the latter might indicate |
1646 automatic EOL-type detection while the former will always | 1639 automatic EOL-type detection while the former will always |
1647 indicate a particular EOL type. */ | 1640 indicate a particular EOL type. */ |
1648 int eol_type; | 1641 enum eol_type eol_type; |
1649 | 1642 |
1650 /* Additional ISO2022 information. We define the structure above | 1643 /* Additional ISO2022 information. We define the structure above |
1651 because it's also needed by the detection routines. */ | 1644 because it's also needed by the detection routines. */ |
1652 struct iso2022_decoder iso2022; | 1645 struct iso2022_decoder iso2022; |
1653 | 1646 |
3309 goto not_done; | 3302 goto not_done; |
3310 | 3303 |
3311 default: | 3304 default: |
3312 if (0x28 <= c && c <= 0x2F) | 3305 if (0x28 <= c && c <= 0x2F) |
3313 { | 3306 { |
3314 iso->esc = c - 0x28 + ISO_ESC_2_8; | 3307 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8); |
3315 goto not_done; | 3308 goto not_done; |
3316 } | 3309 } |
3317 | 3310 |
3318 /* This function is called with CODESYS equal to nil when | 3311 /* This function is called with CODESYS equal to nil when |
3319 doing coding-system detection. */ | 3312 doing coding-system detection. */ |
3412 /**** designation ****/ | 3405 /**** designation ****/ |
3413 | 3406 |
3414 case ISO_ESC_2_4: | 3407 case ISO_ESC_2_4: |
3415 if (0x28 <= c && c <= 0x2F) | 3408 if (0x28 <= c && c <= 0x2F) |
3416 { | 3409 { |
3417 iso->esc = c - 0x28 + ISO_ESC_2_4_8; | 3410 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8); |
3418 goto not_done; | 3411 goto not_done; |
3419 } | 3412 } |
3420 if (0x40 <= c && c <= 0x42) | 3413 if (0x40 <= c && c <= 0x42) |
3421 { | 3414 { |
3422 cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c, | 3415 cs = CHARSET_BY_ATTRIBUTES (CHARSET_TYPE_94X94, c, |
3428 } | 3421 } |
3429 return 0; | 3422 return 0; |
3430 | 3423 |
3431 default: | 3424 default: |
3432 { | 3425 { |
3433 int type; | 3426 int type =-1; |
3434 | 3427 |
3435 if (c < '0' || c > '~') | 3428 if (c < '0' || c > '~') |
3436 return 0; /* bad final byte */ | 3429 return 0; /* bad final byte */ |
3437 | 3430 |
3438 if (iso->esc >= ISO_ESC_2_8 && | 3431 if (iso->esc >= ISO_ESC_2_8 && |
3446 iso->esc <= ISO_ESC_2_4_15) | 3439 iso->esc <= ISO_ESC_2_4_15) |
3447 { | 3440 { |
3448 type = ((iso->esc >= ISO_ESC_2_4_12) ? | 3441 type = ((iso->esc >= ISO_ESC_2_4_12) ? |
3449 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94); | 3442 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94); |
3450 reg = (iso->esc - ISO_ESC_2_4_8) & 3; | 3443 reg = (iso->esc - ISO_ESC_2_4_8) & 3; |
3444 } | |
3445 else | |
3446 { | |
3447 /* Can this ever be reached? -slb */ | |
3448 abort(); | |
3451 } | 3449 } |
3452 | 3450 |
3453 cs = CHARSET_BY_ATTRIBUTES (type, c, | 3451 cs = CHARSET_BY_ATTRIBUTES (type, c, |
3454 *flags & CODING_STATE_R2L ? | 3452 *flags & CODING_STATE_R2L ? |
3455 CHARSET_RIGHT_TO_LEFT : | 3453 CHARSET_RIGHT_TO_LEFT : |