Mercurial > hg > xemacs-beta
comparison src/mule-ccl.c @ 3439:d1754e7f0cea
[xemacs-hg @ 2006-06-03 17:50:39 by aidan]
Just-in-time Unicode code point support.
author | aidan |
---|---|
date | Sat, 03 Jun 2006 17:51:06 +0000 |
parents | d7505a1267a4 |
children | 551c008d3777 |
comparison
equal
deleted
inserted
replaced
3438:14fbcab7c67b | 3439:d1754e7f0cea |
---|---|
459 | 459 |
460 #define CCL_TranslateCharacterConstTbl 0x03 /* Translate a multibyte character | 460 #define CCL_TranslateCharacterConstTbl 0x03 /* Translate a multibyte character |
461 1:ExtendedCOMMNDRrrRRRrrrXXXXX | 461 1:ExtendedCOMMNDRrrRRRrrrXXXXX |
462 2:ARGUMENT(Translation Table ID) | 462 2:ARGUMENT(Translation Table ID) |
463 */ | 463 */ |
464 /* Translate a character whose code point is reg[rrr] and charset ID is | |
465 reg[RRR], into its Unicode code point, which will be written into | |
466 reg[rrr]. */ | |
467 | |
468 #define CCL_MuleToUnicode 0x04 | |
469 | |
470 /* Translate a Unicode code point, in reg[rrr], into a Mule character, | |
471 writing the charset ID into reg[RRR] and the code point into reg[rrr]. */ | |
472 | |
473 #define CCL_UnicodeToMule 0x05 | |
464 | 474 |
465 /* Iterate looking up MAPs for reg[rrr] starting from the Nth (N = | 475 /* Iterate looking up MAPs for reg[rrr] starting from the Nth (N = |
466 reg[RRR]) MAP until some value is found. | 476 reg[RRR]) MAP until some value is found. |
467 | 477 |
468 Each MAP is a Lisp vector whose element is number, nil, t, or | 478 Each MAP is a Lisp vector whose element is number, nil, t, or |
575 M:SEPARATOR_x (< 0) | 585 M:SEPARATOR_x (< 0) |
576 M+1:MAP-ID_y | 586 M+1:MAP-ID_y |
577 ... | 587 ... |
578 N:SEPARATOR_z (< 0) | 588 N:SEPARATOR_z (< 0) |
579 */ | 589 */ |
580 | |
581 #define MAX_MAP_SET_LEVEL 30 | 590 #define MAX_MAP_SET_LEVEL 30 |
582 | 591 |
583 typedef struct | 592 typedef struct |
584 { | 593 { |
585 int rest_length; | 594 int rest_length; |
835 are not valid, set C to (CODE & 0xFF) because that is usually the | 844 are not valid, set C to (CODE & 0xFF) because that is usually the |
836 case that CCL_ReadMultibyteChar2 read an invalid code and it set | 845 case that CCL_ReadMultibyteChar2 read an invalid code and it set |
837 CODE to that invalid byte. */ | 846 CODE to that invalid byte. */ |
838 | 847 |
839 /* On XEmacs, TranslateCharacter is not supported. Thus, this | 848 /* On XEmacs, TranslateCharacter is not supported. Thus, this |
840 macro is not used. */ | 849 macro is only used in the MuleToUnicode transformation. */ |
841 #if 0 | |
842 #define CCL_MAKE_CHAR(charset, code, c) \ | 850 #define CCL_MAKE_CHAR(charset, code, c) \ |
843 do { \ | 851 do { \ |
844 if ((charset) == CHARSET_ASCII) \ | 852 if ((charset) == LEADING_BYTE_ASCII) \ |
845 (c) = (code) & 0xFF; \ | |
846 else if (CHARSET_DEFINED_P (charset) \ | |
847 && ((code) & 0x7F) >= 32 \ | |
848 && ((code) < 256 || ((code >> 7) & 0x7F) >= 32)) \ | |
849 { \ | 853 { \ |
850 int c1 = (code) & 0x7F, c2 = 0; \ | 854 c = (code) & 0xFF; \ |
855 } \ | |
856 else if ((charset) == LEADING_BYTE_CONTROL_1) \ | |
857 { \ | |
858 c = ((code) & 0xFF) - 0xA0; \ | |
859 } \ | |
860 else if (!NILP(charset_by_leading_byte(charset)) \ | |
861 && ((code) >= 32) \ | |
862 && ((code) < 256 || ((code >> 8) & 0x7F) >= 32)) \ | |
863 { \ | |
864 int c1, c2 = 0; \ | |
851 \ | 865 \ |
852 if ((code) >= 256) \ | 866 if ((code) < 256) \ |
853 c2 = c1, c1 = ((code) >> 7) & 0x7F; \ | 867 { \ |
854 (c) = make_ichar (charset, c1, c2); \ | 868 c1 = (code) & 0x7F; \ |
869 c2 = 0; \ | |
870 } \ | |
871 else \ | |
872 { \ | |
873 c1 = ((code) >> 8) & 0x7F; \ | |
874 c2 = (code) & 0x7F; \ | |
875 } \ | |
876 c = make_ichar (charset_by_leading_byte(charset), \ | |
877 c1, c2); \ | |
855 } \ | 878 } \ |
856 else \ | 879 else \ |
857 (c) = (code) & 0xFF; \ | 880 { \ |
858 } while (0) | 881 c = (code) & 0xFF; \ |
859 #endif | 882 } \ |
883 } while (0) | |
860 | 884 |
861 | 885 |
862 /* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting | 886 /* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting |
863 text goes to a place pointed by DESTINATION, the length of which | 887 text goes to a place pointed by DESTINATION, the length of which |
864 should not exceed DST_BYTES. The bytes actually processed is | 888 should not exceed DST_BYTES. The bytes actually processed is |
1390 | 1414 |
1391 break; | 1415 break; |
1392 | 1416 |
1393 case CCL_TranslateCharacter: | 1417 case CCL_TranslateCharacter: |
1394 #if 0 | 1418 #if 0 |
1395 /* XEmacs does not have translate_char, and its | 1419 /* XEmacs does not have translate_char, nor an |
1396 equivalent nor. We do nothing on this operation. */ | 1420 equivalent. We do nothing on this operation. */ |
1397 CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); | 1421 CCL_MAKE_CHAR(reg[RRR], reg[rrr], op); |
1398 op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), | 1422 op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]), |
1399 i, -1, 0, 0); | 1423 i, -1, 0, 0); |
1400 SPLIT_CHAR (op, reg[RRR], i, j); | 1424 SPLIT_CHAR (op, reg[RRR], i, j); |
1401 if (j != -1) | 1425 if (j != -1) |
1402 i = (i << 7) | j; | 1426 i = (i << 7) | j; |
1418 i = (i << 7) | j; | 1442 i = (i << 7) | j; |
1419 | 1443 |
1420 reg[rrr] = i; | 1444 reg[rrr] = i; |
1421 #endif | 1445 #endif |
1422 break; | 1446 break; |
1447 | |
1448 case CCL_MuleToUnicode: | |
1449 { | |
1450 Lisp_Object ucs; | |
1451 | |
1452 CCL_MAKE_CHAR(reg[rrr], reg[RRR], op); | |
1453 ucs = Fchar_to_unicode(make_char(op)); | |
1454 | |
1455 if (NILP(ucs)) | |
1456 { | |
1457 /* Uhh, char-to-unicode doesn't return nil at the | |
1458 moment, only ever -1. */ | |
1459 reg[rrr] = 0xFFFD; /* REPLACEMENT CHARACTER */ | |
1460 } | |
1461 else | |
1462 { | |
1463 reg[rrr] = XINT(ucs); | |
1464 if (-1 == reg[rrr]) | |
1465 { | |
1466 reg[rrr] = 0xFFFD; /* REPLACEMENT CHARACTER */ | |
1467 } | |
1468 } | |
1469 break; | |
1470 } | |
1471 | |
1472 case CCL_UnicodeToMule: | |
1473 { | |
1474 Lisp_Object scratch; | |
1475 | |
1476 scratch = Funicode_to_char(make_int(reg[rrr]), Qnil); | |
1477 | |
1478 if (!NILP(scratch)) | |
1479 { | |
1480 op = XCHAR(scratch); | |
1481 BREAKUP_ICHAR (op, scratch, i, j); | |
1482 reg[RRR] = XCHARSET_ID(scratch); | |
1483 | |
1484 if (j != 0) | |
1485 { | |
1486 i = (i << 8) | j; | |
1487 } | |
1488 | |
1489 reg[rrr] = i; | |
1490 } | |
1491 else | |
1492 { | |
1493 reg[rrr] = reg[RRR] = 0; | |
1494 } | |
1495 break; | |
1496 } | |
1423 | 1497 |
1424 case CCL_IterateMultipleMap: | 1498 case CCL_IterateMultipleMap: |
1425 { | 1499 { |
1426 Lisp_Object map, content, attrib, value; | 1500 Lisp_Object map, content, attrib, value; |
1427 int point, size, fin_ic; | 1501 int point, size, fin_ic; |