Mercurial > hg > xemacs-beta

diff src/mule-ccl.c @ 3439:d1754e7f0cea
[xemacs-hg @ 2006-06-03 17:50:39 by aidan] Just-in-time Unicode code point support.
author: aidan
date: Sat, 03 Jun 2006 17:51:06 +0000
parents: d7505a1267a4
children: 551c008d3777
--- a/src/mule-ccl.c	Fri Jun 02 22:18:08 2006 +0000
+++ b/src/mule-ccl.c	Sat Jun 03 17:51:06 2006 +0000
@@ -461,6 +461,16 @@
 					       1:ExtendedCOMMNDRrrRRRrrrXXXXX
 					       2:ARGUMENT(Translation Table ID)
 					    */
+/* Translate a character whose code point is reg[rrr] and charset ID is
+   reg[RRR], into its Unicode code point, which will be written into
+   reg[rrr]. */
+
+#define CCL_MuleToUnicode	0x04 
+
+/* Translate a Unicode code point, in reg[rrr], into a Mule character,
+   writing the charset ID into reg[RRR] and the code point into reg[Rrr]. */
+
+#define CCL_UnicodeToMule	0x05 
 
 /* Iterate looking up MAPs for reg[rrr] starting from the Nth (N =
    reg[RRR]) MAP until some value is found.
@@ -577,7 +587,6 @@
 					 ...
 					 N:SEPARATOR_z (< 0)
 				      */
-
 #define MAX_MAP_SET_LEVEL 30
 
 typedef struct
@@ -837,26 +846,41 @@
    CODE to that invalid byte.  */
 
 /* On XEmacs, TranslateCharacter is not supported.  Thus, this
-   macro is not used.  */
-#if 0
+   macro is only used in the MuleToUnicode transformation.  */
 #define CCL_MAKE_CHAR(charset, code, c)				\
   do {								\
-    if ((charset) == CHARSET_ASCII)				\
-      (c) = (code) & 0xFF;						\
-    else if (CHARSET_DEFINED_P (charset)			\
-	     && ((code) & 0x7F) >= 32				\
-	     && ((code) < 256 || ((code >> 7) & 0x7F) >= 32))	\
+    if ((charset) == LEADING_BYTE_ASCII)			\
+      {								\
+	c = (code) & 0xFF;					\
+      }								\
+    else if ((charset) == LEADING_BYTE_CONTROL_1)		\
+      {								\
+	c = ((code) & 0xFF) - 0xA0;				\
+      }								\
+    else if (!NILP(charset_by_leading_byte(charset))		\
+	     && ((code) >= 32)					\
+	     && ((code) < 256 || ((code >> 8) & 0x7F) >= 32))	\
       {								\
-	int c1 = (code) & 0x7F, c2 = 0;				\
+	int c1, c2 = 0;						\
 								\
-	if ((code) >= 256)					\
-	  c2 = c1, c1 = ((code) >> 7) & 0x7F;			\
-	(c) = make_ichar (charset, c1, c2);			\
+	if ((code) < 256)					\
+	  {							\
+	    c1 = (code) & 0x7F;					\
+	    c2 = 0;						\
+	  }							\
+	else							\
+	  {							\
+	    c1 = ((code) >> 8) & 0x7F;				\
+	    c2 = (code) & 0x7F;					\
+	  }							\
+	c = make_ichar (charset_by_leading_byte(charset),	\
+			  c1, c2);				\
       }								\
     else							\
-      (c) = (code) & 0xFF;						\
-  } while (0)
-#endif
+      {								\
+	c = (code) & 0xFF;					\
+      }								\
+  } while (0) 
 
 
 /* Execute CCL code on SRC_BYTES length text at SOURCE.  The resulting
@@ -1392,9 +1416,9 @@
 
 	    case CCL_TranslateCharacter:
 #if 0
-	      /* XEmacs does not have translate_char, and its
-		 equivalent nor.  We do nothing on this operation. */
-	      CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
+	      /* XEmacs does not have translate_char, nor an
+		 equivalent.  We do nothing on this operation. */
+	      CCL_MAKE_CHAR(reg[RRR], reg[rrr], op);
 	      op = translate_char (GET_TRANSLATION_TABLE (reg[Rrr]),
 				   i, -1, 0, 0);
 	      SPLIT_CHAR (op, reg[RRR], i, j);
@@ -1421,6 +1445,56 @@
 #endif
 	      break;
 
+	    case CCL_MuleToUnicode:
+	      {
+		Lisp_Object ucs;
+
+		CCL_MAKE_CHAR(reg[rrr], reg[RRR], op);
+		ucs = Fchar_to_unicode(make_char(op));
+
+		if (NILP(ucs))
+		  {
+		    /* Uhh, char-to-unicode doesn't return nil at the
+		       moment, only ever -1. */
+		    reg[rrr] = 0xFFFD; /* REPLACEMENT CHARACTER */
+		  }
+		else
+		  {
+		    reg[rrr] = XINT(ucs);
+		    if (-1 == reg[rrr])
+		      {
+			reg[rrr] = 0xFFFD; /* REPLACEMENT CHARACTER */
+		      }
+		  }
+		break;
+	      }
+
+	    case CCL_UnicodeToMule:
+	      {
+		Lisp_Object scratch;
+
+		scratch = Funicode_to_char(make_int(reg[rrr]), Qnil);
+
+		if (!NILP(scratch))
+		  {
+		    op = XCHAR(scratch);
+		    BREAKUP_ICHAR (op, scratch, i, j);
+		    reg[RRR] = XCHARSET_ID(scratch);
+
+		    if (j != 0)
+		      {
+			i = (i << 8) | j;
+		      }
+
+		    reg[rrr] = i;
+		  }
+		else 
+		  {
+		    reg[rrr] = reg[RRR] = 0;
+		  }
+		break;
+	      }
+
 	    case CCL_IterateMultipleMap:
 	      {
 		Lisp_Object map, content, attrib, value;
author	aidan
date	Sat, 03 Jun 2006 17:51:06 +0000
parents	d7505a1267a4
children	551c008d3777