diff src/mule-ccl.c @ 4072:aa28d959af41

[xemacs-hg @ 2007-07-22 22:03:29 by aidan] Add support for non-ISO2022 8 bit fixed-width coding-systems
author aidan
date Sun, 22 Jul 2007 22:04:14 +0000
parents d6a215ad08b8
children 6f05405e63fc
line wrap: on
line diff
--- a/src/mule-ccl.c	Sun Jul 22 21:53:08 2007 +0000
+++ b/src/mule-ccl.c	Sun Jul 22 22:04:14 2007 +0000
@@ -29,15 +29,13 @@
 #include "charset.h"
 #include "mule-ccl.h"
 #include "file-coding.h"
+#include "elhash.h"
 
 Lisp_Object Qccl_error;
 
 /* This contains all code conversion map available to CCL.  */
 Lisp_Object Vcode_conversion_map_vector;
 
-/* Alist of fontname patterns vs corresponding CCL program.  */
-Lisp_Object Vfont_ccl_encoder_alist;
-
 /* This symbol is a property which associates with ccl program vector.
    Ex: (get 'ccl-big5-encoder 'ccl-program) returns ccl program vector. 
    Moved to general-slots.h. */
@@ -59,6 +57,15 @@
    already resolved to index numbers or not.  */
 Lisp_Object Vccl_program_table;
 
+/* Vector of registered hash tables for translation.  */
+Lisp_Object Vtranslation_hash_table_vector;
+
+/* Return a hash table of id number ID.  */
+#define GET_HASH_TABLE(id) \
+  (XHASH_TABLE (XCDR(XVECTOR(Vtranslation_hash_table_vector)->contents[(id)])))
+/* Copied from fns.c.  */
+#define HASH_VALUE(H, IDX) AREF ((H)->key_and_value, 2 * (IDX) + 1)
+
 /* CCL (Code Conversion Language) is a simple language which has
    operations on one input buffer, one output buffer, and 7 registers.
    The syntax of CCL is described in `ccl.el'.  Emacs Lisp function
@@ -651,6 +658,19 @@
 					  set reg[RRR] to -1.
 				     */
 
+#define CCL_LookupIntConstTbl 0x13 /* Lookup multibyte character by
+				      integer key.  Afterwards R7 set
+				      to 1 iff lookup succeeded.
+				      1:ExtendedCOMMNDRrrRRRXXXXXXXX
+				      2:ARGUMENT(Hash table ID) */
+
+#define CCL_LookupCharConstTbl 0x14 /* Lookup integer by multibyte
+				       character key.  Afterwards R7 set
+				       to 1 iff lookup succeeded.
+				       1:ExtendedCOMMNDRrrRRRrrrXXXXX
+				       2:ARGUMENT(Hash table ID) */
+
+
 /* CCL arithmetic/logical operators. */
 #define CCL_PLUS	0x00	/* X = Y + Z */
 #define CCL_MINUS	0x01	/* X = Y - Z */
@@ -773,7 +793,7 @@
       {								\
 	for (i = 0; i < (len); i++)				\
 	  {							\
-	    ch = ((XINT (ccl_prog[ic + (i / 3)]))		\
+	    ch = ((XCHAR_OR_INT (ccl_prog[ic + (i / 3)]))       \
 		  >> ((2 - (i % 3)) * 8)) & 0xFF;		\
 	    if (ch == '\n')					\
 	      {							\
@@ -802,7 +822,7 @@
       {								\
 	for (i = 0; i < (len); i++)				\
 	  {							\
-	    ch = ((XINT (ccl_prog[ic + (i / 3)]))		\
+	    ch = ((XCHAR_OR_INT (ccl_prog[ic + (i / 3)]))       \
 		  >> ((2 - (i % 3)) * 8)) & 0xFF;		\
 	    if (!ichar_multibyte_p(ch))				\
 	      {							\
@@ -837,7 +857,7 @@
   } while (0)
 
 #define POSSIBLE_LEADING_BYTE_P(leading_byte) \
-  ((leading_byte > MIN_LEADING_BYTE) && \
+  ((leading_byte >= MIN_LEADING_BYTE) && \
    (leading_byte - MIN_LEADING_BYTE) < NUM_LEADING_BYTES)
 
 /* Set C to the character code made from CHARSET and CODE.  This is
@@ -864,7 +884,7 @@
       }								\
     else if (!NILP(charset_by_leading_byte(charset))		\
 	     && ((code) >= 32)					\
-	     && ((code) < 256 || ((code >> 8) & 0x7F) >= 32))	\
+	     && ((code) < 256 || ((code >> 7) & 0x7F) >= 32))	\
       {								\
 	int c1, c2 = 0;						\
 								\
@@ -875,7 +895,7 @@
 	  }							\
 	else							\
 	  {							\
-	    c1 = ((code) >> 8) & 0x7F;				\
+	    c1 = ((code) >> 7) & 0x7F;				\
 	    c2 = (code) & 0x7F;					\
 	  }							\
 	c = make_ichar (charset_by_leading_byte(charset),	\
@@ -898,7 +918,7 @@
 
 #ifdef CCL_DEBUG
 #define CCL_DEBUG_BACKTRACE_LEN 256
-int ccl_backtrace_table[CCL_BACKTRACE_TABLE];
+int ccl_backtrace_table[CCL_DEBUG_BACKTRACE_LEN];
 int ccl_backtrace_idx;
 #endif
 
@@ -966,7 +986,7 @@
 	}
 
       this_ic = ic;
-      code = XINT (ccl_prog[ic]); ic++;
+      code = XCHAR_OR_INT (ccl_prog[ic]); ic++;
       field1 = code >> 8;
       field2 = (code & 0xFF) >> 5;
 
@@ -987,7 +1007,7 @@
 	  break;
 
 	case CCL_SetConst:	/* 00000000000000000000rrrXXXXX */
-	  reg[rrr] = XINT (ccl_prog[ic]);
+	  reg[rrr] = XCHAR_OR_INT (ccl_prog[ic]);
 	  ic++;
 	  break;
 
@@ -998,7 +1018,7 @@
 	     but the left one was already there so clearly the intention
 	     was an unsigned comparison. --ben */
 	  if ((unsigned int) i < (unsigned int) j)
-	    reg[rrr] = XINT (ccl_prog[ic + i]);
+	    reg[rrr] = XCHAR_OR_INT (ccl_prog[ic + i]);
 	  ic += j;
 	  break;
 
@@ -1026,13 +1046,13 @@
 	  break;
 
 	case CCL_WriteConstJump: /* A--D--D--R--E--S--S-000XXXXX */
-	  i = XINT (ccl_prog[ic]);
+	  i = XCHAR_OR_INT (ccl_prog[ic]);
 	  CCL_WRITE_CHAR (i);
 	  ic += ADDR;
 	  break;
 
 	case CCL_WriteConstReadJump: /* A--D--D--R--E--S--S-rrrXXXXX */
-	  i = XINT (ccl_prog[ic]);
+	  i = XCHAR_OR_INT (ccl_prog[ic]);
 	  CCL_WRITE_CHAR (i);
 	  ic++;
 	  CCL_READ_CHAR (reg[rrr]);
@@ -1040,7 +1060,7 @@
 	  break;
 
 	case CCL_WriteStringJump: /* A--D--D--R--E--S--S-000XXXXX */
-	  j = XINT (ccl_prog[ic]);
+	  j = XCHAR_OR_INT (ccl_prog[ic]);
 	  ic++;
 	  CCL_WRITE_STRING (j);
 	  ic += ADDR - 1;
@@ -1048,11 +1068,11 @@
 
 	case CCL_WriteArrayReadJump: /* A--D--D--R--E--S--S-rrrXXXXX */
 	  i = reg[rrr];
-	  j = XINT (ccl_prog[ic]);
+	  j = XCHAR_OR_INT (ccl_prog[ic]);
 	  /* #### see comment at CCL_SetArray */
 	  if ((unsigned int) i < (unsigned int) j)
 	    {
-	      i = XINT (ccl_prog[ic + 1 + i]);
+	      i = XCHAR_OR_INT (ccl_prog[ic + 1 + i]);
 	      CCL_WRITE_CHAR (i);
 	    }
 	  ic += j + 2;
@@ -1071,9 +1091,9 @@
 	case CCL_Branch:	/* CCCCCCCCCCCCCCCCCCCCrrrXXXXX */
 	  /* #### see comment at CCL_SetArray */
 	  if ((unsigned int) reg[rrr] < (unsigned int) field1)
-	    ic += XINT (ccl_prog[ic + reg[rrr]]);
+	    ic += XCHAR_OR_INT (ccl_prog[ic + reg[rrr]]);
 	  else
-	    ic += XINT (ccl_prog[ic + field1]);
+	    ic += XCHAR_OR_INT (ccl_prog[ic + field1]);
 	  break;
 
 	case CCL_ReadRegister:	/* CCCCCCCCCCCCCCCCCCCCrrXXXXX */
@@ -1081,7 +1101,7 @@
 	    {
 	      CCL_READ_CHAR (reg[rrr]);
 	      if (!field1) break;
-	      code = XINT (ccl_prog[ic]); ic++;
+	      code = XCHAR_OR_INT (ccl_prog[ic]); ic++;
 	      field1 = code >> 8;
 	      field2 = (code & 0xFF) >> 5;
 	    }
@@ -1090,7 +1110,7 @@
 	case CCL_WriteExprConst:  /* 1:00000OPERATION000RRR000XXXXX */
 	  rrr = 7;
 	  i = reg[RRR];
-	  j = XINT (ccl_prog[ic]);
+	  j = XCHAR_OR_INT (ccl_prog[ic]);
 	  op = field1 >> 6;
 	  jump_address = ic + 1;
 	  goto ccl_set_expr;
@@ -1101,7 +1121,7 @@
 	      i = reg[rrr];
 	      CCL_WRITE_CHAR (i);
 	      if (!field1) break;
-	      code = XINT (ccl_prog[ic]); ic++;
+	      code = XCHAR_OR_INT (ccl_prog[ic]); ic++;
 	      field1 = code >> 8;
 	      field2 = (code & 0xFF) >> 5;
 	    }
@@ -1124,7 +1144,7 @@
                following code.  */
 	    if (rrr)
 	      {
-		prog_id = XINT (ccl_prog[ic]);
+		prog_id = XCHAR_OR_INT (ccl_prog[ic]);
 		ic++;
 	      }
 	    else
@@ -1168,7 +1188,7 @@
 	  /* #### see comment at CCL_SetArray */
 	  if ((unsigned int) i < (unsigned int) field1)
 	    {
-	      j = XINT (ccl_prog[ic + i]);
+	      j = XCHAR_OR_INT (ccl_prog[ic + i]);
 	      CCL_WRITE_CHAR (j);
 	    }
 	  ic += field1;
@@ -1190,7 +1210,7 @@
 	  CCL_SUCCESS;
 
 	case CCL_ExprSelfConst: /* 00000OPERATION000000rrrXXXXX */
-	  i = XINT (ccl_prog[ic]);
+	  i = XCHAR_OR_INT (ccl_prog[ic]);
 	  ic++;
 	  op = field1 >> 6;
 	  goto ccl_expr_self;
@@ -1227,7 +1247,7 @@
 
 	case CCL_SetExprConst:	/* 00000OPERATION000RRRrrrXXXXX */
 	  i = reg[RRR];
-	  j = XINT (ccl_prog[ic]);
+	  j = XCHAR_OR_INT (ccl_prog[ic]);
 	  op = field1 >> 6;
 	  jump_address = ++ic;
 	  goto ccl_set_expr;
@@ -1243,9 +1263,9 @@
 	  CCL_READ_CHAR (reg[rrr]);
 	case CCL_JumpCondExprConst: /* A--D--D--R--E--S--S-rrrXXXXX */
 	  i = reg[rrr];
-	  op = XINT (ccl_prog[ic]);
+	  op = XCHAR_OR_INT (ccl_prog[ic]);
 	  jump_address = ic++ + ADDR;
-	  j = XINT (ccl_prog[ic]);
+	  j = XCHAR_OR_INT (ccl_prog[ic]);
 	  ic++;
 	  rrr = 7;
 	  goto ccl_set_expr;
@@ -1254,9 +1274,9 @@
 	  CCL_READ_CHAR (reg[rrr]);
 	case CCL_JumpCondExprReg:
 	  i = reg[rrr];
-	  op = XINT (ccl_prog[ic]);
+	  op = XCHAR_OR_INT (ccl_prog[ic]);
 	  jump_address = ic++ + ADDR;
-	  j = reg[XINT (ccl_prog[ic])];
+	  j = reg[XCHAR_OR_INT (ccl_prog[ic])];
 	  ic++;
 	  rrr = 7;
 
@@ -1358,7 +1378,7 @@
 		    if ((src + 1) >= src_end)
 		      goto ccl_read_multibyte_character_suspend;
 		    reg[RRR] = *src++;
-		    reg[rrr] = (*src++ & 0x7F);
+		    reg[rrr] = (*src++ & 0xFF);
 		  }
 		else if (i == PRE_LEADING_BYTE_PRIVATE_2)
 		  {
@@ -1438,7 +1458,7 @@
 #if 0
 	      /* XEmacs does not have translate_char or an equivalent.  We
                  do nothing on this operation. */
-	      op = XINT (ccl_prog[ic]); /* table */
+	      op = XCHAR_OR_INT (ccl_prog[ic]); /* table */
 	      ic++;
 	      CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
 	      op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0);
@@ -1454,7 +1474,8 @@
 	      {
 		Lisp_Object ucs;
 
-		CCL_MAKE_CHAR(reg[rrr], reg[RRR], op);
+                CCL_MAKE_CHAR (reg[rrr], reg[RRR], op);
+
 		ucs = Fchar_to_unicode(make_char(op));
 
 		if (NILP(ucs))
@@ -1465,7 +1486,7 @@
 		  }
 		else
 		  {
-		    reg[rrr] = XINT(ucs);
+		    reg[rrr] = XCHAR_OR_INT(ucs);
 		    if (-1 == reg[rrr])
 		      {
 			reg[rrr] = 0xFFFD; /* REPLACEMENT CHARACTER */
@@ -1488,7 +1509,7 @@
 
 		    if (j != 0)
 		      {
-			i = (i << 8) | j;
+			i = (i << 7) | j;
 		      }
 
 		    reg[rrr] = i;
@@ -1500,12 +1521,65 @@
 		break;
 	      }
 
+	    case CCL_LookupIntConstTbl:
+	      op = XCHAR_OR_INT (ccl_prog[ic]); /* table */
+	      ic++;
+	      {		
+		struct Lisp_Hash_Table *h = GET_HASH_TABLE (op);
+		htentry *e = find_htentry(make_int (reg[RRR]), h);
+                Lisp_Object scratch;
+
+                if (!HTENTRY_CLEAR_P(e))
+		  {
+                    op = XCHARVAL (e->value);
+		    if (!valid_ichar_p(op))
+                      {
+                        CCL_INVALID_CMD;
+                      }
+
+		    BREAKUP_ICHAR (op, scratch, i, j);
+                    reg[RRR] = XCHARSET_ID(scratch);
+
+		    if (j != 0)
+                      {
+                        i = (i << 7) | j;
+                      }
+		    reg[rrr] = i;
+		    reg[7] = 1; /* r7 true for success */
+		  }
+		else
+		  reg[7] = 0;
+	      }
+	      break;
+
+	    case CCL_LookupCharConstTbl:
+	      op = XCHAR_OR_INT (ccl_prog[ic]); /* table */
+	      ic++;
+	      CCL_MAKE_CHAR (reg[RRR], reg[rrr], i);
+	      {		
+		struct Lisp_Hash_Table *h = GET_HASH_TABLE (op);
+                htentry *e = find_htentry(make_int(i), h);
+
+		if (!HTENTRY_CLEAR_P(e))
+		  {
+                    op = e->value;
+		    if (!INTP (op))
+		      CCL_INVALID_CMD;
+		    reg[RRR] = XCHAR_OR_INT (op);
+		    reg[7] = 1; /* r7 true for success */
+		  }
+		else
+		  reg[7] = 0;
+	      }
+	      break;
+
+
 	    case CCL_IterateMultipleMap:
 	      {
 		Lisp_Object map, content, attrib, value;
 		int point, size, fin_ic;
 
-		j = XINT (ccl_prog[ic++]); /* number of maps. */
+		j = XCHAR_OR_INT (ccl_prog[ic++]); /* number of maps. */
 		fin_ic = ic + j;
 		op = reg[rrr];
 		if ((j > reg[RRR]) && (j >= 0))
@@ -1523,7 +1597,7 @@
 		for (;i < j;i++)
 		  {
 		    size = XVECTOR (Vcode_conversion_map_vector)->size;
-		    point = XINT (ccl_prog[ic++]);
+		    point = XCHAR_OR_INT (ccl_prog[ic++]);
 		    if (point >= size) continue;
 		    map =
 		      XVECTOR (Vcode_conversion_map_vector)->contents[point];
@@ -1569,7 +1643,7 @@
 		    else if (INTP (content))
 		      {
 			reg[RRR] = i;
-			reg[rrr] = XINT(content);
+			reg[rrr] = XCHAR_OR_INT(content);
 			break;
 		      }
 		    else if (EQ (content, Qt) || EQ (content, Qlambda))
@@ -1620,7 +1694,7 @@
 		stack_idx_of_map_multiple = 0;
 
 		map_set_rest_length =
-		  XINT (ccl_prog[ic++]); /* number of maps and separators. */
+		  XCHAR_OR_INT (ccl_prog[ic++]); /* number of maps and separators. */
 		fin_ic = ic + map_set_rest_length;
 		op = reg[rrr];
 
@@ -1688,7 +1762,7 @@
 		do {
 		  for (;map_set_rest_length > 0;i++, ic++, map_set_rest_length--)
 		    {
-		      point = XINT(ccl_prog[ic]);
+		      point = XCHAR_OR_INT(ccl_prog[ic]);
 		      if (point < 0)
 			{
 			  /* +1 is for including separator. */
@@ -1749,7 +1823,7 @@
 		      reg[RRR] = i;
 		      if (INTP (content))
 			{
-			  op = XINT (content);
+			  op = XCHAR_OR_INT (content);
 			  i += map_set_rest_length - 1;
 			  ic += map_set_rest_length - 1;
 			  POP_MAPPING_STACK (map_set_rest_length, reg[rrr]);
@@ -1807,7 +1881,7 @@
 	      {
 		Lisp_Object map, attrib, value, content;
 		int size, point;
-		j = XINT (ccl_prog[ic++]); /* map_id */
+		j = XCHAR_OR_INT (ccl_prog[ic++]); /* map_id */
 		op = reg[rrr];
 		if (j >= XVECTOR (Vcode_conversion_map_vector)->size)
 		  {
@@ -1840,7 +1914,7 @@
 		    if (NILP (content))
 		      reg[RRR] = -1;
 		    else if (INTP (content))
-		      reg[rrr] = XINT (content);
+		      reg[rrr] = XCHAR_OR_INT (content);
 		    else if (EQ (content, Qt));
 		    else if (CONSP (content))
 		      {
@@ -1943,7 +2017,9 @@
   for (i = 0; i < veclen; i++)
     {
       contents = XVECTOR (result)->contents[i];
-      if (INTP (contents))
+      /* XEmacs change; accept characters as well as integers, on the basis
+         that most CCL code written doesn't make a distinction. */
+      if (INTP (contents) || CHARP(contents))
 	continue;
       else if (CONSP (contents)
 	       && SYMBOLP (XCAR (contents))
@@ -2107,8 +2183,8 @@
     syntax_error ("Length of vector REGISTERS is not 8", Qunbound);
 
   for (i = 0; i < 8; i++)
-    ccl.reg[i] = (INTP (XVECTOR_DATA (reg)[i])
-		  ? XINT (XVECTOR_DATA (reg)[i])
+    ccl.reg[i] = (INTP (XVECTOR_DATA (reg)[i]) || CHARP (XVECTOR_DATA (reg)[i])
+		  ? XCHAR_OR_INT (XVECTOR_DATA (reg)[i])
 		  : 0);
 
   ccl_driver (&ccl, (const unsigned char *)0,
@@ -2172,10 +2248,13 @@
 	XVECTOR_DATA (status)[i] = make_int (0);
       if (INTP (XVECTOR_DATA (status)[i]))
 	ccl.reg[i] = XINT (XVECTOR_DATA (status)[i]);
+      if (CHARP (XVECTOR_DATA (status)[i]))
+	ccl.reg[i] = XCHAR (XVECTOR_DATA (status)[i]);
     }
-  if (INTP (XVECTOR (status)->contents[i]))
+  if (INTP (XVECTOR (status)->contents[i]) ||
+      CHARP (XVECTOR (status)->contents[i]))
     {
-      i = XINT (XVECTOR_DATA (status)[8]);
+      i = XCHAR_OR_INT (XVECTOR_DATA (status)[8]);
       if (ccl.ic < i && i < ccl.size)
 	ccl.ic = i;
     }
@@ -2347,9 +2426,16 @@
 void
 vars_of_mule_ccl (void)
 {
+
   staticpro (&Vccl_program_table);
   Vccl_program_table = Fmake_vector (make_int (32), Qnil);
 
+#ifdef DEBUG_XEMACS
+  DEFVAR_LISP ("ccl-program-table",
+               &Vccl_program_table /*
+Vector containing all registered CCL programs.
+*/ );
+#endif 
   DEFSYMBOL (Qccl_program);
   DEFSYMBOL (Qccl_program_idx);
   DEFSYMBOL (Qcode_conversion_map);
@@ -2360,19 +2446,15 @@
 */ );
   Vcode_conversion_map_vector = Fmake_vector (make_int (16), Qnil);
 
-  DEFVAR_LISP ("font-ccl-encoder-alist", &Vfont_ccl_encoder_alist /*
-Alist of fontname patterns vs corresponding CCL program.
-Each element looks like (REGEXP . CCL-CODE),
- where CCL-CODE is a compiled CCL program.
-When a font whose name matches REGEXP is used for displaying a character,
- CCL-CODE is executed to calculate the code point in the font
- from the charset number and position code(s) of the character which are set
- in CCL registers R0, R1, and R2 before the execution.
-The code point in the font is set in CCL registers R1 and R2
- when the execution terminated.
-If the font is single-byte font, the register R2 is not used.
+  DEFVAR_LISP ("translation-hash-table-vector",
+               &Vtranslation_hash_table_vector /*
+Vector containing all translation hash tables ever defined.
+Comprises pairs (SYMBOL . TABLE) where SYMBOL and TABLE were set up by calls
+to `define-translation-hash-table'.  The vector is indexed by the table id
+used by CCL.
 */ );
-  Vfont_ccl_encoder_alist = Qnil;
+    Vtranslation_hash_table_vector = Qnil;
+
 }
 
 #endif  /* emacs */