xemacs-beta: src/unicode.c comparison

comparison src/unicode.c @ 4268:75d0292c1bff

[xemacs-hg @ 2007-11-14 19:41:04 by aidan] Correct the dumped information for the Unicode JIT infrastructure.

author	aidan
date	Wed, 14 Nov 2007 19:41:09 +0000
parents	1abf84db2c7f
children	bd9b678f4db7

comparison

equal deleted inserted replaced

-:66e2714696bd
+:75d0292c1bff
 Lisp_Object Qignore_first_column;
 Lisp_Object Vcurrent_jit_charset;
 Lisp_Object Qlast_allocated_character;
 Lisp_Object Qccl_encode_to_ucs_2;
+Lisp_Object Vnumber_of_jit_charsets;
+Lisp_Object Vlast_jit_charset_final;
+Lisp_Object Vcharset_descr;
 /************************************************************************/
 /*                        Unicode implementation                        */
 /************************************************************************/
 {
 int u1, u2, u3, u4;
 int code_levels;
 int i;
 int n = Dynarr_length (charsets);
-static int number_of_jit_charsets;
-static Ascbyte last_jit_charset_final;
 type_checking_assert (code >= 0);
 /* This shortcut depends on the representation of an Ichar, see text.c.
 Note that it may _not_ be extended to U+00A0 to U+00FF (many ISO 8859
 coded character sets have points that map into that region, so this
 {
 if (NILP (Vcurrent_jit_charset) ||
 	  (-1 == (i = get_free_codepoint(Vcurrent_jit_charset))))
 	{
 	  Ibyte setname[32];
-	  Lisp_Object charset_descr = build_string
+	  int number_of_jit_charsets = XINT (Vnumber_of_jit_charsets);
-	    ("Mule charset for otherwise unknown Unicode code points.");
+	  Ascbyte last_jit_charset_final = XCHAR (Vlast_jit_charset_final);
-	  struct gcpro gcpro1;
+	  /* This final byte shit is, umm, not that cool. */
+	  assert (last_jit_charset_final >= 0x30);
-	  if ('\0' == last_jit_charset_final)
-	    {
-	      /* This final byte shit is, umm, not that cool. */
-	      last_jit_charset_final = 0x30;
-	    }
 	  /* Assertion added partly because our Win32 layer doesn't
 	     support snprintf; with this, we're sure it won't overflow
 	     the buffer.  */
 	  assert(100 > number_of_jit_charsets);
-	  qxesprintf(setname, "jit-ucs-charset-%d", number_of_jit_charsets++);
+	  qxesprintf(setname, "jit-ucs-charset-%d", number_of_jit_charsets);
-	  /* Aside: GCPROing here would be overkill according to the FSF's
-	     philosophy. make-charset cannot currently GC, but is intended
-	     to be called from Lisp, with its arguments protected by the
-	     Lisp reader. We GCPRO in case it GCs in the future and no-one
-	     checks all the C callers.  */
-	  GCPRO1 (charset_descr);
 	  Vcurrent_jit_charset = Fmake_charset
-	    (intern((const CIbyte *)setname), charset_descr,
+	    (intern((const CIbyte *)setname), Vcharset_descr,
 	     /* Set encode-as-utf-8 to t, to have this character set written
 		using UTF-8 escapes in escape-quoted and ctext. This
 		sidesteps the fact that our internal character -> Unicode
 		mapping is not stable from one invocation to the next.  */
 	     nconc2 (list2(Qencode_as_utf_8, Qt),
 		     nconc2 (list6(Qcolumns, make_int(1), Qchars, make_int(96),
 				   Qdimension, make_int(2)),
 			     list6(Qregistries, Qunicode_registries,
-				   Qfinal, make_char(last_jit_charset_final++),
+				   Qfinal, make_char(last_jit_charset_final),
 				   /* This CCL program is initialised in
 				      unicode.el. */
 				   Qccl_program, Qccl_encode_to_ucs_2))));
-	  UNGCPRO;
+	  /* Record for the Unicode infrastructure that we've created
+	     this character set.  */
+	  Vnumber_of_jit_charsets = make_int (number_of_jit_charsets + 1);
+	  Vlast_jit_charset_final = make_char (last_jit_charset_final + 1);
 	  i = get_free_codepoint(Vcurrent_jit_charset);
 	}
 if (-1 != i)
 argument.
 If the CODE would not otherwise be converted to an XEmacs character, and the
 list of character sets to be consulted is nil or the default, a new XEmacs
 character will be created for it in one of the `jit-ucs-charset' Mule
-character sets, and that character will be returned.  There is scope for
+character sets, and that character will be returned.
-tens of thousands of separate Unicode code points in every session using
-this technique, so despite XEmacs' internal encoding not being based on
+This is limited to around 400,000 characters per XEmacs session, though, so
-Unicode, your data won't be trashed.
+while normal usage will not be problematic, things like:
+\(dotimes (i #x110000) (decode-char 'ucs i))
+will eventually error.  The long-term solution to this is Unicode as an
+internal encoding.
 */
 (code, USED_IF_MULE (charsets)))
 {
 #ifdef MULE
 Lisp_Object_dynarr *dyn;
 }
 void
 coding_system_type_create_unicode (void)
 {
+staticpro (&Vnumber_of_jit_charsets);
+Vnumber_of_jit_charsets = make_int (0);
+staticpro (&Vlast_jit_charset_final);
+Vlast_jit_charset_final = make_char (0x30);
+staticpro (&Vcharset_descr);
+Vcharset_descr
+= build_string ("Mule charset for otherwise unknown Unicode code points.");
 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (unicode, "unicode-coding-system-p");
 CODING_SYSTEM_HAS_METHOD (unicode, print);
 CODING_SYSTEM_HAS_METHOD (unicode, convert);
 CODING_SYSTEM_HAS_METHOD (unicode, init_coding_stream);
 CODING_SYSTEM_HAS_METHOD (unicode, rewind_coding_stream);

Mercurial > hg > xemacs-beta

comparison src/unicode.c @ 4268:75d0292c1bff