changeset 4145:edb00a8b4eff

[xemacs-hg @ 2007-08-26 20:00:29 by aidan] Generally make the language environments and coding systems a little more sane.
author aidan
date Sun, 26 Aug 2007 20:00:42 +0000
parents 4a08a9219456
children 7fd5d2fcb4bf
files lisp/ChangeLog lisp/mule/chinese.el lisp/mule/cyrillic.el lisp/mule/general-late.el lisp/mule/japanese.el lisp/mule/latin.el lisp/mule/mule-cmds.el lisp/mule/mule-coding.el lisp/mule/mule-win32-init.el lisp/unicode.el
diffstat 10 files changed, 767 insertions(+), 284 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/ChangeLog	Sun Aug 26 20:00:42 2007 +0000
@@ -1,3 +1,73 @@
+2007-08-25  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* mule/chinese.el ("Chinese-GB"):
+	* mule/chinese.el ("Chinese-BIG5"):
+	* mule/japanese.el ("Japanese"):
+	These three language environments have associated lambda
+	expressions to determine whether the current locale matches and
+	what the native-coding-system should be.  These used to be
+	stored uncompiled, since they were specified deep in the ALIST
+	argument to a #'set-language-info-alist call; this change moves
+	them into #'set-language-info calls where the byte-compiler can
+	compile them. Shouldn't make a huge difference, but startup time
+	is an issue for us.
+	
+	* mule/cyrillic.el (windows-1251):
+	Unconditionally provide windows-1251 using
+	make-8-bit-coding-system. 
+
+	* mule/latin.el:
+	Add a setup-function and an exit-function to the Turkish language
+	environment to change and restore the standard case table,
+	respectively. This reflects case conventions in Turkish, where
+	dotless i and dotted i are distinct characters with distinct case
+	forms. Add windows-1252, Mac Roman as coding systems. 
+	
+	* mule/mule-cmds.el:
+	* mule/mule-cmds.el (set-language-environment):
+	* mule/mule-cmds.el (finish-set-language-environment):
+	Check setup-function and exit-function's validity with
+	#'functionp, not fboundp. It doesn't make a lot of sense to
+	exclude lambdas from being specified as setup-function and
+	exit-functions. Update a comment to reflect the fact that
+	posix-charset-to-coding-system-hash is no longer initialised at
+	compile time. 
+	* mule/mule-coding.el:
+	* mule/mule-coding.el (define-translation-hash-table):
+	Call check-argument-type on the two arguments instead of an
+	explicit error in a conditional, for the sake of better style. 
+	* mule/mule-coding.el (make-8-bit-choose-category): New.
+	Function that looks at a make-8-bit-coding-system decode table and
+	decides on an appropriate category for the associated coding
+	system. 
+	* mule/mule-coding.el (make-8-bit-coding-system):
+	Use make-8-bit-choose-category to make the coding category for the
+	generated coding systems a bit more sane. 
+
+	* mule/mule-win32-init.el:
+	Move to a loop-with-destructuring-bind syntax for the creation of
+	Windows-specific coding systems. Specify the coding category of
+	the created coding systems, and create aliases of the form cpNNNN
+	for compatibility with GNU. Use our own implementations
+	instead of the Windows ones for several coding systems, for the
+	sake of greater cross-platform compatibility.
+
+2007-08-25  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* unicode.el (load-unicode-tables):
+	Explicitly set the default unicode-precedence-list after loading
+	the unicode tables, to move the Western character sets before the
+	East Asian ones by default. 
+	* unicode.el:
+	Avoid overflowing 80 characters per line with the CCL
+	program. Make unicode-error-sequence-zero and
+	unicode-error-sequence-regexp-range available to user code that
+	wants to work with Unicode error sequences. Decode those
+	characters in WGL4 that are not available in other Western
+	character sets at dump time into jit-ucs-charset-0, in the
+	interest of avoiding them getting East Asian fonts and width
+	information when decoding from Unicode.
+
 2007-08-20  Mike Sperber  <mike@xemacs.org>
 
 	* window-xemacs.el (root-window->saved-window): 
--- a/lisp/mule/chinese.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/chinese.el	Sun Aug 26 20:00:42 2007 +0000
@@ -225,11 +225,6 @@
 		(coding-system cn-gb-2312 iso-2022-7bit hz-gb-2312)
 		(coding-priority cn-gb-2312 big5 iso-2022-7bit)
 		(cygwin-locale "zh")
-		(locale "zh_CN.eucCN" "zh_CN.EUC" "zh_CN"
-			"chinese-s" "zh"
-			 (lambda (arg)
-			      (and arg (let ((case-fold-search t))
-					 (string-match "^zh_.*.GB.*" arg)))))
 		(mswindows-locale ("CHINESE" . "CHINESE_SIMPLIFIED"))
 		(native-coding-system cn-gb-2312)
 		(input-method . "chinese-py-punct")
@@ -240,6 +235,14 @@
 Uses the GB2312 character set."))
  '("Chinese"))
 
+;; Installed outside the set-language-info-alist data so the lambda gets compiled.
+(set-language-info
+ "Chinese-GB" 'locale
+ (list "zh_CN.eucCN" "zh_CN.EUC" "zh_CN" "chinese-s" "zh"
+       (lambda (locale)
+         (let ((case-fold-search t))
+           (and locale (string-match "^zh_.*.GB.*" locale))))))
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Chinese BIG5 (traditional)
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -295,11 +298,6 @@
 		  (coding-system big5 iso-2022-7bit)
 		  (coding-priority big5 cn-gb-2312 iso-2022-7bit)
 		  (cygwin-locale "zh_TW")
-		  (locale "zh_TW.Big5" "zh_TW.big5" "zh_CN.big5" "zh_TW"
-			  "chinese-t"
-			  (lambda (arg)
-			      (and arg (let ((case-fold-search t))
-					 (string-match "^zh_.*.BIG5.*" arg)))))
 		  (mswindows-locale ("CHINESE" . "CHINESE_TRADITIONAL"))
 		  (native-coding-system big5)
 		  (input-method . "chinese-py-punct-b5")
@@ -311,6 +309,15 @@
 ))
  '("Chinese"))
 
+;; Installed outside the set-language-info-alist data so the lambda gets compiled.
+(set-language-info
+ "Chinese-BIG5" 'locale
+ (list "zh_TW.Big5" "zh_TW.big5" "zh_CN.big5" "zh_TW"
+       "chinese-t"
+       (lambda (locale)
+         (let ((case-fold-search t))
+           (and locale (string-match "^zh_.*.BIG5.*" locale))))))
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Chinese CNS11643 (traditional)
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
--- a/lisp/mule/cyrillic.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/cyrillic.el	Sun Aug 26 20:00:42 2007 +0000
@@ -455,147 +455,143 @@
                 . "Support for Ukrainian."))
  '("Cyrillic"))
 
-;; Windows 1251 may be provided automatically on Windows, in which case we
-;; don't need to provide it.
-;; #### (Though we should provide the CP1251 alias.)
-(unless (find-coding-system 'windows-1251) 
-  (make-8-bit-coding-system 
-   'windows-1251
-   '((#x80 ?\u0402) ;; CYRILLIC CAPITAL LETTER DJE
-     (#x81 ?\u0403) ;; CYRILLIC CAPITAL LETTER GJE
-     (#x82 ?\u201A) ;; SINGLE LOW-9 QUOTATION MARK
-     (#x83 ?\u0453) ;; CYRILLIC SMALL LETTER GJE
-     (#x84 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK
-     (#x85 ?\u2026) ;; HORIZONTAL ELLIPSIS
-     (#x86 ?\u2020) ;; DAGGER
-     (#x87 ?\u2021) ;; DOUBLE DAGGER
-     (#x88 ?\u20AC) ;; EURO SIGN
-     (#x89 ?\u2030) ;; PER MILLE SIGN
-     (#x8A ?\u0409) ;; CYRILLIC CAPITAL LETTER LJE
-     (#x8B ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-     (#x8C ?\u040A) ;; CYRILLIC CAPITAL LETTER NJE
-     (#x8D ?\u040C) ;; CYRILLIC CAPITAL LETTER KJE
-     (#x8E ?\u040B) ;; CYRILLIC CAPITAL LETTER TSHE
-     (#x8F ?\u040F) ;; CYRILLIC CAPITAL LETTER DZHE
-     (#x90 ?\u0452) ;; CYRILLIC SMALL LETTER DJE
-     (#x91 ?\u2018) ;; LEFT SINGLE QUOTATION MARK
-     (#x92 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK
-     (#x93 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK
-     (#x94 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK
-     (#x95 ?\u2022) ;; BULLET
-     (#x96 ?\u2013) ;; EN DASH
-     (#x97 ?\u2014) ;; EM DASH
-     (#x99 ?\u2122) ;; TRADE MARK SIGN
-     (#x9A ?\u0459) ;; CYRILLIC SMALL LETTER LJE
-     (#x9B ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-     (#x9C ?\u045A) ;; CYRILLIC SMALL LETTER NJE
-     (#x9D ?\u045C) ;; CYRILLIC SMALL LETTER KJE
-     (#x9E ?\u045B) ;; CYRILLIC SMALL LETTER TSHE
-     (#x9F ?\u045F) ;; CYRILLIC SMALL LETTER DZHE
-     (#xA0 ?\u00A0) ;; NO-BREAK SPACE
-     (#xA1 ?\u040E) ;; CYRILLIC CAPITAL LETTER SHORT U
-     (#xA2 ?\u045E) ;; CYRILLIC SMALL LETTER SHORT U
-     (#xA3 ?\u0408) ;; CYRILLIC CAPITAL LETTER JE
-     (#xA4 ?\u00A4) ;; CURRENCY SIGN
-     (#xA5 ?\u0490) ;; CYRILLIC CAPITAL LETTER GHE WITH UPTURN
-     (#xA6 ?\u00A6) ;; BROKEN BAR
-     (#xA7 ?\u00A7) ;; SECTION SIGN
-     (#xA8 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO
-     (#xA9 ?\u00A9) ;; COPYRIGHT SIGN
-     (#xAA ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE
-     (#xAB ?\u00AB) ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-     (#xAC ?\u00AC) ;; NOT SIGN
-     (#xAD ?\u00AD) ;; SOFT HYPHEN
-     (#xAE ?\u00AE) ;; REGISTERED SIGN
-     (#xAF ?\u0407) ;; CYRILLIC CAPITAL LETTER YI
-     (#xB0 ?\u00B0) ;; DEGREE SIGN
-     (#xB1 ?\u00B1) ;; PLUS-MINUS SIGN
-     (#xB2 ?\u0406) ;; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-     (#xB3 ?\u0456) ;; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-     (#xB4 ?\u0491) ;; CYRILLIC SMALL LETTER GHE WITH UPTURN
-     (#xB5 ?\u00B5) ;; MICRO SIGN
-     (#xB6 ?\u00B6) ;; PILCROW SIGN
-     (#xB7 ?\u00B7) ;; MIDDLE DOT
-     (#xB8 ?\u0451) ;; CYRILLIC SMALL LETTER IO
-     (#xB9 ?\u2116) ;; NUMERO SIGN
-     (#xBA ?\u0454) ;; CYRILLIC SMALL LETTER UKRAINIAN IE
-     (#xBB ?\u00BB) ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-     (#xBC ?\u0458) ;; CYRILLIC SMALL LETTER JE
-     (#xBD ?\u0405) ;; CYRILLIC CAPITAL LETTER DZE
-     (#xBE ?\u0455) ;; CYRILLIC SMALL LETTER DZE
-     (#xBF ?\u0457) ;; CYRILLIC SMALL LETTER YI
-     (#xC0 ?\u0410) ;; CYRILLIC CAPITAL LETTER A
-     (#xC1 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE
-     (#xC2 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE
-     (#xC3 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE
-     (#xC4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE
-     (#xC5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE
-     (#xC6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE
-     (#xC7 ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE
-     (#xC8 ?\u0418) ;; CYRILLIC CAPITAL LETTER I
-     (#xC9 ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I
-     (#xCA ?\u041A) ;; CYRILLIC CAPITAL LETTER KA
-     (#xCB ?\u041B) ;; CYRILLIC CAPITAL LETTER EL
-     (#xCC ?\u041C) ;; CYRILLIC CAPITAL LETTER EM
-     (#xCD ?\u041D) ;; CYRILLIC CAPITAL LETTER EN
-     (#xCE ?\u041E) ;; CYRILLIC CAPITAL LETTER O
-     (#xCF ?\u041F) ;; CYRILLIC CAPITAL LETTER PE
-     (#xD0 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER
-     (#xD1 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES
-     (#xD2 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE
-     (#xD3 ?\u0423) ;; CYRILLIC CAPITAL LETTER U
-     (#xD4 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF
-     (#xD5 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA
-     (#xD6 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE
-     (#xD7 ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE
-     (#xD8 ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA
-     (#xD9 ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA
-     (#xDA ?\u042A) ;; CYRILLIC CAPITAL LETTER HARD SIGN
-     (#xDB ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU
-     (#xDC ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN
-     (#xDD ?\u042D) ;; CYRILLIC CAPITAL LETTER E
-     (#xDE ?\u042E) ;; CYRILLIC CAPITAL LETTER YU
-     (#xDF ?\u042F) ;; CYRILLIC CAPITAL LETTER YA
-     (#xE0 ?\u0430) ;; CYRILLIC SMALL LETTER A
-     (#xE1 ?\u0431) ;; CYRILLIC SMALL LETTER BE
-     (#xE2 ?\u0432) ;; CYRILLIC SMALL LETTER VE
-     (#xE3 ?\u0433) ;; CYRILLIC SMALL LETTER GHE
-     (#xE4 ?\u0434) ;; CYRILLIC SMALL LETTER DE
-     (#xE5 ?\u0435) ;; CYRILLIC SMALL LETTER IE
-     (#xE6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE
-     (#xE7 ?\u0437) ;; CYRILLIC SMALL LETTER ZE
-     (#xE8 ?\u0438) ;; CYRILLIC SMALL LETTER I
-     (#xE9 ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I
-     (#xEA ?\u043A) ;; CYRILLIC SMALL LETTER KA
-     (#xEB ?\u043B) ;; CYRILLIC SMALL LETTER EL
-     (#xEC ?\u043C) ;; CYRILLIC SMALL LETTER EM
-     (#xED ?\u043D) ;; CYRILLIC SMALL LETTER EN
-     (#xEE ?\u043E) ;; CYRILLIC SMALL LETTER O
-     (#xEF ?\u043F) ;; CYRILLIC SMALL LETTER PE
-     (#xF0 ?\u0440) ;; CYRILLIC SMALL LETTER ER
-     (#xF1 ?\u0441) ;; CYRILLIC SMALL LETTER ES
-     (#xF2 ?\u0442) ;; CYRILLIC SMALL LETTER TE
-     (#xF3 ?\u0443) ;; CYRILLIC SMALL LETTER U
-     (#xF4 ?\u0444) ;; CYRILLIC SMALL LETTER EF
-     (#xF5 ?\u0445) ;; CYRILLIC SMALL LETTER HA
-     (#xF6 ?\u0446) ;; CYRILLIC SMALL LETTER TSE
-     (#xF7 ?\u0447) ;; CYRILLIC SMALL LETTER CHE
-     (#xF8 ?\u0448) ;; CYRILLIC SMALL LETTER SHA
-     (#xF9 ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA
-     (#xFA ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN
-     (#xFB ?\u044B) ;; CYRILLIC SMALL LETTER YERU
-     (#xFC ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN
-     (#xFD ?\u044D) ;; CYRILLIC SMALL LETTER E
-     (#xFE ?\u044E) ;; CYRILLIC SMALL LETTER YU
-     (#xFF ?\u044F)) ;; CYRILLIC SMALL LETTER YA
-   "Microsoft's Code Page 1251, for Russian, Bulgarian, Serbian and others.  "
-   '(mnemonic "CyrW"
-     documentation
-     "This ASCII-compatible encoding is unfortunately not compatible at
+(make-8-bit-coding-system 
+ 'windows-1251
+ '((#x80 ?\u0402) ;; CYRILLIC CAPITAL LETTER DJE
+   (#x81 ?\u0403) ;; CYRILLIC CAPITAL LETTER GJE
+   (#x82 ?\u201A) ;; SINGLE LOW-9 QUOTATION MARK
+   (#x83 ?\u0453) ;; CYRILLIC SMALL LETTER GJE
+   (#x84 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK
+   (#x85 ?\u2026) ;; HORIZONTAL ELLIPSIS
+   (#x86 ?\u2020) ;; DAGGER
+   (#x87 ?\u2021) ;; DOUBLE DAGGER
+   (#x88 ?\u20AC) ;; EURO SIGN
+   (#x89 ?\u2030) ;; PER MILLE SIGN
+   (#x8A ?\u0409) ;; CYRILLIC CAPITAL LETTER LJE
+   (#x8B ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+   (#x8C ?\u040A) ;; CYRILLIC CAPITAL LETTER NJE
+   (#x8D ?\u040C) ;; CYRILLIC CAPITAL LETTER KJE
+   (#x8E ?\u040B) ;; CYRILLIC CAPITAL LETTER TSHE
+   (#x8F ?\u040F) ;; CYRILLIC CAPITAL LETTER DZHE
+   (#x90 ?\u0452) ;; CYRILLIC SMALL LETTER DJE
+   (#x91 ?\u2018) ;; LEFT SINGLE QUOTATION MARK
+   (#x92 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK
+   (#x93 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK
+   (#x94 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK
+   (#x95 ?\u2022) ;; BULLET
+   (#x96 ?\u2013) ;; EN DASH
+   (#x97 ?\u2014) ;; EM DASH
+   (#x99 ?\u2122) ;; TRADE MARK SIGN
+   (#x9A ?\u0459) ;; CYRILLIC SMALL LETTER LJE
+   (#x9B ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+   (#x9C ?\u045A) ;; CYRILLIC SMALL LETTER NJE
+   (#x9D ?\u045C) ;; CYRILLIC SMALL LETTER KJE
+   (#x9E ?\u045B) ;; CYRILLIC SMALL LETTER TSHE
+   (#x9F ?\u045F) ;; CYRILLIC SMALL LETTER DZHE
+   (#xA0 ?\u00A0) ;; NO-BREAK SPACE
+   (#xA1 ?\u040E) ;; CYRILLIC CAPITAL LETTER SHORT U
+   (#xA2 ?\u045E) ;; CYRILLIC SMALL LETTER SHORT U
+   (#xA3 ?\u0408) ;; CYRILLIC CAPITAL LETTER JE
+   (#xA4 ?\u00A4) ;; CURRENCY SIGN
+   (#xA5 ?\u0490) ;; CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+   (#xA6 ?\u00A6) ;; BROKEN BAR
+   (#xA7 ?\u00A7) ;; SECTION SIGN
+   (#xA8 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO
+   (#xA9 ?\u00A9) ;; COPYRIGHT SIGN
+   (#xAA ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE
+   (#xAB ?\u00AB) ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+   (#xAC ?\u00AC) ;; NOT SIGN
+   (#xAD ?\u00AD) ;; SOFT HYPHEN
+   (#xAE ?\u00AE) ;; REGISTERED SIGN
+   (#xAF ?\u0407) ;; CYRILLIC CAPITAL LETTER YI
+   (#xB0 ?\u00B0) ;; DEGREE SIGN
+   (#xB1 ?\u00B1) ;; PLUS-MINUS SIGN
+   (#xB2 ?\u0406) ;; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+   (#xB3 ?\u0456) ;; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+   (#xB4 ?\u0491) ;; CYRILLIC SMALL LETTER GHE WITH UPTURN
+   (#xB5 ?\u00B5) ;; MICRO SIGN
+   (#xB6 ?\u00B6) ;; PILCROW SIGN
+   (#xB7 ?\u00B7) ;; MIDDLE DOT
+   (#xB8 ?\u0451) ;; CYRILLIC SMALL LETTER IO
+   (#xB9 ?\u2116) ;; NUMERO SIGN
+   (#xBA ?\u0454) ;; CYRILLIC SMALL LETTER UKRAINIAN IE
+   (#xBB ?\u00BB) ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+   (#xBC ?\u0458) ;; CYRILLIC SMALL LETTER JE
+   (#xBD ?\u0405) ;; CYRILLIC CAPITAL LETTER DZE
+   (#xBE ?\u0455) ;; CYRILLIC SMALL LETTER DZE
+   (#xBF ?\u0457) ;; CYRILLIC SMALL LETTER YI
+   (#xC0 ?\u0410) ;; CYRILLIC CAPITAL LETTER A
+   (#xC1 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE
+   (#xC2 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE
+   (#xC3 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE
+   (#xC4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE
+   (#xC5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE
+   (#xC6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE
+   (#xC7 ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE
+   (#xC8 ?\u0418) ;; CYRILLIC CAPITAL LETTER I
+   (#xC9 ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I
+   (#xCA ?\u041A) ;; CYRILLIC CAPITAL LETTER KA
+   (#xCB ?\u041B) ;; CYRILLIC CAPITAL LETTER EL
+   (#xCC ?\u041C) ;; CYRILLIC CAPITAL LETTER EM
+   (#xCD ?\u041D) ;; CYRILLIC CAPITAL LETTER EN
+   (#xCE ?\u041E) ;; CYRILLIC CAPITAL LETTER O
+   (#xCF ?\u041F) ;; CYRILLIC CAPITAL LETTER PE
+   (#xD0 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER
+   (#xD1 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES
+   (#xD2 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE
+   (#xD3 ?\u0423) ;; CYRILLIC CAPITAL LETTER U
+   (#xD4 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF
+   (#xD5 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA
+   (#xD6 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE
+   (#xD7 ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE
+   (#xD8 ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA
+   (#xD9 ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA
+   (#xDA ?\u042A) ;; CYRILLIC CAPITAL LETTER HARD SIGN
+   (#xDB ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU
+   (#xDC ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN
+   (#xDD ?\u042D) ;; CYRILLIC CAPITAL LETTER E
+   (#xDE ?\u042E) ;; CYRILLIC CAPITAL LETTER YU
+   (#xDF ?\u042F) ;; CYRILLIC CAPITAL LETTER YA
+   (#xE0 ?\u0430) ;; CYRILLIC SMALL LETTER A
+   (#xE1 ?\u0431) ;; CYRILLIC SMALL LETTER BE
+   (#xE2 ?\u0432) ;; CYRILLIC SMALL LETTER VE
+   (#xE3 ?\u0433) ;; CYRILLIC SMALL LETTER GHE
+   (#xE4 ?\u0434) ;; CYRILLIC SMALL LETTER DE
+   (#xE5 ?\u0435) ;; CYRILLIC SMALL LETTER IE
+   (#xE6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE
+   (#xE7 ?\u0437) ;; CYRILLIC SMALL LETTER ZE
+   (#xE8 ?\u0438) ;; CYRILLIC SMALL LETTER I
+   (#xE9 ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I
+   (#xEA ?\u043A) ;; CYRILLIC SMALL LETTER KA
+   (#xEB ?\u043B) ;; CYRILLIC SMALL LETTER EL
+   (#xEC ?\u043C) ;; CYRILLIC SMALL LETTER EM
+   (#xED ?\u043D) ;; CYRILLIC SMALL LETTER EN
+   (#xEE ?\u043E) ;; CYRILLIC SMALL LETTER O
+   (#xEF ?\u043F) ;; CYRILLIC SMALL LETTER PE
+   (#xF0 ?\u0440) ;; CYRILLIC SMALL LETTER ER
+   (#xF1 ?\u0441) ;; CYRILLIC SMALL LETTER ES
+   (#xF2 ?\u0442) ;; CYRILLIC SMALL LETTER TE
+   (#xF3 ?\u0443) ;; CYRILLIC SMALL LETTER U
+   (#xF4 ?\u0444) ;; CYRILLIC SMALL LETTER EF
+   (#xF5 ?\u0445) ;; CYRILLIC SMALL LETTER HA
+   (#xF6 ?\u0446) ;; CYRILLIC SMALL LETTER TSE
+   (#xF7 ?\u0447) ;; CYRILLIC SMALL LETTER CHE
+   (#xF8 ?\u0448) ;; CYRILLIC SMALL LETTER SHA
+   (#xF9 ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA
+   (#xFA ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN
+   (#xFB ?\u044B) ;; CYRILLIC SMALL LETTER YERU
+   (#xFC ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN
+   (#xFD ?\u044D) ;; CYRILLIC SMALL LETTER E
+   (#xFE ?\u044E) ;; CYRILLIC SMALL LETTER YU
+   (#xFF ?\u044F)) ;; CYRILLIC SMALL LETTER YA
+ "Microsoft's Code Page 1251, for Russian, Bulgarian, Serbian and others.  "
+ '(mnemonic "CyrW"
+   documentation
+   "This ASCII-compatible encoding is unfortunately not compatible at
 the code point level with the KOI8 family of encodings, but it
 provides several widely-used punctuation and quotation marks that
 KOI-8R and its relatives don't, and has become widely used.  "
-     aliases (cp1251))))
+   aliases (cp1251)))
 
 (set-language-info-alist
  "Bulgarian" '((coding-system windows-1251)
--- a/lisp/mule/general-late.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/general-late.el	Sun Aug 26 20:00:42 2007 +0000
@@ -29,18 +29,19 @@
 ;; the language-specific code a chance to create its coding systems.
 
 (setq posix-charset-to-coding-system-hash
-      (eval-when-compile
-	(let ((res (make-hash-table :test 'equal)))
-	  ;; We want both normal and internal coding systems in order
-	  ;; to pick up coding system aliases.
-	  (dolist (coding-system (coding-system-list 'both-normal-and-internal) res)
-	    (setq coding-system
-		  (symbol-name coding-system))
-	    (unless (or (string-match #r"\(-unix\|-mac\|-dos\)$" coding-system)
-			(string-match #r"^\(internal\|mswindows\)" coding-system))
-	      (puthash 
-	       (replace-in-string (downcase coding-system) "[^a-z0-9]" "")
-	       (coding-system-name (intern coding-system)) res)))))
+      (loop
+        ;; We want both normal and internal coding systems in order
+        ;; to pick up coding system aliases.
+        for coding-system in (coding-system-list 'every)
+        with res = (make-hash-table :test #'equal)
+        do
+        (setq coding-system (symbol-name coding-system))
+        (unless (or (string-match #r"\(-unix\|-mac\|-dos\)$" coding-system)
+                    (string-match #r"^\(internal\|mswindows\)" coding-system))
+          (puthash 
+           (replace-in-string (downcase coding-system) "[^a-z0-9]" "")
+           (coding-system-name (intern coding-system)) res))
+        finally return res)
 
       ;; In a thoughtless act of cultural imperialism, move English, German
       ;; and Japanese to the front of language-info-alist to make start-up a
--- a/lisp/mule/japanese.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/japanese.el	Sun Aug 26 20:00:42 2007 +0000
@@ -424,50 +424,54 @@
 	       "japan"
 	       "ja"
 	       )
-
-	      (native-coding-system
-	       ;; first, see if an explicit encoding was given.
-	       (lambda (locale)
-		   (let ((case-fold-search t))
-		     (cond
-		      ;; many unix versions
-		      ((string-match "\\.euc" locale) 'euc-jp)
-		      ((string-match "\\.sjis" locale) 'shift-jis)
-
-		      ;; X11R6 (CJKV p. 471)
-		      ((string-match "\\.jis7" locale) 'jis7)
-		      ((string-match "\\.jis8" locale) 'jis8)
-		      ((string-match "\\.mscode" locale) 'shift-jis)
-		      ((string-match "\\.pjis" locale) 'iso-2022-jp)
-		      ((string-match "\\.ujis" locale) 'euc-jp)
-
-		      ;; other names in X11R6 locale.alias
-		      ((string-match "\\.ajec" locale) 'euc-jp)
-		      ((string-match "-euc" locale) 'euc-jp)
-		      ((string-match "\\.iso-2022-jp" locale) 'iso-2022-jp)
-		      ((string-match "\\.jis" locale) 'jis7) ;; or just jis?
-		      )))
-
-	       ;; aix (CJKV p. 465)
-	       (lambda (locale)
-		   (when (eq system-type 'aix)
-		     (cond
-		      ((string-match "^Ja_JP" locale) 'shift-jis)
-		      ((string-match "^ja_JP" locale) 'euc-jp))))
-
-	       ;; other X11R6 locale.alias
-	       (lambda (locale)
-		   (cond
-		    ((string-match "^Jp_JP" locale) 'euc-jp)
-		    ((and (eq system-type 'hpux) (eq locale "japanese"))
-		     'shift-jis)))
-
-	       ;; fallback
-	       euc-jp)
-
 ;;	      (input-method . "japanese")
 	      (features japan-util)
 	      (sample-text . "Japanese ($BF|K\8l(B)	$B$3$s$K$A$O(B, (I:]FAJ(B")
 	      (documentation . t)))
 
+;; Set the native-coding-system separately so the lambdas get compiled. (Not
+;; a huge speed improvement, but this code is called at startup, and every
+;; little helps there.)  The value is three matcher lambdas plus a fallback.
+(set-language-info "Japanese"
+                   'native-coding-system
+                   (list
+                    ;; first, see if an explicit encoding was given.
+                    (lambda (locale)
+                      (let ((case-fold-search t))
+                        (cond
+                         ;; many unix versions
+                         ((string-match "\\.euc" locale) 'euc-jp)
+                         ((string-match "\\.sjis" locale) 'shift-jis)
+
+                         ;; X11R6 (CJKV p. 471)
+                         ((string-match "\\.jis7" locale) 'jis7)
+                         ((string-match "\\.jis8" locale) 'jis8)
+                         ((string-match "\\.mscode" locale) 'shift-jis)
+                         ((string-match "\\.pjis" locale) 'iso-2022-jp)
+                         ((string-match "\\.ujis" locale) 'euc-jp)
+
+                         ;; other names in X11R6 locale.alias
+                         ((string-match "\\.ajec" locale) 'euc-jp)
+                         ((string-match "-euc" locale) 'euc-jp)
+                         ((string-match "\\.iso-2022-jp" locale) 'iso-2022-jp)
+                         ((string-match "\\.jis" locale) 'jis7) ;; or just jis?
+                         )))
+
+                    ;; aix (CJKV p. 465)
+                    (lambda (locale)
+                      (when (eq system-type 'aix)
+                        (cond
+                         ((string-match "^Ja_JP" locale) 'shift-jis)
+                         ((string-match "^ja_JP" locale) 'euc-jp))))
+
+                    ;; other X11R6 locale.alias (strings need `equal', not `eq')
+                    (lambda (locale)
+                      (cond
+                       ((string-match "^Jp_JP" locale) 'euc-jp)
+                       ((and (eq system-type 'hpux) (equal locale "japanese"))
+                        'shift-jis)))
+
+                    ;; fallback
+                    'euc-jp))
+
 ;;; japanese.el ends here
--- a/lisp/mule/latin.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/latin.el	Sun Aug 26 20:00:42 2007 +0000
@@ -588,7 +588,7 @@
       (string (char-syntax (make-char 'latin-iso8859-1 i)))
       syntax-table))
 
-;; Case. #### Bug: this doesn't handle I WITH DOT ABOVE. 
+;; Case. The Turkish case idiosyncrasy is handled with its language environment.
 (setup-case-pairs
  'latin-iso8859-9
  '((#xC0 #xE0) ;; A WITH GRAVE
@@ -618,6 +618,16 @@
    (#xDC #xFC) ;; U WITH DIAERESIS
    (#xDE #xFE))) ;; S WITH CEDILLA
 
+;; Case-table entries for the two Turkish-specific letters of latin-iso8859-9.
+(let ((std-table (standard-case-table)))
+  ;; LATIN CAPITAL LETTER I WITH DOT ABOVE
+  (put-case-table 'downcase
+                  (make-char 'latin-iso8859-9 #xdd) ?i std-table)
+
+  ;; LATIN SMALL LETTER DOTLESS I
+  (put-case-table 'upcase
+                  (make-char 'latin-iso8859-9 #xfd) ?I std-table))
+
 (make-coding-system
  'iso-8859-9 'iso2022 "ISO-8859-9 (Latin-5)"
  '(charset-g0 ascii
@@ -725,4 +735,192 @@
 This language environment supports %s. " name)))
      '("European"))))
 
+;; Turkish pairs I with dotless i, and I with dot with i, in the case table.
+;; #### Maybe we should limit this change to interactive functions; this may
+;; well be awkward for protocols and so on.
+(set-language-info
+ "Turkish" 'setup-function
+ (lambda ()
+   (let ((dotless-small-i (make-char 'latin-iso8859-9 #xfd))
+         (dotted-capital-i (make-char 'latin-iso8859-9 #xdd))
+         (std-table (standard-case-table)))
+     ;; Capital I pairs with small dotless i ...
+     (put-case-table-pair ?I dotless-small-i std-table)
+     ;; ... and capital I with dot pairs with small i.
+     (put-case-table-pair dotted-capital-i ?i std-table))))
+
+(set-language-info
+ "Turkish" 'exit-function
+ (lambda ()
+   ;; On leaving the environment, make I and i a case pair again.
+   (put-case-table-pair ?I ?i (standard-case-table))))
+
+(make-8-bit-coding-system
+ 'macintosh
+ '((#x80 ?\u00C4) ;; LATIN CAPITAL LETTER A WITH DIAERESIS
+   (#x81 ?\u00C5) ;; LATIN CAPITAL LETTER A WITH RING ABOVE
+   (#x82 ?\u00C7) ;; LATIN CAPITAL LETTER C WITH CEDILLA
+   (#x83 ?\u00C9) ;; LATIN CAPITAL LETTER E WITH ACUTE
+   (#x84 ?\u00D1) ;; LATIN CAPITAL LETTER N WITH TILDE
+   (#x85 ?\u00D6) ;; LATIN CAPITAL LETTER O WITH DIAERESIS
+   (#x86 ?\u00DC) ;; LATIN CAPITAL LETTER U WITH DIAERESIS
+   (#x87 ?\u00E1) ;; LATIN SMALL LETTER A WITH ACUTE
+   (#x88 ?\u00E0) ;; LATIN SMALL LETTER A WITH GRAVE
+   (#x89 ?\u00E2) ;; LATIN SMALL LETTER A WITH CIRCUMFLEX
+   (#x8A ?\u00E4) ;; LATIN SMALL LETTER A WITH DIAERESIS
+   (#x8B ?\u00E3) ;; LATIN SMALL LETTER A WITH TILDE
+   (#x8C ?\u00E5) ;; LATIN SMALL LETTER A WITH RING ABOVE
+   (#x8D ?\u00E7) ;; LATIN SMALL LETTER C WITH CEDILLA
+   (#x8E ?\u00E9) ;; LATIN SMALL LETTER E WITH ACUTE
+   (#x8F ?\u00E8) ;; LATIN SMALL LETTER E WITH GRAVE
+   (#x90 ?\u00EA) ;; LATIN SMALL LETTER E WITH CIRCUMFLEX
+   (#x91 ?\u00EB) ;; LATIN SMALL LETTER E WITH DIAERESIS
+   (#x92 ?\u00ED) ;; LATIN SMALL LETTER I WITH ACUTE
+   (#x93 ?\u00EC) ;; LATIN SMALL LETTER I WITH GRAVE
+   (#x94 ?\u00EE) ;; LATIN SMALL LETTER I WITH CIRCUMFLEX
+   (#x95 ?\u00EF) ;; LATIN SMALL LETTER I WITH DIAERESIS
+   (#x96 ?\u00F1) ;; LATIN SMALL LETTER N WITH TILDE
+   (#x97 ?\u00F3) ;; LATIN SMALL LETTER O WITH ACUTE
+   (#x98 ?\u00F2) ;; LATIN SMALL LETTER O WITH GRAVE
+   (#x99 ?\u00F4) ;; LATIN SMALL LETTER O WITH CIRCUMFLEX
+   (#x9A ?\u00F6) ;; LATIN SMALL LETTER O WITH DIAERESIS
+   (#x9B ?\u00F5) ;; LATIN SMALL LETTER O WITH TILDE
+   (#x9C ?\u00FA) ;; LATIN SMALL LETTER U WITH ACUTE
+   (#x9D ?\u00F9) ;; LATIN SMALL LETTER U WITH GRAVE
+   (#x9E ?\u00FB) ;; LATIN SMALL LETTER U WITH CIRCUMFLEX
+   (#x9F ?\u00FC) ;; LATIN SMALL LETTER U WITH DIAERESIS
+   (#xA0 ?\u2020) ;; DAGGER
+   (#xA1 ?\u00B0) ;; DEGREE SIGN
+   (#xA2 ?\u00A2) ;; CENT SIGN
+   (#xA3 ?\u00A3) ;; POUND SIGN
+   (#xA4 ?\u00A7) ;; SECTION SIGN
+   (#xA5 ?\u2022) ;; BULLET
+   (#xA6 ?\u00B6) ;; PILCROW SIGN
+   (#xA7 ?\u00DF) ;; LATIN SMALL LETTER SHARP S
+   (#xA8 ?\u00AE) ;; REGISTERED SIGN
+   (#xA9 ?\u00A9) ;; COPYRIGHT SIGN
+   (#xAA ?\u2122) ;; TRADE MARK SIGN
+   (#xAB ?\u00B4) ;; ACUTE ACCENT
+   (#xAC ?\u00A8) ;; DIAERESIS
+   (#xAD ?\u2260) ;; NOT EQUAL TO
+   (#xAE ?\u00C6) ;; LATIN CAPITAL LETTER AE
+   (#xAF ?\u00D8) ;; LATIN CAPITAL LETTER O WITH STROKE
+   (#xB0 ?\u221E) ;; INFINITY
+   (#xB1 ?\u00B1) ;; PLUS-MINUS SIGN
+   (#xB2 ?\u2264) ;; LESS-THAN OR EQUAL TO
+   (#xB3 ?\u2265) ;; GREATER-THAN OR EQUAL TO
+   (#xB4 ?\u00A5) ;; YEN SIGN
+   (#xB5 ?\u00B5) ;; MICRO SIGN
+   (#xB6 ?\u2202) ;; PARTIAL DIFFERENTIAL
+   (#xB7 ?\u2211) ;; N-ARY SUMMATION
+   (#xB8 ?\u220F) ;; N-ARY PRODUCT
+   (#xB9 ?\u03C0) ;; GREEK SMALL LETTER PI
+   (#xBA ?\u222B) ;; INTEGRAL
+   (#xBB ?\u00AA) ;; FEMININE ORDINAL INDICATOR
+   (#xBC ?\u00BA) ;; MASCULINE ORDINAL INDICATOR
+   (#xBD ?\u03A9) ;; GREEK CAPITAL LETTER OMEGA
+   (#xBE ?\u00E6) ;; LATIN SMALL LETTER AE
+   (#xBF ?\u00F8) ;; LATIN SMALL LETTER O WITH STROKE
+   (#xC0 ?\u00BF) ;; INVERTED QUESTION MARK
+   (#xC1 ?\u00A1) ;; INVERTED EXCLAMATION MARK
+   (#xC2 ?\u00AC) ;; NOT SIGN
+   (#xC3 ?\u221A) ;; SQUARE ROOT
+   (#xC4 ?\u0192) ;; LATIN SMALL LETTER F WITH HOOK
+   (#xC5 ?\u2248) ;; ALMOST EQUAL TO
+   (#xC6 ?\u2206) ;; INCREMENT
+   (#xC7 ?\u00AB) ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+   (#xC8 ?\u00BB) ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+   (#xC9 ?\u2026) ;; HORIZONTAL ELLIPSIS
+   (#xCA ?\u00A0) ;; NO-BREAK SPACE
+   (#xCB ?\u00C0) ;; LATIN CAPITAL LETTER A WITH GRAVE
+   (#xCC ?\u00C3) ;; LATIN CAPITAL LETTER A WITH TILDE
+   (#xCD ?\u00D5) ;; LATIN CAPITAL LETTER O WITH TILDE
+   (#xCE ?\u0152) ;; LATIN CAPITAL LIGATURE OE
+   (#xCF ?\u0153) ;; LATIN SMALL LIGATURE OE
+   (#xD0 ?\u2013) ;; EN DASH
+   (#xD1 ?\u2014) ;; EM DASH
+   (#xD2 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK
+   (#xD3 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK
+   (#xD4 ?\u2018) ;; LEFT SINGLE QUOTATION MARK
+   (#xD5 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK
+   (#xD6 ?\u00F7) ;; DIVISION SIGN
+   (#xD7 ?\u25CA) ;; LOZENGE
+   (#xD8 ?\u00FF) ;; LATIN SMALL LETTER Y WITH DIAERESIS
+   (#xD9 ?\u0178) ;; LATIN CAPITAL LETTER Y WITH DIAERESIS
+   (#xDA ?\u2044) ;; FRACTION SLASH
+   (#xDB ?\u20AC) ;; EURO SIGN
+   (#xDC ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+   (#xDD ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+   (#xDE ?\uFB01) ;; LATIN SMALL LIGATURE FI
+   (#xDF ?\uFB02) ;; LATIN SMALL LIGATURE FL
+   (#xE0 ?\u2021) ;; DOUBLE DAGGER
+   (#xE1 ?\u00B7) ;; MIDDLE DOT
+   (#xE2 ?\u201A) ;; SINGLE LOW-9 QUOTATION MARK
+   (#xE3 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK
+   (#xE4 ?\u2030) ;; PER MILLE SIGN
+   (#xE5 ?\u00C2) ;; LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+   (#xE6 ?\u00CA) ;; LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+   (#xE7 ?\u00C1) ;; LATIN CAPITAL LETTER A WITH ACUTE
+   (#xE8 ?\u00CB) ;; LATIN CAPITAL LETTER E WITH DIAERESIS
+   (#xE9 ?\u00C8) ;; LATIN CAPITAL LETTER E WITH GRAVE
+   (#xEA ?\u00CD) ;; LATIN CAPITAL LETTER I WITH ACUTE
+   (#xEB ?\u00CE) ;; LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+   (#xEC ?\u00CF) ;; LATIN CAPITAL LETTER I WITH DIAERESIS
+   (#xED ?\u00CC) ;; LATIN CAPITAL LETTER I WITH GRAVE
+   (#xEE ?\u00D3) ;; LATIN CAPITAL LETTER O WITH ACUTE
+   (#xEF ?\u00D4) ;; LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+   (#xF0 ?\uF8FF) ;; Apple logo
+   (#xF1 ?\u00D2) ;; LATIN CAPITAL LETTER O WITH GRAVE
+   (#xF2 ?\u00DA) ;; LATIN CAPITAL LETTER U WITH ACUTE
+   (#xF3 ?\u00DB) ;; LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+   (#xF4 ?\u00D9) ;; LATIN CAPITAL LETTER U WITH GRAVE
+   (#xF5 ?\u0131) ;; LATIN SMALL LETTER DOTLESS I
+   (#xF6 ?\u02C6) ;; MODIFIER LETTER CIRCUMFLEX ACCENT
+   (#xF7 ?\u02DC) ;; SMALL TILDE
+   (#xF8 ?\u00AF) ;; MACRON
+   (#xF9 ?\u02D8) ;; BREVE
+   (#xFA ?\u02D9) ;; DOT ABOVE
+   (#xFB ?\u02DA) ;; RING ABOVE
+   (#xFC ?\u00B8) ;; CEDILLA
+   (#xFD ?\u02DD) ;; DOUBLE ACUTE ACCENT
+   (#xFE ?\u02DB) ;; OGONEK
+   (#xFF ?\u02C7)) ;; CARON
+ "The Macintosh encoding for Western Europe and the Americas"
+ '(mnemonic "MR"
+   documentation "MacRoman, MIME name macintosh"
+   aliases (cp10000 MacRoman)))
+
+(make-8-bit-coding-system
+ 'windows-1252
+ '((#x80 ?\u20AC) ;; EURO SIGN
+   (#x82 ?\u201A) ;; SINGLE LOW-9 QUOTATION MARK
+   (#x83 ?\u0192) ;; LATIN SMALL LETTER F WITH HOOK
+   (#x84 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK
+   (#x85 ?\u2026) ;; HORIZONTAL ELLIPSIS
+   (#x86 ?\u2020) ;; DAGGER
+   (#x87 ?\u2021) ;; DOUBLE DAGGER
+   (#x88 ?\u02C6) ;; MODIFIER LETTER CIRCUMFLEX ACCENT
+   (#x89 ?\u2030) ;; PER MILLE SIGN
+   (#x8A ?\u0160) ;; LATIN CAPITAL LETTER S WITH CARON
+   (#x8B ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+   (#x8C ?\u0152) ;; LATIN CAPITAL LIGATURE OE
+   (#x8E ?\u017D) ;; LATIN CAPITAL LETTER Z WITH CARON
+   (#x91 ?\u2018) ;; LEFT SINGLE QUOTATION MARK
+   (#x92 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK
+   (#x93 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK
+   (#x94 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK
+   (#x95 ?\u2022) ;; BULLET
+   (#x96 ?\u2013) ;; EN DASH
+   (#x97 ?\u2014) ;; EM DASH
+   (#x98 ?\u02DC) ;; SMALL TILDE
+   (#x99 ?\u2122) ;; TRADE MARK SIGN
+   (#x9A ?\u0161) ;; LATIN SMALL LETTER S WITH CARON
+   (#x9B ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+   (#x9C ?\u0153) ;; LATIN SMALL LIGATURE OE
+   (#x9E ?\u017E) ;; LATIN SMALL LETTER Z WITH CARON
+   (#x9F ?\u0178));; LATIN CAPITAL LETTER Y WITH DIAERESIS
+ "Microsoft's extension of iso-8859-1 for Western Europe and the Americas.  "
+ '(mnemonic "cp1252"
+   aliases (cp1252)))
+
 ;;; latin.el ends here
--- a/lisp/mule/mule-cmds.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/mule-cmds.el	Sun Aug 26 20:00:42 2007 +0000
@@ -679,7 +679,7 @@
       (let ((func (get-language-info current-language-environment
 				     'exit-function)))
 	(run-hooks 'exit-language-environment-hook)
-	(if (fboundp func) (funcall func))))
+	(if (functionp func) (funcall func))))
   (setq current-language-environment language-name)
   (let ((default-eol-type (coding-system-eol-type
 			   default-buffer-file-coding-system)))
@@ -757,7 +757,7 @@
       (require (car required-features))
       (setq required-features (cdr required-features))))
   (let ((func (get-language-info language-name 'setup-function)))
-    (if (fboundp func)
+    (if (functionp func)
 	(funcall func)))
 
   ;; Fit the charsets preferences in unicode conversions for the
@@ -1021,9 +1021,9 @@
 ;; auto-language-alist deleted.  We have a more sophisticated system,
 ;; with the locales stored in the language data.
 
-;; Initialised with an eval-when-compile in mule/general-late.el, which is
-;; compiled after all the language support--and, thus, minority Chinese
-;; coding systems and so on--has been loaded.
+;; Initialised in mule/general-late.el, which is compiled after all the
+;; language support--and, thus, minority Chinese coding systems and so
+;; on--has been loaded.
 (defvar posix-charset-to-coding-system-hash nil
   "A map from the POSIX locale charset versions of the defined coding
 systems' names, with all alpha-numeric characters removed, to the actual
--- a/lisp/mule/mule-coding.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/mule-coding.el	Sun Aug 26 20:00:42 2007 +0000
@@ -197,9 +197,8 @@
 Analogous to `define-translation-table', but updates
 `translation-hash-table-vector' and the table is for use in the CCL
 `lookup-integer' and `lookup-character' functions."
-  (unless (and (symbolp symbol)
-	       (hash-table-p table))
-    (error "Bad args to define-translation-hash-table"))
+  (check-argument-type #'symbolp symbol)
+  (check-argument-type #'hash-table-p table)
   (let ((len (length translation-hash-table-vector))
 	(id 0)
 	done)
@@ -229,10 +228,9 @@
 				   encode-failure-octet)
   "Helper function for `make-8-bit-generate-encode-program', which see.
 
-Deals with the case where ASCII and another character set provide the
-can both be encoded unambiguously into the coding-system; if this is
-so, returns a list corresponding to such a ccl-program.  If not, it
-returns nil.  "
+Deals with the case where ASCII and another character set can both be
+encoded unambiguously and completely into the coding-system; if this is so,
+returns a list corresponding to such a ccl-program.  If not, it returns nil.  "
   (let ((tentative-encode-program-parts
 	 (eval-when-compile 
 	   (let* ((compiled 
@@ -313,7 +311,7 @@
 	      worth-trying nil)))
 
     (when worth-trying
-      (setq other-charset-vector (make-vector 256 encode-failure-octet))
+      (setq other-charset-vector (make-vector 128 encode-failure-octet))
       (loop for i from charset-lower to charset-upper
         do (aset other-charset-vector i
 		 (gethash (encode-char (make-char worth-trying i)
@@ -523,6 +521,19 @@
     (append decode-table nil)
     (second decode-program-parts))))
 
+(defun make-8-bit-choose-category (decode-table)
+  "Return the coding category appropriate for DECODE-TABLE.
+DECODE-TABLE must be a 256-entry vector mapping each octet on disk to an
+XEmacs character, for some fixed-width 8-bit coding system.  "
+  (check-argument-type #'vectorp decode-table)
+  (check-argument-range (length decode-table) #x100 #x100)
+  ;; Any octet in #x80-#xBF that is not mapped to itself rules out iso-8-1.
+  (loop
+    named category
+    for octet from #x80 to #xBF
+    unless (= octet (aref decode-table octet))
+    do (return-from category 'no-conversion)
+    finally return 'iso-8-1))
+
 ;;;###autoload
 (defun make-8-bit-coding-system (name unicode-map &optional description props)
   "Make and return a fixed-width 8-bit CCL coding system named NAME.
@@ -600,12 +611,12 @@
            description 
            (plist-put (plist-put props 'decode decode-program)
                       'encode encode-program)))
-    (coding-system-put name 'category 'iso-8-1)
+    (coding-system-put name 'category 
+                       (make-8-bit-choose-category decode-table))
     (loop for alias in aliases
       do (define-coding-system-alias alias name))
     result))
 
-;;;###autoload
 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map
 						 &optional description props)
 
@@ -671,7 +682,8 @@
                                    'encode-table-sym
                                    (symbol-value 'encode-table-sym)))
                             ',encode-program))))
-        (coding-system-put ',name 'category 'iso-8-1)
+        (coding-system-put ',name 'category ',
+                           (make-8-bit-choose-category decode-table))
         ,(macroexpand `(loop for alias in ',aliases
                         do (define-coding-system-alias alias
                              ',name)))
--- a/lisp/mule/mule-win32-init.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/mule/mule-win32-init.el	Sun Aug 26 20:00:42 2007 +0000
@@ -116,68 +116,83 @@
    locale user-default
    code-page oem))
 
-(let ((cplist
-       '(("EBCDIC"      037 "EBCDIC")
-	 ("OEM"         437 "MS-DOS United States")
-	 ("EBCDIC"      500 "EBCDIC \"500V1\"")
-	 ("OEM"         708 "Arabic (ASMO 708)")
-	 ("OEM"         709 "Arabic (ASMO 449+, BCON V4)")
-	 ("OEM"         710 "Arabic (Transparent Arabic)")
-	 ("OEM"         720 "Arabic (Transparent ASMO)")
-	 ("OEM"         737 "Greek (formerly 437G)")
-	 ("OEM"         775 "Baltic")
-	 ("OEM"         850 "MS-DOS Multilingual (Latin I)")
-	 ("OEM"         852 "MS-DOS Slavic (Latin II)")
-	 ("OEM"         855 "IBM Cyrillic (primarily Russian)")
-	 ("OEM"         857 "IBM Turkish")
-	 ("OEM"         860 "MS-DOS Portuguese")
-	 ("OEM"         861 "MS-DOS Icelandic")
-	 ("OEM"         862 "Hebrew")
-	 ("OEM"         863 "MS-DOS Canadian-French")
-	 ("OEM"         864 "Arabic")
-	 ("OEM"         865 "MS-DOS Nordic")
-	 ("OEM"         866 "MS-DOS Russian")
-	 ("OEM"         869 "IBM Modern Greek")
-	 ("Ansi/OEM"    874 "Thai")
-	 ("EBCDIC"      875 "EBCDIC")
-	 ("Ansi/OEM"    932 "Japanese")
-	 ("Ansi/OEM"    936 "Chinese (PRC, Singapore)")
-	 ("Ansi/OEM"    949 "Korean")
-	 ("Ansi/OEM"    950 "Chinese (Taiwan; Hong Kong SAR, PRC)")
-	 ("EBCDIC"      1026 "EBCDIC")
-	 ("ANSI"        1200 "Unicode (BMP of ISO 10646)")
-	 ("ANSI"        1250 "Windows 3.1 Eastern European")
-	 ("ANSI"        1251 "Windows 3.1 Cyrillic")
-	 ("ANSI"        1252 "Windows 3.1 US (ANSI)")
-	 ("ANSI"        1253 "Windows 3.1 Greek")
-	 ("ANSI"        1254 "Windows 3.1 Turkish")
-	 ("ANSI"        1255 "Hebrew")
-	 ("ANSI"        1256 "Arabic")
-	 ("ANSI"        1257 "Baltic")
-	 ("ANSI"        1258 "VietNam")
-	 ("Ansi/OEM"    1361 "Korean (Johab)")
-	 ("Mac"         10000 "Macintosh Roman")
-	 ("Mac"         10001 "Macintosh Japanese")
-	 ("Mac"         10006 "Macintosh Greek I")
-	 ("Mac"         10007 "Macintosh Cyrillic")
-	 ("Mac"         10029 "Macintosh Latin 2")
-	 ("Mac"         10079 "Macintosh Icelandic")
-	 ("Mac"         10081 "Macintosh Turkish"))))
-  (dolist (cpprops cplist)
-    (let ((ansioem (first cpprops))
-	  (cp (second cpprops))
-	  (name (third cpprops)))
-      (make-coding-system
-       (intern (format "windows-%s" cp))
-       'mswindows-multibyte
-       (format "MS Windows code page %s (%s, %s)" cp ansioem name)
-       `(mnemonic
-	 ,(format "MSW-%s" cp)
-	 code-page ,cp
-	 documentation
-	 ,(format
-	  "MS Windows Multibyte -- code page %s (%s, %s).
+(loop ;; Define one mswindows-multibyte coding system per code page listed.
+  for (ansioem cp category name)
+  in '(("EBCDIC"      037 no-conversion "EBCDIC")
+       ("OEM"         437 no-conversion "MS-DOS United States")
+       ("EBCDIC"      500 no-conversion "EBCDIC \"500V1\"")
+
+       ;; This is ISO-8859-6. 
+       ;; ("OEM"         708 "Arabic (ASMO 708)")
+       ("OEM"         709 no-conversion "Arabic (ASMO 449+, BCON V4)")
+       ("OEM"         710 no-conversion "Arabic (Transparent Arabic)")
+       ("OEM"         720 no-conversion "Arabic (Transparent ASMO)")
+       ("OEM"         737 no-conversion "Greek (formerly 437G)")
+       ("OEM"         775 no-conversion "Baltic")
+       ("OEM"         850 no-conversion "MS-DOS Multilingual (Latin I)")
+       ("OEM"         852 no-conversion "MS-DOS Slavic (Latin II)")
+       ("OEM"         855 no-conversion "IBM Cyrillic (primarily Russian)")
+       ("OEM"         857 no-conversion "IBM Turkish")
+       ("OEM"         860 no-conversion "MS-DOS Portuguese")
+       ("OEM"         861 no-conversion "MS-DOS Icelandic")
+       ("OEM"         862 no-conversion "Hebrew")
+       ("OEM"         863 no-conversion "MS-DOS Canadian-French")
+       ("OEM"         864 no-conversion "Arabic")
+       ("OEM"         865 no-conversion "MS-DOS Nordic")
+       ; ("OEM"         866 no-conversion "MS-DOS Russian")
+       ("OEM"         869 no-conversion "IBM Modern Greek")
+       ("Ansi/OEM"    874 no-conversion "Thai")
+       ("EBCDIC"      875 no-conversion "EBCDIC")
+       ("Ansi/OEM"    932 shift-jis "Japanese")
+       ("Ansi/OEM"    936 iso-8-2 "Chinese (PRC, Singapore)")
+       ("Ansi/OEM"    949 iso-8-2 "Korean")
+       ("Ansi/OEM"    950 big5 "Chinese (Taiwan; Hong Kong SAR, PRC)")
+       ("EBCDIC"      1026 no-conversion "EBCDIC")
+
+       ;; This code page doesn't work. See 
+       ;; http://blogs.msdn.com/michkap/archive/2005/08/01/446475.aspx
+       ; ("ANSI"        1200 utf-16-little-endian "Unicode (BMP of ISO 10646)")
+
+       ("ANSI"        1250 no-conversion "Windows 3.1 Eastern European")
+
+       ;; We implement this ourselves, and keeping the same implementation
+       ;; across platforms means behaviour is a bit more consistent.
+       ; ("ANSI"        1251 no-conversion "Windows 3.1 Cyrillic")
+       ; ("ANSI"        1252 no-conversion "Windows 3.1 US (ANSI)")
+
+       ; ("ANSI"        1253 no-conversion "Windows 3.1 Greek")
+       ("ANSI"        1254 no-conversion "Windows 3.1 Turkish")
+       ("ANSI"        1255 no-conversion "Hebrew")
+       ("ANSI"        1256 no-conversion "Arabic")
+       ("ANSI"        1257 no-conversion "Baltic")
+       ("ANSI"        1258 no-conversion "VietNam")
+       ;; #### Is this category right? I don't have Lunde to hand, and the
+       ;; online information on Johab is scant.
+       ("Ansi/OEM"    1361 iso-8-2 "Korean (Johab)")
+       ("Mac"         10000 no-conversion "Macintosh Roman")
+       ("Mac"         10001 shift-jis "Macintosh Japanese")
+       ("Mac"         10006 no-conversion "Macintosh Greek I")
+       ("Mac"         10007 no-conversion "Macintosh Cyrillic")
+       ("Mac"         10029 no-conversion "Macintosh Latin 2")
+       ("Mac"         10079 no-conversion "Macintosh Icelandic")
+       ("Mac"         10081 no-conversion "Macintosh Turkish"))
+  do
+  (make-coding-system
+   (intern (format "windows-%s" cp))
+   'mswindows-multibyte
+   (format "MS Windows code page %s (%s, %s)" cp ansioem name)
+   `(mnemonic
+     ,(format "MSW-%s" cp)
+     code-page ,cp
+     documentation
+     ,(format
+       "MS Windows Multibyte -- code page %s (%s, %s).
 
 This implements the encoding specified by code page %s.
 For more information on code pages, see `mswindows-charset-code-page'."
-	  cp ansioem name cp))))))
+       cp ansioem name cp)))
+  (define-coding-system-alias ;; cpNNN becomes an alias for windows-NNN.
+    (intern (format "cp%s" cp))
+    (intern (format "windows-%s" cp)))
+  (coding-system-put (intern (format "windows-%s" cp)) 'category category))
+
--- a/lisp/unicode.el	Sat Aug 25 21:51:21 2007 +0000
+++ b/lisp/unicode.el	Sun Aug 26 20:00:42 2007 +0000
@@ -144,7 +144,25 @@
 				     (expand-file-name (car args) undir)
 				     (cdr args)))
 			  (cdr tables))))
-	    parse-args)))
+	    parse-args)
+    ;; The default-unicode-precedence-list. We set this here to default to
+    ;; *not* mapping various European characters to East Asian characters;
+    ;; otherwise the default-unicode-precedence-list is numerically ordered
+    ;; by charset ID.
+    (set-default-unicode-precedence-list
+     '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15
+       greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5
+       latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9
+       vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6
+       jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978
+       japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2
+       chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2
+       indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2
+       chinese-isoir165 arabic-1-column arabic-2-column arabic-digit
+       composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0
+       katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column
+       latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4
+       chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7))))
 
 (make-coding-system
  'utf-16 'unicode
@@ -319,8 +337,8 @@
                   ;; one), we can't compile the program at dump time. We can
                   ;; check at byte compile time that the program is as
                   ;; expected, though.
-                  [1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028 147513
-                     8 82009 255 22]))
+                  [1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028
+                     147513 8 82009 255 22]))
              (when (featurep 'mule)
                ;; Check that the pre-existing constant reflects the intended
                ;; CCL program.
@@ -350,7 +368,169 @@
     (defconst ccl-encode-to-ucs-2 prog
       "CCL program to transform Mule characters to UCS-2.")
     (put 'ccl-encode-to-ucs-2 'ccl-program-idx
-         (register-ccl-program 'ccl-encode-to-ucs-2 prog))))
+         (register-ccl-program 'ccl-encode-to-ucs-2 prog)))
+
+  ;; Now, create jit-ucs-charset-0 entries for those characters in Windows
+  ;; Glyph List 4 that would otherwise end up in East Asian character sets.
+  ;; 
+  ;; WGL4 is a character repertoire from Microsoft that gives a guideline
+  ;; for font implementors as to what characters are sufficient for
+  ;; pan-European support.  The intention of this code is to avoid the
+  ;; situation where these characters end up mapping to East Asian XEmacs
+  ;; characters, which generally clash strongly with European characters
+  ;; both in font choice and character width; jit-ucs-charset-0 is a
+  ;; single-width character set which comes before the East Asian character
+  ;; sets in the default-unicode-precedence-list above.
+  (loop for (ucs ascii-or-latin-1) ;; UCS gets ASCII-OR-LATIN-1's syntax class.
+    in '((#x2013 ?-) ;; U+2013 EN DASH
+         (#x2014 ?-) ;; U+2014 EM DASH
+         (#x2105 ?%) ;; U+2105 CARE OF
+         (#x203e ?-) ;; U+203E OVERLINE
+         (#x221f ?|) ;; U+221F RIGHT ANGLE
+         (#x2584 ?|) ;; U+2584 LOWER HALF BLOCK
+         (#x2588 ?|) ;; U+2588 FULL BLOCK
+         (#x258c ?|) ;; U+258C LEFT HALF BLOCK
+         (#x2550 ?|) ;; U+2550 BOX DRAWINGS DOUBLE HORIZONTAL
+         (#x255e ?|) ;; U+255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+         (#x256a ?|) ;; U+256A BOX DRAWINGS VERTICAL SINGLE & HORIZONTAL DOUBLE
+         (#x2561 ?|) ;; U+2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+         (#x2215 ?/) ;; U+2215 DIVISION SLASH
+         (#x02c9 ?`) ;; U+02C9 MODIFIER LETTER MACRON
+         (#x2211 ?s) ;; U+2211 N-ARY SUMMATION
+         (#x220f ?s) ;; U+220F N-ARY PRODUCT
+         (#x2248 ?=) ;; U+2248 ALMOST EQUAL TO
+         (#x2264 ?=) ;; U+2264 LESS-THAN OR EQUAL TO
+         (#x2265 ?=) ;; U+2265 GREATER-THAN OR EQUAL TO
+         (#x201c ?') ;; U+201C LEFT DOUBLE QUOTATION MARK
+         (#x2026 ?.) ;; U+2026 HORIZONTAL ELLIPSIS
+         (#x2212 ?-) ;; U+2212 MINUS SIGN
+         (#x2260 ?=) ;; U+2260 NOT EQUAL TO
+         (#x221e ?=) ;; U+221E INFINITY
+         (#x2642 ?=) ;; U+2642 MALE SIGN
+         (#x2640 ?=) ;; U+2640 FEMALE SIGN
+         (#x2032 ?=) ;; U+2032 PRIME
+         (#x2033 ?=) ;; U+2033 DOUBLE PRIME
+         (#x25cb ?=) ;; U+25CB WHITE CIRCLE
+         (#x25cf ?=) ;; U+25CF BLACK CIRCLE
+         (#x25a1 ?=) ;; U+25A1 WHITE SQUARE
+         (#x25a0 ?=) ;; U+25A0 BLACK SQUARE
+         (#x25b2 ?=) ;; U+25B2 BLACK UP-POINTING TRIANGLE
+         (#x25bc ?=) ;; U+25BC BLACK DOWN-POINTING TRIANGLE
+         (#x2192 ?=) ;; U+2192 RIGHTWARDS ARROW
+         (#x2190 ?=) ;; U+2190 LEFTWARDS ARROW
+         (#x2191 ?=) ;; U+2191 UPWARDS ARROW
+         (#x2193 ?=) ;; U+2193 DOWNWARDS ARROW
+         (#x2229 ?=) ;; U+2229 INTERSECTION
+         (#x2202 ?=) ;; U+2202 PARTIAL DIFFERENTIAL
+         (#x2261 ?=) ;; U+2261 IDENTICAL TO
+         (#x221a ?=) ;; U+221A SQUARE ROOT
+         (#x222b ?=) ;; U+222B INTEGRAL
+         (#x2030 ?=) ;; U+2030 PER MILLE SIGN
+         (#x266a ?=) ;; U+266A EIGHTH NOTE
+         (#x2020 ?*) ;; U+2020 DAGGER
+         (#x2021 ?*) ;; U+2021 DOUBLE DAGGER
+         (#x2500 ?|) ;; U+2500 BOX DRAWINGS LIGHT HORIZONTAL
+         (#x2502 ?|) ;; U+2502 BOX DRAWINGS LIGHT VERTICAL
+         (#x250c ?|) ;; U+250C BOX DRAWINGS LIGHT DOWN AND RIGHT
+         (#x2510 ?|) ;; U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT
+         (#x2518 ?|) ;; U+2518 BOX DRAWINGS LIGHT UP AND LEFT
+         (#x2514 ?|) ;; U+2514 BOX DRAWINGS LIGHT UP AND RIGHT
+         (#x251c ?|) ;; U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+         (#x252c ?|) ;; U+252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+         (#x2524 ?|) ;; U+2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT
+         (#x2534 ?|) ;; U+2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL
+         (#x253c ?|) ;; U+253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+         (#x02da ?^) ;; U+02DA RING ABOVE
+         (#x2122 ?\xa9) ;; U+2122 TRADE MARK SIGN, like #xA9 (copyright sign)
+
+         (#x0132 ?\xe6) ;; U+0132 LATIN CAPITAL LIGATURE IJ, like #xE6 (ae)
+         (#x013f ?\xe6) ;; U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT, like #xE6
+
+         (#x0133 ?\xe6) ;; U+0133 LATIN SMALL LIGATURE IJ, like #xE6 (ae)
+         (#x0140 ?\xe6) ;; U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT, like #xE6
+         (#x0149 ?\xe6) ;; U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, like #xE6
+
+         (#x2194 ?|) ;; U+2194 LEFT RIGHT ARROW
+         (#x2660 ?*) ;; U+2660 BLACK SPADE SUIT
+         (#x2665 ?*) ;; U+2665 BLACK HEART SUIT
+         (#x2663 ?*) ;; U+2663 BLACK CLUB SUIT
+         (#x2592 ?|) ;; U+2592 MEDIUM SHADE
+         (#x2195 ?|) ;; U+2195 UP DOWN ARROW
+
+         (#x2113 ?\xb9) ;; U+2113 SCRIPT SMALL L, like #xB9 (superscript one)
+         (#x215b ?\xbe) ;; U+215B VULGAR FRACTION ONE EIGHTH, like #xBE (3/4)
+         (#x215c ?\xbe) ;; U+215C VULGAR FRACTION THREE EIGHTHS, like #xBE (3/4)
+         (#x215d ?\xbe) ;; U+215D VULGAR FRACTION FIVE EIGHTHS, like #xBE (3/4)
+         (#x215e ?\xbe) ;; U+215E VULGAR FRACTION SEVEN EIGHTHS, like #xBE (3/4)
+         (#x207f ?\xbe) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, like #xBE
+
+         ;; These are not in WGL 4, but are IPA characters that should not
+         ;; be double width. They are the only IPA characters that both
+         ;; occur in packages/mule-packages/leim/ipa.el and end up in East
+         ;; Asian character sets when that file is loaded in an XEmacs
+         ;; without packages.
+         (#x2197 ?|) ;; U+2197 NORTH EAST ARROW
+         (#x2199 ?|) ;; U+2199 SOUTH WEST ARROW
+         (#x2191 ?|) ;; U+2191 UPWARDS ARROW (NOTE: also listed above with ?=)
+         (#x207f ?\xb9));; U+207F SUPERSCRIPT N (NOTE: also above with ?\xbe)
+    with decoded = nil
+    with syntax-table = (standard-syntax-table)
+    ;; This creates jit-ucs-charset-0 entries because:
+    ;;
+    ;;    1. If the tables are dumped, it is run at dump time before they are
+    ;;    dumped, and as such before the relevant conversions are available
+    ;;    (they are made available in mule/general-late.el). 
+    ;;
+    ;;    2. If the tables are not dumped, it is run at dump time, long before
+    ;;    any of the other mappings are available.
+    ;;
+    do
+    (setq decoded (decode-char 'ucs ucs))
+    (assert (eq (char-charset decoded)
+                'jit-ucs-charset-0) nil 
+                "Unexpected Unicode decoding behavior.  ")
+    (modify-syntax-entry decoded
+                         (string 
+                          (char-syntax ascii-or-latin-1))
+                         syntax-table))
+
+  ;; Create all the Unicode error sequences, normally as jit-ucs-charset-0
+  ;; characters starting at U+200000 (which isn't a valid Unicode code
+  ;; point).
+  ;;
+  ;; #xd800 is the first leading surrogate; trailing surrogates must be in
+  ;; the range #xdc00-#xdfff.  What follows #xd800 here starts with octet
+  ;; #x01, so it is not, and we intentionally provoke an error sequence.
+  (dotimes (octet #x100)
+    (decode-coding-string (format "\xd8\x00\x01%c" octet) 'utf-16-be))
+
+  ;; Make them available to user code.
+  (defvar unicode-error-sequence-zero
+    (aref (decode-coding-string "\xd8\x00\x01\x00" 'utf-16-be) 3)
+    "The XEmacs character representing an invalid zero octet in Unicode.
+
+Subtract this character from each XEmacs character in an invalid sequence to
+get the octet on disk. E.g.
+
+\(- (aref (decode-coding-string \"\\x80\" 'utf-8) 0)
+   unicode-error-sequence-zero)
+=> ?\\x80
+
+You can search for invalid sequences using
+`unicode-error-sequence-regexp-range', which see.  ")
+
+  (defvar unicode-error-sequence-regexp-range
+    (format "%c-%c"
+            (aref (decode-coding-string "\xd8\x00\x01\x00" 'utf-16-be) 3)
+            (aref (decode-coding-string "\xd8\x00\x01\xFF" 'utf-16-be) 3))
+    "Regular expression range to match Unicode error sequences in XEmacs.
+
+Invalid Unicode sequences on input are represented as XEmacs characters with
+values starting at `unicode-error-sequence-zero', one character for each
+invalid octet.  Use this variable (inside square brackets with
+`re-search-forward', or as is with `skip-chars-forward') to search for such
+characters; subtract `unicode-error-sequence-zero' from such a character to
+get the octet on disk.  "))
 
 ;; #### UTF-7 is not yet implemented, and it's tricky to do.  There's
 ;; an implementation in appendix A.1 of the Unicode Standard, Version