changeset 4090:751ae075e76e

[xemacs-hg @ 2007-08-01 13:53:32 by aidan] Add some more locale information, Windows-1253, fix a make-8-bit-coding-system bug.
author aidan
date Wed, 01 Aug 2007 13:53:41 +0000
parents 094d7cbe2c62
children 7b78a0e97947
files lisp/ChangeLog lisp/mule/cyrillic.el lisp/mule/greek.el lisp/mule/latin.el lisp/mule/mule-coding.el
diffstat 5 files changed, 188 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/lisp/ChangeLog	Tue Jul 31 21:51:15 2007 +0000
+++ b/lisp/ChangeLog	Wed Aug 01 13:53:41 2007 +0000
@@ -1,3 +1,25 @@
+2007-08-01  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* mule/cyrillic.el:
+	* mule/cyrillic.el ("Russian"):
+	* mule/cyrillic.el ("Ukrainian"):
+	* mule/cyrillic.el ("Bulgarian"):
+	* mule/cyrillic.el ("Belarusian"):
+	Add POSIX locale information for all four languages. Remove
+	information about specific coding systems in the docstrings, since
+	this information is inaccurate if a variant language environment
+	is being used.
+	
+	* mule/greek.el:
+	* mule/latin.el (for):
+	Add POSIX locale information, provide Windows-1253 as well. 
+	
+	* mule/mule-coding.el (make-8-bit-generate-helper):
+	Fix a bug that was biting me with windows-1251. I need to include
+	tests in tests/automated/mule-tests.el that check that all the
+	coding-systems created with make-8-bit-coding-system are
+	reversible, since all of them should be.
+
 2007-07-28  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* mule/mule-coding.el (make-8-bit-create-decode-encode-tables):
--- a/lisp/mule/cyrillic.el	Tue Jul 31 21:51:15 2007 +0000
+++ b/lisp/mule/cyrillic.el	Wed Aug 01 13:53:41 2007 +0000
@@ -115,6 +115,8 @@
    charset-g3 t
    mnemonic "ISO8/Cyr"))
 
+;; Provide this language environment, but don't let the Unix locale select it
+;; (it has no locale entry in the alist); we leave that to Russian.
 (set-language-info-alist
  "Cyrillic-ISO" '((charset cyrillic-iso8859-5)
                   (tutorial . "TUTORIAL.ru")
@@ -271,23 +273,26 @@
 
 ;; Create a corresponding language environment. 
 (set-language-info-alist
- "Cyrillic-KOI8" '((charset cyrillic-iso8859-5)
-                   (coding-system koi8-r)
-                   (native-coding-system koi8-r)
-                   (coding-priority koi8-r)
-                   (input-method . "cyrillic-yawerty")
-                   (features cyril-util)
-                   (locale "ru")
-                   (mswindows-locale . "RUSSIAN")
-                   (tutorial . "TUTORIAL.ru")
-                   (sample-text . "Russian (,L@caaZXY(B)    ,L7T`PRabRcYbU(B!")
-                   (documentation . "Support for Cyrillic KOI8-R."))
+ "Russian" '((charset cyrillic-iso8859-5)
+	     (coding-system koi8-r)
+	     (native-coding-system koi8-r)
+	     (coding-priority koi8-r)
+	     (input-method . "cyrillic-yawerty")
+	     (features cyril-util)
+	     (locale "ru")
+	     (mswindows-locale . "RUSSIAN")
+	     (tutorial . "TUTORIAL.ru")
+	     (sample-text . "Russian (,L@caaZXY(B)    ,L7T`PRabRcYbU(B!")
+	     (documentation . "Support for Russian."))
  '("Cyrillic"))
 
-;; Alias it to Russian. 
+;; Provide Cyrillic-KOI8 for old times' sake too, but don't allow it to be
+;; selected by the Unix locale. A variant language environment called
+;; "Cyrillic-KOI8 (UTF-8)" just looks too odd.
+
 (set-language-info-alist
- "Russian"
- (cdr (assoc "Cyrillic-KOI8" language-info-alist))
+ "Cyrillic-KOI8"
+ (remassq 'locale (copy-list (cdr (assoc "Russian" language-info-alist))))
  '("Cyrillic"))
 
 ;; KOI8-U, for Ukrainian. 
@@ -444,13 +449,15 @@
 (set-language-info-alist
  "Ukrainian" '((coding-system koi8-u)
                (coding-priority koi8-u)
+               (locale "uk")
                (input-method . "cyrillic-ukrainian")
                (documentation
-                . "Support for Ukrainian with KOI8-U character set."))
+                . "Support for Ukrainian."))
  '("Cyrillic"))
 
-;; Windows 1251 may be provide automatically on Windows, in which case
-;; we don't need to.
+;; Windows 1251 may be provided automatically on Windows, in which case we
+;; don't need to provide it.
+;; #### (Though we should provide the CP1251 alias.)
 (unless (find-coding-system 'windows-1251) 
   (make-8-bit-coding-system 
    'windows-1251
@@ -594,18 +601,20 @@
  "Bulgarian" '((coding-system windows-1251)
                (coding-priority windows-1251)
                (input-method . "bulgarian-bds")
+               (locale "bg")
                (documentation
-                . "Support for Bulgarian with windows-1251 character set.")
+                . "Support for Bulgarian. ")
                (tutorial . "TUTORIAL.bg"))
  '("Cyrillic"))
 
 (set-language-info-alist
  "Belarusian" '((coding-system windows-1251)
                 (coding-priority windows-1251)
+                (locale "be")
                 (input-method . "belarusian")
                 (documentation
-                 . "Support for Belarusian with windows-1251 character set.
-\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
+                 . "Support for Belarusian. \(The name Belarusian replaced \
+Byelorussian in the early 1990s.)"))
  '("Cyrillic"))
 
 ;;; Alternativnyj
@@ -890,17 +899,6 @@
  '(mnemonic ",L@C(B"
    aliases (cp21866)))
 
-(set-language-info-alist
- "Cyrillic-KOI8RU" '((charset cyrillic-iso8859-5)
-                     (coding-system koi8-ru)
-                     (native-coding-system koi8-ru)
-                     (coding-priority koi8-ru)
-                     (input-method . "cyrillic-yawerty")
-                     (tutorial . "TUTORIAL.ru")
-                     (sample-text . "Russian (,L@caaZXY(B)  ,L7T`PRabRcYbU(B!")
-                     (documentation . "Support for Cyrillic ALTERNATIVNYJ."))
- '("Cyrillic"))
-
 ;; We should provide an input method and the corresponding language
 ;; environments for the next three coding systems. 
 
@@ -1339,4 +1337,4 @@
 
 (provide 'cyrillic)
 
-;;; cyrillic.el ends here
\ No newline at end of file
+;;; cyrillic.el ends here
--- a/lisp/mule/greek.el	Tue Jul 31 21:51:15 2007 +0000
+++ b/lisp/mule/greek.el	Wed Aug 01 13:53:41 2007 +0000
@@ -126,14 +126,141 @@
    charset-g3 t
    mnemonic "Grk"))
 
+;; Windows 1253 may be provided automatically on Windows, in which case
+;; we don't need to provide it.
+(unless (find-coding-system 'windows-1253) 
+  (make-8-bit-coding-system 
+   'windows-1253
+   '((#x80 ?\u20AC) ;; EURO SIGN
+     (#x82 ?\u201A) ;; SINGLE LOW-9 QUOTATION MARK
+     (#x83 ?\u0192) ;; LATIN SMALL LETTER F WITH HOOK
+     (#x84 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK
+     (#x85 ?\u2026) ;; HORIZONTAL ELLIPSIS
+     (#x86 ?\u2020) ;; DAGGER
+     (#x87 ?\u2021) ;; DOUBLE DAGGER
+     (#x89 ?\u2030) ;; PER MILLE SIGN
+     (#x8B ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+     (#x91 ?\u2018) ;; LEFT SINGLE QUOTATION MARK
+     (#x92 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK
+     (#x93 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK
+     (#x94 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK
+     (#x95 ?\u2022) ;; BULLET
+     (#x96 ?\u2013) ;; EN DASH
+     (#x97 ?\u2014) ;; EM DASH
+     (#x99 ?\u2122) ;; TRADE MARK SIGN
+     (#x9B ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+     (#xA0 ?\u00A0) ;; NO-BREAK SPACE
+     (#xA1 ?\u0385) ;; GREEK DIALYTIKA TONOS
+     (#xA2 ?\u0386) ;; GREEK CAPITAL LETTER ALPHA WITH TONOS
+     (#xA3 ?\u00A3) ;; POUND SIGN
+     (#xA4 ?\u00A4) ;; CURRENCY SIGN
+     (#xA5 ?\u00A5) ;; YEN SIGN
+     (#xA6 ?\u00A6) ;; BROKEN BAR
+     (#xA7 ?\u00A7) ;; SECTION SIGN
+     (#xA8 ?\u00A8) ;; DIAERESIS
+     (#xA9 ?\u00A9) ;; COPYRIGHT SIGN
+     (#xAB ?\u00AB) ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+     (#xAC ?\u00AC) ;; NOT SIGN
+     (#xAD ?\u00AD) ;; SOFT HYPHEN
+     (#xAE ?\u00AE) ;; REGISTERED SIGN
+     (#xAF ?\u2015) ;; HORIZONTAL BAR
+     (#xB0 ?\u00B0) ;; DEGREE SIGN
+     (#xB1 ?\u00B1) ;; PLUS-MINUS SIGN
+     (#xB2 ?\u00B2) ;; SUPERSCRIPT TWO
+     (#xB3 ?\u00B3) ;; SUPERSCRIPT THREE
+     (#xB4 ?\u0384) ;; GREEK TONOS
+     (#xB5 ?\u00B5) ;; MICRO SIGN
+     (#xB6 ?\u00B6) ;; PILCROW SIGN
+     (#xB7 ?\u00B7) ;; MIDDLE DOT
+     (#xB8 ?\u0388) ;; GREEK CAPITAL LETTER EPSILON WITH TONOS
+     (#xB9 ?\u0389) ;; GREEK CAPITAL LETTER ETA WITH TONOS
+     (#xBA ?\u038A) ;; GREEK CAPITAL LETTER IOTA WITH TONOS
+     (#xBB ?\u00BB) ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+     (#xBC ?\u038C) ;; GREEK CAPITAL LETTER OMICRON WITH TONOS
+     (#xBD ?\u00BD) ;; VULGAR FRACTION ONE HALF
+     (#xBE ?\u038E) ;; GREEK CAPITAL LETTER UPSILON WITH TONOS
+     (#xBF ?\u038F) ;; GREEK CAPITAL LETTER OMEGA WITH TONOS
+     (#xC0 ?\u0390) ;; GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+     (#xC1 ?\u0391) ;; GREEK CAPITAL LETTER ALPHA
+     (#xC2 ?\u0392) ;; GREEK CAPITAL LETTER BETA
+     (#xC3 ?\u0393) ;; GREEK CAPITAL LETTER GAMMA
+     (#xC4 ?\u0394) ;; GREEK CAPITAL LETTER DELTA
+     (#xC5 ?\u0395) ;; GREEK CAPITAL LETTER EPSILON
+     (#xC6 ?\u0396) ;; GREEK CAPITAL LETTER ZETA
+     (#xC7 ?\u0397) ;; GREEK CAPITAL LETTER ETA
+     (#xC8 ?\u0398) ;; GREEK CAPITAL LETTER THETA
+     (#xC9 ?\u0399) ;; GREEK CAPITAL LETTER IOTA
+     (#xCA ?\u039A) ;; GREEK CAPITAL LETTER KAPPA
+     (#xCB ?\u039B) ;; GREEK CAPITAL LETTER LAMDA
+     (#xCC ?\u039C) ;; GREEK CAPITAL LETTER MU
+     (#xCD ?\u039D) ;; GREEK CAPITAL LETTER NU
+     (#xCE ?\u039E) ;; GREEK CAPITAL LETTER XI
+     (#xCF ?\u039F) ;; GREEK CAPITAL LETTER OMICRON
+     (#xD0 ?\u03A0) ;; GREEK CAPITAL LETTER PI
+     (#xD1 ?\u03A1) ;; GREEK CAPITAL LETTER RHO
+     (#xD3 ?\u03A3) ;; GREEK CAPITAL LETTER SIGMA
+     (#xD4 ?\u03A4) ;; GREEK CAPITAL LETTER TAU
+     (#xD5 ?\u03A5) ;; GREEK CAPITAL LETTER UPSILON
+     (#xD6 ?\u03A6) ;; GREEK CAPITAL LETTER PHI
+     (#xD7 ?\u03A7) ;; GREEK CAPITAL LETTER CHI
+     (#xD8 ?\u03A8) ;; GREEK CAPITAL LETTER PSI
+     (#xD9 ?\u03A9) ;; GREEK CAPITAL LETTER OMEGA
+     (#xDA ?\u03AA) ;; GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+     (#xDB ?\u03AB) ;; GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+     (#xDC ?\u03AC) ;; GREEK SMALL LETTER ALPHA WITH TONOS
+     (#xDD ?\u03AD) ;; GREEK SMALL LETTER EPSILON WITH TONOS
+     (#xDE ?\u03AE) ;; GREEK SMALL LETTER ETA WITH TONOS
+     (#xDF ?\u03AF) ;; GREEK SMALL LETTER IOTA WITH TONOS
+     (#xE0 ?\u03B0) ;; GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+     (#xE1 ?\u03B1) ;; GREEK SMALL LETTER ALPHA
+     (#xE2 ?\u03B2) ;; GREEK SMALL LETTER BETA
+     (#xE3 ?\u03B3) ;; GREEK SMALL LETTER GAMMA
+     (#xE4 ?\u03B4) ;; GREEK SMALL LETTER DELTA
+     (#xE5 ?\u03B5) ;; GREEK SMALL LETTER EPSILON
+     (#xE6 ?\u03B6) ;; GREEK SMALL LETTER ZETA
+     (#xE7 ?\u03B7) ;; GREEK SMALL LETTER ETA
+     (#xE8 ?\u03B8) ;; GREEK SMALL LETTER THETA
+     (#xE9 ?\u03B9) ;; GREEK SMALL LETTER IOTA
+     (#xEA ?\u03BA) ;; GREEK SMALL LETTER KAPPA
+     (#xEB ?\u03BB) ;; GREEK SMALL LETTER LAMDA
+     (#xEC ?\u03BC) ;; GREEK SMALL LETTER MU
+     (#xED ?\u03BD) ;; GREEK SMALL LETTER NU
+     (#xEE ?\u03BE) ;; GREEK SMALL LETTER XI
+     (#xEF ?\u03BF) ;; GREEK SMALL LETTER OMICRON
+     (#xF0 ?\u03C0) ;; GREEK SMALL LETTER PI
+     (#xF1 ?\u03C1) ;; GREEK SMALL LETTER RHO
+     (#xF2 ?\u03C2) ;; GREEK SMALL LETTER FINAL SIGMA
+     (#xF3 ?\u03C3) ;; GREEK SMALL LETTER SIGMA
+     (#xF4 ?\u03C4) ;; GREEK SMALL LETTER TAU
+     (#xF5 ?\u03C5) ;; GREEK SMALL LETTER UPSILON
+     (#xF6 ?\u03C6) ;; GREEK SMALL LETTER PHI
+     (#xF7 ?\u03C7) ;; GREEK SMALL LETTER CHI
+     (#xF8 ?\u03C8) ;; GREEK SMALL LETTER PSI
+     (#xF9 ?\u03C9) ;; GREEK SMALL LETTER OMEGA
+     (#xFA ?\u03CA) ;; GREEK SMALL LETTER IOTA WITH DIALYTIKA
+     (#xFB ?\u03CB) ;; GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+     (#xFC ?\u03CC) ;; GREEK SMALL LETTER OMICRON WITH TONOS
+     (#xFD ?\u03CD) ;; GREEK SMALL LETTER UPSILON WITH TONOS
+     (#xFE ?\u03CE)) ;; GREEK SMALL LETTER OMEGA WITH TONOS
+   "Microsoft's Code Page 1253, for monotonic Greek.  "
+   '(mnemonic "GrkW"
+     documentation
+     "This ASCII-compatible encoding is slightly incompatible with
+ISO-8859-7; it provides several widely-used punctuation marks in the C1
+ISO-2022 area, which makes it incompatible with the latter standard, but
+that latter standard is not used in Greece.  "
+     aliases (cp1253))))
+
 (set-language-info-alist
  "Greek" '((charset greek-iso8859-7)
 	   (coding-system iso-8859-7)
 	   (coding-priority iso-8859-7)
 	   (native-coding-system iso-8859-7)
-	   (locale "el_GR.iso88597" "el_GR.greek8" "el_GR" "greek" "el")
+	   (locale "el")
 	   (input-method . "greek")
 	   (sample-text . "Greek (,FGkk]mija(B)	,FCei\(B ,Fsar(B")
 	   (documentation . t)))
 
+;; Greek (WINDOWS-1253) will be generated automatically under Unix. 
+
 ;;; greek.el ends here
--- a/lisp/mule/latin.el	Tue Jul 31 21:51:15 2007 +0000
+++ b/lisp/mule/latin.el	Wed Aug 01 13:53:41 2007 +0000
@@ -645,7 +645,7 @@
  Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish.")
      (("Danish" "da")
       ("Dutch" "nl" "TUTORIAL.nl")
-      ("Faeroese")
+      ("Faeroese" "fo")
       ("Finnish" "fi")
       ("French" "fr" "TUTORIAL.fr" "Bonjour, ,Ag(Ba va?")
       ("German" "de" "TUTORIAL.de" "\
@@ -666,7 +666,7 @@
 " Albanian, Czech, English, German, Hungarian, Polish, Romanian,
  Serbian, Croatian, Slovak, Slovene, Sorbian (upper and lower),
  and Swedish.") ;; " added because fontification got screwed up, CVS-20061203.
-     (("Albanian" nil)
+     (("Albanian" "sq")
       ("Croatian" ("hrvatski" "hr") "TUTORIAL.hr")
       ("Czech" ("cs" "cz") "TUTORIAL.cs" "P,Bx(Bejeme v,Ba(Bm hezk,B}(B den!"
        "latin-2-postfix")
@@ -685,15 +685,15 @@
  German, Italian, Maltese, Spanish, and Turkish.")
      (("Afrikaans" "af")
       ("Catalan" ("catalan" "ca"))
-      ("Esperanto")
-      ("Galician")
-      ("Maltese")))
+      ("Esperanto" "eo")
+      ("Galician" "gl")
+      ("Maltese" "mt")))
     ((latin-iso8859-4 iso-8859-4 "latin-4-prefix" "Latin-4" "ISO-8859-4"
 " Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
  Latvian, Lithuanian, and Norwegian.")
      (("Estonian" "et")
-      ("Greenlandic")
-      ("Lappish")
+      ("Greenlandic" "kl")
+      ("Lappish" "se")
       ("Latvian" "lv")
       ("Lithuanian" "li")))
     ((latin-iso8859-9 iso-8859-9 "latin-5-prefix" "Latin-5" "ISO-8859-9")
--- a/lisp/mule/mule-coding.el	Tue Jul 31 21:51:15 2007 +0000
+++ b/lisp/mule/mule-coding.el	Wed Aug 01 13:53:41 2007 +0000
@@ -315,7 +315,7 @@
     (when worth-trying
       (setq other-charset-vector (make-vector 256 encode-failure-octet))
       (loop for i from charset-lower to charset-upper
-        do (aset other-charset-vector (+ #x80 i)
+        do (aset other-charset-vector i
 		 (gethash (encode-char (make-char worth-trying i)
 				       'ucs) encode-table)))
       (setq encode-program