diff lisp/mule/iso-with-esc.el @ 4491:d402d7b18bd8

Revamp the Arabic support. Create greek-iso-8bit-with-esc, arabic-iso-8bit-with-esc. src/ChangeLog addition: 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * mule-charset.c (complex_vars_of_mule_charset): Remove Vcharset_arabic_iso8859_7. * lisp.h: Remove Vcharset_arabic_iso8859_7. See commentary in lisp/mule/iso-with-esc.el for motivation. lisp/ChangeLog addition: 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * mule/iso-with-esc.el (greek-iso-8bit-with-esc): (arabic-iso-8bit-with-esc): Add these two here. Move the implementation of the 'arabic-iso8859-6 character set here, with commentary on why that is reasonable. * mule/arabic.el (iso-8859-6): Add iso-8859-6, windows-1256 implementations using make-8-bit-coding-system. Remove our non-standard Mule character sets. * unicode.el (load-unicode-tables): Remove Arabic since it's no longer dumped. * mule/mule-msw-init-late.el: Remove Arabic. * mule/mule-category.el (predefined-category-list): Remove Arabic. etc/ChangeLog addition: 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * HELLO: Encode the Arabic using UTF-8 sequences, not ISO-8859-6.
author Aidan Kehoe <kehoea@parhasard.net>
date Tue, 05 Aug 2008 08:37:17 +0200
parents cee827542370
children 1d74a1d115ee
line wrap: on
line diff
--- a/lisp/mule/iso-with-esc.el	Sat Jul 26 13:50:27 2008 +0300
+++ b/lisp/mule/iso-with-esc.el	Tue Aug 05 08:37:17 2008 +0200
@@ -98,3 +98,108 @@
   charset-g3 t
    no-iso6429 t
    mnemonic "MIME/Hbrw"))
+
+;;;###autoload
+(make-coding-system 'greek-iso-8bit-with-esc 'iso2022
+ "MIME ISO-8859-7"
+ ;; G0 is given ascii and G1 greek-iso8859-7; G2 and G3 are both t.
+ '(charset-g0 ascii charset-g1 greek-iso8859-7
+   charset-g2 t charset-g3 t
+   ;; Mnemonic string for this coding system.
+   mnemonic "Grk"))
+
+;; ISO 8859-6 is such a useless character set that it seems a waste of
+;; codespace to dump it. Let me count the ways: 
+;; 
+;; 1. It doesn't support Persian or Urdu, let alone Sindhi, despite
+;;    plenty of unallocated code points.
+;;
+;; 2. It doesn't encode all the vowel diacritics (the Harakaat), although
+;;    they are necessary, even for the Arabs, for basic things like
+;;    dictionary entries, children's books, and occasional disambiguation.
+;;
+;; 3. The Arabs don't use it, they use Windows-1256, which also supports
+;;    Persian, at least, as well as the French characters necessary in
+;;    Lebanon and North Africa.
+
+(make-charset 'arabic-iso8859-6
+ "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127"
+ ;; Shape of the set: dimension 1 with 96 characters, columns 1,
+ ;; direction r2l (right-to-left).
+ '(dimension 1 chars 96
+   columns 1 direction r2l
+   ;; Designation properties: final ?G and graphic 1.
+   final ?G graphic 1
+   ;; Registry string, followed by the two descriptive names.
+   registry "ISO8859-6"
+   short-name "RHP of ISO8859/6"
+   long-name "RHP of Arabic (ISO 8859-6): ISO-IR-127"))
+
+;; Register the Unicode code point of every assigned ISO 8859-6 octet.
+(dolist (mapping
+         '((#xA0 #x00A0) ;; NO-BREAK SPACE
+           (#xA4 #x00A4) ;; CURRENCY SIGN
+           (#xAC #x060C) ;; ARABIC COMMA
+           (#xAD #x00AD) ;; SOFT HYPHEN
+           (#xBB #x061B) ;; ARABIC SEMICOLON
+           (#xBF #x061F) ;; ARABIC QUESTION MARK
+           (#xC1 #x0621) ;; ARABIC LETTER HAMZA
+           (#xC2 #x0622) ;; ARABIC LETTER ALEF WITH MADDA ABOVE
+           (#xC3 #x0623) ;; ARABIC LETTER ALEF WITH HAMZA ABOVE
+           (#xC4 #x0624) ;; ARABIC LETTER WAW WITH HAMZA ABOVE
+           (#xC5 #x0625) ;; ARABIC LETTER ALEF WITH HAMZA BELOW
+           (#xC6 #x0626) ;; ARABIC LETTER YEH WITH HAMZA ABOVE
+           (#xC7 #x0627) ;; ARABIC LETTER ALEF
+           (#xC8 #x0628) ;; ARABIC LETTER BEH
+           (#xC9 #x0629) ;; ARABIC LETTER TEH MARBUTA
+           (#xCA #x062A) ;; ARABIC LETTER TEH
+           (#xCB #x062B) ;; ARABIC LETTER THEH
+           (#xCC #x062C) ;; ARABIC LETTER JEEM
+           (#xCD #x062D) ;; ARABIC LETTER HAH
+           (#xCE #x062E) ;; ARABIC LETTER KHAH
+           (#xCF #x062F) ;; ARABIC LETTER DAL
+           (#xD0 #x0630) ;; ARABIC LETTER THAL
+           (#xD1 #x0631) ;; ARABIC LETTER REH
+           (#xD2 #x0632) ;; ARABIC LETTER ZAIN
+           (#xD3 #x0633) ;; ARABIC LETTER SEEN
+           (#xD4 #x0634) ;; ARABIC LETTER SHEEN
+           (#xD5 #x0635) ;; ARABIC LETTER SAD
+           (#xD6 #x0636) ;; ARABIC LETTER DAD
+           (#xD7 #x0637) ;; ARABIC LETTER TAH
+           (#xD8 #x0638) ;; ARABIC LETTER ZAH
+           (#xD9 #x0639) ;; ARABIC LETTER AIN
+           (#xDA #x063A) ;; ARABIC LETTER GHAIN
+           (#xE0 #x0640) ;; ARABIC TATWEEL
+           (#xE1 #x0641) ;; ARABIC LETTER FEH
+           (#xE2 #x0642) ;; ARABIC LETTER QAF
+           (#xE3 #x0643) ;; ARABIC LETTER KAF
+           (#xE4 #x0644) ;; ARABIC LETTER LAM
+           (#xE5 #x0645) ;; ARABIC LETTER MEEM
+           (#xE6 #x0646) ;; ARABIC LETTER NOON
+           (#xE7 #x0647) ;; ARABIC LETTER HEH
+           (#xE8 #x0648) ;; ARABIC LETTER WAW
+           (#xE9 #x0649) ;; ARABIC LETTER ALEF MAKSURA
+           (#xEA #x064A) ;; ARABIC LETTER YEH
+           (#xEB #x064B) ;; ARABIC FATHATAN
+           (#xEC #x064C) ;; ARABIC DAMMATAN
+           (#xED #x064D) ;; ARABIC KASRATAN
+           (#xEE #x064E) ;; ARABIC FATHA
+           (#xEF #x064F) ;; ARABIC DAMMA
+           (#xF0 #x0650) ;; ARABIC KASRA
+           (#xF1 #x0651) ;; ARABIC SHADDA
+           (#xF2 #x0652))) ;; ARABIC SUKUN
+  (set-unicode-conversion (make-char 'arabic-iso8859-6 (car mapping))
+                          (nth 1 mapping)))
+
+;;;###autoload
+(make-coding-system 'arabic-iso-8bit-with-esc
+ ;; Built on the iso2022 coding system type; GNU's iso-8859-6 is
+ ;; iso2022-compatible.
+ 'iso2022
+ "ISO-8859-6 (Arabic)"
+ ;; G0 is given ascii and G1 arabic-iso8859-6; G2 and G3 are both t.
+ '(charset-g0 ascii charset-g1 arabic-iso8859-6
+   charset-g2 t charset-g3 t
+   no-iso6429 t
+   mnemonic "MIME/Arbc"))
+