comparison lisp/mule/iso-with-esc.el @ 4491:d402d7b18bd8

Revamp the Arabic support. Create greek-iso-8bit-with-esc, arabic-iso-8bit-with-esc. src/ChangeLog addition: 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * mule-charset.c (complex_vars_of_mule_charset): Remove Vcharset_arabic_iso8859_7. * lisp.h: Remove Vcharset_arabic_iso8859_7. See commentary in lisp/mule/iso-with-esc.el for motivation. lisp/ChangeLog addition: 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * mule/iso-with-esc.el (greek-iso-8bit-with-esc): (arabic-iso-8bit-with-esc): Add these two here. Move the implementation of the 'arabic-iso8859-6 character set here, with commentary on why that is reasonable. * mule/arabic.el (iso-8859-6): Add iso-8859-6, windows-1256 implementations using make-8-bit-coding-system. Remove our non-standard Mule character sets. * unicode.el (load-unicode-tables): Remove Arabic since it's no longer dumped. * mule/mule-msw-init-late.el: Remove Arabic. * mule/mule-category.el (predefined-category-list): Remove Arabic. etc/ChangeLog addition: 2008-08-05 Aidan Kehoe <kehoea@parhasard.net> * HELLO: Encode the Arabic using UTF-8 sequences, not ISO-8859-6.
author Aidan Kehoe <kehoea@parhasard.net>
date Tue, 05 Aug 2008 08:37:17 +0200
parents cee827542370
children 1d74a1d115ee
comparison
equal deleted inserted replaced
4488:6b0000935adc 4491:d402d7b18bd8
96 charset-g1 hebrew-iso8859-8 96 charset-g1 hebrew-iso8859-8
97 charset-g2 t 97 charset-g2 t
98 charset-g3 t 98 charset-g3 t
99 no-iso6429 t 99 no-iso6429 t
100 mnemonic "MIME/Hbrw")) 100 mnemonic "MIME/Hbrw"))
101
102 ;;;###autoload
103 (make-coding-system
104 'greek-iso-8bit-with-esc 'iso2022 "MIME ISO-8859-7"
105 '(charset-g0 ascii
106 charset-g1 greek-iso8859-7
107 charset-g2 t
108 charset-g3 t
109 mnemonic "Grk"))
110
111 ;; ISO 8859-6 is such a useless character set that it seems a waste of
112 ;; codespace to dump it. Let me count the ways:
113 ;;
114 ;; 1. It doesn't support Persian or Urdu, let alone Sinhalese, despite
115 ;; plenty of unallocated code points.
116 ;;
117 ;; 2. It doesn't encode all the vowel diacritics (the Harakaat) despite that
118 ;; they are necessary, even for the Arabs, for basic things like
119 ;; dictionary entries, children's books, and occasional disambiguation.
120 ;;
121 ;; 3. The Arabs don't use it, they use Windows-1256, which also supports
122 ;; Persian, at least, as well as the French characters necessary in
123 ;; Lebanon and North Africa.
124
125 (make-charset
126 'arabic-iso8859-6
127 "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127"
128 '(dimension 1
129 registry "ISO8859-6"
130 chars 96
131 columns 1
132 direction r2l
133 final ?G
134 graphic 1
135 short-name "RHP of ISO8859/6"
136 long-name "RHP of Arabic (ISO 8859-6): ISO-IR-127"))
137
138 (loop
139 for (iso8859-6 unicode)
140 in '((#xA0 #x00A0) ;; NO-BREAK SPACE
141 (#xA4 #x00A4) ;; CURRENCY SIGN
142 (#xAC #x060C) ;; ARABIC COMMA
143 (#xAD #x00AD) ;; SOFT HYPHEN
144 (#xBB #x061B) ;; ARABIC SEMICOLON
145 (#xBF #x061F) ;; ARABIC QUESTION MARK
146 (#xC1 #x0621) ;; ARABIC LETTER HAMZA
147 (#xC2 #x0622) ;; ARABIC LETTER ALEF WITH MADDA ABOVE
148 (#xC3 #x0623) ;; ARABIC LETTER ALEF WITH HAMZA ABOVE
149 (#xC4 #x0624) ;; ARABIC LETTER WAW WITH HAMZA ABOVE
150 (#xC5 #x0625) ;; ARABIC LETTER ALEF WITH HAMZA BELOW
151 (#xC6 #x0626) ;; ARABIC LETTER YEH WITH HAMZA ABOVE
152 (#xC7 #x0627) ;; ARABIC LETTER ALEF
153 (#xC8 #x0628) ;; ARABIC LETTER BEH
154 (#xC9 #x0629) ;; ARABIC LETTER TEH MARBUTA
155 (#xCA #x062A) ;; ARABIC LETTER TEH
156 (#xCB #x062B) ;; ARABIC LETTER THEH
157 (#xCC #x062C) ;; ARABIC LETTER JEEM
158 (#xCD #x062D) ;; ARABIC LETTER HAH
159 (#xCE #x062E) ;; ARABIC LETTER KHAH
160 (#xCF #x062F) ;; ARABIC LETTER DAL
161 (#xD0 #x0630) ;; ARABIC LETTER THAL
162 (#xD1 #x0631) ;; ARABIC LETTER REH
163 (#xD2 #x0632) ;; ARABIC LETTER ZAIN
164 (#xD3 #x0633) ;; ARABIC LETTER SEEN
165 (#xD4 #x0634) ;; ARABIC LETTER SHEEN
166 (#xD5 #x0635) ;; ARABIC LETTER SAD
167 (#xD6 #x0636) ;; ARABIC LETTER DAD
168 (#xD7 #x0637) ;; ARABIC LETTER TAH
169 (#xD8 #x0638) ;; ARABIC LETTER ZAH
170 (#xD9 #x0639) ;; ARABIC LETTER AIN
171 (#xDA #x063A) ;; ARABIC LETTER GHAIN
172 (#xE0 #x0640) ;; ARABIC TATWEEL
173 (#xE1 #x0641) ;; ARABIC LETTER FEH
174 (#xE2 #x0642) ;; ARABIC LETTER QAF
175 (#xE3 #x0643) ;; ARABIC LETTER KAF
176 (#xE4 #x0644) ;; ARABIC LETTER LAM
177 (#xE5 #x0645) ;; ARABIC LETTER MEEM
178 (#xE6 #x0646) ;; ARABIC LETTER NOON
179 (#xE7 #x0647) ;; ARABIC LETTER HEH
180 (#xE8 #x0648) ;; ARABIC LETTER WAW
181 (#xE9 #x0649) ;; ARABIC LETTER ALEF MAKSURA
182 (#xEA #x064A) ;; ARABIC LETTER YEH
183 (#xEB #x064B) ;; ARABIC FATHATAN
184 (#xEC #x064C) ;; ARABIC DAMMATAN
185 (#xED #x064D) ;; ARABIC KASRATAN
186 (#xEE #x064E) ;; ARABIC FATHA
187 (#xEF #x064F) ;; ARABIC DAMMA
188 (#xF0 #x0650) ;; ARABIC KASRA
189 (#xF1 #x0651) ;; ARABIC SHADDA
190 (#xF2 #x0652));; ARABIC SUKUN
191 do (set-unicode-conversion (make-char 'arabic-iso8859-6 iso8859-6)
192 unicode))
193
194 ;;;###autoload
195 (make-coding-system
196 'arabic-iso-8bit-with-esc 'iso2022 ;; GNU's iso-8859-6 is
197 ;; iso2022-compatible.
198 "ISO-8859-6 (Arabic)"
199 '(charset-g0 ascii
200 charset-g1 arabic-iso8859-6
201 charset-g2 t
202 charset-g3 t
203 no-iso6429 t
204 mnemonic "MIME/Arbc"))
205