Mercurial > hg > xemacs-beta
annotate lisp/unicode.el @ 5561:9a93bc90b3bd
Add a defsetf for get-char-table, necessary for the tests in the last commit.
lisp/ChangeLog addition:
2011-09-04 Aidan Kehoe <kehoea@parhasard.net>
* cl-macs.el (get-char-table): Add a defsetf for this.
| author | Aidan Kehoe <kehoea@parhasard.net> |
|---|---|
| date | Sun, 04 Sep 2011 20:35:31 +0100 |
| parents | 248176c74e6b |
| children |
| rev | line source |
|---|---|
|
4993
c0934cef10c6
convert some source files to utf-8
Ben Wing <ben@xemacs.org>
parents:
4834
diff
changeset
|
1 ;;; unicode.el --- Unicode support -*- coding: utf-8; -*- |
| 771 | 2 |
| 778 | 3 ;; Copyright (C) 2001, 2002 Ben Wing. |
| 771 | 4 |
| 5 ;; Keywords: multilingual, Unicode | |
| 6 | |
| 7 ;; This file is part of XEmacs. | |
| 8 | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
9 ;; XEmacs is free software: you can redistribute it and/or modify it |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
10 ;; under the terms of the GNU General Public License as published by the |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
11 ;; Free Software Foundation, either version 3 of the License, or (at your |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
12 ;; option) any later version. |
| 771 | 13 |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
14 ;; XEmacs is distributed in the hope that it will be useful, but WITHOUT |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
15 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
16 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
17 ;; for more details. |
| 771 | 18 |
| 19 ;; You should have received a copy of the GNU General Public License | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4993
diff
changeset
|
20 ;; along with XEmacs. If not, see <http://www.gnu.org/licenses/>. |
| 771 | 21 |
| 22 ;;; Synched up with: Not in FSF. | |
| 23 | |
| 24 ;;; Commentary: | |
| 25 | |
| 26 ;; Lisp support for Unicode, e.g. initialize the translation tables. | |
| 27 | |
| 28 ;;; Code: | |
| 29 | |
| 3659 | 30 ;; GNU Emacs has the charsets: |
| 778 | 31 |
| 3659 | 32 ;; mule-unicode-2500-33ff |
| 33 ;; mule-unicode-e000-ffff | |
| 34 ;; mule-unicode-0100-24ff | |
| 778 | 35 |
| 3659 | 36 ;; built-in. This is a hack--and an incomplete hack at that--against the |
| 37 ;; spirit and the letter of standard ISO 2022 character sets. Instead of | |
| 38 ;; this, we have the jit-ucs-charset-N Mule character sets, created in | |
| 39 ;; unicode.c on encountering a Unicode code point that we don't recognise, | |
| 40 ;; and saved in ISO 2022 coding systems using the UTF-8 escape described in | |
| 41 ;; ISO-IR 196. | |
| 778 | 42 |
| 4083 | 43 (eval-when-compile (when (featurep 'mule) (require 'ccl))) |
| 44 | |
| 2367 | 45 ;; accessed in loadup.el, mule-cmds.el; see discussion in unicode.c |
| 46 (defvar load-unicode-tables-at-dump-time (eq system-type 'windows-nt) | |
| 47 "[INTERNAL] Whether to load the Unicode tables at dump time. | |
| 48 Setting this at run-time does nothing.") | |
| 49 | |
| 771 | 50 ;; NOTE: This takes only a fraction of a second on my Pentium III |
| 51 ;; 700Mhz even with a totally optimization-disabled XEmacs. | |
| 52 (defun load-unicode-tables () | |
| 53 "Initialize the Unicode translation tables for all standard charsets." | |
| 780 | 54 (let ((parse-args |
| 55 '(("unicode/unicode-consortium" | |
| 877 | 56 ;; Due to the braindamaged way Mule treats the ASCII and Control-1 |
| 57 ;; charsets' types, trying to load them results in out-of-range | |
| 58 ;; warnings at unicode.c:1439. They're no-ops anyway, they're | |
| 59 ;; hardwired in unicode.c (unicode_to_ichar, ichar_to_unicode). | |
| 60 ;; ("8859-1.TXT" ascii #x00 #x7F #x0) | |
| 61 ;; ("8859-1.TXT" control-1 #x80 #x9F #x-80) | |
| 62 ;; The 8859-1.TXT G1 assignments are half no-ops, hardwired in | |
| 63 ;; unicode.c ichar_to_unicode, but not in unicode_to_ichar. | |
| 780 | 64 ("8859-1.TXT" latin-iso8859-1 #xA0 #xFF #x-80) |
| 65 ;; "8859-10.TXT" | |
| 66 ;; "8859-13.TXT" | |
| 67 ("8859-14.TXT" latin-iso8859-14 #xA0 #xFF #x-80) | |
| 68 ("8859-15.TXT" latin-iso8859-15 #xA0 #xFF #x-80) | |
| 2575 | 69 ("8859-16.TXT" latin-iso8859-16 #xA0 #xFF #x-80) |
| 780 | 70 ("8859-2.TXT" latin-iso8859-2 #xA0 #xFF #x-80) |
| 71 ("8859-3.TXT" latin-iso8859-3 #xA0 #xFF #x-80) | |
| 72 ("8859-4.TXT" latin-iso8859-4 #xA0 #xFF #x-80) | |
| 73 ("8859-5.TXT" cyrillic-iso8859-5 #xA0 #xFF #x-80) | |
|
4784
a67bfb29dd8b
Dump the arabic-iso8859-6 character set, again.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4783
diff
changeset
|
74 ("8859-6.TXT" arabic-iso8859-6 #xA0 #xFF #x-80) |
| 780 | 75 ("8859-7.TXT" greek-iso8859-7 #xA0 #xFF #x-80) |
| 76 ("8859-8.TXT" hebrew-iso8859-8 #xA0 #xFF #x-80) | |
| 77 ("8859-9.TXT" latin-iso8859-9 #xA0 #xFF #x-80) | |
| 78 ;; charset for Big5 does not matter; specifying `big5' will | |
| 79 ;; automatically make the right thing happen | |
| 80 ("BIG5.TXT" chinese-big5-1 nil nil nil big5) | |
| 81 ("CNS11643.TXT" chinese-cns11643-1 #x10000 #x1FFFF #x-10000) | |
| 82 ("CNS11643.TXT" chinese-cns11643-2 #x20000 #x2FFFF #x-20000) | |
| 83 ;; "CP1250.TXT" | |
| 84 ;; "CP1251.TXT" | |
| 85 ;; "CP1252.TXT" | |
| 86 ;; "CP1253.TXT" | |
| 87 ;; "CP1254.TXT" | |
| 88 ;; "CP1255.TXT" | |
| 89 ;; "CP1256.TXT" | |
| 90 ;; "CP1257.TXT" | |
| 91 ;; "CP1258.TXT" | |
| 92 ;; "CP874.TXT" | |
| 93 ;; "CP932.TXT" | |
| 94 ;; "CP936.TXT" | |
| 95 ;; "CP949.TXT" | |
| 96 ;; "CP950.TXT" | |
| 97 ;; "GB12345.TXT" | |
| 98 ("GB2312.TXT" chinese-gb2312) | |
| 2297 | 99 ;; "HANGUL.TXT" |
| 100 ;; #### shouldn't JIS X 0201's upper limit be 7f? | |
| 780 | 101 ("JIS0201.TXT" latin-jisx0201 #x21 #x80) |
| 102 ("JIS0201.TXT" katakana-jisx0201 #xA0 #xFF #x-80) | |
| 103 ("JIS0208.TXT" japanese-jisx0208 nil nil nil ignore-first-column) | |
| 104 ("JIS0212.TXT" japanese-jisx0212) | |
| 105 ;; "JOHAB.TXT" | |
| 106 ;; "KOI8-R.TXT" | |
| 107 ;; "KSC5601.TXT" | |
| 108 ;; note that KSC5601.TXT as currently distributed is NOT what | |
| 109 ;; it claims to be! see comments in KSX1001.TXT. | |
| 110 ("KSX1001.TXT" korean-ksc5601) | |
| 111 ;; "OLD5601.TXT" | |
| 112 ;; "SHIFTJIS.TXT" | |
| 113 ) | |
| 114 ("unicode/mule-ucs" | |
| 2297 | 115 ;; #### we don't support surrogates?!?? |
| 780 | 116 ;; use these instead of the above ones once we support surrogates |
| 117 ;;("chinese-cns11643-1.txt" chinese-cns11643-1) | |
| 118 ;;("chinese-cns11643-2.txt" chinese-cns11643-2) | |
| 119 ;;("chinese-cns11643-3.txt" chinese-cns11643-3) | |
| 120 ;;("chinese-cns11643-4.txt" chinese-cns11643-4) | |
| 121 ;;("chinese-cns11643-5.txt" chinese-cns11643-5) | |
| 122 ;;("chinese-cns11643-6.txt" chinese-cns11643-6) | |
| 123 ;;("chinese-cns11643-7.txt" chinese-cns11643-7) | |
| 124 ("chinese-sisheng.txt" chinese-sisheng) | |
| 125 ("ethiopic.txt" ethiopic) | |
| 126 ("indian-is13194.txt" indian-is13194) | |
| 127 ("ipa.txt" ipa) | |
| 128 ("thai-tis620.txt" thai-tis620) | |
| 129 ("tibetan.txt" tibetan) | |
| 130 ("vietnamese-viscii-lower.txt" vietnamese-viscii-lower) | |
| 131 ("vietnamese-viscii-upper.txt" vietnamese-viscii-upper) | |
| 132 ) | |
| 133 ("unicode/other" | |
| 134 ("lao.txt" lao) | |
| 135 ) | |
| 771 | 136 ))) |
|
4783
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
137 (mapc #'(lambda (tables) |
|
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
138 (let ((undir |
|
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
139 (expand-file-name (car tables) data-directory))) |
|
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
140 (mapc #'(lambda (args) |
|
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
141 (apply 'load-unicode-mapping-table |
|
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
142 (expand-file-name (car args) undir) |
|
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
143 (cdr args))) |
|
e29fcfd8df5f
Eliminate most core code byte-compile warnings.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
144 (cdr tables)))) |
| 4145 | 145 parse-args) |
| 146 ;; The default-unicode-precedence-list. We set this here to default to | |
| 147 ;; *not* mapping various European characters to East Asian characters; | |
| 148 ;; otherwise the default-unicode-precedence-list is numerically ordered | |
| 149 ;; by charset ID. | |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
150 (declare-fboundp |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
151 (set-default-unicode-precedence-list |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
152 '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15 |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
153 greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5 |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
154 latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9 |
|
4805
980575c76541
Move the arabic-iso8859-6 character set back to C, otherwise X11 lookup fails.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4784
diff
changeset
|
155 vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6 |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
156 jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978 |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
157 japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2 |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
158 chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2 |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
159 indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2 |
|
4491
d402d7b18bd8
Revamp the Arabic support. Create greek-iso-8bit-with-esc, arabic-iso-8bit-with-esc.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4468
diff
changeset
|
160 chinese-isoir165 |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
161 composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0 |
|
5396
75469840109b
Drop support for Thai-XTIS, which was always non-standard and never used.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4993
diff
changeset
|
162 katakana-jisx0201 lao thai-tis620 tibetan tibetan-1-column |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
163 latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4 |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
164 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7))))) |
| 771 | 165 |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
166 (defconst ccl-encode-to-ucs-2 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
167 (eval-when-compile |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
168 (let ((pre-existing |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
169 ;; This is the compiled CCL program from the assert |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
170 ;; below. Since this file is dumped and ccl.el isn't (and |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
171 ;; even when it was, it was dumped much later than this |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
172 ;; one), we can't compile the program at dump time. We can |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
173 ;; check at byte compile time that the program is as |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
174 ;; expected, though. |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
175 [1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
176 147513 8 82009 255 22])) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
177 (when (featurep 'mule) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
178 ;; Check that the pre-existing constant reflects the intended |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
179 ;; CCL program. |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
180 (assert |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
181 (equal pre-existing |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
182 (ccl-compile |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
183 `(1 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
184 ( ;; mule-to-unicode's first argument is the |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
185 ;; charset ID, the second its first byte |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
186 ;; left shifted by 7 bits ORed with its |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
187 ;; second byte. |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
188 (r1 = (r1 << 7)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
189 (r1 = (r1 | r2)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
190 (mule-to-unicode r0 r1) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
191 (if (r0 & ,(lognot #xFFFF)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
192 ;; Redisplay looks in r1 and r2 for the first |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
193 ;; and second bytes of the X11 font, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
194 ;; respectively. For non-BMP characters we |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
195 ;; display U+FFFD. |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
196 ((r1 = #xFF) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
197 (r2 = #xFD)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
198 ((r1 = (r0 >> 8)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
199 (r2 = (r0 & #xFF)))))))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
200 nil |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
201 "The pre-compiled CCL program appears broken. ")) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
202 pre-existing)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
203 "CCL program to transform Mule characters to UCS-2.") |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
204 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
205 (when (featurep 'mule) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
206 (put 'ccl-encode-to-ucs-2 'ccl-program-idx |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
207 (declare-fboundp |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
208 (register-ccl-program 'ccl-encode-to-ucs-2 ccl-encode-to-ucs-2)))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
209 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
210 (defun decode-char (quote-ucs code &optional restriction) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
211 "FSF compatibility--return Mule character with Unicode codepoint CODE. |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
212 The second argument must be 'ucs, the third argument is ignored. " |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
213 ;; We're prepared to accept invalid Unicode in unicode-to-char, but not in |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
214 ;; this function, which is the API that should actually be used, since |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
215 ;; it's available in GNU and in Mule-UCS. |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
216 (check-argument-range code #x0 #x10FFFF) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
217 (assert (eq quote-ucs 'ucs) t |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
218 "Sorry, decode-char doesn't yet support anything but the UCS. ") |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
219 (unicode-to-char code)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
220 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
221 (defun encode-char (char quote-ucs &optional restriction) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
222 "FSF compatibility--return the Unicode code point of CHAR. |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
223 The second argument must be 'ucs, the third argument is ignored. " |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
224 (assert (eq quote-ucs 'ucs) t |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
225 "Sorry, encode-char doesn't yet support anything but the UCS. ") |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
226 (char-to-unicode char)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4619
diff
changeset
|
227 |
| 771 | 228 (make-coding-system |
| 229 'utf-16 'unicode | |
| 230 "UTF-16" | |
| 231 '(mnemonic "UTF-16" | |
| 3767 | 232 documentation |
| 771 | 233 "UTF-16 Unicode encoding -- the standard (almost-) fixed-width |
| 234 two-byte encoding, with surrogates. It will be fixed-width if all | |
| 235 characters are in the BMP (Basic Multilingual Plane -- first 65536 | |
| 236 codepoints). Cannot represent characters with codepoints above | |
| 237 0x10FFFF (a little more than 1,000,000). Unicode and ISO guarantee | |
| 238 never to encode any characters outside this range -- all the rest are | |
| 239 for private, corporate or internal use." | |
| 3767 | 240 unicode-type utf-16)) |
| 771 | 241 |
| 2574 | 242 (define-coding-system-alias 'utf-16-be 'utf-16) |
| 243 | |
| 771 | 244 (make-coding-system |
| 245 'utf-16-bom 'unicode | |
| 246 "UTF-16 w/BOM" | |
| 247 '(mnemonic "UTF16-BOM" | |
| 3767 | 248 documentation |
| 771 | 249 "UTF-16 Unicode encoding with byte order mark (BOM) at the beginning. |
| 250 The BOM is Unicode character U+FEFF -- i.e. the first two bytes are | |
| 251 0xFE and 0xFF, respectively, or reversed in a little-endian | |
| 252 representation. It has been sanctioned by the Unicode Consortium for | |
| 253 use at the beginning of a Unicode stream as a marker of the byte order | |
| 254 of the stream, and commonly appears in Unicode files under Microsoft | |
| 255 Windows, where it also functions as a magic cookie identifying a | |
| 256 Unicode file. The character is called \"ZERO WIDTH NO-BREAK SPACE\" | |
| 257 and is suitable as a byte-order marker because: | |
| 258 | |
| 259 -- it has no displayable representation | |
| 260 -- due to its semantics it never normally appears at the beginning | |
| 261 of a stream | |
| 262 -- its reverse U+FFFE is not a legal Unicode character | |
| 263 -- neither byte sequence is at all likely in any other standard | |
| 264 encoding, particularly at the beginning of a stream | |
| 265 | |
| 266 This coding system will insert a BOM at the beginning of a stream when | |
| 267 writing and strip it off when reading." | |
| 3767 | 268 unicode-type utf-16 |
| 771 | 269 need-bom t)) |
| 270 | |
| 271 (make-coding-system | |
| 272 'utf-16-little-endian 'unicode | |
| 273 "UTF-16 Little Endian" | |
| 274 '(mnemonic "UTF16-LE" | |
| 275 documentation | |
| 276 "Little-endian version of UTF-16 Unicode encoding. | |
| 277 See `utf-16' coding system." | |
| 3767 | 278 unicode-type utf-16 |
| 771 | 279 little-endian t)) |
| 280 | |
| 2574 | 281 (define-coding-system-alias 'utf-16-le 'utf-16-little-endian) |
| 282 | |
| 771 | 283 (make-coding-system |
| 284 'utf-16-little-endian-bom 'unicode | |
| 285 "UTF-16 Little Endian w/BOM" | |
| 286 '(mnemonic "MSW-Unicode" | |
| 287 documentation | |
| 288 "Little-endian version of UTF-16 Unicode encoding, with byte order mark. | |
| 289 Standard encoding for representing Unicode under MS Windows. See | |
| 290 `utf-16-bom' coding system." | |
| 3767 | 291 unicode-type utf-16 |
| 771 | 292 little-endian t |
| 293 need-bom t)) | |
| 294 | |
| 295 (make-coding-system | |
| 296 'ucs-4 'unicode | |
| 297 "UCS-4" | |
| 298 '(mnemonic "UCS4" | |
| 299 documentation | |
| 300 "UCS-4 Unicode encoding -- fully fixed-width four-byte encoding." | |
| 3767 | 301 unicode-type ucs-4)) |
| 771 | 302 |
| 303 (make-coding-system | |
| 304 'ucs-4-little-endian 'unicode | |
| 305 "UCS-4 Little Endian" | |
| 306 '(mnemonic "UCS4-LE" | |
| 307 documentation | |
| 2297 | 308 ;; #### I don't think this is permitted by ISO 10646, only Unicode. |
| 309 ;; Call it UTF-32 instead? | |
| 771 | 310 "Little-endian version of UCS-4 Unicode encoding. See `ucs-4' coding system." |
| 3767 | 311 unicode-type ucs-4 |
| 771 | 312 little-endian t)) |
| 313 | |
| 314 (make-coding-system | |
| 4096 | 315 'utf-32 'unicode |
| 316 "UTF-32" | |
| 317 '(mnemonic "UTF32" | |
| 318 documentation | |
| 319 "UTF-32 Unicode encoding -- fixed-width four-byte encoding, | |
| 320 characters greater than #x10FFFF are not supported. " | |
| 321 unicode-type utf-32)) | |
| 322 | |
| 323 (make-coding-system | |
| 324 'utf-32-little-endian 'unicode | |
| 325 "UTF-32 Little Endian" | |
| 326 '(mnemonic "UTF32-LE" | |
| 327 documentation | |
| 328 "Little-endian version of UTF-32 Unicode encoding. | |
| 329 | |
| 330 A fixed-width four-byte encoding, characters greater than #x10FFFF are not | |
| 331 supported. " | |
| 332 unicode-type ucs-4 little-endian t)) | |
| 333 | |
|
4834
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4805
diff
changeset
|
334 ;; Now defined in unicode.c. |
| 771 | 335 |
|
4834
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4805
diff
changeset
|
336 ;;(make-coding-system |
|
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4805
diff
changeset
|
337 ;; 'utf-8 'unicode |
|
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4805
diff
changeset
|
338 ;; "UTF-8" |
|
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4805
diff
changeset
|
339 ;; '(mnemonic "UTF8" |
|
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4805
diff
changeset
|
340 ;; documentation "..." |
|
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4805
diff
changeset
|
341 ;; unicode-type utf-8)) |
| 771 | 342 |
| 985 | 343 (make-coding-system |
| 344 'utf-8-bom 'unicode | |
| 345 "UTF-8 w/BOM" | |
| 346 '(mnemonic "MSW-UTF8" | |
| 347 documentation | |
| 348 "UTF-8 Unicode encoding, with byte order mark. | |
| 349 Standard encoding for representing UTF-8 under MS Windows." | |
| 3767 | 350 unicode-type utf-8 |
| 985 | 351 little-endian t |
| 352 need-bom t)) | |
| 353 | |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
354 ;; Now, create jit-ucs-charset-0 entries for those characters in Windows |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
355 ;; Glyph List 4 that would otherwise end up in East Asian character sets. |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
356 ;; |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
357 ;; WGL4 is a character repertoire from Microsoft that gives a guideline |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
358 ;; for font implementors as to what characters are sufficient for |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
359 ;; pan-European support. The intention of this code is to avoid the |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
360 ;; situation where these characters end up mapping to East Asian XEmacs |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
361 ;; characters, which generally clash strongly with European characters |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
362 ;; both in font choice and character width; jit-ucs-charset-0 is a |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
363 ;; single-width character set which comes before the East Asian character |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
364 ;; sets in the default-unicode-precedence-list above. |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
;; On Mule builds only: decode each Unicode code point listed below and give
;; the resulting character the syntax class of the ASCII or Latin-1 character
;; paired with it.  Every decoded character is asserted to belong to
;; jit-ucs-charset-0.
;;
;; The decoded characters are jit-ucs-charset-0 entries because this form is
;; run at dump time: if the Unicode tables are dumped, it runs before the
;; relevant conversions are available (they are made available in
;; mule/general-late.el); if they are not dumped, it runs long before any of
;; the other mappings are available.
;;
;; U+2191 and U+207F are each listed twice; the entries are processed in
;; order, so presumably the later entry decides the final syntax.
(when (featurep 'mule)
  (let ((table (standard-syntax-table)))
    (dolist (entry
             '((#x2013 ?-) ;; U+2013 EN DASH
               (#x2014 ?-) ;; U+2014 EM DASH
               (#x2105 ?%) ;; U+2105 CARE OF
               (#x203e ?-) ;; U+203E OVERLINE
               (#x221f ?|) ;; U+221F RIGHT ANGLE
               (#x2584 ?|) ;; U+2584 LOWER HALF BLOCK
               (#x2588 ?|) ;; U+2588 FULL BLOCK
               (#x258c ?|) ;; U+258C LEFT HALF BLOCK
               (#x2550 ?|) ;; U+2550 BOX DRAWINGS DOUBLE HORIZONTAL
               (#x255e ?|) ;; U+255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
               (#x256a ?|) ;; U+256A BOX DRAWINGS VERTICAL SINGLE & HORIZONTAL DOUBLE
               (#x2561 ?|) ;; U+2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
               (#x2215 ?/) ;; U+2215 DIVISION SLASH
               (#x02c9 ?`) ;; U+02C9 MODIFIER LETTER MACRON
               (#x2211 ?s) ;; U+2211 N-ARY SUMMATION
               (#x220f ?s) ;; U+220F N-ARY PRODUCT
               (#x2248 ?=) ;; U+2248 ALMOST EQUAL TO
               (#x2264 ?=) ;; U+2264 LESS-THAN OR EQUAL TO
               (#x2265 ?=) ;; U+2265 GREATER-THAN OR EQUAL TO
               (#x201c ?') ;; U+201C LEFT DOUBLE QUOTATION MARK
               (#x2026 ?.) ;; U+2026 HORIZONTAL ELLIPSIS
               (#x2212 ?-) ;; U+2212 MINUS SIGN
               (#x2260 ?=) ;; U+2260 NOT EQUAL TO
               (#x221e ?=) ;; U+221E INFINITY
               (#x2642 ?=) ;; U+2642 MALE SIGN
               (#x2640 ?=) ;; U+2640 FEMALE SIGN
               (#x2032 ?=) ;; U+2032 PRIME
               (#x2033 ?=) ;; U+2033 DOUBLE PRIME
               (#x25cb ?=) ;; U+25CB WHITE CIRCLE
               (#x25cf ?=) ;; U+25CF BLACK CIRCLE
               (#x25a1 ?=) ;; U+25A1 WHITE SQUARE
               (#x25a0 ?=) ;; U+25A0 BLACK SQUARE
               (#x25b2 ?=) ;; U+25B2 BLACK UP-POINTING TRIANGLE
               (#x25bc ?=) ;; U+25BC BLACK DOWN-POINTING TRIANGLE
               (#x2192 ?=) ;; U+2192 RIGHTWARDS ARROW
               (#x2190 ?=) ;; U+2190 LEFTWARDS ARROW
               (#x2191 ?=) ;; U+2191 UPWARDS ARROW
               (#x2193 ?=) ;; U+2193 DOWNWARDS ARROW
               (#x2229 ?=) ;; U+2229 INTERSECTION
               (#x2202 ?=) ;; U+2202 PARTIAL DIFFERENTIAL
               (#x2261 ?=) ;; U+2261 IDENTICAL TO
               (#x221a ?=) ;; U+221A SQUARE ROOT
               (#x222b ?=) ;; U+222B INTEGRAL
               (#x2030 ?=) ;; U+2030 PER MILLE SIGN
               (#x266a ?=) ;; U+266A EIGHTH NOTE
               (#x2020 ?*) ;; U+2020 DAGGER
               (#x2021 ?*) ;; U+2021 DOUBLE DAGGER
               (#x2500 ?|) ;; U+2500 BOX DRAWINGS LIGHT HORIZONTAL
               (#x2502 ?|) ;; U+2502 BOX DRAWINGS LIGHT VERTICAL
               (#x250c ?|) ;; U+250C BOX DRAWINGS LIGHT DOWN AND RIGHT
               (#x2510 ?|) ;; U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT
               (#x2518 ?|) ;; U+2518 BOX DRAWINGS LIGHT UP AND LEFT
               (#x2514 ?|) ;; U+2514 BOX DRAWINGS LIGHT UP AND RIGHT
               (#x251c ?|) ;; U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT
               (#x252c ?|) ;; U+252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
               (#x2524 ?|) ;; U+2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT
               (#x2534 ?|) ;; U+2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL
               (#x253c ?|) ;; U+253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
               (#x02da ?^) ;; U+02DA RING ABOVE
               (#x2122 ?\xa9) ;; U+2122 TRADE MARK SIGN, ?©

               (#x0132 ?\xe6) ;; U+0132 LATIN CAPITAL LIGATURE IJ, ?æ
               (#x013f ?\xe6) ;; U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT, ?æ

               (#x0133 ?\xe6) ;; U+0133 LATIN SMALL LIGATURE IJ, ?æ
               (#x0140 ?\xe6) ;; U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT, ?æ
               (#x0149 ?\xe6) ;; U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, ?æ

               (#x2194 ?|) ;; U+2194 LEFT RIGHT ARROW
               (#x2660 ?*) ;; U+2660 BLACK SPADE SUIT
               (#x2665 ?*) ;; U+2665 BLACK HEART SUIT
               (#x2663 ?*) ;; U+2663 BLACK CLUB SUIT
               (#x2592 ?|) ;; U+2592 MEDIUM SHADE
               (#x2195 ?|) ;; U+2195 UP DOWN ARROW

               (#x2113 ?\xb9) ;; U+2113 SCRIPT SMALL L, ?¹
               (#x215b ?\xbe) ;; U+215B VULGAR FRACTION ONE EIGHTH, ?¾
               (#x215c ?\xbe) ;; U+215C VULGAR FRACTION THREE EIGHTHS, ?¾
               (#x215d ?\xbe) ;; U+215D VULGAR FRACTION FIVE EIGHTHS, ?¾
               (#x215e ?\xbe) ;; U+215E VULGAR FRACTION SEVEN EIGHTHS, ?¾
               (#x207f ?\xbe) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¾

               ;; The remaining entries are not in WGL 4; they are IPA
               ;; characters that should not be double width.  They are the
               ;; only IPA characters that both occur in
               ;; packages/mule-packages/leim/ipa.el and end up in East
               ;; Asian character sets when that file is loaded in an
               ;; XEmacs without packages.
               (#x2197 ?|) ;; U+2197 NORTH EAST ARROW
               (#x2199 ?|) ;; U+2199 SOUTH WEST ARROW
               (#x2191 ?|) ;; U+2191 UPWARDS ARROW
               (#x207f ?\xb9))) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¹
      (let ((jit-char (decode-char 'ucs (car entry))))
        (assert (eq (declare-fboundp (char-charset jit-char))
                    'jit-ucs-charset-0)
                nil
                "Unexpected Unicode decoding behavior. ")
        ;; Copy the syntax class of the stand-in character.
        (modify-syntax-entry jit-char
                             (string (char-syntax (cadr entry)))
                             table)))))
| 4145 | 478 |
| 4268 | 479 ;; *Sigh*, declarations need to be at the start of the line to be picked up |
| 480 ;; by make-docfile. Not so much an issue with ccl-encode-to-ucs-2, which we | |
| 481 ;; don't necessarily want to advertise, but the following are important. | |
| 482 | |
| 483 ;; Create all the Unicode error sequences, normally as jit-ucs-charset-0 | |
| 484 ;; characters starting at U+200000 (which isn't a valid Unicode code | |
| 485 ;; point). Make them available to user code. | |
(defvar unicode-error-default-translation-table
  (when (featurep 'mule)
    (let ((xlat (make-char-table 'generic)))
      (loop for octet from ?\x00 to ?\xFF
        do
        ;; #xd800 is the first leading surrogate, and a trailing surrogate
        ;; must lie in #xdc00-#xdfff.  The two octets following it here
        ;; never do, so decoding deliberately yields an error sequence; the
        ;; character at index 3 is the one standing for OCTET.
        (put-char-table
         (aref (decode-coding-string (format "\xd8\x00\x00%c" octet)
                                     'utf-16-be)
               3)
         octet
         xlat))
      xlat))
  "Translation table mapping Unicode error sequences to Latin-1 chars.

Use this variable to transform XEmacs Unicode error sequences into the
Latin-1 characters that correspond to the octets on disk.")
| 4145 | 507 |
|
4490
67fbcaf3dbdc
error-sequence -> invalid-sequence
Aidan Kehoe <kehoea@parhasard.net>
parents:
4489
diff
changeset
|
(defvar unicode-invalid-sequence-regexp-range
  (when (featurep 'mule)
    ;; Decode two deliberately invalid UTF-16 sequences, ending in octet
    ;; #x00 and octet #xFF respectively, and build a range from the
    ;; characters they produce.
    (let ((lowest (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be))
          (highest (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be)))
      (format "%c%c-%c"
              (aref lowest 0)
              (aref lowest 3)
              (aref highest 3))))
  "Regular expression range to match Unicode error sequences in XEmacs.

Invalid Unicode sequences on input are represented as XEmacs
characters with values stored as the keys in
`unicode-error-default-translation-table', one character for each
invalid octet.  The value is the inside of a character range, without
the enclosing brackets.  You can use it with `re-search-forward' or
`skip-chars-forward' to search for such characters; see also
`unicode-error-translate-region'.")
| 522 | |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
523 ;; Check that the lookup table is correct, and that all the actual error |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
524 ;; sequences are caught by the regexp. |
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
;; Load-time sanity check, Mule builds only: for every octet value, decode
;; an invalid UTF-16 sequence ending in that octet and verify that
;; `unicode-error-default-translation-table' maps the resulting
;; error-sequence character back to the octet.
(with-temp-buffer
  (loop
    for i from ?\x00 to ?\xFF
    with to-check = (make-string 20 ?\x20)
    initially (unless (featurep 'mule) (return))
    do
    ;; Start every iteration from a buffer of 20 spaces, with the decoded
    ;; error sequence inserted at position 10.
    (delete-region (point-min) (point-max))
    (insert to-check)
    (goto-char 10)
    (insert (decode-coding-string (format "\xd8\x00\x00%c" i)
                                  'utf-16-be))
    ;; Step back onto the last inserted character, the one for octet I.
    (backward-char)
    ;; The second argument of `assert' is SHOW-ARGS and the third is the
    ;; message; pass nil for SHOW-ARGS so the message is actually used.
    (assert (= i (get-char-table (char-after (point))
                                 unicode-error-default-translation-table))
            nil
            (format "Char ?\\x%x not the expected error sequence!"
                    i))

    (goto-char (point-min))
    ;; The regexp search is commented out until the issue in
    ;; 18179.49815.622843.336527@parhasard.net is fixed.
    (assert t ; (re-search-forward (concat "["
            ;       unicode-invalid-sequence-regexp-range
            ;       "]"))
            nil
            (format "Could not find char ?\\x%x in buffer" i))))
| 4202 | 550 |
(defun frob-unicode-errors-region (frob-function begin end &optional buffer)
  "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END.

FROB-FUNCTION is called with two arguments, the start and end positions
of each run of characters matched by
`unicode-invalid-sequence-regexp-range', in buffer order.

Optional argument BUFFER specifies the buffer that should be examined for
such sequences; it defaults to the current buffer.  The current buffer,
and point and the restriction in the examined buffer, are restored on
exit."
  (check-argument-type #'functionp frob-function)
  (check-argument-range begin (point-min buffer) (point-max buffer))
  (check-argument-range end (point-min buffer) (point-max buffer))
  ;; Switch to BUFFER before saving point and restriction.
  ;; `save-excursion' and `save-restriction' record the state of the buffer
  ;; that is current when they are entered, so entering them first would
  ;; leave BUFFER narrowed, with point moved, whenever BUFFER is not the
  ;; current buffer.
  (with-current-buffer (or buffer (current-buffer))
    (save-excursion
      (save-restriction
        (narrow-to-region begin end)
        (goto-char (point-min))
        ;; END doubles as the loop flag: it becomes nil once the end of the
        ;; narrowed region is reached without finding another run.
        (while end
          (setq begin
                (progn
                  ;; Skip over ordinary text up to the next error sequence.
                  (skip-chars-forward
                   (concat "^" unicode-invalid-sequence-regexp-range))
                  (point))
                end (and (not (= (point) (point-max)))
                         (progn
                           ;; Skip over the run of error-sequence chars.
                           (skip-chars-forward
                            unicode-invalid-sequence-regexp-range)
                           (point))))
          (if end
              (funcall frob-function begin end)))))))
| 577 | |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
(defun unicode-error-translate-region (begin end &optional buffer table)
  "Translate the Unicode error sequences in BUFFER between BEGIN and END.

By default the error sequences are replaced with the ASCII, control-1
and latin-iso8859-1 characters whose numeric values correspond to the
incorrect octets encountered, using
`unicode-error-default-translation-table' (which see) as TABLE.
Supply another character table as TABLE to map the error sequences to
different characters."
  (let ((translation (or table unicode-error-default-translation-table)))
    (frob-unicode-errors-region
     ;; Called once for each run of error-sequence characters.
     #'(lambda (run-start run-end)
         (translate-region run-start run-end translation))
     begin end buffer)))
|
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
592 |
|
4489
b75b075a9041
Support displaying invalid UTF-8 in language-environment-specific ways.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4468
diff
changeset
|
593 ;; Sure would be nice to be able to use defface here. |
|
4490
67fbcaf3dbdc
error-sequence -> invalid-sequence
Aidan Kehoe <kehoea@parhasard.net>
parents:
4489
diff
changeset
|
;; Create `unicode-invalid-sequence-warning-face' as a copy of the
;; `highlight' face.
(copy-face 'highlight 'unicode-invalid-sequence-warning-face)
|
4489
b75b075a9041
Support displaying invalid UTF-8 in language-environment-specific ways.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4468
diff
changeset
|
595 |
|
4317
15d36164ebd7
Eliminate lost docstring warnings on 21.5.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4268
diff
changeset
|
(unless (featurep 'mule)
  ;; Without Mule, remove the symbols defined in this file that only make
  ;; sense with Mule.  This is done in a roundabout way, rather than by
  ;; wrapping the defun and defvar forms in (when (featurep 'mule) ...), so
  ;; that make-docfile.c picks up the symbol and function documentation
  ;; correctly.  An alternative approach would be to fix make-docfile.c to
  ;; be able to read Lisp.
  ;;
  ;; The regexp-range variable is named
  ;; `unicode-invalid-sequence-regexp-range'; the list must use that name
  ;; or the variable is never uninterned.
  (mapc #'unintern
        '(ccl-encode-to-ucs-2 unicode-error-default-translation-table
          unicode-invalid-sequence-regexp-range frob-unicode-errors-region
          unicode-error-translate-region unicode-query-coding-region
          unicode-query-coding-skip-chars-arg)))
| 3667 | 607 |
| 771 | 608 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's |
| 609 ;; an implementation in appendix A.1 of the Unicode Standard, Version | |
| 610 ;; 2.0, but I don't know its licensing characteristics. | |
| 611 | |
| 612 ; (make-coding-system | |
| 613 ; 'utf-7 'unicode | |
| 614 ; "UTF-7" | |
| 615 ; '(mnemonic "UTF7" | |
| 3659 | 616 ; documentation; "UTF-7 Unicode encoding -- 7-bit-ASCII modal Internet-mail-compatible |
| 771 | 617 ; encoding especially designed for headers, with the following |
| 618 ; properties: | |
| 619 | |
| 620 ; -- Only characters that are considered safe for passing through any mail | |
| 621 ; gateway without damage are used. | |
| 622 | |
| 623 ; -- This is a modal encoding, with two states. The first, default | |
| 624 ; state encodes the most common Unicode characters (upper and | |
| 625 ; lowercase letters, digits, and 9 common punctuation marks) as | |
| 626 ; themselves, and the second state, entered using '+' and | |
| 627 ; terminated with '-' or any character disallowed in state 2, | |
| 628 ; encodes any Unicode characters by first converting to UTF-16, | |
| 629 ; most significant byte first, and then to a slightly modified | |
| 630 ; Base64 encoding. (Thus, UTF-7 has the same limitations on the | |
| 631 ; characters it can encode as UTF-16.) | |
| 632 | |
| 633 ; -- The modified Base64 encoding deviates from standard Base64 in | |
| 634 ; that it omits the `=' pad character. This is eliminated so as to | |
| 635 ; avoid conflicts with the use of `=' as an escape in the | |
| 636 ; Quoted-Printable encoding and the related Q encoding for headers: | |
| 637 ; With this modification, non-whitespace chars in UTF-7 will be | |
| 638 ; represented in Quoted-Printable and in Q as-is, with no further | |
| 639 ; encoding. | |
| 640 | |
| 641 ; For more information, see Appendix A.1 of The Unicode Standard 2.0, or | |
| 642 ; wherever it is in v3.0." | |
| 3767 | 643 ; unicode-type utf-7)) |
