Mercurial > hg > xemacs-beta
comparison lisp/unicode.el @ 2297:13a418960a88
[xemacs-hg @ 2004-09-22 02:05:42 by stephent]
various doc patches <87isa7awrh.fsf@tleepslib.sk.tsukuba.ac.jp>
author | stephent |
---|---|
date | Wed, 22 Sep 2004 02:06:52 +0000 |
parents | b531bf8658e9 |
children | ecf1ebac70d8 |
comparison
equal
deleted
inserted
replaced
2296:a58ea4d0d0cd | 2297:13a418960a88 |
---|---|
29 | 29 |
30 ;;; Code: | 30 ;;; Code: |
31 | 31 |
32 ; ;; Subsets of Unicode. | 32 ; ;; Subsets of Unicode. |
33 | 33 |
 | 34 ; #### what is this bogosity ... "chars 96, final ?2" !!?! |
34 ; (make-charset 'mule-unicode-2500-33ff | 35 ; (make-charset 'mule-unicode-2500-33ff |
35 ; "Unicode characters of the range U+2500..U+33FF." | 36 ; "Unicode characters of the range U+2500..U+33FF." |
36 ; '(dimension | 37 ; '(dimension |
37 ; 2 | 38 ; 2 |
38 ; registry "ISO10646-1" | 39 ; registry "ISO10646-1" |
122 ;; "CP936.TXT" | 123 ;; "CP936.TXT" |
123 ;; "CP949.TXT" | 124 ;; "CP949.TXT" |
124 ;; "CP950.TXT" | 125 ;; "CP950.TXT" |
125 ;; "GB12345.TXT" | 126 ;; "GB12345.TXT" |
126 ("GB2312.TXT" chinese-gb2312) | 127 ("GB2312.TXT" chinese-gb2312) |
127 ;; "HANGUL.TXT" | 128 ;; "HANGUL.TXT" |
 | 129 ;; #### shouldn't JIS X 0201's upper limit be 7f? |
128 ("JIS0201.TXT" latin-jisx0201 #x21 #x80) | 130 ("JIS0201.TXT" latin-jisx0201 #x21 #x80) |
129 ("JIS0201.TXT" katakana-jisx0201 #xA0 #xFF #x-80) | 131 ("JIS0201.TXT" katakana-jisx0201 #xA0 #xFF #x-80) |
130 ("JIS0208.TXT" japanese-jisx0208 nil nil nil ignore-first-column) | 132 ("JIS0208.TXT" japanese-jisx0208 nil nil nil ignore-first-column) |
131 ("JIS0212.TXT" japanese-jisx0212) | 133 ("JIS0212.TXT" japanese-jisx0212) |
132 ;; "JOHAB.TXT" | 134 ;; "JOHAB.TXT" |
137 ("KSX1001.TXT" korean-ksc5601) | 139 ("KSX1001.TXT" korean-ksc5601) |
138 ;; "OLD5601.TXT" | 140 ;; "OLD5601.TXT" |
139 ;; "SHIFTJIS.TXT" | 141 ;; "SHIFTJIS.TXT" |
140 ) | 142 ) |
141 ("unicode/mule-ucs" | 143 ("unicode/mule-ucs" |
 | 144 ;; #### we don't support surrogates?!?? |
142 ;; use these instead of the above ones once we support surrogates | 145 ;; use these instead of the above ones once we support surrogates |
143 ;;("chinese-cns11643-1.txt" chinese-cns11643-1) | 146 ;;("chinese-cns11643-1.txt" chinese-cns11643-1) |
144 ;;("chinese-cns11643-2.txt" chinese-cns11643-2) | 147 ;;("chinese-cns11643-2.txt" chinese-cns11643-2) |
145 ;;("chinese-cns11643-3.txt" chinese-cns11643-3) | 148 ;;("chinese-cns11643-3.txt" chinese-cns11643-3) |
146 ;;("chinese-cns11643-4.txt" chinese-cns11643-4) | 149 ;;("chinese-cns11643-4.txt" chinese-cns11643-4) |
247 (make-coding-system | 250 (make-coding-system |
248 'ucs-4-little-endian 'unicode | 251 'ucs-4-little-endian 'unicode |
249 "UCS-4 Little Endian" | 252 "UCS-4 Little Endian" |
250 '(mnemonic "UCS4-LE" | 253 '(mnemonic "UCS4-LE" |
251 documentation | 254 documentation |
 | 255 ;; #### I don't think this is permitted by ISO 10646, only Unicode. |
 | 256 ;; Call it UTF-32 instead? |
252 "Little-endian version of UCS-4 Unicode encoding. See `ucs-4' coding system." | 257 "Little-endian version of UCS-4 Unicode encoding. See `ucs-4' coding system." |
253 type ucs-4 | 258 type ucs-4 |
254 little-endian t)) | 259 little-endian t)) |
255 | 260 |
256 (make-coding-system | 261 (make-coding-system |
257 'utf-8 'unicode | 262 'utf-8 'unicode |
258 "UTF-8" | 263 "UTF-8" |
259 '(mnemonic "UTF8" | 264 '(mnemonic "UTF8" |
260 documentation | 265 documentation |
261 "UTF-8 Unicode encoding -- ASCII-compatible 8-bit variable-width encoding | 266 "UTF-8 Unicode encoding -- ASCII-compatible 8-bit variable-width encoding |
262 with the same principles as the Mule-internal encoding: | 267 sharing the following principles with the Mule-internal encoding: |
263 | 268 |
264 -- All ASCII characters (codepoints 0 through 127) are represented | 269 -- All ASCII characters (codepoints 0 through 127) are represented |
265 by themselves (i.e. using one byte, with the same value as the | 270 by themselves (i.e. using one byte, with the same value as the |
266 ASCII codepoint), and these bytes are disjoint from bytes | 271 ASCII codepoint), and these bytes are disjoint from bytes |
267 representing non-ASCII characters. | 272 representing non-ASCII characters. |