annotate src/mule-charset.h @ 428:3ecd8885ac67 r21-2-22

Import from CVS: tag r21-2-22
author cvs
date Mon, 13 Aug 2007 11:28:15 +0200
parents
children 84b14dcb0985
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
428
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
1 /* Header for multilingual functions.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
3 Copyright (C) 1995 Sun Microsystems, Inc.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
4
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
5 This file is part of XEmacs.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
6
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
7 XEmacs is free software; you can redistribute it and/or modify it
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
8 under the terms of the GNU General Public License as published by the
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
9 Free Software Foundation; either version 2, or (at your option) any
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
10 later version.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
11
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
15 for more details.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
16
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
17 You should have received a copy of the GNU General Public License
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
18 along with XEmacs; see the file COPYING. If not, write to
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
20 Boston, MA 02111-1307, USA. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
21
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
22 /* Synched up with: Mule 2.3. Not in FSF. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
23
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
25
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
26 #ifndef _XEMACS_MULE_CHARSET_H
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
27 #define _XEMACS_MULE_CHARSET_H
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
28
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
29 /*
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
30 1. Character Sets
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
31 =================
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
32
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
33 A character set (or "charset") is an ordered set of characters.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
34 A particular character in a charset is indexed using one or
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
35 more "position codes", which are non-negative integers.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
36 The number of position codes needed to identify a particular
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
37 character in a charset is called the "dimension" of the
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
38 charset. In XEmacs/Mule, all charsets have 1 or 2 dimensions,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
39 and the size of all charsets (except for a few special cases)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
40 is either 94, 96, 94 by 94, or 96 by 96. The range of
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
41 position codes used to index characters from any of these
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
42 types of character sets is as follows:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
43
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
44 Charset type Position code 1 Position code 2
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
45 ------------------------------------------------------------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
46 94 33 - 126 N/A
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
47 96 32 - 127 N/A
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
48 94x94 33 - 126 33 - 126
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
49 96x96 32 - 127 32 - 127
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
50
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
51 Note that in the above cases position codes do not start at
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
52 an expected value such as 0 or 1. The reason for this will
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
53 become clear later.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
54
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
55 For example, Latin-1 is a 96-character charset, and JISX0208
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
56 (the Japanese national character set) is a 94x94-character
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
57 charset.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
58
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
59 [Note that, although the ranges above define the *valid*
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
60 position codes for a charset, some of the slots in a particular
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
61 charset may in fact be empty. This is the case for JISX0208,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
62 for example, where (e.g.) all the slots whose first
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
63 position code is in the range 117 - 126 are empty.]
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
64
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
65 There are three charsets that do not follow the above rules.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
66 All of them have one dimension, and have ranges of position
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
67 codes as follows:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
68
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
69 Charset name Position code 1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
70 ------------------------------------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
71 ASCII 0 - 127
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
72 Control-1 0 - 31
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
73 Composite 0 - some large number
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
74
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
75 (The upper bound of the position code for composite characters
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
76 has not yet been determined, but it will probably be at
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
77 least 16,383).
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
78
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
79 ASCII is the union of two subsidiary character sets:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
80 Printing-ASCII (the printing ASCII character set,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
81 consisting of position codes 33 - 126, like for a standard
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
82 94-character charset) and Control-ASCII (the non-printing
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
83 characters that would appear in a binary file with codes 0
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
84 - 32 and 127).
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
85
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
86 Control-1 contains the non-printing characters that would
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
87 appear in a binary file with codes 128 - 159.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
88
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
89 Composite contains characters that are generated by
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
90 overstriking one or more characters from other charsets.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
91
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
92 Note that some characters in ASCII, and all characters
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
93 in Control-1, are "control" (non-printing) characters.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
94 These have no printed representation but instead control
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
95 some other function of the printing (e.g. TAB or 9 moves
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
96 the current character position to the next tab stop).
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
97 All other characters in all charsets are "graphic"
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
98 (printing) characters.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
99
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
100 When a binary file is read in, the bytes in the file are
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
101 assigned to character sets as follows:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
102
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
103 Bytes Character set Range
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
104 --------------------------------------------------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
105 0 - 127 ASCII 0 - 127
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
106 128 - 159 Control-1 0 - 31
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
107 160 - 255 Latin-1 32 - 127
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
108
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
109 This is a bit ad-hoc but gets the job done.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
110
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
111 2. Encodings
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
112 ============
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
113
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
114 An "encoding" is a way of numerically representing
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
115 characters from one or more character sets. If an encoding
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
116 only encompasses one character set, then the position codes
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
117 for the characters in that character set could be used
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
118 directly. This is not possible, however, if more than one
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
119 character set is to be used in the encoding.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
120
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
121 For example, the conversion detailed above between bytes in
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
122 a binary file and characters is effectively an encoding
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
123 that encompasses the three character sets ASCII, Control-1,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
124 and Latin-1 in a stream of 8-bit bytes.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
125
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
126 Thus, an encoding can be viewed as a way of encoding
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
127 characters from a specified group of character sets using a
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
128 stream of bytes, each of which contains a fixed number of
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
129 bits (but not necessarily 8, as in the common usage of
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
130 "byte").
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
131
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
132 Here are descriptions of a couple of common
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
133 encodings:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
134
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
135
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
136 A. Japanese EUC (Extended Unix Code)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
137
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
138 This encompasses the character sets:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
139 - Printing-ASCII,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
140 - Katakana-JISX0201 (half-width katakana, the right half of JISX0201).
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
141 - Japanese-JISX0208
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
142 - Japanese-JISX0212
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
143 It uses 8-bit bytes.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
144
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
145 Note that Printing-ASCII and Katakana-JISX0201 are 94-character
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
146 charsets, while Japanese-JISX0208 is a 94x94-character charset.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
147
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
148 The encoding is as follows:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
149
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
150 Character set Representation (PC == position-code)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
151 ------------- --------------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
152 Printing-ASCII PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
153 Japanese-JISX0208 PC1 + 0x80 | PC2 + 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
154 Katakana-JISX0201 0x8E | PC1 + 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
155
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
156
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
157 B. JIS7
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
158
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
159 This encompasses the character sets:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
160 - Printing-ASCII
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
161 - Latin-JISX0201 (the left half of JISX0201; this character set is
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
162 very similar to Printing-ASCII and is a 94-character charset)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
163 - Japanese-JISX0208
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
164 - Katakana-JISX0201
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
165 It uses 7-bit bytes.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
166
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
167 Unlike Japanese EUC, this is a "modal" encoding, which
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
168 means that there are multiple states that the encoding can
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
169 be in, which affect how the bytes are to be interpreted.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
170 Special sequences of bytes (called "escape sequences")
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
171 are used to change states.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
172
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
173 The encoding is as follows:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
174
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
175 Character set Representation
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
176 ------------- --------------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
177 Printing-ASCII PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
178 Latin-JISX0201 PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
179 Katakana-JISX0201 PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
180 Japanese-JISX0208 PC1 | PC2
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
181
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
182 Escape sequence ASCII equivalent Meaning
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
183 --------------- ---------------- -------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
184 0x1B 0x28 0x42 ESC ( B invoke Printing-ASCII
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
185 0x1B 0x28 0x4A ESC ( J invoke Latin-JISX0201
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
186 0x1B 0x28 0x49 ESC ( I invoke Katakana-JISX0201
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
187 0x1B 0x24 0x42 ESC $ B invoke Japanese-JISX0208
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
188
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
189 Initially, Printing-ASCII is invoked.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
190
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
191 3. Internal Mule Encodings
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
192 ==========================
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
193
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
194 In XEmacs/Mule, each character set is assigned a unique number,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
195 called a "leading byte". This is used in the encodings of a
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
196 character. Leading bytes are in the range 0x80 - 0xFF
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
197 (except for ASCII, which has a leading byte of 0), although
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
198 some leading bytes are reserved.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
199
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
200 Charsets whose leading byte is in the range 0x80 - 0x9F are
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
201 called "official" and are used for built-in charsets.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
202 Other charsets are called "private" and have leading bytes
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
203 in the range 0xA0 - 0xFF; these are user-defined charsets.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
204
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
205 More specifically:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
206
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
207 Character set Leading byte
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
208 ------------- ------------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
209 ASCII 0
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
210 Composite 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
211 Dimension-1 Official 0x81 - 0x8D
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
212 (0x8E is free)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
213 Control-1 0x8F
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
214 Dimension-2 Official 0x90 - 0x99
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
215 (0x9A - 0x9D are free;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
216 0x9E and 0x9F are reserved)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
217 Dimension-1 Private 0xA0 - 0xEF
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
218 Dimension-2 Private 0xF0 - 0xFF
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
219
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
220 There are two internal encodings for characters in XEmacs/Mule.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
221 One is called "string encoding" and is an 8-bit encoding that
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
222 is used for representing characters in a buffer or string.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
223 It uses 1 to 4 bytes per character. The other is called
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
224 "character encoding" and is a 19-bit encoding that is used
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
225 for representing characters individually in a variable.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
226
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
227 (In the following descriptions, we'll ignore composite
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
228 characters for the moment. We also give a general (structural)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
229 overview first, followed later by the exact details.)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
230
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
231 A. Internal String Encoding
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
232
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
233 ASCII characters are encoded using their position code directly.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
234 Other characters are encoded using their leading byte followed
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
235 by their position code(s) with the high bit set. Characters
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
236 in private character sets have their leading byte prefixed with
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
237 a "leading byte prefix", which is either 0x9E or 0x9F. (No
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
238 character sets are ever assigned these leading bytes.) Specifically:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
239
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
240 Character set Encoding (PC == position-code)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
241 ------------- -------- (LB == leading-byte)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
242 ASCII PC1 |
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
243 Control-1 LB | PC1 + 0xA0
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
244 Dimension-1 official LB | PC1 + 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
245 Dimension-1 private 0x9E | LB | PC1 + 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
246 Dimension-2 official LB | PC1 + 0x80 | PC2 + 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
247 Dimension-2 private 0x9F | LB | PC1 + 0x80 | PC2 + 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
248
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
249 The basic characteristic of this encoding is that the first byte
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
250 of all characters is in the range 0x00 - 0x9F, and the second and
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
251 following bytes of all characters are in the range 0xA0 - 0xFF.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
252 This means that it is impossible to get out of sync, or more
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
253 specifically:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
254
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
255 1. Given any byte position, the beginning of the character it is
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
256 within can be determined in constant time.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
257 2. Given any byte position at the beginning of a character, the
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
258 beginning of the next character can be determined in constant
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
259 time.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
260 3. Given any byte position at the beginning of a character, the
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
261 beginning of the previous character can be determined in constant
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
262 time.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
263 4. Textual searches can simply treat encoded strings as if they
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
264 were encoded in a one-byte-per-character fashion rather than
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
265 the actual multi-byte encoding.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
266
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
267 None of the standard non-modal encodings meet all of these
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
268 conditions. For example, EUC satisfies only (2) and (3), while
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
269 Shift-JIS and Big5 (not yet described) satisfy only (2). (All
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
270 non-modal encodings must satisfy (2), in order to be unambiguous.)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
271
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
272 B. Internal Character Encoding
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
273
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
274 One 19-bit word represents a single character. The word is
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
275 separated into three fields:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
276
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
277 Bit number: 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
278 <------------> <------------------> <------------------>
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
279 Field: 1 2 3
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
280
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
281 Note that fields 2 and 3 hold 7 bits each, while field 1 holds 5 bits.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
282
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
283 Character set Field 1 Field 2 Field 3
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
284 ------------- ------- ------- -------
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
285 ASCII 0 0 PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
286 range: (00 - 7F)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
287 Control-1 0 1 PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
288 range: (00 - 1F)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
289 Dimension-1 official 0 LB - 0x80 PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
290 range: (01 - 0D) (20 - 7F)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
291 Dimension-1 private 0 LB - 0x80 PC1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
292 range: (20 - 6F) (20 - 7F)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
293 Dimension-2 official LB - 0x8F PC1 PC2
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
294 range: (01 - 0A) (20 - 7F) (20 - 7F)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
295 Dimension-2 private LB - 0xE1 PC1 PC2
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
296 range: (0F - 1E) (20 - 7F) (20 - 7F)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
297 Composite 0x1F ? ?
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
298
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
299 Note that character codes 0 - 255 are the same as the "binary encoding"
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
300 described above.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
301 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
302
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
303 /*
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
304 About Unicode support:
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
305
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
306 Adding Unicode support is very desirable. Unicode will likely be a
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
307 very common representation in the future, and thus we should
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
308 represent Unicode characters using three bytes instead of four.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
309 This means we need to find leading bytes for Unicode. Given that
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
310 there are 65,536 characters in Unicode and we can attach 96x96 =
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
311 9,216 characters per leading byte, we need eight leading bytes for
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
312 Unicode. We currently have four free (0x9A - 0x9D), and with a
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
313 little bit of rearranging we can get five: ASCII doesn't really
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
314 need to take up a leading byte. (We could just as well use 0x7F,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
315 with a little change to the functions that assume that 0x80 is the
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
316 lowest leading byte.) This means we still need to dump three
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
317 leading bytes and move them into private space. The CNS charsets
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
318 are good candidates since they are rarely used, and
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
319 JAPANESE_JISX0208_1978 is becoming less and less used and could
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
320 also be dumped. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
321
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
322
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
323 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
324 /* Definition of leading bytes */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
325 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
326
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
327 #define MIN_LEADING_BYTE 0x80
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
328 /* These need special treatment in a string and/or character */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
329 #define LEADING_BYTE_ASCII 0x8E /* Omitted in a buffer */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
330 #ifdef ENABLE_COMPOSITE_CHARS
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
331 #endif
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
332 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
333 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
334
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
335 /* Note the gap in each official charset can cause a core dump
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
336 as first and last values are used to determine whether
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
337 charset is defined or not in non_ascii_valid_char_p */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
338
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
339 /** The following are for 1-byte characters in an official charset. **/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
340 enum LEADING_BYTE_OFFICIAL_1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
341 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
342 LEADING_BYTE_LATIN_ISO8859_1 = 0x81, /* Right half of ISO 8859-1 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
343 LEADING_BYTE_LATIN_ISO8859_2, /* 0x82 Right half of ISO 8859-2 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
344 LEADING_BYTE_LATIN_ISO8859_3, /* 0x83 Right half of ISO 8859-3 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
345 LEADING_BYTE_LATIN_ISO8859_4, /* 0x84 Right half of ISO 8859-4 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
346 LEADING_BYTE_THAI_TIS620, /* 0x85 TIS620-2533 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
347 LEADING_BYTE_GREEK_ISO8859_7, /* 0x86 Right half of ISO 8859-7 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
348 LEADING_BYTE_ARABIC_ISO8859_6, /* 0x87 Right half of ISO 8859-6 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
349 LEADING_BYTE_HEBREW_ISO8859_8, /* 0x88 Right half of ISO 8859-8 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
350 LEADING_BYTE_KATAKANA_JISX0201, /* 0x89 Right half of JIS X0201-1976 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
351 LEADING_BYTE_LATIN_JISX0201, /* 0x8A Left half of JIS X0201-1976 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
352 LEADING_BYTE_CYRILLIC_ISO8859_5,/* 0x8B Right half of ISO 8859-5 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
353 LEADING_BYTE_LATIN_ISO8859_9 /* 0x8C Right half of ISO 8859-9 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
354 /* 0x8D unused */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
355 };
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
356
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
357 #define MIN_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
358 #define MAX_LEADING_BYTE_OFFICIAL_1 LEADING_BYTE_LATIN_ISO8859_9
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
359
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
360 /** The following are for 2-byte characters in an official charset. **/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
361 enum LEADING_BYTE_OFFICIAL_2
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
362 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
363 LEADING_BYTE_JAPANESE_JISX0208_1978 = 0x90, /* Japanese JIS X0208-1978 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
364 LEADING_BYTE_CHINESE_GB2312, /* 0x91 Chinese Hanzi GB2312-1980 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
365 LEADING_BYTE_JAPANESE_JISX0208, /* 0x92 Japanese JIS X0208-1983 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
366 LEADING_BYTE_KOREAN_KSC5601, /* 0x93 Hangul KS C5601-1987 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
367 LEADING_BYTE_JAPANESE_JISX0212, /* 0x94 Japanese JIS X0212-1990 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
368 LEADING_BYTE_CHINESE_CNS11643_1, /* 0x95 Chinese CNS11643 Set 1 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
369 LEADING_BYTE_CHINESE_CNS11643_2, /* 0x96 Chinese CNS11643 Set 2 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
370 LEADING_BYTE_CHINESE_BIG5_1, /* 0x97 Big5 Level 1 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
371 LEADING_BYTE_CHINESE_BIG5_2 /* 0x98 Big5 Level 2 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
372 /* 0x99 unused */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
373 /* 0x9A unused */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
374 /* 0x9B unused */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
375 /* 0x9C unused */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
376 };
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
377
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
378 #define MIN_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_JAPANESE_JISX0208_1978
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
379 #define MAX_LEADING_BYTE_OFFICIAL_2 LEADING_BYTE_CHINESE_BIG5_2
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
380
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
381 /** The following are for 1- and 2-byte characters in a private charset. **/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
382
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
383 #define PRE_LEADING_BYTE_PRIVATE_1 0x9E /* 1-byte char-set */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
384 #define PRE_LEADING_BYTE_PRIVATE_2 0x9F /* 2-byte char-set */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
385
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
386 #define MIN_LEADING_BYTE_PRIVATE_1 0xA0
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
387 #define MAX_LEADING_BYTE_PRIVATE_1 0xEF
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
388 #define MIN_LEADING_BYTE_PRIVATE_2 0xF0
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
389 #define MAX_LEADING_BYTE_PRIVATE_2 0xFF
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
390
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
391 #define NUM_LEADING_BYTES 128
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
392
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
393
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
394 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
395 /* Operations on leading bytes */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
396 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
397
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
398 /* Is this leading byte for a private charset? */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
399
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
400 #define LEADING_BYTE_PRIVATE_P(lb) ((lb) >= MIN_LEADING_BYTE_PRIVATE_1)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
401
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
402 /* Is this a prefix for a private leading byte? */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
403
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
404 INLINE int LEADING_BYTE_PREFIX_P (unsigned char lb);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
405 INLINE int
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
406 LEADING_BYTE_PREFIX_P (unsigned char lb)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
407 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
408 return (lb == PRE_LEADING_BYTE_PRIVATE_1 ||
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
409 lb == PRE_LEADING_BYTE_PRIVATE_2);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
410 }
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
411
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
412 /* Given a private leading byte, return the leading byte prefix stored
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
413 in a string */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
414
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
415 #define PRIVATE_LEADING_BYTE_PREFIX(lb) \
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
416 ((lb) < MIN_LEADING_BYTE_PRIVATE_2 ? \
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
417 PRE_LEADING_BYTE_PRIVATE_1 : \
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
418 PRE_LEADING_BYTE_PRIVATE_2)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
419
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
420
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
421 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
422 /* Operations on individual bytes */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
423 /* of any format */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
424 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
425
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
426 /* Argument `c' should be (unsigned int) or (unsigned char). */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
427 /* Note that SP and DEL are not included. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
428
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
429 #define BYTE_ASCII_P(c) ((c) < 0x80)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
430 #define BYTE_C0_P(c) ((c) < 0x20)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
431 /* Do some forced casting just to make *sure* things are gotten right. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
432 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
433
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
434
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
435 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
436 /* Operations on individual bytes */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
437 /* in a Mule-formatted string */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
438 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
439
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
440 /* Does this byte represent the first byte of a character? */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
441
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
442 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
443
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
444 /* Does this byte represent the first byte of a multi-byte character? */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
445
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
446 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
447
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
448
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
449 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
450 /* Information about a particular character set */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
451 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
452
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
453 struct Lisp_Charset
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
454 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
455 struct lcrecord_header header;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
456
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
457 int id;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
458 Lisp_Object name;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
459 Lisp_Object doc_string, registry, short_name, long_name;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
460
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
461 Lisp_Object reverse_direction_charset;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
462
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
463 Lisp_Object ccl_program;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
464
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
465 /* Final byte of this character set in ISO2022 designating escape sequence */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
466 Bufbyte final;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
467
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
468 /* Number of bytes (1 - 4) required in the internal representation
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
469 for characters in this character set. This is *not* the
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
470 same as the dimension of the character set. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
471 unsigned int rep_bytes;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
472
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
473 /* Number of columns a character in this charset takes up, on TTY
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
474 devices. Not used for X devices. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
475 unsigned int columns;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
476
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
477 /* Direction of this character set */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
478 unsigned int direction;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
479
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
480 /* Type of this character set (94, 96, 94x94, 96x96) */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
481 unsigned int type;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
482
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
483 /* Number of bytes used in encoding of this character set (1 or 2) */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
484 unsigned int dimension;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
485
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
486 /* Number of chars in each dimension (usually 94 or 96) */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
487 unsigned int chars;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
488
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
489 /* Which half of font to be used to display this character set */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
490 unsigned int graphic;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
491 };
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
492
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
493 DECLARE_LRECORD (charset, struct Lisp_Charset);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
494 #define XCHARSET(x) XRECORD (x, charset, struct Lisp_Charset)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
495 #define XSETCHARSET(x, p) XSETRECORD (x, p, charset)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
496 #define CHARSETP(x) RECORDP (x, charset)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
497 #define CHECK_CHARSET(x) CHECK_RECORD (x, charset)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
498 #define CONCHECK_CHARSET(x) CONCHECK_RECORD (x, charset)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
499
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
500 #define CHARSET_TYPE_94 0 /* This charset includes 94 characters. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
501 #define CHARSET_TYPE_96 1 /* This charset includes 96 characters. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
502 #define CHARSET_TYPE_94X94 2 /* This charset includes 94x94 characters. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
503 #define CHARSET_TYPE_96X96 3 /* This charset includes 96x96 characters. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
504
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
505 #define CHARSET_LEFT_TO_RIGHT 0
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
506 #define CHARSET_RIGHT_TO_LEFT 1
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
507
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
508 /* Leading byte and id have been regrouped. -- OG */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
509 #define CHARSET_ID(cs) ((cs)->id)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
510 #define CHARSET_LEADING_BYTE(cs) ((Bufbyte) CHARSET_ID(cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
511 #define CHARSET_NAME(cs) ((cs)->name)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
512 #define CHARSET_SHORT_NAME(cs) ((cs)->short_name)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
513 #define CHARSET_LONG_NAME(cs) ((cs)->long_name)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
514 #define CHARSET_REP_BYTES(cs) ((cs)->rep_bytes)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
515 #define CHARSET_COLUMNS(cs) ((cs)->columns)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
516 #define CHARSET_GRAPHIC(cs) ((cs)->graphic)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
517 #define CHARSET_TYPE(cs) ((cs)->type)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
518 #define CHARSET_DIRECTION(cs) ((cs)->direction)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
519 #define CHARSET_FINAL(cs) ((cs)->final)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
520 #define CHARSET_DOC_STRING(cs) ((cs)->doc_string)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
521 #define CHARSET_REGISTRY(cs) ((cs)->registry)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
522 #define CHARSET_CCL_PROGRAM(cs) ((cs)->ccl_program)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
523 #define CHARSET_DIMENSION(cs) ((cs)->dimension)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
524 #define CHARSET_CHARS(cs) ((cs)->chars)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
525 #define CHARSET_REVERSE_DIRECTION_CHARSET(cs) ((cs)->reverse_direction_charset)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
526
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
527
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
528 #define CHARSET_PRIVATE_P(cs) LEADING_BYTE_PRIVATE_P (CHARSET_LEADING_BYTE (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
529
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
530 #define XCHARSET_ID(cs) CHARSET_ID (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
531 #define XCHARSET_NAME(cs) CHARSET_NAME (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
532 #define XCHARSET_SHORT_NAME(cs) CHARSET_SHORT_NAME (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
533 #define XCHARSET_LONG_NAME(cs) CHARSET_LONG_NAME (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
534 #define XCHARSET_REP_BYTES(cs) CHARSET_REP_BYTES (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
535 #define XCHARSET_COLUMNS(cs) CHARSET_COLUMNS (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
536 #define XCHARSET_GRAPHIC(cs) CHARSET_GRAPHIC (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
537 #define XCHARSET_TYPE(cs) CHARSET_TYPE (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
538 #define XCHARSET_DIRECTION(cs) CHARSET_DIRECTION (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
539 #define XCHARSET_FINAL(cs) CHARSET_FINAL (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
540 #define XCHARSET_DOC_STRING(cs) CHARSET_DOC_STRING (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
541 #define XCHARSET_REGISTRY(cs) CHARSET_REGISTRY (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
542 #define XCHARSET_LEADING_BYTE(cs) CHARSET_LEADING_BYTE (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
543 #define XCHARSET_CCL_PROGRAM(cs) CHARSET_CCL_PROGRAM (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
544 #define XCHARSET_DIMENSION(cs) CHARSET_DIMENSION (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
545 #define XCHARSET_CHARS(cs) CHARSET_CHARS (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
546 #define XCHARSET_PRIVATE_P(cs) CHARSET_PRIVATE_P (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
547 #define XCHARSET_REVERSE_DIRECTION_CHARSET(cs) \
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
548 CHARSET_REVERSE_DIRECTION_CHARSET (XCHARSET (cs))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
549
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
550 struct charset_lookup {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
551 /* Table of charsets indexed by leading byte. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
552 Lisp_Object charset_by_leading_byte[128];
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
553
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
554 /* Table of charsets indexed by type/final-byte/direction. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
555 Lisp_Object charset_by_attributes[4][128][2];
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
556 };
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
557
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
558 extern struct charset_lookup *chlook;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
559
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
560 /* Table of number of bytes in the string representation of a character
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
561 indexed by the first byte of that representation.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
562
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
563 This value can be derived other ways -- e.g. something like
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
564
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
565 (BYTE_ASCII_P (first_byte) ? 1 :
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
566 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
567
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
568 but it's faster this way. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
569 extern Bytecount rep_bytes_by_first_byte[0xA0];
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
570
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
571 #ifdef ERROR_CHECK_TYPECHECK
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
572 /* int not Bufbyte even though that is the actual type of a leading byte.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
573 This way, out-of-range values will get caught rather than automatically
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
574 truncated. */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
575 INLINE Lisp_Object CHARSET_BY_LEADING_BYTE (int lb);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
576 INLINE Lisp_Object
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
577 CHARSET_BY_LEADING_BYTE (int lb)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
578 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
579 assert (lb >= 0x80 && lb <= 0xFF);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
580 return chlook->charset_by_leading_byte[lb - 128];
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
581 }
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
582
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
583 #else
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
584
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
585 #define CHARSET_BY_LEADING_BYTE(lb) (chlook->charset_by_leading_byte[(lb) - 128])
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
586
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
587 #endif
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
588
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
/* Entry of chlook->charset_by_attributes selected by TYPE, FINAL and
   DIR (used, in that order, as the three array indices). */
#define CHARSET_BY_ATTRIBUTES(type, final, dir) \
  (chlook->charset_by_attributes[type][final][dir])
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
591
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
#ifdef ERROR_CHECK_TYPECHECK

/* Number of bytes in the string representation of a character whose
   first byte is FB.  This checked variant asserts that FB is a valid
   index into rep_bytes_by_first_byte, i.e. 0 <= FB < 0xA0. */
INLINE int REP_BYTES_BY_FIRST_BYTE (int fb);
INLINE int
REP_BYTES_BY_FIRST_BYTE (int fb)
{
  assert (fb >= 0 && fb < 0xA0);
  return rep_bytes_by_first_byte[fb];
}

#else
/* Unchecked variant: plain table lookup, no range check on FB. */
#define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb])
#endif
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
606
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
607
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
608 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
609 /* Dealing with characters */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
610 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
611
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
/* Is this character represented by more than one byte in a string?
   True for every character code of 0x80 and above. */

#define CHAR_MULTIBYTE_P(c) ((c) >= 0x80)

/* Exact complement of CHAR_MULTIBYTE_P: character codes below 0x80. */

#define CHAR_ASCII_P(c) (!CHAR_MULTIBYTE_P (c))

/* The bit fields of character are divided into 3 parts:
   FIELD1(5bits):FIELD2(7bits):FIELD3(7bits)
   FIELD3 occupies bits 0-6, FIELD2 bits 7-13 and FIELD1 bits 14-18. */

#define CHAR_FIELD1_MASK (0x1F << 14)
#define CHAR_FIELD2_MASK (0x7F << 7)
#define CHAR_FIELD3_MASK 0x7F

/* Macros to access each field of a character code of C.  Each one
   masks out its field and shifts it down to bit 0. */

#define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14)
#define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7)
#define CHAR_FIELD3(c)  ((c) & CHAR_FIELD3_MASK)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
630
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
/* Field 1, if non-zero, usually holds a leading byte for a
   dimension-2 charset.  Field 2, if non-zero, usually holds a leading
   byte for a dimension-1 charset. */

/* Converting between field values and leading bytes.  Each constant
   is the amount added to a field value to obtain the leading byte
   (and subtracted from a leading byte to obtain the field value). */

#define FIELD2_TO_OFFICIAL_LEADING_BYTE 0x80
#define FIELD2_TO_PRIVATE_LEADING_BYTE  0x80

#define FIELD1_TO_OFFICIAL_LEADING_BYTE 0x8F
#define FIELD1_TO_PRIVATE_LEADING_BYTE  0xE1

/* Minimum and maximum allowed values for the fields, derived from the
   corresponding leading-byte limits. */

#define MIN_CHAR_FIELD2_OFFICIAL \
  (MIN_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)
#define MAX_CHAR_FIELD2_OFFICIAL \
  (MAX_LEADING_BYTE_OFFICIAL_1 - FIELD2_TO_OFFICIAL_LEADING_BYTE)

#define MIN_CHAR_FIELD1_OFFICIAL \
  (MIN_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)
#define MAX_CHAR_FIELD1_OFFICIAL \
  (MAX_LEADING_BYTE_OFFICIAL_2 - FIELD1_TO_OFFICIAL_LEADING_BYTE)

#define MIN_CHAR_FIELD2_PRIVATE \
  (MIN_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)
#define MAX_CHAR_FIELD2_PRIVATE \
  (MAX_LEADING_BYTE_PRIVATE_1 - FIELD2_TO_PRIVATE_LEADING_BYTE)

#define MIN_CHAR_FIELD1_PRIVATE \
  (MIN_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)
#define MAX_CHAR_FIELD1_PRIVATE \
  (MAX_LEADING_BYTE_PRIVATE_2 - FIELD1_TO_PRIVATE_LEADING_BYTE)

/* Minimum character code of each <type> character: the minimum field
   value shifted into the position of field 2 (<< 7) or field 1 (<< 14). */

#define MIN_CHAR_OFFICIAL_TYPE9N    (MIN_CHAR_FIELD2_OFFICIAL <<  7)
#define MIN_CHAR_PRIVATE_TYPE9N     (MIN_CHAR_FIELD2_PRIVATE  <<  7)
#define MIN_CHAR_OFFICIAL_TYPE9NX9N (MIN_CHAR_FIELD1_OFFICIAL << 14)
#define MIN_CHAR_PRIVATE_TYPE9NX9N  (MIN_CHAR_FIELD1_PRIVATE  << 14)
#define MIN_CHAR_COMPOSITION        (0x1F << 14)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
672
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
673 /* Leading byte of a character.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
674
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
675 NOTE: This takes advantage of the fact that
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
676 FIELD2_TO_OFFICIAL_LEADING_BYTE and
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
677 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
678 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
679
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
680 INLINE Bufbyte CHAR_LEADING_BYTE (Emchar c);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
681 INLINE Bufbyte
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
682 CHAR_LEADING_BYTE (Emchar c)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
683 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
684 if (CHAR_ASCII_P (c))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
685 return LEADING_BYTE_ASCII;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
686 else if (c < 0xA0)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
687 return LEADING_BYTE_CONTROL_1;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
688 else if (c < MIN_CHAR_OFFICIAL_TYPE9NX9N)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
689 return CHAR_FIELD2 (c) + FIELD2_TO_OFFICIAL_LEADING_BYTE;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
690 else if (c < MIN_CHAR_PRIVATE_TYPE9NX9N)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
691 return CHAR_FIELD1 (c) + FIELD1_TO_OFFICIAL_LEADING_BYTE;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
692 else if (c < MIN_CHAR_COMPOSITION)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
693 return CHAR_FIELD1 (c) + FIELD1_TO_PRIVATE_LEADING_BYTE;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
694 else
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
695 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
696 #ifdef ENABLE_COMPOSITE_CHARS
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
697 return LEADING_BYTE_COMPOSITE;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
698 #else
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
699 abort();
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
700 return 0;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
701 #endif /* ENABLE_COMPOSITE_CHARS */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
702 }
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
703 }
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
704
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
705 #define CHAR_CHARSET(c) CHARSET_BY_LEADING_BYTE (CHAR_LEADING_BYTE (c))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
706
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
707 /* Return a character whose charset is CHARSET and position-codes
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
708 are C1 and C2. TYPE9N character ignores C2.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
709
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
710 NOTE: This takes advantage of the fact that
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
711 FIELD2_TO_OFFICIAL_LEADING_BYTE and
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
712 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
713 */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
714
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
715 INLINE Emchar MAKE_CHAR (Lisp_Object charset, int c1, int c2);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
716 INLINE Emchar
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
717 MAKE_CHAR (Lisp_Object charset, int c1, int c2)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
718 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
719 if (EQ (charset, Vcharset_ascii))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
720 return c1;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
721 else if (EQ (charset, Vcharset_control_1))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
722 return c1 | 0x80;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
723 #ifdef ENABLE_COMPOSITE_CHARS
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
724 else if (EQ (charset, Vcharset_composite))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
725 return (0x1F << 14) | ((c1) << 7) | (c2);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
726 #endif
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
727 else if (XCHARSET_DIMENSION (charset) == 1)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
728 return ((XCHARSET_LEADING_BYTE (charset) -
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
729 FIELD2_TO_OFFICIAL_LEADING_BYTE) << 7) | (c1);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
730 else if (!XCHARSET_PRIVATE_P (charset))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
731 return ((XCHARSET_LEADING_BYTE (charset) -
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
732 FIELD1_TO_OFFICIAL_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
733 else
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
734 return ((XCHARSET_LEADING_BYTE (charset) -
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
735 FIELD1_TO_PRIVATE_LEADING_BYTE) << 14) | ((c1) << 7) | (c2);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
736 }
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
737
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
/* The charset of character C is set to CHARSET, and the
   position-codes of C are set to C1 and C2.  C2 of TYPE9N character
   is 0. */

/* BREAKUP_CHAR_1_UNSAFE assumes that the charset has already been
   calculated, and just computes c1 and c2.

   BREAKUP_CHAR also computes and stores the charset.

   BREAKUP_CHAR_1_UNSAFE expands to a single expression.  The whole
   expansion is parenthesized so that operators surrounding a use of
   the macro cannot bind to the `== 1' test or to the conditional.
   The arguments are substituted textually and C appears twice in the
   second branch, so pass expressions without side effects. */

#define BREAKUP_CHAR_1_UNSAFE(c, charset, c1, c2)	\
  (XCHARSET_DIMENSION (charset) == 1			\
   ? ((c1) = CHAR_FIELD3 (c), (c2) = 0)			\
   : ((c1) = CHAR_FIELD2 (c),				\
      (c2) = CHAR_FIELD3 (c)))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
752
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
753 INLINE void breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
754 INLINE void
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
755 breakup_char_1 (Emchar c, Lisp_Object *charset, int *c1, int *c2)
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
756 {
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
757 *charset = CHAR_CHARSET (c);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
758 BREAKUP_CHAR_1_UNSAFE (c, *charset, *c1, *c2);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
759 }
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
760
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
761 #define BREAKUP_CHAR(c, charset, c1, c2) \
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
762 breakup_char_1 (c, &(charset), &(c1), &(c2))
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
763
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
764
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
765
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
#ifdef ENABLE_COMPOSITE_CHARS
/************************************************************************/
/*                        Composite characters                          */
/************************************************************************/

/* Prototypes only; visible when ENABLE_COMPOSITE_CHARS is defined.
   The definitions are not part of this header. */
Emchar lookup_composite_char (Bufbyte *str, int len);
Lisp_Object composite_char_string (Emchar ch);
#endif /* ENABLE_COMPOSITE_CHARS */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
774
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
775
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
776 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
777 /* Exported functions */
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
778 /************************************************************************/
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
779
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
780 EXFUN (Ffind_charset, 1);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
781 EXFUN (Fget_charset, 1);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
782
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
783 extern Lisp_Object Vcharset_chinese_big5_1;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
784 extern Lisp_Object Vcharset_chinese_big5_2;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
785 extern Lisp_Object Vcharset_japanese_jisx0208;
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
786
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
787 Emchar Lstream_get_emchar_1 (Lstream *stream, int first_char);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
788 int Lstream_fput_emchar (Lstream *stream, Emchar ch);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
789 void Lstream_funget_emchar (Lstream *stream, Emchar ch);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
790
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
791 int copy_internal_to_external (CONST Bufbyte *internal, Bytecount len,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
792 unsigned char *external);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
793 Bytecount copy_external_to_internal (CONST unsigned char *external,
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
794 int len, Bufbyte *internal);
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
795
3ecd8885ac67 Import from CVS: tag r21-2-22
cvs
parents:
diff changeset
796 #endif /* _XEMACS_MULE_CHARSET_H */