428
|
1 /* Functions to handle multilingual characters.
|
|
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
|
4
|
|
5 This file is part of XEmacs.
|
|
6
|
|
7 XEmacs is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by the
|
|
9 Free Software Foundation; either version 2, or (at your option) any
|
|
10 later version.
|
|
11
|
|
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with XEmacs; see the file COPYING. If not, write to
|
|
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
20 Boston, MA 02111-1307, USA. */
|
|
21
|
|
22 /* Synched up with: FSF 20.3. Not in FSF. */
|
|
23
|
|
24 /* Rewritten by Ben Wing <ben@xemacs.org>. */
|
|
25
|
|
26 #include <config.h>
|
|
27 #include "lisp.h"
|
|
28
|
|
29 #include "buffer.h"
|
|
30 #include "chartab.h"
|
|
31 #include "elhash.h"
|
|
32 #include "lstream.h"
|
|
33 #include "device.h"
|
|
34 #include "faces.h"
|
|
35 #include "mule-ccl.h"
|
|
36
|
|
37 /* The various pre-defined charsets. */
|
|
38
|
|
39 Lisp_Object Vcharset_ascii;
|
|
40 Lisp_Object Vcharset_control_1;
|
|
41 Lisp_Object Vcharset_latin_iso8859_1;
|
|
42 Lisp_Object Vcharset_latin_iso8859_2;
|
|
43 Lisp_Object Vcharset_latin_iso8859_3;
|
|
44 Lisp_Object Vcharset_latin_iso8859_4;
|
|
45 Lisp_Object Vcharset_thai_tis620;
|
|
46 Lisp_Object Vcharset_greek_iso8859_7;
|
|
47 Lisp_Object Vcharset_arabic_iso8859_6;
|
|
48 Lisp_Object Vcharset_hebrew_iso8859_8;
|
|
49 Lisp_Object Vcharset_katakana_jisx0201;
|
|
50 Lisp_Object Vcharset_latin_jisx0201;
|
|
51 Lisp_Object Vcharset_cyrillic_iso8859_5;
|
|
52 Lisp_Object Vcharset_latin_iso8859_9;
|
728
|
53 Lisp_Object Vcharset_latin_iso8859_15;
|
428
|
54 Lisp_Object Vcharset_japanese_jisx0208_1978;
|
|
55 Lisp_Object Vcharset_chinese_gb2312;
|
|
56 Lisp_Object Vcharset_japanese_jisx0208;
|
|
57 Lisp_Object Vcharset_korean_ksc5601;
|
|
58 Lisp_Object Vcharset_japanese_jisx0212;
|
|
59 Lisp_Object Vcharset_chinese_cns11643_1;
|
|
60 Lisp_Object Vcharset_chinese_cns11643_2;
|
|
61 Lisp_Object Vcharset_chinese_big5_1;
|
|
62 Lisp_Object Vcharset_chinese_big5_2;
|
|
63
|
|
#ifdef ENABLE_COMPOSITE_CHARS

/* The charset object for composite characters. */
Lisp_Object Vcharset_composite;

/* Two hash tables tie composite characters to the strings of
   characters they are composed from: char2string maps a composite
   char to its string, string2char maps the string back to the char. */
Lisp_Object Vcomposite_char_char2string_hash_table,
  Vcomposite_char_string2char_hash_table;

/* Row and column counters for composite characters.
   NOTE(review): presumably the position to hand out to the next newly
   created composite character -- confirm against the code that uses
   them. */
static int composite_char_row_next, composite_char_col_next;

#endif /* ENABLE_COMPOSITE_CHARS */
|
|
77
|
|
78 struct charset_lookup *chlook;
|
|
79
|
|
80 static const struct lrecord_description charset_lookup_description_1[] = {
|
440
|
81 { XD_LISP_OBJECT_ARRAY, offsetof (struct charset_lookup, charset_by_leading_byte), 128+4*128*2 },
|
428
|
82 { XD_END }
|
|
83 };
|
|
84
|
|
85 static const struct struct_description charset_lookup_description = {
|
440
|
86 sizeof (struct charset_lookup),
|
428
|
87 charset_lookup_description_1
|
|
88 };
|
|
89
|
|
90 /* Table of number of bytes in the string representation of a character
|
|
91 indexed by the first byte of that representation.
|
|
92
|
|
93 rep_bytes_by_first_byte(c) is more efficient than the equivalent
|
|
94 canonical computation:
|
|
95
|
438
|
96 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (c)) */
|
428
|
97
|
438
|
98 const Bytecount rep_bytes_by_first_byte[0xA0] =
|
428
|
99 { /* 0x00 - 0x7f are for straight ASCII */
|
|
100 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
101 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
102 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
103 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
104 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
105 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
106 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
107 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
108 /* 0x80 - 0x8f are for Dimension-1 official charsets */
|
|
109 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
110 /* 0x90 - 0x9d are for Dimension-2 official charsets */
|
|
111 /* 0x9e is for Dimension-1 private charsets */
|
|
112 /* 0x9f is for Dimension-2 private charsets */
|
|
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
|
|
114 };
|
|
115
|
|
116 Lisp_Object Qcharsetp;
|
|
117
|
|
118 /* Qdoc_string, Qdimension, Qchars defined in general.c */
|
|
119 Lisp_Object Qregistry, Qfinal, Qgraphic;
|
|
120 Lisp_Object Qdirection;
|
|
121 Lisp_Object Qreverse_direction_charset;
|
|
122 Lisp_Object Qleading_byte;
|
|
123 Lisp_Object Qshort_name, Qlong_name;
|
|
124
|
|
125 Lisp_Object Qascii,
|
|
126 Qcontrol_1,
|
|
127 Qlatin_iso8859_1,
|
|
128 Qlatin_iso8859_2,
|
|
129 Qlatin_iso8859_3,
|
|
130 Qlatin_iso8859_4,
|
|
131 Qthai_tis620,
|
|
132 Qgreek_iso8859_7,
|
|
133 Qarabic_iso8859_6,
|
|
134 Qhebrew_iso8859_8,
|
|
135 Qkatakana_jisx0201,
|
|
136 Qlatin_jisx0201,
|
|
137 Qcyrillic_iso8859_5,
|
|
138 Qlatin_iso8859_9,
|
728
|
139 Qlatin_iso8859_15,
|
428
|
140 Qjapanese_jisx0208_1978,
|
|
141 Qchinese_gb2312,
|
|
142 Qjapanese_jisx0208,
|
|
143 Qkorean_ksc5601,
|
|
144 Qjapanese_jisx0212,
|
|
145 Qchinese_cns11643_1,
|
|
146 Qchinese_cns11643_2,
|
|
147 Qchinese_big5_1,
|
|
148 Qchinese_big5_2,
|
|
149 Qcomposite;
|
|
150
|
|
151 Lisp_Object Ql2r, Qr2l;
|
|
152
|
|
153 Lisp_Object Vcharset_hash_table;
|
|
154
|
|
155 /* Composite characters are characters constructed by overstriking two
|
|
156 or more regular characters.
|
|
157
|
|
158 1) The old Mule implementation involves storing composite characters
|
|
159 in a buffer as a tag followed by all of the actual characters
|
|
160 used to make up the composite character. I think this is a bad
|
|
161 idea; it greatly complicates code that wants to handle strings
|
|
162 one character at a time because it has to deal with the possibility
|
|
163 of great big ungainly characters. It's much more reasonable to
|
|
164 simply store an index into a table of composite characters.
|
|
165
|
|
166 2) The current implementation only allows for 16,384 separate
|
|
167 composite characters over the lifetime of the XEmacs process.
|
|
168 This could become a potential problem if the user
|
|
169 edited lots of different files that use composite characters.
|
|
170 Due to FSF bogosity, increasing the number of allowable
|
|
171 composite characters under Mule would decrease the number
|
|
172 of possible faces that can exist. Mule already has shrunk
|
|
173 this to 2048, and further shrinkage would become uncomfortable.
|
|
174 No such problems exist in XEmacs.
|
|
175
|
|
176 Composite characters could be represented as 0x80 C1 C2 C3,
|
|
177 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
|
|
178 for slightly under 2^20 (one million) composite characters
|
|
179 over the XEmacs process lifetime, and you only need to
|
|
180 increase the size of a Mule character from 19 to 21 bits.
|
|
181 Or you could use 0x80 C1 C2 C3 C4, allowing for about
|
|
182 85 million (slightly over 2^26) composite characters. */
|
|
183
|
|
184
|
|
185 /************************************************************************/
|
|
186 /* Basic Emchar functions */
|
|
187 /************************************************************************/
|
|
188
|
|
189 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
|
|
190 string in STR. Returns the number of bytes stored.
|
|
191 Do not call this directly. Use the macro set_charptr_emchar() instead.
|
|
192 */
|
|
193
|
|
194 Bytecount
|
665
|
195 non_ascii_set_charptr_emchar (Intbyte *str, Emchar c)
|
428
|
196 {
|
665
|
197 Intbyte *p;
|
|
198 Intbyte lb;
|
428
|
199 int c1, c2;
|
|
200 Lisp_Object charset;
|
|
201
|
|
202 p = str;
|
|
203 BREAKUP_CHAR (c, charset, c1, c2);
|
|
204 lb = CHAR_LEADING_BYTE (c);
|
|
205 if (LEADING_BYTE_PRIVATE_P (lb))
|
|
206 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
|
|
207 *p++ = lb;
|
|
208 if (EQ (charset, Vcharset_control_1))
|
|
209 c1 += 0x20;
|
|
210 *p++ = c1 | 0x80;
|
|
211 if (c2)
|
|
212 *p++ = c2 | 0x80;
|
|
213
|
|
214 return (p - str);
|
|
215 }
|
|
216
|
|
217 /* Return the first character from a Mule-encoded string in STR,
|
|
218 assuming it's non-ASCII. Do not call this directly.
|
|
219 Use the macro charptr_emchar() instead. */
|
|
220
|
|
221 Emchar
|
665
|
222 non_ascii_charptr_emchar (const Intbyte *str)
|
428
|
223 {
|
665
|
224 Intbyte i0 = *str, i1, i2 = 0;
|
428
|
225 Lisp_Object charset;
|
|
226
|
|
227 if (i0 == LEADING_BYTE_CONTROL_1)
|
|
228 return (Emchar) (*++str - 0x20);
|
|
229
|
|
230 if (LEADING_BYTE_PREFIX_P (i0))
|
|
231 i0 = *++str;
|
|
232
|
|
233 i1 = *++str & 0x7F;
|
|
234
|
|
235 charset = CHARSET_BY_LEADING_BYTE (i0);
|
|
236 if (XCHARSET_DIMENSION (charset) == 2)
|
|
237 i2 = *++str & 0x7F;
|
|
238
|
|
239 return MAKE_CHAR (charset, i1, i2);
|
|
240 }
|
|
241
|
|
242 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
|
|
243 Do not call this directly. Use the macro valid_char_p() instead. */
|
|
244
|
|
245 int
|
|
246 non_ascii_valid_char_p (Emchar ch)
|
|
247 {
|
|
248 int f1, f2, f3;
|
|
249
|
|
250 /* Must have only lowest 19 bits set */
|
|
251 if (ch & ~0x7FFFF)
|
|
252 return 0;
|
|
253
|
|
254 f1 = CHAR_FIELD1 (ch);
|
|
255 f2 = CHAR_FIELD2 (ch);
|
|
256 f3 = CHAR_FIELD3 (ch);
|
|
257
|
|
258 if (f1 == 0)
|
|
259 {
|
|
260 Lisp_Object charset;
|
|
261
|
|
262 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
|
|
263 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
|
|
264 f2 > MAX_CHAR_FIELD2_PRIVATE)
|
|
265 return 0;
|
|
266 if (f3 < 0x20)
|
|
267 return 0;
|
|
268
|
|
269 if (f3 != 0x20 && f3 != 0x7F && !(f2 >= MIN_CHAR_FIELD2_PRIVATE &&
|
|
270 f2 <= MAX_CHAR_FIELD2_PRIVATE))
|
|
271 return 1;
|
|
272
|
|
273 /*
|
|
274 NOTE: This takes advantage of the fact that
|
|
275 FIELD2_TO_OFFICIAL_LEADING_BYTE and
|
|
276 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
|
|
277 */
|
|
278 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
|
|
279 if (EQ (charset, Qnil))
|
|
280 return 0;
|
|
281 return (XCHARSET_CHARS (charset) == 96);
|
|
282 }
|
|
283 else
|
|
284 {
|
|
285 Lisp_Object charset;
|
|
286
|
|
287 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
|
|
288 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
|
|
289 f1 > MAX_CHAR_FIELD1_PRIVATE)
|
|
290 return 0;
|
|
291 if (f2 < 0x20 || f3 < 0x20)
|
|
292 return 0;
|
|
293
|
|
294 #ifdef ENABLE_COMPOSITE_CHARS
|
|
295 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
|
|
296 {
|
|
297 if (UNBOUNDP (Fgethash (make_int (ch),
|
|
298 Vcomposite_char_char2string_hash_table,
|
|
299 Qunbound)))
|
|
300 return 0;
|
|
301 return 1;
|
|
302 }
|
|
303 #endif /* ENABLE_COMPOSITE_CHARS */
|
|
304
|
|
305 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F
|
|
306 && !(f1 >= MIN_CHAR_FIELD1_PRIVATE && f1 <= MAX_CHAR_FIELD1_PRIVATE))
|
|
307 return 1;
|
|
308
|
|
309 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
|
|
310 charset =
|
|
311 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
|
|
312 else
|
|
313 charset =
|
|
314 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
|
|
315
|
|
316 if (EQ (charset, Qnil))
|
|
317 return 0;
|
|
318 return (XCHARSET_CHARS (charset) == 96);
|
|
319 }
|
|
320 }
|
|
321
|
|
322
|
|
323 /************************************************************************/
|
|
324 /* Basic string functions */
|
|
325 /************************************************************************/
|
|
326
|
444
|
327 /* Copy the character pointed to by SRC into DST. Do not call this
|
|
328 directly. Use the macro charptr_copy_char() instead.
|
|
329 Return the number of bytes copied. */
|
428
|
330
|
|
331 Bytecount
|
665
|
332 non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst)
|
428
|
333 {
|
647
|
334 Bytecount bytes = REP_BYTES_BY_FIRST_BYTE (*src);
|
|
335 Bytecount i;
|
444
|
336 for (i = bytes; i; i--, dst++, src++)
|
|
337 *dst = *src;
|
|
338 return bytes;
|
428
|
339 }
|
|
340
|
|
341
|
|
342 /************************************************************************/
|
|
343 /* streams of Emchars */
|
|
344 /************************************************************************/
|
|
345
|
|
346 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
|
|
347 The functions below are not meant to be called directly; use
|
|
348 the macros in insdel.h. */
|
|
349
|
|
350 Emchar
|
|
351 Lstream_get_emchar_1 (Lstream *stream, int ch)
|
|
352 {
|
665
|
353 Intbyte str[MAX_EMCHAR_LEN];
|
|
354 Intbyte *strptr = str;
|
647
|
355 Bytecount bytes;
|
428
|
356
|
665
|
357 str[0] = (Intbyte) ch;
|
444
|
358
|
|
359 for (bytes = REP_BYTES_BY_FIRST_BYTE (ch) - 1; bytes; bytes--)
|
428
|
360 {
|
444
|
361 int c = Lstream_getc (stream);
|
665
|
362 charbpos_checking_assert (c >= 0);
|
|
363 *++strptr = (Intbyte) c;
|
428
|
364 }
|
|
365 return charptr_emchar (str);
|
|
366 }
|
|
367
|
|
368 int
|
|
369 Lstream_fput_emchar (Lstream *stream, Emchar ch)
|
|
370 {
|
665
|
371 Intbyte str[MAX_EMCHAR_LEN];
|
428
|
372 Bytecount len = set_charptr_emchar (str, ch);
|
|
373 return Lstream_write (stream, str, len);
|
|
374 }
|
|
375
|
|
376 void
|
|
377 Lstream_funget_emchar (Lstream *stream, Emchar ch)
|
|
378 {
|
665
|
379 Intbyte str[MAX_EMCHAR_LEN];
|
428
|
380 Bytecount len = set_charptr_emchar (str, ch);
|
|
381 Lstream_unread (stream, str, len);
|
|
382 }
|
|
383
|
|
384
|
|
385 /************************************************************************/
|
|
386 /* charset object */
|
|
387 /************************************************************************/
|
|
388
|
|
389 static Lisp_Object
|
|
390 mark_charset (Lisp_Object obj)
|
|
391 {
|
440
|
392 Lisp_Charset *cs = XCHARSET (obj);
|
428
|
393
|
|
394 mark_object (cs->short_name);
|
|
395 mark_object (cs->long_name);
|
|
396 mark_object (cs->doc_string);
|
|
397 mark_object (cs->registry);
|
|
398 mark_object (cs->ccl_program);
|
|
399 return cs->name;
|
|
400 }
|
|
401
|
|
402 static void
|
|
403 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
|
|
404 {
|
440
|
405 Lisp_Charset *cs = XCHARSET (obj);
|
428
|
406 char buf[200];
|
|
407
|
|
408 if (print_readably)
|
563
|
409 printing_unreadable_object ("#<charset %s 0x%x>",
|
|
410 string_data (XSYMBOL (CHARSET_NAME (cs))->
|
|
411 name),
|
|
412 cs->header.uid);
|
428
|
413
|
|
414 write_c_string ("#<charset ", printcharfun);
|
|
415 print_internal (CHARSET_NAME (cs), printcharfun, 0);
|
|
416 write_c_string (" ", printcharfun);
|
|
417 print_internal (CHARSET_SHORT_NAME (cs), printcharfun, 1);
|
|
418 write_c_string (" ", printcharfun);
|
|
419 print_internal (CHARSET_LONG_NAME (cs), printcharfun, 1);
|
|
420 write_c_string (" ", printcharfun);
|
|
421 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
|
|
422 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
|
|
423 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
|
|
424 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
|
|
425 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
|
|
426 "96x96",
|
|
427 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
|
|
428 CHARSET_COLUMNS (cs),
|
|
429 CHARSET_GRAPHIC (cs),
|
|
430 CHARSET_FINAL (cs));
|
|
431 write_c_string (buf, printcharfun);
|
|
432 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
|
|
433 sprintf (buf, " 0x%x>", cs->header.uid);
|
|
434 write_c_string (buf, printcharfun);
|
|
435 }
|
|
436
|
|
437 static const struct lrecord_description charset_description[] = {
|
440
|
438 { XD_LISP_OBJECT, offsetof (Lisp_Charset, name) },
|
|
439 { XD_LISP_OBJECT, offsetof (Lisp_Charset, doc_string) },
|
|
440 { XD_LISP_OBJECT, offsetof (Lisp_Charset, registry) },
|
|
441 { XD_LISP_OBJECT, offsetof (Lisp_Charset, short_name) },
|
|
442 { XD_LISP_OBJECT, offsetof (Lisp_Charset, long_name) },
|
|
443 { XD_LISP_OBJECT, offsetof (Lisp_Charset, reverse_direction_charset) },
|
|
444 { XD_LISP_OBJECT, offsetof (Lisp_Charset, ccl_program) },
|
428
|
445 { XD_END }
|
|
446 };
|
|
447
|
|
448 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
|
|
449 mark_charset, print_charset, 0, 0, 0, charset_description,
|
440
|
450 Lisp_Charset);
|
446
|
451
|
428
|
452 /* Make a new charset. */
|
446
|
453 /* #### SJT Should generic properties be allowed? */
|
428
|
454 static Lisp_Object
|
|
455 make_charset (int id, Lisp_Object name, unsigned char rep_bytes,
|
|
456 unsigned char type, unsigned char columns, unsigned char graphic,
|
665
|
457 Intbyte final, unsigned char direction, Lisp_Object short_name,
|
428
|
458 Lisp_Object long_name, Lisp_Object doc,
|
|
459 Lisp_Object reg)
|
|
460 {
|
|
461 Lisp_Object obj;
|
440
|
462 Lisp_Charset *cs = alloc_lcrecord_type (Lisp_Charset, &lrecord_charset);
|
|
463
|
|
464 zero_lcrecord (cs);
|
|
465
|
428
|
466 XSETCHARSET (obj, cs);
|
|
467
|
|
468 CHARSET_ID (cs) = id;
|
|
469 CHARSET_NAME (cs) = name;
|
|
470 CHARSET_SHORT_NAME (cs) = short_name;
|
|
471 CHARSET_LONG_NAME (cs) = long_name;
|
|
472 CHARSET_REP_BYTES (cs) = rep_bytes;
|
|
473 CHARSET_DIRECTION (cs) = direction;
|
|
474 CHARSET_TYPE (cs) = type;
|
|
475 CHARSET_COLUMNS (cs) = columns;
|
|
476 CHARSET_GRAPHIC (cs) = graphic;
|
|
477 CHARSET_FINAL (cs) = final;
|
|
478 CHARSET_DOC_STRING (cs) = doc;
|
|
479 CHARSET_REGISTRY (cs) = reg;
|
|
480 CHARSET_CCL_PROGRAM (cs) = Qnil;
|
|
481 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
|
|
482
|
|
483 CHARSET_DIMENSION (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
|
|
484 CHARSET_TYPE (cs) == CHARSET_TYPE_96) ? 1 : 2;
|
|
485 CHARSET_CHARS (cs) = (CHARSET_TYPE (cs) == CHARSET_TYPE_94 ||
|
|
486 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94) ? 94 : 96;
|
|
487
|
|
488 if (final)
|
|
489 {
|
|
490 /* some charsets do not have final characters. This includes
|
|
491 ASCII, Control-1, Composite, and the two faux private
|
|
492 charsets. */
|
|
493 assert (NILP (chlook->charset_by_attributes[type][final][direction]));
|
|
494 chlook->charset_by_attributes[type][final][direction] = obj;
|
|
495 }
|
|
496
|
|
497 assert (NILP (chlook->charset_by_leading_byte[id - 128]));
|
|
498 chlook->charset_by_leading_byte[id - 128] = obj;
|
|
499
|
|
500 /* Some charsets are "faux" and don't have names or really exist at
|
|
501 all except in the leading-byte table. */
|
|
502 if (!NILP (name))
|
|
503 Fputhash (name, obj, Vcharset_hash_table);
|
|
504 return obj;
|
|
505 }
|
|
506
|
|
507 static int
|
|
508 get_unallocated_leading_byte (int dimension)
|
|
509 {
|
|
510 int lb;
|
|
511
|
|
512 if (dimension == 1)
|
|
513 {
|
442
|
514 if (chlook->next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
|
428
|
515 lb = 0;
|
|
516 else
|
442
|
517 lb = chlook->next_allocated_1_byte_leading_byte++;
|
428
|
518 }
|
|
519 else
|
|
520 {
|
442
|
521 if (chlook->next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
|
428
|
522 lb = 0;
|
|
523 else
|
442
|
524 lb = chlook->next_allocated_2_byte_leading_byte++;
|
428
|
525 }
|
|
526
|
|
527 if (!lb)
|
563
|
528 invalid_operation
|
428
|
529 ("No more character sets free for this dimension",
|
|
530 make_int (dimension));
|
|
531
|
|
532 return lb;
|
|
533 }
|
|
534
|
|
535
|
|
536 /************************************************************************/
|
|
537 /* Basic charset Lisp functions */
|
|
538 /************************************************************************/
|
|
539
|
|
540 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
|
|
541 Return non-nil if OBJECT is a charset.
|
|
542 */
|
|
543 (object))
|
|
544 {
|
|
545 return CHARSETP (object) ? Qt : Qnil;
|
|
546 }
|
|
547
|
|
548 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
|
|
549 Retrieve the charset of the given name.
|
|
550 If CHARSET-OR-NAME is a charset object, it is simply returned.
|
|
551 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
|
|
552 nil is returned. Otherwise the associated charset object is returned.
|
|
553 */
|
|
554 (charset_or_name))
|
|
555 {
|
|
556 if (CHARSETP (charset_or_name))
|
|
557 return charset_or_name;
|
|
558
|
|
559 CHECK_SYMBOL (charset_or_name);
|
|
560 return Fgethash (charset_or_name, Vcharset_hash_table, Qnil);
|
|
561 }
|
|
562
|
|
563 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
|
|
564 Retrieve the charset of the given name.
|
|
565 Same as `find-charset' except an error is signalled if there is no such
|
|
566 charset instead of returning nil.
|
|
567 */
|
|
568 (name))
|
|
569 {
|
|
570 Lisp_Object charset = Ffind_charset (name);
|
|
571
|
|
572 if (NILP (charset))
|
563
|
573 invalid_argument ("No such charset", name);
|
428
|
574 return charset;
|
|
575 }
|
|
576
|
|
577 /* We store the charsets in hash tables with the names as the key and the
|
|
578 actual charset object as the value. Occasionally we need to use them
|
|
579 in a list format. These routines provide us with that. */
|
|
580 struct charset_list_closure
|
|
581 {
|
|
582 Lisp_Object *charset_list;
|
|
583 };
|
|
584
|
|
585 static int
|
|
586 add_charset_to_list_mapper (Lisp_Object key, Lisp_Object value,
|
|
587 void *charset_list_closure)
|
|
588 {
|
|
589 /* This function can GC */
|
|
590 struct charset_list_closure *chcl =
|
|
591 (struct charset_list_closure*) charset_list_closure;
|
|
592 Lisp_Object *charset_list = chcl->charset_list;
|
|
593
|
|
594 *charset_list = Fcons (XCHARSET_NAME (value), *charset_list);
|
|
595 return 0;
|
|
596 }
|
|
597
|
|
598 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
|
|
599 Return a list of the names of all defined charsets.
|
|
600 */
|
|
601 ())
|
|
602 {
|
|
603 Lisp_Object charset_list = Qnil;
|
|
604 struct gcpro gcpro1;
|
|
605 struct charset_list_closure charset_list_closure;
|
|
606
|
|
607 GCPRO1 (charset_list);
|
|
608 charset_list_closure.charset_list = &charset_list;
|
|
609 elisp_maphash (add_charset_to_list_mapper, Vcharset_hash_table,
|
|
610 &charset_list_closure);
|
|
611 UNGCPRO;
|
|
612
|
|
613 return charset_list;
|
|
614 }
|
|
615
|
|
616 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
|
444
|
617 Return the name of charset CHARSET.
|
428
|
618 */
|
|
619 (charset))
|
|
620 {
|
|
621 return XCHARSET_NAME (Fget_charset (charset));
|
|
622 }
|
|
623
|
446
|
624 /* #### SJT Should generic properties be allowed? */
|
428
|
625 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
|
|
626 Define a new character set.
|
|
627 This function is for use with Mule support.
|
|
628 NAME is a symbol, the name by which the character set is normally referred.
|
|
629 DOC-STRING is a string describing the character set.
|
|
630 PROPS is a property list, describing the specific nature of the
|
|
631 character set. Recognized properties are:
|
|
632
|
|
633 'short-name Short version of the charset name (ex: Latin-1)
|
|
634 'long-name Long version of the charset name (ex: ISO8859-1 (Latin-1))
|
|
635 'registry A regular expression matching the font registry field for
|
|
636 this character set.
|
|
637 'dimension Number of octets used to index a character in this charset.
|
|
638 Either 1 or 2. Defaults to 1.
|
|
639 'columns Number of columns used to display a character in this charset.
|
|
640 Only used in TTY mode. (Under X, the actual width of a
|
|
641 character can be derived from the font used to display the
|
|
642 characters.) If unspecified, defaults to the dimension
|
|
643 (this is almost always the correct value).
|
|
644 'chars Number of characters in each dimension (94 or 96).
|
|
645 Defaults to 94. Note that if the dimension is 2, the
|
|
646 character set thus described is 94x94 or 96x96.
|
|
647 'final Final byte of ISO 2022 escape sequence. Must be
|
|
648 supplied. Each combination of (DIMENSION, CHARS) defines a
|
|
649 separate namespace for final bytes. Note that ISO
|
|
650 2022 restricts the final byte to the range
|
|
651 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
|
|
652 dimension == 2. Note also that final bytes in the range
|
|
653 0x30 - 0x3F are reserved for user-defined (not official)
|
|
654 character sets.
|
|
655 'graphic 0 (use left half of font on output) or 1 (use right half
|
|
656 of font on output). Defaults to 0. For example, for
|
|
657 a font whose registry is ISO8859-1, the left half
|
|
658 (octets 0x20 - 0x7F) is the `ascii' character set, while
|
|
659 the right half (octets 0xA0 - 0xFF) is the `latin-1'
|
|
660 character set. With 'graphic set to 0, the octets
|
|
661 will have their high bit cleared; with it set to 1,
|
|
662 the octets will have their high bit set.
|
|
663 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
|
|
664 Defaults to 'l2r.
|
|
665 'ccl-program A compiled CCL program used to convert a character in
|
|
666 this charset into an index into the font. This is in
|
|
667 addition to the 'graphic property. The CCL program
|
|
668 is passed the octets of the character, with the high
|
|
669 bit cleared and set depending upon whether the value
|
|
670 of the 'graphic property is 0 or 1.
|
|
671 */
|
|
672 (name, doc_string, props))
|
|
673 {
|
|
674 int id, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
|
|
675 int direction = CHARSET_LEFT_TO_RIGHT;
|
|
676 int type;
|
|
677 Lisp_Object registry = Qnil;
|
|
678 Lisp_Object charset;
|
|
679 Lisp_Object ccl_program = Qnil;
|
|
680 Lisp_Object short_name = Qnil, long_name = Qnil;
|
|
681
|
|
682 CHECK_SYMBOL (name);
|
|
683 if (!NILP (doc_string))
|
|
684 CHECK_STRING (doc_string);
|
|
685
|
|
686 charset = Ffind_charset (name);
|
|
687 if (!NILP (charset))
|
563
|
688 invalid_operation ("Cannot redefine existing charset", name);
|
428
|
689
|
442
|
690 {
|
|
691 EXTERNAL_PROPERTY_LIST_LOOP_3 (keyword, value, props)
|
|
692 {
|
|
693 if (EQ (keyword, Qshort_name))
|
|
694 {
|
|
695 CHECK_STRING (value);
|
|
696 short_name = value;
|
|
697 }
|
428
|
698
|
519
|
699 else if (EQ (keyword, Qlong_name))
|
442
|
700 {
|
|
701 CHECK_STRING (value);
|
|
702 long_name = value;
|
|
703 }
|
428
|
704
|
442
|
705 else if (EQ (keyword, Qdimension))
|
|
706 {
|
|
707 CHECK_INT (value);
|
|
708 dimension = XINT (value);
|
|
709 if (dimension < 1 || dimension > 2)
|
563
|
710 invalid_constant ("Invalid value for 'dimension", value);
|
442
|
711 }
|
428
|
712
|
442
|
713 else if (EQ (keyword, Qchars))
|
|
714 {
|
|
715 CHECK_INT (value);
|
|
716 chars = XINT (value);
|
|
717 if (chars != 94 && chars != 96)
|
563
|
718 invalid_constant ("Invalid value for 'chars", value);
|
442
|
719 }
|
428
|
720
|
442
|
721 else if (EQ (keyword, Qcolumns))
|
|
722 {
|
|
723 CHECK_INT (value);
|
|
724 columns = XINT (value);
|
|
725 if (columns != 1 && columns != 2)
|
563
|
726 invalid_constant ("Invalid value for 'columns", value);
|
442
|
727 }
|
428
|
728
|
442
|
729 else if (EQ (keyword, Qgraphic))
|
|
730 {
|
|
731 CHECK_INT (value);
|
|
732 graphic = XINT (value);
|
|
733 if (graphic < 0 || graphic > 1)
|
563
|
734 invalid_constant ("Invalid value for 'graphic", value);
|
442
|
735 }
|
428
|
736
|
442
|
737 else if (EQ (keyword, Qregistry))
|
|
738 {
|
|
739 CHECK_STRING (value);
|
|
740 registry = value;
|
|
741 }
|
428
|
742
|
442
|
743 else if (EQ (keyword, Qdirection))
|
|
744 {
|
|
745 if (EQ (value, Ql2r))
|
|
746 direction = CHARSET_LEFT_TO_RIGHT;
|
|
747 else if (EQ (value, Qr2l))
|
|
748 direction = CHARSET_RIGHT_TO_LEFT;
|
|
749 else
|
563
|
750 invalid_constant ("Invalid value for 'direction", value);
|
442
|
751 }
|
428
|
752
|
442
|
753 else if (EQ (keyword, Qfinal))
|
|
754 {
|
|
755 CHECK_CHAR_COERCE_INT (value);
|
|
756 final = XCHAR (value);
|
|
757 if (final < '0' || final > '~')
|
563
|
758 invalid_constant ("Invalid value for 'final", value);
|
442
|
759 }
|
428
|
760
|
442
|
761 else if (EQ (keyword, Qccl_program))
|
|
762 {
|
444
|
763 struct ccl_program test_ccl;
|
|
764
|
|
765 if (setup_ccl_program (&test_ccl, value) < 0)
|
563
|
766 invalid_argument ("Invalid value for 'ccl-program", value);
|
442
|
767 ccl_program = value;
|
|
768 }
|
428
|
769
|
442
|
770 else
|
563
|
771 invalid_constant ("Unrecognized property", keyword);
|
442
|
772 }
|
|
773 }
|
428
|
774
|
|
775 if (!final)
|
563
|
776 invalid_argument ("'final must be specified", Qunbound);
|
428
|
777 if (dimension == 2 && final > 0x5F)
|
563
|
778 invalid_constant
|
428
|
779 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
|
|
780 make_char (final));
|
|
781
|
|
782 if (dimension == 1)
|
|
783 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
|
|
784 else
|
|
785 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
|
|
786
|
|
787 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
|
|
788 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
|
578
|
789 invalid_argument
|
|
790 ("Character set already defined for this DIMENSION/CHARS/FINAL combo",
|
565
|
791 Qunbound);
|
428
|
792
|
|
793 id = get_unallocated_leading_byte (dimension);
|
|
794
|
|
795 if (NILP (doc_string))
|
|
796 doc_string = build_string ("");
|
|
797
|
|
798 if (NILP (registry))
|
|
799 registry = build_string ("");
|
|
800
|
|
801 if (NILP (short_name))
|
|
802 XSETSTRING (short_name, XSYMBOL (name)->name);
|
|
803
|
|
804 if (NILP (long_name))
|
|
805 long_name = doc_string;
|
|
806
|
|
807 if (columns == -1)
|
|
808 columns = dimension;
|
|
809 charset = make_charset (id, name, dimension + 2, type, columns, graphic,
|
|
810 final, direction, short_name, long_name, doc_string, registry);
|
|
811 if (!NILP (ccl_program))
|
|
812 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
|
|
813 return charset;
|
|
814 }
|
|
815
|
|
816 DEFUN ("make-reverse-direction-charset", Fmake_reverse_direction_charset,
|
|
817 2, 2, 0, /*
|
|
818 Make a charset equivalent to CHARSET but which goes in the opposite direction.
|
|
819 NEW-NAME is the name of the new charset. Return the new charset.
|
|
820 */
|
|
821 (charset, new_name))
|
|
822 {
|
|
823 Lisp_Object new_charset = Qnil;
|
|
824 int id, dimension, columns, graphic, final;
|
|
825 int direction, type;
|
|
826 Lisp_Object registry, doc_string, short_name, long_name;
|
440
|
827 Lisp_Charset *cs;
|
428
|
828
|
|
829 charset = Fget_charset (charset);
|
|
830 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
|
563
|
831 invalid_operation ("Charset already has reverse-direction charset",
|
428
|
832 charset);
|
|
833
|
|
834 CHECK_SYMBOL (new_name);
|
|
835 if (!NILP (Ffind_charset (new_name)))
|
563
|
836 invalid_operation ("Cannot redefine existing charset", new_name);
|
428
|
837
|
|
838 cs = XCHARSET (charset);
|
|
839
|
|
840 type = CHARSET_TYPE (cs);
|
|
841 columns = CHARSET_COLUMNS (cs);
|
|
842 dimension = CHARSET_DIMENSION (cs);
|
|
843 id = get_unallocated_leading_byte (dimension);
|
|
844
|
|
845 graphic = CHARSET_GRAPHIC (cs);
|
|
846 final = CHARSET_FINAL (cs);
|
|
847 direction = CHARSET_RIGHT_TO_LEFT;
|
|
848 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
|
|
849 direction = CHARSET_LEFT_TO_RIGHT;
|
|
850 doc_string = CHARSET_DOC_STRING (cs);
|
|
851 short_name = CHARSET_SHORT_NAME (cs);
|
|
852 long_name = CHARSET_LONG_NAME (cs);
|
|
853 registry = CHARSET_REGISTRY (cs);
|
|
854
|
|
855 new_charset = make_charset (id, new_name, dimension + 2, type, columns,
|
|
856 graphic, final, direction, short_name, long_name,
|
|
857 doc_string, registry);
|
|
858
|
|
859 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
|
|
860 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
|
|
861
|
|
862 return new_charset;
|
|
863 }
|
|
864
|
|
/* #### Reverse direction charsets not yet implemented. */
#if 0
DEFUN ("charset-reverse-direction-charset", Fcharset_reverse_direction_charset,
       1, 1, 0, /*
Return the reverse-direction charset parallel to CHARSET, if any.
This is the charset with the same properties (in particular, the same
dimension, number of characters per dimension, and final byte) as
CHARSET but whose characters are displayed in the opposite direction.
*/
       (charset))
{
  /* Resolve CHARSET (object or name) first, then read its link. */
  Lisp_Object resolved = Fget_charset (charset);

  return XCHARSET_REVERSE_DIRECTION_CHARSET (resolved);
}
#endif
|
|
880
|
|
881 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
|
|
882 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
|
|
883 If DIRECTION is omitted, both directions will be checked (left-to-right
|
|
884 will be returned if character sets exist for both directions).
|
|
885 */
|
|
886 (dimension, chars, final, direction))
|
|
887 {
|
|
888 int dm, ch, fi, di = -1;
|
|
889 int type;
|
|
890 Lisp_Object obj = Qnil;
|
|
891
|
|
892 CHECK_INT (dimension);
|
|
893 dm = XINT (dimension);
|
|
894 if (dm < 1 || dm > 2)
|
563
|
895 invalid_constant ("Invalid value for DIMENSION", dimension);
|
428
|
896
|
|
897 CHECK_INT (chars);
|
|
898 ch = XINT (chars);
|
|
899 if (ch != 94 && ch != 96)
|
563
|
900 invalid_constant ("Invalid value for CHARS", chars);
|
428
|
901
|
|
902 CHECK_CHAR_COERCE_INT (final);
|
|
903 fi = XCHAR (final);
|
|
904 if (fi < '0' || fi > '~')
|
563
|
905 invalid_constant ("Invalid value for FINAL", final);
|
428
|
906
|
|
907 if (EQ (direction, Ql2r))
|
|
908 di = CHARSET_LEFT_TO_RIGHT;
|
|
909 else if (EQ (direction, Qr2l))
|
|
910 di = CHARSET_RIGHT_TO_LEFT;
|
|
911 else if (!NILP (direction))
|
563
|
912 invalid_constant ("Invalid value for DIRECTION", direction);
|
428
|
913
|
|
914 if (dm == 2 && fi > 0x5F)
|
563
|
915 invalid_constant
|
428
|
916 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
|
|
917
|
|
918 if (dm == 1)
|
|
919 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
|
|
920 else
|
|
921 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
|
|
922
|
|
923 if (di == -1)
|
|
924 {
|
|
925 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
|
|
926 if (NILP (obj))
|
|
927 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
|
|
928 }
|
|
929 else
|
|
930 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
|
|
931
|
|
932 if (CHARSETP (obj))
|
|
933 return XCHARSET_NAME (obj);
|
|
934 return obj;
|
|
935 }
|
|
936
|
|
937 DEFUN ("charset-short-name", Fcharset_short_name, 1, 1, 0, /*
|
|
938 Return short name of CHARSET.
|
|
939 */
|
|
940 (charset))
|
|
941 {
|
|
942 return XCHARSET_SHORT_NAME (Fget_charset (charset));
|
|
943 }
|
|
944
|
|
945 DEFUN ("charset-long-name", Fcharset_long_name, 1, 1, 0, /*
|
|
946 Return long name of CHARSET.
|
|
947 */
|
|
948 (charset))
|
|
949 {
|
|
950 return XCHARSET_LONG_NAME (Fget_charset (charset));
|
|
951 }
|
|
952
|
|
953 DEFUN ("charset-description", Fcharset_description, 1, 1, 0, /*
|
|
954 Return description of CHARSET.
|
|
955 */
|
|
956 (charset))
|
|
957 {
|
|
958 return XCHARSET_DOC_STRING (Fget_charset (charset));
|
|
959 }
|
|
960
|
|
961 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
|
|
962 Return dimension of CHARSET.
|
|
963 */
|
|
964 (charset))
|
|
965 {
|
|
966 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
|
|
967 }
|
|
968
|
|
969 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
|
446
|
970 Return property PROP of CHARSET, a charset object or symbol naming a charset.
|
428
|
971 Recognized properties are those listed in `make-charset', as well as
|
|
972 'name and 'doc-string.
|
|
973 */
|
|
974 (charset, prop))
|
|
975 {
|
440
|
976 Lisp_Charset *cs;
|
428
|
977
|
|
978 charset = Fget_charset (charset);
|
|
979 cs = XCHARSET (charset);
|
|
980
|
|
981 CHECK_SYMBOL (prop);
|
|
982 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
|
|
983 if (EQ (prop, Qshort_name)) return CHARSET_SHORT_NAME (cs);
|
|
984 if (EQ (prop, Qlong_name)) return CHARSET_LONG_NAME (cs);
|
|
985 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
|
|
986 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
|
|
987 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
|
|
988 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
|
|
989 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
|
|
990 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
|
|
991 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
|
|
992 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
|
|
993 if (EQ (prop, Qdirection))
|
|
994 return CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l;
|
|
995 if (EQ (prop, Qreverse_direction_charset))
|
|
996 {
|
|
997 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
|
446
|
998 /* #### Is this translation OK? If so, error checking sufficient? */
|
|
999 return CHARSETP (obj) ? XCHARSET_NAME (obj) : obj;
|
428
|
1000 }
|
563
|
1001 invalid_constant ("Unrecognized charset property name", prop);
|
428
|
1002 return Qnil; /* not reached */
|
|
1003 }
|
|
1004
|
|
1005 DEFUN ("charset-id", Fcharset_id, 1, 1, 0, /*
|
|
1006 Return charset identification number of CHARSET.
|
|
1007 */
|
|
1008 (charset))
|
|
1009 {
|
|
1010 return make_int(XCHARSET_LEADING_BYTE (Fget_charset (charset)));
|
|
1011 }
|
|
1012
|
|
1013 /* #### We need to figure out which properties we really want to
|
|
1014 allow to be set. */
|
|
1015
|
|
1016 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
|
|
1017 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
|
|
1018 */
|
|
1019 (charset, ccl_program))
|
|
1020 {
|
444
|
1021 struct ccl_program test_ccl;
|
|
1022
|
428
|
1023 charset = Fget_charset (charset);
|
444
|
1024 if (setup_ccl_program (&test_ccl, ccl_program) < 0)
|
563
|
1025 invalid_argument ("Invalid ccl-program", ccl_program);
|
428
|
1026 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
|
510
|
1027 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
|
428
|
1028 return Qnil;
|
|
1029 }
|
|
1030
|
|
1031 static void
|
|
1032 invalidate_charset_font_caches (Lisp_Object charset)
|
|
1033 {
|
|
1034 /* Invalidate font cache entries for charset on all devices. */
|
|
1035 Lisp_Object devcons, concons, hash_table;
|
|
1036 DEVICE_LOOP_NO_BREAK (devcons, concons)
|
|
1037 {
|
|
1038 struct device *d = XDEVICE (XCAR (devcons));
|
|
1039 hash_table = Fgethash (charset, d->charset_font_cache, Qunbound);
|
|
1040 if (!UNBOUNDP (hash_table))
|
|
1041 Fclrhash (hash_table);
|
|
1042 }
|
|
1043 }
|
|
1044
|
|
1045 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
|
|
1046 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
|
|
1047 Set the 'registry property of CHARSET to REGISTRY.
|
|
1048 */
|
|
1049 (charset, registry))
|
|
1050 {
|
|
1051 charset = Fget_charset (charset);
|
|
1052 CHECK_STRING (registry);
|
|
1053 XCHARSET_REGISTRY (charset) = registry;
|
|
1054 invalidate_charset_font_caches (charset);
|
|
1055 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
|
|
1056 return Qnil;
|
|
1057 }
|
|
1058
|
|
1059
|
|
1060 /************************************************************************/
|
|
1061 /* Lisp primitives for working with characters */
|
|
1062 /************************************************************************/
|
|
1063
|
|
1064 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
|
|
1065 Make a character from CHARSET and octets ARG1 and ARG2.
|
|
1066 ARG2 is required only for characters from two-dimensional charsets.
|
|
1067 For example, (make-char 'latin-iso8859-2 185) will return the Latin 2
|
|
1068 character s with caron.
|
|
1069 */
|
|
1070 (charset, arg1, arg2))
|
|
1071 {
|
440
|
1072 Lisp_Charset *cs;
|
428
|
1073 int a1, a2;
|
|
1074 int lowlim, highlim;
|
|
1075
|
|
1076 charset = Fget_charset (charset);
|
|
1077 cs = XCHARSET (charset);
|
|
1078
|
|
1079 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
|
|
1080 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
|
|
1081 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
|
|
1082 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
|
|
1083
|
|
1084 CHECK_INT (arg1);
|
|
1085 /* It is useful (and safe, according to Olivier Galibert) to strip
|
442
|
1086 the 8th bit off ARG1 and ARG2 because it allows programmers to
|
428
|
1087 write (make-char 'latin-iso8859-2 CODE) where code is the actual
|
|
1088 Latin 2 code of the character. */
|
|
1089 a1 = XINT (arg1) & 0x7f;
|
|
1090 if (a1 < lowlim || a1 > highlim)
|
|
1091 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
|
|
1092
|
|
1093 if (CHARSET_DIMENSION (cs) == 1)
|
|
1094 {
|
|
1095 if (!NILP (arg2))
|
563
|
1096 invalid_argument
|
428
|
1097 ("Charset is of dimension one; second octet must be nil", arg2);
|
|
1098 return make_char (MAKE_CHAR (charset, a1, 0));
|
|
1099 }
|
|
1100
|
|
1101 CHECK_INT (arg2);
|
|
1102 a2 = XINT (arg2) & 0x7f;
|
|
1103 if (a2 < lowlim || a2 > highlim)
|
|
1104 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
|
|
1105
|
|
1106 return make_char (MAKE_CHAR (charset, a1, a2));
|
|
1107 }
|
|
1108
|
|
1109 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
|
444
|
1110 Return the character set of CHARACTER.
|
428
|
1111 */
|
444
|
1112 (character))
|
428
|
1113 {
|
444
|
1114 CHECK_CHAR_COERCE_INT (character);
|
428
|
1115
|
|
1116 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
|
444
|
1117 (CHAR_LEADING_BYTE (XCHAR (character))));
|
428
|
1118 }
|
|
1119
|
438
|
1120 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
|
444
|
1121 Return the octet numbered N (should be 0 or 1) of CHARACTER.
|
438
|
1122 N defaults to 0 if omitted.
|
|
1123 */
|
444
|
1124 (character, n))
|
438
|
1125 {
|
|
1126 Lisp_Object charset;
|
|
1127 int octet0, octet1;
|
|
1128
|
444
|
1129 CHECK_CHAR_COERCE_INT (character);
|
438
|
1130
|
444
|
1131 BREAKUP_CHAR (XCHAR (character), charset, octet0, octet1);
|
438
|
1132
|
|
1133 if (NILP (n) || EQ (n, Qzero))
|
|
1134 return make_int (octet0);
|
|
1135 else if (EQ (n, make_int (1)))
|
|
1136 return make_int (octet1);
|
|
1137 else
|
563
|
1138 invalid_constant ("Octet number must be 0 or 1", n);
|
438
|
1139 }
|
|
1140
|
428
|
1141 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
|
444
|
1142 Return list of charset and one or two position-codes of CHARACTER.
|
428
|
1143 */
|
|
1144 (character))
|
|
1145 {
|
|
1146 /* This function can GC */
|
|
1147 struct gcpro gcpro1, gcpro2;
|
|
1148 Lisp_Object charset = Qnil;
|
|
1149 Lisp_Object rc = Qnil;
|
|
1150 int c1, c2;
|
|
1151
|
|
1152 GCPRO2 (charset, rc);
|
|
1153 CHECK_CHAR_COERCE_INT (character);
|
|
1154
|
|
1155 BREAKUP_CHAR (XCHAR (character), charset, c1, c2);
|
|
1156
|
|
1157 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2)
|
|
1158 {
|
|
1159 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
|
|
1160 }
|
|
1161 else
|
|
1162 {
|
|
1163 rc = list2 (XCHARSET_NAME (charset), make_int (c1));
|
|
1164 }
|
|
1165 UNGCPRO;
|
|
1166
|
|
1167 return rc;
|
|
1168 }
|
|
1169
|
|
1170
|
|
1171 #ifdef ENABLE_COMPOSITE_CHARS
|
|
1172 /************************************************************************/
|
|
1173 /* composite character functions */
|
|
1174 /************************************************************************/
|
|
1175
|
|
1176 Emchar
|
665
|
1177 lookup_composite_char (Intbyte *str, int len)
|
428
|
1178 {
|
|
1179 Lisp_Object lispstr = make_string (str, len);
|
|
1180 Lisp_Object ch = Fgethash (lispstr,
|
|
1181 Vcomposite_char_string2char_hash_table,
|
|
1182 Qunbound);
|
|
1183 Emchar emch;
|
|
1184
|
|
1185 if (UNBOUNDP (ch))
|
|
1186 {
|
|
1187 if (composite_char_row_next >= 128)
|
563
|
1188 invalid_operation ("No more composite chars available", lispstr);
|
428
|
1189 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
|
|
1190 composite_char_col_next);
|
|
1191 Fputhash (make_char (emch), lispstr,
|
|
1192 Vcomposite_char_char2string_hash_table);
|
|
1193 Fputhash (lispstr, make_char (emch),
|
|
1194 Vcomposite_char_string2char_hash_table);
|
|
1195 composite_char_col_next++;
|
|
1196 if (composite_char_col_next >= 128)
|
|
1197 {
|
|
1198 composite_char_col_next = 32;
|
|
1199 composite_char_row_next++;
|
|
1200 }
|
|
1201 }
|
|
1202 else
|
|
1203 emch = XCHAR (ch);
|
|
1204 return emch;
|
|
1205 }
|
|
1206
|
|
1207 Lisp_Object
|
|
1208 composite_char_string (Emchar ch)
|
|
1209 {
|
|
1210 Lisp_Object str = Fgethash (make_char (ch),
|
|
1211 Vcomposite_char_char2string_hash_table,
|
|
1212 Qunbound);
|
|
1213 assert (!UNBOUNDP (str));
|
|
1214 return str;
|
|
1215 }
|
|
1216
|
|
1217 xxDEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
|
|
1218 Convert a string into a single composite character.
|
|
1219 The character is the result of overstriking all the characters in
|
|
1220 the string.
|
|
1221 */
|
|
1222 (string))
|
|
1223 {
|
|
1224 CHECK_STRING (string);
|
|
1225 return make_char (lookup_composite_char (XSTRING_DATA (string),
|
|
1226 XSTRING_LENGTH (string)));
|
|
1227 }
|
|
1228
|
|
1229 xxDEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
|
|
1230 Return a string of the characters comprising a composite character.
|
|
1231 */
|
|
1232 (ch))
|
|
1233 {
|
|
1234 Emchar emch;
|
|
1235
|
|
1236 CHECK_CHAR (ch);
|
|
1237 emch = XCHAR (ch);
|
|
1238 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
|
563
|
1239 invalid_argument ("Must be composite char", ch);
|
428
|
1240 return composite_char_string (emch);
|
|
1241 }
|
|
1242 #endif /* ENABLE_COMPOSITE_CHARS */
|
|
1243
|
|
1244
|
|
1245 /************************************************************************/
|
|
1246 /* initialization */
|
|
1247 /************************************************************************/
|
|
1248
|
|
1249 void
|
|
1250 syms_of_mule_charset (void)
|
|
1251 {
|
442
|
1252 INIT_LRECORD_IMPLEMENTATION (charset);
|
|
1253
|
428
|
1254 DEFSUBR (Fcharsetp);
|
|
1255 DEFSUBR (Ffind_charset);
|
|
1256 DEFSUBR (Fget_charset);
|
|
1257 DEFSUBR (Fcharset_list);
|
|
1258 DEFSUBR (Fcharset_name);
|
|
1259 DEFSUBR (Fmake_charset);
|
|
1260 DEFSUBR (Fmake_reverse_direction_charset);
|
|
1261 /* DEFSUBR (Freverse_direction_charset); */
|
|
1262 DEFSUBR (Fcharset_from_attributes);
|
|
1263 DEFSUBR (Fcharset_short_name);
|
|
1264 DEFSUBR (Fcharset_long_name);
|
|
1265 DEFSUBR (Fcharset_description);
|
|
1266 DEFSUBR (Fcharset_dimension);
|
|
1267 DEFSUBR (Fcharset_property);
|
|
1268 DEFSUBR (Fcharset_id);
|
|
1269 DEFSUBR (Fset_charset_ccl_program);
|
|
1270 DEFSUBR (Fset_charset_registry);
|
|
1271
|
|
1272 DEFSUBR (Fmake_char);
|
|
1273 DEFSUBR (Fchar_charset);
|
438
|
1274 DEFSUBR (Fchar_octet);
|
428
|
1275 DEFSUBR (Fsplit_char);
|
|
1276
|
|
1277 #ifdef ENABLE_COMPOSITE_CHARS
|
|
1278 DEFSUBR (Fmake_composite_char);
|
|
1279 DEFSUBR (Fcomposite_char_string);
|
|
1280 #endif
|
|
1281
|
563
|
1282 DEFSYMBOL (Qcharsetp);
|
|
1283 DEFSYMBOL (Qregistry);
|
|
1284 DEFSYMBOL (Qfinal);
|
|
1285 DEFSYMBOL (Qgraphic);
|
|
1286 DEFSYMBOL (Qdirection);
|
|
1287 DEFSYMBOL (Qreverse_direction_charset);
|
|
1288 DEFSYMBOL (Qshort_name);
|
|
1289 DEFSYMBOL (Qlong_name);
|
428
|
1290
|
563
|
1291 DEFSYMBOL (Ql2r);
|
|
1292 DEFSYMBOL (Qr2l);
|
428
|
1293
|
|
1294 /* Charsets, compatible with FSF 20.3
|
|
1295 Naming convention is Script-Charset[-Edition] */
|
563
|
1296 DEFSYMBOL (Qascii);
|
|
1297 DEFSYMBOL (Qcontrol_1);
|
|
1298 DEFSYMBOL (Qlatin_iso8859_1);
|
|
1299 DEFSYMBOL (Qlatin_iso8859_2);
|
|
1300 DEFSYMBOL (Qlatin_iso8859_3);
|
|
1301 DEFSYMBOL (Qlatin_iso8859_4);
|
|
1302 DEFSYMBOL (Qthai_tis620);
|
|
1303 DEFSYMBOL (Qgreek_iso8859_7);
|
|
1304 DEFSYMBOL (Qarabic_iso8859_6);
|
|
1305 DEFSYMBOL (Qhebrew_iso8859_8);
|
|
1306 DEFSYMBOL (Qkatakana_jisx0201);
|
|
1307 DEFSYMBOL (Qlatin_jisx0201);
|
|
1308 DEFSYMBOL (Qcyrillic_iso8859_5);
|
|
1309 DEFSYMBOL (Qlatin_iso8859_9);
|
728
|
1310 DEFSYMBOL (Qlatin_iso8859_15);
|
563
|
1311 DEFSYMBOL (Qjapanese_jisx0208_1978);
|
|
1312 DEFSYMBOL (Qchinese_gb2312);
|
|
1313 DEFSYMBOL (Qjapanese_jisx0208);
|
|
1314 DEFSYMBOL (Qkorean_ksc5601);
|
|
1315 DEFSYMBOL (Qjapanese_jisx0212);
|
|
1316 DEFSYMBOL (Qchinese_cns11643_1);
|
|
1317 DEFSYMBOL (Qchinese_cns11643_2);
|
|
1318 DEFSYMBOL (Qchinese_big5_1);
|
|
1319 DEFSYMBOL (Qchinese_big5_2);
|
428
|
1320
|
563
|
1321 DEFSYMBOL (Qcomposite);
|
428
|
1322 }
|
|
1323
|
|
1324 void
|
|
1325 vars_of_mule_charset (void)
|
|
1326 {
|
|
1327 int i, j, k;
|
|
1328
|
452
|
1329 chlook = xnew_and_zero (struct charset_lookup); /* zero for Purify. */
|
|
1330 dump_add_root_struct_ptr (&chlook, &charset_lookup_description);
|
428
|
1331
|
|
1332 /* Table of charsets indexed by leading byte. */
|
|
1333 for (i = 0; i < countof (chlook->charset_by_leading_byte); i++)
|
|
1334 chlook->charset_by_leading_byte[i] = Qnil;
|
|
1335
|
|
1336 /* Table of charsets indexed by type/final-byte/direction. */
|
|
1337 for (i = 0; i < countof (chlook->charset_by_attributes); i++)
|
|
1338 for (j = 0; j < countof (chlook->charset_by_attributes[0]); j++)
|
|
1339 for (k = 0; k < countof (chlook->charset_by_attributes[0][0]); k++)
|
|
1340 chlook->charset_by_attributes[i][j][k] = Qnil;
|
|
1341
|
442
|
1342 chlook->next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
|
|
1343 chlook->next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
|
428
|
1344 }
|
|
1345
|
|
1346 void
|
|
1347 complex_vars_of_mule_charset (void)
|
|
1348 {
|
|
1349 staticpro (&Vcharset_hash_table);
|
|
1350 Vcharset_hash_table =
|
|
1351 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
|
|
1352
|
|
1353 /* Predefined character sets. We store them into variables for
|
|
1354 ease of access. */
|
|
1355
|
|
1356 staticpro (&Vcharset_ascii);
|
|
1357 Vcharset_ascii =
|
|
1358 make_charset (LEADING_BYTE_ASCII, Qascii, 1,
|
|
1359 CHARSET_TYPE_94, 1, 0, 'B',
|
|
1360 CHARSET_LEFT_TO_RIGHT,
|
|
1361 build_string ("ASCII"),
|
|
1362 build_string ("ASCII)"),
|
|
1363 build_string ("ASCII (ISO646 IRV)"),
|
|
1364 build_string ("\\(iso8859-[0-9]*\\|-ascii\\)"));
|
|
1365 staticpro (&Vcharset_control_1);
|
|
1366 Vcharset_control_1 =
|
|
1367 make_charset (LEADING_BYTE_CONTROL_1, Qcontrol_1, 2,
|
|
1368 CHARSET_TYPE_94, 1, 1, 0,
|
|
1369 CHARSET_LEFT_TO_RIGHT,
|
|
1370 build_string ("C1"),
|
|
1371 build_string ("Control characters"),
|
|
1372 build_string ("Control characters 128-191"),
|
|
1373 build_string (""));
|
|
1374 staticpro (&Vcharset_latin_iso8859_1);
|
|
1375 Vcharset_latin_iso8859_1 =
|
|
1376 make_charset (LEADING_BYTE_LATIN_ISO8859_1, Qlatin_iso8859_1, 2,
|
|
1377 CHARSET_TYPE_96, 1, 1, 'A',
|
|
1378 CHARSET_LEFT_TO_RIGHT,
|
|
1379 build_string ("Latin-1"),
|
|
1380 build_string ("ISO8859-1 (Latin-1)"),
|
|
1381 build_string ("ISO8859-1 (Latin-1)"),
|
|
1382 build_string ("iso8859-1"));
|
|
1383 staticpro (&Vcharset_latin_iso8859_2);
|
|
1384 Vcharset_latin_iso8859_2 =
|
|
1385 make_charset (LEADING_BYTE_LATIN_ISO8859_2, Qlatin_iso8859_2, 2,
|
|
1386 CHARSET_TYPE_96, 1, 1, 'B',
|
|
1387 CHARSET_LEFT_TO_RIGHT,
|
|
1388 build_string ("Latin-2"),
|
|
1389 build_string ("ISO8859-2 (Latin-2)"),
|
|
1390 build_string ("ISO8859-2 (Latin-2)"),
|
|
1391 build_string ("iso8859-2"));
|
|
1392 staticpro (&Vcharset_latin_iso8859_3);
|
|
1393 Vcharset_latin_iso8859_3 =
|
|
1394 make_charset (LEADING_BYTE_LATIN_ISO8859_3, Qlatin_iso8859_3, 2,
|
|
1395 CHARSET_TYPE_96, 1, 1, 'C',
|
|
1396 CHARSET_LEFT_TO_RIGHT,
|
|
1397 build_string ("Latin-3"),
|
|
1398 build_string ("ISO8859-3 (Latin-3)"),
|
|
1399 build_string ("ISO8859-3 (Latin-3)"),
|
|
1400 build_string ("iso8859-3"));
|
|
1401 staticpro (&Vcharset_latin_iso8859_4);
|
|
1402 Vcharset_latin_iso8859_4 =
|
|
1403 make_charset (LEADING_BYTE_LATIN_ISO8859_4, Qlatin_iso8859_4, 2,
|
|
1404 CHARSET_TYPE_96, 1, 1, 'D',
|
|
1405 CHARSET_LEFT_TO_RIGHT,
|
|
1406 build_string ("Latin-4"),
|
|
1407 build_string ("ISO8859-4 (Latin-4)"),
|
|
1408 build_string ("ISO8859-4 (Latin-4)"),
|
|
1409 build_string ("iso8859-4"));
|
|
1410 staticpro (&Vcharset_thai_tis620);
|
|
1411 Vcharset_thai_tis620 =
|
|
1412 make_charset (LEADING_BYTE_THAI_TIS620, Qthai_tis620, 2,
|
|
1413 CHARSET_TYPE_96, 1, 1, 'T',
|
|
1414 CHARSET_LEFT_TO_RIGHT,
|
|
1415 build_string ("TIS620"),
|
|
1416 build_string ("TIS620 (Thai)"),
|
|
1417 build_string ("TIS620.2529 (Thai)"),
|
|
1418 build_string ("tis620"));
|
|
1419 staticpro (&Vcharset_greek_iso8859_7);
|
|
1420 Vcharset_greek_iso8859_7 =
|
|
1421 make_charset (LEADING_BYTE_GREEK_ISO8859_7, Qgreek_iso8859_7, 2,
|
|
1422 CHARSET_TYPE_96, 1, 1, 'F',
|
|
1423 CHARSET_LEFT_TO_RIGHT,
|
|
1424 build_string ("ISO8859-7"),
|
|
1425 build_string ("ISO8859-7 (Greek)"),
|
|
1426 build_string ("ISO8859-7 (Greek)"),
|
|
1427 build_string ("iso8859-7"));
|
|
1428 staticpro (&Vcharset_arabic_iso8859_6);
|
|
1429 Vcharset_arabic_iso8859_6 =
|
|
1430 make_charset (LEADING_BYTE_ARABIC_ISO8859_6, Qarabic_iso8859_6, 2,
|
|
1431 CHARSET_TYPE_96, 1, 1, 'G',
|
|
1432 CHARSET_RIGHT_TO_LEFT,
|
|
1433 build_string ("ISO8859-6"),
|
|
1434 build_string ("ISO8859-6 (Arabic)"),
|
|
1435 build_string ("ISO8859-6 (Arabic)"),
|
|
1436 build_string ("iso8859-6"));
|
|
1437 staticpro (&Vcharset_hebrew_iso8859_8);
|
|
1438 Vcharset_hebrew_iso8859_8 =
|
|
1439 make_charset (LEADING_BYTE_HEBREW_ISO8859_8, Qhebrew_iso8859_8, 2,
|
|
1440 CHARSET_TYPE_96, 1, 1, 'H',
|
|
1441 CHARSET_RIGHT_TO_LEFT,
|
|
1442 build_string ("ISO8859-8"),
|
|
1443 build_string ("ISO8859-8 (Hebrew)"),
|
|
1444 build_string ("ISO8859-8 (Hebrew)"),
|
|
1445 build_string ("iso8859-8"));
|
|
1446 staticpro (&Vcharset_katakana_jisx0201);
|
|
1447 Vcharset_katakana_jisx0201 =
|
|
1448 make_charset (LEADING_BYTE_KATAKANA_JISX0201, Qkatakana_jisx0201, 2,
|
|
1449 CHARSET_TYPE_94, 1, 1, 'I',
|
|
1450 CHARSET_LEFT_TO_RIGHT,
|
|
1451 build_string ("JISX0201 Kana"),
|
|
1452 build_string ("JISX0201.1976 (Japanese Kana)"),
|
|
1453 build_string ("JISX0201.1976 Japanese Kana"),
|
|
1454 build_string ("jisx0201.1976"));
|
|
1455 staticpro (&Vcharset_latin_jisx0201);
|
|
1456 Vcharset_latin_jisx0201 =
|
|
1457 make_charset (LEADING_BYTE_LATIN_JISX0201, Qlatin_jisx0201, 2,
|
|
1458 CHARSET_TYPE_94, 1, 0, 'J',
|
|
1459 CHARSET_LEFT_TO_RIGHT,
|
|
1460 build_string ("JISX0201 Roman"),
|
|
1461 build_string ("JISX0201.1976 (Japanese Roman)"),
|
|
1462 build_string ("JISX0201.1976 Japanese Roman"),
|
|
1463 build_string ("jisx0201.1976"));
|
|
1464 staticpro (&Vcharset_cyrillic_iso8859_5);
|
|
1465 Vcharset_cyrillic_iso8859_5 =
|
|
1466 make_charset (LEADING_BYTE_CYRILLIC_ISO8859_5, Qcyrillic_iso8859_5, 2,
|
|
1467 CHARSET_TYPE_96, 1, 1, 'L',
|
|
1468 CHARSET_LEFT_TO_RIGHT,
|
|
1469 build_string ("ISO8859-5"),
|
|
1470 build_string ("ISO8859-5 (Cyrillic)"),
|
|
1471 build_string ("ISO8859-5 (Cyrillic)"),
|
|
1472 build_string ("iso8859-5"));
|
|
1473 staticpro (&Vcharset_latin_iso8859_9);
|
|
1474 Vcharset_latin_iso8859_9 =
|
|
1475 make_charset (LEADING_BYTE_LATIN_ISO8859_9, Qlatin_iso8859_9, 2,
|
|
1476 CHARSET_TYPE_96, 1, 1, 'M',
|
|
1477 CHARSET_LEFT_TO_RIGHT,
|
|
1478 build_string ("Latin-5"),
|
|
1479 build_string ("ISO8859-9 (Latin-5)"),
|
|
1480 build_string ("ISO8859-9 (Latin-5)"),
|
|
1481 build_string ("iso8859-9"));
|
728
|
1482 staticpro (&Vcharset_latin_iso8859_15);
|
|
1483 Vcharset_latin_iso8859_15 =
|
|
1484 make_charset (LEADING_BYTE_LATIN_ISO8859_15, Qlatin_iso8859_15, 2,
|
|
1485 CHARSET_TYPE_96, 1, 1, 'b',
|
|
1486 CHARSET_LEFT_TO_RIGHT,
|
|
1487 build_string ("Latin-9"),
|
|
1488 build_string ("ISO8859-15 (Latin-9)"),
|
|
1489 build_string ("ISO8859-15 (Latin-9)"),
|
|
1490 build_string ("iso8859-15"));
|
428
|
1491 staticpro (&Vcharset_japanese_jisx0208_1978);
|
|
1492 Vcharset_japanese_jisx0208_1978 =
|
|
1493 make_charset (LEADING_BYTE_JAPANESE_JISX0208_1978, Qjapanese_jisx0208_1978, 3,
|
|
1494 CHARSET_TYPE_94X94, 2, 0, '@',
|
|
1495 CHARSET_LEFT_TO_RIGHT,
|
|
1496 build_string ("JISX0208.1978"),
|
|
1497 build_string ("JISX0208.1978 (Japanese)"),
|
|
1498 build_string
|
|
1499 ("JISX0208.1978 Japanese Kanji (so called \"old JIS\")"),
|
|
1500 build_string ("\\(jisx0208\\|jisc6226\\)\\.1978"));
|
|
1501 staticpro (&Vcharset_chinese_gb2312);
|
|
1502 Vcharset_chinese_gb2312 =
|
|
1503 make_charset (LEADING_BYTE_CHINESE_GB2312, Qchinese_gb2312, 3,
|
|
1504 CHARSET_TYPE_94X94, 2, 0, 'A',
|
|
1505 CHARSET_LEFT_TO_RIGHT,
|
|
1506 build_string ("GB2312"),
|
|
1507 build_string ("GB2312)"),
|
|
1508 build_string ("GB2312 Chinese simplified"),
|
|
1509 build_string ("gb2312"));
|
|
1510 staticpro (&Vcharset_japanese_jisx0208);
|
|
1511 Vcharset_japanese_jisx0208 =
|
|
1512 make_charset (LEADING_BYTE_JAPANESE_JISX0208, Qjapanese_jisx0208, 3,
|
|
1513 CHARSET_TYPE_94X94, 2, 0, 'B',
|
|
1514 CHARSET_LEFT_TO_RIGHT,
|
|
1515 build_string ("JISX0208"),
|
|
1516 build_string ("JISX0208.1983/1990 (Japanese)"),
|
|
1517 build_string ("JISX0208.1983/1990 Japanese Kanji"),
|
|
1518 build_string ("jisx0208.19\\(83\\|90\\)"));
|
|
1519 staticpro (&Vcharset_korean_ksc5601);
|
|
1520 Vcharset_korean_ksc5601 =
|
|
1521 make_charset (LEADING_BYTE_KOREAN_KSC5601, Qkorean_ksc5601, 3,
|
|
1522 CHARSET_TYPE_94X94, 2, 0, 'C',
|
|
1523 CHARSET_LEFT_TO_RIGHT,
|
|
1524 build_string ("KSC5601"),
|
|
1525 build_string ("KSC5601 (Korean"),
|
|
1526 build_string ("KSC5601 Korean Hangul and Hanja"),
|
|
1527 build_string ("ksc5601"));
|
|
1528 staticpro (&Vcharset_japanese_jisx0212);
|
|
1529 Vcharset_japanese_jisx0212 =
|
|
1530 make_charset (LEADING_BYTE_JAPANESE_JISX0212, Qjapanese_jisx0212, 3,
|
|
1531 CHARSET_TYPE_94X94, 2, 0, 'D',
|
|
1532 CHARSET_LEFT_TO_RIGHT,
|
|
1533 build_string ("JISX0212"),
|
|
1534 build_string ("JISX0212 (Japanese)"),
|
|
1535 build_string ("JISX0212 Japanese Supplement"),
|
|
1536 build_string ("jisx0212"));
|
|
1537
|
|
1538 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
|
|
1539 staticpro (&Vcharset_chinese_cns11643_1);
|
|
1540 Vcharset_chinese_cns11643_1 =
|
|
1541 make_charset (LEADING_BYTE_CHINESE_CNS11643_1, Qchinese_cns11643_1, 3,
|
|
1542 CHARSET_TYPE_94X94, 2, 0, 'G',
|
|
1543 CHARSET_LEFT_TO_RIGHT,
|
|
1544 build_string ("CNS11643-1"),
|
|
1545 build_string ("CNS11643-1 (Chinese traditional)"),
|
|
1546 build_string
|
|
1547 ("CNS 11643 Plane 1 Chinese traditional"),
|
|
1548 build_string (CHINESE_CNS_PLANE_RE("1")));
|
|
1549 staticpro (&Vcharset_chinese_cns11643_2);
|
|
1550 Vcharset_chinese_cns11643_2 =
|
|
1551 make_charset (LEADING_BYTE_CHINESE_CNS11643_2, Qchinese_cns11643_2, 3,
|
|
1552 CHARSET_TYPE_94X94, 2, 0, 'H',
|
|
1553 CHARSET_LEFT_TO_RIGHT,
|
|
1554 build_string ("CNS11643-2"),
|
|
1555 build_string ("CNS11643-2 (Chinese traditional)"),
|
|
1556 build_string
|
|
1557 ("CNS 11643 Plane 2 Chinese traditional"),
|
|
1558 build_string (CHINESE_CNS_PLANE_RE("2")));
|
|
1559 staticpro (&Vcharset_chinese_big5_1);
|
|
1560 Vcharset_chinese_big5_1 =
|
|
1561 make_charset (LEADING_BYTE_CHINESE_BIG5_1, Qchinese_big5_1, 3,
|
|
1562 CHARSET_TYPE_94X94, 2, 0, '0',
|
|
1563 CHARSET_LEFT_TO_RIGHT,
|
|
1564 build_string ("Big5"),
|
|
1565 build_string ("Big5 (Level-1)"),
|
|
1566 build_string
|
|
1567 ("Big5 Level-1 Chinese traditional"),
|
|
1568 build_string ("big5"));
|
|
1569 staticpro (&Vcharset_chinese_big5_2);
|
|
1570 Vcharset_chinese_big5_2 =
|
|
1571 make_charset (LEADING_BYTE_CHINESE_BIG5_2, Qchinese_big5_2, 3,
|
|
1572 CHARSET_TYPE_94X94, 2, 0, '1',
|
|
1573 CHARSET_LEFT_TO_RIGHT,
|
|
1574 build_string ("Big5"),
|
|
1575 build_string ("Big5 (Level-2)"),
|
|
1576 build_string
|
|
1577 ("Big5 Level-2 Chinese traditional"),
|
|
1578 build_string ("big5"));
|
|
1579
|
|
1580
|
|
1581 #ifdef ENABLE_COMPOSITE_CHARS
|
|
1582 /* #### For simplicity, we put composite chars into a 96x96 charset.
|
|
1583 This is going to lead to problems because you can run out of
|
|
1584 room, esp. as we don't yet recycle numbers. */
|
|
1585 staticpro (&Vcharset_composite);
|
|
1586 Vcharset_composite =
|
|
1587 make_charset (LEADING_BYTE_COMPOSITE, Qcomposite, 3,
|
|
1588 CHARSET_TYPE_96X96, 2, 0, 0,
|
|
1589 CHARSET_LEFT_TO_RIGHT,
|
|
1590 build_string ("Composite"),
|
|
1591 build_string ("Composite characters"),
|
|
1592 build_string ("Composite characters"),
|
|
1593 build_string (""));
|
|
1594
|
|
1595 /* #### not dumped properly */
|
|
1596 composite_char_row_next = 32;
|
|
1597 composite_char_col_next = 32;
|
|
1598
|
|
1599 Vcomposite_char_string2char_hash_table =
|
|
1600 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQUAL);
|
|
1601 Vcomposite_char_char2string_hash_table =
|
|
1602 make_lisp_hash_table (500, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
|
|
1603 staticpro (&Vcomposite_char_string2char_hash_table);
|
|
1604 staticpro (&Vcomposite_char_char2string_hash_table);
|
|
1605 #endif /* ENABLE_COMPOSITE_CHARS */
|
|
1606
|
|
1607 }
|