Mercurial > hg > xemacs-beta
comparison src/mule-charset.c @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | |
children | 54cc21c15cbb |
comparison
equal
deleted
inserted
replaced
69:804d1389bcd6 | 70:131b0175ea99 |
---|---|
1 /* Functions to handle multilingual characters. | |
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4 | |
5 This file is part of XEmacs. | |
6 | |
7 XEmacs is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 2, or (at your option) any | |
10 later version. | |
11 | |
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with XEmacs; see the file COPYING. If not, write to | |
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 Boston, MA 02111-1307, USA. */ | |
21 | |
22 /* Synched up with: Mule 2.3. Not in FSF. */ | |
23 | |
24 /* Rewritten by Ben Wing <wing@666.com>. */ | |
25 | |
26 #include <config.h> | |
27 #include "lisp.h" | |
28 | |
29 #include "buffer.h" | |
30 #include "chartab.h" | |
31 #include "elhash.h" | |
32 #include "lstream.h" | |
33 #include "device.h" | |
34 #include "faces.h" | |
35 | |
36 /* The various pre-defined charsets. */ | |
37 | |
38 Lisp_Object Vcharset_ascii; | |
39 Lisp_Object Vcharset_control_1; | |
40 Lisp_Object Vcharset_latin_1; | |
41 Lisp_Object Vcharset_latin_2; | |
42 Lisp_Object Vcharset_latin_3; | |
43 Lisp_Object Vcharset_latin_4; | |
44 Lisp_Object Vcharset_cyrillic; | |
45 Lisp_Object Vcharset_arabic; | |
46 Lisp_Object Vcharset_greek; | |
47 Lisp_Object Vcharset_hebrew; | |
48 Lisp_Object Vcharset_latin_5; | |
49 Lisp_Object Vcharset_thai; | |
50 Lisp_Object Vcharset_japanese_jisx0201_kana; | |
51 Lisp_Object Vcharset_japanese_jisx0201_roman; | |
52 Lisp_Object Vcharset_japanese_jisx0208_1978; | |
53 Lisp_Object Vcharset_japanese_jisx0208; | |
54 Lisp_Object Vcharset_japanese_jisx0212; | |
55 Lisp_Object Vcharset_chinese_gb; | |
56 Lisp_Object Vcharset_chinese_big5_1; | |
57 Lisp_Object Vcharset_chinese_big5_2; | |
58 Lisp_Object Vcharset_chinese_cns11643_1; | |
59 Lisp_Object Vcharset_chinese_cns11643_2; | |
60 Lisp_Object Vcharset_korean_ksc5601; | |
61 Lisp_Object Vcharset_composite; | |
62 | |
63 /* Hashtables for composite chars. One maps string representing | |
64 composed chars to their equivalent chars; one goes the | |
65 other way. */ | |
66 Lisp_Object Vcomposite_char_char2string_hashtable; | |
67 Lisp_Object Vcomposite_char_string2char_hashtable; | |
68 | |
69 /* Table of charsets indexed by leading byte. */ | |
70 Lisp_Object charset_by_leading_byte[128]; | |
71 | |
72 /* Table of charsets indexed by type/final-byte/direction. */ | |
73 Lisp_Object charset_by_attributes[4][128][2]; | |
74 | |
75 static int composite_char_row_next; | |
76 static int composite_char_col_next; | |
77 | |
78 /* Table of number of bytes in the string representation of a character | |
79 indexed by the first byte of that representation. | |
80 | |
81 This value can be derived other ways -- e.g. something like | |
82 | |
83 (BYTE_ASCII_P (first_byte) ? 1 : | |
84 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))) | |
85 | |
86 but it's faster this way. */ | |
87 | |
88 Bytecount rep_bytes_by_first_byte[0xA0] = | |
89 { /* 16 x 8 ones for ASCII */ | |
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | |
98 /* 1 x 8 for Dimension-1 official Mule charsets */ | |
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | |
100 /* 0x90 - 0x9d are Dimension-2 official */ | |
101 /* 0x9e is Dimension-1 private */ | |
102 /* 0x9f is Dimension-2 private */ | |
103 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4 | |
104 }; | |
105 | |
106 Lisp_Object Qcharsetp; | |
107 | |
108 /* Qdoc_string, Qdimension, Qchars defined in general.c */ | |
109 Lisp_Object Qregistry, Qfinal, Qgraphic; | |
110 Lisp_Object Qdirection; | |
111 Lisp_Object Qreverse_direction_charset; | |
112 Lisp_Object Qccl_program; | |
113 | |
114 Lisp_Object Qascii, Qcontrol_1, Qlatin_1, Qlatin_2, Qlatin_3, Qlatin_4, Qlatin_5, | |
115 Qthai, Qarabic, Qhebrew, Qgreek, Qcyrillic, | |
116 | |
117 Qjapanese_jisx0201_kana, | |
118 Qjapanese_jisx0201_roman, | |
119 Qjapanese_jisx0208_1978, | |
120 Qjapanese_jisx0208, | |
121 Qjapanese_jisx0212, | |
122 | |
123 Qchinese_gb, Qchinese_big5_1, Qchinese_big5_2, | |
124 Qchinese_cns11643_1, Qchinese_cns11643_2, | |
125 | |
126 Qkorean_ksc5601, Qcomposite; | |
127 | |
128 Lisp_Object Ql2r, Qr2l; | |
129 | |
130 Lisp_Object Vcharset_hashtable; | |
131 | |
132 static Bufbyte next_allocated_1_byte_leading_byte; | |
133 static Bufbyte next_allocated_2_byte_leading_byte; | |
134 | |
135 /* Composite characters are characters constructed by overstriking two | |
136 or more regular characters. | |
137 | |
138 1) The old Mule implementation involves storing composite characters | |
139 in a buffer as a tag followed by all of the actual characters | |
140 used to make up the composite character. I think this is a bad | |
141 idea; it greatly complicates code that wants to handle strings | |
142 one character at a time because it has to deal with the possibility | |
143 of great big ungainly characters. It's much more reasonable to | |
144 simply store an index into a table of composite characters. | |
145 | |
146 2) The current implementation only allows for 16,384 separate | |
147 composite characters over the lifetime of the XEmacs process. | |
148 This could become a potential problem if the user | |
149 edited lots of different files that use composite characters. | |
150 Due to FSF bogosity, increasing the number of allowable | |
151 composite characters under Mule would decrease the number | |
152 of possible faces that can exist. Mule already has shrunk | |
153 this to 2048, and further shrinkage would become uncomfortable. | |
154 No such problems exist in XEmacs. | |
155 | |
156 Composite characters could be represented as 0x80 C1 C2 C3, | |
157 where each C[1-3] is in the range 0xA0 - 0xFF. This allows | |
158 for slightly under 2^20 (one million) composite characters | |
159 over the XEmacs process lifetime, and you only need to | |
160 increase the size of a Mule character from 19 to 21 bits. | |
161 Or you could use 0x80 C1 C2 C3 C4, allowing for about | |
162 85 million (slightly over 2^26) composite characters. */ | |
163 | |
164 | |
165 /************************************************************************/ | |
166 /* Basic Emchar functions */ | |
167 /************************************************************************/ | |
168 | |
169 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded | |
170 string in STR. Returns the number of bytes stored. | |
171 Do not call this directly. Use the macro set_charptr_emchar() instead. | |
172 */ | |
173 | |
174 Bytecount | |
175 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c) | |
176 { | |
177 Bufbyte *p; | |
178 Bufbyte lb; | |
179 int c1, c2; | |
180 Lisp_Object charset; | |
181 | |
182 p = str; | |
183 BREAKUP_CHAR (c, charset, c1, c2); | |
184 lb = CHAR_LEADING_BYTE (c); | |
185 if (LEADING_BYTE_PRIVATE_P (lb)) | |
186 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb); | |
187 *p++ = lb; | |
188 if (EQ (charset, Vcharset_control_1)) | |
189 c1 += 0x20; | |
190 *p++ = c1 | 0x80; | |
191 if (c2) | |
192 *p++ = c2 | 0x80; | |
193 | |
194 return (p - str); | |
195 } | |
196 | |
197 /* Return the first character from a Mule-encoded string in STR, | |
198 assuming it's non-ASCII. Do not call this directly. | |
199 Use the macro charptr_emchar() instead. */ | |
200 | |
201 Emchar | |
202 non_ascii_charptr_emchar (CONST Bufbyte *str) | |
203 { | |
204 Bufbyte i0 = *str, i1, i2 = 0; | |
205 Lisp_Object charset; | |
206 | |
207 if (i0 == LEADING_BYTE_CONTROL_1) | |
208 return (Emchar) (*++str - 0x20); | |
209 | |
210 if (LEADING_BYTE_PREFIX_P (i0)) | |
211 i0 = *++str; | |
212 | |
213 i1 = *++str & 0x7F; | |
214 | |
215 charset = CHARSET_BY_LEADING_BYTE (i0); | |
216 if (XCHARSET_DIMENSION (charset) == 2) | |
217 i2 = *++str & 0x7F; | |
218 | |
219 return MAKE_CHAR (charset, i1, i2); | |
220 } | |
221 | |
222 /* Return whether CH is a valid Emchar, assuming it's non-ASCII. | |
223 Do not call this directly. Use the macro valid_char_p() instead. */ | |
224 | |
225 int | |
226 non_ascii_valid_char_p (Emchar ch) | |
227 { | |
228 int f1, f2, f3; | |
229 | |
230 /* Must have only lowest 19 bits set */ | |
231 if (ch & ~0x7FFFF) | |
232 return 0; | |
233 | |
234 f1 = CHAR_FIELD1 (ch); | |
235 f2 = CHAR_FIELD2 (ch); | |
236 f3 = CHAR_FIELD3 (ch); | |
237 | |
238 if (f1 == 0) | |
239 { | |
240 Lisp_Object charset; | |
241 | |
242 if (f2 < MIN_CHAR_FIELD2_OFFICIAL || | |
243 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) || | |
244 f2 > MAX_CHAR_FIELD2_PRIVATE) | |
245 return 0; | |
246 if (f3 < 0x20) | |
247 return 0; | |
248 | |
249 if (f3 != 0x20 && f3 != 0x7F) | |
250 return 1; | |
251 | |
252 /* | |
253 NOTE: This takes advantage of the fact that | |
254 FIELD2_TO_OFFICIAL_LEADING_BYTE and | |
255 FIELD2_TO_PRIVATE_LEADING_BYTE are the same. | |
256 */ | |
257 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE); | |
258 return (XCHARSET_CHARS (charset) == 96); | |
259 } | |
260 else | |
261 { | |
262 Lisp_Object charset; | |
263 | |
264 if (f1 < MIN_CHAR_FIELD1_OFFICIAL || | |
265 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) || | |
266 f1 > MAX_CHAR_FIELD1_PRIVATE) | |
267 return 0; | |
268 if (f2 < 0x20 || f3 < 0x20) | |
269 return 0; | |
270 | |
271 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE) | |
272 { | |
273 if (UNBOUNDP (Fgethash (make_int (ch), | |
274 Vcomposite_char_char2string_hashtable, | |
275 Qunbound))) | |
276 return 0; | |
277 return 1; | |
278 } | |
279 | |
280 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F) | |
281 return 1; | |
282 | |
283 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL) | |
284 charset = | |
285 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE); | |
286 else | |
287 charset = | |
288 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE); | |
289 | |
290 return (XCHARSET_CHARS (charset) == 96); | |
291 } | |
292 } | |
293 | |
294 | |
295 /************************************************************************/ | |
296 /* Basic string functions */ | |
297 /************************************************************************/ | |
298 | |
299 /* Copy the character pointed to by PTR into STR, assuming it's | |
300 non-ASCII. Do not call this directly. Use the macro | |
301 charptr_copy_char() instead. */ | |
302 | |
303 Bytecount | |
304 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str) | |
305 { | |
306 Bufbyte *strptr = str; | |
307 *strptr = *ptr++; | |
308 switch (REP_BYTES_BY_FIRST_BYTE (*strptr)) | |
309 { | |
310 /* Notice fallthrough. */ | |
311 case 4: *++strptr = *ptr++; | |
312 case 3: *++strptr = *ptr++; | |
313 case 2: *++strptr = *ptr; | |
314 break; | |
315 default: | |
316 abort (); | |
317 } | |
318 return strptr + 1 - str; | |
319 } | |
320 | |
321 | |
322 /************************************************************************/ | |
323 /* streams of Emchars */ | |
324 /************************************************************************/ | |
325 | |
326 /* Treat a stream as a stream of Emchar's rather than a stream of bytes. | |
327 The functions below are not meant to be called directly; use | |
328 the macros in insdel.h. */ | |
329 | |
330 Emchar | |
331 Lstream_get_emchar_1 (Lstream *stream, int ch) | |
332 { | |
333 Bufbyte str[MAX_EMCHAR_LEN]; | |
334 Bufbyte *strptr = str; | |
335 | |
336 str[0] = (Bufbyte) ch; | |
337 switch (REP_BYTES_BY_FIRST_BYTE (ch)) | |
338 { | |
339 /* Notice fallthrough. */ | |
340 case 4: | |
341 ch = Lstream_getc (stream); | |
342 assert (ch >= 0); | |
343 *++strptr = (Bufbyte) ch; | |
344 case 3: | |
345 ch = Lstream_getc (stream); | |
346 assert (ch >= 0); | |
347 *++strptr = (Bufbyte) ch; | |
348 case 2: | |
349 ch = Lstream_getc (stream); | |
350 assert (ch >= 0); | |
351 *++strptr = (Bufbyte) ch; | |
352 break; | |
353 default: | |
354 abort (); | |
355 } | |
356 return charptr_emchar (str); | |
357 } | |
358 | |
359 int | |
360 Lstream_fput_emchar (Lstream *stream, Emchar ch) | |
361 { | |
362 Bufbyte str[MAX_EMCHAR_LEN]; | |
363 Bytecount len = set_charptr_emchar (str, ch); | |
364 return Lstream_write (stream, str, len); | |
365 } | |
366 | |
367 void | |
368 Lstream_funget_emchar (Lstream *stream, Emchar ch) | |
369 { | |
370 Bufbyte str[MAX_EMCHAR_LEN]; | |
371 Bytecount len = set_charptr_emchar (str, ch); | |
372 Lstream_unread (stream, str, len); | |
373 } | |
374 | |
375 | |
376 /************************************************************************/ | |
377 /* charset object */ | |
378 /************************************************************************/ | |
379 | |
380 static Lisp_Object mark_charset (Lisp_Object, void (*) (Lisp_Object)); | |
381 static void print_charset (Lisp_Object, Lisp_Object, int); | |
382 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset, | |
383 mark_charset, print_charset, 0, 0, 0, | |
384 struct Lisp_Charset); | |
385 | |
386 static Lisp_Object | |
387 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object)) | |
388 { | |
389 struct Lisp_Charset *cs = XCHARSET (obj); | |
390 | |
391 (markobj) (cs->doc_string); | |
392 (markobj) (cs->registry); | |
393 (markobj) (cs->ccl_program); | |
394 return cs->name; | |
395 } | |
396 | |
397 static void | |
398 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag) | |
399 { | |
400 struct Lisp_Charset *cs = XCHARSET (obj); | |
401 char buf[200]; | |
402 | |
403 if (print_readably) | |
404 error ("printing unreadable object #<charset %s 0x%x>", | |
405 string_data (XSYMBOL (CHARSET_NAME (cs))->name), | |
406 cs->header.uid); | |
407 | |
408 write_c_string ("#<charset ", printcharfun); | |
409 print_internal (CHARSET_NAME (cs), printcharfun, 0); | |
410 write_c_string (" ", printcharfun); | |
411 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1); | |
412 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=", | |
413 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" : | |
414 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" : | |
415 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" : | |
416 "96x96", | |
417 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l", | |
418 CHARSET_COLUMNS (cs), | |
419 CHARSET_GRAPHIC (cs), | |
420 CHARSET_FINAL (cs)); | |
421 write_c_string (buf, printcharfun); | |
422 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0); | |
423 sprintf (buf, " 0x%x>", cs->header.uid); | |
424 write_c_string (buf, printcharfun); | |
425 } | |
426 | |
427 /* Make a new charset. */ | |
428 | |
429 static Lisp_Object | |
430 make_charset (Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes, | |
431 unsigned char type, unsigned char columns, unsigned char graphic, | |
432 Bufbyte final, unsigned char direction, Lisp_Object doc, | |
433 Lisp_Object reg) | |
434 { | |
435 struct Lisp_Charset *cs; | |
436 Lisp_Object obj = Qnil; | |
437 | |
438 cs = (struct Lisp_Charset *) alloc_lcrecord (sizeof (struct Lisp_Charset), | |
439 lrecord_charset); | |
440 XSETCHARSET (obj, cs); | |
441 | |
442 CHARSET_NAME (cs) = name; | |
443 CHARSET_LEADING_BYTE (cs) = leading_byte; | |
444 CHARSET_REP_BYTES (cs) = rep_bytes; | |
445 CHARSET_DIRECTION (cs) = direction; | |
446 CHARSET_TYPE (cs) = type; | |
447 CHARSET_COLUMNS (cs) = columns; | |
448 CHARSET_GRAPHIC (cs) = graphic; | |
449 CHARSET_FINAL (cs) = final; | |
450 CHARSET_DOC_STRING (cs) = doc; | |
451 CHARSET_REGISTRY (cs) = reg; | |
452 CHARSET_CCL_PROGRAM (cs) = Qnil; | |
453 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil; | |
454 | |
455 if (final) | |
456 { | |
457 /* some charsets do not have final characters. This includes | |
458 ASCII, Control-1, Composite, and the two faux private | |
459 charsets. */ | |
460 assert (NILP (charset_by_attributes[type][final][direction])); | |
461 charset_by_attributes[type][final][direction] = obj; | |
462 } | |
463 | |
464 assert (NILP (charset_by_leading_byte[leading_byte - 128])); | |
465 charset_by_leading_byte[leading_byte - 128] = obj; | |
466 if (leading_byte < 0xA0) | |
467 /* official leading byte */ | |
468 rep_bytes_by_first_byte[leading_byte] = rep_bytes; | |
469 | |
470 /* Some charsets are "faux" and don't have names or really exist at | |
471 all except in the leading-byte table. */ | |
472 if (!NILP (name)) | |
473 Fputhash (name, obj, Vcharset_hashtable); | |
474 return obj; | |
475 } | |
476 | |
477 static int | |
478 get_unallocated_leading_byte (int dimension) | |
479 { | |
480 int lb; | |
481 | |
482 if (dimension == 1) | |
483 { | |
484 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1) | |
485 lb = 0; | |
486 else | |
487 lb = next_allocated_1_byte_leading_byte++; | |
488 } | |
489 else | |
490 { | |
491 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2) | |
492 lb = 0; | |
493 else | |
494 lb = next_allocated_2_byte_leading_byte++; | |
495 } | |
496 | |
497 if (!lb) | |
498 signal_simple_error | |
499 ("No more character sets free for this dimension", | |
500 make_int (dimension)); | |
501 | |
502 return lb; | |
503 } | |
504 | |
505 | |
506 /************************************************************************/ | |
507 /* Basic charset Lisp functions */ | |
508 /************************************************************************/ | |
509 | |
510 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /* | |
511 Return non-nil if OBJECT is a charset. | |
512 */ | |
513 (object)) | |
514 { | |
515 return (CHARSETP (object) ? Qt : Qnil); | |
516 } | |
517 | |
518 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /* | |
519 Retrieve the charset of the given name. | |
520 If CHARSET-OR-NAME is a charset object, it is simply returned. | |
521 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset, | |
522 nil is returned. Otherwise the associated charset object is returned. | |
523 */ | |
524 (charset_or_name)) | |
525 { | |
526 if (CHARSETP (charset_or_name)) | |
527 return charset_or_name; | |
528 CHECK_SYMBOL (charset_or_name); | |
529 | |
530 return Fgethash (charset_or_name, Vcharset_hashtable, Qnil); | |
531 } | |
532 | |
533 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /* | |
534 Retrieve the charset of the given name. | |
535 Same as `find-charset' except an error is signalled if there is no such | |
536 charset instead of returning nil. | |
537 */ | |
538 (name)) | |
539 { | |
540 Lisp_Object charset = Ffind_charset (name); | |
541 | |
542 if (NILP (charset)) | |
543 signal_simple_error ("No such charset", name); | |
544 return charset; | |
545 } | |
546 | |
547 /* We store the charsets in hash tables with the names as the key and the | |
548 actual charset object as the value. Occasionally we need to use them | |
549 in a list format. These routines provide us with that. */ | |
550 struct charset_list_closure | |
551 { | |
552 Lisp_Object *charset_list; | |
553 }; | |
554 | |
555 static void | |
556 add_charset_to_list_mapper (CONST void *hash_key, void *hash_contents, | |
557 void *charset_list_closure) | |
558 { | |
559 /* This function can GC */ | |
560 Lisp_Object key, contents; | |
561 Lisp_Object *charset_list; | |
562 struct charset_list_closure *chcl = charset_list_closure; | |
563 CVOID_TO_LISP (key, hash_key); | |
564 VOID_TO_LISP (contents, hash_contents); | |
565 charset_list = chcl->charset_list; | |
566 | |
567 *charset_list = Fcons (XCHARSET_NAME (contents), *charset_list); | |
568 } | |
569 | |
570 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /* | |
571 Return a list of the names of all defined charsets. | |
572 */ | |
573 ()) | |
574 { | |
575 Lisp_Object charset_list = Qnil; | |
576 struct gcpro gcpro1; | |
577 struct charset_list_closure charset_list_closure; | |
578 | |
579 GCPRO1 (charset_list); | |
580 charset_list_closure.charset_list = &charset_list; | |
581 elisp_maphash (add_charset_to_list_mapper, Vcharset_hashtable, | |
582 &charset_list_closure); | |
583 UNGCPRO; | |
584 | |
585 return charset_list; | |
586 } | |
587 | |
588 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /* | |
589 Return the name of the given charset. | |
590 */ | |
591 (charset)) | |
592 { | |
593 return (XCHARSET_NAME (Fget_charset (charset))); | |
594 } | |
595 | |
596 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /* | |
597 Define a new character set. | |
598 This function is for use with Mule support. | |
599 NAME is a symbol, the name by which the character set is normally referred. | |
600 DOC-STRING is a string describing the character set. | |
601 PROPS is a property list, describing the specific nature of the | |
602 character set. Recognized properties are: | |
603 | |
604 'registry A regular expression matching the font registry field for | |
605 this character set. | |
606 'dimension Number of octets used to index a character in this charset. | |
607 Either 1 or 2. Defaults to 1. | |
608 'columns Number of columns used to display a character in this charset. | |
609 Only used in TTY mode. (Under X, the actual width of a | |
610 character can be derived from the font used to display the | |
611 characters.) If unspecified, defaults to the dimension | |
612 (this is almost always the correct value). | |
613 'chars Number of characters in each dimension (94 or 96). | |
614 Defaults to 94. Note that if the dimension is 2, the | |
615 character set thus described is 94x94 or 96x96. | |
616 'final Final byte of ISO 2022 escape sequence. Must be | |
617 supplied. Each combination of (DIMENSION, CHARS) defines a | |
618 separate namespace for final bytes. Note that ISO | |
619 2022 restricts the final byte to the range | |
620 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if | |
621 dimension == 2. Note also that final bytes in the range | |
622 0x30 - 0x3F are reserved for user-defined (not official) | |
623 character sets. | |
624 'graphic 0 (use left half of font on output) or 1 (use right half | |
625 of font on output). Defaults to 0. For example, for | |
626 a font whose registry is ISO8859-1, the left half | |
627 (octets 0x20 - 0x7F) is the `ascii' character set, while | |
628 the right half (octets 0xA0 - 0xFF) is the `latin-1' | |
629 character set. With 'graphic set to 0, the octets | |
630 will have their high bit cleared; with it set to 1, | |
631 the octets will have their high bit set. | |
632 'direction 'l2r (left-to-right) or 'r2l (right-to-left). | |
633 Defaults to 'l2r. | |
634 'ccl-program A compiled CCL program used to convert a character in | |
635 this charset into an index into the font. This is in | |
636 addition to the 'graphic property. The CCL program | |
637 is passed the octets of the character, with the high | |
638 bit cleared and set depending upon whether the value | |
639 of the 'graphic property is 0 or 1. | |
640 */ | |
641 (name, doc_string, props)) | |
642 { | |
643 int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1; | |
644 int direction = CHARSET_LEFT_TO_RIGHT; | |
645 int type; | |
646 Lisp_Object registry = Qnil; | |
647 Lisp_Object charset; | |
648 Lisp_Object rest, keyword, value; | |
649 Lisp_Object ccl_program = Qnil; | |
650 | |
651 CHECK_SYMBOL (name); | |
652 if (!NILP (doc_string)) | |
653 CHECK_STRING (doc_string); | |
654 | |
655 charset = Ffind_charset (name); | |
656 if (!NILP (charset)) | |
657 signal_simple_error ("Cannot redefine existing charset", name); | |
658 | |
659 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props) | |
660 { | |
661 if (EQ (keyword, Qdimension)) | |
662 { | |
663 CHECK_INT (value); | |
664 dimension = XINT (value); | |
665 if (dimension < 1 || dimension > 2) | |
666 signal_simple_error ("Invalid value for 'dimension", value); | |
667 } | |
668 | |
669 else if (EQ (keyword, Qchars)) | |
670 { | |
671 CHECK_INT (value); | |
672 chars = XINT (value); | |
673 if (chars != 94 && chars != 96) | |
674 signal_simple_error ("Invalid value for 'chars", value); | |
675 } | |
676 | |
677 else if (EQ (keyword, Qcolumns)) | |
678 { | |
679 CHECK_INT (value); | |
680 columns = XINT (value); | |
681 if (columns != 1 && columns != 2) | |
682 signal_simple_error ("Invalid value for 'columns", value); | |
683 } | |
684 | |
685 else if (EQ (keyword, Qgraphic)) | |
686 { | |
687 CHECK_INT (value); | |
688 graphic = XINT (value); | |
689 if (graphic < 0 || graphic > 1) | |
690 signal_simple_error ("Invalid value for 'graphic", value); | |
691 } | |
692 | |
693 else if (EQ (keyword, Qregistry)) | |
694 { | |
695 CHECK_STRING (value); | |
696 registry = value; | |
697 } | |
698 | |
699 else if (EQ (keyword, Qdirection)) | |
700 { | |
701 if (EQ (value, Ql2r)) | |
702 direction = CHARSET_LEFT_TO_RIGHT; | |
703 else if (EQ (value, Qr2l)) | |
704 direction = CHARSET_RIGHT_TO_LEFT; | |
705 else | |
706 signal_simple_error ("Invalid value for 'direction", value); | |
707 } | |
708 | |
709 else if (EQ (keyword, Qfinal)) | |
710 { | |
711 CHECK_CHAR_COERCE_INT (value); | |
712 final = XCHAR (value); | |
713 if (final < '0' || final > '~') | |
714 signal_simple_error ("Invalid value for 'final", value); | |
715 } | |
716 | |
717 else if (EQ (keyword, Qccl_program)) | |
718 { | |
719 CHECK_VECTOR (value); | |
720 ccl_program = value; | |
721 } | |
722 | |
723 else | |
724 signal_simple_error ("Unrecognized property", keyword); | |
725 } | |
726 | |
727 if (!final) | |
728 error ("'final must be specified"); | |
729 if (dimension == 2 && final > 0x5F) | |
730 signal_simple_error | |
731 ("Final must be in the range 0x30 - 0x5F for dimension == 2", | |
732 make_char (final)); | |
733 | |
734 if (dimension == 1) | |
735 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; | |
736 else | |
737 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; | |
738 | |
739 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) || | |
740 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT))) | |
741 error | |
742 ("Character set already defined for this DIMENSION/CHARS/FINAL combo"); | |
743 | |
744 lb = get_unallocated_leading_byte (dimension); | |
745 | |
746 if (NILP (doc_string)) | |
747 doc_string = build_string (""); | |
748 | |
749 if (NILP (registry)) | |
750 registry = build_string (""); | |
751 | |
752 if (columns == -1) | |
753 columns = dimension; | |
754 charset = make_charset (name, lb, dimension + 2, type, columns, graphic, | |
755 final, direction, doc_string, registry); | |
756 if (!NILP (ccl_program)) | |
757 XCHARSET_CCL_PROGRAM (charset) = ccl_program; | |
758 return charset; | |
759 } | |
760 | |
761 DEFUN ("make-reverse-direction-charset", | |
762 Fmake_reverse_direction_charset, 2, 2, 0, /* | |
763 Make a charset equivalent to CHARSET but which goes in the opposite direction. | |
764 NEW-NAME is the name of the new charset. Return the new charset. | |
765 */ | |
766 (charset, new_name)) | |
767 { | |
768 Lisp_Object new_charset = Qnil; | |
769 int lb, dimension, columns, graphic, final; | |
770 int direction, type; | |
771 Lisp_Object registry, doc_string; | |
772 struct Lisp_Charset *cs; | |
773 | |
774 charset = Fget_charset (charset); | |
775 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset))) | |
776 signal_simple_error ("Charset already has reverse-direction charset", | |
777 charset); | |
778 | |
779 CHECK_SYMBOL (new_name); | |
780 if (!NILP (Ffind_charset (new_name))) | |
781 signal_simple_error ("Cannot redefine existing charset", new_name); | |
782 | |
783 cs = XCHARSET (charset); | |
784 | |
785 type = CHARSET_TYPE (cs); | |
786 columns = CHARSET_COLUMNS (cs); | |
787 dimension = CHARSET_DIMENSION (cs); | |
788 lb = get_unallocated_leading_byte (dimension); | |
789 | |
790 graphic = CHARSET_GRAPHIC (cs); | |
791 final = CHARSET_FINAL (cs); | |
792 direction = CHARSET_RIGHT_TO_LEFT; | |
793 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT) | |
794 direction = CHARSET_LEFT_TO_RIGHT; | |
795 doc_string = CHARSET_DOC_STRING (cs); | |
796 registry = CHARSET_REGISTRY (cs); | |
797 | |
798 new_charset = make_charset (new_name, lb, dimension + 2, type, columns, | |
799 graphic, final, direction, doc_string, registry); | |
800 | |
801 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset; | |
802 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset; | |
803 | |
804 return new_charset; | |
805 } | |
806 | |
807 /* #### The defsubr for this is commented out at the moment but no | |
808 reason why is given. */ | |
809 #if 0 | |
810 DEFUN ("charset-reverse-direction-charset", | |
811 Fcharset_reverse_direction_charset, 1, 1, 0, /* | |
812 Return the reverse-direction charset parallel to CHARSET, if any. | |
813 This is the charset with the same properties (in particular, the same | |
814 dimension, number of characters per dimension, and final byte) as | |
815 CHARSET but whose characters are displayed in the opposite direction. | |
816 */ | |
817 (charset)) | |
818 { | |
819 charset = Fget_charset (charset); | |
820 return XCHARSET_REVERSE_DIRECTION_CHARSET (charset); | |
821 } | |
822 #endif | |
823 | |
824 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /* | |
825 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION. | |
826 If DIRECTION is omitted, both directions will be checked (left-to-right | |
827 will be returned if character sets exist for both directions). | |
828 */ | |
829 (dimension, chars, final, direction)) | |
830 { | |
831 int dm, ch, fi, di = -1; | |
832 int type; | |
833 Lisp_Object obj = Qnil; | |
834 | |
835 CHECK_INT (dimension); | |
836 dm = XINT (dimension); | |
837 if (dm < 1 || dm > 2) | |
838 signal_simple_error ("Invalid value for DIMENSION", dimension); | |
839 | |
840 CHECK_INT (chars); | |
841 ch = XINT (chars); | |
842 if (ch != 94 && ch != 96) | |
843 signal_simple_error ("Invalid value for CHARS", chars); | |
844 | |
845 CHECK_CHAR_COERCE_INT (final); | |
846 fi = XCHAR (final); | |
847 if (fi < '0' || fi > '~') | |
848 signal_simple_error ("Invalid value for FINAL", final); | |
849 | |
850 if (EQ (direction, Ql2r)) | |
851 di = CHARSET_LEFT_TO_RIGHT; | |
852 else if (EQ (direction, Qr2l)) | |
853 di = CHARSET_RIGHT_TO_LEFT; | |
854 else if (!NILP (direction)) | |
855 signal_simple_error ("Invalid value for DIRECTION", direction); | |
856 | |
857 if (dm == 2 && fi > 0x5F) | |
858 signal_simple_error | |
859 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final); | |
860 | |
861 if (dm == 1) | |
862 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96; | |
863 else | |
864 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96; | |
865 | |
866 if (di == -1) | |
867 { | |
868 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT); | |
869 if (NILP (obj)) | |
870 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT); | |
871 } | |
872 else | |
873 obj = CHARSET_BY_ATTRIBUTES (type, fi, di); | |
874 | |
875 if (CHARSETP (obj)) | |
876 return XCHARSET_NAME (obj); | |
877 return obj; | |
878 } | |
879 | |
880 DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /* | |
881 Return doc string of CHARSET. | |
882 */ | |
883 (charset)) | |
884 { | |
885 return XCHARSET_DOC_STRING (Fget_charset (charset)); | |
886 } | |
887 | |
888 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /* | |
889 Return dimension of CHARSET. | |
890 */ | |
891 (charset)) | |
892 { | |
893 return make_int (XCHARSET_DIMENSION (Fget_charset (charset))); | |
894 } | |
895 | |
896 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /* | |
897 Return property PROP of CHARSET. | |
898 Recognized properties are those listed in `make-charset', as well as | |
899 'name and 'doc-string. | |
900 */ | |
901 (charset, prop)) | |
902 { | |
903 struct Lisp_Charset *cs; | |
904 | |
905 charset = Fget_charset (charset); | |
906 cs = XCHARSET (charset); | |
907 | |
908 CHECK_SYMBOL (prop); | |
909 if (EQ (prop, Qname)) return CHARSET_NAME (cs); | |
910 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs); | |
911 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs)); | |
912 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs)); | |
913 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs)); | |
914 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs)); | |
915 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs)); | |
916 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs); | |
917 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs); | |
918 if (EQ (prop, Qdirection)) | |
919 return (CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l); | |
920 if (EQ (prop, Qreverse_direction_charset)) | |
921 { | |
922 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs); | |
923 if (NILP (obj)) | |
924 return Qnil; | |
925 else | |
926 return XCHARSET_NAME (obj); | |
927 } | |
928 signal_simple_error ("Unrecognized charset property name", prop); | |
929 return Qnil; /* not reached */ | |
930 } | |
931 | |
932 /* #### We need to figure out which properties we really want to | |
933 allow to be set. */ | |
934 | |
935 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /* | |
936 Set the 'ccl-program property of CHARSET to CCL-PROGRAM. | |
937 */ | |
938 (charset, ccl_program)) | |
939 { | |
940 charset = Fget_charset (charset); | |
941 CHECK_VECTOR (ccl_program); | |
942 XCHARSET_CCL_PROGRAM (charset) = ccl_program; | |
943 return Qnil; | |
944 } | |
945 | |
946 static void | |
947 invalidate_charset_font_caches (Lisp_Object charset) | |
948 { | |
949 /* Invalidate font cache entries for charset on all devices. */ | |
950 Lisp_Object devcons, concons, hashtab; | |
951 DEVICE_LOOP_NO_BREAK (devcons, concons) | |
952 { | |
953 struct device *d = XDEVICE (XCAR (devcons)); | |
954 hashtab = Fgethash (charset, d->charset_font_cache, Qunbound); | |
955 if (!UNBOUNDP (hashtab)) | |
956 Fclrhash (hashtab); | |
957 } | |
958 } | |
959 | |
960 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */ | |
961 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /* | |
962 Set the 'registry property of CHARSET to REGISTRY. | |
963 */ | |
964 (charset, registry)) | |
965 { | |
966 charset = Fget_charset (charset); | |
967 CHECK_STRING (registry); | |
968 XCHARSET_REGISTRY (charset) = registry; | |
969 invalidate_charset_font_caches (charset); | |
970 face_property_was_changed (Vdefault_face, Qfont, Qglobal); | |
971 return Qnil; | |
972 } | |
973 | |
974 | |
975 /************************************************************************/ | |
976 /* Lisp primitives for working with characters */ | |
977 /************************************************************************/ | |
978 | |
979 DEFUN ("make-char", Fmake_char, 2, 3, 0, /* | |
980 Make a multi-byte character from CHARSET and octets ARG1 and ARG2. | |
981 */ | |
982 (charset, arg1, arg2)) | |
983 { | |
984 struct Lisp_Charset *cs; | |
985 int a1, a2; | |
986 int lowlim, highlim; | |
987 | |
988 charset = Fget_charset (charset); | |
989 cs = XCHARSET (charset); | |
990 | |
991 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127; | |
992 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31; | |
993 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126; | |
994 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127; | |
995 | |
996 CHECK_INT (arg1); | |
997 a1 = XINT (arg1); | |
998 if (a1 < lowlim || a1 > highlim) | |
999 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim)); | |
1000 | |
1001 if (CHARSET_DIMENSION (cs) == 1) | |
1002 { | |
1003 if (!NILP (arg2)) | |
1004 signal_simple_error | |
1005 ("Charset is of dimension one; second octet must be nil", arg2); | |
1006 return make_char (MAKE_CHAR (charset, a1, 0)); | |
1007 } | |
1008 | |
1009 CHECK_INT (arg2); | |
1010 a2 = XINT (arg2); | |
1011 if (a2 < lowlim || a2 > highlim) | |
1012 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim)); | |
1013 | |
1014 return make_char (MAKE_CHAR (charset, a1, a2)); | |
1015 } | |
1016 | |
1017 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /* | |
1018 Return the character set of char CH. | |
1019 */ | |
1020 (ch)) | |
1021 { | |
1022 CHECK_CHAR_COERCE_INT (ch); | |
1023 | |
1024 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE | |
1025 (CHAR_LEADING_BYTE (XCHAR (ch)))); | |
1026 } | |
1027 | |
1028 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /* | |
1029 Return the octet numbered N (should be 0 or 1) of char CH. | |
1030 N defaults to 0 if omitted. | |
1031 */ | |
1032 (ch, n)) | |
1033 { | |
1034 Lisp_Object charset; | |
1035 int c1, c2; | |
1036 | |
1037 CHECK_CHAR_COERCE_INT (ch); | |
1038 if (NILP (n)) | |
1039 n = Qzero; | |
1040 else | |
1041 { | |
1042 CHECK_INT (n); | |
1043 if (XINT (n) != 0 && XINT (n) != 1) | |
1044 signal_simple_error ("Octet number must be 0 or 1", n); | |
1045 } | |
1046 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2); | |
1047 if (XINT (n) == 0) | |
1048 return make_int (c1); | |
1049 else | |
1050 return make_int (c2); | |
1051 } | |
1052 | |
1053 | |
1054 /************************************************************************/ | |
1055 /* composite character functions */ | |
1056 /************************************************************************/ | |
1057 | |
1058 Emchar | |
1059 lookup_composite_char (Bufbyte *str, int len) | |
1060 { | |
1061 Lisp_Object lispstr = make_string (str, len); | |
1062 Lisp_Object ch = Fgethash (lispstr, | |
1063 Vcomposite_char_string2char_hashtable, | |
1064 Qunbound); | |
1065 Emchar emch; | |
1066 | |
1067 if (UNBOUNDP (ch)) | |
1068 { | |
1069 if (composite_char_row_next >= 128) | |
1070 signal_simple_error ("No more composite chars available", lispstr); | |
1071 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next, | |
1072 composite_char_col_next); | |
1073 Fputhash (make_char (emch), lispstr, | |
1074 Vcomposite_char_char2string_hashtable); | |
1075 Fputhash (lispstr, make_char (emch), | |
1076 Vcomposite_char_string2char_hashtable); | |
1077 composite_char_col_next++; | |
1078 if (composite_char_col_next >= 128) | |
1079 { | |
1080 composite_char_col_next = 32; | |
1081 composite_char_row_next++; | |
1082 } | |
1083 } | |
1084 else | |
1085 emch = XCHAR (ch); | |
1086 return emch; | |
1087 } | |
1088 | |
1089 Lisp_Object | |
1090 composite_char_string (Emchar ch) | |
1091 { | |
1092 Lisp_Object str = Fgethash (make_char (ch), | |
1093 Vcomposite_char_char2string_hashtable, | |
1094 Qunbound); | |
1095 assert (!UNBOUNDP (str)); | |
1096 return str; | |
1097 } | |
1098 | |
1099 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /* | |
1100 Convert a string into a single composite character. | |
1101 The character is the result of overstriking all the characters in | |
1102 the string. | |
1103 */ | |
1104 (string)) | |
1105 { | |
1106 CHECK_STRING (string); | |
1107 return make_char (lookup_composite_char (XSTRING_DATA (string), | |
1108 XSTRING_LENGTH (string))); | |
1109 } | |
1110 | |
1111 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /* | |
1112 Return a string of the characters comprising a composite character. | |
1113 */ | |
1114 (ch)) | |
1115 { | |
1116 Emchar emch; | |
1117 | |
1118 CHECK_CHAR (ch); | |
1119 emch = XCHAR (ch); | |
1120 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE) | |
1121 signal_simple_error ("Must be composite char", ch); | |
1122 return composite_char_string (emch); | |
1123 } | |
1124 | |
1125 | |
1126 /************************************************************************/ | |
1127 /* initialization */ | |
1128 /************************************************************************/ | |
1129 | |
1130 void | |
1131 syms_of_mule_charset (void) | |
1132 { | |
1133 DEFSUBR (Fcharsetp); | |
1134 DEFSUBR (Ffind_charset); | |
1135 DEFSUBR (Fget_charset); | |
1136 DEFSUBR (Fcharset_list); | |
1137 DEFSUBR (Fcharset_name); | |
1138 DEFSUBR (Fmake_charset); | |
1139 DEFSUBR (Fmake_reverse_direction_charset); | |
1140 /* DEFSUBR (Freverse_direction_charset); */ | |
1141 DEFSUBR (Fcharset_from_attributes); | |
1142 DEFSUBR (Fcharset_doc_string); | |
1143 DEFSUBR (Fcharset_dimension); | |
1144 DEFSUBR (Fcharset_property); | |
1145 DEFSUBR (Fset_charset_ccl_program); | |
1146 DEFSUBR (Fset_charset_registry); | |
1147 | |
1148 DEFSUBR (Fmake_char); | |
1149 DEFSUBR (Fchar_charset); | |
1150 DEFSUBR (Fchar_octet); | |
1151 | |
1152 DEFSUBR (Fmake_composite_char); | |
1153 DEFSUBR (Fcomposite_char_string); | |
1154 | |
1155 defsymbol (&Qcharsetp, "charsetp"); | |
1156 defsymbol (&Qregistry, "registry"); | |
1157 defsymbol (&Qfinal, "final"); | |
1158 defsymbol (&Qgraphic, "graphic"); | |
1159 defsymbol (&Qdirection, "direction"); | |
1160 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset"); | |
1161 defsymbol (&Qccl_program, "ccl-program"); | |
1162 | |
1163 defsymbol (&Ql2r, "l2r"); | |
1164 defsymbol (&Qr2l, "r2l"); | |
1165 | |
1166 /* Charsets */ | |
1167 defsymbol (&Qascii, "ascii"); | |
1168 defsymbol (&Qcontrol_1, "control-1"); | |
1169 defsymbol (&Qlatin_1, "latin-1"); | |
1170 defsymbol (&Qlatin_2, "latin-2"); | |
1171 defsymbol (&Qlatin_3, "latin-3"); | |
1172 defsymbol (&Qlatin_4, "latin-4"); | |
1173 defsymbol (&Qcyrillic, "cyrillic"); | |
1174 defsymbol (&Qarabic, "arabic"); | |
1175 defsymbol (&Qgreek, "greek"); | |
1176 defsymbol (&Qhebrew, "hebrew"); | |
1177 defsymbol (&Qlatin_5, "latin-5"); | |
1178 defsymbol (&Qthai, "thai"); | |
1179 | |
1180 defsymbol (&Qjapanese_jisx0201_kana, "japanese-jisx0201-kana"); | |
1181 defsymbol (&Qjapanese_jisx0201_roman, "japanese-jisx0201-roman"); | |
1182 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978"); | |
1183 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208"); | |
1184 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212"); | |
1185 | |
1186 defsymbol (&Qchinese_gb, "chinese-gb"); | |
1187 defsymbol (&Qchinese_big5_1, "chinese-big5-1"); | |
1188 defsymbol (&Qchinese_big5_2, "chinese-big5-2"); | |
1189 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1"); | |
1190 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2"); | |
1191 | |
1192 defsymbol (&Qkorean_ksc5601, "korean-ksc5601"); | |
1193 defsymbol (&Qcomposite, "composite"); | |
1194 } | |
1195 | |
1196 void | |
1197 vars_of_mule_charset (void) | |
1198 { | |
1199 int i, j, k; | |
1200 | |
1201 for (i = 0; i < 128; i++) | |
1202 charset_by_leading_byte[i] = Qnil; | |
1203 | |
1204 for (i = 0; i < 4; i++) | |
1205 for (j = 0; j < 128; j++) | |
1206 for (k = 0; k < 2; k ++) | |
1207 charset_by_attributes[i][j][k] = Qnil; | |
1208 | |
1209 /* Now done at compile time | |
1210 for (i = 0; i < 128; i++) | |
1211 rep_bytes_by_first_byte[i] = 1; | |
1212 */ | |
1213 | |
1214 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1; | |
1215 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2; | |
1216 } | |
1217 | |
1218 void | |
1219 complex_vars_of_mule_charset (void) | |
1220 { | |
1221 staticpro (&Vcharset_hashtable); | |
1222 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK, | |
1223 HASHTABLE_EQ); | |
1224 | |
1225 /* Predefined character sets. We store them into variables for | |
1226 ease of access. */ | |
1227 | |
1228 Vcharset_ascii = | |
1229 make_charset (Qascii, LEADING_BYTE_ASCII, 1, | |
1230 CHARSET_TYPE_94, 1, 0, 'B', | |
1231 CHARSET_LEFT_TO_RIGHT, | |
1232 build_string ("ASCII"), | |
1233 build_string ("ISO8859-1")); | |
1234 Vcharset_control_1 = | |
1235 make_charset (Qcontrol_1, LEADING_BYTE_CONTROL_1, 2, | |
1236 CHARSET_TYPE_94, 1, 0, 0, | |
1237 CHARSET_LEFT_TO_RIGHT, | |
1238 build_string ("Control characters"), | |
1239 build_string ("")); | |
1240 Vcharset_latin_1 = | |
1241 make_charset (Qlatin_1, LEADING_BYTE_LATIN_1, 2, | |
1242 CHARSET_TYPE_96, 1, 1, 'A', | |
1243 CHARSET_LEFT_TO_RIGHT, | |
1244 build_string ("Latin-1"), | |
1245 build_string ("ISO8859-1")); | |
1246 Vcharset_latin_2 = | |
1247 make_charset (Qlatin_2, LEADING_BYTE_LATIN_2, 2, | |
1248 CHARSET_TYPE_96, 1, 1, 'B', | |
1249 CHARSET_LEFT_TO_RIGHT, | |
1250 build_string ("Latin-2"), | |
1251 build_string ("ISO8859-2")); | |
1252 Vcharset_latin_3 = | |
1253 make_charset (Qlatin_3, LEADING_BYTE_LATIN_3, 2, | |
1254 CHARSET_TYPE_96, 1, 1, 'C', | |
1255 CHARSET_LEFT_TO_RIGHT, | |
1256 build_string ("Latin-3"), | |
1257 build_string ("ISO8859-3")); | |
1258 Vcharset_latin_4 = | |
1259 make_charset (Qlatin_4, LEADING_BYTE_LATIN_4, 2, | |
1260 CHARSET_TYPE_96, 1, 1, 'D', | |
1261 CHARSET_LEFT_TO_RIGHT, | |
1262 build_string ("Latin-4"), | |
1263 build_string ("ISO8859-4")); | |
1264 Vcharset_cyrillic = | |
1265 make_charset (Qcyrillic, LEADING_BYTE_CYRILLIC, 2, | |
1266 CHARSET_TYPE_96, 1, 1, 'L', | |
1267 CHARSET_LEFT_TO_RIGHT, | |
1268 build_string ("Cyrillic"), | |
1269 build_string ("ISO8859-5")); | |
1270 Vcharset_arabic = | |
1271 make_charset (Qarabic, LEADING_BYTE_ARABIC, 2, | |
1272 CHARSET_TYPE_96, 1, 1, 'G', | |
1273 CHARSET_RIGHT_TO_LEFT, | |
1274 build_string ("Arabic"), | |
1275 build_string ("ISO8859-6")); | |
1276 Vcharset_greek = | |
1277 make_charset (Qgreek, LEADING_BYTE_GREEK, 2, | |
1278 CHARSET_TYPE_96, 1, 1, 'F', | |
1279 CHARSET_LEFT_TO_RIGHT, | |
1280 build_string ("Greek"), | |
1281 build_string ("ISO8859-7")); | |
1282 Vcharset_hebrew = | |
1283 make_charset (Qhebrew, LEADING_BYTE_HEBREW, 2, | |
1284 CHARSET_TYPE_96, 1, 1, 'H', | |
1285 CHARSET_RIGHT_TO_LEFT, | |
1286 build_string ("Hebrew"), | |
1287 build_string ("ISO8859-8")); | |
1288 Vcharset_latin_5 = | |
1289 make_charset (Qlatin_5, LEADING_BYTE_LATIN_5, 2, | |
1290 CHARSET_TYPE_96, 1, 1, 'M', | |
1291 CHARSET_LEFT_TO_RIGHT, | |
1292 build_string ("Latin-5"), | |
1293 build_string ("ISO8859-9")); | |
1294 Vcharset_thai = | |
1295 make_charset (Qthai, LEADING_BYTE_THAI, 2, | |
1296 CHARSET_TYPE_96, 1, 1, 'T', | |
1297 CHARSET_LEFT_TO_RIGHT, | |
1298 build_string ("Thai"), | |
1299 build_string ("TIS620")); | |
1300 | |
1301 /* Japanese */ | |
1302 Vcharset_japanese_jisx0201_kana = | |
1303 make_charset (Qjapanese_jisx0201_kana, | |
1304 LEADING_BYTE_JAPANESE_JISX0201_KANA, 2, | |
1305 CHARSET_TYPE_94, 1, 1, 'I', | |
1306 CHARSET_LEFT_TO_RIGHT, | |
1307 build_string ("Japanese Katakana"), | |
1308 build_string ("JISX0201.1976")); | |
1309 Vcharset_japanese_jisx0201_roman = | |
1310 make_charset (Qjapanese_jisx0201_roman, | |
1311 LEADING_BYTE_JAPANESE_JISX0201_ROMAN, 2, | |
1312 CHARSET_TYPE_94, 1, 0, 'J', | |
1313 CHARSET_LEFT_TO_RIGHT, | |
1314 build_string ("Japanese Roman"), | |
1315 build_string ("JISX0201.1976")); | |
1316 Vcharset_japanese_jisx0208_1978 = | |
1317 make_charset (Qjapanese_jisx0208_1978, | |
1318 LEADING_BYTE_JAPANESE_JISX0208_1978, 3, | |
1319 CHARSET_TYPE_94X94, 2, 0, '@', | |
1320 CHARSET_LEFT_TO_RIGHT, | |
1321 build_string ("Japanese Old"), | |
1322 build_string ("\\(JISX0208\\|jisc6226\\).1978")); | |
1323 Vcharset_japanese_jisx0208 = | |
1324 make_charset (Qjapanese_jisx0208, | |
1325 LEADING_BYTE_JAPANESE_JISX0208, 3, | |
1326 CHARSET_TYPE_94X94, 2, 0, 'B', | |
1327 CHARSET_LEFT_TO_RIGHT, | |
1328 build_string ("Japanese"), | |
1329 build_string ("JISX0208.\\(1983\\|1990\\)")); | |
1330 Vcharset_japanese_jisx0212 = | |
1331 make_charset (Qjapanese_jisx0212, | |
1332 LEADING_BYTE_JAPANESE_JISX0212, 3, | |
1333 CHARSET_TYPE_94X94, 2, 0, 'D', | |
1334 CHARSET_LEFT_TO_RIGHT, | |
1335 build_string ("Japanese Supplement"), | |
1336 build_string ("JISX0212")); | |
1337 | |
1338 /* Chinese */ | |
1339 Vcharset_chinese_gb = | |
1340 make_charset (Qchinese_gb, LEADING_BYTE_CHINESE_GB, 3, | |
1341 CHARSET_TYPE_94X94, 2, 0, 'A', | |
1342 CHARSET_LEFT_TO_RIGHT, | |
1343 build_string ("Chinese GB"), | |
1344 build_string ("GB2312")); | |
1345 #define CHINESE_CNS_PLANE_RE(n) "CNS11643[.-]\\(.*[.-]\\)?" n "$" | |
1346 Vcharset_chinese_cns11643_1 = | |
1347 make_charset (Qchinese_cns11643_1, | |
1348 LEADING_BYTE_CHINESE_CNS11643_1, 3, | |
1349 CHARSET_TYPE_94X94, 2, 0, 'G', | |
1350 CHARSET_LEFT_TO_RIGHT, | |
1351 build_string ("Chinese CNS Plane 1"), | |
1352 build_string (CHINESE_CNS_PLANE_RE("1"))); | |
1353 Vcharset_chinese_cns11643_2 = | |
1354 make_charset (Qchinese_cns11643_2, | |
1355 LEADING_BYTE_CHINESE_CNS11643_2, 3, | |
1356 CHARSET_TYPE_94X94, 2, 0, 'H', | |
1357 CHARSET_LEFT_TO_RIGHT, | |
1358 build_string ("Chinese CNS Plane 2"), | |
1359 build_string (CHINESE_CNS_PLANE_RE("2"))); | |
1360 Vcharset_chinese_big5_1 = | |
1361 make_charset (Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3, | |
1362 CHARSET_TYPE_94X94, 2, 0, '0', | |
1363 CHARSET_LEFT_TO_RIGHT, | |
1364 build_string ("Chinese Big5 Level 1"), | |
1365 build_string ("Big5")); | |
1366 Vcharset_chinese_big5_2 = | |
1367 make_charset (Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3, | |
1368 CHARSET_TYPE_94X94, 2, 0, '1', | |
1369 CHARSET_LEFT_TO_RIGHT, | |
1370 build_string ("Chinese Big5 Level 2"), | |
1371 build_string ("Big5")); | |
1372 | |
1373 Vcharset_korean_ksc5601 = | |
1374 make_charset (Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3, | |
1375 CHARSET_TYPE_94X94, 2, 0, 'C', | |
1376 CHARSET_LEFT_TO_RIGHT, | |
1377 build_string ("Korean"), | |
1378 build_string ("KSC5601")); | |
1379 /* #### For simplicity, we put composite chars into a 96x96 charset. | |
1380 This is going to lead to problems because you can run out of | |
1381 room, esp. as we don't yet recycle numbers. */ | |
1382 Vcharset_composite = | |
1383 make_charset (Qcomposite, LEADING_BYTE_COMPOSITE, 3, | |
1384 CHARSET_TYPE_96X96, 2, 0, 0, | |
1385 CHARSET_LEFT_TO_RIGHT, | |
1386 build_string ("Composite characters"), | |
1387 build_string ("")); | |
1388 | |
1389 composite_char_row_next = 32; | |
1390 composite_char_col_next = 32; | |
1391 | |
1392 Vcomposite_char_string2char_hashtable = | |
1393 make_lisp_hashtable (500, HASHTABLE_NONWEAK, | |
1394 HASHTABLE_EQUAL); | |
1395 Vcomposite_char_char2string_hashtable = | |
1396 make_lisp_hashtable (500, HASHTABLE_NONWEAK, | |
1397 HASHTABLE_EQ); | |
1398 staticpro (&Vcomposite_char_string2char_hashtable); | |
1399 staticpro (&Vcomposite_char_char2string_hashtable); | |
1400 | |
1401 /* Faux charsets used only for convenience in retrieving the | |
1402 number of rep bytes associated with a leading byte. */ | |
1403 | |
1404 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_1, 3, 0, 0, 0, 0, | |
1405 0, Qnil, Qnil); | |
1406 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_2, 4, 0, 0, 0, 0, | |
1407 0, Qnil, Qnil); | |
1408 } |