70
|
1 /* Functions to handle multilingual characters.
|
|
2 Copyright (C) 1992, 1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
|
4
|
|
5 This file is part of XEmacs.
|
|
6
|
|
7 XEmacs is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by the
|
|
9 Free Software Foundation; either version 2, or (at your option) any
|
|
10 later version.
|
|
11
|
|
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with XEmacs; see the file COPYING. If not, write to
|
|
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
20 Boston, MA 02111-1307, USA. */
|
|
21
|
|
22 /* Synched up with: Mule 2.3. Not in FSF. */
|
|
23
|
|
24 /* Rewritten by Ben Wing <wing@666.com>. */
|
|
25
|
|
26 #include <config.h>
|
|
27 #include "lisp.h"
|
|
28
|
|
29 #include "buffer.h"
|
|
30 #include "chartab.h"
|
|
31 #include "elhash.h"
|
|
32 #include "lstream.h"
|
|
33 #include "device.h"
|
|
34 #include "faces.h"
|
|
35
|
|
36 /* The various pre-defined charsets. */
|
|
37
|
|
38 Lisp_Object Vcharset_ascii;
|
|
39 Lisp_Object Vcharset_control_1;
|
74
|
40 Lisp_Object Vcharset_latin_iso8859_1;
|
|
41 Lisp_Object Vcharset_latin_iso8859_2;
|
|
42 Lisp_Object Vcharset_latin_iso8859_3;
|
|
43 Lisp_Object Vcharset_latin_iso8859_4;
|
|
44 Lisp_Object Vcharset_cyrillic_iso8859_5;
|
|
45 Lisp_Object Vcharset_arabic_iso8859_6;
|
|
46 Lisp_Object Vcharset_greek_iso8859_7;
|
|
47 Lisp_Object Vcharset_hebrew_iso8859_8;
|
|
48 Lisp_Object Vcharset_latin_iso8859_9;
|
|
49 Lisp_Object Vcharset_thai_tis620;
|
|
50 Lisp_Object Vcharset_katakana_jisx0201;
|
|
51 Lisp_Object Vcharset_latin_jisx0201;
|
70
|
52 Lisp_Object Vcharset_japanese_jisx0208_1978;
|
|
53 Lisp_Object Vcharset_japanese_jisx0208;
|
|
54 Lisp_Object Vcharset_japanese_jisx0212;
|
74
|
55 Lisp_Object Vcharset_chinese_gb2312;
|
70
|
56 Lisp_Object Vcharset_chinese_big5_1;
|
|
57 Lisp_Object Vcharset_chinese_big5_2;
|
|
58 Lisp_Object Vcharset_chinese_cns11643_1;
|
|
59 Lisp_Object Vcharset_chinese_cns11643_2;
|
|
60 Lisp_Object Vcharset_korean_ksc5601;
|
|
61 Lisp_Object Vcharset_composite;
|
|
62
|
|
63 /* Hashtables for composite chars. One maps string representing
|
|
64 composed chars to their equivalent chars; one goes the
|
|
65 other way. */
|
|
66 Lisp_Object Vcomposite_char_char2string_hashtable;
|
|
67 Lisp_Object Vcomposite_char_string2char_hashtable;
|
|
68
|
|
69 /* Table of charsets indexed by leading byte. */
|
|
70 Lisp_Object charset_by_leading_byte[128];
|
|
71
|
|
72 /* Table of charsets indexed by type/final-byte/direction. */
|
|
73 Lisp_Object charset_by_attributes[4][128][2];
|
|
74
|
|
75 static int composite_char_row_next;
|
|
76 static int composite_char_col_next;
|
|
77
|
|
78 /* Table of number of bytes in the string representation of a character
|
|
79 indexed by the first byte of that representation.
|
|
80
|
|
81 This value can be derived other ways -- e.g. something like
|
|
82
|
|
83 (BYTE_ASCII_P (first_byte) ? 1 :
|
|
84 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)))
|
|
85
|
|
86 but it's faster this way. */
|
|
87
|
|
88 Bytecount rep_bytes_by_first_byte[0xA0] =
|
|
89 { /* 16 x 8 ones for ASCII */
|
|
90 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
91 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
92 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
98 /* 1 x 8 for Dimension-1 official Mule charsets */
|
|
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
|
100 /* 0x90 - 0x9d are Dimension-2 official */
|
|
101 /* 0x9e is Dimension-1 private */
|
|
102 /* 0x9f is Dimension-2 private */
|
|
103 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4
|
|
104 };
|
|
105
|
|
106 Lisp_Object Qcharsetp;
|
|
107
|
|
108 /* Qdoc_string, Qdimension, Qchars defined in general.c */
|
|
109 Lisp_Object Qregistry, Qfinal, Qgraphic;
|
|
110 Lisp_Object Qdirection;
|
|
111 Lisp_Object Qreverse_direction_charset;
|
|
112 Lisp_Object Qccl_program;
|
|
113
|
74
|
114 Lisp_Object Qascii, Qcontrol_1,
|
70
|
115
|
74
|
116 Qlatin_iso8859_1,
|
|
117 Qlatin_iso8859_2,
|
|
118 Qlatin_iso8859_3,
|
|
119 Qlatin_iso8859_4,
|
|
120 Qcyrillic_iso8859_5,
|
|
121 Qarabic_iso8859_6,
|
|
122 Qgreek_iso8859_7,
|
|
123 Qhebrew_iso8859_8,
|
|
124 Qlatin_iso8859_9,
|
|
125
|
|
126 Qthai_tis620,
|
|
127
|
|
128 Qkatakana_jisx0201, Qlatin_jisx0201,
|
70
|
129 Qjapanese_jisx0208_1978,
|
|
130 Qjapanese_jisx0208,
|
|
131 Qjapanese_jisx0212,
|
|
132
|
74
|
133 Qchinese_gb2312,
|
|
134 Qchinese_big5_1, Qchinese_big5_2,
|
70
|
135 Qchinese_cns11643_1, Qchinese_cns11643_2,
|
|
136
|
|
137 Qkorean_ksc5601, Qcomposite;
|
|
138
|
|
139 Lisp_Object Ql2r, Qr2l;
|
|
140
|
|
141 Lisp_Object Vcharset_hashtable;
|
|
142
|
|
143 static Bufbyte next_allocated_1_byte_leading_byte;
|
|
144 static Bufbyte next_allocated_2_byte_leading_byte;
|
|
145
|
|
146 /* Composite characters are characters constructed by overstriking two
|
|
147 or more regular characters.
|
|
148
|
|
149 1) The old Mule implementation involves storing composite characters
|
|
150 in a buffer as a tag followed by all of the actual characters
|
|
151 used to make up the composite character. I think this is a bad
|
|
152 idea; it greatly complicates code that wants to handle strings
|
|
153 one character at a time because it has to deal with the possibility
|
|
154 of great big ungainly characters. It's much more reasonable to
|
|
155 simply store an index into a table of composite characters.
|
|
156
|
|
157 2) The current implementation only allows for 16,384 separate
|
|
158 composite characters over the lifetime of the XEmacs process.
|
|
159 This could become a potential problem if the user
|
|
160 edited lots of different files that use composite characters.
|
|
161 Due to FSF bogosity, increasing the number of allowable
|
|
162 composite characters under Mule would decrease the number
|
|
163 of possible faces that can exist. Mule already has shrunk
|
|
164 this to 2048, and further shrinkage would become uncomfortable.
|
|
165 No such problems exist in XEmacs.
|
|
166
|
|
167 Composite characters could be represented as 0x80 C1 C2 C3,
|
|
168 where each C[1-3] is in the range 0xA0 - 0xFF. This allows
|
|
169 for slightly under 2^20 (one million) composite characters
|
|
170 over the XEmacs process lifetime, and you only need to
|
|
171 increase the size of a Mule character from 19 to 21 bits.
|
|
172 Or you could use 0x80 C1 C2 C3 C4, allowing for about
|
|
173 85 million (slightly over 2^26) composite characters. */
|
|
174
|
|
175
|
|
176 /************************************************************************/
|
|
177 /* Basic Emchar functions */
|
|
178 /************************************************************************/
|
|
179
|
|
180 /* Convert a non-ASCII Mule character C into a one-character Mule-encoded
|
|
181 string in STR. Returns the number of bytes stored.
|
|
182 Do not call this directly. Use the macro set_charptr_emchar() instead.
|
|
183 */
|
|
184
|
|
185 Bytecount
|
|
186 non_ascii_set_charptr_emchar (Bufbyte *str, Emchar c)
|
|
187 {
|
|
188 Bufbyte *p;
|
|
189 Bufbyte lb;
|
|
190 int c1, c2;
|
|
191 Lisp_Object charset;
|
|
192
|
|
193 p = str;
|
|
194 BREAKUP_CHAR (c, charset, c1, c2);
|
|
195 lb = CHAR_LEADING_BYTE (c);
|
|
196 if (LEADING_BYTE_PRIVATE_P (lb))
|
|
197 *p++ = PRIVATE_LEADING_BYTE_PREFIX (lb);
|
|
198 *p++ = lb;
|
|
199 if (EQ (charset, Vcharset_control_1))
|
|
200 c1 += 0x20;
|
|
201 *p++ = c1 | 0x80;
|
|
202 if (c2)
|
|
203 *p++ = c2 | 0x80;
|
|
204
|
|
205 return (p - str);
|
|
206 }
|
|
207
|
|
208 /* Return the first character from a Mule-encoded string in STR,
|
|
209 assuming it's non-ASCII. Do not call this directly.
|
|
210 Use the macro charptr_emchar() instead. */
|
|
211
|
|
212 Emchar
|
|
213 non_ascii_charptr_emchar (CONST Bufbyte *str)
|
|
214 {
|
|
215 Bufbyte i0 = *str, i1, i2 = 0;
|
|
216 Lisp_Object charset;
|
|
217
|
|
218 if (i0 == LEADING_BYTE_CONTROL_1)
|
|
219 return (Emchar) (*++str - 0x20);
|
|
220
|
|
221 if (LEADING_BYTE_PREFIX_P (i0))
|
|
222 i0 = *++str;
|
|
223
|
|
224 i1 = *++str & 0x7F;
|
|
225
|
|
226 charset = CHARSET_BY_LEADING_BYTE (i0);
|
|
227 if (XCHARSET_DIMENSION (charset) == 2)
|
|
228 i2 = *++str & 0x7F;
|
|
229
|
|
230 return MAKE_CHAR (charset, i1, i2);
|
|
231 }
|
|
232
|
|
233 /* Return whether CH is a valid Emchar, assuming it's non-ASCII.
|
|
234 Do not call this directly. Use the macro valid_char_p() instead. */
|
|
235
|
|
236 int
|
|
237 non_ascii_valid_char_p (Emchar ch)
|
|
238 {
|
|
239 int f1, f2, f3;
|
|
240
|
|
241 /* Must have only lowest 19 bits set */
|
|
242 if (ch & ~0x7FFFF)
|
|
243 return 0;
|
|
244
|
|
245 f1 = CHAR_FIELD1 (ch);
|
|
246 f2 = CHAR_FIELD2 (ch);
|
|
247 f3 = CHAR_FIELD3 (ch);
|
|
248
|
|
249 if (f1 == 0)
|
|
250 {
|
|
251 Lisp_Object charset;
|
|
252
|
|
253 if (f2 < MIN_CHAR_FIELD2_OFFICIAL ||
|
|
254 (f2 > MAX_CHAR_FIELD2_OFFICIAL && f2 < MIN_CHAR_FIELD2_PRIVATE) ||
|
|
255 f2 > MAX_CHAR_FIELD2_PRIVATE)
|
|
256 return 0;
|
|
257 if (f3 < 0x20)
|
|
258 return 0;
|
|
259
|
|
260 if (f3 != 0x20 && f3 != 0x7F)
|
|
261 return 1;
|
|
262
|
|
263 /*
|
|
264 NOTE: This takes advantage of the fact that
|
|
265 FIELD2_TO_OFFICIAL_LEADING_BYTE and
|
|
266 FIELD2_TO_PRIVATE_LEADING_BYTE are the same.
|
|
267 */
|
|
268 charset = CHARSET_BY_LEADING_BYTE (f2 + FIELD2_TO_OFFICIAL_LEADING_BYTE);
|
|
269 return (XCHARSET_CHARS (charset) == 96);
|
|
270 }
|
|
271 else
|
|
272 {
|
|
273 Lisp_Object charset;
|
|
274
|
|
275 if (f1 < MIN_CHAR_FIELD1_OFFICIAL ||
|
|
276 (f1 > MAX_CHAR_FIELD1_OFFICIAL && f1 < MIN_CHAR_FIELD1_PRIVATE) ||
|
|
277 f1 > MAX_CHAR_FIELD1_PRIVATE)
|
|
278 return 0;
|
|
279 if (f2 < 0x20 || f3 < 0x20)
|
|
280 return 0;
|
|
281
|
|
282 if (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE == LEADING_BYTE_COMPOSITE)
|
|
283 {
|
|
284 if (UNBOUNDP (Fgethash (make_int (ch),
|
|
285 Vcomposite_char_char2string_hashtable,
|
|
286 Qunbound)))
|
|
287 return 0;
|
|
288 return 1;
|
|
289 }
|
|
290
|
|
291 if (f2 != 0x20 && f2 != 0x7F && f3 != 0x20 && f3 != 0x7F)
|
|
292 return 1;
|
|
293
|
|
294 if (f1 <= MAX_CHAR_FIELD1_OFFICIAL)
|
|
295 charset =
|
|
296 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_OFFICIAL_LEADING_BYTE);
|
|
297 else
|
|
298 charset =
|
|
299 CHARSET_BY_LEADING_BYTE (f1 + FIELD1_TO_PRIVATE_LEADING_BYTE);
|
|
300
|
|
301 return (XCHARSET_CHARS (charset) == 96);
|
|
302 }
|
|
303 }
|
|
304
|
|
305
|
|
306 /************************************************************************/
|
|
307 /* Basic string functions */
|
|
308 /************************************************************************/
|
|
309
|
|
310 /* Copy the character pointed to by PTR into STR, assuming it's
|
|
311 non-ASCII. Do not call this directly. Use the macro
|
|
312 charptr_copy_char() instead. */
|
|
313
|
|
314 Bytecount
|
|
315 non_ascii_charptr_copy_char (CONST Bufbyte *ptr, Bufbyte *str)
|
|
316 {
|
|
317 Bufbyte *strptr = str;
|
|
318 *strptr = *ptr++;
|
|
319 switch (REP_BYTES_BY_FIRST_BYTE (*strptr))
|
|
320 {
|
|
321 /* Notice fallthrough. */
|
|
322 case 4: *++strptr = *ptr++;
|
|
323 case 3: *++strptr = *ptr++;
|
|
324 case 2: *++strptr = *ptr;
|
|
325 break;
|
|
326 default:
|
|
327 abort ();
|
|
328 }
|
|
329 return strptr + 1 - str;
|
|
330 }
|
|
331
|
|
332
|
|
333 /************************************************************************/
|
|
334 /* streams of Emchars */
|
|
335 /************************************************************************/
|
|
336
|
|
337 /* Treat a stream as a stream of Emchar's rather than a stream of bytes.
|
|
338 The functions below are not meant to be called directly; use
|
|
339 the macros in insdel.h. */
|
|
340
|
|
341 Emchar
|
|
342 Lstream_get_emchar_1 (Lstream *stream, int ch)
|
|
343 {
|
|
344 Bufbyte str[MAX_EMCHAR_LEN];
|
|
345 Bufbyte *strptr = str;
|
|
346
|
|
347 str[0] = (Bufbyte) ch;
|
|
348 switch (REP_BYTES_BY_FIRST_BYTE (ch))
|
|
349 {
|
|
350 /* Notice fallthrough. */
|
|
351 case 4:
|
|
352 ch = Lstream_getc (stream);
|
|
353 assert (ch >= 0);
|
|
354 *++strptr = (Bufbyte) ch;
|
|
355 case 3:
|
|
356 ch = Lstream_getc (stream);
|
|
357 assert (ch >= 0);
|
|
358 *++strptr = (Bufbyte) ch;
|
|
359 case 2:
|
|
360 ch = Lstream_getc (stream);
|
|
361 assert (ch >= 0);
|
|
362 *++strptr = (Bufbyte) ch;
|
|
363 break;
|
|
364 default:
|
|
365 abort ();
|
|
366 }
|
|
367 return charptr_emchar (str);
|
|
368 }
|
|
369
|
|
370 int
|
|
371 Lstream_fput_emchar (Lstream *stream, Emchar ch)
|
|
372 {
|
|
373 Bufbyte str[MAX_EMCHAR_LEN];
|
|
374 Bytecount len = set_charptr_emchar (str, ch);
|
|
375 return Lstream_write (stream, str, len);
|
|
376 }
|
|
377
|
|
378 void
|
|
379 Lstream_funget_emchar (Lstream *stream, Emchar ch)
|
|
380 {
|
|
381 Bufbyte str[MAX_EMCHAR_LEN];
|
|
382 Bytecount len = set_charptr_emchar (str, ch);
|
|
383 Lstream_unread (stream, str, len);
|
|
384 }
|
|
385
|
|
386
|
|
387 /************************************************************************/
|
|
388 /* charset object */
|
|
389 /************************************************************************/
|
|
390
|
|
391 static Lisp_Object mark_charset (Lisp_Object, void (*) (Lisp_Object));
|
|
392 static void print_charset (Lisp_Object, Lisp_Object, int);
|
|
393 DEFINE_LRECORD_IMPLEMENTATION ("charset", charset,
|
|
394 mark_charset, print_charset, 0, 0, 0,
|
|
395 struct Lisp_Charset);
|
|
396
|
|
397 static Lisp_Object
|
|
398 mark_charset (Lisp_Object obj, void (*markobj) (Lisp_Object))
|
|
399 {
|
|
400 struct Lisp_Charset *cs = XCHARSET (obj);
|
|
401
|
|
402 (markobj) (cs->doc_string);
|
|
403 (markobj) (cs->registry);
|
|
404 (markobj) (cs->ccl_program);
|
|
405 return cs->name;
|
|
406 }
|
|
407
|
|
408 static void
|
|
409 print_charset (Lisp_Object obj, Lisp_Object printcharfun, int escapeflag)
|
|
410 {
|
|
411 struct Lisp_Charset *cs = XCHARSET (obj);
|
|
412 char buf[200];
|
|
413
|
|
414 if (print_readably)
|
|
415 error ("printing unreadable object #<charset %s 0x%x>",
|
|
416 string_data (XSYMBOL (CHARSET_NAME (cs))->name),
|
|
417 cs->header.uid);
|
|
418
|
|
419 write_c_string ("#<charset ", printcharfun);
|
|
420 print_internal (CHARSET_NAME (cs), printcharfun, 0);
|
|
421 write_c_string (" ", printcharfun);
|
|
422 print_internal (CHARSET_DOC_STRING (cs), printcharfun, 1);
|
|
423 sprintf (buf, " %s %s cols=%d g%d final='%c' reg=",
|
|
424 CHARSET_TYPE (cs) == CHARSET_TYPE_94 ? "94" :
|
|
425 CHARSET_TYPE (cs) == CHARSET_TYPE_96 ? "96" :
|
|
426 CHARSET_TYPE (cs) == CHARSET_TYPE_94X94 ? "94x94" :
|
|
427 "96x96",
|
|
428 CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? "l2r" : "r2l",
|
|
429 CHARSET_COLUMNS (cs),
|
|
430 CHARSET_GRAPHIC (cs),
|
|
431 CHARSET_FINAL (cs));
|
|
432 write_c_string (buf, printcharfun);
|
|
433 print_internal (CHARSET_REGISTRY (cs), printcharfun, 0);
|
|
434 sprintf (buf, " 0x%x>", cs->header.uid);
|
|
435 write_c_string (buf, printcharfun);
|
|
436 }
|
|
437
|
|
438 /* Make a new charset. */
|
|
439
|
|
440 static Lisp_Object
|
|
441 make_charset (Lisp_Object name, Bufbyte leading_byte, unsigned char rep_bytes,
|
|
442 unsigned char type, unsigned char columns, unsigned char graphic,
|
|
443 Bufbyte final, unsigned char direction, Lisp_Object doc,
|
|
444 Lisp_Object reg)
|
|
445 {
|
|
446 struct Lisp_Charset *cs;
|
|
447 Lisp_Object obj = Qnil;
|
|
448
|
|
449 cs = (struct Lisp_Charset *) alloc_lcrecord (sizeof (struct Lisp_Charset),
|
|
450 lrecord_charset);
|
|
451 XSETCHARSET (obj, cs);
|
|
452
|
|
453 CHARSET_NAME (cs) = name;
|
|
454 CHARSET_LEADING_BYTE (cs) = leading_byte;
|
|
455 CHARSET_REP_BYTES (cs) = rep_bytes;
|
|
456 CHARSET_DIRECTION (cs) = direction;
|
|
457 CHARSET_TYPE (cs) = type;
|
|
458 CHARSET_COLUMNS (cs) = columns;
|
|
459 CHARSET_GRAPHIC (cs) = graphic;
|
|
460 CHARSET_FINAL (cs) = final;
|
|
461 CHARSET_DOC_STRING (cs) = doc;
|
|
462 CHARSET_REGISTRY (cs) = reg;
|
|
463 CHARSET_CCL_PROGRAM (cs) = Qnil;
|
|
464 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = Qnil;
|
|
465
|
|
466 if (final)
|
|
467 {
|
|
468 /* some charsets do not have final characters. This includes
|
|
469 ASCII, Control-1, Composite, and the two faux private
|
|
470 charsets. */
|
|
471 assert (NILP (charset_by_attributes[type][final][direction]));
|
|
472 charset_by_attributes[type][final][direction] = obj;
|
|
473 }
|
|
474
|
|
475 assert (NILP (charset_by_leading_byte[leading_byte - 128]));
|
|
476 charset_by_leading_byte[leading_byte - 128] = obj;
|
|
477 if (leading_byte < 0xA0)
|
|
478 /* official leading byte */
|
|
479 rep_bytes_by_first_byte[leading_byte] = rep_bytes;
|
|
480
|
|
481 /* Some charsets are "faux" and don't have names or really exist at
|
|
482 all except in the leading-byte table. */
|
|
483 if (!NILP (name))
|
|
484 Fputhash (name, obj, Vcharset_hashtable);
|
|
485 return obj;
|
|
486 }
|
|
487
|
|
488 static int
|
|
489 get_unallocated_leading_byte (int dimension)
|
|
490 {
|
|
491 int lb;
|
|
492
|
|
493 if (dimension == 1)
|
|
494 {
|
|
495 if (next_allocated_1_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_1)
|
|
496 lb = 0;
|
|
497 else
|
|
498 lb = next_allocated_1_byte_leading_byte++;
|
|
499 }
|
|
500 else
|
|
501 {
|
|
502 if (next_allocated_2_byte_leading_byte > MAX_LEADING_BYTE_PRIVATE_2)
|
|
503 lb = 0;
|
|
504 else
|
|
505 lb = next_allocated_2_byte_leading_byte++;
|
|
506 }
|
|
507
|
|
508 if (!lb)
|
|
509 signal_simple_error
|
|
510 ("No more character sets free for this dimension",
|
|
511 make_int (dimension));
|
|
512
|
|
513 return lb;
|
|
514 }
|
|
515
|
|
516
|
|
517 /************************************************************************/
|
|
518 /* Basic charset Lisp functions */
|
|
519 /************************************************************************/
|
|
520
|
|
521 DEFUN ("charsetp", Fcharsetp, 1, 1, 0, /*
|
|
522 Return non-nil if OBJECT is a charset.
|
|
523 */
|
|
524 (object))
|
|
525 {
|
|
526 return (CHARSETP (object) ? Qt : Qnil);
|
|
527 }
|
|
528
|
|
529 DEFUN ("find-charset", Ffind_charset, 1, 1, 0, /*
|
|
530 Retrieve the charset of the given name.
|
|
531 If CHARSET-OR-NAME is a charset object, it is simply returned.
|
|
532 Otherwise, CHARSET-OR-NAME should be a symbol. If there is no such charset,
|
|
533 nil is returned. Otherwise the associated charset object is returned.
|
|
534 */
|
|
535 (charset_or_name))
|
|
536 {
|
|
537 if (CHARSETP (charset_or_name))
|
|
538 return charset_or_name;
|
|
539 CHECK_SYMBOL (charset_or_name);
|
|
540
|
|
541 return Fgethash (charset_or_name, Vcharset_hashtable, Qnil);
|
|
542 }
|
|
543
|
|
544 DEFUN ("get-charset", Fget_charset, 1, 1, 0, /*
|
|
545 Retrieve the charset of the given name.
|
|
546 Same as `find-charset' except an error is signalled if there is no such
|
|
547 charset instead of returning nil.
|
|
548 */
|
|
549 (name))
|
|
550 {
|
|
551 Lisp_Object charset = Ffind_charset (name);
|
|
552
|
|
553 if (NILP (charset))
|
|
554 signal_simple_error ("No such charset", name);
|
|
555 return charset;
|
|
556 }
|
|
557
|
|
558 /* We store the charsets in hash tables with the names as the key and the
|
|
559 actual charset object as the value. Occasionally we need to use them
|
|
560 in a list format. These routines provide us with that. */
|
|
561 struct charset_list_closure
|
|
562 {
|
|
563 Lisp_Object *charset_list;
|
|
564 };
|
|
565
|
|
566 static void
|
|
567 add_charset_to_list_mapper (CONST void *hash_key, void *hash_contents,
|
|
568 void *charset_list_closure)
|
|
569 {
|
|
570 /* This function can GC */
|
|
571 Lisp_Object key, contents;
|
|
572 Lisp_Object *charset_list;
|
|
573 struct charset_list_closure *chcl = charset_list_closure;
|
|
574 CVOID_TO_LISP (key, hash_key);
|
|
575 VOID_TO_LISP (contents, hash_contents);
|
|
576 charset_list = chcl->charset_list;
|
|
577
|
|
578 *charset_list = Fcons (XCHARSET_NAME (contents), *charset_list);
|
|
579 }
|
|
580
|
|
581 DEFUN ("charset-list", Fcharset_list, 0, 0, 0, /*
|
|
582 Return a list of the names of all defined charsets.
|
|
583 */
|
|
584 ())
|
|
585 {
|
|
586 Lisp_Object charset_list = Qnil;
|
|
587 struct gcpro gcpro1;
|
|
588 struct charset_list_closure charset_list_closure;
|
|
589
|
|
590 GCPRO1 (charset_list);
|
|
591 charset_list_closure.charset_list = &charset_list;
|
|
592 elisp_maphash (add_charset_to_list_mapper, Vcharset_hashtable,
|
|
593 &charset_list_closure);
|
|
594 UNGCPRO;
|
|
595
|
|
596 return charset_list;
|
|
597 }
|
|
598
|
|
599 DEFUN ("charset-name", Fcharset_name, 1, 1, 0, /*
|
|
600 Return the name of the given charset.
|
|
601 */
|
|
602 (charset))
|
|
603 {
|
|
604 return (XCHARSET_NAME (Fget_charset (charset)));
|
|
605 }
|
|
606
|
|
607 DEFUN ("make-charset", Fmake_charset, 3, 3, 0, /*
|
|
608 Define a new character set.
|
|
609 This function is for use with Mule support.
|
|
610 NAME is a symbol, the name by which the character set is normally referred.
|
|
611 DOC-STRING is a string describing the character set.
|
|
612 PROPS is a property list, describing the specific nature of the
|
|
613 character set. Recognized properties are:
|
|
614
|
|
615 'registry A regular expression matching the font registry field for
|
|
616 this character set.
|
|
617 'dimension Number of octets used to index a character in this charset.
|
|
618 Either 1 or 2. Defaults to 1.
|
|
619 'columns Number of columns used to display a character in this charset.
|
|
620 Only used in TTY mode. (Under X, the actual width of a
|
|
621 character can be derived from the font used to display the
|
|
622 characters.) If unspecified, defaults to the dimension
|
|
623 (this is almost always the correct value).
|
|
624 'chars Number of characters in each dimension (94 or 96).
|
|
625 Defaults to 94. Note that if the dimension is 2, the
|
|
626 character set thus described is 94x94 or 96x96.
|
|
627 'final Final byte of ISO 2022 escape sequence. Must be
|
|
628 supplied. Each combination of (DIMENSION, CHARS) defines a
|
|
629 separate namespace for final bytes. Note that ISO
|
|
630 2022 restricts the final byte to the range
|
|
631 0x30 - 0x7E if dimension == 1, and 0x30 - 0x5F if
|
|
632 dimension == 2. Note also that final bytes in the range
|
|
633 0x30 - 0x3F are reserved for user-defined (not official)
|
|
634 character sets.
|
|
635 'graphic 0 (use left half of font on output) or 1 (use right half
|
|
636 of font on output). Defaults to 0. For example, for
|
|
637 a font whose registry is ISO8859-1, the left half
|
|
638 (octets 0x20 - 0x7F) is the `ascii' character set, while
|
|
639 the right half (octets 0xA0 - 0xFF) is the `latin-1'
|
|
640 character set. With 'graphic set to 0, the octets
|
|
641 will have their high bit cleared; with it set to 1,
|
|
642 the octets will have their high bit set.
|
|
643 'direction 'l2r (left-to-right) or 'r2l (right-to-left).
|
|
644 Defaults to 'l2r.
|
|
645 'ccl-program A compiled CCL program used to convert a character in
|
|
646 this charset into an index into the font. This is in
|
|
647 addition to the 'graphic property. The CCL program
|
|
648 is passed the octets of the character, with the high
|
|
649 bit cleared and set depending upon whether the value
|
|
650 of the 'graphic property is 0 or 1.
|
|
651 */
|
|
652 (name, doc_string, props))
|
|
653 {
|
|
654 int lb, dimension = 1, chars = 94, graphic = 0, final = 0, columns = -1;
|
|
655 int direction = CHARSET_LEFT_TO_RIGHT;
|
|
656 int type;
|
|
657 Lisp_Object registry = Qnil;
|
|
658 Lisp_Object charset;
|
|
659 Lisp_Object rest, keyword, value;
|
|
660 Lisp_Object ccl_program = Qnil;
|
|
661
|
|
662 CHECK_SYMBOL (name);
|
|
663 if (!NILP (doc_string))
|
|
664 CHECK_STRING (doc_string);
|
|
665
|
|
666 charset = Ffind_charset (name);
|
|
667 if (!NILP (charset))
|
|
668 signal_simple_error ("Cannot redefine existing charset", name);
|
|
669
|
|
670 EXTERNAL_PROPERTY_LIST_LOOP (rest, keyword, value, props)
|
|
671 {
|
|
672 if (EQ (keyword, Qdimension))
|
|
673 {
|
|
674 CHECK_INT (value);
|
|
675 dimension = XINT (value);
|
|
676 if (dimension < 1 || dimension > 2)
|
|
677 signal_simple_error ("Invalid value for 'dimension", value);
|
|
678 }
|
|
679
|
|
680 else if (EQ (keyword, Qchars))
|
|
681 {
|
|
682 CHECK_INT (value);
|
|
683 chars = XINT (value);
|
|
684 if (chars != 94 && chars != 96)
|
|
685 signal_simple_error ("Invalid value for 'chars", value);
|
|
686 }
|
|
687
|
|
688 else if (EQ (keyword, Qcolumns))
|
|
689 {
|
|
690 CHECK_INT (value);
|
|
691 columns = XINT (value);
|
|
692 if (columns != 1 && columns != 2)
|
|
693 signal_simple_error ("Invalid value for 'columns", value);
|
|
694 }
|
|
695
|
|
696 else if (EQ (keyword, Qgraphic))
|
|
697 {
|
|
698 CHECK_INT (value);
|
|
699 graphic = XINT (value);
|
|
700 if (graphic < 0 || graphic > 1)
|
|
701 signal_simple_error ("Invalid value for 'graphic", value);
|
|
702 }
|
|
703
|
|
704 else if (EQ (keyword, Qregistry))
|
|
705 {
|
|
706 CHECK_STRING (value);
|
|
707 registry = value;
|
|
708 }
|
|
709
|
|
710 else if (EQ (keyword, Qdirection))
|
|
711 {
|
|
712 if (EQ (value, Ql2r))
|
|
713 direction = CHARSET_LEFT_TO_RIGHT;
|
|
714 else if (EQ (value, Qr2l))
|
|
715 direction = CHARSET_RIGHT_TO_LEFT;
|
|
716 else
|
|
717 signal_simple_error ("Invalid value for 'direction", value);
|
|
718 }
|
|
719
|
|
720 else if (EQ (keyword, Qfinal))
|
|
721 {
|
|
722 CHECK_CHAR_COERCE_INT (value);
|
|
723 final = XCHAR (value);
|
|
724 if (final < '0' || final > '~')
|
|
725 signal_simple_error ("Invalid value for 'final", value);
|
|
726 }
|
|
727
|
|
728 else if (EQ (keyword, Qccl_program))
|
|
729 {
|
|
730 CHECK_VECTOR (value);
|
|
731 ccl_program = value;
|
|
732 }
|
|
733
|
|
734 else
|
|
735 signal_simple_error ("Unrecognized property", keyword);
|
|
736 }
|
|
737
|
|
738 if (!final)
|
|
739 error ("'final must be specified");
|
|
740 if (dimension == 2 && final > 0x5F)
|
|
741 signal_simple_error
|
|
742 ("Final must be in the range 0x30 - 0x5F for dimension == 2",
|
|
743 make_char (final));
|
|
744
|
|
745 if (dimension == 1)
|
|
746 type = (chars == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
|
|
747 else
|
|
748 type = (chars == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
|
|
749
|
|
750 if (!NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_LEFT_TO_RIGHT)) ||
|
|
751 !NILP (CHARSET_BY_ATTRIBUTES (type, final, CHARSET_RIGHT_TO_LEFT)))
|
|
752 error
|
|
753 ("Character set already defined for this DIMENSION/CHARS/FINAL combo");
|
|
754
|
|
755 lb = get_unallocated_leading_byte (dimension);
|
|
756
|
|
757 if (NILP (doc_string))
|
|
758 doc_string = build_string ("");
|
|
759
|
|
760 if (NILP (registry))
|
|
761 registry = build_string ("");
|
|
762
|
|
763 if (columns == -1)
|
|
764 columns = dimension;
|
|
765 charset = make_charset (name, lb, dimension + 2, type, columns, graphic,
|
|
766 final, direction, doc_string, registry);
|
|
767 if (!NILP (ccl_program))
|
|
768 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
|
|
769 return charset;
|
|
770 }
|
|
771
|
|
772 DEFUN ("make-reverse-direction-charset",
|
|
773 Fmake_reverse_direction_charset, 2, 2, 0, /*
|
|
774 Make a charset equivalent to CHARSET but which goes in the opposite direction.
|
|
775 NEW-NAME is the name of the new charset. Return the new charset.
|
|
776 */
|
|
777 (charset, new_name))
|
|
778 {
|
|
779 Lisp_Object new_charset = Qnil;
|
|
780 int lb, dimension, columns, graphic, final;
|
|
781 int direction, type;
|
|
782 Lisp_Object registry, doc_string;
|
|
783 struct Lisp_Charset *cs;
|
|
784
|
|
785 charset = Fget_charset (charset);
|
|
786 if (!NILP (XCHARSET_REVERSE_DIRECTION_CHARSET (charset)))
|
|
787 signal_simple_error ("Charset already has reverse-direction charset",
|
|
788 charset);
|
|
789
|
|
790 CHECK_SYMBOL (new_name);
|
|
791 if (!NILP (Ffind_charset (new_name)))
|
|
792 signal_simple_error ("Cannot redefine existing charset", new_name);
|
|
793
|
|
794 cs = XCHARSET (charset);
|
|
795
|
|
796 type = CHARSET_TYPE (cs);
|
|
797 columns = CHARSET_COLUMNS (cs);
|
|
798 dimension = CHARSET_DIMENSION (cs);
|
|
799 lb = get_unallocated_leading_byte (dimension);
|
|
800
|
|
801 graphic = CHARSET_GRAPHIC (cs);
|
|
802 final = CHARSET_FINAL (cs);
|
|
803 direction = CHARSET_RIGHT_TO_LEFT;
|
|
804 if (CHARSET_DIRECTION (cs) == CHARSET_RIGHT_TO_LEFT)
|
|
805 direction = CHARSET_LEFT_TO_RIGHT;
|
|
806 doc_string = CHARSET_DOC_STRING (cs);
|
|
807 registry = CHARSET_REGISTRY (cs);
|
|
808
|
|
809 new_charset = make_charset (new_name, lb, dimension + 2, type, columns,
|
|
810 graphic, final, direction, doc_string, registry);
|
|
811
|
|
812 CHARSET_REVERSE_DIRECTION_CHARSET (cs) = new_charset;
|
|
813 XCHARSET_REVERSE_DIRECTION_CHARSET (new_charset) = charset;
|
|
814
|
|
815 return new_charset;
|
|
816 }
|
|
817
|
|
/* #### This function is compiled out.  Its defsubr is commented out
   as well at the moment, and no reason why is given. */
#if 0
DEFUN ("charset-reverse-direction-charset",
       Fcharset_reverse_direction_charset, 1, 1, 0, /*
Return the reverse-direction charset parallel to CHARSET, if any.
This is the charset with the same properties (in particular, the same
dimension, number of characters per dimension, and final byte) as
CHARSET but whose characters are displayed in the opposite direction.
*/
       (charset))
{
  Lisp_Object resolved = Fget_charset (charset);

  return XCHARSET_REVERSE_DIRECTION_CHARSET (resolved);
}
#endif
|
|
834
|
|
835 DEFUN ("charset-from-attributes", Fcharset_from_attributes, 3, 4, 0, /*
|
|
836 Return a charset with the given DIMENSION, CHARS, FINAL, and DIRECTION.
|
|
837 If DIRECTION is omitted, both directions will be checked (left-to-right
|
|
838 will be returned if character sets exist for both directions).
|
|
839 */
|
|
840 (dimension, chars, final, direction))
|
|
841 {
|
|
842 int dm, ch, fi, di = -1;
|
|
843 int type;
|
|
844 Lisp_Object obj = Qnil;
|
|
845
|
|
846 CHECK_INT (dimension);
|
|
847 dm = XINT (dimension);
|
|
848 if (dm < 1 || dm > 2)
|
|
849 signal_simple_error ("Invalid value for DIMENSION", dimension);
|
|
850
|
|
851 CHECK_INT (chars);
|
|
852 ch = XINT (chars);
|
|
853 if (ch != 94 && ch != 96)
|
|
854 signal_simple_error ("Invalid value for CHARS", chars);
|
|
855
|
|
856 CHECK_CHAR_COERCE_INT (final);
|
|
857 fi = XCHAR (final);
|
|
858 if (fi < '0' || fi > '~')
|
|
859 signal_simple_error ("Invalid value for FINAL", final);
|
|
860
|
|
861 if (EQ (direction, Ql2r))
|
|
862 di = CHARSET_LEFT_TO_RIGHT;
|
|
863 else if (EQ (direction, Qr2l))
|
|
864 di = CHARSET_RIGHT_TO_LEFT;
|
|
865 else if (!NILP (direction))
|
|
866 signal_simple_error ("Invalid value for DIRECTION", direction);
|
|
867
|
|
868 if (dm == 2 && fi > 0x5F)
|
|
869 signal_simple_error
|
|
870 ("Final must be in the range 0x30 - 0x5F for dimension == 2", final);
|
|
871
|
|
872 if (dm == 1)
|
|
873 type = (ch == 94) ? CHARSET_TYPE_94 : CHARSET_TYPE_96;
|
|
874 else
|
|
875 type = (ch == 94) ? CHARSET_TYPE_94X94 : CHARSET_TYPE_96X96;
|
|
876
|
|
877 if (di == -1)
|
|
878 {
|
|
879 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_LEFT_TO_RIGHT);
|
|
880 if (NILP (obj))
|
|
881 obj = CHARSET_BY_ATTRIBUTES (type, fi, CHARSET_RIGHT_TO_LEFT);
|
|
882 }
|
|
883 else
|
|
884 obj = CHARSET_BY_ATTRIBUTES (type, fi, di);
|
|
885
|
|
886 if (CHARSETP (obj))
|
|
887 return XCHARSET_NAME (obj);
|
|
888 return obj;
|
|
889 }
|
|
890
|
|
891 DEFUN ("charset-doc-string", Fcharset_doc_string, 1, 1, 0, /*
|
|
892 Return doc string of CHARSET.
|
|
893 */
|
|
894 (charset))
|
|
895 {
|
|
896 return XCHARSET_DOC_STRING (Fget_charset (charset));
|
|
897 }
|
|
898
|
|
899 DEFUN ("charset-dimension", Fcharset_dimension, 1, 1, 0, /*
|
|
900 Return dimension of CHARSET.
|
|
901 */
|
|
902 (charset))
|
|
903 {
|
|
904 return make_int (XCHARSET_DIMENSION (Fget_charset (charset)));
|
|
905 }
|
|
906
|
|
907 DEFUN ("charset-property", Fcharset_property, 2, 2, 0, /*
|
|
908 Return property PROP of CHARSET.
|
|
909 Recognized properties are those listed in `make-charset', as well as
|
|
910 'name and 'doc-string.
|
|
911 */
|
|
912 (charset, prop))
|
|
913 {
|
|
914 struct Lisp_Charset *cs;
|
|
915
|
|
916 charset = Fget_charset (charset);
|
|
917 cs = XCHARSET (charset);
|
|
918
|
|
919 CHECK_SYMBOL (prop);
|
|
920 if (EQ (prop, Qname)) return CHARSET_NAME (cs);
|
|
921 if (EQ (prop, Qdoc_string)) return CHARSET_DOC_STRING (cs);
|
|
922 if (EQ (prop, Qdimension)) return make_int (CHARSET_DIMENSION (cs));
|
|
923 if (EQ (prop, Qcolumns)) return make_int (CHARSET_COLUMNS (cs));
|
|
924 if (EQ (prop, Qgraphic)) return make_int (CHARSET_GRAPHIC (cs));
|
|
925 if (EQ (prop, Qfinal)) return make_char (CHARSET_FINAL (cs));
|
|
926 if (EQ (prop, Qchars)) return make_int (CHARSET_CHARS (cs));
|
|
927 if (EQ (prop, Qregistry)) return CHARSET_REGISTRY (cs);
|
|
928 if (EQ (prop, Qccl_program)) return CHARSET_CCL_PROGRAM (cs);
|
|
929 if (EQ (prop, Qdirection))
|
|
930 return (CHARSET_DIRECTION (cs) == CHARSET_LEFT_TO_RIGHT ? Ql2r : Qr2l);
|
|
931 if (EQ (prop, Qreverse_direction_charset))
|
|
932 {
|
|
933 Lisp_Object obj = CHARSET_REVERSE_DIRECTION_CHARSET (cs);
|
|
934 if (NILP (obj))
|
|
935 return Qnil;
|
|
936 else
|
|
937 return XCHARSET_NAME (obj);
|
|
938 }
|
|
939 signal_simple_error ("Unrecognized charset property name", prop);
|
|
940 return Qnil; /* not reached */
|
|
941 }
|
|
942
|
|
943 /* #### We need to figure out which properties we really want to
|
|
944 allow to be set. */
|
|
945
|
|
946 DEFUN ("set-charset-ccl-program", Fset_charset_ccl_program, 2, 2, 0, /*
|
|
947 Set the 'ccl-program property of CHARSET to CCL-PROGRAM.
|
|
948 */
|
|
949 (charset, ccl_program))
|
|
950 {
|
|
951 charset = Fget_charset (charset);
|
|
952 CHECK_VECTOR (ccl_program);
|
|
953 XCHARSET_CCL_PROGRAM (charset) = ccl_program;
|
|
954 return Qnil;
|
|
955 }
|
|
956
|
|
957 static void
|
|
958 invalidate_charset_font_caches (Lisp_Object charset)
|
|
959 {
|
|
960 /* Invalidate font cache entries for charset on all devices. */
|
|
961 Lisp_Object devcons, concons, hashtab;
|
|
962 DEVICE_LOOP_NO_BREAK (devcons, concons)
|
|
963 {
|
|
964 struct device *d = XDEVICE (XCAR (devcons));
|
|
965 hashtab = Fgethash (charset, d->charset_font_cache, Qunbound);
|
|
966 if (!UNBOUNDP (hashtab))
|
|
967 Fclrhash (hashtab);
|
|
968 }
|
|
969 }
|
|
970
|
|
971 /* Japanese folks may want to (set-charset-registry 'ascii "jisx0201") */
|
|
972 DEFUN ("set-charset-registry", Fset_charset_registry, 2, 2, 0, /*
|
|
973 Set the 'registry property of CHARSET to REGISTRY.
|
|
974 */
|
|
975 (charset, registry))
|
|
976 {
|
|
977 charset = Fget_charset (charset);
|
|
978 CHECK_STRING (registry);
|
|
979 XCHARSET_REGISTRY (charset) = registry;
|
|
980 invalidate_charset_font_caches (charset);
|
|
981 face_property_was_changed (Vdefault_face, Qfont, Qglobal);
|
|
982 return Qnil;
|
|
983 }
|
|
984
|
|
985
|
|
986 /************************************************************************/
|
|
987 /* Lisp primitives for working with characters */
|
|
988 /************************************************************************/
|
|
989
|
|
990 DEFUN ("make-char", Fmake_char, 2, 3, 0, /*
|
|
991 Make a multi-byte character from CHARSET and octets ARG1 and ARG2.
|
|
992 */
|
|
993 (charset, arg1, arg2))
|
|
994 {
|
|
995 struct Lisp_Charset *cs;
|
|
996 int a1, a2;
|
|
997 int lowlim, highlim;
|
|
998
|
|
999 charset = Fget_charset (charset);
|
|
1000 cs = XCHARSET (charset);
|
|
1001
|
|
1002 if (EQ (charset, Vcharset_ascii)) lowlim = 0, highlim = 127;
|
|
1003 else if (EQ (charset, Vcharset_control_1)) lowlim = 0, highlim = 31;
|
|
1004 else if (CHARSET_CHARS (cs) == 94) lowlim = 33, highlim = 126;
|
|
1005 else /* CHARSET_CHARS (cs) == 96) */ lowlim = 32, highlim = 127;
|
|
1006
|
|
1007 CHECK_INT (arg1);
|
|
1008 a1 = XINT (arg1);
|
|
1009 if (a1 < lowlim || a1 > highlim)
|
|
1010 args_out_of_range_3 (arg1, make_int (lowlim), make_int (highlim));
|
|
1011
|
|
1012 if (CHARSET_DIMENSION (cs) == 1)
|
|
1013 {
|
|
1014 if (!NILP (arg2))
|
|
1015 signal_simple_error
|
|
1016 ("Charset is of dimension one; second octet must be nil", arg2);
|
|
1017 return make_char (MAKE_CHAR (charset, a1, 0));
|
|
1018 }
|
|
1019
|
|
1020 CHECK_INT (arg2);
|
|
1021 a2 = XINT (arg2);
|
|
1022 if (a2 < lowlim || a2 > highlim)
|
|
1023 args_out_of_range_3 (arg2, make_int (lowlim), make_int (highlim));
|
|
1024
|
|
1025 return make_char (MAKE_CHAR (charset, a1, a2));
|
|
1026 }
|
|
1027
|
|
1028 DEFUN ("char-charset", Fchar_charset, 1, 1, 0, /*
|
|
1029 Return the character set of char CH.
|
|
1030 */
|
|
1031 (ch))
|
|
1032 {
|
|
1033 CHECK_CHAR_COERCE_INT (ch);
|
|
1034
|
|
1035 return XCHARSET_NAME (CHARSET_BY_LEADING_BYTE
|
|
1036 (CHAR_LEADING_BYTE (XCHAR (ch))));
|
|
1037 }
|
|
1038
|
|
1039 DEFUN ("char-octet", Fchar_octet, 1, 2, 0, /*
|
|
1040 Return the octet numbered N (should be 0 or 1) of char CH.
|
|
1041 N defaults to 0 if omitted.
|
|
1042 */
|
|
1043 (ch, n))
|
|
1044 {
|
|
1045 Lisp_Object charset;
|
|
1046 int c1, c2;
|
|
1047
|
|
1048 CHECK_CHAR_COERCE_INT (ch);
|
|
1049 if (NILP (n))
|
|
1050 n = Qzero;
|
|
1051 else
|
|
1052 {
|
|
1053 CHECK_INT (n);
|
|
1054 if (XINT (n) != 0 && XINT (n) != 1)
|
|
1055 signal_simple_error ("Octet number must be 0 or 1", n);
|
|
1056 }
|
|
1057 BREAKUP_CHAR (XCHAR (ch), charset, c1, c2);
|
|
1058 if (XINT (n) == 0)
|
|
1059 return make_int (c1);
|
|
1060 else
|
|
1061 return make_int (c2);
|
|
1062 }
|
|
1063
|
|
1064
|
|
1065 /************************************************************************/
|
|
1066 /* composite character functions */
|
|
1067 /************************************************************************/
|
|
1068
|
|
1069 Emchar
|
|
1070 lookup_composite_char (Bufbyte *str, int len)
|
|
1071 {
|
|
1072 Lisp_Object lispstr = make_string (str, len);
|
|
1073 Lisp_Object ch = Fgethash (lispstr,
|
|
1074 Vcomposite_char_string2char_hashtable,
|
|
1075 Qunbound);
|
|
1076 Emchar emch;
|
|
1077
|
|
1078 if (UNBOUNDP (ch))
|
|
1079 {
|
|
1080 if (composite_char_row_next >= 128)
|
|
1081 signal_simple_error ("No more composite chars available", lispstr);
|
|
1082 emch = MAKE_CHAR (Vcharset_composite, composite_char_row_next,
|
|
1083 composite_char_col_next);
|
|
1084 Fputhash (make_char (emch), lispstr,
|
|
1085 Vcomposite_char_char2string_hashtable);
|
|
1086 Fputhash (lispstr, make_char (emch),
|
|
1087 Vcomposite_char_string2char_hashtable);
|
|
1088 composite_char_col_next++;
|
|
1089 if (composite_char_col_next >= 128)
|
|
1090 {
|
|
1091 composite_char_col_next = 32;
|
|
1092 composite_char_row_next++;
|
|
1093 }
|
|
1094 }
|
|
1095 else
|
|
1096 emch = XCHAR (ch);
|
|
1097 return emch;
|
|
1098 }
|
|
1099
|
|
1100 Lisp_Object
|
|
1101 composite_char_string (Emchar ch)
|
|
1102 {
|
|
1103 Lisp_Object str = Fgethash (make_char (ch),
|
|
1104 Vcomposite_char_char2string_hashtable,
|
|
1105 Qunbound);
|
|
1106 assert (!UNBOUNDP (str));
|
|
1107 return str;
|
|
1108 }
|
|
1109
|
|
1110 DEFUN ("make-composite-char", Fmake_composite_char, 1, 1, 0, /*
|
|
1111 Convert a string into a single composite character.
|
|
1112 The character is the result of overstriking all the characters in
|
|
1113 the string.
|
|
1114 */
|
|
1115 (string))
|
|
1116 {
|
|
1117 CHECK_STRING (string);
|
|
1118 return make_char (lookup_composite_char (XSTRING_DATA (string),
|
|
1119 XSTRING_LENGTH (string)));
|
|
1120 }
|
|
1121
|
|
1122 DEFUN ("composite-char-string", Fcomposite_char_string, 1, 1, 0, /*
|
|
1123 Return a string of the characters comprising a composite character.
|
|
1124 */
|
|
1125 (ch))
|
|
1126 {
|
|
1127 Emchar emch;
|
|
1128
|
|
1129 CHECK_CHAR (ch);
|
|
1130 emch = XCHAR (ch);
|
|
1131 if (CHAR_LEADING_BYTE (emch) != LEADING_BYTE_COMPOSITE)
|
|
1132 signal_simple_error ("Must be composite char", ch);
|
|
1133 return composite_char_string (emch);
|
|
1134 }
|
|
1135
|
|
1136
|
|
1137 /************************************************************************/
|
|
1138 /* initialization */
|
|
1139 /************************************************************************/
|
|
1140
|
|
1141 void
|
|
1142 syms_of_mule_charset (void)
|
|
1143 {
|
|
1144 DEFSUBR (Fcharsetp);
|
|
1145 DEFSUBR (Ffind_charset);
|
|
1146 DEFSUBR (Fget_charset);
|
|
1147 DEFSUBR (Fcharset_list);
|
|
1148 DEFSUBR (Fcharset_name);
|
|
1149 DEFSUBR (Fmake_charset);
|
|
1150 DEFSUBR (Fmake_reverse_direction_charset);
|
|
1151 /* DEFSUBR (Freverse_direction_charset); */
|
|
1152 DEFSUBR (Fcharset_from_attributes);
|
|
1153 DEFSUBR (Fcharset_doc_string);
|
|
1154 DEFSUBR (Fcharset_dimension);
|
|
1155 DEFSUBR (Fcharset_property);
|
|
1156 DEFSUBR (Fset_charset_ccl_program);
|
|
1157 DEFSUBR (Fset_charset_registry);
|
|
1158
|
|
1159 DEFSUBR (Fmake_char);
|
|
1160 DEFSUBR (Fchar_charset);
|
|
1161 DEFSUBR (Fchar_octet);
|
|
1162
|
|
1163 DEFSUBR (Fmake_composite_char);
|
|
1164 DEFSUBR (Fcomposite_char_string);
|
|
1165
|
|
1166 defsymbol (&Qcharsetp, "charsetp");
|
|
1167 defsymbol (&Qregistry, "registry");
|
|
1168 defsymbol (&Qfinal, "final");
|
|
1169 defsymbol (&Qgraphic, "graphic");
|
|
1170 defsymbol (&Qdirection, "direction");
|
|
1171 defsymbol (&Qreverse_direction_charset, "reverse-direction-charset");
|
|
1172 defsymbol (&Qccl_program, "ccl-program");
|
|
1173
|
|
1174 defsymbol (&Ql2r, "l2r");
|
|
1175 defsymbol (&Qr2l, "r2l");
|
|
1176
|
74
|
1177 /* Charsets, compatible with Emacs/Mule 19.33-delta
|
|
1178 Naming convention is Script-Charset[-Edition] */
|
|
1179 defsymbol (&Qascii, "ascii");
|
|
1180 defsymbol (&Qcontrol_1, "control-1");
|
|
1181 defsymbol (&Qlatin_iso8859_1, "latin-iso8859-1");
|
|
1182 defsymbol (&Qlatin_iso8859_2, "latin-iso8859-2");
|
|
1183 defsymbol (&Qlatin_iso8859_3, "latin-iso8859-3");
|
|
1184 defsymbol (&Qlatin_iso8859_4, "latin-iso8859-4");
|
|
1185 defsymbol (&Qcyrillic_iso8859_5, "cyrillic-iso8859-5");
|
|
1186 defsymbol (&Qarabic_iso8859_6, "arabic-iso8859-6");
|
|
1187 defsymbol (&Qgreek_iso8859_7, "greek-iso8859-7");
|
|
1188 defsymbol (&Qhebrew_iso8859_8, "hebrew-iso8859-8");
|
|
1189 defsymbol (&Qlatin_iso8859_9, "latin-iso8859-9");
|
|
1190 defsymbol (&Qthai_tis620, "thai-tis620");
|
70
|
1191
|
74
|
1192 defsymbol (&Qkatakana_jisx0201, "katakana-jisx0201");
|
|
1193 defsymbol (&Qlatin_jisx0201, "latin-jisx0201");
|
70
|
1194 defsymbol (&Qjapanese_jisx0208_1978, "japanese-jisx0208-1978");
|
|
1195 defsymbol (&Qjapanese_jisx0208, "japanese-jisx0208");
|
|
1196 defsymbol (&Qjapanese_jisx0212, "japanese-jisx0212");
|
|
1197
|
74
|
1198 defsymbol (&Qchinese_gb2312, "chinese-gb2312");
|
|
1199 defsymbol (&Qchinese_big5_1, "chinese-big5-1");
|
|
1200 defsymbol (&Qchinese_big5_2, "chinese-big5-2");
|
|
1201 defsymbol (&Qchinese_cns11643_1, "chinese-cns11643-1");
|
|
1202 defsymbol (&Qchinese_cns11643_2, "chinese-cns11643-2");
|
70
|
1203
|
74
|
1204 defsymbol (&Qkorean_ksc5601, "korean-ksc5601");
|
|
1205 defsymbol (&Qcomposite, "composite");
|
70
|
1206 }
|
|
1207
|
|
1208 void
|
|
1209 vars_of_mule_charset (void)
|
|
1210 {
|
|
1211 int i, j, k;
|
|
1212
|
|
1213 for (i = 0; i < 128; i++)
|
|
1214 charset_by_leading_byte[i] = Qnil;
|
|
1215
|
|
1216 for (i = 0; i < 4; i++)
|
|
1217 for (j = 0; j < 128; j++)
|
|
1218 for (k = 0; k < 2; k ++)
|
|
1219 charset_by_attributes[i][j][k] = Qnil;
|
|
1220
|
|
1221 /* Now done at compile time
|
|
1222 for (i = 0; i < 128; i++)
|
|
1223 rep_bytes_by_first_byte[i] = 1;
|
|
1224 */
|
|
1225
|
|
1226 next_allocated_1_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_1;
|
|
1227 next_allocated_2_byte_leading_byte = MIN_LEADING_BYTE_PRIVATE_2;
|
|
1228 }
|
|
1229
|
|
1230 void
|
|
1231 complex_vars_of_mule_charset (void)
|
|
1232 {
|
|
1233 staticpro (&Vcharset_hashtable);
|
|
1234 Vcharset_hashtable = make_lisp_hashtable (50, HASHTABLE_NONWEAK,
|
|
1235 HASHTABLE_EQ);
|
|
1236
|
|
1237 /* Predefined character sets. We store them into variables for
|
|
1238 ease of access. */
|
|
1239
|
|
1240 Vcharset_ascii =
|
|
1241 make_charset (Qascii, LEADING_BYTE_ASCII, 1,
|
|
1242 CHARSET_TYPE_94, 1, 0, 'B',
|
|
1243 CHARSET_LEFT_TO_RIGHT,
|
|
1244 build_string ("ASCII"),
|
78
|
1245 build_string ("iso8859-1"));
|
70
|
1246 Vcharset_control_1 =
|
|
1247 make_charset (Qcontrol_1, LEADING_BYTE_CONTROL_1, 2,
|
|
1248 CHARSET_TYPE_94, 1, 0, 0,
|
|
1249 CHARSET_LEFT_TO_RIGHT,
|
|
1250 build_string ("Control characters"),
|
|
1251 build_string (""));
|
74
|
1252 Vcharset_latin_iso8859_1 =
|
|
1253 make_charset (Qlatin_iso8859_1, LEADING_BYTE_LATIN_ISO8859_1, 2,
|
70
|
1254 CHARSET_TYPE_96, 1, 1, 'A',
|
|
1255 CHARSET_LEFT_TO_RIGHT,
|
|
1256 build_string ("Latin-1"),
|
78
|
1257 build_string ("iso8859-1"));
|
74
|
1258 Vcharset_latin_iso8859_2 =
|
|
1259 make_charset (Qlatin_iso8859_2, LEADING_BYTE_LATIN_ISO8859_2, 2,
|
70
|
1260 CHARSET_TYPE_96, 1, 1, 'B',
|
|
1261 CHARSET_LEFT_TO_RIGHT,
|
|
1262 build_string ("Latin-2"),
|
78
|
1263 build_string ("iso8859-2"));
|
74
|
1264 Vcharset_latin_iso8859_3 =
|
|
1265 make_charset (Qlatin_iso8859_3, LEADING_BYTE_LATIN_ISO8859_3, 2,
|
70
|
1266 CHARSET_TYPE_96, 1, 1, 'C',
|
|
1267 CHARSET_LEFT_TO_RIGHT,
|
|
1268 build_string ("Latin-3"),
|
78
|
1269 build_string ("iso8859-3"));
|
74
|
1270 Vcharset_latin_iso8859_4 =
|
|
1271 make_charset (Qlatin_iso8859_4, LEADING_BYTE_LATIN_ISO8859_4, 2,
|
70
|
1272 CHARSET_TYPE_96, 1, 1, 'D',
|
|
1273 CHARSET_LEFT_TO_RIGHT,
|
|
1274 build_string ("Latin-4"),
|
78
|
1275 build_string ("iso8859-4"));
|
74
|
1276 Vcharset_cyrillic_iso8859_5 =
|
|
1277 make_charset (Qcyrillic_iso8859_5, LEADING_BYTE_CYRILLIC_ISO8859_5, 2,
|
70
|
1278 CHARSET_TYPE_96, 1, 1, 'L',
|
|
1279 CHARSET_LEFT_TO_RIGHT,
|
|
1280 build_string ("Cyrillic"),
|
78
|
1281 build_string ("iso8859-5"));
|
74
|
1282 Vcharset_arabic_iso8859_6 =
|
|
1283 make_charset (Qarabic_iso8859_6, LEADING_BYTE_ARABIC_ISO8859_6, 2,
|
70
|
1284 CHARSET_TYPE_96, 1, 1, 'G',
|
|
1285 CHARSET_RIGHT_TO_LEFT,
|
|
1286 build_string ("Arabic"),
|
78
|
1287 build_string ("iso8859-6"));
|
74
|
1288 Vcharset_greek_iso8859_7 =
|
|
1289 make_charset (Qgreek_iso8859_7, LEADING_BYTE_GREEK_ISO8859_7, 2,
|
70
|
1290 CHARSET_TYPE_96, 1, 1, 'F',
|
|
1291 CHARSET_LEFT_TO_RIGHT,
|
|
1292 build_string ("Greek"),
|
78
|
1293 build_string ("iso8859-7"));
|
74
|
1294 Vcharset_hebrew_iso8859_8 =
|
|
1295 make_charset (Qhebrew_iso8859_8, LEADING_BYTE_HEBREW_ISO8859_8, 2,
|
70
|
1296 CHARSET_TYPE_96, 1, 1, 'H',
|
|
1297 CHARSET_RIGHT_TO_LEFT,
|
|
1298 build_string ("Hebrew"),
|
78
|
1299 build_string ("iso8859-8"));
|
74
|
1300 Vcharset_latin_iso8859_9 =
|
|
1301 make_charset (Qlatin_iso8859_9, LEADING_BYTE_LATIN_ISO8859_9, 2,
|
70
|
1302 CHARSET_TYPE_96, 1, 1, 'M',
|
|
1303 CHARSET_LEFT_TO_RIGHT,
|
|
1304 build_string ("Latin-5"),
|
78
|
1305 build_string ("iso8859-9"));
|
74
|
1306 Vcharset_thai_tis620 =
|
|
1307 make_charset (Qthai_tis620, LEADING_BYTE_THAI_TIS620, 2,
|
70
|
1308 CHARSET_TYPE_96, 1, 1, 'T',
|
|
1309 CHARSET_LEFT_TO_RIGHT,
|
|
1310 build_string ("Thai"),
|
78
|
1311 build_string ("tis620"));
|
70
|
1312
|
|
1313 /* Japanese */
|
74
|
1314 Vcharset_katakana_jisx0201 =
|
|
1315 make_charset (Qkatakana_jisx0201,
|
|
1316 LEADING_BYTE_KATAKANA_JISX0201, 2,
|
70
|
1317 CHARSET_TYPE_94, 1, 1, 'I',
|
|
1318 CHARSET_LEFT_TO_RIGHT,
|
|
1319 build_string ("Japanese Katakana"),
|
78
|
1320 build_string ("jisx0201.1976"));
|
74
|
1321 Vcharset_latin_jisx0201 =
|
|
1322 make_charset (Qlatin_jisx0201,
|
|
1323 LEADING_BYTE_LATIN_JISX0201, 2,
|
70
|
1324 CHARSET_TYPE_94, 1, 0, 'J',
|
|
1325 CHARSET_LEFT_TO_RIGHT,
|
|
1326 build_string ("Japanese Roman"),
|
78
|
1327 build_string ("jisx0201.1976"));
|
70
|
1328 Vcharset_japanese_jisx0208_1978 =
|
|
1329 make_charset (Qjapanese_jisx0208_1978,
|
|
1330 LEADING_BYTE_JAPANESE_JISX0208_1978, 3,
|
|
1331 CHARSET_TYPE_94X94, 2, 0, '@',
|
|
1332 CHARSET_LEFT_TO_RIGHT,
|
|
1333 build_string ("Japanese Old"),
|
78
|
1334 build_string ("\\(jisx0208\\|jisc6226\\).19"));
|
70
|
1335 Vcharset_japanese_jisx0208 =
|
|
1336 make_charset (Qjapanese_jisx0208,
|
|
1337 LEADING_BYTE_JAPANESE_JISX0208, 3,
|
|
1338 CHARSET_TYPE_94X94, 2, 0, 'B',
|
|
1339 CHARSET_LEFT_TO_RIGHT,
|
|
1340 build_string ("Japanese"),
|
78
|
1341 build_string ("jisx0208.19\\(83\\|90\\)"));
|
70
|
1342 Vcharset_japanese_jisx0212 =
|
|
1343 make_charset (Qjapanese_jisx0212,
|
|
1344 LEADING_BYTE_JAPANESE_JISX0212, 3,
|
|
1345 CHARSET_TYPE_94X94, 2, 0, 'D',
|
|
1346 CHARSET_LEFT_TO_RIGHT,
|
|
1347 build_string ("Japanese Supplement"),
|
78
|
1348 build_string ("jisx0212"));
|
70
|
1349
|
|
1350 /* Chinese */
|
74
|
1351 Vcharset_chinese_gb2312 =
|
|
1352 make_charset (Qchinese_gb2312, LEADING_BYTE_CHINESE_GB2312, 3,
|
70
|
1353 CHARSET_TYPE_94X94, 2, 0, 'A',
|
|
1354 CHARSET_LEFT_TO_RIGHT,
|
74
|
1355 build_string ("Chinese GB2312"),
|
78
|
1356 build_string ("gb2312"));
|
|
1357 #define CHINESE_CNS_PLANE_RE(n) "cns11643[.-]\\(.*[.-]\\)?" n "$"
|
70
|
1358 Vcharset_chinese_cns11643_1 =
|
|
1359 make_charset (Qchinese_cns11643_1,
|
|
1360 LEADING_BYTE_CHINESE_CNS11643_1, 3,
|
|
1361 CHARSET_TYPE_94X94, 2, 0, 'G',
|
|
1362 CHARSET_LEFT_TO_RIGHT,
|
|
1363 build_string ("Chinese CNS Plane 1"),
|
|
1364 build_string (CHINESE_CNS_PLANE_RE("1")));
|
|
1365 Vcharset_chinese_cns11643_2 =
|
|
1366 make_charset (Qchinese_cns11643_2,
|
|
1367 LEADING_BYTE_CHINESE_CNS11643_2, 3,
|
|
1368 CHARSET_TYPE_94X94, 2, 0, 'H',
|
|
1369 CHARSET_LEFT_TO_RIGHT,
|
|
1370 build_string ("Chinese CNS Plane 2"),
|
|
1371 build_string (CHINESE_CNS_PLANE_RE("2")));
|
|
1372 Vcharset_chinese_big5_1 =
|
|
1373 make_charset (Qchinese_big5_1, LEADING_BYTE_CHINESE_BIG5_1, 3,
|
|
1374 CHARSET_TYPE_94X94, 2, 0, '0',
|
|
1375 CHARSET_LEFT_TO_RIGHT,
|
|
1376 build_string ("Chinese Big5 Level 1"),
|
78
|
1377 build_string ("big5"));
|
70
|
1378 Vcharset_chinese_big5_2 =
|
|
1379 make_charset (Qchinese_big5_2, LEADING_BYTE_CHINESE_BIG5_2, 3,
|
|
1380 CHARSET_TYPE_94X94, 2, 0, '1',
|
|
1381 CHARSET_LEFT_TO_RIGHT,
|
|
1382 build_string ("Chinese Big5 Level 2"),
|
78
|
1383 build_string ("big5"));
|
70
|
1384
|
|
1385 Vcharset_korean_ksc5601 =
|
|
1386 make_charset (Qkorean_ksc5601, LEADING_BYTE_KOREAN_KSC5601, 3,
|
|
1387 CHARSET_TYPE_94X94, 2, 0, 'C',
|
|
1388 CHARSET_LEFT_TO_RIGHT,
|
|
1389 build_string ("Korean"),
|
78
|
1390 build_string ("ksc5601"));
|
70
|
1391 /* #### For simplicity, we put composite chars into a 96x96 charset.
|
|
1392 This is going to lead to problems because you can run out of
|
|
1393 room, esp. as we don't yet recycle numbers. */
|
|
1394 Vcharset_composite =
|
|
1395 make_charset (Qcomposite, LEADING_BYTE_COMPOSITE, 3,
|
|
1396 CHARSET_TYPE_96X96, 2, 0, 0,
|
|
1397 CHARSET_LEFT_TO_RIGHT,
|
|
1398 build_string ("Composite characters"),
|
|
1399 build_string (""));
|
|
1400
|
|
1401 composite_char_row_next = 32;
|
|
1402 composite_char_col_next = 32;
|
|
1403
|
|
1404 Vcomposite_char_string2char_hashtable =
|
|
1405 make_lisp_hashtable (500, HASHTABLE_NONWEAK,
|
|
1406 HASHTABLE_EQUAL);
|
|
1407 Vcomposite_char_char2string_hashtable =
|
|
1408 make_lisp_hashtable (500, HASHTABLE_NONWEAK,
|
|
1409 HASHTABLE_EQ);
|
|
1410 staticpro (&Vcomposite_char_string2char_hashtable);
|
|
1411 staticpro (&Vcomposite_char_char2string_hashtable);
|
|
1412
|
|
1413 /* Faux charsets used only for convenience in retrieving the
|
|
1414 number of rep bytes associated with a leading byte. */
|
|
1415
|
|
1416 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_1, 3, 0, 0, 0, 0,
|
|
1417 0, Qnil, Qnil);
|
|
1418 make_charset (Qnil, PRE_LEADING_BYTE_PRIVATE_2, 4, 0, 0, 0, 0,
|
|
1419 0, Qnil, Qnil);
|
|
1420 }
|