Mercurial > hg > xemacs-beta
annotate src/mule-coding.c @ 5629:0d05accafc63
Don't lose bits in make_fixnum / make_char_1.
See xemacs-patches message with ID
<CAHCOHQnRTjm6c5gWVO3iizWJ9Jb7GvEyFe3aQ19hAXhcR_mrrA@mail.gmail.com>.
author | Jerry James <james@xemacs.org> |
---|---|
date | Wed, 28 Dec 2011 11:30:47 -0700 |
parents | 56144c8593a8 |
children |
rev | line source |
---|---|
771 | 1 /* Conversion functions for I18N encodings, but not Unicode (in separate file). |
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
5100
3d91f0b64469
fix bad bug with escape-quoted handling
Ben Wing <ben@xemacs.org>
parents:
4976
diff
changeset
|
4 Copyright (C) 2000, 2001, 2002, 2010 Ben Wing. |
771 | 5 |
6 This file is part of XEmacs. | |
7 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
771 | 9 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
11 option) any later version. |
771 | 12 |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
771 | 20 |
21 /* Synched up with: Mule 2.3. Not in FSF. */ | |
22 | |
23 /* For previous history, see file-coding.c. | |
24 | |
25 September 10, 2001: Extracted from file-coding.c by Ben Wing. | |
26 | |
27 Later in September: Finished abstraction of detection system, rewrote | |
28 all the detectors to include multiple levels of likelihood. | |
29 */ | |
30 | |
31 #include <config.h> | |
32 #include "lisp.h" | |
33 | |
34 #include "charset.h" | |
35 #include "mule-ccl.h" | |
36 #include "file-coding.h" | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
37 #include "elhash.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
38 #include "rangetab.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
39 #include "buffer.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
40 #include "extents.h" |
771 | 41 |
5565
48a3d3281b48
Pass eighth bit on TTY consoles to coding system if needed.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5538
diff
changeset
|
42 Lisp_Object Qshift_jis, Qbig5, Qccl; |
771 | 43 |
44 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3; | |
45 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; | |
46 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; | |
47 Lisp_Object Qno_iso6429; | |
48 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion; | |
5565
48a3d3281b48
Pass eighth bit on TTY consoles to coding system if needed.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5538
diff
changeset
|
49 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qlock_shift; |
771 | 50 |
51 Lisp_Object Qiso_7, Qiso_8_designate, Qiso_8_1, Qiso_8_2, Qiso_lock_shift; | |
52 | |
4691
3ba90c659d01
Move Qfrom_unicode to general-slots.h, fix the native Win32 build.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
53 Lisp_Object Qquery_skip_chars, Qinvalid_sequences_skip_chars; |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
54 Lisp_Object Qfixed_width; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
55 |
771 | 56 |
57 /************************************************************************/ | |
58 /* Shift-JIS methods */ | |
59 /************************************************************************/ | |
60 | |
61 /* Shift-JIS; Hankaku (half-width) KANA is also supported. */ | |
62 DEFINE_CODING_SYSTEM_TYPE (shift_jis); | |
63 | |
64 /* Shift-JIS is a coding system encoding three character sets: ASCII, right | |
65 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded | |
66 as is. A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is | |
67 encoded by "position-code + 0x80". A character of JISX0208 | |
68 (DIMENSION2_CHARS94 character set) is encoded in two bytes, but the two | |
69 position-codes are divided and shifted so that they fit in the ranges | |
70 below. | |
71 | |
72 --- CODE RANGE of Shift-JIS --- | |
73 (character set) (range) | |
74 ASCII 0x00 .. 0x7F | |
75 JISX0201-Kana 0xA1 .. 0xDF | |
76 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF | |
77 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC | |
78 ------------------------------- | |
79 | |
80 */ | |
81 | |
82 /* Is this the first byte of a Shift-JIS two-byte char? */ | |
83 | |
/* Return 1 if C lies in 0x81 .. 0x9F or 0xE0 .. 0xEF, the values a
   Shift-JIS two-byte character may start with; otherwise return 0. */
inline static int
byte_shift_jis_two_byte_1_p (int c)
{
  /* Anything outside the overall span cannot match either range. */
  if (c < 0x81 || c > 0xEF)
    return 0;
  /* Inside the span, only the gap 0xA0 .. 0xDF is excluded. */
  return c <= 0x9F || c >= 0xE0;
}
771 | 89 |
90 /* Is this the second byte of a Shift-JIS two-byte char? */ | |
91 | |
/* Return 1 if C lies in 0x40 .. 0x7E or 0x80 .. 0xFC, the values allowed
   for the second byte of a Shift-JIS two-byte character; otherwise 0. */
inline static int
byte_shift_jis_two_byte_2_p (int c)
{
  /* The two ranges are contiguous except for the single value 0x7F. */
  return c >= 0x40 && c <= 0xFC && c != 0x7F;
}
97 | |
/* Return 1 if C lies in 0xA1 .. 0xDF, the single-byte range that
   shift_jis_convert decodes as JISX0201 katakana; otherwise return 0. */
inline static int
byte_shift_jis_katakana_p (int c)
{
  return !(c < 0xA1 || c > 0xDF);
}
771 | 103 |
3439 | 104 inline static void |
105 dynarr_add_2022_one_dimension (Lisp_Object charset, Ibyte c, | |
106 unsigned char charmask, | |
107 unsigned_char_dynarr *dst) | |
108 { | |
109 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
110 { | |
111 encode_unicode_char (charset, c & charmask, 0, | |
4096 | 112 dst, UNICODE_UTF_8, 0, 0); |
3439 | 113 } |
114 else | |
115 { | |
116 Dynarr_add (dst, c & charmask); | |
117 } | |
118 } | |
119 | |
120 inline static void | |
121 dynarr_add_2022_two_dimensions (Lisp_Object charset, Ibyte c, | |
122 unsigned int ch, | |
123 unsigned char charmask, | |
124 unsigned_char_dynarr *dst) | |
125 { | |
126 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
127 { | |
128 encode_unicode_char (charset, | |
129 ch & charmask, | |
130 c & charmask, dst, | |
4096 | 131 UNICODE_UTF_8, 0, 0); |
3439 | 132 } |
133 else | |
134 { | |
135 Dynarr_add (dst, ch & charmask); | |
136 Dynarr_add (dst, c & charmask); | |
137 } | |
138 } | |
139 | |
771 | 140 /* Convert Shift-JIS data to internal format. */ |
141 | |
142 static Bytecount | |
143 shift_jis_convert (struct coding_stream *str, const UExtbyte *src, | |
144 unsigned_char_dynarr *dst, Bytecount n) | |
145 { | |
146 unsigned int ch = str->ch; | |
147 Bytecount orign = n; | |
148 | |
149 if (str->direction == CODING_DECODE) | |
150 { | |
151 while (n--) | |
152 { | |
153 UExtbyte c = *src++; | |
154 | |
155 if (ch) | |
156 { | |
157 /* Previous character was first byte of Shift-JIS Kanji char. */ | |
826 | 158 if (byte_shift_jis_two_byte_2_p (c)) |
771 | 159 { |
867 | 160 Ibyte e1, e2; |
771 | 161 |
162 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); | |
163 DECODE_SHIFT_JIS (ch, c, e1, e2); | |
164 Dynarr_add (dst, e1); | |
165 Dynarr_add (dst, e2); | |
166 } | |
167 else | |
168 { | |
169 DECODE_ADD_BINARY_CHAR (ch, dst); | |
170 DECODE_ADD_BINARY_CHAR (c, dst); | |
171 } | |
172 ch = 0; | |
173 } | |
174 else | |
175 { | |
826 | 176 if (byte_shift_jis_two_byte_1_p (c)) |
771 | 177 ch = c; |
826 | 178 else if (byte_shift_jis_katakana_p (c)) |
771 | 179 { |
180 Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201); | |
181 Dynarr_add (dst, c); | |
182 } | |
183 else | |
184 DECODE_ADD_BINARY_CHAR (c, dst); | |
185 } | |
186 } | |
187 | |
188 if (str->eof) | |
189 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
190 } | |
191 else | |
192 { | |
193 while (n--) | |
194 { | |
867 | 195 Ibyte c = *src++; |
826 | 196 if (byte_ascii_p (c)) |
771 | 197 { |
198 Dynarr_add (dst, c); | |
199 ch = 0; | |
200 } | |
867 | 201 else if (ibyte_leading_byte_p (c)) |
771 | 202 ch = (c == LEADING_BYTE_KATAKANA_JISX0201 || |
203 c == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
204 c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; | |
205 else if (ch) | |
206 { | |
207 if (ch == LEADING_BYTE_KATAKANA_JISX0201) | |
208 { | |
209 Dynarr_add (dst, c); | |
210 ch = 0; | |
211 } | |
212 else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
213 ch == LEADING_BYTE_JAPANESE_JISX0208) | |
214 ch = c; | |
215 else | |
216 { | |
5538
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
217 UExtbyte b1, b2; |
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
218 ENCODE_SHIFT_JIS (ch, c, b1, b2); |
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
219 Dynarr_add (dst, b1); |
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
220 Dynarr_add (dst, b2); |
771 | 221 ch = 0; |
222 } | |
223 } | |
224 } | |
225 } | |
226 | |
227 str->ch = ch; | |
228 | |
229 return orign; | |
230 } | |
231 | |
232 DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /* | |
233 Decode a JISX0208 character of Shift-JIS coding-system. | |
234 CODE is the character code in Shift-JIS as a cons of type bytes. | |
235 Return the corresponding character. | |
236 */ | |
237 (code)) | |
238 { | |
239 int c1, c2, s1, s2; | |
240 | |
241 CHECK_CONS (code); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
242 CHECK_FIXNUM (XCAR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
243 CHECK_FIXNUM (XCDR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
244 s1 = XFIXNUM (XCAR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
245 s2 = XFIXNUM (XCDR (code)); |
826 | 246 if (byte_shift_jis_two_byte_1_p (s1) && |
247 byte_shift_jis_two_byte_2_p (s2)) | |
771 | 248 { |
249 DECODE_SHIFT_JIS (s1, s2, c1, c2); | |
867 | 250 return make_char (make_ichar (Vcharset_japanese_jisx0208, |
831 | 251 c1 & 0x7F, c2 & 0x7F)); |
771 | 252 } |
253 else | |
254 return Qnil; | |
255 } | |
256 | |
257 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* | |
258 Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system. | |
259 Return the corresponding character code in SHIFT-JIS as a cons of two bytes. | |
260 */ | |
261 (character)) | |
262 { | |
263 Lisp_Object charset; | |
264 int c1, c2, s1, s2; | |
265 | |
266 CHECK_CHAR_COERCE_INT (character); | |
867 | 267 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
771 | 268 if (EQ (charset, Vcharset_japanese_jisx0208)) |
269 { | |
270 ENCODE_SHIFT_JIS (c1 | 0x80, c2 | 0x80, s1, s2); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
271 return Fcons (make_fixnum (s1), make_fixnum (s2)); |
771 | 272 } |
273 else | |
274 return Qnil; | |
275 } | |
276 | |
277 | |
278 /************************************************************************/ | |
279 /* Shift-JIS detector */ | |
280 /************************************************************************/ | |
281 | |
282 DEFINE_DETECTOR (shift_jis); | |
283 DEFINE_DETECTOR_CATEGORY (shift_jis, shift_jis); | |
284 | |
/* Running statistics kept by shift_jis_detect across calls. */
struct shift_jis_detector
{
  /* Valid two-byte sequences whose first byte was in 0x80 .. 0x9F or
     whose second byte was in 0x80 .. 0x9F. */
  int seen_jisx0208_char_in_c1;
  /* All other valid two-byte sequences. */
  int seen_jisx0208_char_in_upper;
  /* Single bytes in 0xA0 .. 0xDF. */
  int seen_jisx0201_char;
  /* Saw ESC, SI or SO where a first byte was expected. */
  unsigned int seen_iso2022_esc:1;
  /* Saw some other byte >= 0x80 that cannot start a character. */
  unsigned int seen_bad_first_byte:1;
  /* Saw an invalid byte where a second byte was expected. */
  unsigned int seen_bad_second_byte:1;

  /* Parser state, meaningful only between bytes of one character. */
  unsigned int in_second_byte:1;     /* next byte is a second byte */
  unsigned int first_byte_was_c1:1;  /* first byte was 0x80 .. 0x9F */
};
297 | |
298 static void | |
299 shift_jis_detect (struct detection_state *st, const UExtbyte *src, | |
300 Bytecount n) | |
301 { | |
302 struct shift_jis_detector *data = DETECTION_STATE_DATA (st, shift_jis); | |
303 | |
304 while (n--) | |
305 { | |
306 UExtbyte c = *src++; | |
307 if (!data->in_second_byte) | |
308 { | |
309 if (c >= 0x80 && c <= 0x9F) | |
310 data->first_byte_was_c1 = 1; | |
311 if (c >= 0xA0 && c <= 0xDF) | |
312 data->seen_jisx0201_char++; | |
313 else if ((c >= 0x80 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) | |
314 data->in_second_byte = 1; | |
315 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
316 data->seen_iso2022_esc = 1; | |
317 else if (c >= 0x80) | |
318 data->seen_bad_first_byte = 1; | |
319 } | |
320 else | |
321 { | |
322 if ((c >= 0x40 && c <= 0x7E) || (c >= 0x80 && c <= 0xFC)) | |
323 { | |
324 if (data->first_byte_was_c1 || (c >= 0x80 && c <= 0x9F)) | |
325 data->seen_jisx0208_char_in_c1++; | |
326 else | |
327 data->seen_jisx0208_char_in_upper++; | |
328 } | |
329 else | |
330 data->seen_bad_second_byte = 1; | |
331 data->in_second_byte = 0; | |
332 data->first_byte_was_c1 = 0; | |
333 } | |
334 } | |
335 | |
336 if (data->seen_bad_second_byte) | |
337 DET_RESULT (st, shift_jis) = DET_NEARLY_IMPOSSIBLE; | |
338 else if (data->seen_bad_first_byte) | |
339 DET_RESULT (st, shift_jis) = DET_QUITE_IMPROBABLE; | |
340 else if (data->seen_iso2022_esc) | |
341 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_UNLIKELY; | |
342 else if (data->seen_jisx0208_char_in_c1 >= 20 || | |
343 (data->seen_jisx0208_char_in_c1 >= 10 && | |
344 data->seen_jisx0208_char_in_upper >= 10)) | |
345 DET_RESULT (st, shift_jis) = DET_QUITE_PROBABLE; | |
346 else if (data->seen_jisx0208_char_in_c1 > 3 || | |
347 data->seen_jisx0208_char_in_upper >= 10 || | |
348 /* Since the range is limited compared to what is often seen | |
349 is typical Latin-X charsets, the fact that we've seen a | |
350 bunch of them and none that are invalid is reasonably | |
351 strong statistical evidence of this encoding, or at least | |
352 not of the common Latin-X ones. */ | |
353 data->seen_jisx0201_char >= 100) | |
354 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_LIKELY; | |
355 else if (data->seen_jisx0208_char_in_c1 > 0 || | |
356 data->seen_jisx0208_char_in_upper > 0 || | |
357 data->seen_jisx0201_char > 0) | |
358 DET_RESULT (st, shift_jis) = DET_SLIGHTLY_LIKELY; | |
359 else | |
360 DET_RESULT (st, shift_jis) = DET_AS_LIKELY_AS_UNLIKELY; | |
361 } | |
362 | |
363 | |
364 /************************************************************************/ | |
365 /* Big5 methods */ | |
366 /************************************************************************/ | |
367 | |
2819 | 368 /* BIG5 (used for Mandarin in Taiwan). */ |
771 | 369 DEFINE_CODING_SYSTEM_TYPE (big5); |
370 | |
371 /* BIG5 is a coding system encoding two character sets: ASCII and | |
372 Big5. An ASCII character is encoded as is. Big5 is a two-byte | |
373 character set and is encoded in two-byte. | |
374 | |
375 --- CODE RANGE of BIG5 --- | |
376 (character set) (range) | |
377 ASCII 0x00 .. 0x7F | |
378 Big5 (1st byte) 0xA1 .. 0xFE | |
379 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE | |
380 -------------------------- | |
381 | |
382 Since the number of characters in Big5 is larger than maximum | |
383 characters in Emacs' charset (96x96), it can't be handled as one | |
384 charset. So, in XEmacs, Big5 is divided into two: `charset-big5-1' | |
385 and `charset-big5-2'. Both <type>s are DIMENSION2_CHARS94. The former | |
386 contains frequently used characters and the latter contains less | |
387 frequently used characters. */ | |
388 | |
/* Return 1 if C lies in 0xA1 .. 0xFE, the range of the first byte of a
   Big5 two-byte character; otherwise return 0. */
inline static int
byte_big5_two_byte_1_p (int c)
{
  return !(c < 0xA1 || c > 0xFE);
}
771 | 394 |
395 /* Is this the second byte of a Big5 two-byte char? */ | |
396 | |
/* Return 1 if C lies in 0x40 .. 0x7E or 0xA1 .. 0xFE, the values allowed
   for the second byte of a Big5 two-byte character; otherwise return 0. */
inline static int
byte_big5_two_byte_2_p (int c)
{
  /* Low range first; everything above 0x7E must be in the high range. */
  if (c <= 0x7E)
    return c >= 0x40;
  return c >= 0xA1 && c <= 0xFE;
}
771 | 402 |
403 /* Number of Big5 characters which have the same code in 1st byte. */ | |
404 | |
/* 94 second bytes in 0xA1 .. 0xFE plus 63 in 0x40 .. 0x7E: 157 in all. */
#define BIG5_SAME_ROW ((0xFF - 0xA1) + (0x7F - 0x40))
406 | |
407 /* Code conversion macros. These are macros because they are used in | |
408 inner loops during code conversion. | |
409 | |
410 Note that temporary variables in macros introduce the classic | |
411 dynamic-scoping problems with variable names. We use capital- | |
412 lettered variables in the assumption that XEmacs does not use | |
413 capital letters in variables except in a very formalized way | |
414 (e.g. Qstring). */ | |
415 | |
416 /* Convert Big5 code (b1, b2) into its internal string representation | |
417 (lb, c1, c2). */ | |
418 | |
419 /* There is a much simpler way to split the Big5 charset into two. | |
420 For the moment I'm going to leave the algorithm as-is because it | |
421 claims to separate out the most-used characters into a single | |
422 charset, which perhaps will lead to optimizations in various | |
423 places. | |
424 | |
425 The way the algorithm works is something like this: | |
426 | |
427 Big5 can be viewed as a 94x157 charset, where the row is | |
428 encoded into the bytes 0xA1 .. 0xFE and the column is encoded | |
429 into the bytes 0x40 .. 0x7E and 0xA1 .. 0xFE. As for frequency, | |
430 the split between low and high column numbers is apparently | |
431 meaningless; ascending rows produce less and less frequent chars. | |
432 Therefore, we assign the lower half of rows (0xA1 .. 0xC8) to | |
433 the first charset, and the upper half (0xC9 .. 0xFE) to the | |
434 second. To do the conversion, we convert the character into | |
435 a single number where 0 .. 156 is the first row, 157 .. 313 | |
436 is the second, etc. That way, the characters are ordered by | |
437 decreasing frequency. Then we just chop the space in two | |
438 and coerce the result into a 94x94 space. | |
439 */ | |
440 | |
/* DECODE_BIG5 (b1, b2, lb, c1, c2): from the Big5 byte pair (B1, B2)
   compute the leading byte LB and the two position bytes C1, C2.
   LB, C1 and C2 must be lvalues.  The temporaries B1, B2 and I are
   capitalized so that they do not clash with caller variables. */
#define DECODE_BIG5(b1, b2, lb, c1, c2) do				\
{									\
  int B1 = b1, B2 = b2;							\
  /* Linear index: row offset first, then the column within the row	\
     (second bytes above 0x7E skip the 0x7F .. 0xA0 hole). */		\
  int I = (B1 - 0xA1) * BIG5_SAME_ROW;					\
									\
  I += B2 - (B2 < 0x7F ? 0x40 : 0x62);					\
  if (B1 >= 0xC9)							\
    {									\
      /* Upper rows go to the second charset, rebased to index 0. */	\
      lb = LEADING_BYTE_CHINESE_BIG5_2;					\
      I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);				\
    }									\
  else									\
    {									\
      lb = LEADING_BYTE_CHINESE_BIG5_1;					\
    }									\
  /* Fold the index into a 94-column grid. */				\
  c1 = I / (0xFF - 0xA1) + 0xA1;					\
  c2 = I % (0xFF - 0xA1) + 0xA1;					\
} while (0)
459 | |
460 /* Convert the internal string representation of a Big5 character | |
461 (lb, c1, c2) into Big5 code (b1, b2). */ | |
462 | |
/* ENCODE_BIG5 (lb, c1, c2, b1, b2): from the leading byte LB and the
   position bytes C1, C2 compute the Big5 byte pair (B1, B2); this is the
   inverse of DECODE_BIG5.  B1 and B2 must be lvalues, and B2 is read
   back after being assigned, so it is evaluated more than once.  Every
   argument is parenthesized in the expansion so that an argument with a
   low-precedence operator (e.g. a conditional expression for LB) is
   evaluated as a unit. */
#define ENCODE_BIG5(lb, c1, c2, b1, b2) do				\
{									\
  /* Linear index of the character in the 94-column grid. */		\
  int I = ((c1) - 0xA1) * (0xFF - 0xA1) + ((c2) - 0xA1);		\
									\
  if ((lb) == LEADING_BYTE_CHINESE_BIG5_2)				\
    {									\
      /* The second charset starts at Big5 row 0xC9. */		\
      I += BIG5_SAME_ROW * (0xC9 - 0xA1);				\
    }									\
  (b1) = I / BIG5_SAME_ROW + 0xA1;					\
  (b2) = I % BIG5_SAME_ROW;						\
  /* Columns 0 .. 62 map to 0x40 .. 0x7E, the rest to 0xA1 .. 0xFE. */	\
  (b2) += (b2) < 0x3F ? 0x40 : 0x62;					\
} while (0)
475 | |
476 /* Convert Big5 data to internal format. */ | |
477 | |
478 static Bytecount | |
479 big5_convert (struct coding_stream *str, const UExtbyte *src, | |
480 unsigned_char_dynarr *dst, Bytecount n) | |
481 { | |
482 unsigned int ch = str->ch; | |
483 Bytecount orign = n; | |
484 | |
485 if (str->direction == CODING_DECODE) | |
486 { | |
487 while (n--) | |
488 { | |
489 UExtbyte c = *src++; | |
490 if (ch) | |
491 { | |
492 /* Previous character was first byte of Big5 char. */ | |
826 | 493 if (byte_big5_two_byte_2_p (c)) |
771 | 494 { |
867 | 495 Ibyte b1, b2, b3; |
771 | 496 DECODE_BIG5 (ch, c, b1, b2, b3); |
497 Dynarr_add (dst, b1); | |
498 Dynarr_add (dst, b2); | |
499 Dynarr_add (dst, b3); | |
500 } | |
501 else | |
502 { | |
503 DECODE_ADD_BINARY_CHAR (ch, dst); | |
504 DECODE_ADD_BINARY_CHAR (c, dst); | |
505 } | |
506 ch = 0; | |
507 } | |
508 else | |
509 { | |
826 | 510 if (byte_big5_two_byte_1_p (c)) |
771 | 511 ch = c; |
512 else | |
513 DECODE_ADD_BINARY_CHAR (c, dst); | |
514 } | |
515 } | |
516 | |
517 if (str->eof) | |
518 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
519 } | |
520 else | |
521 { | |
522 while (n--) | |
523 { | |
867 | 524 Ibyte c = *src++; |
826 | 525 if (byte_ascii_p (c)) |
771 | 526 { |
527 /* ASCII. */ | |
528 Dynarr_add (dst, c); | |
529 } | |
867 | 530 else if (ibyte_leading_byte_p (c)) |
771 | 531 { |
532 if (c == LEADING_BYTE_CHINESE_BIG5_1 || | |
533 c == LEADING_BYTE_CHINESE_BIG5_2) | |
534 { | |
535 /* A recognized leading byte. */ | |
536 ch = c; | |
537 continue; /* not done with this character. */ | |
538 } | |
539 /* otherwise just ignore this character. */ | |
540 } | |
541 else if (ch == LEADING_BYTE_CHINESE_BIG5_1 || | |
542 ch == LEADING_BYTE_CHINESE_BIG5_2) | |
543 { | |
544 /* Previous char was a recognized leading byte. */ | |
545 ch = (ch << 8) | c; | |
546 continue; /* not done with this character. */ | |
547 } | |
548 else if (ch) | |
549 { | |
550 /* Encountering second byte of a Big5 character. */ | |
551 UExtbyte b1, b2; | |
552 | |
553 ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2); | |
554 Dynarr_add (dst, b1); | |
555 Dynarr_add (dst, b2); | |
556 } | |
557 | |
558 ch = 0; | |
559 } | |
560 } | |
561 | |
562 str->ch = ch; | |
563 return orign; | |
564 } | |
565 | |
867 | 566 Ichar |
771 | 567 decode_big5_char (int b1, int b2) |
568 { | |
826 | 569 if (byte_big5_two_byte_1_p (b1) && |
570 byte_big5_two_byte_2_p (b2)) | |
771 | 571 { |
572 int leading_byte; | |
573 Lisp_Object charset; | |
574 int c1, c2; | |
575 | |
576 DECODE_BIG5 (b1, b2, leading_byte, c1, c2); | |
826 | 577 charset = charset_by_leading_byte (leading_byte); |
867 | 578 return make_ichar (charset, c1 & 0x7F, c2 & 0x7F); |
771 | 579 } |
580 else | |
581 return -1; | |
582 } | |
583 | |
584 DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /* | |
585 Convert Big Five character codes in CODE into a character. | |
586 CODE is a cons of two integers specifying the codepoints in Big Five. | |
587 Return the corresponding character, or nil if the codepoints are out of range. | |
588 | |
589 The term `decode' is used because the codepoints can be viewed as the | |
590 representation of the character in the external Big Five encoding, and thus | |
591 converting them to a character is analogous to any other operation that | |
592 decodes an external representation. | |
593 */ | |
594 (code)) | |
595 { | |
867 | 596 Ichar ch; |
771 | 597 |
598 CHECK_CONS (code); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
599 CHECK_FIXNUM (XCAR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
600 CHECK_FIXNUM (XCDR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
601 ch = decode_big5_char (XFIXNUM (XCAR (code)), XFIXNUM (XCDR (code))); |
771 | 602 if (ch == -1) |
603 return Qnil; | |
604 else | |
605 return make_char (ch); | |
606 } | |
607 | |
608 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* | |
609 Convert the specified Big Five character into its codepoints. | |
610 The codepoints are returned as a cons of two integers, specifying the | |
611 Big Five codepoints. See `decode-big5-char' for the reason why the | |
612 term `encode' is used for this operation. | |
613 */ | |
614 (character)) | |
615 { | |
616 Lisp_Object charset; | |
617 int c1, c2, b1, b2; | |
618 | |
619 CHECK_CHAR_COERCE_INT (character); | |
867 | 620 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
771 | 621 if (EQ (charset, Vcharset_chinese_big5_1) || |
622 EQ (charset, Vcharset_chinese_big5_2)) | |
623 { | |
624 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80, | |
625 b1, b2); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
626 return Fcons (make_fixnum (b1), make_fixnum (b2)); |
771 | 627 } |
628 else | |
629 return Qnil; | |
630 } | |
631 | |
632 | |
633 /************************************************************************/ | |
634 /* Big5 detector */ | |
635 /************************************************************************/ | |
636 | |
637 DEFINE_DETECTOR (big5); | |
638 DEFINE_DETECTOR_CATEGORY (big5, big5); | |
639 | |
/* Running statistics kept by big5_detect across calls. */
struct big5_detector
{
  /* Two-byte sequences whose second byte was in 0x40 .. 0x7E. */
  int seen_big5_char;
  /* Two-byte sequences whose second byte was in 0xA1 .. 0xFE. */
  int seen_euc_char;
  /* Saw ESC, SI or SO where a first byte was expected. */
  unsigned int seen_iso2022_esc:1;
  /* Saw some other byte >= 0x80 that cannot start a character. */
  unsigned int seen_bad_first_byte:1;
  /* Saw an invalid byte where a second byte was expected. */
  unsigned int seen_bad_second_byte:1;

  /* Parser state: the next byte is the second byte of a character. */
  unsigned int in_second_byte:1;
};
651 | |
652 static void | |
653 big5_detect (struct detection_state *st, const UExtbyte *src, | |
654 Bytecount n) | |
655 { | |
656 struct big5_detector *data = DETECTION_STATE_DATA (st, big5); | |
657 | |
658 while (n--) | |
659 { | |
660 UExtbyte c = *src++; | |
661 if (!data->in_second_byte) | |
662 { | |
663 if (c >= 0xA1 && c <= 0xFE) | |
664 data->in_second_byte = 1; | |
665 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
666 data->seen_iso2022_esc = 1; | |
667 else if (c >= 0x80) | |
668 data->seen_bad_first_byte = 1; | |
669 } | |
670 else | |
671 { | |
672 data->in_second_byte = 0; | |
985 | 673 if (c >= 0xA1 && c <= 0xFE) |
674 data->seen_euc_char++; | |
675 else if (c >= 0x40 && c <= 0x7E) | |
771 | 676 data->seen_big5_char++; |
677 else | |
678 data->seen_bad_second_byte = 1; | |
679 } | |
680 } | |
681 | |
682 if (data->seen_bad_second_byte) | |
683 DET_RESULT (st, big5) = DET_NEARLY_IMPOSSIBLE; | |
684 else if (data->seen_bad_first_byte) | |
685 DET_RESULT (st, big5) = DET_QUITE_IMPROBABLE; | |
686 else if (data->seen_iso2022_esc) | |
687 DET_RESULT (st, big5) = DET_SOMEWHAT_UNLIKELY; | |
688 else if (data->seen_big5_char >= 4) | |
689 DET_RESULT (st, big5) = DET_SOMEWHAT_LIKELY; | |
985 | 690 else if (data->seen_euc_char) |
691 DET_RESULT (st, big5) = DET_SLIGHTLY_LIKELY; | |
771 | 692 else |
693 DET_RESULT (st, big5) = DET_AS_LIKELY_AS_UNLIKELY; | |
694 } | |
695 | |
696 | |
697 /************************************************************************/ | |
698 /* ISO2022 methods */ | |
699 /************************************************************************/ | |
700 | |
701 /* Any ISO-2022-compliant coding system. Includes JIS, EUC, CTEXT | |
702 (Compound Text, the encoding of selections in X Windows). See below for | |
703 a complete description of ISO-2022. */ | |
704 | |
705 /* Flags indicating what we've seen so far when parsing an | |
706 ISO2022 escape sequence. */ | |
/* How far the parser has got through an ISO2022 escape sequence.  The
   enumerators are numbered implicitly from 0, so their order is part of
   the interface. */
enum iso_esc_flag
{
  /* ---- Partial sequences ---- */
  ISO_ESC_NOTHING,      /* Nothing seen yet. */
  ISO_ESC,              /* ESC seen. */
  ISO_ESC_2_4,          /* ESC $ seen: a multi-byte rather than a
                           single-byte character set is being
                           designated. */
  ISO_ESC_2_5,          /* ESC % seen: an escape to a Unicode coding
                           system.  The only one handled is UTF-8, for
                           which the next character is G. */
  ISO_ESC_2_8,          /* ESC 0x28, i.e. ESC (, seen: designate a
                           94-character character set into G0. */
  ISO_ESC_2_9,          /* ESC 0x29 seen: designate a 94-character
                           character set into G1. */
  ISO_ESC_2_10,         /* ESC 0x2A seen. */
  ISO_ESC_2_11,         /* ESC 0x2B seen. */
  ISO_ESC_2_12,         /* ESC 0x2C seen: designate a 96-character
                           character set into G0.  (Not
                           ISO2022-standard; the following
                           96-character control sequences are
                           standard, though.) */
  ISO_ESC_2_13,         /* ESC 0x2D seen: designate a 96-character
                           character set into G1. */
  ISO_ESC_2_14,         /* ESC 0x2E seen. */
  ISO_ESC_2_15,         /* ESC 0x2F seen. */
  ISO_ESC_2_4_8,        /* ESC $ 0x28 seen: designate a 94^N character
                           set into G0. */
  ISO_ESC_2_4_9,        /* ESC $ 0x29 seen. */
  ISO_ESC_2_4_10,       /* ESC $ 0x2A seen. */
  ISO_ESC_2_4_11,       /* ESC $ 0x2B seen. */
  ISO_ESC_2_4_12,       /* ESC $ 0x2C seen. */
  ISO_ESC_2_4_13,       /* ESC $ 0x2D seen. */
  ISO_ESC_2_4_14,       /* ESC $ 0x2E seen. */
  ISO_ESC_2_4_15,       /* ESC $ 0x2F seen. */
  ISO_ESC_5_11,         /* ESC [ or 0x9B seen: start of a
                           directionality-control sequence.  The next
                           character must be 0, 1, 2, or ]. */
  ISO_ESC_5_11_0,       /* 0x9B 0 seen; next character must be ]. */
  ISO_ESC_5_11_1,       /* 0x9B 1 seen; next character must be ]. */
  ISO_ESC_5_11_2,       /* 0x9B 2 seen; next character must be ]. */

  /* ---- Full sequences ---- */
  ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING. */
  ISO_ESC_END_COMPOSITE,   /* Private usage for END COMPOSING. */
  ISO_ESC_SINGLE_SHIFT,    /* Complete single-shift sequence. */
  ISO_ESC_LOCKING_SHIFT,   /* Complete locking-shift sequence. */
  ISO_ESC_DESIGNATE,       /* Complete designation sequence. */
  ISO_ESC_DIRECTIONALITY,  /* Complete ISO6429 directionality sequence. */
  ISO_ESC_LITERAL          /* A literal character a la escape-quoting. */
};
765 | |
/* Kinds of error in ISO2022 input.  The identifiers are kept exactly as
   they are (including the spelling "UNKWOWN"), since code outside this
   view presumably refers to them.  There is no comma after the last
   enumerator: strict C89 and C++98 reject one, and enum iso_esc_flag
   in this file has none either. */
enum iso_error
{
  ISO_ERROR_BAD_FINAL,
  ISO_ERROR_UNKWOWN_ESC_SEQUENCE,
  ISO_ERROR_INVALID_CODE_POINT_CHARACTER
};
772 | |
773 | |
/* Flags indicating current state while converting code.  Each flag is a
   distinct bit. */

/************ Used during encoding and decoding: ************/
/* Set: current directionality is right-to-left.  Clear: left-to-right. */
#define ISO_STATE_R2L		0x01

/************ Used during encoding: ************/
/* Set: we just saw a CR. */
#define ISO_STATE_CR		0x02

/************ Used during decoding: ************/
/* Set: we're in the middle of an escape sequence, and the upper 16 bits
   should be looked at to see which partial sequence has been seen so
   far.  Clear: we're running through actual text. */
#define ISO_STATE_ESCAPE	0x04
/* Set: G2 is invoked into GL, but only for the next character. */
#define ISO_STATE_SS2		0x08
/* Set: G3 is invoked into GL, but only for the next character.  If both
   ISO_STATE_SS2 and ISO_STATE_SS3 are set, ISO_STATE_SS2 overrides; but
   this probably indicates an error in the text encoding. */
#define ISO_STATE_SS3		0x10
/* Set: we're processing a composite character (i.e. a character
   constructed by overstriking two or more characters). */
#define ISO_STATE_COMPOSITE	0x20

/* Set: we're processing UTF-8 encoded data within ISO-2022
   processing. */
#define ISO_STATE_UTF_8		0x40

/* ISO_STATE_LOCK is the mask of flags that remain on until explicitly
   turned off when in the ISO2022 encoder/decoder.  Other flags are turned
   off at the end of processing each character or escape sequence. */
#define ISO_STATE_LOCK \
  (ISO_STATE_R2L | ISO_STATE_COMPOSITE | ISO_STATE_UTF_8)
771 | 809 |
810 typedef struct charset_conversion_spec | |
811 { | |
812 Lisp_Object from_charset; | |
813 Lisp_Object to_charset; | |
814 } charset_conversion_spec; | |
815 | |
816 typedef struct | |
817 { | |
818 Dynarr_declare (charset_conversion_spec); | |
819 } charset_conversion_spec_dynarr; | |
820 | |
821 struct iso2022_coding_system | |
822 { | |
823 /* What are the charsets to be initially designated to G0, G1, | |
824 G2, G3? If t, no charset is initially designated. If nil, | |
825 no charset is initially designated and no charset is allowed | |
826 to be designated. */ | |
827 Lisp_Object initial_charset[4]; | |
828 | |
829 /* If true, a designation escape sequence needs to be sent on output | |
830 for the charset in G[0-3] before that charset is used. */ | |
831 unsigned char force_charset_on_output[4]; | |
832 | |
833 charset_conversion_spec_dynarr *input_conv; | |
834 charset_conversion_spec_dynarr *output_conv; | |
835 | |
836 unsigned int shoort :1; /* C makes you speak Dutch */ | |
837 unsigned int no_ascii_eol :1; | |
838 unsigned int no_ascii_cntl :1; | |
839 unsigned int seven :1; | |
840 unsigned int lock_shift :1; | |
841 unsigned int no_iso6429 :1; | |
842 unsigned int escape_quoted :1; | |
843 }; | |
844 | |
/* Accessors for the members of struct iso2022_coding_system.  CODESYS is
   passed straight to CODING_SYSTEM_TYPE_DATA; G selects one of the four
   registers. */
#define CODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->initial_charset[g])
#define CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->force_charset_on_output[g])
#define CODING_SYSTEM_ISO2022_SHORT(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->shoort)
#define CODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_ascii_eol)
#define CODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_ascii_cntl)
#define CODING_SYSTEM_ISO2022_SEVEN(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->seven)
#define CODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->lock_shift)
#define CODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_iso6429)
#define CODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->escape_quoted)
#define CODING_SYSTEM_ISO2022_INPUT_CONV(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->input_conv)
#define CODING_SYSTEM_ISO2022_OUTPUT_CONV(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->output_conv)

/* The same accessors, with CODESYS first run through XCODING_SYSTEM. */
#define XCODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \
  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (XCODING_SYSTEM (codesys), g)
#define XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \
  CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (XCODING_SYSTEM (codesys), g)
#define XCODING_SYSTEM_ISO2022_SHORT(codesys) \
  CODING_SYSTEM_ISO2022_SHORT (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \
  CODING_SYSTEM_ISO2022_NO_ASCII_EOL (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \
  CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_SEVEN(codesys) \
  CODING_SYSTEM_ISO2022_SEVEN (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \
  CODING_SYSTEM_ISO2022_LOCK_SHIFT (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \
  CODING_SYSTEM_ISO2022_NO_ISO6429 (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \
  CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_INPUT_CONV(codesys) \
  CODING_SYSTEM_ISO2022_INPUT_CONV (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_ISO2022_OUTPUT_CONV(codesys) \
  CODING_SYSTEM_ISO2022_OUTPUT_CONV (XCODING_SYSTEM (codesys))
890 | |
891 /* Additional information used by the ISO2022 decoder and detector. */ | |
892 struct iso2022_coding_stream | |
893 { | |
894 /* CHARSET holds the character sets currently assigned to the G0 | |
895 through G3 variables. It is initialized from the array | |
896 INITIAL_CHARSET in CODESYS. */ | |
897 Lisp_Object charset[4]; | |
898 | |
899 /* Which registers are currently invoked into the left (GL) and | |
900 right (GR) halves of the 8-bit encoding space? */ | |
901 int register_left, register_right; | |
902 | |
903 /* FLAGS holds flags indicating the current state of the encoding. Some of | |
904 these flags are actually part of the state-dependent data and should be | |
905 moved there. */ | |
906 unsigned int flags; | |
907 | |
908 /**************** for decoding ****************/ | |
909 | |
910 /* ISO_ESC holds a value indicating part of an escape sequence | |
911 that has already been seen. */ | |
912 enum iso_esc_flag esc; | |
913 | |
914 /* This records the bytes we've seen so far in an escape sequence, | |
915 in case the sequence is invalid (we spit out the bytes unchanged). */ | |
916 unsigned char esc_bytes[8]; | |
917 | |
918 /* Index for next byte to store in ISO escape sequence. */ | |
919 int esc_bytes_index; | |
920 | |
921 #ifdef ENABLE_COMPOSITE_CHARS | |
922 /* Stuff seen so far when composing a string. */ | |
923 unsigned_char_dynarr *composite_chars; | |
924 #endif | |
925 | |
926 /* If we saw an invalid designation sequence for a particular | |
927 register, we flag it here and switch to ASCII. The next time we | |
928 see a valid designation for this register, we turn off the flag | |
929 and do the designation normally, but pretend the sequence was | |
930 invalid. The effect of all this is that (most of the time) the | |
931 escape sequences for both the switch to the unknown charset, and | |
932 the switch back to the known charset, get inserted literally into | |
933 the buffer and saved out as such. The hope is that we can | |
934 preserve the escape sequences so that the resulting written out | |
935 file makes sense. If we don't do any of this, the designation | |
936 to the invalid charset will be preserved but that switch back | |
937 to the known charset will probably get eaten because it was | |
938 the same charset that was already present in the register. */ | |
939 unsigned char invalid_designated[4]; | |
940 | |
941 /* We try to do similar things as above for direction-switching | |
942 sequences. If we encountered a direction switch while an | |
943 invalid designation was present, or an invalid designation | |
944 just after a direction switch (i.e. no valid designation | |
945 encountered yet), we insert the direction-switch escape | |
946 sequence literally into the output stream, and later on | |
947 insert the corresponding direction-restoring escape sequence | |
948 literally also. */ | |
949 unsigned int switched_dir_and_no_valid_charset_yet :1; | |
950 unsigned int invalid_switch_dir :1; | |
951 | |
952 /* Tells the decoder to output the escape sequence literally | |
953 even though it was valid. Used in the games we play to | |
954 avoid lossage when we encounter invalid designations. */ | |
955 unsigned int output_literally :1; | |
956 /* We encountered a direction switch followed by an invalid | |
957 designation. We didn't output the direction switch | |
958 literally because we didn't know about the invalid designation; | |
959 but we have to do so now. */ | |
960 unsigned int output_direction_sequence :1; | |
961 | |
962 /**************** for encoding ****************/ | |
963 | |
964 /* Whether we need to explicitly designate the charset in the | |
965 G? register before using it. It is initialized from the | |
966 array FORCE_CHARSET_ON_OUTPUT in CODESYS. */ | |
967 unsigned char force_charset_on_output[4]; | |
968 | |
969 /* Other state variables that need to be preserved across | |
970 invocations. */ | |
971 Lisp_Object current_charset; | |
972 int current_half; | |
973 int current_char_boundary; | |
3439 | 974 |
975 /* Used for handling UTF-8. */ | |
976 unsigned char counter; | |
4096 | 977 unsigned char indicated_length; |
771 | 978 }; |
979 | |
1204 | 980 static const struct memory_description ccs_description_1[] = |
771 | 981 { |
982 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, from_charset) }, | |
983 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, to_charset) }, | |
984 { XD_END } | |
985 }; | |
986 | |
1204 | 987 static const struct sized_memory_description ccs_description = |
771 | 988 { |
989 sizeof (charset_conversion_spec), | |
990 ccs_description_1 | |
991 }; | |
992 | |
1204 | 993 static const struct memory_description ccsd_description_1[] = |
771 | 994 { |
995 XD_DYNARR_DESC (charset_conversion_spec_dynarr, &ccs_description), | |
996 { XD_END } | |
997 }; | |
998 | |
1204 | 999 static const struct sized_memory_description ccsd_description = |
771 | 1000 { |
1001 sizeof (charset_conversion_spec_dynarr), | |
1002 ccsd_description_1 | |
1003 }; | |
1004 | |
1204 | 1005 static const struct memory_description iso2022_coding_system_description[] = { |
1006 { XD_LISP_OBJECT_ARRAY, offsetof (struct iso2022_coding_system, | |
1007 initial_charset), 4 }, | |
2367 | 1008 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, input_conv), |
2551 | 1009 1, { &ccsd_description } }, |
2367 | 1010 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, output_conv), |
2551 | 1011 1, { &ccsd_description } }, |
771 | 1012 { XD_END } |
1013 }; | |
1014 | |
1204 | 1015 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022); |
1016 | |
771 | 1017 /* The following note taken directly from FSF 21.0.103. */ |
1018 | |
1019 /* The following note describes the coding system ISO2022 briefly. | |
1020 Since the intention of this note is to help understand the | |
1021 functions in this file, some parts are NOT ACCURATE or are OVERLY | |
1022 SIMPLIFIED. For thorough understanding, please refer to the | |
1023 original document of ISO2022. This is equivalent to the standard | |
1024 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*). | |
1025 | |
1026 ISO2022 provides many mechanisms to encode several character sets | |
1027 in 7-bit and 8-bit environments. For 7-bit environments, all text | |
1028 is encoded using bytes less than 128. This may make the encoded | |
1029 text a little bit longer, but the text passes more easily through | |
1030 several types of gateway, some of which strip off the MSB (Most | |
1031 Significant Bit). | |
1032 | |
1033 There are two kinds of character sets: control character sets and | |
1034 graphic character sets. The former contain control characters such | |
1035 as `newline' and `escape' to provide control functions (control | |
1036 functions are also provided by escape sequences). The latter | |
1037 contain graphic characters such as 'A' and '-'. Emacs recognizes | |
1038 two control character sets and many graphic character sets. | |
1039 | |
1040 Graphic character sets are classified into one of the following | |
1041 four classes, according to the number of bytes (DIMENSION) and | |
1042 number of characters in one dimension (CHARS) of the set: | |
1043 - DIMENSION1_CHARS94 | |
1044 - DIMENSION1_CHARS96 | |
1045 - DIMENSION2_CHARS94 | |
1046 - DIMENSION2_CHARS96 | |
1047 | |
1048 In addition, each character set is assigned an identification tag, | |
1049 unique for each set, called the "final character" (denoted as <F> | |
1050 hereafter). The <F> of each character set is decided by ECMA(*) | |
1051 when it is registered in ISO. The code range of <F> is 0x30..0x7F | |
1052 (0x30..0x3F are for private use only). | |
1053 | |
1054 Note (*): ECMA = European Computer Manufacturers Association | |
1055 | |
1056 Here are examples of graphic character sets [NAME(<F>)]: | |
1057 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ... | |
1058 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ... | |
1059 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ... | |
1060 o DIMENSION2_CHARS96 -- none for the moment | |
1061 | |
1062 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR. | |
1063 C0 [0x00..0x1F] -- control character plane 0 | |
1064 GL [0x20..0x7F] -- graphic character plane 0 | |
1065 C1 [0x80..0x9F] -- control character plane 1 | |
1066 GR [0xA0..0xFF] -- graphic character plane 1 | |
1067 | |
1068 A control character set is directly designated and invoked to C0 or | |
1069 C1 by an escape sequence. The most common case is that: | |
1070 - ISO646's control character set is designated/invoked to C0, and | |
1071 - ISO6429's control character set is designated/invoked to C1, | |
1072 and usually these designations/invocations are omitted in encoded | |
1073 text. In a 7-bit environment, only C0 can be used, and a control | |
1074 character for C1 is encoded by an appropriate escape sequence to | |
1075 fit into the environment. All control characters for C1 are | |
1076 defined to have corresponding escape sequences. | |
1077 | |
1078 A graphic character set is at first designated to one of four | |
1079 graphic registers (G0 through G3), then these graphic registers are | |
1080 invoked to GL or GR. These designations and invocations can be | |
1081 done independently. The most common case is that G0 is invoked to | |
1082 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually | |
1083 these invocations and designations are omitted in encoded text. | |
1084 In a 7-bit environment, only GL can be used. | |
1085 | |
1086 When a graphic character set of CHARS94 is invoked to GL, codes | |
1087 0x20 and 0x7F of the GL area work as control characters SPACE and | |
1088 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not | |
1089 be used. | |
1090 | |
1091 There are two ways of invocation: locking-shift and single-shift. | |
1092 With locking-shift, the invocation lasts until the next different | |
1093 invocation, whereas with single-shift, the invocation affects the | |
1094 following character only and doesn't affect the locking-shift | |
1095 state. Invocations are done by the following control characters or | |
1096 escape sequences: | |
1097 | |
1098 ---------------------------------------------------------------------- | |
1099 abbrev function cntrl escape seq description | |
1100 ---------------------------------------------------------------------- | |
1101 SI/LS0 (shift-in) 0x0F none invoke G0 into GL | |
1102 SO/LS1 (shift-out) 0x0E none invoke G1 into GL | |
1103 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL | |
1104 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL | |
1105 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*) | |
1106 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*) | |
1107 LS3R (locking-shift-3 right) none ESC '|' invoke G3 into GR (*) | |
1108 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char | |
1109 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char | |
1110 ---------------------------------------------------------------------- | |
1111 (*) These are not used by any known coding system. | |
1112 | |
1113 Control characters for these functions are defined by macros | |
1114 ISO_CODE_XXX in `coding.h'. | |
1115 | |
1116 Designations are done by the following escape sequences: | |
1117 ---------------------------------------------------------------------- | |
1118 escape sequence description | |
1119 ---------------------------------------------------------------------- | |
1120 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0 | |
1121 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1 | |
1122 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2 | |
1123 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3 | |
1124 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*) | |
1125 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1 | |
1126 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2 | |
1127 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3 | |
1128 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**) | |
1129 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1 | |
1130 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2 | |
1131 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3 | |
1132 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*) | |
1133 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1 | |
1134 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2 | |
1135 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3 | |
1136 ---------------------------------------------------------------------- | |
1137 | |
1138 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set | |
1139 of dimension 1, chars 94, and final character <F>, etc... | |
1140 | |
1141 Note (*): Although these designations are not allowed in ISO2022, | |
1142 Emacs accepts them on decoding, and produces them on encoding | |
1143 CHARS96 character sets in a coding system which is characterized as | |
1144 7-bit environment, non-locking-shift, and non-single-shift. | |
1145 | |
1146 Note (**): If <F> is '@', 'A', or 'B', the intermediate character | |
1147 '(' can be omitted. We refer to this as "short-form" hereafter. | |
1148 | |
1149 Now you may notice that there are a lot of ways of encoding the | |
1150 same multilingual text in ISO2022. Actually, there exist many | |
1151 coding systems such as Compound Text (used in X11's inter client | |
1152 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR | |
1153 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian | |
1154 localized platforms), and all of these are variants of ISO2022. | |
1155 | |
1156 In addition to the above, Emacs handles two more kinds of escape | |
1157 sequences: ISO6429's direction specification and Emacs' private | |
1158 sequence for specifying character composition. | |
1159 | |
1160 ISO6429's direction specification takes the following form: | |
1161 o CSI ']' -- end of the current direction | |
1162 o CSI '0' ']' -- end of the current direction | |
1163 o CSI '1' ']' -- start of left-to-right text | |
1164 o CSI '2' ']' -- start of right-to-left text | |
1165 The control character CSI (0x9B: control sequence introducer) is | |
1166 abbreviated to the escape sequence ESC '[' in a 7-bit environment. | |
1167 | |
1168 Character composition specification takes the following form: | |
1169 o ESC '0' -- start relative composition | |
1170 o ESC '1' -- end composition | |
1171 o ESC '2' -- start rule-base composition (*) | |
1172 o ESC '3' -- start relative composition with alternate chars (**) | |
1173 o ESC '4' -- start rule-base composition with alternate chars (**) | |
1174 Since these are not standard escape sequences of any ISO standard, | |
1175 the use of them with these meanings is restricted to Emacs only. | |
1176 | |
1177 (*) This form is used only in Emacs 20.5 and older versions, | |
1178 but the newer versions can safely decode it. | |
1179 (**) This form is used only in Emacs 21.1 and newer versions, | |
1180 and the older versions can't decode it. | |
1181 | |
1182 Here's a list of example usages of these composition escape | |
1183 sequences (categorized by `enum composition_method'). | |
1184 | |
1185 COMPOSITION_RELATIVE: | |
1186 ESC 0 CHAR [ CHAR ] ESC 1 | |
1187 COMPOSITION_WITH_RULE: | |
1188 ESC 2 CHAR [ RULE CHAR ] ESC 1 | |
1189 COMPOSITION_WITH_ALTCHARS: | |
1190 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 | |
1191 COMPOSITION_WITH_RULE_ALTCHARS: | |
1192 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | |
1193 | |
1194 static void | |
1195 reset_iso2022_decode (Lisp_Object coding_system, | |
1196 struct iso2022_coding_stream *data) | |
1197 { | |
1198 int i; | |
1199 #ifdef ENABLE_COMPOSITE_CHARS | |
1200 unsigned_char_dynarr *old_composite_chars = data->composite_chars; | |
1201 #endif | |
1202 | |
1203 xzero (*data); | |
1204 | |
1205 for (i = 0; i < 4; i++) | |
1206 { | |
1207 if (!NILP (coding_system)) | |
1208 data->charset[i] = | |
1209 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
1210 else | |
1211 data->charset[i] = Qt; | |
1212 } | |
1213 data->esc = ISO_ESC_NOTHING; | |
1214 data->register_right = 1; | |
1215 #ifdef ENABLE_COMPOSITE_CHARS | |
1216 if (old_composite_chars) | |
1217 { | |
1218 data->composite_chars = old_composite_chars; | |
1219 Dynarr_reset (data->composite_chars); | |
1220 } | |
1221 #endif | |
1222 } | |
1223 | |
1224 static void | |
1225 reset_iso2022_encode (Lisp_Object coding_system, | |
1226 struct iso2022_coding_stream *data) | |
1227 { | |
1228 int i; | |
1229 | |
1230 xzero (*data); | |
1231 | |
1232 for (i = 0; i < 4; i++) | |
1233 { | |
1234 data->charset[i] = | |
1235 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
1236 data->force_charset_on_output[i] = | |
1237 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (coding_system, i); | |
1238 } | |
1239 data->register_right = 1; | |
1240 data->current_charset = Qnil; | |
1241 data->current_char_boundary = 1; | |
1242 } | |
1243 | |
1244 static void | |
1245 iso2022_init_coding_stream (struct coding_stream *str) | |
1246 { | |
1247 if (str->direction == CODING_DECODE) | |
1248 reset_iso2022_decode (str->codesys, | |
1249 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
1250 else | |
1251 reset_iso2022_encode (str->codesys, | |
1252 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
1253 } | |
1254 | |
/* Rewinding an ISO2022 stream is the same as initializing it afresh. */
static void
iso2022_rewind_coding_stream (struct coding_stream *str)
{
  iso2022_init_coding_stream (str);
}
1260 | |
1261 static int | |
1262 fit_to_be_escape_quoted (unsigned char c) | |
1263 { | |
1264 switch (c) | |
1265 { | |
1266 case ISO_CODE_ESC: | |
1267 case ISO_CODE_CSI: | |
1268 case ISO_CODE_SS2: | |
1269 case ISO_CODE_SS3: | |
1270 case ISO_CODE_SO: | |
1271 case ISO_CODE_SI: | |
1272 return 1; | |
1273 | |
1274 default: | |
1275 return 0; | |
1276 } | |
1277 } | |
1278 | |
1279 static Lisp_Object | |
867 | 1280 charset_by_attributes_or_create_one (int type, Ibyte final, int dir) |
771 | 1281 { |
826 | 1282 Lisp_Object charset = charset_by_attributes (type, final, dir); |
771 | 1283 |
1284 if (NILP (charset)) | |
1285 { | |
1286 int chars, dim; | |
1287 | |
1288 switch (type) | |
1289 { | |
1290 case CHARSET_TYPE_94: | |
1291 chars = 94; dim = 1; | |
1292 break; | |
1293 case CHARSET_TYPE_96: | |
1294 chars = 96; dim = 1; | |
1295 break; | |
1296 case CHARSET_TYPE_94X94: | |
1297 chars = 94; dim = 2; | |
1298 break; | |
1299 case CHARSET_TYPE_96X96: | |
1300 chars = 96; dim = 2; | |
1301 break; | |
1302 default: | |
2500 | 1303 ABORT (); chars = 0; dim = 0; |
771 | 1304 } |
1305 | |
1306 charset = Fmake_charset (Qunbound, Qnil, | |
1307 nconc2 (list6 (Qfinal, make_char (final), | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
1308 Qchars, make_fixnum (chars), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
1309 Qdimension, make_fixnum (dim)), |
771 | 1310 list2 (Qdirection, |
1311 dir == CHARSET_LEFT_TO_RIGHT ? | |
1312 Ql2r : Qr2l))); | |
1313 } | |
1314 | |
1315 return charset; | |
1316 } | |
1317 | |
1318 /* Parse one byte of an ISO2022 escape sequence. | |
1319 If the result is an invalid escape sequence, return 0 and | |
1320 do not change anything in STR. Otherwise, if the result is | |
1321 an incomplete escape sequence, update ISO2022.ESC and | |
1322 ISO2022.ESC_BYTES and return -1. Otherwise, update | |
1323 all the state variables (but not ISO2022.ESC_BYTES) and | |
1324 return 1. | |
1325 | |
1326 If CHECK_INVALID_CHARSETS is non-zero, check for designation | |
1327 or invocation of an invalid character set and treat that as | |
1328 an unrecognized escape sequence. | |
1329 | |
2367 | 1330 */ |
771 | 1331 |
1332 static int | |
1333 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_coding_stream *iso, | |
1334 unsigned char c, unsigned int *flags, | |
1335 int check_invalid_charsets) | |
1336 { | |
1337 /* (1) If we're at the end of a designation sequence, CS is the | |
1338 charset being designated and REG is the register to designate | |
1339 it to. | |
1340 | |
1341 (2) If we're at the end of a locking-shift sequence, REG is | |
1342 the register to invoke and HALF (0 == left, 1 == right) is | |
1343 the half to invoke it into. | |
1344 | |
1345 (3) If we're at the end of a single-shift sequence, REG is | |
1346 the register to invoke. */ | |
1347 Lisp_Object cs = Qnil; | |
1348 int reg, half; | |
1349 | |
1350 /* NOTE: This code does goto's all over the fucking place. | |
1351 The reason for this is that we're basically implementing | |
1352 a state machine here, and hierarchical languages like C | |
1353 don't really provide a clean way of doing this. */ | |
1354 | |
1355 if (! (*flags & ISO_STATE_ESCAPE)) | |
1356 /* At beginning of escape sequence; we need to reset our | |
1357 escape-state variables. */ | |
1358 iso->esc = ISO_ESC_NOTHING; | |
1359 | |
1360 iso->output_literally = 0; | |
1361 iso->output_direction_sequence = 0; | |
1362 | |
1363 switch (iso->esc) | |
1364 { | |
1365 case ISO_ESC_NOTHING: | |
1366 iso->esc_bytes_index = 0; | |
1367 switch (c) | |
1368 { | |
1369 case ISO_CODE_ESC: /* Start escape sequence */ | |
1370 *flags |= ISO_STATE_ESCAPE; | |
1371 iso->esc = ISO_ESC; | |
1372 goto not_done; | |
1373 | |
1374 case ISO_CODE_CSI: /* ISO6429 (specifying directionality) */ | |
1375 *flags |= ISO_STATE_ESCAPE; | |
1376 iso->esc = ISO_ESC_5_11; | |
1377 goto not_done; | |
1378 | |
1379 case ISO_CODE_SO: /* locking shift 1 */ | |
1380 reg = 1; half = 0; | |
1381 goto locking_shift; | |
1382 case ISO_CODE_SI: /* locking shift 0 */ | |
1383 reg = 0; half = 0; | |
1384 goto locking_shift; | |
1385 | |
1386 case ISO_CODE_SS2: /* single shift */ | |
1387 reg = 2; | |
1388 goto single_shift; | |
1389 case ISO_CODE_SS3: /* single shift */ | |
1390 reg = 3; | |
1391 goto single_shift; | |
1392 | |
1393 default: /* Other control characters */ | |
1394 error: | |
1395 *flags &= ISO_STATE_LOCK; | |
1396 return 0; | |
1397 } | |
1398 | |
1399 case ISO_ESC: | |
3439 | 1400 |
1401 /* The only available ISO 2022 sequence in UTF-8 mode is ESC % @, to | |
1402 exit from it. If we see any other escape sequence, pass it through | |
1403 in the error handler. */ | |
1404 if (*flags & ISO_STATE_UTF_8 && '%' != c) | |
1405 { | |
1406 return 0; | |
1407 } | |
1408 | |
771 | 1409 switch (c) |
1410 { | |
1411 /**** single shift ****/ | |
1412 | |
1413 case 'N': /* single shift 2 */ | |
1414 reg = 2; | |
1415 goto single_shift; | |
1416 case 'O': /* single shift 3 */ | |
1417 reg = 3; | |
1418 goto single_shift; | |
1419 | |
1420 /**** locking shift ****/ | |
1421 | |
1422 case '~': /* locking shift 1 right */ | |
1423 reg = 1; half = 1; | |
1424 goto locking_shift; | |
1425 case 'n': /* locking shift 2 */ | |
1426 reg = 2; half = 0; | |
1427 goto locking_shift; | |
1428 case '}': /* locking shift 2 right */ | |
1429 reg = 2; half = 1; | |
1430 goto locking_shift; | |
1431 case 'o': /* locking shift 3 */ | |
1432 reg = 3; half = 0; | |
1433 goto locking_shift; | |
1434 case '|': /* locking shift 3 right */ | |
1435 reg = 3; half = 1; | |
1436 goto locking_shift; | |
1437 | |
1438 /**** composite ****/ | |
1439 | |
1440 #ifdef ENABLE_COMPOSITE_CHARS | |
1441 case '0': | |
1442 iso->esc = ISO_ESC_START_COMPOSITE; | |
1443 *flags = (*flags & ISO_STATE_LOCK) | | |
1444 ISO_STATE_COMPOSITE; | |
1445 return 1; | |
1446 | |
1447 case '1': | |
1448 iso->esc = ISO_ESC_END_COMPOSITE; | |
1449 *flags = (*flags & ISO_STATE_LOCK) & | |
1450 ~ISO_STATE_COMPOSITE; | |
1451 return 1; | |
1452 #else | |
1453 case '0': case '1': case '2': case '3': case '4': | |
1454 /* We simply return a flag indicating that some composite | |
1455 escape was seen. The caller will use the particular | |
1456 character to encode the appropriate "composite hack" | |
1457 character out of Vcharset_composite, so that we will | |
1458 preserve these values on output. */ | |
1459 iso->esc = ISO_ESC_START_COMPOSITE; | |
1460 *flags &= ISO_STATE_LOCK; | |
1461 return 1; | |
1462 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1463 | |
1464 /**** directionality ****/ | |
1465 | |
1466 case '[': | |
1467 iso->esc = ISO_ESC_5_11; | |
1468 goto not_done; | |
1469 | |
1470 /**** designation ****/ | |
1471 | |
1472 case '$': /* multibyte charset prefix */ | |
1473 iso->esc = ISO_ESC_2_4; | |
1474 goto not_done; | |
1475 | |
3439 | 1476 case '%': /* Prefix to an escape to or from Unicode. */ |
1477 iso->esc = ISO_ESC_2_5; | |
1478 goto not_done; | |
1479 | |
771 | 1480 default: |
1481 if (0x28 <= c && c <= 0x2F) | |
1482 { | |
1483 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8); | |
1484 goto not_done; | |
1485 } | |
1486 | |
1487 /* This function is called with CODESYS equal to nil when | |
1488 doing coding-system detection. */ | |
1489 if (!NILP (codesys) | |
1490 && XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
1491 && fit_to_be_escape_quoted (c)) | |
1492 { | |
1493 iso->esc = ISO_ESC_LITERAL; | |
1494 *flags &= ISO_STATE_LOCK; | |
1495 return 1; | |
1496 } | |
1497 | |
1498 /* bzzzt! */ | |
1499 goto error; | |
1500 } | |
1501 | |
3439 | 1502 /* ISO-IR 196 UTF-8 support. */ |
1503 case ISO_ESC_2_5: | |
1504 if ('G' == c) | |
1505 { | |
1506 /* Activate UTF-8 mode. */ | |
1507 *flags &= ISO_STATE_LOCK; | |
1508 *flags |= ISO_STATE_UTF_8; | |
1509 iso->esc = ISO_ESC_NOTHING; | |
1510 return 1; | |
1511 } | |
1512 else if ('@' == c) | |
1513 { | |
1514 /* Deactive UTF-8 mode. */ | |
1515 *flags &= ISO_STATE_LOCK; | |
1516 *flags &= ~(ISO_STATE_UTF_8); | |
1517 iso->esc = ISO_ESC_NOTHING; | |
1518 return 1; | |
1519 } | |
1520 else | |
1521 { | |
1522 /* Oops, we don't support the other UTF-? coding systems within | |
1523 ISO 2022, only in their own context. */ | |
1524 goto error; | |
1525 } | |
771 | 1526 /**** directionality ****/ |
1527 | |
1528 case ISO_ESC_5_11: /* ISO6429 direction control */ | |
1529 if (c == ']') | |
1530 { | |
1531 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1532 goto directionality; | |
1533 } | |
1534 if (c == '0') iso->esc = ISO_ESC_5_11_0; | |
1535 else if (c == '1') iso->esc = ISO_ESC_5_11_1; | |
1536 else if (c == '2') iso->esc = ISO_ESC_5_11_2; | |
1537 else goto error; | |
1538 goto not_done; | |
1539 | |
1540 case ISO_ESC_5_11_0: | |
1541 if (c == ']') | |
1542 { | |
1543 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1544 goto directionality; | |
1545 } | |
1546 goto error; | |
1547 | |
1548 case ISO_ESC_5_11_1: | |
1549 if (c == ']') | |
1550 { | |
1551 *flags = (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1552 goto directionality; | |
1553 } | |
1554 goto error; | |
1555 | |
1556 case ISO_ESC_5_11_2: | |
1557 if (c == ']') | |
1558 { | |
1559 *flags = (*flags & ISO_STATE_LOCK) | ISO_STATE_R2L; | |
1560 goto directionality; | |
1561 } | |
1562 goto error; | |
1563 | |
1564 directionality: | |
1565 iso->esc = ISO_ESC_DIRECTIONALITY; | |
1566 /* Various junk here to attempt to preserve the direction sequences | |
1567 literally in the text if they would otherwise be swallowed due | |
1568 to invalid designations that don't show up as actual charset | |
1569 changes in the text. */ | |
1570 if (iso->invalid_switch_dir) | |
1571 { | |
1572 /* We already inserted a direction switch literally into the | |
1573 text. We assume (#### this may not be right) that the | |
1574 next direction switch is the one going the other way, | |
1575 and we need to output that literally as well. */ | |
1576 iso->output_literally = 1; | |
1577 iso->invalid_switch_dir = 0; | |
1578 } | |
1579 else | |
1580 { | |
1581 int jj; | |
1582 | |
1583 /* If we are in the thrall of an invalid designation, | |
1584 then stick the directionality sequence literally into the | |
1585 output stream so it ends up in the original text again. */ | |
1586 for (jj = 0; jj < 4; jj++) | |
1587 if (iso->invalid_designated[jj]) | |
1588 break; | |
1589 if (jj < 4) | |
1590 { | |
1591 iso->output_literally = 1; | |
1592 iso->invalid_switch_dir = 1; | |
1593 } | |
1594 else | |
1595 /* Indicate that we haven't yet seen a valid designation, | |
1596 so that if a switch-dir is directly followed by an | |
1597 invalid designation, both get inserted literally. */ | |
1598 iso->switched_dir_and_no_valid_charset_yet = 1; | |
1599 } | |
1600 return 1; | |
1601 | |
1602 | |
1603 /**** designation ****/ | |
1604 | |
1605 case ISO_ESC_2_4: | |
1606 if (0x28 <= c && c <= 0x2F) | |
1607 { | |
1608 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8); | |
1609 goto not_done; | |
1610 } | |
1611 if (0x40 <= c && c <= 0x42) | |
1612 { | |
1613 cs = charset_by_attributes_or_create_one (CHARSET_TYPE_94X94, c, | |
1614 *flags & ISO_STATE_R2L ? | |
1615 CHARSET_RIGHT_TO_LEFT : | |
1616 CHARSET_LEFT_TO_RIGHT); | |
1617 reg = 0; | |
1618 goto designated; | |
1619 } | |
1620 goto error; | |
1621 | |
1622 default: | |
1623 { | |
1624 int type = -1; | |
1625 | |
1626 if (iso->esc >= ISO_ESC_2_8 && | |
1627 iso->esc <= ISO_ESC_2_15) | |
1628 { | |
1629 type = ((iso->esc >= ISO_ESC_2_12) ? | |
1630 CHARSET_TYPE_96 : CHARSET_TYPE_94); | |
1631 reg = (iso->esc - ISO_ESC_2_8) & 3; | |
1632 } | |
1633 else if (iso->esc >= ISO_ESC_2_4_8 && | |
1634 iso->esc <= ISO_ESC_2_4_15) | |
1635 { | |
1636 type = ((iso->esc >= ISO_ESC_2_4_12) ? | |
1637 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94); | |
1638 reg = (iso->esc - ISO_ESC_2_4_8) & 3; | |
1639 } | |
1640 else | |
1641 { | |
1642 /* Can this ever be reached? -slb */ | |
2500 | 1643 ABORT (); |
771 | 1644 goto error; |
1645 } | |
1646 | |
1647 if (c < '0' || c > '~' || | |
1648 (c > 0x5F && (type == CHARSET_TYPE_94X94 || | |
1649 type == CHARSET_TYPE_96X96))) | |
1650 goto error; /* bad final byte */ | |
1651 | |
1652 cs = charset_by_attributes_or_create_one (type, c, | |
1653 *flags & ISO_STATE_R2L ? | |
1654 CHARSET_RIGHT_TO_LEFT : | |
1655 CHARSET_LEFT_TO_RIGHT); | |
1656 goto designated; | |
1657 } | |
1658 } | |
1659 | |
1660 not_done: | |
1661 iso->esc_bytes[iso->esc_bytes_index++] = (unsigned char) c; | |
1662 return -1; | |
1663 | |
1664 single_shift: | |
1665 if (check_invalid_charsets && !CHARSETP (iso->charset[reg])) | |
1666 /* can't invoke something that ain't there. */ | |
1667 goto error; | |
1668 iso->esc = ISO_ESC_SINGLE_SHIFT; | |
1669 *flags &= ISO_STATE_LOCK; | |
1670 if (reg == 2) | |
1671 *flags |= ISO_STATE_SS2; | |
1672 else | |
1673 *flags |= ISO_STATE_SS3; | |
1674 return 1; | |
1675 | |
1676 locking_shift: | |
1677 if (check_invalid_charsets && | |
1678 !CHARSETP (iso->charset[reg])) | |
1679 /* can't invoke something that ain't there. */ | |
1680 goto error; | |
1681 if (half) | |
1682 iso->register_right = reg; | |
1683 else | |
1684 iso->register_left = reg; | |
1685 *flags &= ISO_STATE_LOCK; | |
1686 iso->esc = ISO_ESC_LOCKING_SHIFT; | |
1687 return 1; | |
1688 | |
1689 designated: | |
1690 if (NILP (cs) && check_invalid_charsets) | |
1691 { | |
2500 | 1692 ABORT (); |
771 | 1693 /* #### This should never happen now that we automatically create |
1694 temporary charsets as necessary. We should probably remove | |
1695 this code. --ben */ | |
1696 iso->invalid_designated[reg] = 1; | |
1697 iso->charset[reg] = Vcharset_ascii; | |
1698 iso->esc = ISO_ESC_DESIGNATE; | |
1699 *flags &= ISO_STATE_LOCK; | |
1700 iso->output_literally = 1; | |
1701 if (iso->switched_dir_and_no_valid_charset_yet) | |
1702 { | |
1703 /* We encountered a switch-direction followed by an | |
1704 invalid designation. Ensure that the switch-direction | |
1705 gets outputted; otherwise it will probably get eaten | |
1706 when the text is written out again. */ | |
1707 iso->switched_dir_and_no_valid_charset_yet = 0; | |
1708 iso->output_direction_sequence = 1; | |
1709 /* And make sure that the switch-dir going the other | |
1710 way gets outputted, as well. */ | |
1711 iso->invalid_switch_dir = 1; | |
1712 } | |
1713 return 1; | |
1714 } | |
1715 /* This function is called with CODESYS equal to nil when | |
1716 doing coding-system detection. */ | |
1717 if (!NILP (codesys)) | |
1718 { | |
1719 charset_conversion_spec_dynarr *dyn = | |
1720 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys); | |
1721 | |
1722 if (dyn) | |
1723 { | |
1724 int i; | |
1725 | |
1726 for (i = 0; i < Dynarr_length (dyn); i++) | |
1727 { | |
1728 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
1729 if (EQ (cs, spec->from_charset)) | |
1730 cs = spec->to_charset; | |
1731 } | |
1732 } | |
1733 } | |
1734 | |
1735 iso->charset[reg] = cs; | |
1736 iso->esc = ISO_ESC_DESIGNATE; | |
1737 *flags &= ISO_STATE_LOCK; | |
1738 if (iso->invalid_designated[reg]) | |
1739 { | |
1740 iso->invalid_designated[reg] = 0; | |
1741 iso->output_literally = 1; | |
1742 } | |
1743 if (iso->switched_dir_and_no_valid_charset_yet) | |
1744 iso->switched_dir_and_no_valid_charset_yet = 0; | |
1745 return 1; | |
1746 } | |
1747 | |
1748 /* If FLAGS is a null pointer or specifies right-to-left motion, | |
1749 output a switch-dir-to-left-to-right sequence to DST. | |
1750 Also update FLAGS if it is not a null pointer. | |
1751 If INTERNAL_P is set, we are outputting in internal format and | |
1752 need to handle the CSI differently. */ | |
1753 | |
1754 static void | |
1755 restore_left_to_right_direction (Lisp_Object codesys, | |
1756 unsigned_char_dynarr *dst, | |
1757 unsigned int *flags, | |
1758 int internal_p) | |
1759 { | |
1760 if (!flags || (*flags & ISO_STATE_R2L)) | |
1761 { | |
1762 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
1763 { | |
1764 Dynarr_add (dst, ISO_CODE_ESC); | |
1765 Dynarr_add (dst, '['); | |
1766 } | |
1767 else if (internal_p) | |
1768 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
1769 else | |
1770 Dynarr_add (dst, ISO_CODE_CSI); | |
1771 Dynarr_add (dst, '0'); | |
1772 Dynarr_add (dst, ']'); | |
1773 if (flags) | |
1774 *flags &= ~ISO_STATE_R2L; | |
1775 } | |
1776 } | |
1777 | |
1778 /* If FLAGS is a null pointer or specifies a direction different from | |
1779 DIRECTION (which should be either CHARSET_RIGHT_TO_LEFT or | |
1780 CHARSET_LEFT_TO_RIGHT), output the appropriate switch-dir escape | |
1781 sequence to DST. Also update FLAGS if it is not a null pointer. | |
1782 If INTERNAL_P is set, we are outputting in internal format and | |
1783 need to handle the CSI differently. */ | |
1784 | |
1785 static void | |
1786 ensure_correct_direction (int direction, Lisp_Object codesys, | |
1787 unsigned_char_dynarr *dst, unsigned int *flags, | |
1788 int internal_p) | |
1789 { | |
1790 if ((!flags || (*flags & ISO_STATE_R2L)) && | |
1791 direction == CHARSET_LEFT_TO_RIGHT) | |
1792 restore_left_to_right_direction (codesys, dst, flags, internal_p); | |
1793 else if (!XCODING_SYSTEM_ISO2022_NO_ISO6429 (codesys) | |
1794 && (!flags || !(*flags & ISO_STATE_R2L)) && | |
1795 direction == CHARSET_RIGHT_TO_LEFT) | |
1796 { | |
1797 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
1798 { | |
1799 Dynarr_add (dst, ISO_CODE_ESC); | |
1800 Dynarr_add (dst, '['); | |
1801 } | |
1802 else if (internal_p) | |
1803 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
1804 else | |
1805 Dynarr_add (dst, ISO_CODE_CSI); | |
1806 Dynarr_add (dst, '2'); | |
1807 Dynarr_add (dst, ']'); | |
1808 if (flags) | |
1809 *flags |= ISO_STATE_R2L; | |
1810 } | |
1811 } | |
1812 | |
4096 | 1813 /* Note that this name conflicts with a function in unicode.c. */ |
1814 static void | |
1815 decode_unicode_char (int ucs, unsigned_char_dynarr *dst) | |
1816 { | |
1817 Ibyte work[MAX_ICHAR_LEN]; | |
1818 int len; | |
1819 Lisp_Object chr; | |
1820 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
1821 chr = Funicode_to_char(make_fixnum(ucs), Qnil); |
4096 | 1822 assert (!NILP(chr)); |
1823 len = set_itext_ichar (work, XCHAR(chr)); | |
1824 Dynarr_add_many (dst, work, len); | |
1825 } | |
1826 | |
1827 #define DECODE_ERROR_OCTET(octet, dst) \ | |
1828 decode_unicode_char ((octet) + UNICODE_ERROR_OCTET_RANGE_START, dst) | |
1829 | |
1830 static inline void | |
1831 indicate_invalid_utf_8 (unsigned char indicated_length, | |
1832 unsigned char counter, | |
1833 int ch, unsigned_char_dynarr *dst) | |
1834 { | |
1835 Binbyte stored = indicated_length - counter; | |
1836 Binbyte mask = "\x00\x00\xC0\xE0\xF0\xF8\xFC"[indicated_length]; | |
1837 | |
1838 while (stored > 0) | |
1839 { | |
1840 DECODE_ERROR_OCTET (((ch >> (6 * (stored - 1))) & 0x3f) | mask, | |
1841 dst); | |
1842 mask = 0x80, stored--; | |
1843 } | |
1844 } | |
1845 | |
771 | 1846 /* Convert ISO2022-format data to internal format. */ |
1847 | |
1848 static Bytecount | |
1849 iso2022_decode (struct coding_stream *str, const UExtbyte *src, | |
1850 unsigned_char_dynarr *dst, Bytecount n) | |
1851 { | |
1852 unsigned int ch = str->ch; | |
1853 #ifdef ENABLE_COMPOSITE_CHARS | |
1854 unsigned_char_dynarr *real_dst = dst; | |
1855 #endif | |
1856 struct iso2022_coding_stream *data = | |
1857 CODING_STREAM_TYPE_DATA (str, iso2022); | |
1858 unsigned int flags = data->flags; | |
1859 Bytecount orign = n; | |
1860 | |
1861 #ifdef ENABLE_COMPOSITE_CHARS | |
1862 if (flags & ISO_STATE_COMPOSITE) | |
1863 dst = data->composite_chars; | |
1864 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1865 | |
1866 while (n--) | |
1867 { | |
1868 UExtbyte c = *src++; | |
1869 if (flags & ISO_STATE_ESCAPE) | |
1870 { /* Within ESC sequence */ | |
1871 int retval = parse_iso2022_esc (str->codesys, data, | |
1872 c, &flags, 1); | |
1873 | |
1874 if (retval) | |
1875 { | |
1876 switch (data->esc) | |
1877 { | |
1878 #ifdef ENABLE_COMPOSITE_CHARS | |
1879 case ISO_ESC_START_COMPOSITE: | |
1880 if (data->composite_chars) | |
1881 Dynarr_reset (data->composite_chars); | |
1882 else | |
1883 data->composite_chars = Dynarr_new (unsigned_char); | |
1884 dst = data->composite_chars; | |
1885 break; | |
1886 case ISO_ESC_END_COMPOSITE: | |
1887 { | |
867 | 1888 Ibyte comstr[MAX_ICHAR_LEN]; |
771 | 1889 Bytecount len; |
4967 | 1890 Ichar emch = lookup_composite_char (Dynarr_begin (dst), |
771 | 1891 Dynarr_length (dst)); |
1892 dst = real_dst; | |
867 | 1893 len = set_itext_ichar (comstr, emch); |
771 | 1894 Dynarr_add_many (dst, comstr, len); |
1895 break; | |
1896 } | |
1897 #else | |
1898 case ISO_ESC_START_COMPOSITE: | |
1899 { | |
867 | 1900 Ibyte comstr[MAX_ICHAR_LEN]; |
771 | 1901 Bytecount len; |
867 | 1902 Ichar emch = make_ichar (Vcharset_composite, c - '0' + ' ', |
771 | 1903 0); |
867 | 1904 len = set_itext_ichar (comstr, emch); |
771 | 1905 Dynarr_add_many (dst, comstr, len); |
1906 break; | |
1907 } | |
1908 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1909 | |
1910 case ISO_ESC_LITERAL: | |
1911 DECODE_ADD_BINARY_CHAR (c, dst); | |
1912 break; | |
1913 | |
1914 default: | |
1915 /* Everything else handled already */ | |
1916 break; | |
1917 } | |
1918 } | |
1919 | |
1920 /* Attempted error recovery. */ | |
1921 if (data->output_direction_sequence) | |
1922 ensure_correct_direction (flags & ISO_STATE_R2L ? | |
1923 CHARSET_RIGHT_TO_LEFT : | |
1924 CHARSET_LEFT_TO_RIGHT, | |
1925 str->codesys, dst, 0, 1); | |
1926 /* More error recovery. */ | |
1927 if (!retval || data->output_literally) | |
1928 { | |
1929 /* Output the (possibly invalid) sequence */ | |
1930 int i; | |
1931 for (i = 0; i < data->esc_bytes_index; i++) | |
1932 DECODE_ADD_BINARY_CHAR (data->esc_bytes[i], dst); | |
1933 flags &= ISO_STATE_LOCK; | |
1934 if (!retval) | |
1935 n++, src--;/* Repeat the loop with the same character. */ | |
1936 else | |
1937 { | |
1938 /* No sense in reprocessing the final byte of the | |
1939 escape sequence; it could mess things up anyway. | |
1940 Just add it now. */ | |
1941 DECODE_ADD_BINARY_CHAR (c, dst); | |
1942 } | |
1943 } | |
1944 ch = 0; | |
1945 } | |
3439 | 1946 else if (flags & ISO_STATE_UTF_8) |
1947 { | |
1948 unsigned char counter = data->counter; | |
4096 | 1949 unsigned char indicated_length = data->indicated_length; |
3439 | 1950 |
1951 if (ISO_CODE_ESC == c) | |
1952 { | |
1953 /* Allow the escape sequence parser to end the UTF-8 state. */ | |
1954 flags |= ISO_STATE_ESCAPE; | |
1955 data->esc = ISO_ESC; | |
1956 data->esc_bytes_index = 1; | |
1957 continue; | |
1958 } | |
1959 | |
4096 | 1960 if (0 == counter) |
1961 { | |
1962 if (0 == (c & 0x80)) | |
1963 { | |
1964 /* ASCII. */ | |
1965 decode_unicode_char (c, dst); | |
1966 } | |
1967 else if (0 == (c & 0x40)) | |
1968 { | |
1969 /* Highest bit set, second highest not--there's | |
1970 something wrong. */ | |
1971 DECODE_ERROR_OCTET (c, dst); | |
1972 } | |
1973 else if (0 == (c & 0x20)) | |
1974 { | |
1975 ch = c & 0x1f; | |
1976 counter = 1; | |
1977 indicated_length = 2; | |
1978 } | |
1979 else if (0 == (c & 0x10)) | |
1980 { | |
1981 ch = c & 0x0f; | |
1982 counter = 2; | |
1983 indicated_length = 3; | |
1984 } | |
1985 else if (0 == (c & 0x08)) | |
1986 { | |
1987 ch = c & 0x0f; | |
1988 counter = 3; | |
1989 indicated_length = 4; | |
1990 } | |
1991 /* We support lengths longer than 4 here, since we want to | |
1992 represent UTF-8 error chars as distinct from the | |
1993 corresponding ISO 8859-1 characters in escape-quoted. | |
1994 | |
1995 However, we can't differentiate UTF-8 error chars as | |
1996 written to disk, and UTF-8 errors in escape-quoted. This | |
1997 is not a big problem; | |
1998 non-Unicode-chars-encoded-as-UTF-8-in-ISO-2022 is not | |
1999 deployed, in practice, so if such a sequence of octets | |
2000 occurs, XEmacs generated it. */ | |
2001 else if (0 == (c & 0x04)) | |
2002 { | |
2003 ch = c & 0x03; | |
2004 counter = 4; | |
2005 indicated_length = 5; | |
2006 } | |
2007 else if (0 == (c & 0x02)) | |
2008 { | |
2009 ch = c & 0x01; | |
2010 counter = 5; | |
2011 indicated_length = 6; | |
2012 } | |
2013 else | |
2014 { | |
2015 /* #xFF is not a valid leading byte in any form of | |
2016 UTF-8. */ | |
2017 DECODE_ERROR_OCTET (c, dst); | |
2018 | |
2019 } | |
2020 } | |
2021 else | |
2022 { | |
2023 /* counter != 0 */ | |
2024 if ((0 == (c & 0x80)) || (0 != (c & 0x40))) | |
2025 { | |
2026 indicate_invalid_utf_8(indicated_length, | |
2027 counter, | |
2028 ch, dst); | |
2029 if (c & 0x80) | |
2030 { | |
2031 DECODE_ERROR_OCTET (c, dst); | |
2032 } | |
2033 else | |
2034 { | |
2035 /* The character just read is ASCII. Treat it as | |
2036 such. */ | |
2037 decode_unicode_char (c, dst); | |
2038 } | |
2039 ch = 0; | |
2040 counter = 0; | |
2041 } | |
2042 else | |
2043 { | |
2044 ch = (ch << 6) | (c & 0x3f); | |
2045 counter--; | |
2046 | |
2047 /* Just processed the final byte. Emit the character. */ | |
2048 if (!counter) | |
2049 { | |
2050 /* Don't accept over-long sequences, or surrogates. */ | |
2051 if ((ch < 0x80) || | |
2052 ((ch < 0x800) && indicated_length > 2) || | |
2053 ((ch < 0x10000) && indicated_length > 3) || | |
2054 /* We accept values above #x110000 in | |
2055 escape-quoted, though not in UTF-8. */ | |
2056 /* (ch > 0x110000) || */ | |
2057 valid_utf_16_surrogate(ch)) | |
2058 { | |
2059 indicate_invalid_utf_8(indicated_length, | |
2060 counter, | |
2061 ch, dst); | |
2062 } | |
2063 else | |
2064 { | |
2065 decode_unicode_char (ch, dst); | |
2066 } | |
2067 ch = 0; | |
2068 } | |
2069 } | |
2070 } | |
2071 | |
2072 if (str->eof && ch) | |
2073 { | |
2074 DECODE_ERROR_OCTET (ch, dst); | |
2075 ch = 0; | |
2076 } | |
3439 | 2077 |
2078 data->counter = counter; | |
4096 | 2079 data->indicated_length = indicated_length; |
3439 | 2080 } |
826 | 2081 else if (byte_c0_p (c) || byte_c1_p (c)) |
771 | 2082 { /* Control characters */ |
2083 | |
2084 /***** Error-handling *****/ | |
2085 | |
2086 /* If we were in the middle of a character, dump out the | |
2087 partial character. */ | |
2088 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2089 | |
2090 /* If we just saw a single-shift character, dump it out. | |
2091 This may dump out the wrong sort of single-shift character, | |
2092 but least it will give an indication that something went | |
2093 wrong. */ | |
2094 if (flags & ISO_STATE_SS2) | |
2095 { | |
2096 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst); | |
2097 flags &= ~ISO_STATE_SS2; | |
2098 } | |
2099 if (flags & ISO_STATE_SS3) | |
2100 { | |
2101 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst); | |
2102 flags &= ~ISO_STATE_SS3; | |
2103 } | |
2104 | |
2105 /***** Now handle the control characters. *****/ | |
2106 | |
2107 flags &= ISO_STATE_LOCK; | |
2108 | |
2109 if (!parse_iso2022_esc (str->codesys, data, c, &flags, 1)) | |
2110 DECODE_ADD_BINARY_CHAR (c, dst); | |
2111 } | |
2112 else | |
2113 { /* Graphic characters */ | |
2114 Lisp_Object charset; | |
2115 int lb; | |
2116 int reg; | |
2117 | |
2118 /* Now determine the charset. */ | |
2119 reg = ((flags & ISO_STATE_SS2) ? 2 | |
2120 : (flags & ISO_STATE_SS3) ? 3 | |
826 | 2121 : !byte_ascii_p (c) ? data->register_right |
771 | 2122 : data->register_left); |
2123 charset = data->charset[reg]; | |
2124 | |
2125 /* Error checking: */ | |
2126 if (! CHARSETP (charset) | |
2127 || data->invalid_designated[reg] | |
2128 || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL) | |
2129 && XCHARSET_CHARS (charset) == 94)) | |
2130 /* Mrmph. We are trying to invoke a register that has no | |
2131 or an invalid charset in it, or trying to add a character | |
2132 outside the range of the charset. Insert that char literally | |
2133 to preserve it for the output. */ | |
2134 { | |
2135 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2136 DECODE_ADD_BINARY_CHAR (c, dst); | |
2137 } | |
2138 | |
2139 else | |
2140 { | |
2141 /* Things are probably hunky-dorey. */ | |
2142 | |
2143 /* Fetch reverse charset, maybe. */ | |
2144 if (((flags & ISO_STATE_R2L) && | |
2145 XCHARSET_DIRECTION (charset) == CHARSET_LEFT_TO_RIGHT) | |
2146 || | |
2147 (!(flags & ISO_STATE_R2L) && | |
2148 XCHARSET_DIRECTION (charset) == CHARSET_RIGHT_TO_LEFT)) | |
2149 { | |
2150 Lisp_Object new_charset = | |
2151 XCHARSET_REVERSE_DIRECTION_CHARSET (charset); | |
2152 if (!NILP (new_charset)) | |
2153 charset = new_charset; | |
2154 } | |
2155 | |
2156 lb = XCHARSET_LEADING_BYTE (charset); | |
2157 switch (XCHARSET_REP_BYTES (charset)) | |
2158 { | |
2159 case 1: /* ASCII */ | |
2160 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2161 Dynarr_add (dst, c & 0x7F); | |
2162 break; | |
2163 | |
2164 case 2: /* one-byte official */ | |
2165 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2166 Dynarr_add (dst, lb); | |
2167 Dynarr_add (dst, c | 0x80); | |
2168 break; | |
2169 | |
2170 case 3: /* one-byte private or two-byte official */ | |
2171 if (XCHARSET_PRIVATE_P (charset)) | |
2172 { | |
2173 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2174 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1); | |
2175 Dynarr_add (dst, lb); | |
2176 Dynarr_add (dst, c | 0x80); | |
2177 } | |
2178 else | |
2179 { | |
2180 if (ch) | |
2181 { | |
2182 Dynarr_add (dst, lb); | |
2183 Dynarr_add (dst, ch | 0x80); | |
2184 Dynarr_add (dst, c | 0x80); | |
2185 ch = 0; | |
2186 } | |
2187 else | |
2188 ch = c; | |
2189 } | |
2190 break; | |
2191 | |
2192 default: /* two-byte private */ | |
2193 if (ch) | |
2194 { | |
2195 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2); | |
2196 Dynarr_add (dst, lb); | |
2197 Dynarr_add (dst, ch | 0x80); | |
2198 Dynarr_add (dst, c | 0x80); | |
2199 ch = 0; | |
2200 } | |
2201 else | |
2202 ch = c; | |
2203 } | |
2204 } | |
2205 | |
2206 if (!ch) | |
2207 flags &= ISO_STATE_LOCK; | |
2208 } | |
2209 | |
2210 } | |
2211 | |
2212 if (str->eof) | |
2213 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2214 | |
2215 data->flags = flags; | |
2216 str->ch = ch; | |
2217 return orign; | |
2218 } | |
2219 | |
2220 | |
2221 /***** ISO2022 encoder *****/ | |
2222 | |
2223 /* Designate CHARSET into register REG. */ | |
2224 | |
2225 static void | |
2226 iso2022_designate (Lisp_Object charset, int reg, | |
2227 struct coding_stream *str, unsigned_char_dynarr *dst) | |
2228 { | |
2229 static const char inter94[] = "()*+"; | |
2230 static const char inter96[] = ",-./"; | |
2231 int type; | |
2232 unsigned char final; | |
2233 struct iso2022_coding_stream *data = | |
2234 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2235 Lisp_Object old_charset = data->charset[reg]; | |
2236 | |
2237 data->charset[reg] = charset; | |
2238 if (!CHARSETP (charset)) | |
2239 /* charset might be an initial nil or t. */ | |
2240 return; | |
2241 type = XCHARSET_TYPE (charset); | |
2242 final = XCHARSET_FINAL (charset); | |
2243 if (!data->force_charset_on_output[reg] && | |
2244 CHARSETP (old_charset) && | |
2245 XCHARSET_TYPE (old_charset) == type && | |
2246 XCHARSET_FINAL (old_charset) == final) | |
2247 return; | |
2248 | |
2249 data->force_charset_on_output[reg] = 0; | |
2250 | |
2251 { | |
2252 charset_conversion_spec_dynarr *dyn = | |
2253 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (str->codesys); | |
2254 | |
2255 if (dyn) | |
2256 { | |
2257 int i; | |
2258 | |
2259 for (i = 0; i < Dynarr_length (dyn); i++) | |
2260 { | |
2261 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
2262 if (EQ (charset, spec->from_charset)) | |
2263 charset = spec->to_charset; | |
2264 } | |
2265 } | |
2266 } | |
2267 | |
2268 Dynarr_add (dst, ISO_CODE_ESC); | |
3439 | 2269 |
771 | 2270 switch (type) |
2271 { | |
2272 case CHARSET_TYPE_94: | |
2273 Dynarr_add (dst, inter94[reg]); | |
2274 break; | |
2275 case CHARSET_TYPE_96: | |
2276 Dynarr_add (dst, inter96[reg]); | |
2277 break; | |
2278 case CHARSET_TYPE_94X94: | |
2279 Dynarr_add (dst, '$'); | |
2280 if (reg != 0 | |
2281 || !(XCODING_SYSTEM_ISO2022_SHORT (str->codesys)) | |
2282 || final < '@' | |
2283 || final > 'B') | |
2284 Dynarr_add (dst, inter94[reg]); | |
2285 break; | |
2286 case CHARSET_TYPE_96X96: | |
2287 Dynarr_add (dst, '$'); | |
2288 Dynarr_add (dst, inter96[reg]); | |
2289 break; | |
2290 } | |
2291 Dynarr_add (dst, final); | |
2292 } | |
2293 | |
2294 static void | |
2295 ensure_normal_shift (struct coding_stream *str, unsigned_char_dynarr *dst) | |
2296 { | |
2297 struct iso2022_coding_stream *data = | |
2298 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2299 | |
2300 if (data->register_left != 0) | |
2301 { | |
2302 Dynarr_add (dst, ISO_CODE_SI); | |
2303 data->register_left = 0; | |
2304 } | |
2305 } | |
2306 | |
2307 static void | |
2308 ensure_shift_out (struct coding_stream *str, unsigned_char_dynarr *dst) | |
2309 { | |
2310 struct iso2022_coding_stream *data = | |
2311 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2312 | |
2313 if (data->register_left != 1) | |
2314 { | |
2315 Dynarr_add (dst, ISO_CODE_SO); | |
2316 data->register_left = 1; | |
2317 } | |
2318 } | |
2319 | |
2320 /* Convert internally-formatted data to ISO2022 format. */ | |
2321 | |
2322 static Bytecount | |
867 | 2323 iso2022_encode (struct coding_stream *str, const Ibyte *src, |
771 | 2324 unsigned_char_dynarr *dst, Bytecount n) |
2325 { | |
2326 unsigned char charmask; | |
867 | 2327 Ibyte c; |
771 | 2328 unsigned char char_boundary; |
2329 unsigned int ch = str->ch; | |
2330 Lisp_Object codesys = str->codesys; | |
2331 int i; | |
2332 Lisp_Object charset; | |
2333 int half; | |
2334 struct iso2022_coding_stream *data = | |
2335 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2336 unsigned int flags = data->flags; | |
2337 Bytecount orign = n; | |
2338 | |
2339 #ifdef ENABLE_COMPOSITE_CHARS | |
2340 /* flags for handling composite chars. We do a little switcheroo | |
2341 on the source while we're outputting the composite char. */ | |
2342 Bytecount saved_n = 0; | |
867 | 2343 const Ibyte *saved_src = NULL; |
771 | 2344 int in_composite = 0; |
2345 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2346 | |
2347 char_boundary = data->current_char_boundary; | |
2348 charset = data->current_charset; | |
2349 half = data->current_half; | |
2350 | |
2351 #ifdef ENABLE_COMPOSITE_CHARS | |
2352 back_to_square_n: | |
2353 #endif | |
2354 while (n--) | |
2355 { | |
2356 c = *src++; | |
2357 | |
826 | 2358 if (byte_ascii_p (c)) |
771 | 2359 { /* Processing ASCII character */ |
2360 ch = 0; | |
2361 | |
3439 | 2362 if (flags & ISO_STATE_UTF_8) |
2363 { | |
2364 Dynarr_add (dst, ISO_CODE_ESC); | |
2365 Dynarr_add (dst, '%'); | |
2366 Dynarr_add (dst, '@'); | |
2367 flags &= ~(ISO_STATE_UTF_8); | |
2368 } | |
2369 | |
771 | 2370 restore_left_to_right_direction (codesys, dst, &flags, 0); |
2371 | |
2372 /* Make sure G0 contains ASCII */ | |
2373 if ((c > ' ' && c < ISO_CODE_DEL) || | |
2374 !XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys)) | |
2375 { | |
2376 ensure_normal_shift (str, dst); | |
2377 iso2022_designate (Vcharset_ascii, 0, str, dst); | |
2378 } | |
2379 | |
2380 /* If necessary, restore everything to the default state | |
2381 at end-of-line */ | |
2382 if (!(XCODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys))) | |
2383 { | |
2384 /* NOTE: CRLF encoding happens *BEFORE* other encoding. | |
2385 Thus, even though we're working with internal-format | |
2386 data, there may be CR's or CRLF sequences representing | |
2387 newlines. */ | |
2388 if (c == '\r' || (c == '\n' && !(flags & ISO_STATE_CR))) | |
2389 { | |
2390 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
2391 | |
2392 ensure_normal_shift (str, dst); | |
2393 | |
2394 for (i = 0; i < 4; i++) | |
2395 { | |
2396 Lisp_Object initial_charset = | |
2397 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
2398 iso2022_designate (initial_charset, i, str, dst); | |
2399 } | |
2400 } | |
2401 if (c == '\r') | |
2402 flags |= ISO_STATE_CR; | |
2403 else | |
2404 flags &= ~ISO_STATE_CR; | |
2405 } | |
2406 | |
2407 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
2408 && fit_to_be_escape_quoted (c)) | |
2409 Dynarr_add (dst, ISO_CODE_ESC); | |
2410 Dynarr_add (dst, c); | |
2411 char_boundary = 1; | |
2412 } | |
867 | 2413 else if (ibyte_leading_byte_p (c) || ibyte_leading_byte_p (ch)) |
771 | 2414 { /* Processing Leading Byte */ |
2415 ch = 0; | |
826 | 2416 charset = charset_by_leading_byte (c); |
2417 if (leading_byte_prefix_p (c)) | |
3439 | 2418 { |
2419 ch = c; | |
2420 } | |
2421 else if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
2422 { | |
2423 assert (!EQ (charset, Vcharset_control_1) | |
2424 && !EQ (charset, Vcharset_composite)); | |
2425 | |
2426 /* If the character set is to be encoded as UTF-8, the escape | |
2427 is always the same. */ | |
2428 if (!(flags & ISO_STATE_UTF_8)) | |
2429 { | |
2430 Dynarr_add (dst, ISO_CODE_ESC); | |
2431 Dynarr_add (dst, '%'); | |
2432 Dynarr_add (dst, 'G'); | |
2433 flags |= ISO_STATE_UTF_8; | |
2434 } | |
2435 } | |
771 | 2436 else if (!EQ (charset, Vcharset_control_1) |
2437 && !EQ (charset, Vcharset_composite)) | |
2438 { | |
2439 int reg; | |
2440 | |
3439 | 2441 /* End the UTF-8 state. */ |
2442 if (flags & ISO_STATE_UTF_8) | |
2443 { | |
2444 Dynarr_add (dst, ISO_CODE_ESC); | |
2445 Dynarr_add (dst, '%'); | |
2446 Dynarr_add (dst, '@'); | |
2447 flags &= ~(ISO_STATE_UTF_8); | |
2448 } | |
2449 | |
771 | 2450 ensure_correct_direction (XCHARSET_DIRECTION (charset), |
2451 codesys, dst, &flags, 0); | |
2452 | |
2453 /* Now determine which register to use. */ | |
2454 reg = -1; | |
2455 for (i = 0; i < 4; i++) | |
2456 { | |
2457 if (EQ (charset, data->charset[i]) || | |
2458 EQ (charset, | |
2459 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))) | |
2460 { | |
2461 reg = i; | |
2462 break; | |
2463 } | |
2464 } | |
2465 | |
2466 if (reg == -1) | |
2467 { | |
2468 if (XCHARSET_GRAPHIC (charset) != 0) | |
2469 { | |
2470 if (!NILP (data->charset[1]) && | |
2471 (!XCODING_SYSTEM_ISO2022_SEVEN (codesys) || | |
2472 XCODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys))) | |
2473 reg = 1; | |
2474 else if (!NILP (data->charset[2])) | |
2475 reg = 2; | |
2476 else if (!NILP (data->charset[3])) | |
2477 reg = 3; | |
2478 else | |
2479 reg = 0; | |
2480 } | |
2481 else | |
2482 reg = 0; | |
2483 } | |
2484 | |
2485 iso2022_designate (charset, reg, str, dst); | |
2486 | |
2487 /* Now invoke that register. */ | |
2488 switch (reg) | |
2489 { | |
2490 case 0: | |
2491 ensure_normal_shift (str, dst); | |
2492 half = 0; | |
2493 break; | |
2494 | |
2495 case 1: | |
2496 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
2497 { | |
2498 ensure_shift_out (str, dst); | |
2499 half = 0; | |
2500 } | |
2501 else | |
2502 half = 1; | |
2503 break; | |
2504 | |
2505 case 2: | |
2506 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
2507 { | |
2508 Dynarr_add (dst, ISO_CODE_ESC); | |
2509 Dynarr_add (dst, 'N'); | |
2510 half = 0; | |
2511 } | |
2512 else | |
2513 { | |
2514 Dynarr_add (dst, ISO_CODE_SS2); | |
2515 half = 1; | |
2516 } | |
2517 break; | |
2518 | |
2519 case 3: | |
2520 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
2521 { | |
2522 Dynarr_add (dst, ISO_CODE_ESC); | |
2523 Dynarr_add (dst, 'O'); | |
2524 half = 0; | |
2525 } | |
2526 else | |
2527 { | |
2528 Dynarr_add (dst, ISO_CODE_SS3); | |
2529 half = 1; | |
2530 } | |
2531 break; | |
2532 | |
2533 default: | |
2500 | 2534 ABORT (); |
771 | 2535 } |
2536 } | |
2537 char_boundary = 0; | |
2538 } | |
2539 else | |
2540 { /* Processing Non-ASCII character */ | |
2541 charmask = (half == 0 ? 0x7F : 0xFF); | |
2542 char_boundary = 1; | |
2543 if (EQ (charset, Vcharset_control_1)) | |
2544 { | |
2545 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
5100
3d91f0b64469
fix bad bug with escape-quoted handling
Ben Wing <ben@xemacs.org>
parents:
4976
diff
changeset
|
2546 && fit_to_be_escape_quoted (c - 0x20)) |
771 | 2547 Dynarr_add (dst, ISO_CODE_ESC); |
2548 /* you asked for it ... */ | |
2549 Dynarr_add (dst, c - 0x20); | |
2550 } | |
2551 #ifndef ENABLE_COMPOSITE_CHARS | |
2552 else if (EQ (charset, Vcharset_composite)) | |
2553 { | |
2554 if (c >= 160 || c <= 164) /* Someone might have stuck in | |
2555 something else */ | |
2556 { | |
2557 Dynarr_add (dst, ISO_CODE_ESC); | |
2558 Dynarr_add (dst, c - 160 + '0'); | |
2559 } | |
2560 } | |
2561 #endif | |
2562 else | |
2563 { | |
2564 switch (XCHARSET_REP_BYTES (charset)) | |
2565 { | |
2566 case 2: | |
3439 | 2567 dynarr_add_2022_one_dimension (charset, c, |
2568 charmask, dst); | |
771 | 2569 break; |
2570 case 3: | |
2571 if (XCHARSET_PRIVATE_P (charset)) | |
2572 { | |
3439 | 2573 dynarr_add_2022_one_dimension (charset, c, |
2574 charmask, dst); | |
771 | 2575 ch = 0; |
2576 } | |
2577 else if (ch) | |
2578 { | |
2579 #ifdef ENABLE_COMPOSITE_CHARS | |
2580 if (EQ (charset, Vcharset_composite)) | |
2581 { | |
3439 | 2582 /* #### Hasn't been written to handle composite |
2583 characters yet. */ | |
2584 assert(!XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
771 | 2585 if (in_composite) |
2586 { | |
2587 /* #### Bother! We don't know how to | |
2588 handle this yet. */ | |
2589 Dynarr_add (dst, '~'); | |
2590 } | |
2591 else | |
2592 { | |
867 | 2593 Ichar emch = make_ichar (Vcharset_composite, |
771 | 2594 ch & 0x7F, c & 0x7F); |
2595 Lisp_Object lstr = composite_char_string (emch); | |
2596 saved_n = n; | |
2597 saved_src = src; | |
2598 in_composite = 1; | |
2599 src = XSTRING_DATA (lstr); | |
2600 n = XSTRING_LENGTH (lstr); | |
2601 Dynarr_add (dst, ISO_CODE_ESC); | |
2602 Dynarr_add (dst, '0'); /* start composing */ | |
2603 } | |
2604 } | |
2605 else | |
2606 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2607 { | |
3439 | 2608 dynarr_add_2022_two_dimensions (charset, c, ch, |
2609 charmask, dst); | |
771 | 2610 } |
2611 ch = 0; | |
2612 } | |
2613 else | |
2614 { | |
2615 ch = c; | |
2616 char_boundary = 0; | |
2617 } | |
2618 break; | |
2619 case 4: | |
2620 if (ch) | |
2621 { | |
3439 | 2622 dynarr_add_2022_two_dimensions (charset, c, ch, |
2623 charmask, dst); | |
771 | 2624 ch = 0; |
2625 } | |
2626 else | |
2627 { | |
2628 ch = c; | |
2629 char_boundary = 0; | |
2630 } | |
2631 break; | |
2632 default: | |
2500 | 2633 ABORT (); |
771 | 2634 } |
2635 } | |
2636 } | |
2637 } | |
2638 | |
2639 #ifdef ENABLE_COMPOSITE_CHARS | |
2640 if (in_composite) | |
2641 { | |
2642 n = saved_n; | |
2643 src = saved_src; | |
2644 in_composite = 0; | |
2645 Dynarr_add (dst, ISO_CODE_ESC); | |
2646 Dynarr_add (dst, '1'); /* end composing */ | |
2647 goto back_to_square_n; /* Wheeeeeeeee ..... */ | |
2648 } | |
2649 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2650 | |
2651 if (char_boundary && str->eof) | |
2652 { | |
2653 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
2654 ensure_normal_shift (str, dst); | |
2655 for (i = 0; i < 4; i++) | |
2656 { | |
2657 Lisp_Object initial_charset = | |
2658 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
2659 iso2022_designate (initial_charset, i, str, dst); | |
2660 } | |
2661 } | |
2662 | |
2663 data->flags = flags; | |
2664 str->ch = ch; | |
2665 data->current_char_boundary = char_boundary; | |
2666 data->current_charset = charset; | |
2667 data->current_half = half; | |
2668 | |
2669 /* Verbum caro factum est! */ | |
2670 return orign; | |
2671 } | |
2672 | |
2673 static Bytecount | |
2674 iso2022_convert (struct coding_stream *str, | |
2675 const UExtbyte *src, | |
2676 unsigned_char_dynarr *dst, Bytecount n) | |
2677 { | |
2678 if (str->direction == CODING_DECODE) | |
2679 return iso2022_decode (str, src, dst, n); | |
2680 else | |
2681 return iso2022_encode (str, src, dst, n); | |
2682 } | |
2683 | |
2684 static void | |
2685 iso2022_mark (Lisp_Object codesys) | |
2686 { | |
2687 int i; | |
2688 | |
2689 for (i = 0; i < 4; i++) | |
2690 mark_object (XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); | |
2691 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)) | |
2692 { | |
2693 for (i = 0; | |
2694 i < Dynarr_length (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)); | |
2695 i++) | |
2696 { | |
2697 struct charset_conversion_spec *ccs = | |
2698 Dynarr_atp (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), i); | |
2699 mark_object (ccs->from_charset); | |
2700 mark_object (ccs->to_charset); | |
2701 } | |
2702 } | |
2703 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)) | |
2704 { | |
2705 for (i = 0; | |
2706 i < Dynarr_length (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)); | |
2707 i++) | |
2708 { | |
2709 struct charset_conversion_spec *ccs = | |
2710 Dynarr_atp (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), i); | |
2711 mark_object (ccs->from_charset); | |
2712 mark_object (ccs->to_charset); | |
2713 } | |
2714 } | |
2715 } | |
2716 | |
2717 static void | |
2718 iso2022_finalize (Lisp_Object cs) | |
2719 { | |
2720 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)) | |
2721 { | |
2722 Dynarr_free (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)); | |
2723 XCODING_SYSTEM_ISO2022_INPUT_CONV (cs) = 0; | |
2724 } | |
2725 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)) | |
2726 { | |
2727 Dynarr_free (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)); | |
2728 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs) = 0; | |
2729 } | |
2730 } | |
2731 | |
2732 /* Given a list of charset conversion specs as specified in a Lisp | |
2733 program, parse it into STORE_HERE. */ | |
2734 | |
2735 static void | |
2736 parse_charset_conversion_specs (charset_conversion_spec_dynarr *store_here, | |
2737 Lisp_Object spec_list) | |
2738 { | |
2367 | 2739 EXTERNAL_LIST_LOOP_2 (car, spec_list) |
771 | 2740 { |
2741 Lisp_Object from, to; | |
2742 struct charset_conversion_spec spec; | |
2743 | |
2744 if (!CONSP (car) || !CONSP (XCDR (car)) || !NILP (XCDR (XCDR (car)))) | |
2745 invalid_argument ("Invalid charset conversion spec", car); | |
2746 from = Fget_charset (XCAR (car)); | |
2747 to = Fget_charset (XCAR (XCDR (car))); | |
2748 if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to)) | |
2749 invalid_operation_2 | |
2750 ("Attempted conversion between different charset types", | |
2751 from, to); | |
2752 spec.from_charset = from; | |
2753 spec.to_charset = to; | |
2754 | |
2755 Dynarr_add (store_here, spec); | |
2756 } | |
2757 } | |
2758 | |
2759 /* Given a dynarr LOAD_HERE of internally-stored charset conversion | |
2760 specs, return the equivalent as the Lisp programmer would see it. | |
2761 | |
2762 If LOAD_HERE is 0, return Qnil. */ | |
2763 | |
2764 static Lisp_Object | |
2765 unparse_charset_conversion_specs (charset_conversion_spec_dynarr *load_here, | |
2766 int names) | |
2767 { | |
2768 int i; | |
2769 Lisp_Object result; | |
2770 | |
2771 if (!load_here) | |
2772 return Qnil; | |
2773 for (i = 0, result = Qnil; i < Dynarr_length (load_here); i++) | |
2774 { | |
2775 struct charset_conversion_spec *ccs = Dynarr_atp (load_here, i); | |
2776 if (names) | |
2777 result = Fcons (list2 (XCHARSET_NAME (ccs->from_charset), | |
2778 XCHARSET_NAME (ccs->to_charset)), result); | |
2779 else | |
2780 result = Fcons (list2 (ccs->from_charset, ccs->to_charset), result); | |
2781 } | |
2782 | |
2783 return Fnreverse (result); | |
2784 } | |
2785 | |
2786 static int | |
2787 iso2022_putprop (Lisp_Object codesys, | |
2788 Lisp_Object key, | |
2789 Lisp_Object value) | |
2790 { | |
2791 #define FROB_INITIAL_CHARSET(charset_num) \ | |
2792 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \ | |
2793 ((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value)) | |
2794 | |
2795 if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0); | |
2796 else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1); | |
2797 else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2); | |
2798 else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3); | |
2799 | |
2800 #define FROB_FORCE_CHARSET(charset_num) \ | |
2801 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = \ | |
2802 !NILP (value) | |
2803 | |
2804 else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0); | |
2805 else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1); | |
2806 else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2); | |
2807 else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3); | |
2808 | |
2809 #define FROB_BOOLEAN_PROPERTY(prop) \ | |
2810 XCODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value) | |
2811 | |
2812 else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT); | |
2813 else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL); | |
2814 else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL); | |
2815 else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN); | |
2816 else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT); | |
2817 else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429); | |
2818 else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED); | |
2819 | |
2820 else if (EQ (key, Qinput_charset_conversion)) | |
2821 { | |
2822 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys) = | |
2823 Dynarr_new (charset_conversion_spec); | |
2824 parse_charset_conversion_specs | |
2825 (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), value); | |
2826 } | |
2827 else if (EQ (key, Qoutput_charset_conversion)) | |
2828 { | |
2829 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys) = | |
2830 Dynarr_new (charset_conversion_spec); | |
2831 parse_charset_conversion_specs | |
2832 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), value); | |
2833 } | |
2834 else | |
2835 return 0; | |
2836 | |
2837 return 1; | |
2838 } | |
2839 | |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
/* The STR parameter of iso2022_finalize_coding_stream() is only
   referenced when composite characters are compiled in; otherwise it
   is wrapped in UNUSED (). */
#ifdef ENABLE_COMPOSITE_CHARS
#define USED_IF_COMPOSITE_CHARS(x) x
#else
#define USED_IF_COMPOSITE_CHARS(x) UNUSED (x)
#endif

/* Finalize method for ISO2022 coding streams.  With
   ENABLE_COMPOSITE_CHARS, frees the stream's composite_chars dynarr if
   one exists; without it, this does nothing. */
static void
iso2022_finalize_coding_stream (struct coding_stream *
				USED_IF_COMPOSITE_CHARS (str))
{
#ifdef ENABLE_COMPOSITE_CHARS
  struct iso2022_coding_stream *data =
    CODING_STREAM_TYPE_DATA (str, iso2022);

  if (data->composite_chars)
    {
      Dynarr_free (data->composite_chars);
      /* Zero the pointer after freeing, as iso2022_finalize() does, so a
	 repeated finalize cannot free the dynarr twice. */
      data->composite_chars = 0;
    }
#endif
}
2858 | |
2859 static void | |
2860 iso2022_init (Lisp_Object codesys) | |
2861 { | |
2862 int i; | |
2863 for (i = 0; i < 4; i++) | |
2864 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i) = Qnil; | |
2865 } | |
2866 | |
2867 static Lisp_Object | |
2868 coding_system_charset (Lisp_Object coding_system, int gnum) | |
2869 { | |
2870 Lisp_Object cs | |
2871 = XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, gnum); | |
2872 | |
2873 return CHARSETP (cs) ? XCHARSET_NAME (cs) : Qnil; | |
2874 } | |
2875 | |
2876 static Lisp_Object | |
2877 iso2022_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2878 { | |
2879 if (EQ (prop, Qcharset_g0)) | |
2880 return coding_system_charset (coding_system, 0); | |
2881 else if (EQ (prop, Qcharset_g1)) | |
2882 return coding_system_charset (coding_system, 1); | |
2883 else if (EQ (prop, Qcharset_g2)) | |
2884 return coding_system_charset (coding_system, 2); | |
2885 else if (EQ (prop, Qcharset_g3)) | |
2886 return coding_system_charset (coding_system, 3); | |
2887 | |
2888 #define FORCE_CHARSET(charset_num) \ | |
2889 (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT \ | |
2890 (coding_system, charset_num) ? Qt : Qnil) | |
2891 | |
2892 else if (EQ (prop, Qforce_g0_on_output)) | |
2893 return FORCE_CHARSET (0); | |
2894 else if (EQ (prop, Qforce_g1_on_output)) | |
2895 return FORCE_CHARSET (1); | |
2896 else if (EQ (prop, Qforce_g2_on_output)) | |
2897 return FORCE_CHARSET (2); | |
2898 else if (EQ (prop, Qforce_g3_on_output)) | |
2899 return FORCE_CHARSET (3); | |
2900 | |
2901 #define LISP_BOOLEAN(prop) \ | |
2902 (XCODING_SYSTEM_ISO2022_##prop (coding_system) ? Qt : Qnil) | |
2903 | |
2904 else if (EQ (prop, Qshort)) return LISP_BOOLEAN (SHORT); | |
2905 else if (EQ (prop, Qno_ascii_eol)) return LISP_BOOLEAN (NO_ASCII_EOL); | |
2906 else if (EQ (prop, Qno_ascii_cntl)) return LISP_BOOLEAN (NO_ASCII_CNTL); | |
2907 else if (EQ (prop, Qseven)) return LISP_BOOLEAN (SEVEN); | |
2908 else if (EQ (prop, Qlock_shift)) return LISP_BOOLEAN (LOCK_SHIFT); | |
2909 else if (EQ (prop, Qno_iso6429)) return LISP_BOOLEAN (NO_ISO6429); | |
2910 else if (EQ (prop, Qescape_quoted)) return LISP_BOOLEAN (ESCAPE_QUOTED); | |
2911 | |
2912 else if (EQ (prop, Qinput_charset_conversion)) | |
2913 return | |
2914 unparse_charset_conversion_specs | |
2915 (XCODING_SYSTEM_ISO2022_INPUT_CONV (coding_system), 0); | |
2916 else if (EQ (prop, Qoutput_charset_conversion)) | |
2917 return | |
2918 unparse_charset_conversion_specs | |
2919 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (coding_system), 0); | |
2920 else | |
2921 return Qunbound; | |
2922 } | |
2923 | |
2924 static void | |
2286 | 2925 iso2022_print (Lisp_Object cs, Lisp_Object printcharfun, |
2926 int UNUSED (escapeflag)) | |
771 | 2927 { |
2928 int i; | |
2929 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2930 write_ascstring (printcharfun, "("); |
771 | 2931 for (i = 0; i < 4; i++) |
2932 { | |
2933 Lisp_Object charset = coding_system_charset (cs, i); | |
2934 if (i > 0) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2935 write_ascstring (printcharfun, ", "); |
771 | 2936 write_fmt_string (printcharfun, "g%d=", i); |
800 | 2937 print_internal (CHARSETP (charset) ? XCHARSET_NAME (charset) : charset, printcharfun, 0); |
771 | 2938 if (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (cs, i)) |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2939 write_ascstring (printcharfun, "(force)"); |
771 | 2940 } |
2941 | |
3084 | 2942 #define FROB(prop) \ |
2943 if (!NILP (iso2022_getprop (cs, prop))) \ | |
2944 { \ | |
2945 write_fmt_string_lisp (printcharfun, ", %s", 1, prop); \ | |
771 | 2946 } |
2947 | |
2948 FROB (Qshort); | |
2949 FROB (Qno_ascii_eol); | |
2950 FROB (Qno_ascii_cntl); | |
2951 FROB (Qseven); | |
2952 FROB (Qlock_shift); | |
2953 FROB (Qno_iso6429); | |
2954 FROB (Qescape_quoted); | |
2955 | |
2956 { | |
2957 Lisp_Object val = | |
2958 unparse_charset_conversion_specs | |
2959 (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs), 1); | |
2960 if (!NILP (val)) | |
2961 { | |
800 | 2962 write_fmt_string_lisp (printcharfun, ", input-charset-conversion=%s", 1, val); |
771 | 2963 } |
2964 val = | |
2965 unparse_charset_conversion_specs | |
2966 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs), 1); | |
2967 if (!NILP (val)) | |
2968 { | |
800 | 2969 write_fmt_string_lisp (printcharfun, ", output-charset-conversion=%s", 1, val); |
771 | 2970 } |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2971 write_ascstring (printcharfun, ")"); |
771 | 2972 } |
2973 } | |
2974 | |
2975 | |
2976 /************************************************************************/ | |
2977 /* ISO2022 detector */ | |
2978 /************************************************************************/ | |
2979 | |
DEFINE_DETECTOR (iso2022);

/* Category iso_7: ISO2022 using only seven-bit bytes, without locking
   shift. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_7);

/* Category iso_8_designate: ISO2022 using eight-bit bytes, without
   locking shift or single shift; charsets are switched by designation. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_designate);

/* Category iso_8_1: ISO2022 using eight-bit bytes, without locking shift
   or designation sequences; one-dimension characters in the upper half. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_1);

/* Category iso_8_2: ISO2022 using eight-bit bytes, without locking shift
   or designation sequences; two-dimension characters in the upper half. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_2);

/* Category iso_lock_shift: ISO2022 using locking shift. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_lock_shift);
2994 | |
/* Per-detection-state data of the ISO2022 detector. */
struct iso2022_detector
{
  int initted;				/* set once the fields below are set up */
  struct iso2022_coding_stream *iso;	/* scratch stream for escape parsing */
  unsigned int flags;			/* parser flags (e.g. ISO_STATE_ESCAPE) */

  /* Temporary bookkeeping for the run of high bytes currently being
     scanned. */
  int high_byte_count;
  unsigned int saw_single_shift_just_now:1;

  /* Running state, accumulated across calls; the likelihoods are
     derived from these at the end of each call. */
  unsigned int seen_high_byte:1;
  unsigned int seen_single_shift:1;
  unsigned int seen_locking_shift:1;
  unsigned int seen_designate:1;
  unsigned int bad_single_byte_sequences;
  unsigned int bad_multibyte_escape_sequences;
  unsigned int good_multibyte_escape_sequences;
  int even_high_byte_groups;
  int longest_even_high_byte;
  int odd_high_byte_groups;
};
3017 | |
3018 static void | |
3019 iso2022_detect (struct detection_state *st, const UExtbyte *src, | |
3020 Bytecount n) | |
3021 { | |
3022 Bytecount orign = n; | |
3023 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
3024 | |
3025 /* #### There are serious deficiencies in the recognition mechanism | |
3026 here. This needs to be much smarter if it's going to cut it. | |
3027 The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while | |
3028 it should be detected as Latin-1. | |
3029 All the ISO2022 stuff in this file should be synced up with the | |
3030 code from FSF Emacs-21.0, in which Mule should be more or less stable. | |
3031 Perhaps we should wait till R2L works in FSF Emacs? */ | |
3032 | |
3033 /* We keep track of running state on our own, and set the categories at the | |
3034 end; that way we can reflect the correct state each time we finish, but | |
3035 not get confused by those results the next time around. */ | |
3036 | |
3037 if (!data->initted) | |
3038 { | |
3039 xzero (*data); | |
3040 data->iso = xnew_and_zero (struct iso2022_coding_stream); | |
3041 reset_iso2022_decode (Qnil, data->iso); | |
3042 data->initted = 1; | |
3043 } | |
3044 | |
3045 while (n--) | |
3046 { | |
3047 UExtbyte c = *src++; | |
3048 if (c >= 0x80) | |
3049 data->seen_high_byte = 1; | |
3050 if (c >= 0xA0) | |
3051 data->high_byte_count++; | |
3052 else | |
3053 { | |
3054 if (data->high_byte_count && | |
3055 !data->saw_single_shift_just_now) | |
3056 { | |
3057 if (data->high_byte_count & 1) | |
3058 data->odd_high_byte_groups++; | |
3059 else | |
985 | 3060 { |
3061 data->even_high_byte_groups++; | |
3062 if (data->longest_even_high_byte < data->high_byte_count) | |
3063 data->longest_even_high_byte = data->high_byte_count; | |
3064 } | |
771 | 3065 } |
3066 data->high_byte_count = 0; | |
3067 data->saw_single_shift_just_now = 0; | |
3068 } | |
3069 if (!(data->flags & ISO_STATE_ESCAPE) | |
826 | 3070 && (byte_c0_p (c) || byte_c1_p (c))) |
771 | 3071 { /* control chars */ |
3072 switch (c) | |
3073 { | |
3074 /* Allow and ignore control characters that you might | |
3075 reasonably see in a text file */ | |
3076 case '\r': | |
3077 case '\n': | |
3078 case '\t': | |
3079 case 7: /* bell */ | |
3080 case 8: /* backspace */ | |
3081 case 11: /* vertical tab */ | |
3082 case 12: /* form feed */ | |
3083 case 26: /* MS-DOS C-z junk */ | |
3084 case 31: /* '^_' -- for info */ | |
3085 goto label_continue_loop; | |
3086 | |
3087 default: | |
3088 break; | |
3089 } | |
3090 } | |
3091 | |
826 | 3092 if ((data->flags & ISO_STATE_ESCAPE) || byte_c0_p (c) |
3093 || byte_c1_p (c)) | |
771 | 3094 { |
3095 switch (parse_iso2022_esc (Qnil, data->iso, c, | |
3096 &data->flags, 0)) | |
3097 { | |
3098 case 1: /* done */ | |
3099 if (data->iso->esc_bytes_index > 0) | |
3100 data->good_multibyte_escape_sequences++; | |
3101 switch (data->iso->esc) | |
3102 { | |
3103 case ISO_ESC_DESIGNATE: | |
3104 data->seen_designate = 1; | |
3105 break; | |
3106 case ISO_ESC_LOCKING_SHIFT: | |
3107 data->seen_locking_shift = 1; | |
3108 break; | |
3109 case ISO_ESC_SINGLE_SHIFT: | |
3110 data->saw_single_shift_just_now = 1; | |
3111 data->seen_single_shift = 1; | |
3112 break; | |
3113 default: | |
3114 break; | |
3115 } | |
3116 break; | |
3117 | |
3118 case -1: /* not done */ | |
3119 break; | |
3120 | |
3121 case 0: /* error */ | |
3122 if (data->iso->esc == ISO_ESC_NOTHING) | |
3123 data->bad_single_byte_sequences++; | |
3124 else | |
3125 data->bad_multibyte_escape_sequences++; | |
3126 } | |
3127 } | |
3128 label_continue_loop:; | |
3129 } | |
3130 | |
985 | 3131 if (data->high_byte_count && |
3132 !data->saw_single_shift_just_now) | |
3133 { | |
3134 if (data->high_byte_count & 1) | |
3135 data->odd_high_byte_groups++; | |
3136 else | |
3137 { | |
3138 data->even_high_byte_groups++; | |
3139 if (data->longest_even_high_byte < data->high_byte_count) | |
3140 data->longest_even_high_byte = data->high_byte_count; | |
3141 } | |
3142 } | |
3143 | |
771 | 3144 if (data->bad_multibyte_escape_sequences > 2 || |
3145 (data->bad_multibyte_escape_sequences > 0 && | |
3146 data->good_multibyte_escape_sequences / | |
3147 data->bad_multibyte_escape_sequences < 10)) | |
3148 /* Just making it up ... */ | |
3149 SET_DET_RESULTS (st, iso2022, DET_NEARLY_IMPOSSIBLE); | |
3150 else if (data->bad_single_byte_sequences > 5 || | |
3151 (data->bad_single_byte_sequences > 0 && | |
3152 (data->good_multibyte_escape_sequences + | |
3153 data->even_high_byte_groups + | |
3154 data->odd_high_byte_groups) / | |
3155 data->bad_single_byte_sequences < 10)) | |
3156 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3157 else if (data->seen_locking_shift) | |
3158 { | |
3159 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
3160 DET_RESULT (st, iso_lock_shift) = DET_QUITE_PROBABLE; | |
3161 } | |
3162 else if (!data->seen_high_byte) | |
3163 { | |
3164 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3165 if (data->good_multibyte_escape_sequences) | |
3166 DET_RESULT (st, iso_7) = DET_QUITE_PROBABLE; | |
3167 else if (data->seen_single_shift) | |
3168 DET_RESULT (st, iso_7) = DET_SOMEWHAT_LIKELY; | |
3169 else | |
3170 { | |
3171 /* If we've just seen pure 7-bit data, no escape sequences, | |
3172 then we can't give much likelihood; but if we've seen enough | |
3173 of this data, we can assume some unlikelihood of any 8-bit | |
3174 encoding */ | |
3175 if (orign + st->bytes_seen >= 1000) | |
3176 DET_RESULT (st, iso_7) = DET_AS_LIKELY_AS_UNLIKELY; | |
3177 else | |
3178 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
3179 } | |
3180 } | |
3181 else if (data->seen_designate) | |
3182 { | |
3183 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
3184 if (data->seen_single_shift) | |
3185 /* #### Does this really make sense? */ | |
3186 DET_RESULT (st, iso_8_designate) = DET_SOMEWHAT_UNLIKELY; | |
3187 else | |
3188 DET_RESULT (st, iso_8_designate) = DET_QUITE_PROBABLE; | |
3189 } | |
3190 else if (data->odd_high_byte_groups > 0 && | |
3191 data->even_high_byte_groups == 0) | |
3192 { | |
3193 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3194 if (data->seen_single_shift) | |
3195 DET_RESULT (st, iso_8_1) = DET_QUITE_PROBABLE; | |
3196 else | |
3197 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
3198 } | |
3199 else if (data->odd_high_byte_groups == 0 && | |
3200 data->even_high_byte_groups > 0) | |
3201 { | |
985 | 3202 #if 0 |
771 | 3203 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); |
3204 if (data->even_high_byte_groups > 10) | |
3205 { | |
3206 if (data->seen_single_shift) | |
3207 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
3208 else | |
3209 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
3210 if (data->even_high_byte_groups < 50) | |
3211 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_UNLIKELY; | |
3212 /* else it stays at quite improbable */ | |
3213 } | |
985 | 3214 #else |
3215 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3216 if (data->seen_single_shift) | |
3217 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
3218 else if (data->even_high_byte_groups > 10) | |
3219 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
3220 else if (data->longest_even_high_byte > 6) | |
3221 DET_RESULT (st, iso_8_2) = DET_SLIGHTLY_LIKELY; | |
3222 #endif | |
771 | 3223 } |
3224 else if (data->odd_high_byte_groups > 0 && | |
3225 data->even_high_byte_groups > 0) | |
3393 | 3226 { |
3227 /* Well, this could be a Latin-1 text, with most high-byte | |
3228 characters single, but sometimes two are together, though | |
3229 this happens not as often. This is common for Western | |
3230 European languages like German, French, Danish, Swedish, etc. | |
3231 Then we would either have a rather small file and | |
3232 even_high_byte_groups would be low. | |
3233 Or we would have a larger file and the ratio of odd to even | |
3234 groups would be very high. */ | |
3235 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3236 if (data->even_high_byte_groups <= 3 || | |
3237 data->odd_high_byte_groups >= 10 * data->even_high_byte_groups) | |
3238 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
3239 } | |
771 | 3240 else |
3241 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
3242 } | |
3243 | |
3244 static void | |
3245 iso2022_finalize_detection_state (struct detection_state *st) | |
3246 { | |
3247 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
3248 if (data->iso) | |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3249 { |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3250 xfree (data->iso); |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3251 data->iso = 0; |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3252 } |
771 | 3253 } |
3254 | |
3255 | |
3256 /************************************************************************/ | |
3257 /* CCL methods */ | |
3258 /************************************************************************/ | |
3259 | |
3260 /* Converter written in CCL. */ | |
3261 | |
3262 struct ccl_coding_system | |
3263 { | |
3264 /* For a CCL coding system, these specify the CCL programs used for | |
3265 decoding (input) and encoding (output). */ | |
3266 Lisp_Object decode; | |
3267 Lisp_Object encode; | |
3268 }; | |
3269 | |
/* Accessors for the decode/encode slots of a CCL coding system.  The
   CODING_SYSTEM_* forms take the argument accepted by
   CODING_SYSTEM_TYPE_DATA; the XCODING_SYSTEM_* forms first pass their
   argument through XCODING_SYSTEM. */
#define CODING_SYSTEM_CCL_DECODE(codesys)			\
  (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->decode)
#define CODING_SYSTEM_CCL_ENCODE(codesys)			\
  (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->encode)
#define XCODING_SYSTEM_CCL_DECODE(codesys)			\
  CODING_SYSTEM_CCL_DECODE (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_CCL_ENCODE(codesys)			\
  CODING_SYSTEM_CCL_ENCODE (XCODING_SYSTEM (codesys))
3278 | |
3279 struct ccl_coding_stream | |
3280 { | |
3281 /* state of the running CCL program */ | |
3282 struct ccl_program ccl; | |
3283 }; | |
3284 | |
1204 | 3285 static const struct memory_description ccl_coding_system_description[] = { |
3286 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, decode) }, | |
3287 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, encode) }, | |
771 | 3288 { XD_END } |
3289 }; | |
3290 | |
1204 | 3291 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (ccl); |
3292 | |
771 | 3293 static void |
3294 ccl_mark (Lisp_Object codesys) | |
3295 { | |
3296 mark_object (XCODING_SYSTEM_CCL_DECODE (codesys)); | |
3297 mark_object (XCODING_SYSTEM_CCL_ENCODE (codesys)); | |
3298 } | |
3299 | |
3300 static Bytecount | |
3301 ccl_convert (struct coding_stream *str, const UExtbyte *src, | |
3302 unsigned_char_dynarr *dst, Bytecount n) | |
3303 { | |
3304 struct ccl_coding_stream *data = | |
3305 CODING_STREAM_TYPE_DATA (str, ccl); | |
3306 Bytecount orign = n; | |
3307 | |
3308 data->ccl.last_block = str->eof; | |
3309 /* When applying a CCL program to a stream, SRC must not be NULL -- this | |
3310 is a special signal to the driver that read and write operations are | |
3311 not allowed. The code does not actually look at what SRC points to if | |
3312 N == 0. | |
3313 */ | |
3314 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", | |
3315 dst, n, 0, | |
3316 str->direction == CODING_DECODE ? CCL_MODE_DECODING : | |
3317 CCL_MODE_ENCODING); | |
3318 return orign; | |
3319 } | |
3320 | |
3321 static void | |
3322 ccl_init_coding_stream (struct coding_stream *str) | |
3323 { | |
3324 struct ccl_coding_stream *data = | |
3325 CODING_STREAM_TYPE_DATA (str, ccl); | |
3326 | |
3327 setup_ccl_program (&data->ccl, | |
3328 str->direction == CODING_DECODE ? | |
3329 XCODING_SYSTEM_CCL_DECODE (str->codesys) : | |
3330 XCODING_SYSTEM_CCL_ENCODE (str->codesys)); | |
3331 } | |
3332 | |
/* Rewind method for CCL coding streams: rewinding is the same as
   initializing the stream again. */
static void
ccl_rewind_coding_stream (struct coding_stream *str)
{
  ccl_init_coding_stream (str);
  return;
}
3338 | |
3339 static void | |
3340 ccl_init (Lisp_Object codesys) | |
3341 { | |
3342 XCODING_SYSTEM_CCL_DECODE (codesys) = Qnil; | |
3343 XCODING_SYSTEM_CCL_ENCODE (codesys) = Qnil; | |
3344 } | |
3345 | |
3346 static int | |
3347 ccl_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
3348 { | |
3349 if (EQ (key, Qdecode)) | |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3350 XCODING_SYSTEM_CCL_DECODE (codesys) = get_ccl_program (value); |
771 | 3351 else if (EQ (key, Qencode)) |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3352 XCODING_SYSTEM_CCL_ENCODE (codesys) = get_ccl_program (value); |
771 | 3353 return 1; |
3354 } | |
3355 | |
3356 static Lisp_Object | |
3357 ccl_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
3358 { | |
3359 if (EQ (prop, Qdecode)) | |
3360 return XCODING_SYSTEM_CCL_DECODE (coding_system); | |
3361 else if (EQ (prop, Qencode)) | |
3362 return XCODING_SYSTEM_CCL_ENCODE (coding_system); | |
3363 else | |
3364 return Qunbound; | |
3365 } | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3366 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3367 /************************************************************************/ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3368 /* FIXED_WIDTH methods */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3369 /************************************************************************/ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3370 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3371 struct fixed_width_coding_system |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3372 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3373 /* For a fixed_width coding system, these specify the CCL programs |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3374 used for decoding (input) and encoding (output). */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3375 Lisp_Object decode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3376 Lisp_Object encode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3377 Lisp_Object from_unicode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3378 Lisp_Object invalid_sequences_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3379 Lisp_Object query_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3380 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3381 /* This is not directly accessible from Lisp; it is a concatenation of the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3382 previous two strings, used for simplicity of implementation. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3383 Lisp_Object invalid_and_query_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3384 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3385 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
/* Field accessors for the fixed_width data of a coding system.  Each
   expands to the struct member itself, so it can be read or assigned.
   The CODING_SYSTEM_... forms hand their argument straight to
   CODING_SYSTEM_TYPE_DATA. */
#define CODING_SYSTEM_FIXED_WIDTH_DECODE(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->decode)

#define CODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->encode)

#define CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->from_unicode)

#define CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width) \
   ->invalid_sequences_skip_chars)

#define CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->query_skip_chars)

#define CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width) \
   ->invalid_and_query_skip_chars)

/* The XCODING_SYSTEM_... forms first pass their argument through
   XCODING_SYSTEM and then use the matching accessor above. */
#define XCODING_SYSTEM_FIXED_WIDTH_DECODE(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_DECODE (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_ENCODE (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS \
   (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS \
   (XCODING_SYSTEM (codesys)))
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3415 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3416 struct fixed_width_coding_stream |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3417 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3418 /* state of the running CCL program */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3419 struct ccl_program ccl; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3420 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3421 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3422 static const struct memory_description |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3423 fixed_width_coding_system_description[] = { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3424 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, decode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3425 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, encode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3426 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3427 from_unicode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3428 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3429 invalid_sequences_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3430 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3431 query_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3432 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3433 invalid_and_query_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3434 { XD_END } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3435 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3436 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3437 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3438 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3439 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3440 fixed_width_mark (Lisp_Object codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3441 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3442 mark_object (XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3443 mark_object (XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3444 mark_object (XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3445 mark_object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3446 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3447 mark_object (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) ); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3448 mark_object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3449 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3450 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3451 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3452 static Bytecount |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3453 fixed_width_convert (struct coding_stream *str, const UExtbyte *src, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3454 unsigned_char_dynarr *dst, Bytecount n) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3455 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3456 struct fixed_width_coding_stream *data = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3457 CODING_STREAM_TYPE_DATA (str, fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3458 Bytecount orign = n; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3459 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3460 data->ccl.last_block = str->eof; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3461 /* When applying a CCL program to a stream, SRC must not be NULL -- this |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3462 is a special signal to the driver that read and write operations are |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3463 not allowed. The code does not actually look at what SRC points to if |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3464 N == 0. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3465 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3466 dst, n, 0, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3467 str->direction == CODING_DECODE ? CCL_MODE_DECODING : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3468 CCL_MODE_ENCODING); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3469 return orign; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3470 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3471 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3472 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3473 fixed_width_init_coding_stream (struct coding_stream *str) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3474 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3475 struct fixed_width_coding_stream *data = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3476 CODING_STREAM_TYPE_DATA (str, fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3477 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3478 setup_ccl_program (&data->ccl, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3479 str->direction == CODING_DECODE ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3480 XCODING_SYSTEM_FIXED_WIDTH_DECODE (str->codesys) : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3481 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (str->codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3482 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3483 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
/* Rewind STR by running the same setup as
   fixed_width_init_coding_stream on it again. */
static void
fixed_width_rewind_coding_stream (struct coding_stream *str)
{
  fixed_width_init_coding_stream (str);
}
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3489 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3490 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3491 fixed_width_init (Lisp_Object codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3492 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3493 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3494 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3495 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3496 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3497 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3498 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3499 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3500 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3501 static int |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3502 fixed_width_putprop (Lisp_Object codesys, Lisp_Object key, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3503 Lisp_Object value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3504 { |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3505 if (EQ (key, Qdecode)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3506 { |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3507 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = get_ccl_program (value); |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3508 } |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3509 else if (EQ (key, Qencode)) |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3510 { |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3511 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = get_ccl_program (value); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3512 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3513 else if (EQ (key, Qfrom_unicode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3514 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3515 CHECK_HASH_TABLE (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3516 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3517 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3518 else if (EQ (key, Qinvalid_sequences_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3519 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3520 CHECK_STRING (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3521 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3522 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3523 value = Fcopy_sequence (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3524 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3525 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3526 = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3527 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3528 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3529 = concat2 (value, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3530 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3531 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3532 else if (EQ (key, Qquery_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3533 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3534 CHECK_STRING (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3535 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3536 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3537 value = Fcopy_sequence (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3538 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3539 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3540 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3541 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3542 = concat2 (value, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3543 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3544 (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3545 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3546 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3547 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3548 return 0; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3549 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3550 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3551 return 1; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3552 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3553 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3554 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3555 fixed_width_getprop (Lisp_Object codesys, Lisp_Object prop) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3556 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3557 if (EQ (prop, Qdecode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3558 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3559 return XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3560 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3561 else if (EQ (prop, Qencode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3562 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3563 return XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3564 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3565 else if (EQ (prop, Qfrom_unicode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3566 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3567 return XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3568 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3569 else if (EQ (prop, Qinvalid_sequences_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3570 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3571 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3572 return |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3573 Fcopy_sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3574 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3575 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3576 else if (EQ (prop, Qquery_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3577 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3578 return |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3579 Fcopy_sequence (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3580 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3581 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3582 return Qunbound; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3583 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3584 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3585 static Lisp_Object Vfixed_width_query_ranges_cache; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3586 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3587 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3588 fixed_width_skip_chars_data_given_strings (Lisp_Object string, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3589 Lisp_Object query_skip_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3590 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3591 invalid_sequences_skip_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3592 Binbyte *fastmap, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3593 int fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3594 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3595 Lisp_Object result = Fgethash (string, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3596 Vfixed_width_query_ranges_cache, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3597 Qnil); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3598 REGISTER Ibyte *p, *pend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3599 REGISTER Ichar c; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3600 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3601 memset (fastmap, query_coding_unencodable, fastmap_len); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3602 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3603 if (!NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3604 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3605 int i; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3606 Lisp_Object ranged; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3607 assert (RANGE_TABLEP (result)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3608 for (i = 0; i < fastmap_len; ++i) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3609 { |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3610 ranged = Fget_range_table (make_fixnum (i), result, Qnil); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3611 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3612 if (EQ (ranged, Qsucceeded)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3613 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3614 fastmap [i] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3615 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3616 else if (EQ (ranged, Qinvalid_sequence)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3617 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3618 fastmap [i] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3619 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3620 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3621 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3622 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3623 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3624 result = Fmake_range_table (Qstart_closed_end_closed); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3625 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3626 p = XSTRING_DATA (query_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3627 pend = p + XSTRING_LENGTH (query_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3628 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3629 while (p != pend) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3630 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3631 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3632 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3633 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3634 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3635 if (c == '\\') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3636 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3637 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3638 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3639 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3640 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3641 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3642 if (p != pend && *p == '-') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3643 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3644 Ichar cend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3645 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3646 /* Skip over the dash. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3647 p++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3648 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3649 cend = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3650 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3651 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qsucceeded, |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3652 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3653 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3654 while (c <= cend && c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3655 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3656 fastmap[c] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3657 c++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3658 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3659 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3660 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3661 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3662 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3663 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3664 if (c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3665 fastmap[c] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3666 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3667 Fput_range_table (make_fixnum (c), make_fixnum (c), Qsucceeded, result); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3668 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3669 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3670 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3671 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3672 p = XSTRING_DATA (invalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3673 pend = p + XSTRING_LENGTH (invalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3674 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3675 while (p != pend) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3676 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3677 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3678 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3679 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3680 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3681 if (c == '\\') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3682 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3683 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3684 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3685 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3686 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3687 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3688 if (p != pend && *p == '-') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3689 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3690 Ichar cend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3691 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3692 /* Skip over the dash. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3693 p++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3694 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3695 cend = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3696 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3697 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qinvalid_sequence, |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3698 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3699 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3700 while (c <= cend && c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3701 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3702 fastmap[c] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3703 c++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3704 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3705 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3706 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3707 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3708 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3709 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3710 if (c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3711 fastmap[c] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3712 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3713 Fput_range_table (make_fixnum (c), make_fixnum (c), Qinvalid_sequence, |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3714 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3715 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3716 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3717 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3718 Fputhash (string, result, Vfixed_width_query_ranges_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3719 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3720 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3721 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3722 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3723 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3724 fixed_width_query (Lisp_Object codesys, struct buffer *buf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3725 Charbpos end, int flags) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3726 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3727 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3728 Charbpos pos_byte = BYTE_BUF_PT (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3729 Lisp_Object skip_chars_range_table, from_unicode, checked_unicode, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3730 result = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3731 enum query_coding_failure_reasons failed_reason, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3732 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3733 Binbyte fastmap[0xff]; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3734 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3735 from_unicode = XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3736 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3737 skip_chars_range_table = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3738 fixed_width_skip_chars_data_given_strings |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3739 ((flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3740 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3741 (codesys) : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3742 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3743 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3744 (flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3745 build_ascstring("") : |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3746 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3747 fastmap, (int)(sizeof (fastmap))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3748 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3749 if (flags & QUERY_METHOD_HIGHLIGHT && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3750 /* If we're being called really early, live without highlights getting |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3751 cleared properly: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3752 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3753 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3754 /* It's okay to call Lisp here, the only non-stack object we may have |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3755 allocated up to this point is skip_chars_range_table, and that's |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3756 reachable from its entry in Vfixed_width_query_ranges_cache. */ |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3757 call3 (Qquery_coding_clear_highlights, make_fixnum (pos), make_fixnum (end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3758 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3759 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3760 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3761 while (pos < end) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3762 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3763 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3764 if ((ch < (int) (sizeof(fastmap))) ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3765 (fastmap[ch] == query_coding_succeeded) : |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3766 (EQ (Qsucceeded, Fget_range_table (make_fixnum (ch), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3767 skip_chars_range_table, Qnil)))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3768 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3769 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3770 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3771 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3772 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3773 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3774 fail_range_start = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3775 while ((pos < end) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3776 ((!(flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3777 EQ (Qinvalid_sequence, Fget_range_table |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3778 (make_fixnum (ch), skip_chars_range_table, Qnil)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3779 && (failed_reason = query_coding_invalid_sequence)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3780 || ((NILP ((checked_unicode = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3781 Fgethash (Fchar_to_unicode (make_char (ch)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3782 from_unicode, Qnil)))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3783 && (failed_reason = query_coding_unencodable))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3784 && (previous_failed_reason == query_coding_succeeded |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3785 || previous_failed_reason == failed_reason)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3786 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3787 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3788 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3789 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3790 previous_failed_reason = failed_reason; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3791 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3792 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3793 if (fail_range_start == pos) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3794 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3795 /* The character can actually be encoded; move on. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3796 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3797 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3798 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3799 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3800 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3801 assert (previous_failed_reason == query_coding_invalid_sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3802 || previous_failed_reason == query_coding_unencodable); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3803 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3804 if (flags & QUERY_METHOD_ERRORP) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3805 { |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3806 signal_error_2 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3807 (Qtext_conversion_error, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3808 "Cannot encode using coding system", |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3809 make_string_from_buffer (buf, fail_range_start, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3810 pos - fail_range_start), |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3811 XCODING_SYSTEM_NAME (codesys)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3812 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3813 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3814 if (NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3815 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3816 result = Fmake_range_table (Qstart_closed_end_open); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3817 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3818 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3819 fail_range_end = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3820 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3821 Fput_range_table (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3822 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3823 (previous_failed_reason |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3824 == query_coding_unencodable ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3825 Qunencodable : Qinvalid_sequence), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3826 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3827 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3828 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3829 if (flags & QUERY_METHOD_HIGHLIGHT) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3830 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3831 Lisp_Object extent |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3832 = Fmake_extent (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3833 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3834 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3835 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3836 Fset_extent_priority |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3837 (extent, make_fixnum (2 + mouse_highlight_priority)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3838 Fset_extent_face (extent, Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3839 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3840 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3841 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3842 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3843 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3844 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3845 } |
771 | 3846 |
3847 | |
3848 /************************************************************************/ | |
3849 /* Initialization */ | |
3850 /************************************************************************/ | |
3851 | |
/* Invokes DEFSUBR for the four Shift-JIS / Big5 character encode and
   decode functions, then DEFSYMBOL for each Q-prefixed symbol listed in
   the body (ending with Qinvalid_sequences_skip_chars, Qquery_skip_chars
   and Qfixed_width).  Takes no arguments and returns nothing.
   NOTE(review): DEFSUBR and DEFSYMBOL are defined outside this chunk;
   presumably they make the subr / symbol known to Lisp -- confirm against
   their definitions. */
3852 void | |
3853 syms_of_mule_coding (void) | |
3854 { | |
3855 DEFSUBR (Fdecode_shift_jis_char); | |
3856 DEFSUBR (Fencode_shift_jis_char); | |
3857 DEFSUBR (Fdecode_big5_char); | |
3858 DEFSUBR (Fencode_big5_char); | |
3859 | |
/* These three names match coding system types that
   coding_system_type_create_mule_coding () initializes (big5, shift_jis,
   ccl). */
3860 DEFSYMBOL (Qbig5); | |
3861 DEFSYMBOL (Qshift_jis); | |
3862 DEFSYMBOL (Qccl); | |
3863 | |
/* NOTE(review): the next two groups look like iso2022 coding system
   property names -- confirm against the iso2022 putprop/getprop methods,
   which are outside this chunk. */
3864 DEFSYMBOL (Qcharset_g0); | |
3865 DEFSYMBOL (Qcharset_g1); | |
3866 DEFSYMBOL (Qcharset_g2); | |
3867 DEFSYMBOL (Qcharset_g3); | |
3868 DEFSYMBOL (Qforce_g0_on_output); | |
3869 DEFSYMBOL (Qforce_g1_on_output); | |
3870 DEFSYMBOL (Qforce_g2_on_output); | |
3871 DEFSYMBOL (Qforce_g3_on_output); | |
3872 DEFSYMBOL (Qno_iso6429); | |
3873 DEFSYMBOL (Qinput_charset_conversion); | |
3874 DEFSYMBOL (Qoutput_charset_conversion); | |
3875 | |
3876 DEFSYMBOL (Qshort); | |
3877 DEFSYMBOL (Qno_ascii_eol); | |
3878 DEFSYMBOL (Qno_ascii_cntl); | |
3879 DEFSYMBOL (Qlock_shift); | |
3880 | |
/* Same names as the iso2022 detector categories initialized in
   coding_system_type_create_mule_coding (). */
3881 DEFSYMBOL (Qiso_7); | |
3882 DEFSYMBOL (Qiso_8_designate); | |
3883 DEFSYMBOL (Qiso_8_1); | |
3884 DEFSYMBOL (Qiso_8_2); | |
3885 DEFSYMBOL (Qiso_lock_shift); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3886 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3887 DEFSYMBOL (Qinvalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3888 DEFSYMBOL (Qquery_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3889 DEFSYMBOL (Qfixed_width); |
771 | 3890 } |
3891 | |
/* Declares the coding system types implemented in this file and states
   which methods each one has.  In order:
     - iso2022 (with data): mark, convert, finalize_coding_stream,
       init_coding_stream, rewind_coding_stream, init, print, finalize,
       putprop, getprop; plus an iso2022 detector with detect and
       finalize_detection_state methods and five categories.
     - ccl (with data): mark, convert, init, init_coding_stream,
       rewind_coding_stream, putprop, getprop.
     - fixed_width (with data): the same seven methods as ccl, plus query.
     - shift_jis and big5 (no data): convert only, each with a detector
       that has a detect method and a single category of the same name.
   Takes no arguments and returns nothing.
   NOTE(review): the INITIALIZE_* and *_HAS_METHOD macros are defined
   outside this chunk; presumably each INITIALIZE_* must run before the
   HAS_METHOD lines that name the same type -- keep the statement order. */
3892 void | |
3893 coding_system_type_create_mule_coding (void) | |
3894 { | |
/* iso2022 coding system type; the string is passed alongside the type
   name (it reads like the name of a Lisp predicate -- TODO confirm). */
3895 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022, "iso2022-coding-system-p"); | |
3896 CODING_SYSTEM_HAS_METHOD (iso2022, mark); | |
3897 CODING_SYSTEM_HAS_METHOD (iso2022, convert); | |
3898 CODING_SYSTEM_HAS_METHOD (iso2022, finalize_coding_stream); | |
3899 CODING_SYSTEM_HAS_METHOD (iso2022, init_coding_stream); | |
3900 CODING_SYSTEM_HAS_METHOD (iso2022, rewind_coding_stream); | |
3901 CODING_SYSTEM_HAS_METHOD (iso2022, init); | |
3902 CODING_SYSTEM_HAS_METHOD (iso2022, print); | |
3903 CODING_SYSTEM_HAS_METHOD (iso2022, finalize); | |
3904 CODING_SYSTEM_HAS_METHOD (iso2022, putprop); | |
3905 CODING_SYSTEM_HAS_METHOD (iso2022, getprop); | |
3906 | |
/* iso2022 detector and its five categories. */
3907 INITIALIZE_DETECTOR (iso2022); | |
3908 DETECTOR_HAS_METHOD (iso2022, detect); | |
3909 DETECTOR_HAS_METHOD (iso2022, finalize_detection_state); | |
3910 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_7); | |
3911 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_designate); | |
3912 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_1); | |
3913 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_2); | |
3914 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_lock_shift); | |
3915 | |
/* ccl coding system type; no detector is set up for it here. */
3916 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (ccl, "ccl-coding-system-p"); | |
3917 CODING_SYSTEM_HAS_METHOD (ccl, mark); | |
3918 CODING_SYSTEM_HAS_METHOD (ccl, convert); | |
3919 CODING_SYSTEM_HAS_METHOD (ccl, init); | |
3920 CODING_SYSTEM_HAS_METHOD (ccl, init_coding_stream); | |
3921 CODING_SYSTEM_HAS_METHOD (ccl, rewind_coding_stream); | |
3922 CODING_SYSTEM_HAS_METHOD (ccl, putprop); | |
3923 CODING_SYSTEM_HAS_METHOD (ccl, getprop); | |
3924 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3925 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3926 "fixed-width-coding-system-p"); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3927 CODING_SYSTEM_HAS_METHOD (fixed_width, mark); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3928 CODING_SYSTEM_HAS_METHOD (fixed_width, convert); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3929 CODING_SYSTEM_HAS_METHOD (fixed_width, query); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3930 CODING_SYSTEM_HAS_METHOD (fixed_width, init); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3931 CODING_SYSTEM_HAS_METHOD (fixed_width, init_coding_stream); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3932 CODING_SYSTEM_HAS_METHOD (fixed_width, rewind_coding_stream); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3933 CODING_SYSTEM_HAS_METHOD (fixed_width, putprop); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3934 CODING_SYSTEM_HAS_METHOD (fixed_width, getprop); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3935 |
771 | 3936 INITIALIZE_CODING_SYSTEM_TYPE (shift_jis, "shift-jis-coding-system-p"); |
3937 CODING_SYSTEM_HAS_METHOD (shift_jis, convert); | |
3938 | |
3939 INITIALIZE_DETECTOR (shift_jis); | |
3940 DETECTOR_HAS_METHOD (shift_jis, detect); | |
3941 INITIALIZE_DETECTOR_CATEGORY (shift_jis, shift_jis); | |
3942 | |
3943 INITIALIZE_CODING_SYSTEM_TYPE (big5, "big5-coding-system-p"); | |
3944 CODING_SYSTEM_HAS_METHOD (big5, convert); | |
3945 | |
3946 INITIALIZE_DETECTOR (big5); | |
3947 DETECTOR_HAS_METHOD (big5, detect); | |
3948 INITIALIZE_DETECTOR_CATEGORY (big5, big5); | |
3949 } | |
3950 | |
/* Invokes REINITIALIZE_CODING_SYSTEM_TYPE once for each of the five
   coding system types that coding_system_type_create_mule_coding ()
   initializes: iso2022, ccl, fixed_width, shift_jis and big5.  Takes no
   arguments and returns nothing.
   NOTE(review): the macro is defined outside this chunk; presumably this
   function is the counterpart run when the initial setup is not repeated
   (e.g. after loading a dumped image) -- confirm against the caller.  A
   type added to the create function most likely needs a matching line
   here as well. */
3951 void | |
3952 reinit_coding_system_type_create_mule_coding (void) | |
3953 { | |
3954 REINITIALIZE_CODING_SYSTEM_TYPE (iso2022); | |
3955 REINITIALIZE_CODING_SYSTEM_TYPE (ccl); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3956 REINITIALIZE_CODING_SYSTEM_TYPE (fixed_width); |
771 | 3957 REINITIALIZE_CODING_SYSTEM_TYPE (shift_jis); |
3958 REINITIALIZE_CODING_SYSTEM_TYPE (big5); | |
3959 } | |
3960 | |
/* Currently has an empty body: it takes no arguments, does nothing and
   returns nothing.
   NOTE(review): presumably kept so that startup code can call a
   reinit_vars_of_* function for every file uniformly -- confirm against
   the caller before removing it. */
3961 void | |
3962 reinit_vars_of_mule_coding (void) | |
3963 { | |
3964 } | |
3965 | |
/* Creates the hash table stored in Vfixed_width_query_ranges_cache and
   passes its address to staticpro ().  The table is made with
   make_lisp_hash_table (32, HASH_TABLE_KEY_WEAK, Qeq): key-weak, with the
   eq test that the comment in the body requires.  A comment in the
   fixed-width query code earlier in this file relies on
   skip_chars_range_table being reachable from its entry in this cache.
   Takes no arguments and returns nothing.
   NOTE(review): 32 is presumably the initial size, and staticpro ()
   presumably registers the variable as a garbage-collection root; both
   functions are defined outside this chunk -- confirm. */
3966 void | |
3967 vars_of_mule_coding (void) | |
3968 { | |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3969 /* This needs to be Qeq, there's a corner case where |
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3970 Qequal won't work. */ |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3971 Vfixed_width_query_ranges_cache |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3972 = make_lisp_hash_table (32, HASH_TABLE_KEY_WEAK, Qeq); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3973 staticpro (&Vfixed_width_query_ranges_cache); |
771 | 3974 } |