Mercurial > hg > xemacs-beta
annotate src/mule-coding.c @ 5602:c9e5612f5424
Support the MP library on recent FreeBSD, have it pass relevant tests.
src/ChangeLog addition:
2011-11-26 Aidan Kehoe <kehoea@parhasard.net>
* number-mp.c (bignum_to_string):
Don't overwrite the accumulator we've just set up for this
function.
* number-mp.c (BIGNUM_TO_TYPE):
mp_itom() doesn't necessarily do what this code used to think with
negative numbers, it can treat them as unsigned ints. Subtract
numbers from bignum_zero instead of multiplying them by -1 to
convert them to their negative equivalents.
* number-mp.c (bignum_to_int):
* number-mp.c (bignum_to_uint):
* number-mp.c (bignum_to_long):
* number-mp.c (bignum_to_ulong):
* number-mp.c (bignum_to_double):
Use the changed BIGNUM_TO_TYPE() in these functions.
* number-mp.c (bignum_ceil):
* number-mp.c (bignum_floor):
In these functions, be more careful about rounding to positive and
negative infinity, respectively. Don't use the sign of QUOTIENT
when working out whether to add or subtract one, rather use
the sign QUOTIENT would have if arbitrary-precision division were
done.
* number-mp.h:
* number-mp.h (MP_GCD):
Wrap #include <mp.h> in BEGIN_C_DECLS/END_C_DECLS.
* number.c (Fbigfloat_get_precision):
* number.c (Fbigfloat_set_precision):
Don't attempt to call XBIGFLOAT_GET_PREC if this build doesn't
support big floats.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 26 Nov 2011 17:59:14 +0000 |
parents | 56144c8593a8 |
children |
rev | line source |
---|---|
771 | 1 /* Conversion functions for I18N encodings, but not Unicode (in separate file). |
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
5100
3d91f0b64469
fix bad bug with escape-quoted handling
Ben Wing <ben@xemacs.org>
parents:
4976
diff
changeset
|
4 Copyright (C) 2000, 2001, 2002, 2010 Ben Wing. |
771 | 5 |
6 This file is part of XEmacs. | |
7 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
771 | 9 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
11 option) any later version. |
771 | 12 |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5191
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
771 | 20 |
21 /* Synched up with: Mule 2.3. Not in FSF. */ | |
22 | |
23 /* For previous history, see file-coding.c. | |
24 | |
25 September 10, 2001: Extracted from file-coding.c by Ben Wing. | |
26 | |
27 Later in September: Finished abstraction of detection system, rewrote | |
28 all the detectors to include multiple levels of likelihood. | |
29 */ | |
30 | |
31 #include <config.h> | |
32 #include "lisp.h" | |
33 | |
34 #include "charset.h" | |
35 #include "mule-ccl.h" | |
36 #include "file-coding.h" | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
37 #include "elhash.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
38 #include "rangetab.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
39 #include "buffer.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
40 #include "extents.h" |
771 | 41 |
5565
48a3d3281b48
Pass eighth bit on TTY consoles to coding system if needed.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5538
diff
changeset
|
42 Lisp_Object Qshift_jis, Qbig5, Qccl; |
771 | 43 |
44 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3; | |
45 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; | |
46 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; | |
47 Lisp_Object Qno_iso6429; | |
48 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion; | |
5565
48a3d3281b48
Pass eighth bit on TTY consoles to coding system if needed.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5538
diff
changeset
|
49 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qlock_shift; |
771 | 50 |
51 Lisp_Object Qiso_7, Qiso_8_designate, Qiso_8_1, Qiso_8_2, Qiso_lock_shift; | |
52 | |
4691
3ba90c659d01
Move Qfrom_unicode to general-slots.h, fix the native Win32 build.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
53 Lisp_Object Qquery_skip_chars, Qinvalid_sequences_skip_chars; |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
54 Lisp_Object Qfixed_width; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
55 |
771 | 56 |
57 /************************************************************************/ | |
58 /* Shift-JIS methods */ | |
59 /************************************************************************/ | |
60 | |
/* Shift-JIS; Hankaku (half-width) KANA is also supported. */
DEFINE_CODING_SYSTEM_TYPE (shift_jis);
63 | |
64 /* Shift-JIS is a coding system encoding three character sets: ASCII, right | |
65 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded | |
66 as is. A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is | |
67 encoded by "position-code + 0x80". A character of JISX0208 | |
68 (DIMENSION2_CHARS94 character set) is encoded in 2-byte but two | |
69 position-codes are divided and shifted so that it fit in the range | |
70 below. | |
71 | |
72 --- CODE RANGE of Shift-JIS --- | |
73 (character set) (range) | |
74 ASCII 0x00 .. 0x7F | |
75 JISX0201-Kana 0xA1 .. 0xDF | |
76 JISX0208 (1st byte) 0x81 .. 0x9F and 0xE0 .. 0xEF | |
77 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC | |
78 ------------------------------- | |
79 | |
80 */ | |
81 | |
/* Is this the first byte of a Shift-JIS two-byte char?

   Returns nonzero when C lies in 0x81 .. 0x9F or 0xE0 .. 0xEF, and zero
   otherwise. */

inline static int
byte_shift_jis_two_byte_1_p (int c)
{
  return (c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF);
}
771 | 89 |
/* Is this the second byte of a Shift-JIS two-byte char?

   Returns nonzero when C lies in 0x40 .. 0x7E or 0x80 .. 0xFC, and zero
   otherwise (note that 0x7F is excluded). */

inline static int
byte_shift_jis_two_byte_2_p (int c)
{
  return (c >= 0x40 && c <= 0x7E) || (c >= 0x80 && c <= 0xFC);
}
97 | |
/* Is this a single-byte Shift-JIS katakana (JISX0201-Kana) byte?

   Returns nonzero when C lies in 0xA1 .. 0xDF, and zero otherwise. */

inline static int
byte_shift_jis_katakana_p (int c)
{
  return c >= 0xA1 && c <= 0xDF;
}
771 | 103 |
3439 | 104 inline static void |
105 dynarr_add_2022_one_dimension (Lisp_Object charset, Ibyte c, | |
106 unsigned char charmask, | |
107 unsigned_char_dynarr *dst) | |
108 { | |
109 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
110 { | |
111 encode_unicode_char (charset, c & charmask, 0, | |
4096 | 112 dst, UNICODE_UTF_8, 0, 0); |
3439 | 113 } |
114 else | |
115 { | |
116 Dynarr_add (dst, c & charmask); | |
117 } | |
118 } | |
119 | |
120 inline static void | |
121 dynarr_add_2022_two_dimensions (Lisp_Object charset, Ibyte c, | |
122 unsigned int ch, | |
123 unsigned char charmask, | |
124 unsigned_char_dynarr *dst) | |
125 { | |
126 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
127 { | |
128 encode_unicode_char (charset, | |
129 ch & charmask, | |
130 c & charmask, dst, | |
4096 | 131 UNICODE_UTF_8, 0, 0); |
3439 | 132 } |
133 else | |
134 { | |
135 Dynarr_add (dst, ch & charmask); | |
136 Dynarr_add (dst, c & charmask); | |
137 } | |
138 } | |
139 | |
771 | 140 /* Convert Shift-JIS data to internal format. */ |
141 | |
142 static Bytecount | |
143 shift_jis_convert (struct coding_stream *str, const UExtbyte *src, | |
144 unsigned_char_dynarr *dst, Bytecount n) | |
145 { | |
146 unsigned int ch = str->ch; | |
147 Bytecount orign = n; | |
148 | |
149 if (str->direction == CODING_DECODE) | |
150 { | |
151 while (n--) | |
152 { | |
153 UExtbyte c = *src++; | |
154 | |
155 if (ch) | |
156 { | |
157 /* Previous character was first byte of Shift-JIS Kanji char. */ | |
826 | 158 if (byte_shift_jis_two_byte_2_p (c)) |
771 | 159 { |
867 | 160 Ibyte e1, e2; |
771 | 161 |
162 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); | |
163 DECODE_SHIFT_JIS (ch, c, e1, e2); | |
164 Dynarr_add (dst, e1); | |
165 Dynarr_add (dst, e2); | |
166 } | |
167 else | |
168 { | |
169 DECODE_ADD_BINARY_CHAR (ch, dst); | |
170 DECODE_ADD_BINARY_CHAR (c, dst); | |
171 } | |
172 ch = 0; | |
173 } | |
174 else | |
175 { | |
826 | 176 if (byte_shift_jis_two_byte_1_p (c)) |
771 | 177 ch = c; |
826 | 178 else if (byte_shift_jis_katakana_p (c)) |
771 | 179 { |
180 Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201); | |
181 Dynarr_add (dst, c); | |
182 } | |
183 else | |
184 DECODE_ADD_BINARY_CHAR (c, dst); | |
185 } | |
186 } | |
187 | |
188 if (str->eof) | |
189 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
190 } | |
191 else | |
192 { | |
193 while (n--) | |
194 { | |
867 | 195 Ibyte c = *src++; |
826 | 196 if (byte_ascii_p (c)) |
771 | 197 { |
198 Dynarr_add (dst, c); | |
199 ch = 0; | |
200 } | |
867 | 201 else if (ibyte_leading_byte_p (c)) |
771 | 202 ch = (c == LEADING_BYTE_KATAKANA_JISX0201 || |
203 c == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
204 c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; | |
205 else if (ch) | |
206 { | |
207 if (ch == LEADING_BYTE_KATAKANA_JISX0201) | |
208 { | |
209 Dynarr_add (dst, c); | |
210 ch = 0; | |
211 } | |
212 else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
213 ch == LEADING_BYTE_JAPANESE_JISX0208) | |
214 ch = c; | |
215 else | |
216 { | |
5538
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
217 UExtbyte b1, b2; |
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
218 ENCODE_SHIFT_JIS (ch, c, b1, b2); |
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
219 Dynarr_add (dst, b1); |
580ef98f2beb
Suppress a "shadowed global" warning.
Stephen J. Turnbull <stephen@xemacs.org>
parents:
5402
diff
changeset
|
220 Dynarr_add (dst, b2); |
771 | 221 ch = 0; |
222 } | |
223 } | |
224 } | |
225 } | |
226 | |
227 str->ch = ch; | |
228 | |
229 return orign; | |
230 } | |
231 | |
232 DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /* | |
233 Decode a JISX0208 character of Shift-JIS coding-system. | |
234 CODE is the character code in Shift-JIS as a cons of type bytes. | |
235 Return the corresponding character. | |
236 */ | |
237 (code)) | |
238 { | |
239 int c1, c2, s1, s2; | |
240 | |
241 CHECK_CONS (code); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
242 CHECK_FIXNUM (XCAR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
243 CHECK_FIXNUM (XCDR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
244 s1 = XFIXNUM (XCAR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
245 s2 = XFIXNUM (XCDR (code)); |
826 | 246 if (byte_shift_jis_two_byte_1_p (s1) && |
247 byte_shift_jis_two_byte_2_p (s2)) | |
771 | 248 { |
249 DECODE_SHIFT_JIS (s1, s2, c1, c2); | |
867 | 250 return make_char (make_ichar (Vcharset_japanese_jisx0208, |
831 | 251 c1 & 0x7F, c2 & 0x7F)); |
771 | 252 } |
253 else | |
254 return Qnil; | |
255 } | |
256 | |
257 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* | |
258 Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system. | |
259 Return the corresponding character code in SHIFT-JIS as a cons of two bytes. | |
260 */ | |
261 (character)) | |
262 { | |
263 Lisp_Object charset; | |
264 int c1, c2, s1, s2; | |
265 | |
266 CHECK_CHAR_COERCE_INT (character); | |
867 | 267 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
771 | 268 if (EQ (charset, Vcharset_japanese_jisx0208)) |
269 { | |
270 ENCODE_SHIFT_JIS (c1 | 0x80, c2 | 0x80, s1, s2); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
271 return Fcons (make_fixnum (s1), make_fixnum (s2)); |
771 | 272 } |
273 else | |
274 return Qnil; | |
275 } | |
276 | |
277 | |
278 /************************************************************************/ | |
279 /* Shift-JIS detector */ | |
280 /************************************************************************/ | |
281 | |
DEFINE_DETECTOR (shift_jis);
DEFINE_DETECTOR_CATEGORY (shift_jis, shift_jis);
284 | |
/* Statistics gathered by shift_jis_detect () over the bytes seen so
   far. */
struct shift_jis_detector
{
  /* Valid two-byte sequences in which the first or the second byte was
     in 0x80 .. 0x9F. */
  int seen_jisx0208_char_in_c1;
  /* All other valid two-byte sequences. */
  int seen_jisx0208_char_in_upper;
  /* Bytes in 0xA0 .. 0xDF seen in first-byte position. */
  int seen_jisx0201_char;
  /* Saw ISO_CODE_ESC, ISO_CODE_SI or ISO_CODE_SO in first-byte
     position. */
  unsigned int seen_iso2022_esc:1;
  /* Saw a byte >= 0x80 in first-byte position that is neither a
     katakana byte nor a valid first byte. */
  unsigned int seen_bad_first_byte:1;
  /* Saw a byte outside 0x40 .. 0x7E and 0x80 .. 0xFC in second-byte
     position. */
  unsigned int seen_bad_second_byte:1;
  /* temporary */
  /* The next byte is the second byte of a two-byte sequence. */
  unsigned int in_second_byte:1;
  /* The pending first byte was in 0x80 .. 0x9F. */
  unsigned int first_byte_was_c1:1;
};
297 | |
298 static void | |
299 shift_jis_detect (struct detection_state *st, const UExtbyte *src, | |
300 Bytecount n) | |
301 { | |
302 struct shift_jis_detector *data = DETECTION_STATE_DATA (st, shift_jis); | |
303 | |
304 while (n--) | |
305 { | |
306 UExtbyte c = *src++; | |
307 if (!data->in_second_byte) | |
308 { | |
309 if (c >= 0x80 && c <= 0x9F) | |
310 data->first_byte_was_c1 = 1; | |
311 if (c >= 0xA0 && c <= 0xDF) | |
312 data->seen_jisx0201_char++; | |
313 else if ((c >= 0x80 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) | |
314 data->in_second_byte = 1; | |
315 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
316 data->seen_iso2022_esc = 1; | |
317 else if (c >= 0x80) | |
318 data->seen_bad_first_byte = 1; | |
319 } | |
320 else | |
321 { | |
322 if ((c >= 0x40 && c <= 0x7E) || (c >= 0x80 && c <= 0xFC)) | |
323 { | |
324 if (data->first_byte_was_c1 || (c >= 0x80 && c <= 0x9F)) | |
325 data->seen_jisx0208_char_in_c1++; | |
326 else | |
327 data->seen_jisx0208_char_in_upper++; | |
328 } | |
329 else | |
330 data->seen_bad_second_byte = 1; | |
331 data->in_second_byte = 0; | |
332 data->first_byte_was_c1 = 0; | |
333 } | |
334 } | |
335 | |
336 if (data->seen_bad_second_byte) | |
337 DET_RESULT (st, shift_jis) = DET_NEARLY_IMPOSSIBLE; | |
338 else if (data->seen_bad_first_byte) | |
339 DET_RESULT (st, shift_jis) = DET_QUITE_IMPROBABLE; | |
340 else if (data->seen_iso2022_esc) | |
341 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_UNLIKELY; | |
342 else if (data->seen_jisx0208_char_in_c1 >= 20 || | |
343 (data->seen_jisx0208_char_in_c1 >= 10 && | |
344 data->seen_jisx0208_char_in_upper >= 10)) | |
345 DET_RESULT (st, shift_jis) = DET_QUITE_PROBABLE; | |
346 else if (data->seen_jisx0208_char_in_c1 > 3 || | |
347 data->seen_jisx0208_char_in_upper >= 10 || | |
348 /* Since the range is limited compared to what is often seen | |
349 is typical Latin-X charsets, the fact that we've seen a | |
350 bunch of them and none that are invalid is reasonably | |
351 strong statistical evidence of this encoding, or at least | |
352 not of the common Latin-X ones. */ | |
353 data->seen_jisx0201_char >= 100) | |
354 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_LIKELY; | |
355 else if (data->seen_jisx0208_char_in_c1 > 0 || | |
356 data->seen_jisx0208_char_in_upper > 0 || | |
357 data->seen_jisx0201_char > 0) | |
358 DET_RESULT (st, shift_jis) = DET_SLIGHTLY_LIKELY; | |
359 else | |
360 DET_RESULT (st, shift_jis) = DET_AS_LIKELY_AS_UNLIKELY; | |
361 } | |
362 | |
363 | |
364 /************************************************************************/ | |
365 /* Big5 methods */ | |
366 /************************************************************************/ | |
367 | |
/* BIG5 (used for Mandarin in Taiwan). */
DEFINE_CODING_SYSTEM_TYPE (big5);
370 | |
371 /* BIG5 is a coding system encoding two character sets: ASCII and | |
372 Big5. An ASCII character is encoded as is. Big5 is a two-byte | |
373 character set and is encoded in two-byte. | |
374 | |
375 --- CODE RANGE of BIG5 --- | |
376 (character set) (range) | |
377 ASCII 0x00 .. 0x7F | |
378 Big5 (1st byte) 0xA1 .. 0xFE | |
379 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE | |
380 -------------------------- | |
381 | |
382 Since the number of characters in Big5 is larger than maximum | |
383 characters in Emacs' charset (96x96), it can't be handled as one | |
384 charset. So, in XEmacs, Big5 is divided into two: `charset-big5-1' | |
385 and `charset-big5-2'. Both <type>s are DIMENSION2_CHARS94. The former | |
386 contains frequently used characters and the latter contains less | |
387 frequently used characters. */ | |
388 | |
/* Is this the first byte of a Big5 two-byte char?

   Returns nonzero when C lies in 0xA1 .. 0xFE, and zero otherwise. */

inline static int
byte_big5_two_byte_1_p (int c)
{
  return c >= 0xA1 && c <= 0xFE;
}
771 | 394 |
/* Is this the second byte of a Big5 two-byte char?

   Returns nonzero when C lies in 0x40 .. 0x7E or 0xA1 .. 0xFE, and zero
   otherwise. */

inline static int
byte_big5_two_byte_2_p (int c)
{
  return (c >= 0x40 && c <= 0x7E) || (c >= 0xA1 && c <= 0xFE);
}
771 | 402 |
/* Number of Big5 characters which have the same code in 1st byte
   (94 + 63 = 157). */

#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Code conversion macros.  These are macros because they are used in
   inner loops during code conversion.

   Note that temporary variables in macros introduce the classic
   dynamic-scoping problems with variable names.  We use capital-
   lettered variables in the assumption that XEmacs does not use
   capital letters in variables except in a very formalized way
   (e.g. Qstring). */

/* Convert Big5 code (b1, b2) into its internal string representation
   (lb, c1, c2). */

/* There is a much simpler way to split the Big5 charset into two.
   For the moment I'm going to leave the algorithm as-is because it
   claims to separate out the most-used characters into a single
   charset, which perhaps will lead to optimizations in various
   places.

   The way the algorithm works is something like this:

   Big5 can be viewed as a 94x157 charset, where the row is
   encoded into the bytes 0xA1 .. 0xFE and the column is encoded
   into the bytes 0x40 .. 0x7E and 0xA1 .. 0xFE.  As for frequency,
   the split between low and high column numbers is apparently
   meaningless; ascending rows produce less and less frequent chars.
   Therefore, we assign the lower half of rows (0xA1 .. 0xC8) to
   the first charset, and the upper half (0xC9 .. 0xFE) to the
   second.  To do the conversion, we convert the character into
   a single number where 0 .. 156 is the first row, 157 .. 313
   is the second, etc.  That way, the characters are ordered by
   decreasing frequency.  Then we just chop the space in two
   and coerce the result into a 94x94 space.
   */

/* B1 and B2 are each evaluated exactly once.  LB, C1 and C2 must be
   lvalues; each is assigned exactly once. */

#define DECODE_BIG5(b1, b2, lb, c1, c2) do                              \
{                                                                       \
  int B1 = (b1), B2 = (b2);                                             \
  int I                                                                 \
    = (B1 - 0xA1) * BIG5_SAME_ROW + B2 - (B2 < 0x7F ? 0x40 : 0x62);     \
                                                                        \
  if (B1 < 0xC9)                                                        \
    {                                                                   \
      (lb) = LEADING_BYTE_CHINESE_BIG5_1;                               \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      (lb) = LEADING_BYTE_CHINESE_BIG5_2;                               \
      I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1);                             \
    }                                                                   \
  (c1) = I / (0xFF - 0xA1) + 0xA1;                                      \
  (c2) = I % (0xFF - 0xA1) + 0xA1;                                      \
} while (0)

/* Convert the internal string representation of a Big5 character
   (lb, c1, c2) into Big5 code (b1, b2).

   LB, C1 and C2 are each evaluated exactly once.  B1 and B2 must be
   lvalues; each is assigned exactly once. */

#define ENCODE_BIG5(lb, c1, c2, b1, b2) do                              \
{                                                                       \
  int I = ((c1) - 0xA1) * (0xFF - 0xA1) + ((c2) - 0xA1);                \
                                                                        \
  if ((lb) == LEADING_BYTE_CHINESE_BIG5_2)                              \
    {                                                                   \
      I += BIG5_SAME_ROW * (0xC9 - 0xA1);                               \
    }                                                                   \
  (b1) = I / BIG5_SAME_ROW + 0xA1;                                      \
  /* Reduce I to the column index within the row, 0 .. 156. */          \
  I %= BIG5_SAME_ROW;                                                   \
  (b2) = I + (I < 0x3F ? 0x40 : 0x62);                                  \
} while (0)
475 | |
476 /* Convert Big5 data to internal format. */ | |
477 | |
478 static Bytecount | |
479 big5_convert (struct coding_stream *str, const UExtbyte *src, | |
480 unsigned_char_dynarr *dst, Bytecount n) | |
481 { | |
482 unsigned int ch = str->ch; | |
483 Bytecount orign = n; | |
484 | |
485 if (str->direction == CODING_DECODE) | |
486 { | |
487 while (n--) | |
488 { | |
489 UExtbyte c = *src++; | |
490 if (ch) | |
491 { | |
492 /* Previous character was first byte of Big5 char. */ | |
826 | 493 if (byte_big5_two_byte_2_p (c)) |
771 | 494 { |
867 | 495 Ibyte b1, b2, b3; |
771 | 496 DECODE_BIG5 (ch, c, b1, b2, b3); |
497 Dynarr_add (dst, b1); | |
498 Dynarr_add (dst, b2); | |
499 Dynarr_add (dst, b3); | |
500 } | |
501 else | |
502 { | |
503 DECODE_ADD_BINARY_CHAR (ch, dst); | |
504 DECODE_ADD_BINARY_CHAR (c, dst); | |
505 } | |
506 ch = 0; | |
507 } | |
508 else | |
509 { | |
826 | 510 if (byte_big5_two_byte_1_p (c)) |
771 | 511 ch = c; |
512 else | |
513 DECODE_ADD_BINARY_CHAR (c, dst); | |
514 } | |
515 } | |
516 | |
517 if (str->eof) | |
518 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
519 } | |
520 else | |
521 { | |
522 while (n--) | |
523 { | |
867 | 524 Ibyte c = *src++; |
826 | 525 if (byte_ascii_p (c)) |
771 | 526 { |
527 /* ASCII. */ | |
528 Dynarr_add (dst, c); | |
529 } | |
867 | 530 else if (ibyte_leading_byte_p (c)) |
771 | 531 { |
532 if (c == LEADING_BYTE_CHINESE_BIG5_1 || | |
533 c == LEADING_BYTE_CHINESE_BIG5_2) | |
534 { | |
535 /* A recognized leading byte. */ | |
536 ch = c; | |
537 continue; /* not done with this character. */ | |
538 } | |
539 /* otherwise just ignore this character. */ | |
540 } | |
541 else if (ch == LEADING_BYTE_CHINESE_BIG5_1 || | |
542 ch == LEADING_BYTE_CHINESE_BIG5_2) | |
543 { | |
544 /* Previous char was a recognized leading byte. */ | |
545 ch = (ch << 8) | c; | |
546 continue; /* not done with this character. */ | |
547 } | |
548 else if (ch) | |
549 { | |
550 /* Encountering second byte of a Big5 character. */ | |
551 UExtbyte b1, b2; | |
552 | |
553 ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2); | |
554 Dynarr_add (dst, b1); | |
555 Dynarr_add (dst, b2); | |
556 } | |
557 | |
558 ch = 0; | |
559 } | |
560 } | |
561 | |
562 str->ch = ch; | |
563 return orign; | |
564 } | |
565 | |
867 | 566 Ichar |
771 | 567 decode_big5_char (int b1, int b2) |
568 { | |
826 | 569 if (byte_big5_two_byte_1_p (b1) && |
570 byte_big5_two_byte_2_p (b2)) | |
771 | 571 { |
572 int leading_byte; | |
573 Lisp_Object charset; | |
574 int c1, c2; | |
575 | |
576 DECODE_BIG5 (b1, b2, leading_byte, c1, c2); | |
826 | 577 charset = charset_by_leading_byte (leading_byte); |
867 | 578 return make_ichar (charset, c1 & 0x7F, c2 & 0x7F); |
771 | 579 } |
580 else | |
581 return -1; | |
582 } | |
583 | |
584 DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /* | |
585 Convert Big Five character codes in CODE into a character. | |
586 CODE is a cons of two integers specifying the codepoints in Big Five. | |
587 Return the corresponding character, or nil if the codepoints are out of range. | |
588 | |
589 The term `decode' is used because the codepoints can be viewed as the | |
590 representation of the character in the external Big Five encoding, and thus | |
591 converting them to a character is analogous to any other operation that | |
592 decodes an external representation. | |
593 */ | |
594 (code)) | |
595 { | |
867 | 596 Ichar ch; |
771 | 597 |
598 CHECK_CONS (code); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
599 CHECK_FIXNUM (XCAR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
600 CHECK_FIXNUM (XCDR (code)); |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
601 ch = decode_big5_char (XFIXNUM (XCAR (code)), XFIXNUM (XCDR (code))); |
771 | 602 if (ch == -1) |
603 return Qnil; | |
604 else | |
605 return make_char (ch); | |
606 } | |
607 | |
608 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* | |
609 Convert the specified Big Five character into its codepoints. | |
610 The codepoints are returned as a cons of two integers, specifying the | |
611 Big Five codepoints. See `decode-big5-char' for the reason why the | |
612 term `encode' is used for this operation. | |
613 */ | |
614 (character)) | |
615 { | |
616 Lisp_Object charset; | |
617 int c1, c2, b1, b2; | |
618 | |
619 CHECK_CHAR_COERCE_INT (character); | |
867 | 620 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
771 | 621 if (EQ (charset, Vcharset_chinese_big5_1) || |
622 EQ (charset, Vcharset_chinese_big5_2)) | |
623 { | |
624 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80, | |
625 b1, b2); | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
626 return Fcons (make_fixnum (b1), make_fixnum (b2)); |
771 | 627 } |
628 else | |
629 return Qnil; | |
630 } | |
631 | |
632 | |
633 /************************************************************************/ | |
634 /* Big5 detector */ | |
635 /************************************************************************/ | |
636 | |
DEFINE_DETECTOR (big5);
DEFINE_DETECTOR_CATEGORY (big5, big5);
639 | |
/* Statistics gathered by big5_detect () over the bytes seen so far. */
struct big5_detector
{
  /* Two-byte sequences whose second byte was in 0x40 .. 0x7E. */
  int seen_big5_char;
  /* Two-byte sequences whose second byte was in 0xA1 .. 0xFE. */
  int seen_euc_char;
  /* Saw ISO_CODE_ESC, ISO_CODE_SI or ISO_CODE_SO in first-byte
     position. */
  unsigned int seen_iso2022_esc:1;
  /* Saw a byte >= 0x80 outside 0xA1 .. 0xFE in first-byte position. */
  unsigned int seen_bad_first_byte:1;
  /* Saw a byte outside 0x40 .. 0x7E and 0xA1 .. 0xFE in second-byte
     position. */
  unsigned int seen_bad_second_byte:1;

  /* temporary */
  /* The next byte is the second byte of a two-byte sequence. */
  unsigned int in_second_byte:1;
};
651 | |
652 static void | |
653 big5_detect (struct detection_state *st, const UExtbyte *src, | |
654 Bytecount n) | |
655 { | |
656 struct big5_detector *data = DETECTION_STATE_DATA (st, big5); | |
657 | |
658 while (n--) | |
659 { | |
660 UExtbyte c = *src++; | |
661 if (!data->in_second_byte) | |
662 { | |
663 if (c >= 0xA1 && c <= 0xFE) | |
664 data->in_second_byte = 1; | |
665 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
666 data->seen_iso2022_esc = 1; | |
667 else if (c >= 0x80) | |
668 data->seen_bad_first_byte = 1; | |
669 } | |
670 else | |
671 { | |
672 data->in_second_byte = 0; | |
985 | 673 if (c >= 0xA1 && c <= 0xFE) |
674 data->seen_euc_char++; | |
675 else if (c >= 0x40 && c <= 0x7E) | |
771 | 676 data->seen_big5_char++; |
677 else | |
678 data->seen_bad_second_byte = 1; | |
679 } | |
680 } | |
681 | |
682 if (data->seen_bad_second_byte) | |
683 DET_RESULT (st, big5) = DET_NEARLY_IMPOSSIBLE; | |
684 else if (data->seen_bad_first_byte) | |
685 DET_RESULT (st, big5) = DET_QUITE_IMPROBABLE; | |
686 else if (data->seen_iso2022_esc) | |
687 DET_RESULT (st, big5) = DET_SOMEWHAT_UNLIKELY; | |
688 else if (data->seen_big5_char >= 4) | |
689 DET_RESULT (st, big5) = DET_SOMEWHAT_LIKELY; | |
985 | 690 else if (data->seen_euc_char) |
691 DET_RESULT (st, big5) = DET_SLIGHTLY_LIKELY; | |
771 | 692 else |
693 DET_RESULT (st, big5) = DET_AS_LIKELY_AS_UNLIKELY; | |
694 } | |
695 | |
696 | |
697 /************************************************************************/ | |
698 /* ISO2022 methods */ | |
699 /************************************************************************/ | |
700 | |
701 /* Any ISO-2022-compliant coding system. Includes JIS, EUC, CTEXT | |
702 (Compound Text, the encoding of selections in X Windows). See below for | |
703 a complete description of ISO-2022. */ | |
704 | |
/* Flags indicating what we've seen so far when parsing an
   ISO2022 escape sequence.  The enumerator order is kept exactly as it
   was; code outside this view may depend on the numeric values. */
enum iso_esc_flag
{
  /* Partial sequences */
  ISO_ESC_NOTHING,      /* Nothing has been seen. */
  ISO_ESC,              /* We've seen ESC. */
  ISO_ESC_2_4,          /* We've seen ESC $.  This indicates
                           that we're designating a multi-byte, rather
                           than a single-byte, character set. */
  ISO_ESC_2_5,          /* We've seen ESC %.  This indicates an escape to a
                           Unicode coding system; the only one of these
                           we're prepared to deal with is UTF-8, which has
                           the next character as G. */
  ISO_ESC_2_8,          /* We've seen ESC 0x28, i.e. ESC (.
                           This means designate a 94-character
                           character set into G0. */
  ISO_ESC_2_9,          /* We've seen ESC 0x29 -- designate a
                           94-character character set into G1. */
  ISO_ESC_2_10,         /* We've seen ESC 0x2A. */
  ISO_ESC_2_11,         /* We've seen ESC 0x2B. */
  ISO_ESC_2_12,         /* We've seen ESC 0x2C -- designate a
                           96-character character set into G0.
                           (This is not ISO2022-standard.
                           The following 96-character
                           control sequences are standard,
                           though.) */
  ISO_ESC_2_13,         /* We've seen ESC 0x2D -- designate a
                           96-character character set into G1.
                           */
  ISO_ESC_2_14,         /* We've seen ESC 0x2E. */
  ISO_ESC_2_15,         /* We've seen ESC 0x2F. */
  ISO_ESC_2_4_8,        /* We've seen ESC $ 0x28 -- designate
                           a 94^N character set into G0. */
  ISO_ESC_2_4_9,        /* We've seen ESC $ 0x29. */
  ISO_ESC_2_4_10,       /* We've seen ESC $ 0x2A. */
  ISO_ESC_2_4_11,       /* We've seen ESC $ 0x2B. */
  ISO_ESC_2_4_12,       /* We've seen ESC $ 0x2C. */
  ISO_ESC_2_4_13,       /* We've seen ESC $ 0x2D. */
  ISO_ESC_2_4_14,       /* We've seen ESC $ 0x2E. */
  ISO_ESC_2_4_15,       /* We've seen ESC $ 0x2F. */
  ISO_ESC_5_11,         /* We've seen ESC [ or 0x9B.  This
                           starts a directionality-control
                           sequence.  The next character
                           must be 0, 1, 2, or ]. */
  ISO_ESC_5_11_0,       /* We've seen 0x9B 0.  The next character must be ]. */
  ISO_ESC_5_11_1,       /* We've seen 0x9B 1.  The next character must be ]. */
  ISO_ESC_5_11_2,       /* We've seen 0x9B 2.  The next character must be ]. */

  /* Full sequences. */
  ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */
  ISO_ESC_END_COMPOSITE,   /* Private usage for END COMPOSING */
  ISO_ESC_SINGLE_SHIFT,    /* We've seen a complete single-shift sequence. */
  ISO_ESC_LOCKING_SHIFT,   /* We've seen a complete locking-shift sequence. */
  ISO_ESC_DESIGNATE,       /* We've seen a complete designation sequence. */
  ISO_ESC_DIRECTIONALITY,  /* We've seen a complete ISO6429 directionality
                              sequence. */
  ISO_ESC_LITERAL          /* We've seen a literal character a la
                              escape-quoting. */
};
765 | |
/* Error kinds for ISO2022 processing.
   NOTE(review): the uses are outside this view; meanings below are
   read off the enumerator names only. */
enum iso_error
{
  ISO_ERROR_BAD_FINAL,
  /* NOTE(review): "UNKWOWN" is a misspelling of "UNKNOWN"; the name is
     kept as-is because it may be referenced elsewhere in the file. */
  ISO_ERROR_UNKWOWN_ESC_SEQUENCE,
  ISO_ERROR_INVALID_CODE_POINT_CHARACTER
};
772 | |
773 | |
/* Flags indicating current state while converting code. */

/************ Used during encoding and decoding: ************/
/* If set, the current directionality is right-to-left.  Otherwise, it's
   left-to-right. */
#define ISO_STATE_R2L           (1 << 0)

/************ Used during encoding: ************/
/* If set, we just saw a CR. */
#define ISO_STATE_CR            (1 << 1)

/************ Used during decoding: ************/
/* If set, we're currently parsing an escape sequence and the upper 16 bits
   should be looked at to indicate what partial escape sequence we've seen
   so far.  Otherwise, we're running through actual text. */
#define ISO_STATE_ESCAPE        (1 << 2)
/* If set, G2 is invoked into GL, but only for the next character. */
#define ISO_STATE_SS2           (1 << 3)
/* If set, G3 is invoked into GL, but only for the next character.  If both
   ISO_STATE_SS2 and ISO_STATE_SS3 are set, ISO_STATE_SS2 overrides; but
   this probably indicates an error in the text encoding. */
#define ISO_STATE_SS3           (1 << 4)
/* If set, we're currently processing a composite character (i.e. a
   character constructed by overstriking two or more characters). */
#define ISO_STATE_COMPOSITE     (1 << 5)

/* If set, we're processing UTF-8 encoded data within ISO-2022
   processing. */
#define ISO_STATE_UTF_8         (1 << 6)

/* ISO_STATE_LOCK is the mask of flags that remain on until explicitly
   turned off when in the ISO2022 encoder/decoder.  Other flags are turned
   off at the end of processing each character or escape sequence. */
# define ISO_STATE_LOCK \
  (ISO_STATE_COMPOSITE | ISO_STATE_R2L | ISO_STATE_UTF_8)
771 | 809 |
810 typedef struct charset_conversion_spec | |
811 { | |
812 Lisp_Object from_charset; | |
813 Lisp_Object to_charset; | |
814 } charset_conversion_spec; | |
815 | |
816 typedef struct | |
817 { | |
818 Dynarr_declare (charset_conversion_spec); | |
819 } charset_conversion_spec_dynarr; | |
820 | |
821 struct iso2022_coding_system | |
822 { | |
823 /* What are the charsets to be initially designated to G0, G1, | |
824 G2, G3? If t, no charset is initially designated. If nil, | |
825 no charset is initially designated and no charset is allowed | |
826 to be designated. */ | |
827 Lisp_Object initial_charset[4]; | |
828 | |
829 /* If true, a designation escape sequence needs to be sent on output | |
830 for the charset in G[0-3] before that charset is used. */ | |
831 unsigned char force_charset_on_output[4]; | |
832 | |
833 charset_conversion_spec_dynarr *input_conv; | |
834 charset_conversion_spec_dynarr *output_conv; | |
835 | |
836 unsigned int shoort :1; /* C makes you speak Dutch */ | |
837 unsigned int no_ascii_eol :1; | |
838 unsigned int no_ascii_cntl :1; | |
839 unsigned int seven :1; | |
840 unsigned int lock_shift :1; | |
841 unsigned int no_iso6429 :1; | |
842 unsigned int escape_quoted :1; | |
843 }; | |
844 | |
/* Accessors for the members of struct iso2022_coding_system, given a
   coding-system pointer CS.  Each one expands to a parenthesized member
   access on CODING_SYSTEM_TYPE_DATA (CS, iso2022).  REG selects one of
   the four array slots. */
#define ISO2022_CODESYS_FIELD(cs, field) \
  (CODING_SYSTEM_TYPE_DATA (cs, iso2022)->field)

#define CODING_SYSTEM_ISO2022_INITIAL_CHARSET(cs, reg) \
  ISO2022_CODESYS_FIELD (cs, initial_charset[reg])
#define CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(cs, reg) \
  ISO2022_CODESYS_FIELD (cs, force_charset_on_output[reg])
#define CODING_SYSTEM_ISO2022_SHORT(cs) \
  ISO2022_CODESYS_FIELD (cs, shoort)
#define CODING_SYSTEM_ISO2022_NO_ASCII_EOL(cs) \
  ISO2022_CODESYS_FIELD (cs, no_ascii_eol)
#define CODING_SYSTEM_ISO2022_NO_ASCII_CNTL(cs) \
  ISO2022_CODESYS_FIELD (cs, no_ascii_cntl)
#define CODING_SYSTEM_ISO2022_SEVEN(cs) \
  ISO2022_CODESYS_FIELD (cs, seven)
#define CODING_SYSTEM_ISO2022_LOCK_SHIFT(cs) \
  ISO2022_CODESYS_FIELD (cs, lock_shift)
#define CODING_SYSTEM_ISO2022_NO_ISO6429(cs) \
  ISO2022_CODESYS_FIELD (cs, no_iso6429)
#define CODING_SYSTEM_ISO2022_ESCAPE_QUOTED(cs) \
  ISO2022_CODESYS_FIELD (cs, escape_quoted)
#define CODING_SYSTEM_ISO2022_INPUT_CONV(cs) \
  ISO2022_CODESYS_FIELD (cs, input_conv)
#define CODING_SYSTEM_ISO2022_OUTPUT_CONV(cs) \
  ISO2022_CODESYS_FIELD (cs, output_conv)
867 | |
/* Variants of the CODING_SYSTEM_ISO2022_* accessors that take an object
   OBJ and pass it through XCODING_SYSTEM() before delegating to the
   corresponding CODING_SYSTEM_ISO2022_* macro.  REG selects one of the
   four array slots. */
#define XCODING_SYSTEM_ISO2022_INITIAL_CHARSET(obj, reg) \
  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (XCODING_SYSTEM (obj), reg)
#define XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(obj, reg) \
  CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (XCODING_SYSTEM (obj), reg)
#define XCODING_SYSTEM_ISO2022_SHORT(obj) \
  CODING_SYSTEM_ISO2022_SHORT (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_NO_ASCII_EOL(obj) \
  CODING_SYSTEM_ISO2022_NO_ASCII_EOL (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL(obj) \
  CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_SEVEN(obj) \
  CODING_SYSTEM_ISO2022_SEVEN (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_LOCK_SHIFT(obj) \
  CODING_SYSTEM_ISO2022_LOCK_SHIFT (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_NO_ISO6429(obj) \
  CODING_SYSTEM_ISO2022_NO_ISO6429 (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED(obj) \
  CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_INPUT_CONV(obj) \
  CODING_SYSTEM_ISO2022_INPUT_CONV (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_OUTPUT_CONV(obj) \
  CODING_SYSTEM_ISO2022_OUTPUT_CONV (XCODING_SYSTEM (obj))
890 | |
891 /* Additional information used by the ISO2022 decoder and detector. */ | |
892 struct iso2022_coding_stream | |
893 { | |
894 /* CHARSET holds the character sets currently assigned to the G0 | |
895 through G3 variables. It is initialized from the array | |
896 INITIAL_CHARSET in CODESYS. */ | |
897 Lisp_Object charset[4]; | |
898 | |
899 /* Which registers are currently invoked into the left (GL) and | |
900 right (GR) halves of the 8-bit encoding space? */ | |
901 int register_left, register_right; | |
902 | |
903 /* FLAGS holds flags indicating the current state of the encoding. Some of | |
904 these flags are actually part of the state-dependent data and should be | |
905 moved there. */ | |
906 unsigned int flags; | |
907 | |
908 /**************** for decoding ****************/ | |
909 | |
910 /* ISO_ESC holds a value indicating part of an escape sequence | |
911 that has already been seen. */ | |
912 enum iso_esc_flag esc; | |
913 | |
914 /* This records the bytes we've seen so far in an escape sequence, | |
915 in case the sequence is invalid (we spit out the bytes unchanged). */ | |
916 unsigned char esc_bytes[8]; | |
917 | |
918 /* Index for next byte to store in ISO escape sequence. */ | |
919 int esc_bytes_index; | |
920 | |
921 #ifdef ENABLE_COMPOSITE_CHARS | |
922 /* Stuff seen so far when composing a string. */ | |
923 unsigned_char_dynarr *composite_chars; | |
924 #endif | |
925 | |
926 /* If we saw an invalid designation sequence for a particular | |
927 register, we flag it here and switch to ASCII. The next time we | |
928 see a valid designation for this register, we turn off the flag | |
929 and do the designation normally, but pretend the sequence was | |
930 invalid. The effect of all this is that (most of the time) the | |
931 escape sequences for both the switch to the unknown charset, and | |
932 the switch back to the known charset, get inserted literally into | |
933 the buffer and saved out as such. The hope is that we can | |
934 preserve the escape sequences so that the resulting written out | |
935 file makes sense. If we don't do any of this, the designation | |
936 to the invalid charset will be preserved but that switch back | |
937 to the known charset will probably get eaten because it was | |
938 the same charset that was already present in the register. */ | |
939 unsigned char invalid_designated[4]; | |
940 | |
941 /* We try to do similar things as above for direction-switching | |
942 sequences. If we encountered a direction switch while an | |
943 invalid designation was present, or an invalid designation | |
944 just after a direction switch (i.e. no valid designation | |
945 encountered yet), we insert the direction-switch escape | |
946 sequence literally into the output stream, and later on | |
947 insert the corresponding direction-restoring escape sequence | |
948 literally also. */ | |
949 unsigned int switched_dir_and_no_valid_charset_yet :1; | |
950 unsigned int invalid_switch_dir :1; | |
951 | |
952 /* Tells the decoder to output the escape sequence literally | |
953 even though it was valid. Used in the games we play to | |
954 avoid lossage when we encounter invalid designations. */ | |
955 unsigned int output_literally :1; | |
956 /* We encountered a direction switch followed by an invalid | |
957 designation. We didn't output the direction switch | |
958 literally because we didn't know about the invalid designation; | |
959 but we have to do so now. */ | |
960 unsigned int output_direction_sequence :1; | |
961 | |
962 /**************** for encoding ****************/ | |
963 | |
964 /* Whether we need to explicitly designate the charset in the | |
965 G? register before using it. It is initialized from the | |
966 array FORCE_CHARSET_ON_OUTPUT in CODESYS. */ | |
967 unsigned char force_charset_on_output[4]; | |
968 | |
969 /* Other state variables that need to be preserved across | |
970 invocations. */ | |
971 Lisp_Object current_charset; | |
972 int current_half; | |
973 int current_char_boundary; | |
3439 | 974 |
975 /* Used for handling UTF-8. */ | |
976 unsigned char counter; | |
4096 | 977 unsigned char indicated_length; |
771 | 978 }; |
979 | |
1204 | 980 static const struct memory_description ccs_description_1[] = |
771 | 981 { |
982 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, from_charset) }, | |
983 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, to_charset) }, | |
984 { XD_END } | |
985 }; | |
986 | |
1204 | 987 static const struct sized_memory_description ccs_description = |
771 | 988 { |
989 sizeof (charset_conversion_spec), | |
990 ccs_description_1 | |
991 }; | |
992 | |
1204 | 993 static const struct memory_description ccsd_description_1[] = |
771 | 994 { |
995 XD_DYNARR_DESC (charset_conversion_spec_dynarr, &ccs_description), | |
996 { XD_END } | |
997 }; | |
998 | |
1204 | 999 static const struct sized_memory_description ccsd_description = |
771 | 1000 { |
1001 sizeof (charset_conversion_spec_dynarr), | |
1002 ccsd_description_1 | |
1003 }; | |
1004 | |
1204 | 1005 static const struct memory_description iso2022_coding_system_description[] = { |
1006 { XD_LISP_OBJECT_ARRAY, offsetof (struct iso2022_coding_system, | |
1007 initial_charset), 4 }, | |
2367 | 1008 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, input_conv), |
2551 | 1009 1, { &ccsd_description } }, |
2367 | 1010 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, output_conv), |
2551 | 1011 1, { &ccsd_description } }, |
771 | 1012 { XD_END } |
1013 }; | |
1014 | |
1204 | 1015 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022); |
1016 | |
771 | 1017 /* The following note taken directly from FSF 21.0.103. */ |
1018 | |
1019 /* The following note describes the coding system ISO2022 briefly. | |
1020 Since the intention of this note is to help understand the | |
1021 functions in this file, some parts are NOT ACCURATE or are OVERLY | |
1022 SIMPLIFIED. For thorough understanding, please refer to the | |
1023 original document of ISO2022. This is equivalent to the standard | |
1024 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*). | |
1025 | |
1026 ISO2022 provides many mechanisms to encode several character sets | |
1027 in 7-bit and 8-bit environments. For 7-bit environments, all text | |
1028 is encoded using bytes less than 128. This may make the encoded | |
1029 text a little bit longer, but the text passes more easily through | |
1030 several types of gateway, some of which strip off the MSB (Most | |
1031 Significant Bit). | |
1032 | |
1033 There are two kinds of character sets: control character sets and | |
1034 graphic character sets. The former contain control characters such | |
1035 as `newline' and `escape' to provide control functions (control | |
1036 functions are also provided by escape sequences). The latter | |
1037 contain graphic characters such as 'A' and '-'. Emacs recognizes | |
1038 two control character sets and many graphic character sets. | |
1039 | |
1040 Graphic character sets are classified into one of the following | |
1041 four classes, according to the number of bytes (DIMENSION) and | |
1042 number of characters in one dimension (CHARS) of the set: | |
1043 - DIMENSION1_CHARS94 | |
1044 - DIMENSION1_CHARS96 | |
1045 - DIMENSION2_CHARS94 | |
1046 - DIMENSION2_CHARS96 | |
1047 | |
1048 In addition, each character set is assigned an identification tag, | |
1049 unique for each set, called the "final character" (denoted as <F> | |
1050 hereafter). The <F> of each character set is decided by ECMA(*) | |
1051 when it is registered in ISO. The code range of <F> is 0x30..0x7F | |
1052 (0x30..0x3F are for private use only). | |
1053 | |
1054 Note (*): ECMA = European Computer Manufacturers Association | |
1055 | |
1056 Here are examples of graphic character sets [NAME(<F>)]: | |
1057 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ... | |
1058 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ... | |
1059 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ... | |
1060 o DIMENSION2_CHARS96 -- none for the moment | |
1061 | |
1062 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR. | |
1063 C0 [0x00..0x1F] -- control character plane 0 | |
1064 GL [0x20..0x7F] -- graphic character plane 0 | |
1065 C1 [0x80..0x9F] -- control character plane 1 | |
1066 GR [0xA0..0xFF] -- graphic character plane 1 | |
1067 | |
1068 A control character set is directly designated and invoked to C0 or | |
1069 C1 by an escape sequence. The most common case is that: | |
1070 - ISO646's control character set is designated/invoked to C0, and | |
1071 - ISO6429's control character set is designated/invoked to C1, | |
1072 and usually these designations/invocations are omitted in encoded | |
1073 text. In a 7-bit environment, only C0 can be used, and a control | |
1074 character for C1 is encoded by an appropriate escape sequence to | |
1075 fit into the environment. All control characters for C1 are | |
1076 defined to have corresponding escape sequences. | |
1077 | |
1078 A graphic character set is at first designated to one of four | |
1079 graphic registers (G0 through G3), then these graphic registers are | |
1080 invoked to GL or GR. These designations and invocations can be | |
1081 done independently. The most common case is that G0 is invoked to | |
1082 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually | |
1083 these invocations and designations are omitted in encoded text. | |
1084 In a 7-bit environment, only GL can be used. | |
1085 | |
1086 When a graphic character set of CHARS94 is invoked to GL, codes | |
1087 0x20 and 0x7F of the GL area work as control characters SPACE and | |
1088 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not | |
1089 be used. | |
1090 | |
1091 There are two ways of invocation: locking-shift and single-shift. | |
1092 With locking-shift, the invocation lasts until the next different | |
1093 invocation, whereas with single-shift, the invocation affects the | |
1094 following character only and doesn't affect the locking-shift | |
1095 state. Invocations are done by the following control characters or | |
1096 escape sequences: | |
1097 | |
1098 ---------------------------------------------------------------------- | |
1099 abbrev function cntrl escape seq description | |
1100 ---------------------------------------------------------------------- | |
1101 SI/LS0 (shift-in) 0x0F none invoke G0 into GL | |
1102 SO/LS1 (shift-out) 0x0E none invoke G1 into GL | |
1103 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL | |
1104 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL | |
1105 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*) | |
1106 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*) | |
1107 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*) | |
1108 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char | |
1109 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char | |
1110 ---------------------------------------------------------------------- | |
1111 (*) These are not used by any known coding system. | |
1112 | |
1113 Control characters for these functions are defined by macros | |
1114 ISO_CODE_XXX in `coding.h'. | |
1115 | |
1116 Designations are done by the following escape sequences: | |
1117 ---------------------------------------------------------------------- | |
1118 escape sequence description | |
1119 ---------------------------------------------------------------------- | |
1120 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0 | |
1121 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1 | |
1122 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2 | |
1123 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3 | |
1124 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*) | |
1125 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1 | |
1126 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2 | |
1127 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3 | |
1128 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**) | |
1129 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1 | |
1130 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2 | |
1131 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3 | |
1132 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*) | |
1133 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1 | |
1134 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2 | |
1135 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3 | |
1136 ---------------------------------------------------------------------- | |
1137 | |
1138 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set | |
1139 of dimension 1, chars 94, and final character <F>, etc... | |
1140 | |
1141 Note (*): Although these designations are not allowed in ISO2022, | |
1142 Emacs accepts them on decoding, and produces them on encoding | |
1143 CHARS96 character sets in a coding system which is characterized as | |
1144 7-bit environment, non-locking-shift, and non-single-shift. | |
1145 | |
1146 Note (**): If <F> is '@', 'A', or 'B', the intermediate character | |
1147 '(' can be omitted. We refer to this as "short-form" hereafter. | |
1148 | |
1149 Now you may notice that there are a lot of ways of encoding the | |
1150 same multilingual text in ISO2022. Actually, there exist many | |
1151 coding systems such as Compound Text (used in X11's inter client | |
1152 communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR | |
1153 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian | |
1154 localized platforms), and all of these are variants of ISO2022. | |
1155 | |
1156 In addition to the above, Emacs handles two more kinds of escape | |
1157 sequences: ISO6429's direction specification and Emacs' private | |
1158 sequence for specifying character composition. | |
1159 | |
1160 ISO6429's direction specification takes the following form: | |
1161 o CSI ']' -- end of the current direction | |
1162 o CSI '0' ']' -- end of the current direction | |
1163 o CSI '1' ']' -- start of left-to-right text | |
1164 o CSI '2' ']' -- start of right-to-left text | |
1165 The control character CSI (0x9B: control sequence introducer) is | |
1166 abbreviated to the escape sequence ESC '[' in a 7-bit environment. | |
1167 | |
1168 Character composition specification takes the following form: | |
1169 o ESC '0' -- start relative composition | |
1170 o ESC '1' -- end composition | |
1171 o ESC '2' -- start rule-base composition (*) | |
1172 o ESC '3' -- start relative composition with alternate chars (**) | |
1173 o ESC '4' -- start rule-base composition with alternate chars (**) | |
1174 Since these are not standard escape sequences of any ISO standard, | |
1175 the use of them with these meanings is restricted to Emacs only. | |
1176 | |
1177 (*) This form is used only in Emacs 20.5 and older versions, | |
1178 but the newer versions can safely decode it. | |
1179 (**) This form is used only in Emacs 21.1 and newer versions, | |
1180 and the older versions can't decode it. | |
1181 | |
1182 Here's a list of example usages of these composition escape | |
1183 sequences (categorized by `enum composition_method'). | |
1184 | |
1185 COMPOSITION_RELATIVE: | |
1186 ESC 0 CHAR [ CHAR ] ESC 1 | |
1187 COMPOSITION_WITH_RULE: | |
1188 ESC 2 CHAR [ RULE CHAR ] ESC 1 | |
1189 COMPOSITION_WITH_ALTCHARS: | |
1190 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 | |
1191 COMPOSITION_WITH_RULE_ALTCHARS: | |
1192 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | |
1193 | |
1194 static void | |
1195 reset_iso2022_decode (Lisp_Object coding_system, | |
1196 struct iso2022_coding_stream *data) | |
1197 { | |
1198 int i; | |
1199 #ifdef ENABLE_COMPOSITE_CHARS | |
1200 unsigned_char_dynarr *old_composite_chars = data->composite_chars; | |
1201 #endif | |
1202 | |
1203 xzero (*data); | |
1204 | |
1205 for (i = 0; i < 4; i++) | |
1206 { | |
1207 if (!NILP (coding_system)) | |
1208 data->charset[i] = | |
1209 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
1210 else | |
1211 data->charset[i] = Qt; | |
1212 } | |
1213 data->esc = ISO_ESC_NOTHING; | |
1214 data->register_right = 1; | |
1215 #ifdef ENABLE_COMPOSITE_CHARS | |
1216 if (old_composite_chars) | |
1217 { | |
1218 data->composite_chars = old_composite_chars; | |
1219 Dynarr_reset (data->composite_chars); | |
1220 } | |
1221 #endif | |
1222 } | |
1223 | |
1224 static void | |
1225 reset_iso2022_encode (Lisp_Object coding_system, | |
1226 struct iso2022_coding_stream *data) | |
1227 { | |
1228 int i; | |
1229 | |
1230 xzero (*data); | |
1231 | |
1232 for (i = 0; i < 4; i++) | |
1233 { | |
1234 data->charset[i] = | |
1235 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
1236 data->force_charset_on_output[i] = | |
1237 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (coding_system, i); | |
1238 } | |
1239 data->register_right = 1; | |
1240 data->current_charset = Qnil; | |
1241 data->current_char_boundary = 1; | |
1242 } | |
1243 | |
1244 static void | |
1245 iso2022_init_coding_stream (struct coding_stream *str) | |
1246 { | |
1247 if (str->direction == CODING_DECODE) | |
1248 reset_iso2022_decode (str->codesys, | |
1249 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
1250 else | |
1251 reset_iso2022_encode (str->codesys, | |
1252 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
1253 } | |
1254 | |
/* Rewind STR.  Rewinding is the same as initializing from scratch, so
   this simply hands STR to iso2022_init_coding_stream(). */
static void
iso2022_rewind_coding_stream (struct coding_stream *str)
{
  iso2022_init_coding_stream (str);
}
1260 | |
1261 static int | |
1262 fit_to_be_escape_quoted (unsigned char c) | |
1263 { | |
1264 switch (c) | |
1265 { | |
1266 case ISO_CODE_ESC: | |
1267 case ISO_CODE_CSI: | |
1268 case ISO_CODE_SS2: | |
1269 case ISO_CODE_SS3: | |
1270 case ISO_CODE_SO: | |
1271 case ISO_CODE_SI: | |
1272 return 1; | |
1273 | |
1274 default: | |
1275 return 0; | |
1276 } | |
1277 } | |
1278 | |
1279 static Lisp_Object | |
867 | 1280 charset_by_attributes_or_create_one (int type, Ibyte final, int dir) |
771 | 1281 { |
826 | 1282 Lisp_Object charset = charset_by_attributes (type, final, dir); |
771 | 1283 |
1284 if (NILP (charset)) | |
1285 { | |
1286 int chars, dim; | |
1287 | |
1288 switch (type) | |
1289 { | |
1290 case CHARSET_TYPE_94: | |
1291 chars = 94; dim = 1; | |
1292 break; | |
1293 case CHARSET_TYPE_96: | |
1294 chars = 96; dim = 1; | |
1295 break; | |
1296 case CHARSET_TYPE_94X94: | |
1297 chars = 94; dim = 2; | |
1298 break; | |
1299 case CHARSET_TYPE_96X96: | |
1300 chars = 96; dim = 2; | |
1301 break; | |
1302 default: | |
2500 | 1303 ABORT (); chars = 0; dim = 0; |
771 | 1304 } |
1305 | |
1306 charset = Fmake_charset (Qunbound, Qnil, | |
1307 nconc2 (list6 (Qfinal, make_char (final), | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
1308 Qchars, make_fixnum (chars), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
1309 Qdimension, make_fixnum (dim)), |
771 | 1310 list2 (Qdirection, |
1311 dir == CHARSET_LEFT_TO_RIGHT ? | |
1312 Ql2r : Qr2l))); | |
1313 } | |
1314 | |
1315 return charset; | |
1316 } | |
1317 | |
1318 /* Parse one byte of an ISO2022 escape sequence. | |
1319 If the result is an invalid escape sequence, return 0 and | |
1320 do not change anything in STR. Otherwise, if the result is | |
1321 an incomplete escape sequence, update ISO2022.ESC and | |
1322 ISO2022.ESC_BYTES and return -1. Otherwise, update | |
1323 all the state variables (but not ISO2022.ESC_BYTES) and | |
1324 return 1. | |
1325 | |
1326 If CHECK_INVALID_CHARSETS is non-zero, check for designation | |
1327 or invocation of an invalid character set and treat that as | |
1328 an unrecognized escape sequence. | |
1329 | |
2367 | 1330 */ |
771 | 1331 |
1332 static int | |
1333 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_coding_stream *iso, | |
1334 unsigned char c, unsigned int *flags, | |
1335 int check_invalid_charsets) | |
1336 { | |
1337 /* (1) If we're at the end of a designation sequence, CS is the | |
1338 charset being designated and REG is the register to designate | |
1339 it to. | |
1340 | |
1341 (2) If we're at the end of a locking-shift sequence, REG is | |
1342 the register to invoke and HALF (0 == left, 1 == right) is | |
1343 the half to invoke it into. | |
1344 | |
1345 (3) If we're at the end of a single-shift sequence, REG is | |
1346 the register to invoke. */ | |
1347 Lisp_Object cs = Qnil; | |
1348 int reg, half; | |
1349 | |
1350 /* NOTE: This code does goto's all over the fucking place. | |
1351 The reason for this is that we're basically implementing | |
1352 a state machine here, and hierarchical languages like C | |
1353 don't really provide a clean way of doing this. */ | |
1354 | |
1355 if (! (*flags & ISO_STATE_ESCAPE)) | |
1356 /* At beginning of escape sequence; we need to reset our | |
1357 escape-state variables. */ | |
1358 iso->esc = ISO_ESC_NOTHING; | |
1359 | |
1360 iso->output_literally = 0; | |
1361 iso->output_direction_sequence = 0; | |
1362 | |
1363 switch (iso->esc) | |
1364 { | |
1365 case ISO_ESC_NOTHING: | |
1366 iso->esc_bytes_index = 0; | |
1367 switch (c) | |
1368 { | |
1369 case ISO_CODE_ESC: /* Start escape sequence */ | |
1370 *flags |= ISO_STATE_ESCAPE; | |
1371 iso->esc = ISO_ESC; | |
1372 goto not_done; | |
1373 | |
1374 case ISO_CODE_CSI: /* ISO6429 (specifying directionality) */ | |
1375 *flags |= ISO_STATE_ESCAPE; | |
1376 iso->esc = ISO_ESC_5_11; | |
1377 goto not_done; | |
1378 | |
1379 case ISO_CODE_SO: /* locking shift 1 */ | |
1380 reg = 1; half = 0; | |
1381 goto locking_shift; | |
1382 case ISO_CODE_SI: /* locking shift 0 */ | |
1383 reg = 0; half = 0; | |
1384 goto locking_shift; | |
1385 | |
1386 case ISO_CODE_SS2: /* single shift */ | |
1387 reg = 2; | |
1388 goto single_shift; | |
1389 case ISO_CODE_SS3: /* single shift */ | |
1390 reg = 3; | |
1391 goto single_shift; | |
1392 | |
1393 default: /* Other control characters */ | |
1394 error: | |
1395 *flags &= ISO_STATE_LOCK; | |
1396 return 0; | |
1397 } | |
1398 | |
1399 case ISO_ESC: | |
3439 | 1400 |
1401 /* The only available ISO 2022 sequence in UTF-8 mode is ESC % @, to | |
1402 exit from it. If we see any other escape sequence, pass it through | |
1403 in the error handler. */ | |
1404 if (*flags & ISO_STATE_UTF_8 && '%' != c) | |
1405 { | |
1406 return 0; | |
1407 } | |
1408 | |
771 | 1409 switch (c) |
1410 { | |
1411 /**** single shift ****/ | |
1412 | |
1413 case 'N': /* single shift 2 */ | |
1414 reg = 2; | |
1415 goto single_shift; | |
1416 case 'O': /* single shift 3 */ | |
1417 reg = 3; | |
1418 goto single_shift; | |
1419 | |
1420 /**** locking shift ****/ | |
1421 | |
1422 case '~': /* locking shift 1 right */ | |
1423 reg = 1; half = 1; | |
1424 goto locking_shift; | |
1425 case 'n': /* locking shift 2 */ | |
1426 reg = 2; half = 0; | |
1427 goto locking_shift; | |
1428 case '}': /* locking shift 2 right */ | |
1429 reg = 2; half = 1; | |
1430 goto locking_shift; | |
1431 case 'o': /* locking shift 3 */ | |
1432 reg = 3; half = 0; | |
1433 goto locking_shift; | |
1434 case '|': /* locking shift 3 right */ | |
1435 reg = 3; half = 1; | |
1436 goto locking_shift; | |
1437 | |
1438 /**** composite ****/ | |
1439 | |
1440 #ifdef ENABLE_COMPOSITE_CHARS | |
1441 case '0': | |
1442 iso->esc = ISO_ESC_START_COMPOSITE; | |
1443 *flags = (*flags & ISO_STATE_LOCK) | | |
1444 ISO_STATE_COMPOSITE; | |
1445 return 1; | |
1446 | |
1447 case '1': | |
1448 iso->esc = ISO_ESC_END_COMPOSITE; | |
1449 *flags = (*flags & ISO_STATE_LOCK) & | |
1450 ~ISO_STATE_COMPOSITE; | |
1451 return 1; | |
1452 #else | |
1453 case '0': case '1': case '2': case '3': case '4': | |
1454 /* We simply return a flag indicating that some composite | |
1455 escape was seen. The caller will use the particular | |
1456 character to encode the appropriate "composite hack" | |
1457 character out of Vcharset_composite, so that we will | |
1458 preserve these values on output. */ | |
1459 iso->esc = ISO_ESC_START_COMPOSITE; | |
1460 *flags &= ISO_STATE_LOCK; | |
1461 return 1; | |
1462 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1463 | |
1464 /**** directionality ****/ | |
1465 | |
1466 case '[': | |
1467 iso->esc = ISO_ESC_5_11; | |
1468 goto not_done; | |
1469 | |
1470 /**** designation ****/ | |
1471 | |
1472 case '$': /* multibyte charset prefix */ | |
1473 iso->esc = ISO_ESC_2_4; | |
1474 goto not_done; | |
1475 | |
3439 | 1476 case '%': /* Prefix to an escape to or from Unicode. */ |
1477 iso->esc = ISO_ESC_2_5; | |
1478 goto not_done; | |
1479 | |
771 | 1480 default: |
1481 if (0x28 <= c && c <= 0x2F) | |
1482 { | |
1483 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8); | |
1484 goto not_done; | |
1485 } | |
1486 | |
1487 /* This function is called with CODESYS equal to nil when | |
1488 doing coding-system detection. */ | |
1489 if (!NILP (codesys) | |
1490 && XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
1491 && fit_to_be_escape_quoted (c)) | |
1492 { | |
1493 iso->esc = ISO_ESC_LITERAL; | |
1494 *flags &= ISO_STATE_LOCK; | |
1495 return 1; | |
1496 } | |
1497 | |
1498 /* bzzzt! */ | |
1499 goto error; | |
1500 } | |
1501 | |
3439 | 1502 /* ISO-IR 196 UTF-8 support. */ |
1503 case ISO_ESC_2_5: | |
1504 if ('G' == c) | |
1505 { | |
1506 /* Activate UTF-8 mode. */ | |
1507 *flags &= ISO_STATE_LOCK; | |
1508 *flags |= ISO_STATE_UTF_8; | |
1509 iso->esc = ISO_ESC_NOTHING; | |
1510 return 1; | |
1511 } | |
1512 else if ('@' == c) | |
1513 { | |
1514 /* Deactivate UTF-8 mode. */ | |
1515 *flags &= ISO_STATE_LOCK; | |
1516 *flags &= ~(ISO_STATE_UTF_8); | |
1517 iso->esc = ISO_ESC_NOTHING; | |
1518 return 1; | |
1519 } | |
1520 else | |
1521 { | |
1522 /* Oops, we don't support the other UTF-? coding systems within | |
1523 ISO 2022, only in their own context. */ | |
1524 goto error; | |
1525 } | |
771 | 1526 /**** directionality ****/ |
1527 | |
1528 case ISO_ESC_5_11: /* ISO6429 direction control */ | |
1529 if (c == ']') | |
1530 { | |
1531 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1532 goto directionality; | |
1533 } | |
1534 if (c == '0') iso->esc = ISO_ESC_5_11_0; | |
1535 else if (c == '1') iso->esc = ISO_ESC_5_11_1; | |
1536 else if (c == '2') iso->esc = ISO_ESC_5_11_2; | |
1537 else goto error; | |
1538 goto not_done; | |
1539 | |
1540 case ISO_ESC_5_11_0: | |
1541 if (c == ']') | |
1542 { | |
1543 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1544 goto directionality; | |
1545 } | |
1546 goto error; | |
1547 | |
1548 case ISO_ESC_5_11_1: | |
1549 if (c == ']') | |
1550 { | |
1551 *flags = (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1552 goto directionality; | |
1553 } | |
1554 goto error; | |
1555 | |
1556 case ISO_ESC_5_11_2: | |
1557 if (c == ']') | |
1558 { | |
1559 *flags = (*flags & ISO_STATE_LOCK) | ISO_STATE_R2L; | |
1560 goto directionality; | |
1561 } | |
1562 goto error; | |
1563 | |
1564 directionality: | |
1565 iso->esc = ISO_ESC_DIRECTIONALITY; | |
1566 /* Various junk here to attempt to preserve the direction sequences | |
1567 literally in the text if they would otherwise be swallowed due | |
1568 to invalid designations that don't show up as actual charset | |
1569 changes in the text. */ | |
1570 if (iso->invalid_switch_dir) | |
1571 { | |
1572 /* We already inserted a direction switch literally into the | |
1573 text. We assume (#### this may not be right) that the | |
1574 next direction switch is the one going the other way, | |
1575 and we need to output that literally as well. */ | |
1576 iso->output_literally = 1; | |
1577 iso->invalid_switch_dir = 0; | |
1578 } | |
1579 else | |
1580 { | |
1581 int jj; | |
1582 | |
1583 /* If we are in the thrall of an invalid designation, | |
1584 then stick the directionality sequence literally into the | |
1585 output stream so it ends up in the original text again. */ | |
1586 for (jj = 0; jj < 4; jj++) | |
1587 if (iso->invalid_designated[jj]) | |
1588 break; | |
1589 if (jj < 4) | |
1590 { | |
1591 iso->output_literally = 1; | |
1592 iso->invalid_switch_dir = 1; | |
1593 } | |
1594 else | |
1595 /* Indicate that we haven't yet seen a valid designation, | |
1596 so that if a switch-dir is directly followed by an | |
1597 invalid designation, both get inserted literally. */ | |
1598 iso->switched_dir_and_no_valid_charset_yet = 1; | |
1599 } | |
1600 return 1; | |
1601 | |
1602 | |
1603 /**** designation ****/ | |
1604 | |
1605 case ISO_ESC_2_4: | |
1606 if (0x28 <= c && c <= 0x2F) | |
1607 { | |
1608 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8); | |
1609 goto not_done; | |
1610 } | |
1611 if (0x40 <= c && c <= 0x42) | |
1612 { | |
1613 cs = charset_by_attributes_or_create_one (CHARSET_TYPE_94X94, c, | |
1614 *flags & ISO_STATE_R2L ? | |
1615 CHARSET_RIGHT_TO_LEFT : | |
1616 CHARSET_LEFT_TO_RIGHT); | |
1617 reg = 0; | |
1618 goto designated; | |
1619 } | |
1620 goto error; | |
1621 | |
1622 default: | |
1623 { | |
1624 int type = -1; | |
1625 | |
1626 if (iso->esc >= ISO_ESC_2_8 && | |
1627 iso->esc <= ISO_ESC_2_15) | |
1628 { | |
1629 type = ((iso->esc >= ISO_ESC_2_12) ? | |
1630 CHARSET_TYPE_96 : CHARSET_TYPE_94); | |
1631 reg = (iso->esc - ISO_ESC_2_8) & 3; | |
1632 } | |
1633 else if (iso->esc >= ISO_ESC_2_4_8 && | |
1634 iso->esc <= ISO_ESC_2_4_15) | |
1635 { | |
1636 type = ((iso->esc >= ISO_ESC_2_4_12) ? | |
1637 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94); | |
1638 reg = (iso->esc - ISO_ESC_2_4_8) & 3; | |
1639 } | |
1640 else | |
1641 { | |
1642 /* Can this ever be reached? -slb */ | |
2500 | 1643 ABORT (); |
771 | 1644 goto error; |
1645 } | |
1646 | |
1647 if (c < '0' || c > '~' || | |
1648 (c > 0x5F && (type == CHARSET_TYPE_94X94 || | |
1649 type == CHARSET_TYPE_96X96))) | |
1650 goto error; /* bad final byte */ | |
1651 | |
1652 cs = charset_by_attributes_or_create_one (type, c, | |
1653 *flags & ISO_STATE_R2L ? | |
1654 CHARSET_RIGHT_TO_LEFT : | |
1655 CHARSET_LEFT_TO_RIGHT); | |
1656 goto designated; | |
1657 } | |
1658 } | |
1659 | |
1660 not_done: | |
1661 iso->esc_bytes[iso->esc_bytes_index++] = (unsigned char) c; | |
1662 return -1; | |
1663 | |
1664 single_shift: | |
1665 if (check_invalid_charsets && !CHARSETP (iso->charset[reg])) | |
1666 /* can't invoke something that ain't there. */ | |
1667 goto error; | |
1668 iso->esc = ISO_ESC_SINGLE_SHIFT; | |
1669 *flags &= ISO_STATE_LOCK; | |
1670 if (reg == 2) | |
1671 *flags |= ISO_STATE_SS2; | |
1672 else | |
1673 *flags |= ISO_STATE_SS3; | |
1674 return 1; | |
1675 | |
1676 locking_shift: | |
1677 if (check_invalid_charsets && | |
1678 !CHARSETP (iso->charset[reg])) | |
1679 /* can't invoke something that ain't there. */ | |
1680 goto error; | |
1681 if (half) | |
1682 iso->register_right = reg; | |
1683 else | |
1684 iso->register_left = reg; | |
1685 *flags &= ISO_STATE_LOCK; | |
1686 iso->esc = ISO_ESC_LOCKING_SHIFT; | |
1687 return 1; | |
1688 | |
1689 designated: | |
1690 if (NILP (cs) && check_invalid_charsets) | |
1691 { | |
2500 | 1692 ABORT (); |
771 | 1693 /* #### This should never happen now that we automatically create |
1694 temporary charsets as necessary. We should probably remove | |
1695 this code. --ben */ | |
1696 iso->invalid_designated[reg] = 1; | |
1697 iso->charset[reg] = Vcharset_ascii; | |
1698 iso->esc = ISO_ESC_DESIGNATE; | |
1699 *flags &= ISO_STATE_LOCK; | |
1700 iso->output_literally = 1; | |
1701 if (iso->switched_dir_and_no_valid_charset_yet) | |
1702 { | |
1703 /* We encountered a switch-direction followed by an | |
1704 invalid designation. Ensure that the switch-direction | |
1705 gets outputted; otherwise it will probably get eaten | |
1706 when the text is written out again. */ | |
1707 iso->switched_dir_and_no_valid_charset_yet = 0; | |
1708 iso->output_direction_sequence = 1; | |
1709 /* And make sure that the switch-dir going the other | |
1710 way gets outputted, as well. */ | |
1711 iso->invalid_switch_dir = 1; | |
1712 } | |
1713 return 1; | |
1714 } | |
1715 /* This function is called with CODESYS equal to nil when | |
1716 doing coding-system detection. */ | |
1717 if (!NILP (codesys)) | |
1718 { | |
1719 charset_conversion_spec_dynarr *dyn = | |
1720 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys); | |
1721 | |
1722 if (dyn) | |
1723 { | |
1724 int i; | |
1725 | |
1726 for (i = 0; i < Dynarr_length (dyn); i++) | |
1727 { | |
1728 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
1729 if (EQ (cs, spec->from_charset)) | |
1730 cs = spec->to_charset; | |
1731 } | |
1732 } | |
1733 } | |
1734 | |
1735 iso->charset[reg] = cs; | |
1736 iso->esc = ISO_ESC_DESIGNATE; | |
1737 *flags &= ISO_STATE_LOCK; | |
1738 if (iso->invalid_designated[reg]) | |
1739 { | |
1740 iso->invalid_designated[reg] = 0; | |
1741 iso->output_literally = 1; | |
1742 } | |
1743 if (iso->switched_dir_and_no_valid_charset_yet) | |
1744 iso->switched_dir_and_no_valid_charset_yet = 0; | |
1745 return 1; | |
1746 } | |
1747 | |
1748 /* If FLAGS is a null pointer or specifies right-to-left motion, | |
1749 output a switch-dir-to-left-to-right sequence to DST. | |
1750 Also update FLAGS if it is not a null pointer. | |
1751 If INTERNAL_P is set, we are outputting in internal format and | |
1752 need to handle the CSI differently. */ | |
1753 | |
1754 static void | |
1755 restore_left_to_right_direction (Lisp_Object codesys, | |
1756 unsigned_char_dynarr *dst, | |
1757 unsigned int *flags, | |
1758 int internal_p) | |
1759 { | |
1760 if (!flags || (*flags & ISO_STATE_R2L)) | |
1761 { | |
1762 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
1763 { | |
1764 Dynarr_add (dst, ISO_CODE_ESC); | |
1765 Dynarr_add (dst, '['); | |
1766 } | |
1767 else if (internal_p) | |
1768 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
1769 else | |
1770 Dynarr_add (dst, ISO_CODE_CSI); | |
1771 Dynarr_add (dst, '0'); | |
1772 Dynarr_add (dst, ']'); | |
1773 if (flags) | |
1774 *flags &= ~ISO_STATE_R2L; | |
1775 } | |
1776 } | |
1777 | |
1778 /* If FLAGS is a null pointer or specifies a direction different from | |
1779 DIRECTION (which should be either CHARSET_RIGHT_TO_LEFT or | |
1780 CHARSET_LEFT_TO_RIGHT), output the appropriate switch-dir escape | |
1781 sequence to DST. Also update FLAGS if it is not a null pointer. | |
1782 If INTERNAL_P is set, we are outputting in internal format and | |
1783 need to handle the CSI differently. */ | |
1784 | |
1785 static void | |
1786 ensure_correct_direction (int direction, Lisp_Object codesys, | |
1787 unsigned_char_dynarr *dst, unsigned int *flags, | |
1788 int internal_p) | |
1789 { | |
1790 if ((!flags || (*flags & ISO_STATE_R2L)) && | |
1791 direction == CHARSET_LEFT_TO_RIGHT) | |
1792 restore_left_to_right_direction (codesys, dst, flags, internal_p); | |
1793 else if (!XCODING_SYSTEM_ISO2022_NO_ISO6429 (codesys) | |
1794 && (!flags || !(*flags & ISO_STATE_R2L)) && | |
1795 direction == CHARSET_RIGHT_TO_LEFT) | |
1796 { | |
1797 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
1798 { | |
1799 Dynarr_add (dst, ISO_CODE_ESC); | |
1800 Dynarr_add (dst, '['); | |
1801 } | |
1802 else if (internal_p) | |
1803 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
1804 else | |
1805 Dynarr_add (dst, ISO_CODE_CSI); | |
1806 Dynarr_add (dst, '2'); | |
1807 Dynarr_add (dst, ']'); | |
1808 if (flags) | |
1809 *flags |= ISO_STATE_R2L; | |
1810 } | |
1811 } | |
1812 | |
4096 | 1813 /* Note that this name conflicts with a function in unicode.c. */ |
1814 static void | |
1815 decode_unicode_char (int ucs, unsigned_char_dynarr *dst) | |
1816 { | |
1817 Ibyte work[MAX_ICHAR_LEN]; | |
1818 int len; | |
1819 Lisp_Object chr; | |
1820 | |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
1821 chr = Funicode_to_char(make_fixnum(ucs), Qnil); |
4096 | 1822 assert (!NILP(chr)); |
1823 len = set_itext_ichar (work, XCHAR(chr)); | |
1824 Dynarr_add_many (dst, work, len); | |
1825 } | |
1826 | |
/* Append to DST the character standing for the undecodable source byte
   OCTET: the code point UNICODE_ERROR_OCTET_RANGE_START + OCTET, as
   converted by decode_unicode_char ().  */
#define DECODE_ERROR_OCTET(octet, dst)                                  \
  decode_unicode_char (UNICODE_ERROR_OCTET_RANGE_START + (octet), (dst))
1829 | |
1830 static inline void | |
1831 indicate_invalid_utf_8 (unsigned char indicated_length, | |
1832 unsigned char counter, | |
1833 int ch, unsigned_char_dynarr *dst) | |
1834 { | |
1835 Binbyte stored = indicated_length - counter; | |
1836 Binbyte mask = "\x00\x00\xC0\xE0\xF0\xF8\xFC"[indicated_length]; | |
1837 | |
1838 while (stored > 0) | |
1839 { | |
1840 DECODE_ERROR_OCTET (((ch >> (6 * (stored - 1))) & 0x3f) | mask, | |
1841 dst); | |
1842 mask = 0x80, stored--; | |
1843 } | |
1844 } | |
1845 | |
771 | 1846 /* Convert ISO2022-format data to internal format. */ |
1847 | |
1848 static Bytecount | |
1849 iso2022_decode (struct coding_stream *str, const UExtbyte *src, | |
1850 unsigned_char_dynarr *dst, Bytecount n) | |
1851 { | |
1852 unsigned int ch = str->ch; | |
1853 #ifdef ENABLE_COMPOSITE_CHARS | |
1854 unsigned_char_dynarr *real_dst = dst; | |
1855 #endif | |
1856 struct iso2022_coding_stream *data = | |
1857 CODING_STREAM_TYPE_DATA (str, iso2022); | |
1858 unsigned int flags = data->flags; | |
1859 Bytecount orign = n; | |
1860 | |
1861 #ifdef ENABLE_COMPOSITE_CHARS | |
1862 if (flags & ISO_STATE_COMPOSITE) | |
1863 dst = data->composite_chars; | |
1864 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1865 | |
1866 while (n--) | |
1867 { | |
1868 UExtbyte c = *src++; | |
1869 if (flags & ISO_STATE_ESCAPE) | |
1870 { /* Within ESC sequence */ | |
1871 int retval = parse_iso2022_esc (str->codesys, data, | |
1872 c, &flags, 1); | |
1873 | |
1874 if (retval) | |
1875 { | |
1876 switch (data->esc) | |
1877 { | |
1878 #ifdef ENABLE_COMPOSITE_CHARS | |
1879 case ISO_ESC_START_COMPOSITE: | |
1880 if (data->composite_chars) | |
1881 Dynarr_reset (data->composite_chars); | |
1882 else | |
1883 data->composite_chars = Dynarr_new (unsigned_char); | |
1884 dst = data->composite_chars; | |
1885 break; | |
1886 case ISO_ESC_END_COMPOSITE: | |
1887 { | |
867 | 1888 Ibyte comstr[MAX_ICHAR_LEN]; |
771 | 1889 Bytecount len; |
4967 | 1890 Ichar emch = lookup_composite_char (Dynarr_begin (dst), |
771 | 1891 Dynarr_length (dst)); |
1892 dst = real_dst; | |
867 | 1893 len = set_itext_ichar (comstr, emch); |
771 | 1894 Dynarr_add_many (dst, comstr, len); |
1895 break; | |
1896 } | |
1897 #else | |
1898 case ISO_ESC_START_COMPOSITE: | |
1899 { | |
867 | 1900 Ibyte comstr[MAX_ICHAR_LEN]; |
771 | 1901 Bytecount len; |
867 | 1902 Ichar emch = make_ichar (Vcharset_composite, c - '0' + ' ', |
771 | 1903 0); |
867 | 1904 len = set_itext_ichar (comstr, emch); |
771 | 1905 Dynarr_add_many (dst, comstr, len); |
1906 break; | |
1907 } | |
1908 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1909 | |
1910 case ISO_ESC_LITERAL: | |
1911 DECODE_ADD_BINARY_CHAR (c, dst); | |
1912 break; | |
1913 | |
1914 default: | |
1915 /* Everything else handled already */ | |
1916 break; | |
1917 } | |
1918 } | |
1919 | |
1920 /* Attempted error recovery. */ | |
1921 if (data->output_direction_sequence) | |
1922 ensure_correct_direction (flags & ISO_STATE_R2L ? | |
1923 CHARSET_RIGHT_TO_LEFT : | |
1924 CHARSET_LEFT_TO_RIGHT, | |
1925 str->codesys, dst, 0, 1); | |
1926 /* More error recovery. */ | |
1927 if (!retval || data->output_literally) | |
1928 { | |
1929 /* Output the (possibly invalid) sequence */ | |
1930 int i; | |
1931 for (i = 0; i < data->esc_bytes_index; i++) | |
1932 DECODE_ADD_BINARY_CHAR (data->esc_bytes[i], dst); | |
1933 flags &= ISO_STATE_LOCK; | |
1934 if (!retval) | |
1935 n++, src--;/* Repeat the loop with the same character. */ | |
1936 else | |
1937 { | |
1938 /* No sense in reprocessing the final byte of the | |
1939 escape sequence; it could mess things up anyway. | |
1940 Just add it now. */ | |
1941 DECODE_ADD_BINARY_CHAR (c, dst); | |
1942 } | |
1943 } | |
1944 ch = 0; | |
1945 } | |
3439 | 1946 else if (flags & ISO_STATE_UTF_8) |
1947 { | |
1948 unsigned char counter = data->counter; | |
4096 | 1949 unsigned char indicated_length = data->indicated_length; |
3439 | 1950 |
1951 if (ISO_CODE_ESC == c) | |
1952 { | |
1953 /* Allow the escape sequence parser to end the UTF-8 state. */ | |
1954 flags |= ISO_STATE_ESCAPE; | |
1955 data->esc = ISO_ESC; | |
1956 data->esc_bytes_index = 1; | |
1957 continue; | |
1958 } | |
1959 | |
4096 | 1960 if (0 == counter) |
1961 { | |
1962 if (0 == (c & 0x80)) | |
1963 { | |
1964 /* ASCII. */ | |
1965 decode_unicode_char (c, dst); | |
1966 } | |
1967 else if (0 == (c & 0x40)) | |
1968 { | |
1969 /* Highest bit set, second highest not--there's | |
1970 something wrong. */ | |
1971 DECODE_ERROR_OCTET (c, dst); | |
1972 } | |
1973 else if (0 == (c & 0x20)) | |
1974 { | |
1975 ch = c & 0x1f; | |
1976 counter = 1; | |
1977 indicated_length = 2; | |
1978 } | |
1979 else if (0 == (c & 0x10)) | |
1980 { | |
1981 ch = c & 0x0f; | |
1982 counter = 2; | |
1983 indicated_length = 3; | |
1984 } | |
1985 else if (0 == (c & 0x08)) | |
1986 { | |
1987 ch = c & 0x0f; | |
1988 counter = 3; | |
1989 indicated_length = 4; | |
1990 } | |
1991 /* We support lengths longer than 4 here, since we want to | |
1992 represent UTF-8 error chars as distinct from the | |
1993 corresponding ISO 8859-1 characters in escape-quoted. | |
1994 | |
1995 However, we can't differentiate UTF-8 error chars as | |
1996 written to disk, and UTF-8 errors in escape-quoted. This | |
1997 is not a big problem; | |
1998 non-Unicode-chars-encoded-as-UTF-8-in-ISO-2022 is not | |
1999 deployed, in practice, so if such a sequence of octets | |
2000 occurs, XEmacs generated it. */ | |
2001 else if (0 == (c & 0x04)) | |
2002 { | |
2003 ch = c & 0x03; | |
2004 counter = 4; | |
2005 indicated_length = 5; | |
2006 } | |
2007 else if (0 == (c & 0x02)) | |
2008 { | |
2009 ch = c & 0x01; | |
2010 counter = 5; | |
2011 indicated_length = 6; | |
2012 } | |
2013 else | |
2014 { | |
2015 /* #xFF is not a valid leading byte in any form of | |
2016 UTF-8. */ | |
2017 DECODE_ERROR_OCTET (c, dst); | |
2018 | |
2019 } | |
2020 } | |
2021 else | |
2022 { | |
2023 /* counter != 0 */ | |
2024 if ((0 == (c & 0x80)) || (0 != (c & 0x40))) | |
2025 { | |
2026 indicate_invalid_utf_8(indicated_length, | |
2027 counter, | |
2028 ch, dst); | |
2029 if (c & 0x80) | |
2030 { | |
2031 DECODE_ERROR_OCTET (c, dst); | |
2032 } | |
2033 else | |
2034 { | |
2035 /* The character just read is ASCII. Treat it as | |
2036 such. */ | |
2037 decode_unicode_char (c, dst); | |
2038 } | |
2039 ch = 0; | |
2040 counter = 0; | |
2041 } | |
2042 else | |
2043 { | |
2044 ch = (ch << 6) | (c & 0x3f); | |
2045 counter--; | |
2046 | |
2047 /* Just processed the final byte. Emit the character. */ | |
2048 if (!counter) | |
2049 { | |
2050 /* Don't accept over-long sequences, or surrogates. */ | |
2051 if ((ch < 0x80) || | |
2052 ((ch < 0x800) && indicated_length > 2) || | |
2053 ((ch < 0x10000) && indicated_length > 3) || | |
2054 /* We accept values above #x110000 in | |
2055 escape-quoted, though not in UTF-8. */ | |
2056 /* (ch > 0x110000) || */ | |
2057 valid_utf_16_surrogate(ch)) | |
2058 { | |
2059 indicate_invalid_utf_8(indicated_length, | |
2060 counter, | |
2061 ch, dst); | |
2062 } | |
2063 else | |
2064 { | |
2065 decode_unicode_char (ch, dst); | |
2066 } | |
2067 ch = 0; | |
2068 } | |
2069 } | |
2070 } | |
2071 | |
2072 if (str->eof && ch) | |
2073 { | |
2074 DECODE_ERROR_OCTET (ch, dst); | |
2075 ch = 0; | |
2076 } | |
3439 | 2077 |
2078 data->counter = counter; | |
4096 | 2079 data->indicated_length = indicated_length; |
3439 | 2080 } |
826 | 2081 else if (byte_c0_p (c) || byte_c1_p (c)) |
771 | 2082 { /* Control characters */ |
2083 | |
2084 /***** Error-handling *****/ | |
2085 | |
2086 /* If we were in the middle of a character, dump out the | |
2087 partial character. */ | |
2088 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2089 | |
2090 /* If we just saw a single-shift character, dump it out. | |
2091 This may dump out the wrong sort of single-shift character, | |
2092 but least it will give an indication that something went | |
2093 wrong. */ | |
2094 if (flags & ISO_STATE_SS2) | |
2095 { | |
2096 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst); | |
2097 flags &= ~ISO_STATE_SS2; | |
2098 } | |
2099 if (flags & ISO_STATE_SS3) | |
2100 { | |
2101 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst); | |
2102 flags &= ~ISO_STATE_SS3; | |
2103 } | |
2104 | |
2105 /***** Now handle the control characters. *****/ | |
2106 | |
2107 flags &= ISO_STATE_LOCK; | |
2108 | |
2109 if (!parse_iso2022_esc (str->codesys, data, c, &flags, 1)) | |
2110 DECODE_ADD_BINARY_CHAR (c, dst); | |
2111 } | |
2112 else | |
2113 { /* Graphic characters */ | |
2114 Lisp_Object charset; | |
2115 int lb; | |
2116 int reg; | |
2117 | |
2118 /* Now determine the charset. */ | |
2119 reg = ((flags & ISO_STATE_SS2) ? 2 | |
2120 : (flags & ISO_STATE_SS3) ? 3 | |
826 | 2121 : !byte_ascii_p (c) ? data->register_right |
771 | 2122 : data->register_left); |
2123 charset = data->charset[reg]; | |
2124 | |
2125 /* Error checking: */ | |
2126 if (! CHARSETP (charset) | |
2127 || data->invalid_designated[reg] | |
2128 || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL) | |
2129 && XCHARSET_CHARS (charset) == 94)) | |
2130 /* Mrmph. We are trying to invoke a register that has no | |
2131 or an invalid charset in it, or trying to add a character | |
2132 outside the range of the charset. Insert that char literally | |
2133 to preserve it for the output. */ | |
2134 { | |
2135 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2136 DECODE_ADD_BINARY_CHAR (c, dst); | |
2137 } | |
2138 | |
2139 else | |
2140 { | |
2141 /* Things are probably hunky-dorey. */ | |
2142 | |
2143 /* Fetch reverse charset, maybe. */ | |
2144 if (((flags & ISO_STATE_R2L) && | |
2145 XCHARSET_DIRECTION (charset) == CHARSET_LEFT_TO_RIGHT) | |
2146 || | |
2147 (!(flags & ISO_STATE_R2L) && | |
2148 XCHARSET_DIRECTION (charset) == CHARSET_RIGHT_TO_LEFT)) | |
2149 { | |
2150 Lisp_Object new_charset = | |
2151 XCHARSET_REVERSE_DIRECTION_CHARSET (charset); | |
2152 if (!NILP (new_charset)) | |
2153 charset = new_charset; | |
2154 } | |
2155 | |
2156 lb = XCHARSET_LEADING_BYTE (charset); | |
2157 switch (XCHARSET_REP_BYTES (charset)) | |
2158 { | |
2159 case 1: /* ASCII */ | |
2160 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2161 Dynarr_add (dst, c & 0x7F); | |
2162 break; | |
2163 | |
2164 case 2: /* one-byte official */ | |
2165 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2166 Dynarr_add (dst, lb); | |
2167 Dynarr_add (dst, c | 0x80); | |
2168 break; | |
2169 | |
2170 case 3: /* one-byte private or two-byte official */ | |
2171 if (XCHARSET_PRIVATE_P (charset)) | |
2172 { | |
2173 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2174 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1); | |
2175 Dynarr_add (dst, lb); | |
2176 Dynarr_add (dst, c | 0x80); | |
2177 } | |
2178 else | |
2179 { | |
2180 if (ch) | |
2181 { | |
2182 Dynarr_add (dst, lb); | |
2183 Dynarr_add (dst, ch | 0x80); | |
2184 Dynarr_add (dst, c | 0x80); | |
2185 ch = 0; | |
2186 } | |
2187 else | |
2188 ch = c; | |
2189 } | |
2190 break; | |
2191 | |
2192 default: /* two-byte private */ | |
2193 if (ch) | |
2194 { | |
2195 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2); | |
2196 Dynarr_add (dst, lb); | |
2197 Dynarr_add (dst, ch | 0x80); | |
2198 Dynarr_add (dst, c | 0x80); | |
2199 ch = 0; | |
2200 } | |
2201 else | |
2202 ch = c; | |
2203 } | |
2204 } | |
2205 | |
2206 if (!ch) | |
2207 flags &= ISO_STATE_LOCK; | |
2208 } | |
2209 | |
2210 } | |
2211 | |
2212 if (str->eof) | |
2213 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2214 | |
2215 data->flags = flags; | |
2216 str->ch = ch; | |
2217 return orign; | |
2218 } | |
2219 | |
2220 | |
2221 /***** ISO2022 encoder *****/ | |
2222 | |
2223 /* Designate CHARSET into register REG. */ | |
2224 | |
2225 static void | |
2226 iso2022_designate (Lisp_Object charset, int reg, | |
2227 struct coding_stream *str, unsigned_char_dynarr *dst) | |
2228 { | |
2229 static const char inter94[] = "()*+"; | |
2230 static const char inter96[] = ",-./"; | |
2231 int type; | |
2232 unsigned char final; | |
2233 struct iso2022_coding_stream *data = | |
2234 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2235 Lisp_Object old_charset = data->charset[reg]; | |
2236 | |
2237 data->charset[reg] = charset; | |
2238 if (!CHARSETP (charset)) | |
2239 /* charset might be an initial nil or t. */ | |
2240 return; | |
2241 type = XCHARSET_TYPE (charset); | |
2242 final = XCHARSET_FINAL (charset); | |
2243 if (!data->force_charset_on_output[reg] && | |
2244 CHARSETP (old_charset) && | |
2245 XCHARSET_TYPE (old_charset) == type && | |
2246 XCHARSET_FINAL (old_charset) == final) | |
2247 return; | |
2248 | |
2249 data->force_charset_on_output[reg] = 0; | |
2250 | |
2251 { | |
2252 charset_conversion_spec_dynarr *dyn = | |
2253 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (str->codesys); | |
2254 | |
2255 if (dyn) | |
2256 { | |
2257 int i; | |
2258 | |
2259 for (i = 0; i < Dynarr_length (dyn); i++) | |
2260 { | |
2261 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
2262 if (EQ (charset, spec->from_charset)) | |
2263 charset = spec->to_charset; | |
2264 } | |
2265 } | |
2266 } | |
2267 | |
2268 Dynarr_add (dst, ISO_CODE_ESC); | |
3439 | 2269 |
771 | 2270 switch (type) |
2271 { | |
2272 case CHARSET_TYPE_94: | |
2273 Dynarr_add (dst, inter94[reg]); | |
2274 break; | |
2275 case CHARSET_TYPE_96: | |
2276 Dynarr_add (dst, inter96[reg]); | |
2277 break; | |
2278 case CHARSET_TYPE_94X94: | |
2279 Dynarr_add (dst, '$'); | |
2280 if (reg != 0 | |
2281 || !(XCODING_SYSTEM_ISO2022_SHORT (str->codesys)) | |
2282 || final < '@' | |
2283 || final > 'B') | |
2284 Dynarr_add (dst, inter94[reg]); | |
2285 break; | |
2286 case CHARSET_TYPE_96X96: | |
2287 Dynarr_add (dst, '$'); | |
2288 Dynarr_add (dst, inter96[reg]); | |
2289 break; | |
2290 } | |
2291 Dynarr_add (dst, final); | |
2292 } | |
2293 | |
2294 static void | |
2295 ensure_normal_shift (struct coding_stream *str, unsigned_char_dynarr *dst) | |
2296 { | |
2297 struct iso2022_coding_stream *data = | |
2298 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2299 | |
2300 if (data->register_left != 0) | |
2301 { | |
2302 Dynarr_add (dst, ISO_CODE_SI); | |
2303 data->register_left = 0; | |
2304 } | |
2305 } | |
2306 | |
2307 static void | |
2308 ensure_shift_out (struct coding_stream *str, unsigned_char_dynarr *dst) | |
2309 { | |
2310 struct iso2022_coding_stream *data = | |
2311 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2312 | |
2313 if (data->register_left != 1) | |
2314 { | |
2315 Dynarr_add (dst, ISO_CODE_SO); | |
2316 data->register_left = 1; | |
2317 } | |
2318 } | |
2319 | |
2320 /* Convert internally-formatted data to ISO2022 format. */ | |
2321 | |
2322 static Bytecount | |
867 | 2323 iso2022_encode (struct coding_stream *str, const Ibyte *src, |
771 | 2324 unsigned_char_dynarr *dst, Bytecount n) |
2325 { | |
2326 unsigned char charmask; | |
867 | 2327 Ibyte c; |
771 | 2328 unsigned char char_boundary; |
2329 unsigned int ch = str->ch; | |
2330 Lisp_Object codesys = str->codesys; | |
2331 int i; | |
2332 Lisp_Object charset; | |
2333 int half; | |
2334 struct iso2022_coding_stream *data = | |
2335 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2336 unsigned int flags = data->flags; | |
2337 Bytecount orign = n; | |
2338 | |
2339 #ifdef ENABLE_COMPOSITE_CHARS | |
2340 /* flags for handling composite chars. We do a little switcheroo | |
2341 on the source while we're outputting the composite char. */ | |
2342 Bytecount saved_n = 0; | |
867 | 2343 const Ibyte *saved_src = NULL; |
771 | 2344 int in_composite = 0; |
2345 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2346 | |
2347 char_boundary = data->current_char_boundary; | |
2348 charset = data->current_charset; | |
2349 half = data->current_half; | |
2350 | |
2351 #ifdef ENABLE_COMPOSITE_CHARS | |
2352 back_to_square_n: | |
2353 #endif | |
2354 while (n--) | |
2355 { | |
2356 c = *src++; | |
2357 | |
826 | 2358 if (byte_ascii_p (c)) |
771 | 2359 { /* Processing ASCII character */ |
2360 ch = 0; | |
2361 | |
3439 | 2362 if (flags & ISO_STATE_UTF_8) |
2363 { | |
2364 Dynarr_add (dst, ISO_CODE_ESC); | |
2365 Dynarr_add (dst, '%'); | |
2366 Dynarr_add (dst, '@'); | |
2367 flags &= ~(ISO_STATE_UTF_8); | |
2368 } | |
2369 | |
771 | 2370 restore_left_to_right_direction (codesys, dst, &flags, 0); |
2371 | |
2372 /* Make sure G0 contains ASCII */ | |
2373 if ((c > ' ' && c < ISO_CODE_DEL) || | |
2374 !XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys)) | |
2375 { | |
2376 ensure_normal_shift (str, dst); | |
2377 iso2022_designate (Vcharset_ascii, 0, str, dst); | |
2378 } | |
2379 | |
2380 /* If necessary, restore everything to the default state | |
2381 at end-of-line */ | |
2382 if (!(XCODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys))) | |
2383 { | |
2384 /* NOTE: CRLF encoding happens *BEFORE* other encoding. | |
2385 Thus, even though we're working with internal-format | |
2386 data, there may be CR's or CRLF sequences representing | |
2387 newlines. */ | |
2388 if (c == '\r' || (c == '\n' && !(flags & ISO_STATE_CR))) | |
2389 { | |
2390 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
2391 | |
2392 ensure_normal_shift (str, dst); | |
2393 | |
2394 for (i = 0; i < 4; i++) | |
2395 { | |
2396 Lisp_Object initial_charset = | |
2397 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
2398 iso2022_designate (initial_charset, i, str, dst); | |
2399 } | |
2400 } | |
2401 if (c == '\r') | |
2402 flags |= ISO_STATE_CR; | |
2403 else | |
2404 flags &= ~ISO_STATE_CR; | |
2405 } | |
2406 | |
2407 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
2408 && fit_to_be_escape_quoted (c)) | |
2409 Dynarr_add (dst, ISO_CODE_ESC); | |
2410 Dynarr_add (dst, c); | |
2411 char_boundary = 1; | |
2412 } | |
867 | 2413 else if (ibyte_leading_byte_p (c) || ibyte_leading_byte_p (ch)) |
771 | 2414 { /* Processing Leading Byte */ |
2415 ch = 0; | |
826 | 2416 charset = charset_by_leading_byte (c); |
2417 if (leading_byte_prefix_p (c)) | |
3439 | 2418 { |
2419 ch = c; | |
2420 } | |
2421 else if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
2422 { | |
2423 assert (!EQ (charset, Vcharset_control_1) | |
2424 && !EQ (charset, Vcharset_composite)); | |
2425 | |
2426 /* If the character set is to be encoded as UTF-8, the escape | |
2427 is always the same. */ | |
2428 if (!(flags & ISO_STATE_UTF_8)) | |
2429 { | |
2430 Dynarr_add (dst, ISO_CODE_ESC); | |
2431 Dynarr_add (dst, '%'); | |
2432 Dynarr_add (dst, 'G'); | |
2433 flags |= ISO_STATE_UTF_8; | |
2434 } | |
2435 } | |
771 | 2436 else if (!EQ (charset, Vcharset_control_1) |
2437 && !EQ (charset, Vcharset_composite)) | |
2438 { | |
2439 int reg; | |
2440 | |
3439 | 2441 /* End the UTF-8 state. */ |
2442 if (flags & ISO_STATE_UTF_8) | |
2443 { | |
2444 Dynarr_add (dst, ISO_CODE_ESC); | |
2445 Dynarr_add (dst, '%'); | |
2446 Dynarr_add (dst, '@'); | |
2447 flags &= ~(ISO_STATE_UTF_8); | |
2448 } | |
2449 | |
771 | 2450 ensure_correct_direction (XCHARSET_DIRECTION (charset), |
2451 codesys, dst, &flags, 0); | |
2452 | |
2453 /* Now determine which register to use. */ | |
2454 reg = -1; | |
2455 for (i = 0; i < 4; i++) | |
2456 { | |
2457 if (EQ (charset, data->charset[i]) || | |
2458 EQ (charset, | |
2459 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))) | |
2460 { | |
2461 reg = i; | |
2462 break; | |
2463 } | |
2464 } | |
2465 | |
2466 if (reg == -1) | |
2467 { | |
2468 if (XCHARSET_GRAPHIC (charset) != 0) | |
2469 { | |
2470 if (!NILP (data->charset[1]) && | |
2471 (!XCODING_SYSTEM_ISO2022_SEVEN (codesys) || | |
2472 XCODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys))) | |
2473 reg = 1; | |
2474 else if (!NILP (data->charset[2])) | |
2475 reg = 2; | |
2476 else if (!NILP (data->charset[3])) | |
2477 reg = 3; | |
2478 else | |
2479 reg = 0; | |
2480 } | |
2481 else | |
2482 reg = 0; | |
2483 } | |
2484 | |
2485 iso2022_designate (charset, reg, str, dst); | |
2486 | |
2487 /* Now invoke that register. */ | |
2488 switch (reg) | |
2489 { | |
2490 case 0: | |
2491 ensure_normal_shift (str, dst); | |
2492 half = 0; | |
2493 break; | |
2494 | |
2495 case 1: | |
2496 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
2497 { | |
2498 ensure_shift_out (str, dst); | |
2499 half = 0; | |
2500 } | |
2501 else | |
2502 half = 1; | |
2503 break; | |
2504 | |
2505 case 2: | |
2506 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
2507 { | |
2508 Dynarr_add (dst, ISO_CODE_ESC); | |
2509 Dynarr_add (dst, 'N'); | |
2510 half = 0; | |
2511 } | |
2512 else | |
2513 { | |
2514 Dynarr_add (dst, ISO_CODE_SS2); | |
2515 half = 1; | |
2516 } | |
2517 break; | |
2518 | |
2519 case 3: | |
2520 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
2521 { | |
2522 Dynarr_add (dst, ISO_CODE_ESC); | |
2523 Dynarr_add (dst, 'O'); | |
2524 half = 0; | |
2525 } | |
2526 else | |
2527 { | |
2528 Dynarr_add (dst, ISO_CODE_SS3); | |
2529 half = 1; | |
2530 } | |
2531 break; | |
2532 | |
2533 default: | |
2500 | 2534 ABORT (); |
771 | 2535 } |
2536 } | |
2537 char_boundary = 0; | |
2538 } | |
2539 else | |
2540 { /* Processing Non-ASCII character */ | |
2541 charmask = (half == 0 ? 0x7F : 0xFF); | |
2542 char_boundary = 1; | |
2543 if (EQ (charset, Vcharset_control_1)) | |
2544 { | |
2545 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
5100
3d91f0b64469
fix bad bug with escape-quoted handling
Ben Wing <ben@xemacs.org>
parents:
4976
diff
changeset
|
2546 && fit_to_be_escape_quoted (c - 0x20)) |
771 | 2547 Dynarr_add (dst, ISO_CODE_ESC); |
2548 /* you asked for it ... */ | |
2549 Dynarr_add (dst, c - 0x20); | |
2550 } | |
2551 #ifndef ENABLE_COMPOSITE_CHARS | |
2552 else if (EQ (charset, Vcharset_composite)) | |
2553 { | |
2554 if (c >= 160 || c <= 164) /* Someone might have stuck in | |
2555 something else */ | |
2556 { | |
2557 Dynarr_add (dst, ISO_CODE_ESC); | |
2558 Dynarr_add (dst, c - 160 + '0'); | |
2559 } | |
2560 } | |
2561 #endif | |
2562 else | |
2563 { | |
2564 switch (XCHARSET_REP_BYTES (charset)) | |
2565 { | |
2566 case 2: | |
3439 | 2567 dynarr_add_2022_one_dimension (charset, c, |
2568 charmask, dst); | |
771 | 2569 break; |
2570 case 3: | |
2571 if (XCHARSET_PRIVATE_P (charset)) | |
2572 { | |
3439 | 2573 dynarr_add_2022_one_dimension (charset, c, |
2574 charmask, dst); | |
771 | 2575 ch = 0; |
2576 } | |
2577 else if (ch) | |
2578 { | |
2579 #ifdef ENABLE_COMPOSITE_CHARS | |
2580 if (EQ (charset, Vcharset_composite)) | |
2581 { | |
3439 | 2582 /* #### Hasn't been written to handle composite |
2583 characters yet. */ | |
2584 assert(!XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
771 | 2585 if (in_composite) |
2586 { | |
2587 /* #### Bother! We don't know how to | |
2588 handle this yet. */ | |
2589 Dynarr_add (dst, '~'); | |
2590 } | |
2591 else | |
2592 { | |
867 | 2593 Ichar emch = make_ichar (Vcharset_composite, |
771 | 2594 ch & 0x7F, c & 0x7F); |
2595 Lisp_Object lstr = composite_char_string (emch); | |
2596 saved_n = n; | |
2597 saved_src = src; | |
2598 in_composite = 1; | |
2599 src = XSTRING_DATA (lstr); | |
2600 n = XSTRING_LENGTH (lstr); | |
2601 Dynarr_add (dst, ISO_CODE_ESC); | |
2602 Dynarr_add (dst, '0'); /* start composing */ | |
2603 } | |
2604 } | |
2605 else | |
2606 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2607 { | |
3439 | 2608 dynarr_add_2022_two_dimensions (charset, c, ch, |
2609 charmask, dst); | |
771 | 2610 } |
2611 ch = 0; | |
2612 } | |
2613 else | |
2614 { | |
2615 ch = c; | |
2616 char_boundary = 0; | |
2617 } | |
2618 break; | |
2619 case 4: | |
2620 if (ch) | |
2621 { | |
3439 | 2622 dynarr_add_2022_two_dimensions (charset, c, ch, |
2623 charmask, dst); | |
771 | 2624 ch = 0; |
2625 } | |
2626 else | |
2627 { | |
2628 ch = c; | |
2629 char_boundary = 0; | |
2630 } | |
2631 break; | |
2632 default: | |
2500 | 2633 ABORT (); |
771 | 2634 } |
2635 } | |
2636 } | |
2637 } | |
2638 | |
2639 #ifdef ENABLE_COMPOSITE_CHARS | |
2640 if (in_composite) | |
2641 { | |
2642 n = saved_n; | |
2643 src = saved_src; | |
2644 in_composite = 0; | |
2645 Dynarr_add (dst, ISO_CODE_ESC); | |
2646 Dynarr_add (dst, '1'); /* end composing */ | |
2647 goto back_to_square_n; /* Wheeeeeeeee ..... */ | |
2648 } | |
2649 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2650 | |
2651 if (char_boundary && str->eof) | |
2652 { | |
2653 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
2654 ensure_normal_shift (str, dst); | |
2655 for (i = 0; i < 4; i++) | |
2656 { | |
2657 Lisp_Object initial_charset = | |
2658 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
2659 iso2022_designate (initial_charset, i, str, dst); | |
2660 } | |
2661 } | |
2662 | |
2663 data->flags = flags; | |
2664 str->ch = ch; | |
2665 data->current_char_boundary = char_boundary; | |
2666 data->current_charset = charset; | |
2667 data->current_half = half; | |
2668 | |
2669 /* Verbum caro factum est! */ | |
2670 return orign; | |
2671 } | |
2672 | |
2673 static Bytecount | |
2674 iso2022_convert (struct coding_stream *str, | |
2675 const UExtbyte *src, | |
2676 unsigned_char_dynarr *dst, Bytecount n) | |
2677 { | |
2678 if (str->direction == CODING_DECODE) | |
2679 return iso2022_decode (str, src, dst, n); | |
2680 else | |
2681 return iso2022_encode (str, src, dst, n); | |
2682 } | |
2683 | |
2684 static void | |
2685 iso2022_mark (Lisp_Object codesys) | |
2686 { | |
2687 int i; | |
2688 | |
2689 for (i = 0; i < 4; i++) | |
2690 mark_object (XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); | |
2691 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)) | |
2692 { | |
2693 for (i = 0; | |
2694 i < Dynarr_length (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)); | |
2695 i++) | |
2696 { | |
2697 struct charset_conversion_spec *ccs = | |
2698 Dynarr_atp (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), i); | |
2699 mark_object (ccs->from_charset); | |
2700 mark_object (ccs->to_charset); | |
2701 } | |
2702 } | |
2703 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)) | |
2704 { | |
2705 for (i = 0; | |
2706 i < Dynarr_length (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)); | |
2707 i++) | |
2708 { | |
2709 struct charset_conversion_spec *ccs = | |
2710 Dynarr_atp (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), i); | |
2711 mark_object (ccs->from_charset); | |
2712 mark_object (ccs->to_charset); | |
2713 } | |
2714 } | |
2715 } | |
2716 | |
2717 static void | |
2718 iso2022_finalize (Lisp_Object cs) | |
2719 { | |
2720 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)) | |
2721 { | |
2722 Dynarr_free (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)); | |
2723 XCODING_SYSTEM_ISO2022_INPUT_CONV (cs) = 0; | |
2724 } | |
2725 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)) | |
2726 { | |
2727 Dynarr_free (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)); | |
2728 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs) = 0; | |
2729 } | |
2730 } | |
2731 | |
2732 /* Given a list of charset conversion specs as specified in a Lisp | |
2733 program, parse it into STORE_HERE. */ | |
2734 | |
2735 static void | |
2736 parse_charset_conversion_specs (charset_conversion_spec_dynarr *store_here, | |
2737 Lisp_Object spec_list) | |
2738 { | |
2367 | 2739 EXTERNAL_LIST_LOOP_2 (car, spec_list) |
771 | 2740 { |
2741 Lisp_Object from, to; | |
2742 struct charset_conversion_spec spec; | |
2743 | |
2744 if (!CONSP (car) || !CONSP (XCDR (car)) || !NILP (XCDR (XCDR (car)))) | |
2745 invalid_argument ("Invalid charset conversion spec", car); | |
2746 from = Fget_charset (XCAR (car)); | |
2747 to = Fget_charset (XCAR (XCDR (car))); | |
2748 if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to)) | |
2749 invalid_operation_2 | |
2750 ("Attempted conversion between different charset types", | |
2751 from, to); | |
2752 spec.from_charset = from; | |
2753 spec.to_charset = to; | |
2754 | |
2755 Dynarr_add (store_here, spec); | |
2756 } | |
2757 } | |
2758 | |
2759 /* Given a dynarr LOAD_HERE of internally-stored charset conversion | |
2760 specs, return the equivalent as the Lisp programmer would see it. | |
2761 | |
2762 If LOAD_HERE is 0, return Qnil. */ | |
2763 | |
2764 static Lisp_Object | |
2765 unparse_charset_conversion_specs (charset_conversion_spec_dynarr *load_here, | |
2766 int names) | |
2767 { | |
2768 int i; | |
2769 Lisp_Object result; | |
2770 | |
2771 if (!load_here) | |
2772 return Qnil; | |
2773 for (i = 0, result = Qnil; i < Dynarr_length (load_here); i++) | |
2774 { | |
2775 struct charset_conversion_spec *ccs = Dynarr_atp (load_here, i); | |
2776 if (names) | |
2777 result = Fcons (list2 (XCHARSET_NAME (ccs->from_charset), | |
2778 XCHARSET_NAME (ccs->to_charset)), result); | |
2779 else | |
2780 result = Fcons (list2 (ccs->from_charset, ccs->to_charset), result); | |
2781 } | |
2782 | |
2783 return Fnreverse (result); | |
2784 } | |
2785 | |
2786 static int | |
2787 iso2022_putprop (Lisp_Object codesys, | |
2788 Lisp_Object key, | |
2789 Lisp_Object value) | |
2790 { | |
2791 #define FROB_INITIAL_CHARSET(charset_num) \ | |
2792 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \ | |
2793 ((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value)) | |
2794 | |
2795 if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0); | |
2796 else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1); | |
2797 else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2); | |
2798 else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3); | |
2799 | |
2800 #define FROB_FORCE_CHARSET(charset_num) \ | |
2801 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = \ | |
2802 !NILP (value) | |
2803 | |
2804 else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0); | |
2805 else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1); | |
2806 else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2); | |
2807 else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3); | |
2808 | |
2809 #define FROB_BOOLEAN_PROPERTY(prop) \ | |
2810 XCODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value) | |
2811 | |
2812 else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT); | |
2813 else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL); | |
2814 else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL); | |
2815 else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN); | |
2816 else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT); | |
2817 else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429); | |
2818 else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED); | |
2819 | |
2820 else if (EQ (key, Qinput_charset_conversion)) | |
2821 { | |
2822 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys) = | |
2823 Dynarr_new (charset_conversion_spec); | |
2824 parse_charset_conversion_specs | |
2825 (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), value); | |
2826 } | |
2827 else if (EQ (key, Qoutput_charset_conversion)) | |
2828 { | |
2829 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys) = | |
2830 Dynarr_new (charset_conversion_spec); | |
2831 parse_charset_conversion_specs | |
2832 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), value); | |
2833 } | |
2834 else | |
2835 return 0; | |
2836 | |
2837 return 1; | |
2838 } | |
2839 | |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
/* Wrapper for a parameter name that is referenced only when
   ENABLE_COMPOSITE_CHARS is defined; in other builds the name is passed
   through UNUSED(). */
#ifdef ENABLE_COMPOSITE_CHARS
#define USED_IF_COMPOSITE_CHARS(x) x
#else
#define USED_IF_COMPOSITE_CHARS(x) UNUSED (x)
#endif

/* Release per-stream ISO2022 resources of STR.  Only builds with
   ENABLE_COMPOSITE_CHARS have anything to release: the composite_chars
   dynarr, when one has been allocated. */
static void
iso2022_finalize_coding_stream (struct coding_stream *
                                USED_IF_COMPOSITE_CHARS (str))
{
#ifdef ENABLE_COMPOSITE_CHARS
  struct iso2022_coding_stream *iso =
    CODING_STREAM_TYPE_DATA (str, iso2022);

  if (iso->composite_chars != 0)
    Dynarr_free (iso->composite_chars);
#endif
}
2858 | |
2859 static void | |
2860 iso2022_init (Lisp_Object codesys) | |
2861 { | |
2862 int i; | |
2863 for (i = 0; i < 4; i++) | |
2864 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i) = Qnil; | |
2865 } | |
2866 | |
2867 static Lisp_Object | |
2868 coding_system_charset (Lisp_Object coding_system, int gnum) | |
2869 { | |
2870 Lisp_Object cs | |
2871 = XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, gnum); | |
2872 | |
2873 return CHARSETP (cs) ? XCHARSET_NAME (cs) : Qnil; | |
2874 } | |
2875 | |
2876 static Lisp_Object | |
2877 iso2022_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2878 { | |
2879 if (EQ (prop, Qcharset_g0)) | |
2880 return coding_system_charset (coding_system, 0); | |
2881 else if (EQ (prop, Qcharset_g1)) | |
2882 return coding_system_charset (coding_system, 1); | |
2883 else if (EQ (prop, Qcharset_g2)) | |
2884 return coding_system_charset (coding_system, 2); | |
2885 else if (EQ (prop, Qcharset_g3)) | |
2886 return coding_system_charset (coding_system, 3); | |
2887 | |
2888 #define FORCE_CHARSET(charset_num) \ | |
2889 (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT \ | |
2890 (coding_system, charset_num) ? Qt : Qnil) | |
2891 | |
2892 else if (EQ (prop, Qforce_g0_on_output)) | |
2893 return FORCE_CHARSET (0); | |
2894 else if (EQ (prop, Qforce_g1_on_output)) | |
2895 return FORCE_CHARSET (1); | |
2896 else if (EQ (prop, Qforce_g2_on_output)) | |
2897 return FORCE_CHARSET (2); | |
2898 else if (EQ (prop, Qforce_g3_on_output)) | |
2899 return FORCE_CHARSET (3); | |
2900 | |
2901 #define LISP_BOOLEAN(prop) \ | |
2902 (XCODING_SYSTEM_ISO2022_##prop (coding_system) ? Qt : Qnil) | |
2903 | |
2904 else if (EQ (prop, Qshort)) return LISP_BOOLEAN (SHORT); | |
2905 else if (EQ (prop, Qno_ascii_eol)) return LISP_BOOLEAN (NO_ASCII_EOL); | |
2906 else if (EQ (prop, Qno_ascii_cntl)) return LISP_BOOLEAN (NO_ASCII_CNTL); | |
2907 else if (EQ (prop, Qseven)) return LISP_BOOLEAN (SEVEN); | |
2908 else if (EQ (prop, Qlock_shift)) return LISP_BOOLEAN (LOCK_SHIFT); | |
2909 else if (EQ (prop, Qno_iso6429)) return LISP_BOOLEAN (NO_ISO6429); | |
2910 else if (EQ (prop, Qescape_quoted)) return LISP_BOOLEAN (ESCAPE_QUOTED); | |
2911 | |
2912 else if (EQ (prop, Qinput_charset_conversion)) | |
2913 return | |
2914 unparse_charset_conversion_specs | |
2915 (XCODING_SYSTEM_ISO2022_INPUT_CONV (coding_system), 0); | |
2916 else if (EQ (prop, Qoutput_charset_conversion)) | |
2917 return | |
2918 unparse_charset_conversion_specs | |
2919 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (coding_system), 0); | |
2920 else | |
2921 return Qunbound; | |
2922 } | |
2923 | |
2924 static void | |
2286 | 2925 iso2022_print (Lisp_Object cs, Lisp_Object printcharfun, |
2926 int UNUSED (escapeflag)) | |
771 | 2927 { |
2928 int i; | |
2929 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2930 write_ascstring (printcharfun, "("); |
771 | 2931 for (i = 0; i < 4; i++) |
2932 { | |
2933 Lisp_Object charset = coding_system_charset (cs, i); | |
2934 if (i > 0) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2935 write_ascstring (printcharfun, ", "); |
771 | 2936 write_fmt_string (printcharfun, "g%d=", i); |
800 | 2937 print_internal (CHARSETP (charset) ? XCHARSET_NAME (charset) : charset, printcharfun, 0); |
771 | 2938 if (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (cs, i)) |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2939 write_ascstring (printcharfun, "(force)"); |
771 | 2940 } |
2941 | |
3084 | 2942 #define FROB(prop) \ |
2943 if (!NILP (iso2022_getprop (cs, prop))) \ | |
2944 { \ | |
2945 write_fmt_string_lisp (printcharfun, ", %s", 1, prop); \ | |
771 | 2946 } |
2947 | |
2948 FROB (Qshort); | |
2949 FROB (Qno_ascii_eol); | |
2950 FROB (Qno_ascii_cntl); | |
2951 FROB (Qseven); | |
2952 FROB (Qlock_shift); | |
2953 FROB (Qno_iso6429); | |
2954 FROB (Qescape_quoted); | |
2955 | |
2956 { | |
2957 Lisp_Object val = | |
2958 unparse_charset_conversion_specs | |
2959 (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs), 1); | |
2960 if (!NILP (val)) | |
2961 { | |
800 | 2962 write_fmt_string_lisp (printcharfun, ", input-charset-conversion=%s", 1, val); |
771 | 2963 } |
2964 val = | |
2965 unparse_charset_conversion_specs | |
2966 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs), 1); | |
2967 if (!NILP (val)) | |
2968 { | |
800 | 2969 write_fmt_string_lisp (printcharfun, ", output-charset-conversion=%s", 1, val); |
771 | 2970 } |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2971 write_ascstring (printcharfun, ")"); |
771 | 2972 } |
2973 } | |
2974 | |
2975 | |
2976 /************************************************************************/ | |
2977 /* ISO2022 detector */ | |
2978 /************************************************************************/ | |
2979 | |
/* The ISO2022 detector and the categories it distinguishes. */
DEFINE_DETECTOR (iso2022);

/* Seven-bit bytes only, no locking shift. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_7);

/* Eight-bit bytes, no locking shift and no single shift; charsets are
   switched by designation. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_designate);

/* Eight-bit bytes, no locking shift, no designation sequences;
   one-dimension characters in the upper half. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_1);

/* Eight-bit bytes, no locking shift, no designation sequences;
   two-dimension characters in the upper half. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_2);

/* Any ISO2022 system that uses locking shift. */
DEFINE_DETECTOR_CATEGORY (iso2022, iso_lock_shift);
2994 | |
/* Per-detection state of the ISO2022 detector; filled in and updated by
   iso2022_detect(). */
struct iso2022_detector
{
  /* Nonzero once iso2022_detect() has zeroed this structure and
     allocated ISO. */
  int initted;
  /* Escape-sequence parser state handed to parse_iso2022_esc();
     released by iso2022_finalize_detection_state(). */
  struct iso2022_coding_stream *iso;
  /* Parser flags, passed by address to parse_iso2022_esc();
     ISO_STATE_ESCAPE is tested here to see whether an escape sequence
     is in progress. */
  unsigned int flags;

  /* Temporary bookkeeping for the current run of high bytes. */
  int high_byte_count;          /* bytes >= 0xA0 seen in the open run */
  unsigned int saw_single_shift_just_now:1;

  /* Running state; the likelihoods are derived from it at the end of
     each iso2022_detect() call. */
  unsigned int seen_high_byte:1;        /* some byte >= 0x80 was seen */
  unsigned int seen_single_shift:1;
  unsigned int seen_locking_shift:1;
  unsigned int seen_designate:1;
  unsigned int bad_single_byte_sequences;
  unsigned int bad_multibyte_escape_sequences;
  unsigned int good_multibyte_escape_sequences;
  int even_high_byte_groups;    /* closed runs of even length */
  int longest_even_high_byte;   /* length of the longest even run */
  int odd_high_byte_groups;     /* closed runs of odd length */
};
3017 | |
3018 static void | |
3019 iso2022_detect (struct detection_state *st, const UExtbyte *src, | |
3020 Bytecount n) | |
3021 { | |
3022 Bytecount orign = n; | |
3023 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
3024 | |
3025 /* #### There are serious deficiencies in the recognition mechanism | |
3026 here. This needs to be much smarter if it's going to cut it. | |
3027 The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while | |
3028 it should be detected as Latin-1. | |
3029 All the ISO2022 stuff in this file should be synced up with the | |
3030 code from FSF Emacs-21.0, in which Mule should be more or less stable. | |
3031 Perhaps we should wait till R2L works in FSF Emacs? */ | |
3032 | |
3033 /* We keep track of running state on our own, and set the categories at the | |
3034 end; that way we can reflect the correct state each time we finish, but | |
3035 not get confused by those results the next time around. */ | |
3036 | |
3037 if (!data->initted) | |
3038 { | |
3039 xzero (*data); | |
3040 data->iso = xnew_and_zero (struct iso2022_coding_stream); | |
3041 reset_iso2022_decode (Qnil, data->iso); | |
3042 data->initted = 1; | |
3043 } | |
3044 | |
3045 while (n--) | |
3046 { | |
3047 UExtbyte c = *src++; | |
3048 if (c >= 0x80) | |
3049 data->seen_high_byte = 1; | |
3050 if (c >= 0xA0) | |
3051 data->high_byte_count++; | |
3052 else | |
3053 { | |
3054 if (data->high_byte_count && | |
3055 !data->saw_single_shift_just_now) | |
3056 { | |
3057 if (data->high_byte_count & 1) | |
3058 data->odd_high_byte_groups++; | |
3059 else | |
985 | 3060 { |
3061 data->even_high_byte_groups++; | |
3062 if (data->longest_even_high_byte < data->high_byte_count) | |
3063 data->longest_even_high_byte = data->high_byte_count; | |
3064 } | |
771 | 3065 } |
3066 data->high_byte_count = 0; | |
3067 data->saw_single_shift_just_now = 0; | |
3068 } | |
3069 if (!(data->flags & ISO_STATE_ESCAPE) | |
826 | 3070 && (byte_c0_p (c) || byte_c1_p (c))) |
771 | 3071 { /* control chars */ |
3072 switch (c) | |
3073 { | |
3074 /* Allow and ignore control characters that you might | |
3075 reasonably see in a text file */ | |
3076 case '\r': | |
3077 case '\n': | |
3078 case '\t': | |
3079 case 7: /* bell */ | |
3080 case 8: /* backspace */ | |
3081 case 11: /* vertical tab */ | |
3082 case 12: /* form feed */ | |
3083 case 26: /* MS-DOS C-z junk */ | |
3084 case 31: /* '^_' -- for info */ | |
3085 goto label_continue_loop; | |
3086 | |
3087 default: | |
3088 break; | |
3089 } | |
3090 } | |
3091 | |
826 | 3092 if ((data->flags & ISO_STATE_ESCAPE) || byte_c0_p (c) |
3093 || byte_c1_p (c)) | |
771 | 3094 { |
3095 switch (parse_iso2022_esc (Qnil, data->iso, c, | |
3096 &data->flags, 0)) | |
3097 { | |
3098 case 1: /* done */ | |
3099 if (data->iso->esc_bytes_index > 0) | |
3100 data->good_multibyte_escape_sequences++; | |
3101 switch (data->iso->esc) | |
3102 { | |
3103 case ISO_ESC_DESIGNATE: | |
3104 data->seen_designate = 1; | |
3105 break; | |
3106 case ISO_ESC_LOCKING_SHIFT: | |
3107 data->seen_locking_shift = 1; | |
3108 break; | |
3109 case ISO_ESC_SINGLE_SHIFT: | |
3110 data->saw_single_shift_just_now = 1; | |
3111 data->seen_single_shift = 1; | |
3112 break; | |
3113 default: | |
3114 break; | |
3115 } | |
3116 break; | |
3117 | |
3118 case -1: /* not done */ | |
3119 break; | |
3120 | |
3121 case 0: /* error */ | |
3122 if (data->iso->esc == ISO_ESC_NOTHING) | |
3123 data->bad_single_byte_sequences++; | |
3124 else | |
3125 data->bad_multibyte_escape_sequences++; | |
3126 } | |
3127 } | |
3128 label_continue_loop:; | |
3129 } | |
3130 | |
985 | 3131 if (data->high_byte_count && |
3132 !data->saw_single_shift_just_now) | |
3133 { | |
3134 if (data->high_byte_count & 1) | |
3135 data->odd_high_byte_groups++; | |
3136 else | |
3137 { | |
3138 data->even_high_byte_groups++; | |
3139 if (data->longest_even_high_byte < data->high_byte_count) | |
3140 data->longest_even_high_byte = data->high_byte_count; | |
3141 } | |
3142 } | |
3143 | |
771 | 3144 if (data->bad_multibyte_escape_sequences > 2 || |
3145 (data->bad_multibyte_escape_sequences > 0 && | |
3146 data->good_multibyte_escape_sequences / | |
3147 data->bad_multibyte_escape_sequences < 10)) | |
3148 /* Just making it up ... */ | |
3149 SET_DET_RESULTS (st, iso2022, DET_NEARLY_IMPOSSIBLE); | |
3150 else if (data->bad_single_byte_sequences > 5 || | |
3151 (data->bad_single_byte_sequences > 0 && | |
3152 (data->good_multibyte_escape_sequences + | |
3153 data->even_high_byte_groups + | |
3154 data->odd_high_byte_groups) / | |
3155 data->bad_single_byte_sequences < 10)) | |
3156 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3157 else if (data->seen_locking_shift) | |
3158 { | |
3159 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
3160 DET_RESULT (st, iso_lock_shift) = DET_QUITE_PROBABLE; | |
3161 } | |
3162 else if (!data->seen_high_byte) | |
3163 { | |
3164 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3165 if (data->good_multibyte_escape_sequences) | |
3166 DET_RESULT (st, iso_7) = DET_QUITE_PROBABLE; | |
3167 else if (data->seen_single_shift) | |
3168 DET_RESULT (st, iso_7) = DET_SOMEWHAT_LIKELY; | |
3169 else | |
3170 { | |
3171 /* If we've just seen pure 7-bit data, no escape sequences, | |
3172 then we can't give much likelihood; but if we've seen enough | |
3173 of this data, we can assume some unlikelihood of any 8-bit | |
3174 encoding */ | |
3175 if (orign + st->bytes_seen >= 1000) | |
3176 DET_RESULT (st, iso_7) = DET_AS_LIKELY_AS_UNLIKELY; | |
3177 else | |
3178 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
3179 } | |
3180 } | |
3181 else if (data->seen_designate) | |
3182 { | |
3183 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
3184 if (data->seen_single_shift) | |
3185 /* #### Does this really make sense? */ | |
3186 DET_RESULT (st, iso_8_designate) = DET_SOMEWHAT_UNLIKELY; | |
3187 else | |
3188 DET_RESULT (st, iso_8_designate) = DET_QUITE_PROBABLE; | |
3189 } | |
3190 else if (data->odd_high_byte_groups > 0 && | |
3191 data->even_high_byte_groups == 0) | |
3192 { | |
3193 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3194 if (data->seen_single_shift) | |
3195 DET_RESULT (st, iso_8_1) = DET_QUITE_PROBABLE; | |
3196 else | |
3197 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
3198 } | |
3199 else if (data->odd_high_byte_groups == 0 && | |
3200 data->even_high_byte_groups > 0) | |
3201 { | |
985 | 3202 #if 0 |
771 | 3203 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); |
3204 if (data->even_high_byte_groups > 10) | |
3205 { | |
3206 if (data->seen_single_shift) | |
3207 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
3208 else | |
3209 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
3210 if (data->even_high_byte_groups < 50) | |
3211 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_UNLIKELY; | |
3212 /* else it stays at quite improbable */ | |
3213 } | |
985 | 3214 #else |
3215 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3216 if (data->seen_single_shift) | |
3217 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
3218 else if (data->even_high_byte_groups > 10) | |
3219 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
3220 else if (data->longest_even_high_byte > 6) | |
3221 DET_RESULT (st, iso_8_2) = DET_SLIGHTLY_LIKELY; | |
3222 #endif | |
771 | 3223 } |
3224 else if (data->odd_high_byte_groups > 0 && | |
3225 data->even_high_byte_groups > 0) | |
3393 | 3226 { |
3227 /* Well, this could be a Latin-1 text, with most high-byte | |
3228 characters single, but sometimes two are together, though | |
3229 this happens not as often. This is common for Western | |
3230 European languages like German, French, Danish, Swedish, etc. | |
3231 Then we would either have a rather small file and | |
3232 even_high_byte_groups would be low. | |
3233 Or we would have a larger file and the ratio of odd to even | |
3234 groups would be very high. */ | |
3235 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3236 if (data->even_high_byte_groups <= 3 || | |
3237 data->odd_high_byte_groups >= 10 * data->even_high_byte_groups) | |
3238 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
3239 } | |
771 | 3240 else |
3241 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
3242 } | |
3243 | |
3244 static void | |
3245 iso2022_finalize_detection_state (struct detection_state *st) | |
3246 { | |
3247 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
3248 if (data->iso) | |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3249 { |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3250 xfree (data->iso); |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3251 data->iso = 0; |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3252 } |
771 | 3253 } |
3254 | |
3255 | |
3256 /************************************************************************/ | |
3257 /* CCL methods */ | |
3258 /************************************************************************/ | |
3259 | |
3260 /* Converter written in CCL. */ | |
3261 | |
3262 struct ccl_coding_system | |
3263 { | |
3264 /* For a CCL coding system, these specify the CCL programs used for | |
3265 decoding (input) and encoding (output). */ | |
3266 Lisp_Object decode; | |
3267 Lisp_Object encode; | |
3268 }; | |
3269 | |
/* Accessors for the decode/encode CCL programs of a CCL coding system.
   The CODING_SYSTEM_* forms take the coding system structure; the
   XCODING_SYSTEM_* forms take the Lisp object and go through
   XCODING_SYSTEM(). */
#define CODING_SYSTEM_CCL_DECODE(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->decode)
#define CODING_SYSTEM_CCL_ENCODE(codesys) \
  (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->encode)
#define XCODING_SYSTEM_CCL_DECODE(codesys) \
  CODING_SYSTEM_CCL_DECODE (XCODING_SYSTEM (codesys))
#define XCODING_SYSTEM_CCL_ENCODE(codesys) \
  CODING_SYSTEM_CCL_ENCODE (XCODING_SYSTEM (codesys))
3278 | |
3279 struct ccl_coding_stream | |
3280 { | |
3281 /* state of the running CCL program */ | |
3282 struct ccl_program ccl; | |
3283 }; | |
3284 | |
1204 | 3285 static const struct memory_description ccl_coding_system_description[] = { |
3286 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, decode) }, | |
3287 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, encode) }, | |
771 | 3288 { XD_END } |
3289 }; | |
3290 | |
1204 | 3291 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (ccl); |
3292 | |
771 | 3293 static void |
3294 ccl_mark (Lisp_Object codesys) | |
3295 { | |
3296 mark_object (XCODING_SYSTEM_CCL_DECODE (codesys)); | |
3297 mark_object (XCODING_SYSTEM_CCL_ENCODE (codesys)); | |
3298 } | |
3299 | |
3300 static Bytecount | |
3301 ccl_convert (struct coding_stream *str, const UExtbyte *src, | |
3302 unsigned_char_dynarr *dst, Bytecount n) | |
3303 { | |
3304 struct ccl_coding_stream *data = | |
3305 CODING_STREAM_TYPE_DATA (str, ccl); | |
3306 Bytecount orign = n; | |
3307 | |
3308 data->ccl.last_block = str->eof; | |
3309 /* When applying a CCL program to a stream, SRC must not be NULL -- this | |
3310 is a special signal to the driver that read and write operations are | |
3311 not allowed. The code does not actually look at what SRC points to if | |
3312 N == 0. | |
3313 */ | |
3314 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", | |
3315 dst, n, 0, | |
3316 str->direction == CODING_DECODE ? CCL_MODE_DECODING : | |
3317 CCL_MODE_ENCODING); | |
3318 return orign; | |
3319 } | |
3320 | |
3321 static void | |
3322 ccl_init_coding_stream (struct coding_stream *str) | |
3323 { | |
3324 struct ccl_coding_stream *data = | |
3325 CODING_STREAM_TYPE_DATA (str, ccl); | |
3326 | |
3327 setup_ccl_program (&data->ccl, | |
3328 str->direction == CODING_DECODE ? | |
3329 XCODING_SYSTEM_CCL_DECODE (str->codesys) : | |
3330 XCODING_SYSTEM_CCL_ENCODE (str->codesys)); | |
3331 } | |
3332 | |
/* Rewinding a CCL coding stream simply sets up its CCL program state
   again, exactly as at stream creation. */
static void
ccl_rewind_coding_stream (struct coding_stream *str)
{
  ccl_init_coding_stream (str);
}
3338 | |
3339 static void | |
3340 ccl_init (Lisp_Object codesys) | |
3341 { | |
3342 XCODING_SYSTEM_CCL_DECODE (codesys) = Qnil; | |
3343 XCODING_SYSTEM_CCL_ENCODE (codesys) = Qnil; | |
3344 } | |
3345 | |
3346 static int | |
3347 ccl_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
3348 { | |
3349 if (EQ (key, Qdecode)) | |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3350 XCODING_SYSTEM_CCL_DECODE (codesys) = get_ccl_program (value); |
771 | 3351 else if (EQ (key, Qencode)) |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3352 XCODING_SYSTEM_CCL_ENCODE (codesys) = get_ccl_program (value); |
771 | 3353 return 1; |
3354 } | |
3355 | |
3356 static Lisp_Object | |
3357 ccl_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
3358 { | |
3359 if (EQ (prop, Qdecode)) | |
3360 return XCODING_SYSTEM_CCL_DECODE (coding_system); | |
3361 else if (EQ (prop, Qencode)) | |
3362 return XCODING_SYSTEM_CCL_ENCODE (coding_system); | |
3363 else | |
3364 return Qunbound; | |
3365 } | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3366 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3367 /************************************************************************/ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3368 /* FIXED_WIDTH methods */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3369 /************************************************************************/ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3370 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3371 struct fixed_width_coding_system |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3372 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3373 /* For a fixed_width coding system, these specify the CCL programs |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3374 used for decoding (input) and encoding (output). */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3375 Lisp_Object decode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3376 Lisp_Object encode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3377 Lisp_Object from_unicode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3378 Lisp_Object invalid_sequences_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3379 Lisp_Object query_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3380 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3381 /* This is not directly accessible from Lisp; it is a concatenation of the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3382 previous two strings, used for simplicity of implementation. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3383 Lisp_Object invalid_and_query_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3384 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3385 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
/* Accessors for the fixed_width-specific members of a coding system.
   Each one expands to the named member of the fixed_width type data of
   CODESYS, as obtained through CODING_SYSTEM_TYPE_DATA. */
#define CODING_SYSTEM_FIXED_WIDTH_DECODE(codesys)			\
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->decode)

#define CODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys)			\
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->encode)

#define CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys)			\
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->from_unicode)

#define CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys)	\
  (CODING_SYSTEM_TYPE_DATA (codesys,					\
                            fixed_width)->invalid_sequences_skip_chars)

#define CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys)		\
  (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->query_skip_chars)

#define CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys)	\
  (CODING_SYSTEM_TYPE_DATA (codesys,					\
                            fixed_width)->invalid_and_query_skip_chars)
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3400 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
/* The same accessors, but taking a Lisp_Object: the argument is passed
   through XCODING_SYSTEM before the member is accessed. */
#define XCODING_SYSTEM_FIXED_WIDTH_DECODE(codesys)			\
  (CODING_SYSTEM_FIXED_WIDTH_DECODE (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys)			\
  (CODING_SYSTEM_FIXED_WIDTH_ENCODE (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys)		\
  (CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS		\
   (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys)		\
  (CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (XCODING_SYSTEM (codesys)))

#define XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) \
  (CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS		\
   (XCODING_SYSTEM (codesys)))
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3415 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3416 struct fixed_width_coding_stream |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3417 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3418 /* state of the running CCL program */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3419 struct ccl_program ccl; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3420 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3421 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3422 static const struct memory_description |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3423 fixed_width_coding_system_description[] = { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3424 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, decode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3425 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, encode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3426 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3427 from_unicode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3428 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3429 invalid_sequences_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3430 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3431 query_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3432 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3433 invalid_and_query_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3434 { XD_END } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3435 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3436 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3437 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3438 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3439 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3440 fixed_width_mark (Lisp_Object codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3441 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3442 mark_object (XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3443 mark_object (XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3444 mark_object (XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3445 mark_object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3446 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3447 mark_object (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) ); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3448 mark_object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3449 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3450 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3451 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3452 static Bytecount |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3453 fixed_width_convert (struct coding_stream *str, const UExtbyte *src, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3454 unsigned_char_dynarr *dst, Bytecount n) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3455 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3456 struct fixed_width_coding_stream *data = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3457 CODING_STREAM_TYPE_DATA (str, fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3458 Bytecount orign = n; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3459 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3460 data->ccl.last_block = str->eof; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3461 /* When applying a CCL program to a stream, SRC must not be NULL -- this |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3462 is a special signal to the driver that read and write operations are |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3463 not allowed. The code does not actually look at what SRC points to if |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3464 N == 0. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3465 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3466 dst, n, 0, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3467 str->direction == CODING_DECODE ? CCL_MODE_DECODING : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3468 CCL_MODE_ENCODING); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3469 return orign; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3470 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3471 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3472 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3473 fixed_width_init_coding_stream (struct coding_stream *str) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3474 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3475 struct fixed_width_coding_stream *data = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3476 CODING_STREAM_TYPE_DATA (str, fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3477 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3478 setup_ccl_program (&data->ccl, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3479 str->direction == CODING_DECODE ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3480 XCODING_SYSTEM_FIXED_WIDTH_DECODE (str->codesys) : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3481 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (str->codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3482 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3483 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
/* Rewind method for fixed_width coding streams.  Rewinding is
   implemented by simply running the stream's init method again on
   STR. */
static void
fixed_width_rewind_coding_stream (struct coding_stream *str)
{
  fixed_width_init_coding_stream (str);
}
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3489 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3490 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3491 fixed_width_init (Lisp_Object codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3492 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3493 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3494 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3495 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3496 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3497 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3498 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3499 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3500 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3501 static int |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3502 fixed_width_putprop (Lisp_Object codesys, Lisp_Object key, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3503 Lisp_Object value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3504 { |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3505 if (EQ (key, Qdecode)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3506 { |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3507 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = get_ccl_program (value); |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3508 } |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3509 else if (EQ (key, Qencode)) |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3510 { |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3511 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = get_ccl_program (value); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3512 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3513 else if (EQ (key, Qfrom_unicode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3514 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3515 CHECK_HASH_TABLE (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3516 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3517 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3518 else if (EQ (key, Qinvalid_sequences_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3519 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3520 CHECK_STRING (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3521 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3522 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3523 value = Fcopy_sequence (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3524 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3525 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3526 = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3527 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3528 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3529 = concat2 (value, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3530 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3531 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3532 else if (EQ (key, Qquery_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3533 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3534 CHECK_STRING (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3535 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3536 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3537 value = Fcopy_sequence (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3538 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3539 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3540 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3541 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3542 = concat2 (value, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3543 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3544 (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3545 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3546 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3547 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3548 return 0; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3549 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3550 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3551 return 1; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3552 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3553 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3554 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3555 fixed_width_getprop (Lisp_Object codesys, Lisp_Object prop) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3556 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3557 if (EQ (prop, Qdecode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3558 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3559 return XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3560 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3561 else if (EQ (prop, Qencode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3562 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3563 return XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3564 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3565 else if (EQ (prop, Qfrom_unicode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3566 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3567 return XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3568 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3569 else if (EQ (prop, Qinvalid_sequences_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3570 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3571 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3572 return |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3573 Fcopy_sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3574 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3575 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3576 else if (EQ (prop, Qquery_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3577 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3578 return |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3579 Fcopy_sequence (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3580 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3581 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3582 return Qunbound; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3583 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3584 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3585 static Lisp_Object Vfixed_width_query_ranges_cache; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3586 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3587 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3588 fixed_width_skip_chars_data_given_strings (Lisp_Object string, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3589 Lisp_Object query_skip_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3590 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3591 invalid_sequences_skip_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3592 Binbyte *fastmap, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3593 int fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3594 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3595 Lisp_Object result = Fgethash (string, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3596 Vfixed_width_query_ranges_cache, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3597 Qnil); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3598 REGISTER Ibyte *p, *pend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3599 REGISTER Ichar c; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3600 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3601 memset (fastmap, query_coding_unencodable, fastmap_len); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3602 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3603 if (!NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3604 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3605 int i; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3606 Lisp_Object ranged; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3607 assert (RANGE_TABLEP (result)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3608 for (i = 0; i < fastmap_len; ++i) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3609 { |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3610 ranged = Fget_range_table (make_fixnum (i), result, Qnil); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3611 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3612 if (EQ (ranged, Qsucceeded)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3613 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3614 fastmap [i] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3615 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3616 else if (EQ (ranged, Qinvalid_sequence)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3617 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3618 fastmap [i] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3619 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3620 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3621 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3622 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3623 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3624 result = Fmake_range_table (Qstart_closed_end_closed); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3625 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3626 p = XSTRING_DATA (query_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3627 pend = p + XSTRING_LENGTH (query_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3628 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3629 while (p != pend) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3630 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3631 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3632 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3633 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3634 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3635 if (c == '\\') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3636 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3637 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3638 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3639 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3640 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3641 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3642 if (p != pend && *p == '-') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3643 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3644 Ichar cend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3645 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3646 /* Skip over the dash. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3647 p++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3648 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3649 cend = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3650 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3651 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qsucceeded, |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3652 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3653 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3654 while (c <= cend && c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3655 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3656 fastmap[c] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3657 c++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3658 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3659 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3660 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3661 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3662 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3663 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3664 if (c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3665 fastmap[c] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3666 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3667 Fput_range_table (make_fixnum (c), make_fixnum (c), Qsucceeded, result); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3668 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3669 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3670 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3671 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3672 p = XSTRING_DATA (invalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3673 pend = p + XSTRING_LENGTH (invalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3674 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3675 while (p != pend) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3676 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3677 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3678 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3679 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3680 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3681 if (c == '\\') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3682 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3683 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3684 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3685 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3686 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3687 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3688 if (p != pend && *p == '-') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3689 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3690 Ichar cend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3691 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3692 /* Skip over the dash. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3693 p++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3694 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3695 cend = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3696 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3697 Fput_range_table (make_fixnum (c), make_fixnum (cend), Qinvalid_sequence, |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3698 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3699 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3700 while (c <= cend && c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3701 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3702 fastmap[c] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3703 c++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3704 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3705 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3706 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3707 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3708 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3709 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3710 if (c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3711 fastmap[c] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3712 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3713 Fput_range_table (make_fixnum (c), make_fixnum (c), Qinvalid_sequence, |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3714 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3715 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3716 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3717 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3718 Fputhash (string, result, Vfixed_width_query_ranges_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3719 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3720 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3721 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3722 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3723 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3724 fixed_width_query (Lisp_Object codesys, struct buffer *buf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3725 Charbpos end, int flags) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3726 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3727 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3728 Charbpos pos_byte = BYTE_BUF_PT (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3729 Lisp_Object skip_chars_range_table, from_unicode, checked_unicode, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3730 result = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3731 enum query_coding_failure_reasons failed_reason, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3732 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3733 Binbyte fastmap[0xff]; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3734 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3735 from_unicode = XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3736 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3737 skip_chars_range_table = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3738 fixed_width_skip_chars_data_given_strings |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3739 ((flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3740 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3741 (codesys) : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3742 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3743 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3744 (flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3745 build_ascstring("") : |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3746 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3747 fastmap, (int)(sizeof (fastmap))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3748 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3749 if (flags & QUERY_METHOD_HIGHLIGHT && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3750 /* If we're being called really early, live without highlights getting |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3751 cleared properly: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3752 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3753 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3754 /* It's okay to call Lisp here, the only non-stack object we may have |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3755 allocated up to this point is skip_chars_range_table, and that's |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3756 reachable from its entry in Vfixed_width_query_ranges_cache. */ |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3757 call3 (Qquery_coding_clear_highlights, make_fixnum (pos), make_fixnum (end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3758 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3759 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3760 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3761 while (pos < end) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3762 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3763 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3764 if ((ch < (int) (sizeof(fastmap))) ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3765 (fastmap[ch] == query_coding_succeeded) : |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3766 (EQ (Qsucceeded, Fget_range_table (make_fixnum (ch), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3767 skip_chars_range_table, Qnil)))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3768 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3769 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3770 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3771 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3772 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3773 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3774 fail_range_start = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3775 while ((pos < end) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3776 ((!(flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3777 EQ (Qinvalid_sequence, Fget_range_table |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3778 (make_fixnum (ch), skip_chars_range_table, Qnil)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3779 && (failed_reason = query_coding_invalid_sequence)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3780 || ((NILP ((checked_unicode = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3781 Fgethash (Fchar_to_unicode (make_char (ch)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3782 from_unicode, Qnil)))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3783 && (failed_reason = query_coding_unencodable))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3784 && (previous_failed_reason == query_coding_succeeded |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3785 || previous_failed_reason == failed_reason)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3786 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3787 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3788 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3789 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3790 previous_failed_reason = failed_reason; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3791 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3792 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3793 if (fail_range_start == pos) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3794 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3795 /* The character can actually be encoded; move on. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3796 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3797 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3798 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3799 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3800 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3801 assert (previous_failed_reason == query_coding_invalid_sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3802 || previous_failed_reason == query_coding_unencodable); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3803 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3804 if (flags & QUERY_METHOD_ERRORP) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3805 { |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3806 signal_error_2 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3807 (Qtext_conversion_error, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3808 "Cannot encode using coding system", |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3809 make_string_from_buffer (buf, fail_range_start, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3810 pos - fail_range_start), |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3811 XCODING_SYSTEM_NAME (codesys)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3812 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3813 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3814 if (NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3815 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3816 result = Fmake_range_table (Qstart_closed_end_open); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3817 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3818 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3819 fail_range_end = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3820 |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3821 Fput_range_table (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3822 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3823 (previous_failed_reason |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3824 == query_coding_unencodable ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3825 Qunencodable : Qinvalid_sequence), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3826 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3827 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3828 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3829 if (flags & QUERY_METHOD_HIGHLIGHT) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3830 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3831 Lisp_Object extent |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3832 = Fmake_extent (make_fixnum (fail_range_start), |
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3833 make_fixnum (fail_range_end), |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3834 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3835 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3836 Fset_extent_priority |
5581
56144c8593a8
Mechanically change INT to FIXNUM in our sources.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5565
diff
changeset
|
3837 (extent, make_fixnum (2 + mouse_highlight_priority)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3838 Fset_extent_face (extent, Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3839 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3840 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3841 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3842 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3843 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3844 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3845 } |
771 | 3846 |
3847 | |
3848 /************************************************************************/ | |
3849 /* Initialization */ | |
3850 /************************************************************************/ | |
3851 | |
3852 void | |
3853 syms_of_mule_coding (void) | |
3854 { | |
3855 DEFSUBR (Fdecode_shift_jis_char); | |
3856 DEFSUBR (Fencode_shift_jis_char); | |
3857 DEFSUBR (Fdecode_big5_char); | |
3858 DEFSUBR (Fencode_big5_char); | |
3859 | |
3860 DEFSYMBOL (Qbig5); | |
3861 DEFSYMBOL (Qshift_jis); | |
3862 DEFSYMBOL (Qccl); | |
3863 | |
3864 DEFSYMBOL (Qcharset_g0); | |
3865 DEFSYMBOL (Qcharset_g1); | |
3866 DEFSYMBOL (Qcharset_g2); | |
3867 DEFSYMBOL (Qcharset_g3); | |
3868 DEFSYMBOL (Qforce_g0_on_output); | |
3869 DEFSYMBOL (Qforce_g1_on_output); | |
3870 DEFSYMBOL (Qforce_g2_on_output); | |
3871 DEFSYMBOL (Qforce_g3_on_output); | |
3872 DEFSYMBOL (Qno_iso6429); | |
3873 DEFSYMBOL (Qinput_charset_conversion); | |
3874 DEFSYMBOL (Qoutput_charset_conversion); | |
3875 | |
3876 DEFSYMBOL (Qshort); | |
3877 DEFSYMBOL (Qno_ascii_eol); | |
3878 DEFSYMBOL (Qno_ascii_cntl); | |
3879 DEFSYMBOL (Qlock_shift); | |
3880 | |
3881 DEFSYMBOL (Qiso_7); | |
3882 DEFSYMBOL (Qiso_8_designate); | |
3883 DEFSYMBOL (Qiso_8_1); | |
3884 DEFSYMBOL (Qiso_8_2); | |
3885 DEFSYMBOL (Qiso_lock_shift); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3886 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3887 DEFSYMBOL (Qinvalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3888 DEFSYMBOL (Qquery_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3889 DEFSYMBOL (Qfixed_width); |
771 | 3890 } |
3891 | |
3892 void | |
3893 coding_system_type_create_mule_coding (void) | |
3894 { | |
3895 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022, "iso2022-coding-system-p"); | |
3896 CODING_SYSTEM_HAS_METHOD (iso2022, mark); | |
3897 CODING_SYSTEM_HAS_METHOD (iso2022, convert); | |
3898 CODING_SYSTEM_HAS_METHOD (iso2022, finalize_coding_stream); | |
3899 CODING_SYSTEM_HAS_METHOD (iso2022, init_coding_stream); | |
3900 CODING_SYSTEM_HAS_METHOD (iso2022, rewind_coding_stream); | |
3901 CODING_SYSTEM_HAS_METHOD (iso2022, init); | |
3902 CODING_SYSTEM_HAS_METHOD (iso2022, print); | |
3903 CODING_SYSTEM_HAS_METHOD (iso2022, finalize); | |
3904 CODING_SYSTEM_HAS_METHOD (iso2022, putprop); | |
3905 CODING_SYSTEM_HAS_METHOD (iso2022, getprop); | |
3906 | |
3907 INITIALIZE_DETECTOR (iso2022); | |
3908 DETECTOR_HAS_METHOD (iso2022, detect); | |
3909 DETECTOR_HAS_METHOD (iso2022, finalize_detection_state); | |
3910 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_7); | |
3911 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_designate); | |
3912 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_1); | |
3913 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_2); | |
3914 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_lock_shift); | |
3915 | |
3916 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (ccl, "ccl-coding-system-p"); | |
3917 CODING_SYSTEM_HAS_METHOD (ccl, mark); | |
3918 CODING_SYSTEM_HAS_METHOD (ccl, convert); | |
3919 CODING_SYSTEM_HAS_METHOD (ccl, init); | |
3920 CODING_SYSTEM_HAS_METHOD (ccl, init_coding_stream); | |
3921 CODING_SYSTEM_HAS_METHOD (ccl, rewind_coding_stream); | |
3922 CODING_SYSTEM_HAS_METHOD (ccl, putprop); | |
3923 CODING_SYSTEM_HAS_METHOD (ccl, getprop); | |
3924 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3925 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3926 "fixed-width-coding-system-p"); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3927 CODING_SYSTEM_HAS_METHOD (fixed_width, mark); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3928 CODING_SYSTEM_HAS_METHOD (fixed_width, convert); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3929 CODING_SYSTEM_HAS_METHOD (fixed_width, query); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3930 CODING_SYSTEM_HAS_METHOD (fixed_width, init); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3931 CODING_SYSTEM_HAS_METHOD (fixed_width, init_coding_stream); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3932 CODING_SYSTEM_HAS_METHOD (fixed_width, rewind_coding_stream); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3933 CODING_SYSTEM_HAS_METHOD (fixed_width, putprop); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3934 CODING_SYSTEM_HAS_METHOD (fixed_width, getprop); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3935 |
771 | 3936 INITIALIZE_CODING_SYSTEM_TYPE (shift_jis, "shift-jis-coding-system-p"); |
3937 CODING_SYSTEM_HAS_METHOD (shift_jis, convert); | |
3938 | |
3939 INITIALIZE_DETECTOR (shift_jis); | |
3940 DETECTOR_HAS_METHOD (shift_jis, detect); | |
3941 INITIALIZE_DETECTOR_CATEGORY (shift_jis, shift_jis); | |
3942 | |
3943 INITIALIZE_CODING_SYSTEM_TYPE (big5, "big5-coding-system-p"); | |
3944 CODING_SYSTEM_HAS_METHOD (big5, convert); | |
3945 | |
3946 INITIALIZE_DETECTOR (big5); | |
3947 DETECTOR_HAS_METHOD (big5, detect); | |
3948 INITIALIZE_DETECTOR_CATEGORY (big5, big5); | |
3949 } | |
3950 | |
3951 void | |
3952 reinit_coding_system_type_create_mule_coding (void) | |
3953 { | |
3954 REINITIALIZE_CODING_SYSTEM_TYPE (iso2022); | |
3955 REINITIALIZE_CODING_SYSTEM_TYPE (ccl); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3956 REINITIALIZE_CODING_SYSTEM_TYPE (fixed_width); |
771 | 3957 REINITIALIZE_CODING_SYSTEM_TYPE (shift_jis); |
3958 REINITIALIZE_CODING_SYSTEM_TYPE (big5); | |
3959 } | |
3960 | |
/* Variable reinitialization hook for this file.  Intentionally empty:
   this file currently has nothing to reinitialize here.  */
void
reinit_vars_of_mule_coding (void)
{
}
3965 | |
3966 void | |
3967 vars_of_mule_coding (void) | |
3968 { | |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3969 /* This needs to be Qeq, there's a corner case where |
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3970 Qequal won't work. */ |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3971 Vfixed_width_query_ranges_cache |
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3972 = make_lisp_hash_table (32, HASH_TABLE_KEY_WEAK, Qeq); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3973 staticpro (&Vfixed_width_query_ranges_cache); |
771 | 3974 } |