Mercurial > hg > xemacs-beta
annotate src/mule-coding.c @ 5321:57a64ab2ae45
Implement some basic Lisp functions in terms of Common Lisp builtins.
2010-12-30 Aidan Kehoe <kehoea@parhasard.net>
* simple.el (assoc-ignore-case): Remove a duplicate definition of
this function (it's already in subr.el).
* iso8859-1.el (char-width):
On non-Mule, make this function equivalent to that produced by
(constantly 1), but preserve its docstring.
* subr.el (subst-char-in-string): Define this in terms of
#'substitute, #'nsubstitute.
(string-width): Define this using #'reduce and #'char-width.
(char-width): Give this a simpler definition, it makes far more
sense to check for mule at load time and redefine, as we do in
iso8859-1.el.
(store-substring): Implement this in terms of #'replace, now
#'replace is cheap.
| author | Aidan Kehoe <kehoea@parhasard.net> |
|---|---|
| date | Thu, 30 Dec 2010 01:00:40 +0000 |
| parents | 71ee43b8a74d |
| children | 308d34e9f07d |
| rev | line source |
|---|---|
| 771 | 1 /* Conversion functions for I18N encodings, but not Unicode (in separate file). |
| 2 Copyright (C) 1991, 1995 Free Software Foundation, Inc. | |
| 3 Copyright (C) 1995 Sun Microsystems, Inc. | |
|
5100
3d91f0b64469
fix bad bug with escape-quoted handling
Ben Wing <ben@xemacs.org>
parents:
4976
diff
changeset
|
4 Copyright (C) 2000, 2001, 2002, 2010 Ben Wing. |
| 771 | 5 |
| 6 This file is part of XEmacs. | |
| 7 | |
| 8 XEmacs is free software; you can redistribute it and/or modify it | |
| 9 under the terms of the GNU General Public License as published by the | |
| 10 Free Software Foundation; either version 2, or (at your option) any | |
| 11 later version. | |
| 12 | |
| 13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
| 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
| 16 for more details. | |
| 17 | |
| 18 You should have received a copy of the GNU General Public License | |
| 19 along with XEmacs; see the file COPYING. If not, write to | |
| 20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
| 21 Boston, MA 02111-1307, USA. */ | |
| 22 | |
| 23 /* Synched up with: Mule 2.3. Not in FSF. */ | |
| 24 | |
| 25 /* For previous history, see file-coding.c. | |
| 26 | |
| 27 September 10, 2001: Extracted from file-coding.c by Ben Wing. | |
| 28 | |
| 29 Later in September: Finished abstraction of detection system, rewrote | |
| 30 all the detectors to include multiple levels of likelihood. | |
| 31 */ | |
| 32 | |
| 33 #include <config.h> | |
| 34 #include "lisp.h" | |
| 35 | |
| 36 #include "charset.h" | |
| 37 #include "mule-ccl.h" | |
| 38 #include "file-coding.h" | |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
39 #include "elhash.h" |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
40 #include "rangetab.h" |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
41 #include "buffer.h" |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
42 #include "extents.h" |
| 771 | 43 |
| 44 Lisp_Object Qshift_jis, Qiso2022, Qbig5, Qccl; | |
| 45 | |
| 46 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3; | |
| 47 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; | |
| 48 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; | |
| 49 Lisp_Object Qno_iso6429; | |
| 50 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion; | |
| 51 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; | |
| 52 | |
| 53 Lisp_Object Qiso_7, Qiso_8_designate, Qiso_8_1, Qiso_8_2, Qiso_lock_shift; | |
| 54 | |
|
4691
3ba90c659d01
Move Qfrom_unicode to general-slots.h, fix the native Win32 build.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
55 Lisp_Object Qquery_skip_chars, Qinvalid_sequences_skip_chars; |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
56 Lisp_Object Qfixed_width; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
57 |
| 771 | 58 |
| 59 /************************************************************************/ | |
| 60 /* Shift-JIS methods */ | |
| 61 /************************************************************************/ | |
| 62 | |
| 63 /* Shift-JIS; Hankaku (half-width) KANA is also supported. */ | |
| 64 DEFINE_CODING_SYSTEM_TYPE (shift_jis); | |
| 65 | |
| 66 /* Shift-JIS is a coding system encoding three character sets: ASCII, right | |
| 67 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded | |
| 68 as is. A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is | |
| 69 encoded by "position-code + 0x80". A character of JISX0208 | |
| 70 (DIMENSION2_CHARS94 character set) is encoded in 2-byte but two | |
| 71 position-codes are divided and shifted so that it fit in the range | |
| 72 below. | |
| 73 | |
| 74 --- CODE RANGE of Shift-JIS --- | |
| 75 (character set) (range) | |
| 76 ASCII 0x00 .. 0x7F | |
| 77 JISX0201-Kana 0xA0 .. 0xDF | |
| 78 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF | |
| 79 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC | |
| 80 ------------------------------- | |
| 81 | |
| 82 */ | |
| 83 | |
| 84 /* Is this the first byte of a Shift-JIS two-byte char? */ | |
| 85 | |
| 826 | 86 inline static int |
| 87 byte_shift_jis_two_byte_1_p (int c) | |
| 88 { | |
| 89 return (c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF); | |
| 90 } | |
| 771 | 91 |
| 92 /* Is this the second byte of a Shift-JIS two-byte char? */ | |
| 93 | |
| 826 | 94 inline static int |
| 95 byte_shift_jis_two_byte_2_p (int c) | |
| 96 { | |
| 97 return (c >= 0x40 && c <= 0x7E) || (c >= 0x80 && c <= 0xFC); | |
| 98 } | |
| 99 | |
| 100 inline static int | |
| 101 byte_shift_jis_katakana_p (int c) | |
| 102 { | |
| 103 return c >= 0xA1 && c <= 0xDF; | |
| 104 } | |
| 771 | 105 |
| 3439 | 106 inline static void |
| 107 dynarr_add_2022_one_dimension (Lisp_Object charset, Ibyte c, | |
| 108 unsigned char charmask, | |
| 109 unsigned_char_dynarr *dst) | |
| 110 { | |
| 111 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
| 112 { | |
| 113 encode_unicode_char (charset, c & charmask, 0, | |
| 4096 | 114 dst, UNICODE_UTF_8, 0, 0); |
| 3439 | 115 } |
| 116 else | |
| 117 { | |
| 118 Dynarr_add (dst, c & charmask); | |
| 119 } | |
| 120 } | |
| 121 | |
| 122 inline static void | |
| 123 dynarr_add_2022_two_dimensions (Lisp_Object charset, Ibyte c, | |
| 124 unsigned int ch, | |
| 125 unsigned char charmask, | |
| 126 unsigned_char_dynarr *dst) | |
| 127 { | |
| 128 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
| 129 { | |
| 130 encode_unicode_char (charset, | |
| 131 ch & charmask, | |
| 132 c & charmask, dst, | |
| 4096 | 133 UNICODE_UTF_8, 0, 0); |
| 3439 | 134 } |
| 135 else | |
| 136 { | |
| 137 Dynarr_add (dst, ch & charmask); | |
| 138 Dynarr_add (dst, c & charmask); | |
| 139 } | |
| 140 } | |
| 141 | |
| 771 | 142 /* Convert Shift-JIS data to internal format. */ |
| 143 | |
| 144 static Bytecount | |
| 145 shift_jis_convert (struct coding_stream *str, const UExtbyte *src, | |
| 146 unsigned_char_dynarr *dst, Bytecount n) | |
| 147 { | |
| 148 unsigned int ch = str->ch; | |
| 149 Bytecount orign = n; | |
| 150 | |
| 151 if (str->direction == CODING_DECODE) | |
| 152 { | |
| 153 while (n--) | |
| 154 { | |
| 155 UExtbyte c = *src++; | |
| 156 | |
| 157 if (ch) | |
| 158 { | |
| 159 /* Previous character was first byte of Shift-JIS Kanji char. */ | |
| 826 | 160 if (byte_shift_jis_two_byte_2_p (c)) |
| 771 | 161 { |
| 867 | 162 Ibyte e1, e2; |
| 771 | 163 |
| 164 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); | |
| 165 DECODE_SHIFT_JIS (ch, c, e1, e2); | |
| 166 Dynarr_add (dst, e1); | |
| 167 Dynarr_add (dst, e2); | |
| 168 } | |
| 169 else | |
| 170 { | |
| 171 DECODE_ADD_BINARY_CHAR (ch, dst); | |
| 172 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 173 } | |
| 174 ch = 0; | |
| 175 } | |
| 176 else | |
| 177 { | |
| 826 | 178 if (byte_shift_jis_two_byte_1_p (c)) |
| 771 | 179 ch = c; |
| 826 | 180 else if (byte_shift_jis_katakana_p (c)) |
| 771 | 181 { |
| 182 Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201); | |
| 183 Dynarr_add (dst, c); | |
| 184 } | |
| 185 else | |
| 186 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 187 } | |
| 188 } | |
| 189 | |
| 190 if (str->eof) | |
| 191 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 192 } | |
| 193 else | |
| 194 { | |
| 195 while (n--) | |
| 196 { | |
| 867 | 197 Ibyte c = *src++; |
| 826 | 198 if (byte_ascii_p (c)) |
| 771 | 199 { |
| 200 Dynarr_add (dst, c); | |
| 201 ch = 0; | |
| 202 } | |
| 867 | 203 else if (ibyte_leading_byte_p (c)) |
| 771 | 204 ch = (c == LEADING_BYTE_KATAKANA_JISX0201 || |
| 205 c == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
| 206 c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; | |
| 207 else if (ch) | |
| 208 { | |
| 209 if (ch == LEADING_BYTE_KATAKANA_JISX0201) | |
| 210 { | |
| 211 Dynarr_add (dst, c); | |
| 212 ch = 0; | |
| 213 } | |
| 214 else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
| 215 ch == LEADING_BYTE_JAPANESE_JISX0208) | |
| 216 ch = c; | |
| 217 else | |
| 218 { | |
| 219 UExtbyte j1, j2; | |
| 220 ENCODE_SHIFT_JIS (ch, c, j1, j2); | |
| 221 Dynarr_add (dst, j1); | |
| 222 Dynarr_add (dst, j2); | |
| 223 ch = 0; | |
| 224 } | |
| 225 } | |
| 226 } | |
| 227 } | |
| 228 | |
| 229 str->ch = ch; | |
| 230 | |
| 231 return orign; | |
| 232 } | |
| 233 | |
| 234 DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /* | |
| 235 Decode a JISX0208 character of Shift-JIS coding-system. | |
| 236 CODE is the character code in Shift-JIS as a cons of type bytes. | |
| 237 Return the corresponding character. | |
| 238 */ | |
| 239 (code)) | |
| 240 { | |
| 241 int c1, c2, s1, s2; | |
| 242 | |
| 243 CHECK_CONS (code); | |
| 244 CHECK_INT (XCAR (code)); | |
| 245 CHECK_INT (XCDR (code)); | |
| 246 s1 = XINT (XCAR (code)); | |
| 247 s2 = XINT (XCDR (code)); | |
| 826 | 248 if (byte_shift_jis_two_byte_1_p (s1) && |
| 249 byte_shift_jis_two_byte_2_p (s2)) | |
| 771 | 250 { |
| 251 DECODE_SHIFT_JIS (s1, s2, c1, c2); | |
| 867 | 252 return make_char (make_ichar (Vcharset_japanese_jisx0208, |
| 831 | 253 c1 & 0x7F, c2 & 0x7F)); |
| 771 | 254 } |
| 255 else | |
| 256 return Qnil; | |
| 257 } | |
| 258 | |
| 259 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* | |
| 260 Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system. | |
| 261 Return the corresponding character code in SHIFT-JIS as a cons of two bytes. | |
| 262 */ | |
| 263 (character)) | |
| 264 { | |
| 265 Lisp_Object charset; | |
| 266 int c1, c2, s1, s2; | |
| 267 | |
| 268 CHECK_CHAR_COERCE_INT (character); | |
| 867 | 269 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
| 771 | 270 if (EQ (charset, Vcharset_japanese_jisx0208)) |
| 271 { | |
| 272 ENCODE_SHIFT_JIS (c1 | 0x80, c2 | 0x80, s1, s2); | |
| 273 return Fcons (make_int (s1), make_int (s2)); | |
| 274 } | |
| 275 else | |
| 276 return Qnil; | |
| 277 } | |
| 278 | |
| 279 | |
| 280 /************************************************************************/ | |
| 281 /* Shift-JIS detector */ | |
| 282 /************************************************************************/ | |
| 283 | |
| 284 DEFINE_DETECTOR (shift_jis); | |
| 285 DEFINE_DETECTOR_CATEGORY (shift_jis, shift_jis); | |
| 286 | |
| 287 struct shift_jis_detector | |
| 288 { | |
| 289 int seen_jisx0208_char_in_c1; | |
| 290 int seen_jisx0208_char_in_upper; | |
| 291 int seen_jisx0201_char; | |
| 292 unsigned int seen_iso2022_esc:1; | |
| 293 unsigned int seen_bad_first_byte:1; | |
| 294 unsigned int seen_bad_second_byte:1; | |
| 295 /* temporary */ | |
| 296 unsigned int in_second_byte:1; | |
| 297 unsigned int first_byte_was_c1:1; | |
| 298 }; | |
| 299 | |
| 300 static void | |
| 301 shift_jis_detect (struct detection_state *st, const UExtbyte *src, | |
| 302 Bytecount n) | |
| 303 { | |
| 304 struct shift_jis_detector *data = DETECTION_STATE_DATA (st, shift_jis); | |
| 305 | |
| 306 while (n--) | |
| 307 { | |
| 308 UExtbyte c = *src++; | |
| 309 if (!data->in_second_byte) | |
| 310 { | |
| 311 if (c >= 0x80 && c <= 0x9F) | |
| 312 data->first_byte_was_c1 = 1; | |
| 313 if (c >= 0xA0 && c <= 0xDF) | |
| 314 data->seen_jisx0201_char++; | |
| 315 else if ((c >= 0x80 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) | |
| 316 data->in_second_byte = 1; | |
| 317 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
| 318 data->seen_iso2022_esc = 1; | |
| 319 else if (c >= 0x80) | |
| 320 data->seen_bad_first_byte = 1; | |
| 321 } | |
| 322 else | |
| 323 { | |
| 324 if ((c >= 0x40 && c <= 0x7E) || (c >= 0x80 && c <= 0xFC)) | |
| 325 { | |
| 326 if (data->first_byte_was_c1 || (c >= 0x80 && c <= 0x9F)) | |
| 327 data->seen_jisx0208_char_in_c1++; | |
| 328 else | |
| 329 data->seen_jisx0208_char_in_upper++; | |
| 330 } | |
| 331 else | |
| 332 data->seen_bad_second_byte = 1; | |
| 333 data->in_second_byte = 0; | |
| 334 data->first_byte_was_c1 = 0; | |
| 335 } | |
| 336 } | |
| 337 | |
| 338 if (data->seen_bad_second_byte) | |
| 339 DET_RESULT (st, shift_jis) = DET_NEARLY_IMPOSSIBLE; | |
| 340 else if (data->seen_bad_first_byte) | |
| 341 DET_RESULT (st, shift_jis) = DET_QUITE_IMPROBABLE; | |
| 342 else if (data->seen_iso2022_esc) | |
| 343 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_UNLIKELY; | |
| 344 else if (data->seen_jisx0208_char_in_c1 >= 20 || | |
| 345 (data->seen_jisx0208_char_in_c1 >= 10 && | |
| 346 data->seen_jisx0208_char_in_upper >= 10)) | |
| 347 DET_RESULT (st, shift_jis) = DET_QUITE_PROBABLE; | |
| 348 else if (data->seen_jisx0208_char_in_c1 > 3 || | |
| 349 data->seen_jisx0208_char_in_upper >= 10 || | |
| 350 /* Since the range is limited compared to what is often seen | |
| 351 is typical Latin-X charsets, the fact that we've seen a | |
| 352 bunch of them and none that are invalid is reasonably | |
| 353 strong statistical evidence of this encoding, or at least | |
| 354 not of the common Latin-X ones. */ | |
| 355 data->seen_jisx0201_char >= 100) | |
| 356 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_LIKELY; | |
| 357 else if (data->seen_jisx0208_char_in_c1 > 0 || | |
| 358 data->seen_jisx0208_char_in_upper > 0 || | |
| 359 data->seen_jisx0201_char > 0) | |
| 360 DET_RESULT (st, shift_jis) = DET_SLIGHTLY_LIKELY; | |
| 361 else | |
| 362 DET_RESULT (st, shift_jis) = DET_AS_LIKELY_AS_UNLIKELY; | |
| 363 } | |
| 364 | |
| 365 | |
| 366 /************************************************************************/ | |
| 367 /* Big5 methods */ | |
| 368 /************************************************************************/ | |
| 369 | |
| 2819 | 370 /* BIG5 (used for Mandarin in Taiwan). */ |
| 771 | 371 DEFINE_CODING_SYSTEM_TYPE (big5); |
| 372 | |
| 373 /* BIG5 is a coding system encoding two character sets: ASCII and | |
| 374 Big5. An ASCII character is encoded as is. Big5 is a two-byte | |
| 375 character set and is encoded in two-byte. | |
| 376 | |
| 377 --- CODE RANGE of BIG5 --- | |
| 378 (character set) (range) | |
| 379 ASCII 0x00 .. 0x7F | |
| 380 Big5 (1st byte) 0xA1 .. 0xFE | |
| 381 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE | |
| 382 -------------------------- | |
| 383 | |
| 384 Since the number of characters in Big5 is larger than maximum | |
| 385 characters in Emacs' charset (96x96), it can't be handled as one | |
| 386 charset. So, in XEmacs, Big5 is divided into two: `charset-big5-1' | |
| 387 and `charset-big5-2'. Both <type>s are DIMENSION2_CHARS94. The former | |
| 388 contains frequently used characters and the latter contains less | |
| 389 frequently used characters. */ | |
| 390 | |
| 826 | 391 inline static int |
| 392 byte_big5_two_byte_1_p (int c) | |
| 393 { | |
| 394 return c >= 0xA1 && c <= 0xFE; | |
| 395 } | |
| 771 | 396 |
| 397 /* Is this the second byte of a Shift-JIS two-byte char? */ | |
| 398 | |
| 826 | 399 inline static int |
| 400 byte_big5_two_byte_2_p (int c) | |
| 401 { | |
| 402 return (c >= 0x40 && c <= 0x7E) || (c >= 0xA1 && c <= 0xFE); | |
| 403 } | |
| 771 | 404 |
| 405 /* Number of Big5 characters which have the same code in 1st byte. */ | |
| 406 | |
| 407 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40) | |
| 408 | |
| 409 /* Code conversion macros. These are macros because they are used in | |
| 410 inner loops during code conversion. | |
| 411 | |
| 412 Note that temporary variables in macros introduce the classic | |
| 413 dynamic-scoping problems with variable names. We use capital- | |
| 414 lettered variables in the assumption that XEmacs does not use | |
| 415 capital letters in variables except in a very formalized way | |
| 416 (e.g. Qstring). */ | |
| 417 | |
| 418 /* Convert Big5 code (b1, b2) into its internal string representation | |
| 419 (lb, c1, c2). */ | |
| 420 | |
| 421 /* There is a much simpler way to split the Big5 charset into two. | |
| 422 For the moment I'm going to leave the algorithm as-is because it | |
| 423 claims to separate out the most-used characters into a single | |
| 424 charset, which perhaps will lead to optimizations in various | |
| 425 places. | |
| 426 | |
| 427 The way the algorithm works is something like this: | |
| 428 | |
| 429 Big5 can be viewed as a 94x157 charset, where the row is | |
| 430 encoded into the bytes 0xA1 .. 0xFE and the column is encoded | |
| 431 into the bytes 0x40 .. 0x7E and 0xA1 .. 0xFE. As for frequency, | |
| 432 the split between low and high column numbers is apparently | |
| 433 meaningless; ascending rows produce less and less frequent chars. | |
| 434 Therefore, we assign the lower half of rows (0xA1 .. 0xC8) to | |
| 435 the first charset, and the upper half (0xC9 .. 0xFE) to the | |
| 436 second. To do the conversion, we convert the character into | |
| 437 a single number where 0 .. 156 is the first row, 157 .. 313 | |
| 438 is the second, etc. That way, the characters are ordered by | |
| 439 decreasing frequency. Then we just chop the space in two | |
| 440 and coerce the result into a 94x94 space. | |
| 441 */ | |
| 442 | |
| 443 #define DECODE_BIG5(b1, b2, lb, c1, c2) do \ | |
| 444 { \ | |
| 445 int B1 = b1, B2 = b2; \ | |
| 446 int I \ | |
| 447 = (B1 - 0xA1) * BIG5_SAME_ROW + B2 - (B2 < 0x7F ? 0x40 : 0x62); \ | |
| 448 \ | |
| 449 if (B1 < 0xC9) \ | |
| 450 { \ | |
| 451 lb = LEADING_BYTE_CHINESE_BIG5_1; \ | |
| 452 } \ | |
| 453 else \ | |
| 454 { \ | |
| 455 lb = LEADING_BYTE_CHINESE_BIG5_2; \ | |
| 456 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1); \ | |
| 457 } \ | |
| 458 c1 = I / (0xFF - 0xA1) + 0xA1; \ | |
| 459 c2 = I % (0xFF - 0xA1) + 0xA1; \ | |
| 460 } while (0) | |
| 461 | |
| 462 /* Convert the internal string representation of a Big5 character | |
| 463 (lb, c1, c2) into Big5 code (b1, b2). */ | |
| 464 | |
| 465 #define ENCODE_BIG5(lb, c1, c2, b1, b2) do \ | |
| 466 { \ | |
| 467 int I = ((c1) - 0xA1) * (0xFF - 0xA1) + ((c2) - 0xA1); \ | |
| 468 \ | |
| 469 if (lb == LEADING_BYTE_CHINESE_BIG5_2) \ | |
| 470 { \ | |
| 471 I += BIG5_SAME_ROW * (0xC9 - 0xA1); \ | |
| 472 } \ | |
| 473 b1 = I / BIG5_SAME_ROW + 0xA1; \ | |
| 474 b2 = I % BIG5_SAME_ROW; \ | |
| 475 b2 += b2 < 0x3F ? 0x40 : 0x62; \ | |
| 476 } while (0) | |
| 477 | |
| 478 /* Convert Big5 data to internal format. */ | |
| 479 | |
| 480 static Bytecount | |
| 481 big5_convert (struct coding_stream *str, const UExtbyte *src, | |
| 482 unsigned_char_dynarr *dst, Bytecount n) | |
| 483 { | |
| 484 unsigned int ch = str->ch; | |
| 485 Bytecount orign = n; | |
| 486 | |
| 487 if (str->direction == CODING_DECODE) | |
| 488 { | |
| 489 while (n--) | |
| 490 { | |
| 491 UExtbyte c = *src++; | |
| 492 if (ch) | |
| 493 { | |
| 494 /* Previous character was first byte of Big5 char. */ | |
| 826 | 495 if (byte_big5_two_byte_2_p (c)) |
| 771 | 496 { |
| 867 | 497 Ibyte b1, b2, b3; |
| 771 | 498 DECODE_BIG5 (ch, c, b1, b2, b3); |
| 499 Dynarr_add (dst, b1); | |
| 500 Dynarr_add (dst, b2); | |
| 501 Dynarr_add (dst, b3); | |
| 502 } | |
| 503 else | |
| 504 { | |
| 505 DECODE_ADD_BINARY_CHAR (ch, dst); | |
| 506 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 507 } | |
| 508 ch = 0; | |
| 509 } | |
| 510 else | |
| 511 { | |
| 826 | 512 if (byte_big5_two_byte_1_p (c)) |
| 771 | 513 ch = c; |
| 514 else | |
| 515 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 516 } | |
| 517 } | |
| 518 | |
| 519 if (str->eof) | |
| 520 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 521 } | |
| 522 else | |
| 523 { | |
| 524 while (n--) | |
| 525 { | |
| 867 | 526 Ibyte c = *src++; |
| 826 | 527 if (byte_ascii_p (c)) |
| 771 | 528 { |
| 529 /* ASCII. */ | |
| 530 Dynarr_add (dst, c); | |
| 531 } | |
| 867 | 532 else if (ibyte_leading_byte_p (c)) |
| 771 | 533 { |
| 534 if (c == LEADING_BYTE_CHINESE_BIG5_1 || | |
| 535 c == LEADING_BYTE_CHINESE_BIG5_2) | |
| 536 { | |
| 537 /* A recognized leading byte. */ | |
| 538 ch = c; | |
| 539 continue; /* not done with this character. */ | |
| 540 } | |
| 541 /* otherwise just ignore this character. */ | |
| 542 } | |
| 543 else if (ch == LEADING_BYTE_CHINESE_BIG5_1 || | |
| 544 ch == LEADING_BYTE_CHINESE_BIG5_2) | |
| 545 { | |
| 546 /* Previous char was a recognized leading byte. */ | |
| 547 ch = (ch << 8) | c; | |
| 548 continue; /* not done with this character. */ | |
| 549 } | |
| 550 else if (ch) | |
| 551 { | |
| 552 /* Encountering second byte of a Big5 character. */ | |
| 553 UExtbyte b1, b2; | |
| 554 | |
| 555 ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2); | |
| 556 Dynarr_add (dst, b1); | |
| 557 Dynarr_add (dst, b2); | |
| 558 } | |
| 559 | |
| 560 ch = 0; | |
| 561 } | |
| 562 } | |
| 563 | |
| 564 str->ch = ch; | |
| 565 return orign; | |
| 566 } | |
| 567 | |
| 867 | 568 Ichar |
| 771 | 569 decode_big5_char (int b1, int b2) |
| 570 { | |
| 826 | 571 if (byte_big5_two_byte_1_p (b1) && |
| 572 byte_big5_two_byte_2_p (b2)) | |
| 771 | 573 { |
| 574 int leading_byte; | |
| 575 Lisp_Object charset; | |
| 576 int c1, c2; | |
| 577 | |
| 578 DECODE_BIG5 (b1, b2, leading_byte, c1, c2); | |
| 826 | 579 charset = charset_by_leading_byte (leading_byte); |
| 867 | 580 return make_ichar (charset, c1 & 0x7F, c2 & 0x7F); |
| 771 | 581 } |
| 582 else | |
| 583 return -1; | |
| 584 } | |
| 585 | |
| 586 DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /* | |
| 587 Convert Big Five character codes in CODE into a character. | |
| 588 CODE is a cons of two integers specifying the codepoints in Big Five. | |
| 589 Return the corresponding character, or nil if the codepoints are out of range. | |
| 590 | |
| 591 The term `decode' is used because the codepoints can be viewed as the | |
| 592 representation of the character in the external Big Five encoding, and thus | |
| 593 converting them to a character is analogous to any other operation that | |
| 594 decodes an external representation. | |
| 595 */ | |
| 596 (code)) | |
| 597 { | |
| 867 | 598 Ichar ch; |
| 771 | 599 |
| 600 CHECK_CONS (code); | |
| 601 CHECK_INT (XCAR (code)); | |
| 602 CHECK_INT (XCDR (code)); | |
| 603 ch = decode_big5_char (XINT (XCAR (code)), XINT (XCDR (code))); | |
| 604 if (ch == -1) | |
| 605 return Qnil; | |
| 606 else | |
| 607 return make_char (ch); | |
| 608 } | |
| 609 | |
| 610 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* | |
| 611 Convert the specified Big Five character into its codepoints. | |
| 612 The codepoints are returned as a cons of two integers, specifying the | |
| 613 Big Five codepoints. See `decode-big5-char' for the reason why the | |
| 614 term `encode' is used for this operation. | |
| 615 */ | |
| 616 (character)) | |
| 617 { | |
| 618 Lisp_Object charset; | |
| 619 int c1, c2, b1, b2; | |
| 620 | |
| 621 CHECK_CHAR_COERCE_INT (character); | |
| 867 | 622 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
| 771 | 623 if (EQ (charset, Vcharset_chinese_big5_1) || |
| 624 EQ (charset, Vcharset_chinese_big5_2)) | |
| 625 { | |
| 626 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80, | |
| 627 b1, b2); | |
| 628 return Fcons (make_int (b1), make_int (b2)); | |
| 629 } | |
| 630 else | |
| 631 return Qnil; | |
| 632 } | |
| 633 | |
| 634 | |
| 635 /************************************************************************/ | |
| 636 /* Big5 detector */ | |
| 637 /************************************************************************/ | |
| 638 | |
| 639 DEFINE_DETECTOR (big5); | |
| 640 DEFINE_DETECTOR_CATEGORY (big5, big5); | |
| 641 | |
| 642 struct big5_detector | |
| 643 { | |
| 644 int seen_big5_char; | |
| 985 | 645 int seen_euc_char; |
| 771 | 646 unsigned int seen_iso2022_esc:1; |
| 647 unsigned int seen_bad_first_byte:1; | |
| 648 unsigned int seen_bad_second_byte:1; | |
| 649 | |
| 650 /* temporary */ | |
| 651 unsigned int in_second_byte:1; | |
| 652 }; | |
| 653 | |
| 654 static void | |
| 655 big5_detect (struct detection_state *st, const UExtbyte *src, | |
| 656 Bytecount n) | |
| 657 { | |
| 658 struct big5_detector *data = DETECTION_STATE_DATA (st, big5); | |
| 659 | |
| 660 while (n--) | |
| 661 { | |
| 662 UExtbyte c = *src++; | |
| 663 if (!data->in_second_byte) | |
| 664 { | |
| 665 if (c >= 0xA1 && c <= 0xFE) | |
| 666 data->in_second_byte = 1; | |
| 667 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
| 668 data->seen_iso2022_esc = 1; | |
| 669 else if (c >= 0x80) | |
| 670 data->seen_bad_first_byte = 1; | |
| 671 } | |
| 672 else | |
| 673 { | |
| 674 data->in_second_byte = 0; | |
| 985 | 675 if (c >= 0xA1 && c <= 0xFE) |
| 676 data->seen_euc_char++; | |
| 677 else if (c >= 0x40 && c <= 0x7E) | |
| 771 | 678 data->seen_big5_char++; |
| 679 else | |
| 680 data->seen_bad_second_byte = 1; | |
| 681 } | |
| 682 } | |
| 683 | |
| 684 if (data->seen_bad_second_byte) | |
| 685 DET_RESULT (st, big5) = DET_NEARLY_IMPOSSIBLE; | |
| 686 else if (data->seen_bad_first_byte) | |
| 687 DET_RESULT (st, big5) = DET_QUITE_IMPROBABLE; | |
| 688 else if (data->seen_iso2022_esc) | |
| 689 DET_RESULT (st, big5) = DET_SOMEWHAT_UNLIKELY; | |
| 690 else if (data->seen_big5_char >= 4) | |
| 691 DET_RESULT (st, big5) = DET_SOMEWHAT_LIKELY; | |
| 985 | 692 else if (data->seen_euc_char) |
| 693 DET_RESULT (st, big5) = DET_SLIGHTLY_LIKELY; | |
| 771 | 694 else |
| 695 DET_RESULT (st, big5) = DET_AS_LIKELY_AS_UNLIKELY; | |
| 696 } | |
| 697 | |
| 698 | |
| 699 /************************************************************************/ | |
| 700 /* ISO2022 methods */ | |
| 701 /************************************************************************/ | |
| 702 | |
| 703 /* Any ISO-2022-compliant coding system. Includes JIS, EUC, CTEXT | |
| 704 (Compound Text, the encoding of selections in X Windows). See below for | |
| 705 a complete description of ISO-2022. */ | |
| 706 | |
| 707 /* Flags indicating what we've seen so far when parsing an | |
| 708 ISO2022 escape sequence. */ | |
| 709 enum iso_esc_flag | |
| 710 { | |
| 711 /* Partial sequences */ | |
| 712 ISO_ESC_NOTHING, /* Nothing has been seen. */ | |
| 713 ISO_ESC, /* We've seen ESC. */ | |
| 714 ISO_ESC_2_4, /* We've seen ESC $. This indicates | |
| 715 that we're designating a multi-byte, rather | |
| 716 than a single-byte, character set. */ | |
| 3439 | 717 ISO_ESC_2_5, /* We've seen ESC %. This indicates an escape to a |
| 718 Unicode coding system; the only one of these | |
| 719 we're prepared to deal with is UTF-8, which has | |
| 720 the next character as G. */ | |
| 771 | 721 ISO_ESC_2_8, /* We've seen ESC 0x28, i.e. ESC (. |
| 722 This means designate a 94-character | |
| 723 character set into G0. */ | |
| 724 ISO_ESC_2_9, /* We've seen ESC 0x29 -- designate a | |
| 725 94-character character set into G1. */ | |
| 726 ISO_ESC_2_10, /* We've seen ESC 0x2A. */ | |
| 727 ISO_ESC_2_11, /* We've seen ESC 0x2B. */ | |
| 728 ISO_ESC_2_12, /* We've seen ESC 0x2C -- designate a | |
| 729 96-character character set into G0. | |
| 730 (This is not ISO2022-standard. | |
| 731 The following 96-character | |
| 732 control sequences are standard, | |
| 733 though.) */ | |
| 734 ISO_ESC_2_13, /* We've seen ESC 0x2D -- designate a | |
| 735 96-character character set into G1. | |
| 736 */ | |
| 737 ISO_ESC_2_14, /* We've seen ESC 0x2E. */ | |
| 738 ISO_ESC_2_15, /* We've seen ESC 0x2F. */ | |
| 739 ISO_ESC_2_4_8, /* We've seen ESC $ 0x28 -- designate | |
| 740 a 94^N character set into G0. */ | |
| 741 ISO_ESC_2_4_9, /* We've seen ESC $ 0x29. */ | |
| 742 ISO_ESC_2_4_10, /* We've seen ESC $ 0x2A. */ | |
| 743 ISO_ESC_2_4_11, /* We've seen ESC $ 0x2B. */ | |
| 744 ISO_ESC_2_4_12, /* We've seen ESC $ 0x2C. */ | |
| 745 ISO_ESC_2_4_13, /* We've seen ESC $ 0x2D. */ | |
| 746 ISO_ESC_2_4_14, /* We've seen ESC $ 0x2E. */ | |
| 747 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */ | |
| 748 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This | |
| 749 starts a directionality-control | |
| 750 sequence. The next character | |
| 751 must be 0, 1, 2, or ]. */ | |
| 752 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next character must be ]. */ | |
| 753 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next character must be ]. */ | |
| 754 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next character must be ]. */ | |
| 755 | |
| 756 /* Full sequences. */ | |
| 757 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */ | |
| 758 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */ | |
| 759 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */ | |
| 760 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */ | |
| 761 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */ | |
| 762 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality | |
| 763 sequence. */ | |
| 764 ISO_ESC_LITERAL /* We've seen a literal character ala | |
| 765 escape-quoting. */ | |
| 766 }; | |
| 767 | |
| 768 enum iso_error | |
| 769 { | |
| 770 ISO_ERROR_BAD_FINAL, | |
| 771 ISO_ERROR_UNKWOWN_ESC_SEQUENCE, | |
| 772 ISO_ERROR_INVALID_CODE_POINT_CHARACTER, | |
| 773 }; | |
| 774 | |
| 775 | |
| 776 /* Flags indicating current state while converting code. */ | |
| 777 | |
| 778 /************ Used during encoding and decoding: ************/ | |
| 779 /* If set, the current directionality is right-to-left. Otherwise, it's | |
| 780 left-to-right. */ | |
| 781 #define ISO_STATE_R2L (1 << 0) | |
| 782 | |
| 783 /************ Used during encoding: ************/ | |
| 784 /* If set, we just saw a CR. */ | |
| 785 #define ISO_STATE_CR (1 << 1) | |
| 786 | |
| 787 /************ Used during decoding: ************/ | |
| 788 /* If set, we're currently parsing an escape sequence and the upper 16 bits | |
| 789 should be looked at to indicate what partial escape sequence we've seen | |
| 790 so far. Otherwise, we're running through actual text. */ | |
| 791 #define ISO_STATE_ESCAPE (1 << 2) | |
| 792 /* If set, G2 is invoked into GL, but only for the next character. */ | |
| 793 #define ISO_STATE_SS2 (1 << 3) | |
| 794 /* If set, G3 is invoked into GL, but only for the next character. If both | |
| 795 ISO_STATE_SS2 and ISO_STATE_SS3 are set, ISO_STATE_SS2 overrides; but | |
| 796 this probably indicates an error in the text encoding. */ | |
| 797 #define ISO_STATE_SS3 (1 << 4) | |
| 798 /* If set, we're currently processing a composite character (i.e. a | |
| 799 character constructed by overstriking two or more characters). */ | |
| 800 #define ISO_STATE_COMPOSITE (1 << 5) | |
| 801 | |
| 3439 | 802 /* If set, we're processing UTF-8 encoded data within ISO-2022 |
| 803 processing. */ | |
| 804 #define ISO_STATE_UTF_8 (1 << 6) | |
| 805 | |
| 771 | 806 /* ISO_STATE_LOCK is the mask of flags that remain on until explicitly |
| 807 turned off when in the ISO2022 encoder/decoder. Other flags are turned | |
| 808 off at the end of processing each character or escape sequence. */ | |
| 809 # define ISO_STATE_LOCK \ | |
| 3439 | 810 (ISO_STATE_COMPOSITE | ISO_STATE_R2L | ISO_STATE_UTF_8) |
| 771 | 811 |
| 812 typedef struct charset_conversion_spec | |
| 813 { | |
| 814 Lisp_Object from_charset; | |
| 815 Lisp_Object to_charset; | |
| 816 } charset_conversion_spec; | |
| 817 | |
| 818 typedef struct | |
| 819 { | |
| 820 Dynarr_declare (charset_conversion_spec); | |
| 821 } charset_conversion_spec_dynarr; | |
| 822 | |
| 823 struct iso2022_coding_system | |
| 824 { | |
| 825 /* What are the charsets to be initially designated to G0, G1, | |
| 826 G2, G3? If t, no charset is initially designated. If nil, | |
| 827 no charset is initially designated and no charset is allowed | |
| 828 to be designated. */ | |
| 829 Lisp_Object initial_charset[4]; | |
| 830 | |
| 831 /* If true, a designation escape sequence needs to be sent on output | |
| 832 for the charset in G[0-3] before that charset is used. */ | |
| 833 unsigned char force_charset_on_output[4]; | |
| 834 | |
| 835 charset_conversion_spec_dynarr *input_conv; | |
| 836 charset_conversion_spec_dynarr *output_conv; | |
| 837 | |
| 838 unsigned int shoort :1; /* C makes you speak Dutch */ | |
| 839 unsigned int no_ascii_eol :1; | |
| 840 unsigned int no_ascii_cntl :1; | |
| 841 unsigned int seven :1; | |
| 842 unsigned int lock_shift :1; | |
| 843 unsigned int no_iso6429 :1; | |
| 844 unsigned int escape_quoted :1; | |
| 845 }; | |
| 846 | |
| 847 #define CODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ | |
| 848 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->initial_charset[g]) | |
| 849 #define CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ | |
| 850 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->force_charset_on_output[g]) | |
| 851 #define CODING_SYSTEM_ISO2022_SHORT(codesys) \ | |
| 852 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->shoort) | |
| 853 #define CODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ | |
| 854 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_ascii_eol) | |
| 855 #define CODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ | |
| 856 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_ascii_cntl) | |
| 857 #define CODING_SYSTEM_ISO2022_SEVEN(codesys) \ | |
| 858 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->seven) | |
| 859 #define CODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ | |
| 860 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->lock_shift) | |
| 861 #define CODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \ | |
| 862 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_iso6429) | |
| 863 #define CODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \ | |
| 864 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->escape_quoted) | |
| 865 #define CODING_SYSTEM_ISO2022_INPUT_CONV(codesys) \ | |
| 866 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->input_conv) | |
| 867 #define CODING_SYSTEM_ISO2022_OUTPUT_CONV(codesys) \ | |
| 868 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->output_conv) | |
| 869 | |
| 870 #define XCODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ | |
| 871 CODING_SYSTEM_ISO2022_INITIAL_CHARSET (XCODING_SYSTEM (codesys), g) | |
| 872 #define XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ | |
| 873 CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (XCODING_SYSTEM (codesys), g) | |
| 874 #define XCODING_SYSTEM_ISO2022_SHORT(codesys) \ | |
| 875 CODING_SYSTEM_ISO2022_SHORT (XCODING_SYSTEM (codesys)) | |
| 876 #define XCODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ | |
| 877 CODING_SYSTEM_ISO2022_NO_ASCII_EOL (XCODING_SYSTEM (codesys)) | |
| 878 #define XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ | |
| 879 CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (XCODING_SYSTEM (codesys)) | |
| 880 #define XCODING_SYSTEM_ISO2022_SEVEN(codesys) \ | |
| 881 CODING_SYSTEM_ISO2022_SEVEN (XCODING_SYSTEM (codesys)) | |
| 882 #define XCODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ | |
| 883 CODING_SYSTEM_ISO2022_LOCK_SHIFT (XCODING_SYSTEM (codesys)) | |
| 884 #define XCODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \ | |
| 885 CODING_SYSTEM_ISO2022_NO_ISO6429 (XCODING_SYSTEM (codesys)) | |
| 886 #define XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \ | |
| 887 CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (XCODING_SYSTEM (codesys)) | |
| 888 #define XCODING_SYSTEM_ISO2022_INPUT_CONV(codesys) \ | |
| 889 CODING_SYSTEM_ISO2022_INPUT_CONV (XCODING_SYSTEM (codesys)) | |
| 890 #define XCODING_SYSTEM_ISO2022_OUTPUT_CONV(codesys) \ | |
| 891 CODING_SYSTEM_ISO2022_OUTPUT_CONV (XCODING_SYSTEM (codesys)) | |
| 892 | |
| 893 /* Additional information used by the ISO2022 decoder and detector. */ | |
| 894 struct iso2022_coding_stream | |
| 895 { | |
| 896 /* CHARSET holds the character sets currently assigned to the G0 | |
| 897 through G3 variables. It is initialized from the array | |
| 898 INITIAL_CHARSET in CODESYS. */ | |
| 899 Lisp_Object charset[4]; | |
| 900 | |
| 901 /* Which registers are currently invoked into the left (GL) and | |
| 902 right (GR) halves of the 8-bit encoding space? */ | |
| 903 int register_left, register_right; | |
| 904 | |
| 905 /* FLAGS holds flags indicating the current state of the encoding. Some of | |
| 906 these flags are actually part of the state-dependent data and should be | |
| 907 moved there. */ | |
| 908 unsigned int flags; | |
| 909 | |
| 910 /**************** for decoding ****************/ | |
| 911 | |
| 912 /* ISO_ESC holds a value indicating part of an escape sequence | |
| 913 that has already been seen. */ | |
| 914 enum iso_esc_flag esc; | |
| 915 | |
| 916 /* This records the bytes we've seen so far in an escape sequence, | |
| 917 in case the sequence is invalid (we spit out the bytes unchanged). */ | |
| 918 unsigned char esc_bytes[8]; | |
| 919 | |
| 920 /* Index for next byte to store in ISO escape sequence. */ | |
| 921 int esc_bytes_index; | |
| 922 | |
| 923 #ifdef ENABLE_COMPOSITE_CHARS | |
| 924 /* Stuff seen so far when composing a string. */ | |
| 925 unsigned_char_dynarr *composite_chars; | |
| 926 #endif | |
| 927 | |
| 928 /* If we saw an invalid designation sequence for a particular | |
| 929 register, we flag it here and switch to ASCII. The next time we | |
| 930 see a valid designation for this register, we turn off the flag | |
| 931 and do the designation normally, but pretend the sequence was | |
| 932 invalid. The effect of all this is that (most of the time) the | |
| 933 escape sequences for both the switch to the unknown charset, and | |
| 934 the switch back to the known charset, get inserted literally into | |
| 935 the buffer and saved out as such. The hope is that we can | |
| 936 preserve the escape sequences so that the resulting written out | |
| 937 file makes sense. If we don't do any of this, the designation | |
| 938 to the invalid charset will be preserved but that switch back | |
| 939 to the known charset will probably get eaten because it was | |
| 940 the same charset that was already present in the register. */ | |
| 941 unsigned char invalid_designated[4]; | |
| 942 | |
| 943 /* We try to do similar things as above for direction-switching | |
| 944 sequences. If we encountered a direction switch while an | |
| 945 invalid designation was present, or an invalid designation | |
| 946 just after a direction switch (i.e. no valid designation | |
| 947 encountered yet), we insert the direction-switch escape | |
| 948 sequence literally into the output stream, and later on | |
| 949 insert the corresponding direction-restoring escape sequence | |
| 950 literally also. */ | |
| 951 unsigned int switched_dir_and_no_valid_charset_yet :1; | |
| 952 unsigned int invalid_switch_dir :1; | |
| 953 | |
| 954 /* Tells the decoder to output the escape sequence literally | |
| 955 even though it was valid. Used in the games we play to | |
| 956 avoid lossage when we encounter invalid designations. */ | |
| 957 unsigned int output_literally :1; | |
| 958 /* We encountered a direction switch followed by an invalid | |
| 959 designation. We didn't output the direction switch | |
| 960 literally because we didn't know about the invalid designation; | |
| 961 but we have to do so now. */ | |
| 962 unsigned int output_direction_sequence :1; | |
| 963 | |
| 964 /**************** for encoding ****************/ | |
| 965 | |
| 966 /* Whether we need to explicitly designate the charset in the | |
| 967 G? register before using it. It is initialized from the | |
| 968 array FORCE_CHARSET_ON_OUTPUT in CODESYS. */ | |
| 969 unsigned char force_charset_on_output[4]; | |
| 970 | |
| 971 /* Other state variables that need to be preserved across | |
| 972 invocations. */ | |
| 973 Lisp_Object current_charset; | |
| 974 int current_half; | |
| 975 int current_char_boundary; | |
| 3439 | 976 |
| 977 /* Used for handling UTF-8. */ | |
| 978 unsigned char counter; | |
| 4096 | 979 unsigned char indicated_length; |
| 771 | 980 }; |
| 981 | |
| 1204 | 982 static const struct memory_description ccs_description_1[] = |
| 771 | 983 { |
| 984 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, from_charset) }, | |
| 985 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, to_charset) }, | |
| 986 { XD_END } | |
| 987 }; | |
| 988 | |
| 1204 | 989 static const struct sized_memory_description ccs_description = |
| 771 | 990 { |
| 991 sizeof (charset_conversion_spec), | |
| 992 ccs_description_1 | |
| 993 }; | |
| 994 | |
| 1204 | 995 static const struct memory_description ccsd_description_1[] = |
| 771 | 996 { |
| 997 XD_DYNARR_DESC (charset_conversion_spec_dynarr, &ccs_description), | |
| 998 { XD_END } | |
| 999 }; | |
| 1000 | |
| 1204 | 1001 static const struct sized_memory_description ccsd_description = |
| 771 | 1002 { |
| 1003 sizeof (charset_conversion_spec_dynarr), | |
| 1004 ccsd_description_1 | |
| 1005 }; | |
| 1006 | |
| 1204 | 1007 static const struct memory_description iso2022_coding_system_description[] = { |
| 1008 { XD_LISP_OBJECT_ARRAY, offsetof (struct iso2022_coding_system, | |
| 1009 initial_charset), 4 }, | |
| 2367 | 1010 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, input_conv), |
| 2551 | 1011 1, { &ccsd_description } }, |
| 2367 | 1012 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, output_conv), |
| 2551 | 1013 1, { &ccsd_description } }, |
| 771 | 1014 { XD_END } |
| 1015 }; | |
| 1016 | |
| 1204 | 1017 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022); |
| 1018 | |
| 771 | 1019 /* The following note taken directly from FSF 21.0.103. */ |
| 1020 | |
| 1021 /* The following note describes the coding system ISO2022 briefly. | |
| 1022 Since the intention of this note is to help understand the | |
| 1023 functions in this file, some parts are NOT ACCURATE or are OVERLY | |
| 1024 SIMPLIFIED. For thorough understanding, please refer to the | |
| 1025 original document of ISO2022. This is equivalent to the standard | |
| 1026 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*). | |
| 1027 | |
| 1028 ISO2022 provides many mechanisms to encode several character sets | |
| 1029 in 7-bit and 8-bit environments. For 7-bit environments, all text | |
| 1030 is encoded using bytes less than 128. This may make the encoded | |
| 1031 text a little bit longer, but the text passes more easily through | |
| 1032 several types of gateway, some of which strip off the MSB (Most | |
| 1033 Significant Bit). | |
| 1034 | |
| 1035 There are two kinds of character sets: control character sets and | |
| 1036 graphic character sets. The former contain control characters such | |
| 1037 as `newline' and `escape' to provide control functions (control | |
| 1038 functions are also provided by escape sequences). The latter | |
| 1039 contain graphic characters such as 'A' and '-'. Emacs recognizes | |
| 1040 two control character sets and many graphic character sets. | |
| 1041 | |
| 1042 Graphic character sets are classified into one of the following | |
| 1043 four classes, according to the number of bytes (DIMENSION) and | |
| 1044 number of characters in one dimension (CHARS) of the set: | |
| 1045 - DIMENSION1_CHARS94 | |
| 1046 - DIMENSION1_CHARS96 | |
| 1047 - DIMENSION2_CHARS94 | |
| 1048 - DIMENSION2_CHARS96 | |
| 1049 | |
| 1050 In addition, each character set is assigned an identification tag, | |
| 1051 unique for each set, called the "final character" (denoted as <F> | |
| 1052 hereafter). The <F> of each character set is decided by ECMA(*) | |
| 1053 when it is registered in ISO. The code range of <F> is 0x30..0x7F | |
| 1054 (0x30..0x3F are for private use only). | |
| 1055 | |
| 1056 Note (*): ECMA = European Computer Manufacturers Association | |
| 1057 | |
| 1058 Here are examples of graphic character sets [NAME(<F>)]: | |
| 1059 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ... | |
| 1060 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ... | |
| 1061 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ... | |
| 1062 o DIMENSION2_CHARS96 -- none for the moment | |
| 1063 | |
| 1064 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR. | |
| 1065 C0 [0x00..0x1F] -- control character plane 0 | |
| 1066 GL [0x20..0x7F] -- graphic character plane 0 | |
| 1067 C1 [0x80..0x9F] -- control character plane 1 | |
| 1068 GR [0xA0..0xFF] -- graphic character plane 1 | |
| 1069 | |
| 1070 A control character set is directly designated and invoked to C0 or | |
| 1071 C1 by an escape sequence. The most common case is that: | |
| 1072 - ISO646's control character set is designated/invoked to C0, and | |
| 1073 - ISO6429's control character set is designated/invoked to C1, | |
| 1074 and usually these designations/invocations are omitted in encoded | |
| 1075 text. In a 7-bit environment, only C0 can be used, and a control | |
| 1076 character for C1 is encoded by an appropriate escape sequence to | |
| 1077 fit into the environment. All control characters for C1 are | |
| 1078 defined to have corresponding escape sequences. | |
| 1079 | |
| 1080 A graphic character set is at first designated to one of four | |
| 1081 graphic registers (G0 through G3), then these graphic registers are | |
| 1082 invoked to GL or GR. These designations and invocations can be | |
| 1083 done independently. The most common case is that G0 is invoked to | |
| 1084 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually | |
| 1085 these invocations and designations are omitted in encoded text. | |
| 1086 In a 7-bit environment, only GL can be used. | |
| 1087 | |
| 1088 When a graphic character set of CHARS94 is invoked to GL, codes | |
| 1089 0x20 and 0x7F of the GL area work as control characters SPACE and | |
| 1090 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not | |
| 1091 be used. | |
| 1092 | |
| 1093 There are two ways of invocation: locking-shift and single-shift. | |
| 1094 With locking-shift, the invocation lasts until the next different | |
| 1095 invocation, whereas with single-shift, the invocation affects the | |
| 1096 following character only and doesn't affect the locking-shift | |
| 1097 state. Invocations are done by the following control characters or | |
| 1098 escape sequences: | |
| 1099 | |
| 1100 ---------------------------------------------------------------------- | |
| 1101 abbrev function cntrl escape seq description | |
| 1102 ---------------------------------------------------------------------- | |
| 1103 SI/LS0 (shift-in) 0x0F none invoke G0 into GL | |
| 1104 SO/LS1 (shift-out) 0x0E none invoke G1 into GL | |
| 1105 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL | |
| 1106 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL | |
| 1107 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*) | |
| 1108 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*) | |
| 1109 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*) | |
| 1110 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char | |
| 1111 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char | |
| 1112 ---------------------------------------------------------------------- | |
| 1113 (*) These are not used by any known coding system. | |
| 1114 | |
| 1115 Control characters for these functions are defined by macros | |
| 1116 ISO_CODE_XXX in `coding.h'. | |
| 1117 | |
| 1118 Designations are done by the following escape sequences: | |
| 1119 ---------------------------------------------------------------------- | |
| 1120 escape sequence description | |
| 1121 ---------------------------------------------------------------------- | |
| 1122 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0 | |
| 1123 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1 | |
| 1124 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2 | |
| 1125 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3 | |
| 1126 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*) | |
| 1127 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1 | |
| 1128 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2 | |
| 1129 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3 | |
| 1130 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**) | |
| 1131 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1 | |
| 1132 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2 | |
| 1133 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3 | |
| 1134 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*) | |
| 1135 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1 | |
| 1136 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2 | |
| 1137 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3 | |
| 1138 ---------------------------------------------------------------------- | |
| 1139 | |
| 1140 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set | |
| 1141 of dimension 1, chars 94, and final character <F>, etc... | |
| 1142 | |
| 1143 Note (*): Although these designations are not allowed in ISO2022, | |
| 1144 Emacs accepts them on decoding, and produces them on encoding | |
| 1145 CHARS96 character sets in a coding system which is characterized as | |
| 1146 7-bit environment, non-locking-shift, and non-single-shift. | |
| 1147 | |
| 1148 Note (**): If <F> is '@', 'A', or 'B', the intermediate character | |
| 1149 '(' can be omitted. We refer to this as "short-form" hereafter. | |
| 1150 | |
| 1151 Now you may notice that there are a lot of ways of encoding the | |
| 1152 same multilingual text in ISO2022. Actually, there exist many | |
| 1153 coding systems such as Compound Text (used in X11's inter client | |
| 1154 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR | |
| 1155 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian | |
| 1156 localized platforms), and all of these are variants of ISO2022. | |
| 1157 | |
| 1158 In addition to the above, Emacs handles two more kinds of escape | |
| 1159 sequences: ISO6429's direction specification and Emacs' private | |
| 1160 sequence for specifying character composition. | |
| 1161 | |
| 1162 ISO6429's direction specification takes the following form: | |
| 1163 o CSI ']' -- end of the current direction | |
| 1164 o CSI '0' ']' -- end of the current direction | |
| 1165 o CSI '1' ']' -- start of left-to-right text | |
| 1166 o CSI '2' ']' -- start of right-to-left text | |
| 1167 The control character CSI (0x9B: control sequence introducer) is | |
| 1168 abbreviated to the escape sequence ESC '[' in a 7-bit environment. | |
| 1169 | |
| 1170 Character composition specification takes the following form: | |
| 1171 o ESC '0' -- start relative composition | |
| 1172 o ESC '1' -- end composition | |
| 1173 o ESC '2' -- start rule-base composition (*) | |
| 1174 o ESC '3' -- start relative composition with alternate chars (**) | |
| 1175 o ESC '4' -- start rule-base composition with alternate chars (**) | |
| 1176 Since these are not standard escape sequences of any ISO standard, | |
| 1177 the use of them with these meanings is restricted to Emacs only. | |
| 1178 | |
| 1179 (*) This form is used only in Emacs 20.5 and older versions, | |
| 1180 but the newer versions can safely decode it. | |
| 1181 (**) This form is used only in Emacs 21.1 and newer versions, | |
| 1182 and the older versions can't decode it. | |
| 1183 | |
| 1184 Here's a list of example usages of these composition escape | |
| 1185 sequences (categorized by `enum composition_method'). | |
| 1186 | |
| 1187 COMPOSITION_RELATIVE: | |
| 1188 ESC 0 CHAR [ CHAR ] ESC 1 | |
| 1189 COMPOSITION_WITH_RULE: | |
| 1190 ESC 2 CHAR [ RULE CHAR ] ESC 1 | |
| 1191 COMPOSITION_WITH_ALTCHARS: | |
| 1192 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 | |
| 1193 COMPOSITION_WITH_RULE_ALTCHARS: | |
| 1194 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | |
| 1195 | |
| 1196 static void | |
| 1197 reset_iso2022_decode (Lisp_Object coding_system, | |
| 1198 struct iso2022_coding_stream *data) | |
| 1199 { | |
| 1200 int i; | |
| 1201 #ifdef ENABLE_COMPOSITE_CHARS | |
| 1202 unsigned_char_dynarr *old_composite_chars = data->composite_chars; | |
| 1203 #endif | |
| 1204 | |
| 1205 xzero (*data); | |
| 1206 | |
| 1207 for (i = 0; i < 4; i++) | |
| 1208 { | |
| 1209 if (!NILP (coding_system)) | |
| 1210 data->charset[i] = | |
| 1211 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
| 1212 else | |
| 1213 data->charset[i] = Qt; | |
| 1214 } | |
| 1215 data->esc = ISO_ESC_NOTHING; | |
| 1216 data->register_right = 1; | |
| 1217 #ifdef ENABLE_COMPOSITE_CHARS | |
| 1218 if (old_composite_chars) | |
| 1219 { | |
| 1220 data->composite_chars = old_composite_chars; | |
| 1221 Dynarr_reset (data->composite_chars); | |
| 1222 } | |
| 1223 #endif | |
| 1224 } | |
| 1225 | |
| 1226 static void | |
| 1227 reset_iso2022_encode (Lisp_Object coding_system, | |
| 1228 struct iso2022_coding_stream *data) | |
| 1229 { | |
| 1230 int i; | |
| 1231 | |
| 1232 xzero (*data); | |
| 1233 | |
| 1234 for (i = 0; i < 4; i++) | |
| 1235 { | |
| 1236 data->charset[i] = | |
| 1237 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
| 1238 data->force_charset_on_output[i] = | |
| 1239 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (coding_system, i); | |
| 1240 } | |
| 1241 data->register_right = 1; | |
| 1242 data->current_charset = Qnil; | |
| 1243 data->current_char_boundary = 1; | |
| 1244 } | |
| 1245 | |
| 1246 static void | |
| 1247 iso2022_init_coding_stream (struct coding_stream *str) | |
| 1248 { | |
| 1249 if (str->direction == CODING_DECODE) | |
| 1250 reset_iso2022_decode (str->codesys, | |
| 1251 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
| 1252 else | |
| 1253 reset_iso2022_encode (str->codesys, | |
| 1254 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
| 1255 } | |
| 1256 | |
| 1257 static void | |
| 1258 iso2022_rewind_coding_stream (struct coding_stream *str) | |
| 1259 { | |
| 1260 iso2022_init_coding_stream (str); | |
| 1261 } | |
| 1262 | |
| 1263 static int | |
| 1264 fit_to_be_escape_quoted (unsigned char c) | |
| 1265 { | |
| 1266 switch (c) | |
| 1267 { | |
| 1268 case ISO_CODE_ESC: | |
| 1269 case ISO_CODE_CSI: | |
| 1270 case ISO_CODE_SS2: | |
| 1271 case ISO_CODE_SS3: | |
| 1272 case ISO_CODE_SO: | |
| 1273 case ISO_CODE_SI: | |
| 1274 return 1; | |
| 1275 | |
| 1276 default: | |
| 1277 return 0; | |
| 1278 } | |
| 1279 } | |
| 1280 | |
| 1281 static Lisp_Object | |
| 867 | 1282 charset_by_attributes_or_create_one (int type, Ibyte final, int dir) |
| 771 | 1283 { |
| 826 | 1284 Lisp_Object charset = charset_by_attributes (type, final, dir); |
| 771 | 1285 |
| 1286 if (NILP (charset)) | |
| 1287 { | |
| 1288 int chars, dim; | |
| 1289 | |
| 1290 switch (type) | |
| 1291 { | |
| 1292 case CHARSET_TYPE_94: | |
| 1293 chars = 94; dim = 1; | |
| 1294 break; | |
| 1295 case CHARSET_TYPE_96: | |
| 1296 chars = 96; dim = 1; | |
| 1297 break; | |
| 1298 case CHARSET_TYPE_94X94: | |
| 1299 chars = 94; dim = 2; | |
| 1300 break; | |
| 1301 case CHARSET_TYPE_96X96: | |
| 1302 chars = 96; dim = 2; | |
| 1303 break; | |
| 1304 default: | |
| 2500 | 1305 ABORT (); chars = 0; dim = 0; |
| 771 | 1306 } |
| 1307 | |
| 1308 charset = Fmake_charset (Qunbound, Qnil, | |
| 1309 nconc2 (list6 (Qfinal, make_char (final), | |
| 1310 Qchars, make_int (chars), | |
| 1311 Qdimension, make_int (dim)), | |
| 1312 list2 (Qdirection, | |
| 1313 dir == CHARSET_LEFT_TO_RIGHT ? | |
| 1314 Ql2r : Qr2l))); | |
| 1315 } | |
| 1316 | |
| 1317 return charset; | |
| 1318 } | |
| 1319 | |
| 1320 /* Parse one byte of an ISO2022 escape sequence. | |
| 1321 If the result is an invalid escape sequence, return 0 and | |
| 1322 do not change anything in STR. Otherwise, if the result is | |
| 1323 an incomplete escape sequence, update ISO2022.ESC and | |
| 1324 ISO2022.ESC_BYTES and return -1. Otherwise, update | |
| 1325 all the state variables (but not ISO2022.ESC_BYTES) and | |
| 1326 return 1. | |
| 1327 | |
| 1328 If CHECK_INVALID_CHARSETS is non-zero, check for designation | |
| 1329 or invocation of an invalid character set and treat that as | |
| 1330 an unrecognized escape sequence. | |
| 1331 | |
| 2367 | 1332 */ |
| 771 | 1333 |
| 1334 static int | |
| 1335 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_coding_stream *iso, | |
| 1336 unsigned char c, unsigned int *flags, | |
| 1337 int check_invalid_charsets) | |
| 1338 { | |
| 1339 /* (1) If we're at the end of a designation sequence, CS is the | |
| 1340 charset being designated and REG is the register to designate | |
| 1341 it to. | |
| 1342 | |
| 1343 (2) If we're at the end of a locking-shift sequence, REG is | |
| 1344 the register to invoke and HALF (0 == left, 1 == right) is | |
| 1345 the half to invoke it into. | |
| 1346 | |
| 1347 (3) If we're at the end of a single-shift sequence, REG is | |
| 1348 the register to invoke. */ | |
| 1349 Lisp_Object cs = Qnil; | |
| 1350 int reg, half; | |
| 1351 | |
| 1352 /* NOTE: This code does goto's all over the fucking place. | |
| 1353 The reason for this is that we're basically implementing | |
| 1354 a state machine here, and hierarchical languages like C | |
| 1355 don't really provide a clean way of doing this. */ | |
| 1356 | |
| 1357 if (! (*flags & ISO_STATE_ESCAPE)) | |
| 1358 /* At beginning of escape sequence; we need to reset our | |
| 1359 escape-state variables. */ | |
| 1360 iso->esc = ISO_ESC_NOTHING; | |
| 1361 | |
| 1362 iso->output_literally = 0; | |
| 1363 iso->output_direction_sequence = 0; | |
| 1364 | |
| 1365 switch (iso->esc) | |
| 1366 { | |
| 1367 case ISO_ESC_NOTHING: | |
| 1368 iso->esc_bytes_index = 0; | |
| 1369 switch (c) | |
| 1370 { | |
| 1371 case ISO_CODE_ESC: /* Start escape sequence */ | |
| 1372 *flags |= ISO_STATE_ESCAPE; | |
| 1373 iso->esc = ISO_ESC; | |
| 1374 goto not_done; | |
| 1375 | |
| 1376 case ISO_CODE_CSI: /* ISO6429 (specifying directionality) */ | |
| 1377 *flags |= ISO_STATE_ESCAPE; | |
| 1378 iso->esc = ISO_ESC_5_11; | |
| 1379 goto not_done; | |
| 1380 | |
| 1381 case ISO_CODE_SO: /* locking shift 1 */ | |
| 1382 reg = 1; half = 0; | |
| 1383 goto locking_shift; | |
| 1384 case ISO_CODE_SI: /* locking shift 0 */ | |
| 1385 reg = 0; half = 0; | |
| 1386 goto locking_shift; | |
| 1387 | |
| 1388 case ISO_CODE_SS2: /* single shift */ | |
| 1389 reg = 2; | |
| 1390 goto single_shift; | |
| 1391 case ISO_CODE_SS3: /* single shift */ | |
| 1392 reg = 3; | |
| 1393 goto single_shift; | |
| 1394 | |
| 1395 default: /* Other control characters */ | |
| 1396 error: | |
| 1397 *flags &= ISO_STATE_LOCK; | |
| 1398 return 0; | |
| 1399 } | |
| 1400 | |
| 1401 case ISO_ESC: | |
| 3439 | 1402 |
| 1403 /* The only available ISO 2022 sequence in UTF-8 mode is ESC % @, to | |
| 1404 exit from it. If we see any other escape sequence, pass it through | |
| 1405 in the error handler. */ | |
| 1406 if (*flags & ISO_STATE_UTF_8 && '%' != c) | |
| 1407 { | |
| 1408 return 0; | |
| 1409 } | |
| 1410 | |
| 771 | 1411 switch (c) |
| 1412 { | |
| 1413 /**** single shift ****/ | |
| 1414 | |
| 1415 case 'N': /* single shift 2 */ | |
| 1416 reg = 2; | |
| 1417 goto single_shift; | |
| 1418 case 'O': /* single shift 3 */ | |
| 1419 reg = 3; | |
| 1420 goto single_shift; | |
| 1421 | |
| 1422 /**** locking shift ****/ | |
| 1423 | |
| 1424 case '~': /* locking shift 1 right */ | |
| 1425 reg = 1; half = 1; | |
| 1426 goto locking_shift; | |
| 1427 case 'n': /* locking shift 2 */ | |
| 1428 reg = 2; half = 0; | |
| 1429 goto locking_shift; | |
| 1430 case '}': /* locking shift 2 right */ | |
| 1431 reg = 2; half = 1; | |
| 1432 goto locking_shift; | |
| 1433 case 'o': /* locking shift 3 */ | |
| 1434 reg = 3; half = 0; | |
| 1435 goto locking_shift; | |
| 1436 case '|': /* locking shift 3 right */ | |
| 1437 reg = 3; half = 1; | |
| 1438 goto locking_shift; | |
| 1439 | |
| 1440 /**** composite ****/ | |
| 1441 | |
| 1442 #ifdef ENABLE_COMPOSITE_CHARS | |
| 1443 case '0': | |
| 1444 iso->esc = ISO_ESC_START_COMPOSITE; | |
| 1445 *flags = (*flags & ISO_STATE_LOCK) | | |
| 1446 ISO_STATE_COMPOSITE; | |
| 1447 return 1; | |
| 1448 | |
| 1449 case '1': | |
| 1450 iso->esc = ISO_ESC_END_COMPOSITE; | |
| 1451 *flags = (*flags & ISO_STATE_LOCK) & | |
| 1452 ~ISO_STATE_COMPOSITE; | |
| 1453 return 1; | |
| 1454 #else | |
| 1455 case '0': case '1': case '2': case '3': case '4': | |
| 1456 /* We simply return a flag indicating that some composite | |
| 1457 escape was seen. The caller will use the particular | |
| 1458 character to encode the appropriate "composite hack" | |
| 1459 character out of Vcharset_composite, so that we will | |
| 1460 preserve these values on output. */ | |
| 1461 iso->esc = ISO_ESC_START_COMPOSITE; | |
| 1462 *flags &= ISO_STATE_LOCK; | |
| 1463 return 1; | |
| 1464 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 1465 | |
| 1466 /**** directionality ****/ | |
| 1467 | |
| 1468 case '[': | |
| 1469 iso->esc = ISO_ESC_5_11; | |
| 1470 goto not_done; | |
| 1471 | |
| 1472 /**** designation ****/ | |
| 1473 | |
| 1474 case '$': /* multibyte charset prefix */ | |
| 1475 iso->esc = ISO_ESC_2_4; | |
| 1476 goto not_done; | |
| 1477 | |
| 3439 | 1478 case '%': /* Prefix to an escape to or from Unicode. */ |
| 1479 iso->esc = ISO_ESC_2_5; | |
| 1480 goto not_done; | |
| 1481 | |
| 771 | 1482 default: |
| 1483 if (0x28 <= c && c <= 0x2F) | |
| 1484 { | |
| 1485 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8); | |
| 1486 goto not_done; | |
| 1487 } | |
| 1488 | |
| 1489 /* This function is called with CODESYS equal to nil when | |
| 1490 doing coding-system detection. */ | |
| 1491 if (!NILP (codesys) | |
| 1492 && XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
| 1493 && fit_to_be_escape_quoted (c)) | |
| 1494 { | |
| 1495 iso->esc = ISO_ESC_LITERAL; | |
| 1496 *flags &= ISO_STATE_LOCK; | |
| 1497 return 1; | |
| 1498 } | |
| 1499 | |
| 1500 /* bzzzt! */ | |
| 1501 goto error; | |
| 1502 } | |
| 1503 | |
| 3439 | 1504 /* ISO-IR 196 UTF-8 support. */ |
| 1505 case ISO_ESC_2_5: | |
| 1506 if ('G' == c) | |
| 1507 { | |
| 1508 /* Activate UTF-8 mode. */ | |
| 1509 *flags &= ISO_STATE_LOCK; | |
| 1510 *flags |= ISO_STATE_UTF_8; | |
| 1511 iso->esc = ISO_ESC_NOTHING; | |
| 1512 return 1; | |
| 1513 } | |
| 1514 else if ('@' == c) | |
| 1515 { | |
| 1516 /* Deactive UTF-8 mode. */ | |
| 1517 *flags &= ISO_STATE_LOCK; | |
| 1518 *flags &= ~(ISO_STATE_UTF_8); | |
| 1519 iso->esc = ISO_ESC_NOTHING; | |
| 1520 return 1; | |
| 1521 } | |
| 1522 else | |
| 1523 { | |
| 1524 /* Oops, we don't support the other UTF-? coding systems within | |
| 1525 ISO 2022, only in their own context. */ | |
| 1526 goto error; | |
| 1527 } | |
| 771 | 1528 /**** directionality ****/ |
| 1529 | |
| 1530 case ISO_ESC_5_11: /* ISO6429 direction control */ | |
| 1531 if (c == ']') | |
| 1532 { | |
| 1533 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
| 1534 goto directionality; | |
| 1535 } | |
| 1536 if (c == '0') iso->esc = ISO_ESC_5_11_0; | |
| 1537 else if (c == '1') iso->esc = ISO_ESC_5_11_1; | |
| 1538 else if (c == '2') iso->esc = ISO_ESC_5_11_2; | |
| 1539 else goto error; | |
| 1540 goto not_done; | |
| 1541 | |
| 1542 case ISO_ESC_5_11_0: | |
| 1543 if (c == ']') | |
| 1544 { | |
| 1545 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
| 1546 goto directionality; | |
| 1547 } | |
| 1548 goto error; | |
| 1549 | |
| 1550 case ISO_ESC_5_11_1: | |
| 1551 if (c == ']') | |
| 1552 { | |
| 1553 *flags = (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
| 1554 goto directionality; | |
| 1555 } | |
| 1556 goto error; | |
| 1557 | |
| 1558 case ISO_ESC_5_11_2: | |
| 1559 if (c == ']') | |
| 1560 { | |
| 1561 *flags = (*flags & ISO_STATE_LOCK) | ISO_STATE_R2L; | |
| 1562 goto directionality; | |
| 1563 } | |
| 1564 goto error; | |
| 1565 | |
| 1566 directionality: | |
| 1567 iso->esc = ISO_ESC_DIRECTIONALITY; | |
| 1568 /* Various junk here to attempt to preserve the direction sequences | |
| 1569 literally in the text if they would otherwise be swallowed due | |
| 1570 to invalid designations that don't show up as actual charset | |
| 1571 changes in the text. */ | |
| 1572 if (iso->invalid_switch_dir) | |
| 1573 { | |
| 1574 /* We already inserted a direction switch literally into the | |
| 1575 text. We assume (#### this may not be right) that the | |
| 1576 next direction switch is the one going the other way, | |
| 1577 and we need to output that literally as well. */ | |
| 1578 iso->output_literally = 1; | |
| 1579 iso->invalid_switch_dir = 0; | |
| 1580 } | |
| 1581 else | |
| 1582 { | |
| 1583 int jj; | |
| 1584 | |
| 1585 /* If we are in the thrall of an invalid designation, | |
| 1586 then stick the directionality sequence literally into the | |
| 1587 output stream so it ends up in the original text again. */ | |
| 1588 for (jj = 0; jj < 4; jj++) | |
| 1589 if (iso->invalid_designated[jj]) | |
| 1590 break; | |
| 1591 if (jj < 4) | |
| 1592 { | |
| 1593 iso->output_literally = 1; | |
| 1594 iso->invalid_switch_dir = 1; | |
| 1595 } | |
| 1596 else | |
| 1597 /* Indicate that we haven't yet seen a valid designation, | |
| 1598 so that if a switch-dir is directly followed by an | |
| 1599 invalid designation, both get inserted literally. */ | |
| 1600 iso->switched_dir_and_no_valid_charset_yet = 1; | |
| 1601 } | |
| 1602 return 1; | |
| 1603 | |
| 1604 | |
| 1605 /**** designation ****/ | |
| 1606 | |
| 1607 case ISO_ESC_2_4: | |
| 1608 if (0x28 <= c && c <= 0x2F) | |
| 1609 { | |
| 1610 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8); | |
| 1611 goto not_done; | |
| 1612 } | |
| 1613 if (0x40 <= c && c <= 0x42) | |
| 1614 { | |
| 1615 cs = charset_by_attributes_or_create_one (CHARSET_TYPE_94X94, c, | |
| 1616 *flags & ISO_STATE_R2L ? | |
| 1617 CHARSET_RIGHT_TO_LEFT : | |
| 1618 CHARSET_LEFT_TO_RIGHT); | |
| 1619 reg = 0; | |
| 1620 goto designated; | |
| 1621 } | |
| 1622 goto error; | |
| 1623 | |
| 1624 default: | |
| 1625 { | |
| 1626 int type = -1; | |
| 1627 | |
| 1628 if (iso->esc >= ISO_ESC_2_8 && | |
| 1629 iso->esc <= ISO_ESC_2_15) | |
| 1630 { | |
| 1631 type = ((iso->esc >= ISO_ESC_2_12) ? | |
| 1632 CHARSET_TYPE_96 : CHARSET_TYPE_94); | |
| 1633 reg = (iso->esc - ISO_ESC_2_8) & 3; | |
| 1634 } | |
| 1635 else if (iso->esc >= ISO_ESC_2_4_8 && | |
| 1636 iso->esc <= ISO_ESC_2_4_15) | |
| 1637 { | |
| 1638 type = ((iso->esc >= ISO_ESC_2_4_12) ? | |
| 1639 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94); | |
| 1640 reg = (iso->esc - ISO_ESC_2_4_8) & 3; | |
| 1641 } | |
| 1642 else | |
| 1643 { | |
| 1644 /* Can this ever be reached? -slb */ | |
| 2500 | 1645 ABORT (); |
| 771 | 1646 goto error; |
| 1647 } | |
| 1648 | |
| 1649 if (c < '0' || c > '~' || | |
| 1650 (c > 0x5F && (type == CHARSET_TYPE_94X94 || | |
| 1651 type == CHARSET_TYPE_96X96))) | |
| 1652 goto error; /* bad final byte */ | |
| 1653 | |
| 1654 cs = charset_by_attributes_or_create_one (type, c, | |
| 1655 *flags & ISO_STATE_R2L ? | |
| 1656 CHARSET_RIGHT_TO_LEFT : | |
| 1657 CHARSET_LEFT_TO_RIGHT); | |
| 1658 goto designated; | |
| 1659 } | |
| 1660 } | |
| 1661 | |
| 1662 not_done: | |
| 1663 iso->esc_bytes[iso->esc_bytes_index++] = (unsigned char) c; | |
| 1664 return -1; | |
| 1665 | |
| 1666 single_shift: | |
| 1667 if (check_invalid_charsets && !CHARSETP (iso->charset[reg])) | |
| 1668 /* can't invoke something that ain't there. */ | |
| 1669 goto error; | |
| 1670 iso->esc = ISO_ESC_SINGLE_SHIFT; | |
| 1671 *flags &= ISO_STATE_LOCK; | |
| 1672 if (reg == 2) | |
| 1673 *flags |= ISO_STATE_SS2; | |
| 1674 else | |
| 1675 *flags |= ISO_STATE_SS3; | |
| 1676 return 1; | |
| 1677 | |
| 1678 locking_shift: | |
| 1679 if (check_invalid_charsets && | |
| 1680 !CHARSETP (iso->charset[reg])) | |
| 1681 /* can't invoke something that ain't there. */ | |
| 1682 goto error; | |
| 1683 if (half) | |
| 1684 iso->register_right = reg; | |
| 1685 else | |
| 1686 iso->register_left = reg; | |
| 1687 *flags &= ISO_STATE_LOCK; | |
| 1688 iso->esc = ISO_ESC_LOCKING_SHIFT; | |
| 1689 return 1; | |
| 1690 | |
| 1691 designated: | |
| 1692 if (NILP (cs) && check_invalid_charsets) | |
| 1693 { | |
| 2500 | 1694 ABORT (); |
| 771 | 1695 /* #### This should never happen now that we automatically create |
| 1696 temporary charsets as necessary. We should probably remove | |
| 1697 this code. --ben */ | |
| 1698 iso->invalid_designated[reg] = 1; | |
| 1699 iso->charset[reg] = Vcharset_ascii; | |
| 1700 iso->esc = ISO_ESC_DESIGNATE; | |
| 1701 *flags &= ISO_STATE_LOCK; | |
| 1702 iso->output_literally = 1; | |
| 1703 if (iso->switched_dir_and_no_valid_charset_yet) | |
| 1704 { | |
| 1705 /* We encountered a switch-direction followed by an | |
| 1706 invalid designation. Ensure that the switch-direction | |
| 1707 gets outputted; otherwise it will probably get eaten | |
| 1708 when the text is written out again. */ | |
| 1709 iso->switched_dir_and_no_valid_charset_yet = 0; | |
| 1710 iso->output_direction_sequence = 1; | |
| 1711 /* And make sure that the switch-dir going the other | |
| 1712 way gets outputted, as well. */ | |
| 1713 iso->invalid_switch_dir = 1; | |
| 1714 } | |
| 1715 return 1; | |
| 1716 } | |
| 1717 /* This function is called with CODESYS equal to nil when | |
| 1718 doing coding-system detection. */ | |
| 1719 if (!NILP (codesys)) | |
| 1720 { | |
| 1721 charset_conversion_spec_dynarr *dyn = | |
| 1722 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys); | |
| 1723 | |
| 1724 if (dyn) | |
| 1725 { | |
| 1726 int i; | |
| 1727 | |
| 1728 for (i = 0; i < Dynarr_length (dyn); i++) | |
| 1729 { | |
| 1730 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
| 1731 if (EQ (cs, spec->from_charset)) | |
| 1732 cs = spec->to_charset; | |
| 1733 } | |
| 1734 } | |
| 1735 } | |
| 1736 | |
| 1737 iso->charset[reg] = cs; | |
| 1738 iso->esc = ISO_ESC_DESIGNATE; | |
| 1739 *flags &= ISO_STATE_LOCK; | |
| 1740 if (iso->invalid_designated[reg]) | |
| 1741 { | |
| 1742 iso->invalid_designated[reg] = 0; | |
| 1743 iso->output_literally = 1; | |
| 1744 } | |
| 1745 if (iso->switched_dir_and_no_valid_charset_yet) | |
| 1746 iso->switched_dir_and_no_valid_charset_yet = 0; | |
| 1747 return 1; | |
| 1748 } | |
| 1749 | |
| 1750 /* If FLAGS is a null pointer or specifies right-to-left motion, | |
| 1751 output a switch-dir-to-left-to-right sequence to DST. | |
| 1752 Also update FLAGS if it is not a null pointer. | |
| 1753 If INTERNAL_P is set, we are outputting in internal format and | |
| 1754 need to handle the CSI differently. */ | |
| 1755 | |
| 1756 static void | |
| 1757 restore_left_to_right_direction (Lisp_Object codesys, | |
| 1758 unsigned_char_dynarr *dst, | |
| 1759 unsigned int *flags, | |
| 1760 int internal_p) | |
| 1761 { | |
| 1762 if (!flags || (*flags & ISO_STATE_R2L)) | |
| 1763 { | |
| 1764 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
| 1765 { | |
| 1766 Dynarr_add (dst, ISO_CODE_ESC); | |
| 1767 Dynarr_add (dst, '['); | |
| 1768 } | |
| 1769 else if (internal_p) | |
| 1770 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
| 1771 else | |
| 1772 Dynarr_add (dst, ISO_CODE_CSI); | |
| 1773 Dynarr_add (dst, '0'); | |
| 1774 Dynarr_add (dst, ']'); | |
| 1775 if (flags) | |
| 1776 *flags &= ~ISO_STATE_R2L; | |
| 1777 } | |
| 1778 } | |
| 1779 | |
| 1780 /* If FLAGS is a null pointer or specifies a direction different from | |
| 1781 DIRECTION (which should be either CHARSET_RIGHT_TO_LEFT or | |
| 1782 CHARSET_LEFT_TO_RIGHT), output the appropriate switch-dir escape | |
| 1783 sequence to DST. Also update FLAGS if it is not a null pointer. | |
| 1784 If INTERNAL_P is set, we are outputting in internal format and | |
| 1785 need to handle the CSI differently. */ | |
| 1786 | |
| 1787 static void | |
| 1788 ensure_correct_direction (int direction, Lisp_Object codesys, | |
| 1789 unsigned_char_dynarr *dst, unsigned int *flags, | |
| 1790 int internal_p) | |
| 1791 { | |
| 1792 if ((!flags || (*flags & ISO_STATE_R2L)) && | |
| 1793 direction == CHARSET_LEFT_TO_RIGHT) | |
| 1794 restore_left_to_right_direction (codesys, dst, flags, internal_p); | |
| 1795 else if (!XCODING_SYSTEM_ISO2022_NO_ISO6429 (codesys) | |
| 1796 && (!flags || !(*flags & ISO_STATE_R2L)) && | |
| 1797 direction == CHARSET_RIGHT_TO_LEFT) | |
| 1798 { | |
| 1799 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
| 1800 { | |
| 1801 Dynarr_add (dst, ISO_CODE_ESC); | |
| 1802 Dynarr_add (dst, '['); | |
| 1803 } | |
| 1804 else if (internal_p) | |
| 1805 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
| 1806 else | |
| 1807 Dynarr_add (dst, ISO_CODE_CSI); | |
| 1808 Dynarr_add (dst, '2'); | |
| 1809 Dynarr_add (dst, ']'); | |
| 1810 if (flags) | |
| 1811 *flags |= ISO_STATE_R2L; | |
| 1812 } | |
| 1813 } | |
| 1814 | |
| 4096 | 1815 /* Note that this name conflicts with a function in unicode.c. */ |
| 1816 static void | |
| 1817 decode_unicode_char (int ucs, unsigned_char_dynarr *dst) | |
| 1818 { | |
| 1819 Ibyte work[MAX_ICHAR_LEN]; | |
| 1820 int len; | |
| 1821 Lisp_Object chr; | |
| 1822 | |
| 1823 chr = Funicode_to_char(make_int(ucs), Qnil); | |
| 1824 assert (!NILP(chr)); | |
| 1825 len = set_itext_ichar (work, XCHAR(chr)); | |
| 1826 Dynarr_add_many (dst, work, len); | |
| 1827 } | |
| 1828 | |
| 1829 #define DECODE_ERROR_OCTET(octet, dst) \ | |
| 1830 decode_unicode_char ((octet) + UNICODE_ERROR_OCTET_RANGE_START, dst) | |
| 1831 | |
| 1832 static inline void | |
| 1833 indicate_invalid_utf_8 (unsigned char indicated_length, | |
| 1834 unsigned char counter, | |
| 1835 int ch, unsigned_char_dynarr *dst) | |
| 1836 { | |
| 1837 Binbyte stored = indicated_length - counter; | |
| 1838 Binbyte mask = "\x00\x00\xC0\xE0\xF0\xF8\xFC"[indicated_length]; | |
| 1839 | |
| 1840 while (stored > 0) | |
| 1841 { | |
| 1842 DECODE_ERROR_OCTET (((ch >> (6 * (stored - 1))) & 0x3f) | mask, | |
| 1843 dst); | |
| 1844 mask = 0x80, stored--; | |
| 1845 } | |
| 1846 } | |
| 1847 | |
| 771 | 1848 /* Convert ISO2022-format data to internal format. */ |
| 1849 | |
| 1850 static Bytecount | |
| 1851 iso2022_decode (struct coding_stream *str, const UExtbyte *src, | |
| 1852 unsigned_char_dynarr *dst, Bytecount n) | |
| 1853 { | |
| 1854 unsigned int ch = str->ch; | |
| 1855 #ifdef ENABLE_COMPOSITE_CHARS | |
| 1856 unsigned_char_dynarr *real_dst = dst; | |
| 1857 #endif | |
| 1858 struct iso2022_coding_stream *data = | |
| 1859 CODING_STREAM_TYPE_DATA (str, iso2022); | |
| 1860 unsigned int flags = data->flags; | |
| 1861 Bytecount orign = n; | |
| 1862 | |
| 1863 #ifdef ENABLE_COMPOSITE_CHARS | |
| 1864 if (flags & ISO_STATE_COMPOSITE) | |
| 1865 dst = data->composite_chars; | |
| 1866 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 1867 | |
| 1868 while (n--) | |
| 1869 { | |
| 1870 UExtbyte c = *src++; | |
| 1871 if (flags & ISO_STATE_ESCAPE) | |
| 1872 { /* Within ESC sequence */ | |
| 1873 int retval = parse_iso2022_esc (str->codesys, data, | |
| 1874 c, &flags, 1); | |
| 1875 | |
| 1876 if (retval) | |
| 1877 { | |
| 1878 switch (data->esc) | |
| 1879 { | |
| 1880 #ifdef ENABLE_COMPOSITE_CHARS | |
| 1881 case ISO_ESC_START_COMPOSITE: | |
| 1882 if (data->composite_chars) | |
| 1883 Dynarr_reset (data->composite_chars); | |
| 1884 else | |
| 1885 data->composite_chars = Dynarr_new (unsigned_char); | |
| 1886 dst = data->composite_chars; | |
| 1887 break; | |
| 1888 case ISO_ESC_END_COMPOSITE: | |
| 1889 { | |
| 867 | 1890 Ibyte comstr[MAX_ICHAR_LEN]; |
| 771 | 1891 Bytecount len; |
| 4967 | 1892 Ichar emch = lookup_composite_char (Dynarr_begin (dst), |
| 771 | 1893 Dynarr_length (dst)); |
| 1894 dst = real_dst; | |
| 867 | 1895 len = set_itext_ichar (comstr, emch); |
| 771 | 1896 Dynarr_add_many (dst, comstr, len); |
| 1897 break; | |
| 1898 } | |
| 1899 #else | |
| 1900 case ISO_ESC_START_COMPOSITE: | |
| 1901 { | |
| 867 | 1902 Ibyte comstr[MAX_ICHAR_LEN]; |
| 771 | 1903 Bytecount len; |
| 867 | 1904 Ichar emch = make_ichar (Vcharset_composite, c - '0' + ' ', |
| 771 | 1905 0); |
| 867 | 1906 len = set_itext_ichar (comstr, emch); |
| 771 | 1907 Dynarr_add_many (dst, comstr, len); |
| 1908 break; | |
| 1909 } | |
| 1910 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 1911 | |
| 1912 case ISO_ESC_LITERAL: | |
| 1913 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 1914 break; | |
| 1915 | |
| 1916 default: | |
| 1917 /* Everything else handled already */ | |
| 1918 break; | |
| 1919 } | |
| 1920 } | |
| 1921 | |
| 1922 /* Attempted error recovery. */ | |
| 1923 if (data->output_direction_sequence) | |
| 1924 ensure_correct_direction (flags & ISO_STATE_R2L ? | |
| 1925 CHARSET_RIGHT_TO_LEFT : | |
| 1926 CHARSET_LEFT_TO_RIGHT, | |
| 1927 str->codesys, dst, 0, 1); | |
| 1928 /* More error recovery. */ | |
| 1929 if (!retval || data->output_literally) | |
| 1930 { | |
| 1931 /* Output the (possibly invalid) sequence */ | |
| 1932 int i; | |
| 1933 for (i = 0; i < data->esc_bytes_index; i++) | |
| 1934 DECODE_ADD_BINARY_CHAR (data->esc_bytes[i], dst); | |
| 1935 flags &= ISO_STATE_LOCK; | |
| 1936 if (!retval) | |
| 1937 n++, src--;/* Repeat the loop with the same character. */ | |
| 1938 else | |
| 1939 { | |
| 1940 /* No sense in reprocessing the final byte of the | |
| 1941 escape sequence; it could mess things up anyway. | |
| 1942 Just add it now. */ | |
| 1943 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 1944 } | |
| 1945 } | |
| 1946 ch = 0; | |
| 1947 } | |
| 3439 | 1948 else if (flags & ISO_STATE_UTF_8) |
| 1949 { | |
| 1950 unsigned char counter = data->counter; | |
| 4096 | 1951 unsigned char indicated_length = data->indicated_length; |
| 3439 | 1952 |
| 1953 if (ISO_CODE_ESC == c) | |
| 1954 { | |
| 1955 /* Allow the escape sequence parser to end the UTF-8 state. */ | |
| 1956 flags |= ISO_STATE_ESCAPE; | |
| 1957 data->esc = ISO_ESC; | |
| 1958 data->esc_bytes_index = 1; | |
| 1959 continue; | |
| 1960 } | |
| 1961 | |
| 4096 | 1962 if (0 == counter) |
| 1963 { | |
| 1964 if (0 == (c & 0x80)) | |
| 1965 { | |
| 1966 /* ASCII. */ | |
| 1967 decode_unicode_char (c, dst); | |
| 1968 } | |
| 1969 else if (0 == (c & 0x40)) | |
| 1970 { | |
| 1971 /* Highest bit set, second highest not--there's | |
| 1972 something wrong. */ | |
| 1973 DECODE_ERROR_OCTET (c, dst); | |
| 1974 } | |
| 1975 else if (0 == (c & 0x20)) | |
| 1976 { | |
| 1977 ch = c & 0x1f; | |
| 1978 counter = 1; | |
| 1979 indicated_length = 2; | |
| 1980 } | |
| 1981 else if (0 == (c & 0x10)) | |
| 1982 { | |
| 1983 ch = c & 0x0f; | |
| 1984 counter = 2; | |
| 1985 indicated_length = 3; | |
| 1986 } | |
| 1987 else if (0 == (c & 0x08)) | |
| 1988 { | |
| 1989 ch = c & 0x0f; | |
| 1990 counter = 3; | |
| 1991 indicated_length = 4; | |
| 1992 } | |
| 1993 /* We support lengths longer than 4 here, since we want to | |
| 1994 represent UTF-8 error chars as distinct from the | |
| 1995 corresponding ISO 8859-1 characters in escape-quoted. | |
| 1996 | |
| 1997 However, we can't differentiate UTF-8 error chars as | |
| 1998 written to disk, and UTF-8 errors in escape-quoted. This | |
| 1999 is not a big problem; | |
| 2000 non-Unicode-chars-encoded-as-UTF-8-in-ISO-2022 is not | |
| 2001 deployed, in practice, so if such a sequence of octets | |
| 2002 occurs, XEmacs generated it. */ | |
| 2003 else if (0 == (c & 0x04)) | |
| 2004 { | |
| 2005 ch = c & 0x03; | |
| 2006 counter = 4; | |
| 2007 indicated_length = 5; | |
| 2008 } | |
| 2009 else if (0 == (c & 0x02)) | |
| 2010 { | |
| 2011 ch = c & 0x01; | |
| 2012 counter = 5; | |
| 2013 indicated_length = 6; | |
| 2014 } | |
| 2015 else | |
| 2016 { | |
| 2017 /* #xFF is not a valid leading byte in any form of | |
| 2018 UTF-8. */ | |
| 2019 DECODE_ERROR_OCTET (c, dst); | |
| 2020 | |
| 2021 } | |
| 2022 } | |
| 2023 else | |
| 2024 { | |
| 2025 /* counter != 0 */ | |
| 2026 if ((0 == (c & 0x80)) || (0 != (c & 0x40))) | |
| 2027 { | |
| 2028 indicate_invalid_utf_8(indicated_length, | |
| 2029 counter, | |
| 2030 ch, dst); | |
| 2031 if (c & 0x80) | |
| 2032 { | |
| 2033 DECODE_ERROR_OCTET (c, dst); | |
| 2034 } | |
| 2035 else | |
| 2036 { | |
| 2037 /* The character just read is ASCII. Treat it as | |
| 2038 such. */ | |
| 2039 decode_unicode_char (c, dst); | |
| 2040 } | |
| 2041 ch = 0; | |
| 2042 counter = 0; | |
| 2043 } | |
| 2044 else | |
| 2045 { | |
| 2046 ch = (ch << 6) | (c & 0x3f); | |
| 2047 counter--; | |
| 2048 | |
| 2049 /* Just processed the final byte. Emit the character. */ | |
| 2050 if (!counter) | |
| 2051 { | |
| 2052 /* Don't accept over-long sequences, or surrogates. */ | |
| 2053 if ((ch < 0x80) || | |
| 2054 ((ch < 0x800) && indicated_length > 2) || | |
| 2055 ((ch < 0x10000) && indicated_length > 3) || | |
| 2056 /* We accept values above #x110000 in | |
| 2057 escape-quoted, though not in UTF-8. */ | |
| 2058 /* (ch > 0x110000) || */ | |
| 2059 valid_utf_16_surrogate(ch)) | |
| 2060 { | |
| 2061 indicate_invalid_utf_8(indicated_length, | |
| 2062 counter, | |
| 2063 ch, dst); | |
| 2064 } | |
| 2065 else | |
| 2066 { | |
| 2067 decode_unicode_char (ch, dst); | |
| 2068 } | |
| 2069 ch = 0; | |
| 2070 } | |
| 2071 } | |
| 2072 } | |
| 2073 | |
| 2074 if (str->eof && ch) | |
| 2075 { | |
| 2076 DECODE_ERROR_OCTET (ch, dst); | |
| 2077 ch = 0; | |
| 2078 } | |
| 3439 | 2079 |
| 2080 data->counter = counter; | |
| 4096 | 2081 data->indicated_length = indicated_length; |
| 3439 | 2082 } |
| 826 | 2083 else if (byte_c0_p (c) || byte_c1_p (c)) |
| 771 | 2084 { /* Control characters */ |
| 2085 | |
| 2086 /***** Error-handling *****/ | |
| 2087 | |
| 2088 /* If we were in the middle of a character, dump out the | |
| 2089 partial character. */ | |
| 2090 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2091 | |
| 2092 /* If we just saw a single-shift character, dump it out. | |
| 2093 This may dump out the wrong sort of single-shift character, | |
| 2094 but least it will give an indication that something went | |
| 2095 wrong. */ | |
| 2096 if (flags & ISO_STATE_SS2) | |
| 2097 { | |
| 2098 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst); | |
| 2099 flags &= ~ISO_STATE_SS2; | |
| 2100 } | |
| 2101 if (flags & ISO_STATE_SS3) | |
| 2102 { | |
| 2103 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst); | |
| 2104 flags &= ~ISO_STATE_SS3; | |
| 2105 } | |
| 2106 | |
| 2107 /***** Now handle the control characters. *****/ | |
| 2108 | |
| 2109 flags &= ISO_STATE_LOCK; | |
| 2110 | |
| 2111 if (!parse_iso2022_esc (str->codesys, data, c, &flags, 1)) | |
| 2112 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 2113 } | |
| 2114 else | |
| 2115 { /* Graphic characters */ | |
| 2116 Lisp_Object charset; | |
| 2117 int lb; | |
| 2118 int reg; | |
| 2119 | |
| 2120 /* Now determine the charset. */ | |
| 2121 reg = ((flags & ISO_STATE_SS2) ? 2 | |
| 2122 : (flags & ISO_STATE_SS3) ? 3 | |
| 826 | 2123 : !byte_ascii_p (c) ? data->register_right |
| 771 | 2124 : data->register_left); |
| 2125 charset = data->charset[reg]; | |
| 2126 | |
| 2127 /* Error checking: */ | |
| 2128 if (! CHARSETP (charset) | |
| 2129 || data->invalid_designated[reg] | |
| 2130 || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL) | |
| 2131 && XCHARSET_CHARS (charset) == 94)) | |
| 2132 /* Mrmph. We are trying to invoke a register that has no | |
| 2133 or an invalid charset in it, or trying to add a character | |
| 2134 outside the range of the charset. Insert that char literally | |
| 2135 to preserve it for the output. */ | |
| 2136 { | |
| 2137 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2138 DECODE_ADD_BINARY_CHAR (c, dst); | |
| 2139 } | |
| 2140 | |
| 2141 else | |
| 2142 { | |
| 2143 /* Things are probably hunky-dorey. */ | |
| 2144 | |
| 2145 /* Fetch reverse charset, maybe. */ | |
| 2146 if (((flags & ISO_STATE_R2L) && | |
| 2147 XCHARSET_DIRECTION (charset) == CHARSET_LEFT_TO_RIGHT) | |
| 2148 || | |
| 2149 (!(flags & ISO_STATE_R2L) && | |
| 2150 XCHARSET_DIRECTION (charset) == CHARSET_RIGHT_TO_LEFT)) | |
| 2151 { | |
| 2152 Lisp_Object new_charset = | |
| 2153 XCHARSET_REVERSE_DIRECTION_CHARSET (charset); | |
| 2154 if (!NILP (new_charset)) | |
| 2155 charset = new_charset; | |
| 2156 } | |
| 2157 | |
| 2158 lb = XCHARSET_LEADING_BYTE (charset); | |
| 2159 switch (XCHARSET_REP_BYTES (charset)) | |
| 2160 { | |
| 2161 case 1: /* ASCII */ | |
| 2162 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2163 Dynarr_add (dst, c & 0x7F); | |
| 2164 break; | |
| 2165 | |
| 2166 case 2: /* one-byte official */ | |
| 2167 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2168 Dynarr_add (dst, lb); | |
| 2169 Dynarr_add (dst, c | 0x80); | |
| 2170 break; | |
| 2171 | |
| 2172 case 3: /* one-byte private or two-byte official */ | |
| 2173 if (XCHARSET_PRIVATE_P (charset)) | |
| 2174 { | |
| 2175 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2176 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1); | |
| 2177 Dynarr_add (dst, lb); | |
| 2178 Dynarr_add (dst, c | 0x80); | |
| 2179 } | |
| 2180 else | |
| 2181 { | |
| 2182 if (ch) | |
| 2183 { | |
| 2184 Dynarr_add (dst, lb); | |
| 2185 Dynarr_add (dst, ch | 0x80); | |
| 2186 Dynarr_add (dst, c | 0x80); | |
| 2187 ch = 0; | |
| 2188 } | |
| 2189 else | |
| 2190 ch = c; | |
| 2191 } | |
| 2192 break; | |
| 2193 | |
| 2194 default: /* two-byte private */ | |
| 2195 if (ch) | |
| 2196 { | |
| 2197 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2); | |
| 2198 Dynarr_add (dst, lb); | |
| 2199 Dynarr_add (dst, ch | 0x80); | |
| 2200 Dynarr_add (dst, c | 0x80); | |
| 2201 ch = 0; | |
| 2202 } | |
| 2203 else | |
| 2204 ch = c; | |
| 2205 } | |
| 2206 } | |
| 2207 | |
| 2208 if (!ch) | |
| 2209 flags &= ISO_STATE_LOCK; | |
| 2210 } | |
| 2211 | |
| 2212 } | |
| 2213 | |
| 2214 if (str->eof) | |
| 2215 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
| 2216 | |
| 2217 data->flags = flags; | |
| 2218 str->ch = ch; | |
| 2219 return orign; | |
| 2220 } | |
| 2221 | |
| 2222 | |
| 2223 /***** ISO2022 encoder *****/ | |
| 2224 | |
| 2225 /* Designate CHARSET into register REG. */ | |
| 2226 | |
| 2227 static void | |
| 2228 iso2022_designate (Lisp_Object charset, int reg, | |
| 2229 struct coding_stream *str, unsigned_char_dynarr *dst) | |
| 2230 { | |
| 2231 static const char inter94[] = "()*+"; | |
| 2232 static const char inter96[] = ",-./"; | |
| 2233 int type; | |
| 2234 unsigned char final; | |
| 2235 struct iso2022_coding_stream *data = | |
| 2236 CODING_STREAM_TYPE_DATA (str, iso2022); | |
| 2237 Lisp_Object old_charset = data->charset[reg]; | |
| 2238 | |
| 2239 data->charset[reg] = charset; | |
| 2240 if (!CHARSETP (charset)) | |
| 2241 /* charset might be an initial nil or t. */ | |
| 2242 return; | |
| 2243 type = XCHARSET_TYPE (charset); | |
| 2244 final = XCHARSET_FINAL (charset); | |
| 2245 if (!data->force_charset_on_output[reg] && | |
| 2246 CHARSETP (old_charset) && | |
| 2247 XCHARSET_TYPE (old_charset) == type && | |
| 2248 XCHARSET_FINAL (old_charset) == final) | |
| 2249 return; | |
| 2250 | |
| 2251 data->force_charset_on_output[reg] = 0; | |
| 2252 | |
| 2253 { | |
| 2254 charset_conversion_spec_dynarr *dyn = | |
| 2255 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (str->codesys); | |
| 2256 | |
| 2257 if (dyn) | |
| 2258 { | |
| 2259 int i; | |
| 2260 | |
| 2261 for (i = 0; i < Dynarr_length (dyn); i++) | |
| 2262 { | |
| 2263 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
| 2264 if (EQ (charset, spec->from_charset)) | |
| 2265 charset = spec->to_charset; | |
| 2266 } | |
| 2267 } | |
| 2268 } | |
| 2269 | |
| 2270 Dynarr_add (dst, ISO_CODE_ESC); | |
| 3439 | 2271 |
| 771 | 2272 switch (type) |
| 2273 { | |
| 2274 case CHARSET_TYPE_94: | |
| 2275 Dynarr_add (dst, inter94[reg]); | |
| 2276 break; | |
| 2277 case CHARSET_TYPE_96: | |
| 2278 Dynarr_add (dst, inter96[reg]); | |
| 2279 break; | |
| 2280 case CHARSET_TYPE_94X94: | |
| 2281 Dynarr_add (dst, '$'); | |
| 2282 if (reg != 0 | |
| 2283 || !(XCODING_SYSTEM_ISO2022_SHORT (str->codesys)) | |
| 2284 || final < '@' | |
| 2285 || final > 'B') | |
| 2286 Dynarr_add (dst, inter94[reg]); | |
| 2287 break; | |
| 2288 case CHARSET_TYPE_96X96: | |
| 2289 Dynarr_add (dst, '$'); | |
| 2290 Dynarr_add (dst, inter96[reg]); | |
| 2291 break; | |
| 2292 } | |
| 2293 Dynarr_add (dst, final); | |
| 2294 } | |
| 2295 | |
| 2296 static void | |
| 2297 ensure_normal_shift (struct coding_stream *str, unsigned_char_dynarr *dst) | |
| 2298 { | |
| 2299 struct iso2022_coding_stream *data = | |
| 2300 CODING_STREAM_TYPE_DATA (str, iso2022); | |
| 2301 | |
| 2302 if (data->register_left != 0) | |
| 2303 { | |
| 2304 Dynarr_add (dst, ISO_CODE_SI); | |
| 2305 data->register_left = 0; | |
| 2306 } | |
| 2307 } | |
| 2308 | |
| 2309 static void | |
| 2310 ensure_shift_out (struct coding_stream *str, unsigned_char_dynarr *dst) | |
| 2311 { | |
| 2312 struct iso2022_coding_stream *data = | |
| 2313 CODING_STREAM_TYPE_DATA (str, iso2022); | |
| 2314 | |
| 2315 if (data->register_left != 1) | |
| 2316 { | |
| 2317 Dynarr_add (dst, ISO_CODE_SO); | |
| 2318 data->register_left = 1; | |
| 2319 } | |
| 2320 } | |
| 2321 | |
| 2322 /* Convert internally-formatted data to ISO2022 format. */ | |
| 2323 | |
| 2324 static Bytecount | |
| 867 | 2325 iso2022_encode (struct coding_stream *str, const Ibyte *src, |
| 771 | 2326 unsigned_char_dynarr *dst, Bytecount n) |
| 2327 { | |
| 2328 unsigned char charmask; | |
| 867 | 2329 Ibyte c; |
| 771 | 2330 unsigned char char_boundary; |
| 2331 unsigned int ch = str->ch; | |
| 2332 Lisp_Object codesys = str->codesys; | |
| 2333 int i; | |
| 2334 Lisp_Object charset; | |
| 2335 int half; | |
| 2336 struct iso2022_coding_stream *data = | |
| 2337 CODING_STREAM_TYPE_DATA (str, iso2022); | |
| 2338 unsigned int flags = data->flags; | |
| 2339 Bytecount orign = n; | |
| 2340 | |
| 2341 #ifdef ENABLE_COMPOSITE_CHARS | |
| 2342 /* flags for handling composite chars. We do a little switcheroo | |
| 2343 on the source while we're outputting the composite char. */ | |
| 2344 Bytecount saved_n = 0; | |
| 867 | 2345 const Ibyte *saved_src = NULL; |
| 771 | 2346 int in_composite = 0; |
| 2347 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 2348 | |
| 2349 char_boundary = data->current_char_boundary; | |
| 2350 charset = data->current_charset; | |
| 2351 half = data->current_half; | |
| 2352 | |
| 2353 #ifdef ENABLE_COMPOSITE_CHARS | |
| 2354 back_to_square_n: | |
| 2355 #endif | |
| 2356 while (n--) | |
| 2357 { | |
| 2358 c = *src++; | |
| 2359 | |
| 826 | 2360 if (byte_ascii_p (c)) |
| 771 | 2361 { /* Processing ASCII character */ |
| 2362 ch = 0; | |
| 2363 | |
| 3439 | 2364 if (flags & ISO_STATE_UTF_8) |
| 2365 { | |
| 2366 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2367 Dynarr_add (dst, '%'); | |
| 2368 Dynarr_add (dst, '@'); | |
| 2369 flags &= ~(ISO_STATE_UTF_8); | |
| 2370 } | |
| 2371 | |
| 771 | 2372 restore_left_to_right_direction (codesys, dst, &flags, 0); |
| 2373 | |
| 2374 /* Make sure G0 contains ASCII */ | |
| 2375 if ((c > ' ' && c < ISO_CODE_DEL) || | |
| 2376 !XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys)) | |
| 2377 { | |
| 2378 ensure_normal_shift (str, dst); | |
| 2379 iso2022_designate (Vcharset_ascii, 0, str, dst); | |
| 2380 } | |
| 2381 | |
| 2382 /* If necessary, restore everything to the default state | |
| 2383 at end-of-line */ | |
| 2384 if (!(XCODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys))) | |
| 2385 { | |
| 2386 /* NOTE: CRLF encoding happens *BEFORE* other encoding. | |
| 2387 Thus, even though we're working with internal-format | |
| 2388 data, there may be CR's or CRLF sequences representing | |
| 2389 newlines. */ | |
| 2390 if (c == '\r' || (c == '\n' && !(flags & ISO_STATE_CR))) | |
| 2391 { | |
| 2392 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
| 2393 | |
| 2394 ensure_normal_shift (str, dst); | |
| 2395 | |
| 2396 for (i = 0; i < 4; i++) | |
| 2397 { | |
| 2398 Lisp_Object initial_charset = | |
| 2399 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
| 2400 iso2022_designate (initial_charset, i, str, dst); | |
| 2401 } | |
| 2402 } | |
| 2403 if (c == '\r') | |
| 2404 flags |= ISO_STATE_CR; | |
| 2405 else | |
| 2406 flags &= ~ISO_STATE_CR; | |
| 2407 } | |
| 2408 | |
| 2409 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
| 2410 && fit_to_be_escape_quoted (c)) | |
| 2411 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2412 Dynarr_add (dst, c); | |
| 2413 char_boundary = 1; | |
| 2414 } | |
| 867 | 2415 else if (ibyte_leading_byte_p (c) || ibyte_leading_byte_p (ch)) |
| 771 | 2416 { /* Processing Leading Byte */ |
| 2417 ch = 0; | |
| 826 | 2418 charset = charset_by_leading_byte (c); |
| 2419 if (leading_byte_prefix_p (c)) | |
| 3439 | 2420 { |
| 2421 ch = c; | |
| 2422 } | |
| 2423 else if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
| 2424 { | |
| 2425 assert (!EQ (charset, Vcharset_control_1) | |
| 2426 && !EQ (charset, Vcharset_composite)); | |
| 2427 | |
| 2428 /* If the character set is to be encoded as UTF-8, the escape | |
| 2429 is always the same. */ | |
| 2430 if (!(flags & ISO_STATE_UTF_8)) | |
| 2431 { | |
| 2432 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2433 Dynarr_add (dst, '%'); | |
| 2434 Dynarr_add (dst, 'G'); | |
| 2435 flags |= ISO_STATE_UTF_8; | |
| 2436 } | |
| 2437 } | |
| 771 | 2438 else if (!EQ (charset, Vcharset_control_1) |
| 2439 && !EQ (charset, Vcharset_composite)) | |
| 2440 { | |
| 2441 int reg; | |
| 2442 | |
| 3439 | 2443 /* End the UTF-8 state. */ |
| 2444 if (flags & ISO_STATE_UTF_8) | |
| 2445 { | |
| 2446 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2447 Dynarr_add (dst, '%'); | |
| 2448 Dynarr_add (dst, '@'); | |
| 2449 flags &= ~(ISO_STATE_UTF_8); | |
| 2450 } | |
| 2451 | |
| 771 | 2452 ensure_correct_direction (XCHARSET_DIRECTION (charset), |
| 2453 codesys, dst, &flags, 0); | |
| 2454 | |
| 2455 /* Now determine which register to use. */ | |
| 2456 reg = -1; | |
| 2457 for (i = 0; i < 4; i++) | |
| 2458 { | |
| 2459 if (EQ (charset, data->charset[i]) || | |
| 2460 EQ (charset, | |
| 2461 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))) | |
| 2462 { | |
| 2463 reg = i; | |
| 2464 break; | |
| 2465 } | |
| 2466 } | |
| 2467 | |
| 2468 if (reg == -1) | |
| 2469 { | |
| 2470 if (XCHARSET_GRAPHIC (charset) != 0) | |
| 2471 { | |
| 2472 if (!NILP (data->charset[1]) && | |
| 2473 (!XCODING_SYSTEM_ISO2022_SEVEN (codesys) || | |
| 2474 XCODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys))) | |
| 2475 reg = 1; | |
| 2476 else if (!NILP (data->charset[2])) | |
| 2477 reg = 2; | |
| 2478 else if (!NILP (data->charset[3])) | |
| 2479 reg = 3; | |
| 2480 else | |
| 2481 reg = 0; | |
| 2482 } | |
| 2483 else | |
| 2484 reg = 0; | |
| 2485 } | |
| 2486 | |
| 2487 iso2022_designate (charset, reg, str, dst); | |
| 2488 | |
| 2489 /* Now invoke that register. */ | |
| 2490 switch (reg) | |
| 2491 { | |
| 2492 case 0: | |
| 2493 ensure_normal_shift (str, dst); | |
| 2494 half = 0; | |
| 2495 break; | |
| 2496 | |
| 2497 case 1: | |
| 2498 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
| 2499 { | |
| 2500 ensure_shift_out (str, dst); | |
| 2501 half = 0; | |
| 2502 } | |
| 2503 else | |
| 2504 half = 1; | |
| 2505 break; | |
| 2506 | |
| 2507 case 2: | |
| 2508 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
| 2509 { | |
| 2510 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2511 Dynarr_add (dst, 'N'); | |
| 2512 half = 0; | |
| 2513 } | |
| 2514 else | |
| 2515 { | |
| 2516 Dynarr_add (dst, ISO_CODE_SS2); | |
| 2517 half = 1; | |
| 2518 } | |
| 2519 break; | |
| 2520 | |
| 2521 case 3: | |
| 2522 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
| 2523 { | |
| 2524 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2525 Dynarr_add (dst, 'O'); | |
| 2526 half = 0; | |
| 2527 } | |
| 2528 else | |
| 2529 { | |
| 2530 Dynarr_add (dst, ISO_CODE_SS3); | |
| 2531 half = 1; | |
| 2532 } | |
| 2533 break; | |
| 2534 | |
| 2535 default: | |
| 2500 | 2536 ABORT (); |
| 771 | 2537 } |
| 2538 } | |
| 2539 char_boundary = 0; | |
| 2540 } | |
| 2541 else | |
| 2542 { /* Processing Non-ASCII character */ | |
| 2543 charmask = (half == 0 ? 0x7F : 0xFF); | |
| 2544 char_boundary = 1; | |
| 2545 if (EQ (charset, Vcharset_control_1)) | |
| 2546 { | |
| 2547 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
|
5100
3d91f0b64469
fix bad bug with escape-quoted handling
Ben Wing <ben@xemacs.org>
parents:
4976
diff
changeset
|
2548 && fit_to_be_escape_quoted (c - 0x20)) |
| 771 | 2549 Dynarr_add (dst, ISO_CODE_ESC); |
| 2550 /* you asked for it ... */ | |
| 2551 Dynarr_add (dst, c - 0x20); | |
| 2552 } | |
| 2553 #ifndef ENABLE_COMPOSITE_CHARS | |
| 2554 else if (EQ (charset, Vcharset_composite)) | |
| 2555 { | |
| 2556 if (c >= 160 || c <= 164) /* Someone might have stuck in | |
| 2557 something else */ | |
| 2558 { | |
| 2559 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2560 Dynarr_add (dst, c - 160 + '0'); | |
| 2561 } | |
| 2562 } | |
| 2563 #endif | |
| 2564 else | |
| 2565 { | |
| 2566 switch (XCHARSET_REP_BYTES (charset)) | |
| 2567 { | |
| 2568 case 2: | |
| 3439 | 2569 dynarr_add_2022_one_dimension (charset, c, |
| 2570 charmask, dst); | |
| 771 | 2571 break; |
| 2572 case 3: | |
| 2573 if (XCHARSET_PRIVATE_P (charset)) | |
| 2574 { | |
| 3439 | 2575 dynarr_add_2022_one_dimension (charset, c, |
| 2576 charmask, dst); | |
| 771 | 2577 ch = 0; |
| 2578 } | |
| 2579 else if (ch) | |
| 2580 { | |
| 2581 #ifdef ENABLE_COMPOSITE_CHARS | |
| 2582 if (EQ (charset, Vcharset_composite)) | |
| 2583 { | |
| 3439 | 2584 /* #### Hasn't been written to handle composite |
| 2585 characters yet. */ | |
| 2586 assert(!XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
| 771 | 2587 if (in_composite) |
| 2588 { | |
| 2589 /* #### Bother! We don't know how to | |
| 2590 handle this yet. */ | |
| 2591 Dynarr_add (dst, '~'); | |
| 2592 } | |
| 2593 else | |
| 2594 { | |
| 867 | 2595 Ichar emch = make_ichar (Vcharset_composite, |
| 771 | 2596 ch & 0x7F, c & 0x7F); |
| 2597 Lisp_Object lstr = composite_char_string (emch); | |
| 2598 saved_n = n; | |
| 2599 saved_src = src; | |
| 2600 in_composite = 1; | |
| 2601 src = XSTRING_DATA (lstr); | |
| 2602 n = XSTRING_LENGTH (lstr); | |
| 2603 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2604 Dynarr_add (dst, '0'); /* start composing */ | |
| 2605 } | |
| 2606 } | |
| 2607 else | |
| 2608 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 2609 { | |
| 3439 | 2610 dynarr_add_2022_two_dimensions (charset, c, ch, |
| 2611 charmask, dst); | |
| 771 | 2612 } |
| 2613 ch = 0; | |
| 2614 } | |
| 2615 else | |
| 2616 { | |
| 2617 ch = c; | |
| 2618 char_boundary = 0; | |
| 2619 } | |
| 2620 break; | |
| 2621 case 4: | |
| 2622 if (ch) | |
| 2623 { | |
| 3439 | 2624 dynarr_add_2022_two_dimensions (charset, c, ch, |
| 2625 charmask, dst); | |
| 771 | 2626 ch = 0; |
| 2627 } | |
| 2628 else | |
| 2629 { | |
| 2630 ch = c; | |
| 2631 char_boundary = 0; | |
| 2632 } | |
| 2633 break; | |
| 2634 default: | |
| 2500 | 2635 ABORT (); |
| 771 | 2636 } |
| 2637 } | |
| 2638 } | |
| 2639 } | |
| 2640 | |
| 2641 #ifdef ENABLE_COMPOSITE_CHARS | |
| 2642 if (in_composite) | |
| 2643 { | |
| 2644 n = saved_n; | |
| 2645 src = saved_src; | |
| 2646 in_composite = 0; | |
| 2647 Dynarr_add (dst, ISO_CODE_ESC); | |
| 2648 Dynarr_add (dst, '1'); /* end composing */ | |
| 2649 goto back_to_square_n; /* Wheeeeeeeee ..... */ | |
| 2650 } | |
| 2651 #endif /* ENABLE_COMPOSITE_CHARS */ | |
| 2652 | |
| 2653 if (char_boundary && str->eof) | |
| 2654 { | |
| 2655 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
| 2656 ensure_normal_shift (str, dst); | |
| 2657 for (i = 0; i < 4; i++) | |
| 2658 { | |
| 2659 Lisp_Object initial_charset = | |
| 2660 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
| 2661 iso2022_designate (initial_charset, i, str, dst); | |
| 2662 } | |
| 2663 } | |
| 2664 | |
| 2665 data->flags = flags; | |
| 2666 str->ch = ch; | |
| 2667 data->current_char_boundary = char_boundary; | |
| 2668 data->current_charset = charset; | |
| 2669 data->current_half = half; | |
| 2670 | |
| 2671 /* Verbum caro factum est! */ | |
| 2672 return orign; | |
| 2673 } | |
| 2674 | |
| 2675 static Bytecount | |
| 2676 iso2022_convert (struct coding_stream *str, | |
| 2677 const UExtbyte *src, | |
| 2678 unsigned_char_dynarr *dst, Bytecount n) | |
| 2679 { | |
| 2680 if (str->direction == CODING_DECODE) | |
| 2681 return iso2022_decode (str, src, dst, n); | |
| 2682 else | |
| 2683 return iso2022_encode (str, src, dst, n); | |
| 2684 } | |
| 2685 | |
| 2686 static void | |
| 2687 iso2022_mark (Lisp_Object codesys) | |
| 2688 { | |
| 2689 int i; | |
| 2690 | |
| 2691 for (i = 0; i < 4; i++) | |
| 2692 mark_object (XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); | |
| 2693 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)) | |
| 2694 { | |
| 2695 for (i = 0; | |
| 2696 i < Dynarr_length (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)); | |
| 2697 i++) | |
| 2698 { | |
| 2699 struct charset_conversion_spec *ccs = | |
| 2700 Dynarr_atp (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), i); | |
| 2701 mark_object (ccs->from_charset); | |
| 2702 mark_object (ccs->to_charset); | |
| 2703 } | |
| 2704 } | |
| 2705 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)) | |
| 2706 { | |
| 2707 for (i = 0; | |
| 2708 i < Dynarr_length (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)); | |
| 2709 i++) | |
| 2710 { | |
| 2711 struct charset_conversion_spec *ccs = | |
| 2712 Dynarr_atp (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), i); | |
| 2713 mark_object (ccs->from_charset); | |
| 2714 mark_object (ccs->to_charset); | |
| 2715 } | |
| 2716 } | |
| 2717 } | |
| 2718 | |
| 2719 static void | |
| 2720 iso2022_finalize (Lisp_Object cs) | |
| 2721 { | |
| 2722 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)) | |
| 2723 { | |
| 2724 Dynarr_free (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)); | |
| 2725 XCODING_SYSTEM_ISO2022_INPUT_CONV (cs) = 0; | |
| 2726 } | |
| 2727 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)) | |
| 2728 { | |
| 2729 Dynarr_free (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)); | |
| 2730 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs) = 0; | |
| 2731 } | |
| 2732 } | |
| 2733 | |
| 2734 /* Given a list of charset conversion specs as specified in a Lisp | |
| 2735 program, parse it into STORE_HERE. */ | |
| 2736 | |
| 2737 static void | |
| 2738 parse_charset_conversion_specs (charset_conversion_spec_dynarr *store_here, | |
| 2739 Lisp_Object spec_list) | |
| 2740 { | |
| 2367 | 2741 EXTERNAL_LIST_LOOP_2 (car, spec_list) |
| 771 | 2742 { |
| 2743 Lisp_Object from, to; | |
| 2744 struct charset_conversion_spec spec; | |
| 2745 | |
| 2746 if (!CONSP (car) || !CONSP (XCDR (car)) || !NILP (XCDR (XCDR (car)))) | |
| 2747 invalid_argument ("Invalid charset conversion spec", car); | |
| 2748 from = Fget_charset (XCAR (car)); | |
| 2749 to = Fget_charset (XCAR (XCDR (car))); | |
| 2750 if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to)) | |
| 2751 invalid_operation_2 | |
| 2752 ("Attempted conversion between different charset types", | |
| 2753 from, to); | |
| 2754 spec.from_charset = from; | |
| 2755 spec.to_charset = to; | |
| 2756 | |
| 2757 Dynarr_add (store_here, spec); | |
| 2758 } | |
| 2759 } | |
| 2760 | |
| 2761 /* Given a dynarr LOAD_HERE of internally-stored charset conversion | |
| 2762 specs, return the equivalent as the Lisp programmer would see it. | |
| 2763 | |
| 2764 If LOAD_HERE is 0, return Qnil. */ | |
| 2765 | |
| 2766 static Lisp_Object | |
| 2767 unparse_charset_conversion_specs (charset_conversion_spec_dynarr *load_here, | |
| 2768 int names) | |
| 2769 { | |
| 2770 int i; | |
| 2771 Lisp_Object result; | |
| 2772 | |
| 2773 if (!load_here) | |
| 2774 return Qnil; | |
| 2775 for (i = 0, result = Qnil; i < Dynarr_length (load_here); i++) | |
| 2776 { | |
| 2777 struct charset_conversion_spec *ccs = Dynarr_atp (load_here, i); | |
| 2778 if (names) | |
| 2779 result = Fcons (list2 (XCHARSET_NAME (ccs->from_charset), | |
| 2780 XCHARSET_NAME (ccs->to_charset)), result); | |
| 2781 else | |
| 2782 result = Fcons (list2 (ccs->from_charset, ccs->to_charset), result); | |
| 2783 } | |
| 2784 | |
| 2785 return Fnreverse (result); | |
| 2786 } | |
| 2787 | |
| 2788 static int | |
| 2789 iso2022_putprop (Lisp_Object codesys, | |
| 2790 Lisp_Object key, | |
| 2791 Lisp_Object value) | |
| 2792 { | |
| 2793 #define FROB_INITIAL_CHARSET(charset_num) \ | |
| 2794 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \ | |
| 2795 ((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value)) | |
| 2796 | |
| 2797 if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0); | |
| 2798 else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1); | |
| 2799 else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2); | |
| 2800 else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3); | |
| 2801 | |
| 2802 #define FROB_FORCE_CHARSET(charset_num) \ | |
| 2803 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = \ | |
| 2804 !NILP (value) | |
| 2805 | |
| 2806 else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0); | |
| 2807 else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1); | |
| 2808 else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2); | |
| 2809 else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3); | |
| 2810 | |
| 2811 #define FROB_BOOLEAN_PROPERTY(prop) \ | |
| 2812 XCODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value) | |
| 2813 | |
| 2814 else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT); | |
| 2815 else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL); | |
| 2816 else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL); | |
| 2817 else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN); | |
| 2818 else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT); | |
| 2819 else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429); | |
| 2820 else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED); | |
| 2821 | |
| 2822 else if (EQ (key, Qinput_charset_conversion)) | |
| 2823 { | |
| 2824 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys) = | |
| 2825 Dynarr_new (charset_conversion_spec); | |
| 2826 parse_charset_conversion_specs | |
| 2827 (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), value); | |
| 2828 } | |
| 2829 else if (EQ (key, Qoutput_charset_conversion)) | |
| 2830 { | |
| 2831 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys) = | |
| 2832 Dynarr_new (charset_conversion_spec); | |
| 2833 parse_charset_conversion_specs | |
| 2834 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), value); | |
| 2835 } | |
| 2836 else | |
| 2837 return 0; | |
| 2838 | |
| 2839 return 1; | |
| 2840 } | |
| 2841 | |
|
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2842 #ifdef ENABLE_COMPOSITE_CHARS |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2843 #define USED_IF_COMPOSITE_CHARS(x) x |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2844 #else |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2845 #define USED_IF_COMPOSITE_CHARS(x) UNUSED (x) |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2846 #endif |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2847 |
| 771 | 2848 static void |
|
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2849 iso2022_finalize_coding_stream (struct coding_stream * |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
2850 USED_IF_COMPOSITE_CHARS (str)) |
| 771 | 2851 { |
| 2852 #ifdef ENABLE_COMPOSITE_CHARS | |
| 2853 struct iso2022_coding_stream *data = | |
| 2854 CODING_STREAM_TYPE_DATA (str, iso2022); | |
| 2855 | |
| 2856 if (data->composite_chars) | |
| 2857 Dynarr_free (data->composite_chars); | |
| 2858 #endif | |
| 2859 } | |
| 2860 | |
| 2861 static void | |
| 2862 iso2022_init (Lisp_Object codesys) | |
| 2863 { | |
| 2864 int i; | |
| 2865 for (i = 0; i < 4; i++) | |
| 2866 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i) = Qnil; | |
| 2867 } | |
| 2868 | |
| 2869 static Lisp_Object | |
| 2870 coding_system_charset (Lisp_Object coding_system, int gnum) | |
| 2871 { | |
| 2872 Lisp_Object cs | |
| 2873 = XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, gnum); | |
| 2874 | |
| 2875 return CHARSETP (cs) ? XCHARSET_NAME (cs) : Qnil; | |
| 2876 } | |
| 2877 | |
| 2878 static Lisp_Object | |
| 2879 iso2022_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
| 2880 { | |
| 2881 if (EQ (prop, Qcharset_g0)) | |
| 2882 return coding_system_charset (coding_system, 0); | |
| 2883 else if (EQ (prop, Qcharset_g1)) | |
| 2884 return coding_system_charset (coding_system, 1); | |
| 2885 else if (EQ (prop, Qcharset_g2)) | |
| 2886 return coding_system_charset (coding_system, 2); | |
| 2887 else if (EQ (prop, Qcharset_g3)) | |
| 2888 return coding_system_charset (coding_system, 3); | |
| 2889 | |
| 2890 #define FORCE_CHARSET(charset_num) \ | |
| 2891 (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT \ | |
| 2892 (coding_system, charset_num) ? Qt : Qnil) | |
| 2893 | |
| 2894 else if (EQ (prop, Qforce_g0_on_output)) | |
| 2895 return FORCE_CHARSET (0); | |
| 2896 else if (EQ (prop, Qforce_g1_on_output)) | |
| 2897 return FORCE_CHARSET (1); | |
| 2898 else if (EQ (prop, Qforce_g2_on_output)) | |
| 2899 return FORCE_CHARSET (2); | |
| 2900 else if (EQ (prop, Qforce_g3_on_output)) | |
| 2901 return FORCE_CHARSET (3); | |
| 2902 | |
| 2903 #define LISP_BOOLEAN(prop) \ | |
| 2904 (XCODING_SYSTEM_ISO2022_##prop (coding_system) ? Qt : Qnil) | |
| 2905 | |
| 2906 else if (EQ (prop, Qshort)) return LISP_BOOLEAN (SHORT); | |
| 2907 else if (EQ (prop, Qno_ascii_eol)) return LISP_BOOLEAN (NO_ASCII_EOL); | |
| 2908 else if (EQ (prop, Qno_ascii_cntl)) return LISP_BOOLEAN (NO_ASCII_CNTL); | |
| 2909 else if (EQ (prop, Qseven)) return LISP_BOOLEAN (SEVEN); | |
| 2910 else if (EQ (prop, Qlock_shift)) return LISP_BOOLEAN (LOCK_SHIFT); | |
| 2911 else if (EQ (prop, Qno_iso6429)) return LISP_BOOLEAN (NO_ISO6429); | |
| 2912 else if (EQ (prop, Qescape_quoted)) return LISP_BOOLEAN (ESCAPE_QUOTED); | |
| 2913 | |
| 2914 else if (EQ (prop, Qinput_charset_conversion)) | |
| 2915 return | |
| 2916 unparse_charset_conversion_specs | |
| 2917 (XCODING_SYSTEM_ISO2022_INPUT_CONV (coding_system), 0); | |
| 2918 else if (EQ (prop, Qoutput_charset_conversion)) | |
| 2919 return | |
| 2920 unparse_charset_conversion_specs | |
| 2921 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (coding_system), 0); | |
| 2922 else | |
| 2923 return Qunbound; | |
| 2924 } | |
| 2925 | |
| 2926 static void | |
| 2286 | 2927 iso2022_print (Lisp_Object cs, Lisp_Object printcharfun, |
| 2928 int UNUSED (escapeflag)) | |
| 771 | 2929 { |
| 2930 int i; | |
| 2931 | |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2932 write_ascstring (printcharfun, "("); |
| 771 | 2933 for (i = 0; i < 4; i++) |
| 2934 { | |
| 2935 Lisp_Object charset = coding_system_charset (cs, i); | |
| 2936 if (i > 0) | |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2937 write_ascstring (printcharfun, ", "); |
| 771 | 2938 write_fmt_string (printcharfun, "g%d=", i); |
| 800 | 2939 print_internal (CHARSETP (charset) ? XCHARSET_NAME (charset) : charset, printcharfun, 0); |
| 771 | 2940 if (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (cs, i)) |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2941 write_ascstring (printcharfun, "(force)"); |
| 771 | 2942 } |
| 2943 | |
| 3084 | 2944 #define FROB(prop) \ |
| 2945 if (!NILP (iso2022_getprop (cs, prop))) \ | |
| 2946 { \ | |
| 2947 write_fmt_string_lisp (printcharfun, ", %s", 1, prop); \ | |
| 771 | 2948 } |
| 2949 | |
| 2950 FROB (Qshort); | |
| 2951 FROB (Qno_ascii_eol); | |
| 2952 FROB (Qno_ascii_cntl); | |
| 2953 FROB (Qseven); | |
| 2954 FROB (Qlock_shift); | |
| 2955 FROB (Qno_iso6429); | |
| 2956 FROB (Qescape_quoted); | |
| 2957 | |
| 2958 { | |
| 2959 Lisp_Object val = | |
| 2960 unparse_charset_conversion_specs | |
| 2961 (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs), 1); | |
| 2962 if (!NILP (val)) | |
| 2963 { | |
| 800 | 2964 write_fmt_string_lisp (printcharfun, ", input-charset-conversion=%s", 1, val); |
| 771 | 2965 } |
| 2966 val = | |
| 2967 unparse_charset_conversion_specs | |
| 2968 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs), 1); | |
| 2969 if (!NILP (val)) | |
| 2970 { | |
| 800 | 2971 write_fmt_string_lisp (printcharfun, ", output-charset-conversion=%s", 1, val); |
| 771 | 2972 } |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2973 write_ascstring (printcharfun, ")"); |
| 771 | 2974 } |
| 2975 } | |
| 2976 | |
| 2977 | |
| 2978 /************************************************************************/ | |
| 2979 /* ISO2022 detector */ | |
| 2980 /************************************************************************/ | |
| 2981 | |
| 2982 DEFINE_DETECTOR (iso2022); | |
| 2983 /* ISO2022 system using only seven-bit bytes, no locking shift */ | |
| 2984 DEFINE_DETECTOR_CATEGORY (iso2022, iso_7); | |
| 2985 /* ISO2022 system using eight-bit bytes, no locking shift, no single shift, | |
| 2986 using designation to switch charsets */ | |
| 2987 DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_designate); | |
| 2988 /* ISO2022 system using eight-bit bytes, no locking shift, no designation | |
| 2989 sequences, one-dimension characters in the upper half. */ | |
| 2990 DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_1); | |
| 2991 /* ISO2022 system using eight-bit bytes, no locking shift, no designation | |
| 2992 sequences, two-dimension characters in the upper half. */ | |
| 2993 DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_2); | |
| 2994 /* ISO2022 system using locking shift */ | |
| 2995 DEFINE_DETECTOR_CATEGORY (iso2022, iso_lock_shift); | |
| 2996 | |
| 2997 struct iso2022_detector | |
| 2998 { | |
| 2999 int initted; | |
| 3000 struct iso2022_coding_stream *iso; | |
| 3001 unsigned int flags; | |
| 3002 | |
| 3003 /* for keeping temporary track of high-byte groups */ | |
| 3004 int high_byte_count; | |
| 3005 unsigned int saw_single_shift_just_now:1; | |
| 3006 | |
| 3007 /* running state; we set the likelihoods at the end */ | |
| 3008 unsigned int seen_high_byte:1; | |
| 3009 unsigned int seen_single_shift:1; | |
| 3010 unsigned int seen_locking_shift:1; | |
| 3011 unsigned int seen_designate:1; | |
| 3012 unsigned int bad_single_byte_sequences; | |
| 3013 unsigned int bad_multibyte_escape_sequences; | |
| 3014 unsigned int good_multibyte_escape_sequences; | |
| 3015 int even_high_byte_groups; | |
| 985 | 3016 int longest_even_high_byte; |
| 771 | 3017 int odd_high_byte_groups; |
| 3018 }; | |
| 3019 | |
| 3020 static void | |
| 3021 iso2022_detect (struct detection_state *st, const UExtbyte *src, | |
| 3022 Bytecount n) | |
| 3023 { | |
| 3024 Bytecount orign = n; | |
| 3025 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
| 3026 | |
| 3027 /* #### There are serious deficiencies in the recognition mechanism | |
| 3028 here. This needs to be much smarter if it's going to cut it. | |
| 3029 The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while | |
| 3030 it should be detected as Latin-1. | |
| 3031 All the ISO2022 stuff in this file should be synced up with the | |
| 3032 code from FSF Emacs-21.0, in which Mule should be more or less stable. | |
| 3033 Perhaps we should wait till R2L works in FSF Emacs? */ | |
| 3034 | |
| 3035 /* We keep track of running state on our own, and set the categories at the | |
| 3036 end; that way we can reflect the correct state each time we finish, but | |
| 3037 not get confused by those results the next time around. */ | |
| 3038 | |
| 3039 if (!data->initted) | |
| 3040 { | |
| 3041 xzero (*data); | |
| 3042 data->iso = xnew_and_zero (struct iso2022_coding_stream); | |
| 3043 reset_iso2022_decode (Qnil, data->iso); | |
| 3044 data->initted = 1; | |
| 3045 } | |
| 3046 | |
| 3047 while (n--) | |
| 3048 { | |
| 3049 UExtbyte c = *src++; | |
| 3050 if (c >= 0x80) | |
| 3051 data->seen_high_byte = 1; | |
| 3052 if (c >= 0xA0) | |
| 3053 data->high_byte_count++; | |
| 3054 else | |
| 3055 { | |
| 3056 if (data->high_byte_count && | |
| 3057 !data->saw_single_shift_just_now) | |
| 3058 { | |
| 3059 if (data->high_byte_count & 1) | |
| 3060 data->odd_high_byte_groups++; | |
| 3061 else | |
| 985 | 3062 { |
| 3063 data->even_high_byte_groups++; | |
| 3064 if (data->longest_even_high_byte < data->high_byte_count) | |
| 3065 data->longest_even_high_byte = data->high_byte_count; | |
| 3066 } | |
| 771 | 3067 } |
| 3068 data->high_byte_count = 0; | |
| 3069 data->saw_single_shift_just_now = 0; | |
| 3070 } | |
| 3071 if (!(data->flags & ISO_STATE_ESCAPE) | |
| 826 | 3072 && (byte_c0_p (c) || byte_c1_p (c))) |
| 771 | 3073 { /* control chars */ |
| 3074 switch (c) | |
| 3075 { | |
| 3076 /* Allow and ignore control characters that you might | |
| 3077 reasonably see in a text file */ | |
| 3078 case '\r': | |
| 3079 case '\n': | |
| 3080 case '\t': | |
| 3081 case 7: /* bell */ | |
| 3082 case 8: /* backspace */ | |
| 3083 case 11: /* vertical tab */ | |
| 3084 case 12: /* form feed */ | |
| 3085 case 26: /* MS-DOS C-z junk */ | |
| 3086 case 31: /* '^_' -- for info */ | |
| 3087 goto label_continue_loop; | |
| 3088 | |
| 3089 default: | |
| 3090 break; | |
| 3091 } | |
| 3092 } | |
| 3093 | |
| 826 | 3094 if ((data->flags & ISO_STATE_ESCAPE) || byte_c0_p (c) |
| 3095 || byte_c1_p (c)) | |
| 771 | 3096 { |
| 3097 switch (parse_iso2022_esc (Qnil, data->iso, c, | |
| 3098 &data->flags, 0)) | |
| 3099 { | |
| 3100 case 1: /* done */ | |
| 3101 if (data->iso->esc_bytes_index > 0) | |
| 3102 data->good_multibyte_escape_sequences++; | |
| 3103 switch (data->iso->esc) | |
| 3104 { | |
| 3105 case ISO_ESC_DESIGNATE: | |
| 3106 data->seen_designate = 1; | |
| 3107 break; | |
| 3108 case ISO_ESC_LOCKING_SHIFT: | |
| 3109 data->seen_locking_shift = 1; | |
| 3110 break; | |
| 3111 case ISO_ESC_SINGLE_SHIFT: | |
| 3112 data->saw_single_shift_just_now = 1; | |
| 3113 data->seen_single_shift = 1; | |
| 3114 break; | |
| 3115 default: | |
| 3116 break; | |
| 3117 } | |
| 3118 break; | |
| 3119 | |
| 3120 case -1: /* not done */ | |
| 3121 break; | |
| 3122 | |
| 3123 case 0: /* error */ | |
| 3124 if (data->iso->esc == ISO_ESC_NOTHING) | |
| 3125 data->bad_single_byte_sequences++; | |
| 3126 else | |
| 3127 data->bad_multibyte_escape_sequences++; | |
| 3128 } | |
| 3129 } | |
| 3130 label_continue_loop:; | |
| 3131 } | |
| 3132 | |
| 985 | 3133 if (data->high_byte_count && |
| 3134 !data->saw_single_shift_just_now) | |
| 3135 { | |
| 3136 if (data->high_byte_count & 1) | |
| 3137 data->odd_high_byte_groups++; | |
| 3138 else | |
| 3139 { | |
| 3140 data->even_high_byte_groups++; | |
| 3141 if (data->longest_even_high_byte < data->high_byte_count) | |
| 3142 data->longest_even_high_byte = data->high_byte_count; | |
| 3143 } | |
| 3144 } | |
| 3145 | |
| 771 | 3146 if (data->bad_multibyte_escape_sequences > 2 || |
| 3147 (data->bad_multibyte_escape_sequences > 0 && | |
| 3148 data->good_multibyte_escape_sequences / | |
| 3149 data->bad_multibyte_escape_sequences < 10)) | |
| 3150 /* Just making it up ... */ | |
| 3151 SET_DET_RESULTS (st, iso2022, DET_NEARLY_IMPOSSIBLE); | |
| 3152 else if (data->bad_single_byte_sequences > 5 || | |
| 3153 (data->bad_single_byte_sequences > 0 && | |
| 3154 (data->good_multibyte_escape_sequences + | |
| 3155 data->even_high_byte_groups + | |
| 3156 data->odd_high_byte_groups) / | |
| 3157 data->bad_single_byte_sequences < 10)) | |
| 3158 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
| 3159 else if (data->seen_locking_shift) | |
| 3160 { | |
| 3161 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
| 3162 DET_RESULT (st, iso_lock_shift) = DET_QUITE_PROBABLE; | |
| 3163 } | |
| 3164 else if (!data->seen_high_byte) | |
| 3165 { | |
| 3166 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
| 3167 if (data->good_multibyte_escape_sequences) | |
| 3168 DET_RESULT (st, iso_7) = DET_QUITE_PROBABLE; | |
| 3169 else if (data->seen_single_shift) | |
| 3170 DET_RESULT (st, iso_7) = DET_SOMEWHAT_LIKELY; | |
| 3171 else | |
| 3172 { | |
| 3173 /* If we've just seen pure 7-bit data, no escape sequences, | |
| 3174 then we can't give much likelihood; but if we've seen enough | |
| 3175 of this data, we can assume some unlikelihood of any 8-bit | |
| 3176 encoding */ | |
| 3177 if (orign + st->bytes_seen >= 1000) | |
| 3178 DET_RESULT (st, iso_7) = DET_AS_LIKELY_AS_UNLIKELY; | |
| 3179 else | |
| 3180 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
| 3181 } | |
| 3182 } | |
| 3183 else if (data->seen_designate) | |
| 3184 { | |
| 3185 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
| 3186 if (data->seen_single_shift) | |
| 3187 /* #### Does this really make sense? */ | |
| 3188 DET_RESULT (st, iso_8_designate) = DET_SOMEWHAT_UNLIKELY; | |
| 3189 else | |
| 3190 DET_RESULT (st, iso_8_designate) = DET_QUITE_PROBABLE; | |
| 3191 } | |
| 3192 else if (data->odd_high_byte_groups > 0 && | |
| 3193 data->even_high_byte_groups == 0) | |
| 3194 { | |
| 3195 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
| 3196 if (data->seen_single_shift) | |
| 3197 DET_RESULT (st, iso_8_1) = DET_QUITE_PROBABLE; | |
| 3198 else | |
| 3199 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
| 3200 } | |
| 3201 else if (data->odd_high_byte_groups == 0 && | |
| 3202 data->even_high_byte_groups > 0) | |
| 3203 { | |
| 985 | 3204 #if 0 |
| 771 | 3205 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); |
| 3206 if (data->even_high_byte_groups > 10) | |
| 3207 { | |
| 3208 if (data->seen_single_shift) | |
| 3209 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
| 3210 else | |
| 3211 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
| 3212 if (data->even_high_byte_groups < 50) | |
| 3213 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_UNLIKELY; | |
| 3214 /* else it stays at quite improbable */ | |
| 3215 } | |
| 985 | 3216 #else |
| 3217 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
| 3218 if (data->seen_single_shift) | |
| 3219 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
| 3220 else if (data->even_high_byte_groups > 10) | |
| 3221 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
| 3222 else if (data->longest_even_high_byte > 6) | |
| 3223 DET_RESULT (st, iso_8_2) = DET_SLIGHTLY_LIKELY; | |
| 3224 #endif | |
| 771 | 3225 } |
| 3226 else if (data->odd_high_byte_groups > 0 && | |
| 3227 data->even_high_byte_groups > 0) | |
| 3393 | 3228 { |
| 3229 /* Well, this could be a Latin-1 text, with most high-byte | |
| 3230 characters single, but sometimes two are together, though | |
| 3231 this happens not as often. This is common for Western | |
| 3232 European languages like German, French, Danish, Swedish, etc. | |
| 3233 Then we would either have a rather small file and | |
| 3234 even_high_byte_groups would be low. | |
| 3235 Or we would have a larger file and the ratio of odd to even | |
| 3236 groups would be very high. */ | |
| 3237 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
| 3238 if (data->even_high_byte_groups <= 3 || | |
| 3239 data->odd_high_byte_groups >= 10 * data->even_high_byte_groups) | |
| 3240 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
| 3241 } | |
| 771 | 3242 else |
| 3243 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
| 3244 } | |
| 3245 | |
| 3246 static void | |
| 3247 iso2022_finalize_detection_state (struct detection_state *st) | |
| 3248 { | |
| 3249 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
| 3250 if (data->iso) | |
|
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3251 { |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3252 xfree (data->iso); |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3253 data->iso = 0; |
|
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5100
diff
changeset
|
3254 } |
| 771 | 3255 } |
| 3256 | |
| 3257 | |
| 3258 /************************************************************************/ | |
| 3259 /* CCL methods */ | |
| 3260 /************************************************************************/ | |
| 3261 | |
| 3262 /* Converter written in CCL. */ | |
| 3263 | |
| 3264 struct ccl_coding_system | |
| 3265 { | |
| 3266 /* For a CCL coding system, these specify the CCL programs used for | |
| 3267 decoding (input) and encoding (output). */ | |
| 3268 Lisp_Object decode; | |
| 3269 Lisp_Object encode; | |
| 3270 }; | |
| 3271 | |
| 3272 #define CODING_SYSTEM_CCL_DECODE(codesys) \ | |
| 3273 (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->decode) | |
| 3274 #define CODING_SYSTEM_CCL_ENCODE(codesys) \ | |
| 3275 (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->encode) | |
| 3276 #define XCODING_SYSTEM_CCL_DECODE(codesys) \ | |
| 3277 CODING_SYSTEM_CCL_DECODE (XCODING_SYSTEM (codesys)) | |
| 3278 #define XCODING_SYSTEM_CCL_ENCODE(codesys) \ | |
| 3279 CODING_SYSTEM_CCL_ENCODE (XCODING_SYSTEM (codesys)) | |
| 3280 | |
| 3281 struct ccl_coding_stream | |
| 3282 { | |
| 3283 /* state of the running CCL program */ | |
| 3284 struct ccl_program ccl; | |
| 3285 }; | |
| 3286 | |
| 1204 | 3287 static const struct memory_description ccl_coding_system_description[] = { |
| 3288 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, decode) }, | |
| 3289 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, encode) }, | |
| 771 | 3290 { XD_END } |
| 3291 }; | |
| 3292 | |
| 1204 | 3293 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (ccl); |
| 3294 | |
| 771 | 3295 static void |
| 3296 ccl_mark (Lisp_Object codesys) | |
| 3297 { | |
| 3298 mark_object (XCODING_SYSTEM_CCL_DECODE (codesys)); | |
| 3299 mark_object (XCODING_SYSTEM_CCL_ENCODE (codesys)); | |
| 3300 } | |
| 3301 | |
| 3302 static Bytecount | |
| 3303 ccl_convert (struct coding_stream *str, const UExtbyte *src, | |
| 3304 unsigned_char_dynarr *dst, Bytecount n) | |
| 3305 { | |
| 3306 struct ccl_coding_stream *data = | |
| 3307 CODING_STREAM_TYPE_DATA (str, ccl); | |
| 3308 Bytecount orign = n; | |
| 3309 | |
| 3310 data->ccl.last_block = str->eof; | |
| 3311 /* When applying a CCL program to a stream, SRC must not be NULL -- this | |
| 3312 is a special signal to the driver that read and write operations are | |
| 3313 not allowed. The code does not actually look at what SRC points to if | |
| 3314 N == 0. | |
| 3315 */ | |
| 3316 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", | |
| 3317 dst, n, 0, | |
| 3318 str->direction == CODING_DECODE ? CCL_MODE_DECODING : | |
| 3319 CCL_MODE_ENCODING); | |
| 3320 return orign; | |
| 3321 } | |
| 3322 | |
| 3323 static void | |
| 3324 ccl_init_coding_stream (struct coding_stream *str) | |
| 3325 { | |
| 3326 struct ccl_coding_stream *data = | |
| 3327 CODING_STREAM_TYPE_DATA (str, ccl); | |
| 3328 | |
| 3329 setup_ccl_program (&data->ccl, | |
| 3330 str->direction == CODING_DECODE ? | |
| 3331 XCODING_SYSTEM_CCL_DECODE (str->codesys) : | |
| 3332 XCODING_SYSTEM_CCL_ENCODE (str->codesys)); | |
| 3333 } | |
| 3334 | |
| 3335 static void | |
| 3336 ccl_rewind_coding_stream (struct coding_stream *str) | |
| 3337 { | |
| 3338 ccl_init_coding_stream (str); | |
| 3339 } | |
| 3340 | |
| 3341 static void | |
| 3342 ccl_init (Lisp_Object codesys) | |
| 3343 { | |
| 3344 XCODING_SYSTEM_CCL_DECODE (codesys) = Qnil; | |
| 3345 XCODING_SYSTEM_CCL_ENCODE (codesys) = Qnil; | |
| 3346 } | |
| 3347 | |
| 3348 static int | |
| 3349 ccl_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
| 3350 { | |
| 3351 if (EQ (key, Qdecode)) | |
|
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3352 XCODING_SYSTEM_CCL_DECODE (codesys) = get_ccl_program (value); |
| 771 | 3353 else if (EQ (key, Qencode)) |
|
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3354 XCODING_SYSTEM_CCL_ENCODE (codesys) = get_ccl_program (value); |
| 771 | 3355 return 1; |
| 3356 } | |
| 3357 | |
| 3358 static Lisp_Object | |
| 3359 ccl_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
| 3360 { | |
| 3361 if (EQ (prop, Qdecode)) | |
| 3362 return XCODING_SYSTEM_CCL_DECODE (coding_system); | |
| 3363 else if (EQ (prop, Qencode)) | |
| 3364 return XCODING_SYSTEM_CCL_ENCODE (coding_system); | |
| 3365 else | |
| 3366 return Qunbound; | |
| 3367 } | |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3368 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3369 /************************************************************************/ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3370 /* FIXED_WIDTH methods */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3371 /************************************************************************/ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3372 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3373 struct fixed_width_coding_system |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3374 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3375 /* For a fixed_width coding system, these specify the CCL programs |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3376 used for decoding (input) and encoding (output). */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3377 Lisp_Object decode; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3378 Lisp_Object encode; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3379 Lisp_Object from_unicode; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3380 Lisp_Object invalid_sequences_skip_chars; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3381 Lisp_Object query_skip_chars; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3382 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3383 /* This is not directly accessible from Lisp; it is a concatenation of the |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3384 previous two strings, used for simplicity of implementation. */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3385 Lisp_Object invalid_and_query_skip_chars; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3386 }; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3387 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3388 #define CODING_SYSTEM_FIXED_WIDTH_DECODE(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3389 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->decode) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3390 #define CODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3391 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->encode) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3392 #define CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3393 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->from_unicode) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3394 #define CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3395 (CODING_SYSTEM_TYPE_DATA (codesys, \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3396 fixed_width)->invalid_sequences_skip_chars) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3397 #define CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3398 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->query_skip_chars) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3399 #define CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3400 (CODING_SYSTEM_TYPE_DATA (codesys, \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3401 fixed_width)->invalid_and_query_skip_chars) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3402 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3403 #define XCODING_SYSTEM_FIXED_WIDTH_DECODE(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3404 CODING_SYSTEM_FIXED_WIDTH_DECODE (XCODING_SYSTEM (codesys)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3405 #define XCODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3406 CODING_SYSTEM_FIXED_WIDTH_ENCODE (XCODING_SYSTEM (codesys)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3407 #define XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3408 (CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (XCODING_SYSTEM (codesys))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3409 #define XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3410 (CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3411 (XCODING_SYSTEM (codesys))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3412 #define XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3413 (CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (XCODING_SYSTEM (codesys))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3414 #define XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3415 (CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS \ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3416 (XCODING_SYSTEM(codesys))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3417 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3418 struct fixed_width_coding_stream |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3419 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3420 /* state of the running CCL program */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3421 struct ccl_program ccl; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3422 }; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3423 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3424 static const struct memory_description |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3425 fixed_width_coding_system_description[] = { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3426 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, decode) }, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3427 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, encode) }, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3428 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3429 from_unicode) }, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3430 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3431 invalid_sequences_skip_chars) }, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3432 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3433 query_skip_chars) }, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3434 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3435 invalid_and_query_skip_chars) }, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3436 { XD_END } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3437 }; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3438 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3439 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3440 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3441 static void |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3442 fixed_width_mark (Lisp_Object codesys) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3443 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3444 mark_object (XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3445 mark_object (XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3446 mark_object (XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3447 mark_object |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3448 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3449 mark_object (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) ); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3450 mark_object |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3451 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3452 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3453 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3454 static Bytecount |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3455 fixed_width_convert (struct coding_stream *str, const UExtbyte *src, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3456 unsigned_char_dynarr *dst, Bytecount n) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3457 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3458 struct fixed_width_coding_stream *data = |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3459 CODING_STREAM_TYPE_DATA (str, fixed_width); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3460 Bytecount orign = n; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3461 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3462 data->ccl.last_block = str->eof; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3463 /* When applying a CCL program to a stream, SRC must not be NULL -- this |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3464 is a special signal to the driver that read and write operations are |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3465 not allowed. The code does not actually look at what SRC points to if |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3466 N == 0. */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3467 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3468 dst, n, 0, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3469 str->direction == CODING_DECODE ? CCL_MODE_DECODING : |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3470 CCL_MODE_ENCODING); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3471 return orign; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3472 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3473 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3474 static void |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3475 fixed_width_init_coding_stream (struct coding_stream *str) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3476 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3477 struct fixed_width_coding_stream *data = |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3478 CODING_STREAM_TYPE_DATA (str, fixed_width); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3479 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3480 setup_ccl_program (&data->ccl, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3481 str->direction == CODING_DECODE ? |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3482 XCODING_SYSTEM_FIXED_WIDTH_DECODE (str->codesys) : |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3483 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (str->codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3484 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3485 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3486 static void |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3487 fixed_width_rewind_coding_stream (struct coding_stream *str) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3488 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3489 fixed_width_init_coding_stream (str); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3490 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3491 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3492 static void |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3493 fixed_width_init (Lisp_Object codesys) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3494 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3495 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = Qnil; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3496 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = Qnil; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3497 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = Qnil; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3498 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) = Qnil; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3499 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = Qnil; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3500 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) = Qnil; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3501 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3502 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3503 static int |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3504 fixed_width_putprop (Lisp_Object codesys, Lisp_Object key, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3505 Lisp_Object value) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3506 { |
|
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3507 if (EQ (key, Qdecode)) |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3508 { |
|
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3509 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = get_ccl_program (value); |
|
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3510 } |
|
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3511 else if (EQ (key, Qencode)) |
|
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3512 { |
|
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3513 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = get_ccl_program (value); |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3514 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3515 else if (EQ (key, Qfrom_unicode)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3516 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3517 CHECK_HASH_TABLE (value); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3518 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = value; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3519 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3520 else if (EQ (key, Qinvalid_sequences_skip_chars)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3521 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3522 CHECK_STRING (value); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3523 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3524 /* Make sure Lisp can't make our data inconsistent: */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3525 value = Fcopy_sequence (value); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3526 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3527 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3528 = value; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3529 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3530 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3531 = concat2 (value, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3532 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3533 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3534 else if (EQ (key, Qquery_skip_chars)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3535 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3536 CHECK_STRING (value); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3537 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3538 /* Make sure Lisp can't make our data inconsistent: */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3539 value = Fcopy_sequence (value); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3540 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3541 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = value; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3542 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3543 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3544 = concat2 (value, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3545 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3546 (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3547 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3548 else |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3549 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3550 return 0; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3551 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3552 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3553 return 1; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3554 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3555 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3556 static Lisp_Object |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3557 fixed_width_getprop (Lisp_Object codesys, Lisp_Object prop) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3558 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3559 if (EQ (prop, Qdecode)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3560 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3561 return XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3562 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3563 else if (EQ (prop, Qencode)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3564 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3565 return XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3566 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3567 else if (EQ (prop, Qfrom_unicode)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3568 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3569 return XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3570 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3571 else if (EQ (prop, Qinvalid_sequences_skip_chars)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3572 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3573 /* Make sure Lisp can't make our data inconsistent: */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3574 return |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3575 Fcopy_sequence |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3576 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3577 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3578 else if (EQ (prop, Qquery_skip_chars)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3579 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3580 return |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3581 Fcopy_sequence (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3582 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3583 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3584 return Qunbound; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3585 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3586 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3587 static Lisp_Object Vfixed_width_query_ranges_cache; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3588 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3589 static Lisp_Object |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3590 fixed_width_skip_chars_data_given_strings (Lisp_Object string, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3591 Lisp_Object query_skip_chars, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3592 Lisp_Object |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3593 invalid_sequences_skip_chars, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3594 Binbyte *fastmap, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3595 int fastmap_len) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3596 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3597 Lisp_Object result = Fgethash (string, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3598 Vfixed_width_query_ranges_cache, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3599 Qnil); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3600 REGISTER Ibyte *p, *pend; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3601 REGISTER Ichar c; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3602 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3603 memset (fastmap, query_coding_unencodable, fastmap_len); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3604 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3605 if (!NILP (result)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3606 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3607 int i; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3608 Lisp_Object ranged; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3609 assert (RANGE_TABLEP (result)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3610 for (i = 0; i < fastmap_len; ++i) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3611 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3612 ranged = Fget_range_table (make_int (i), result, Qnil); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3613 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3614 if (EQ (ranged, Qsucceeded)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3615 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3616 fastmap [i] = query_coding_succeeded; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3617 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3618 else if (EQ (ranged, Qinvalid_sequence)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3619 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3620 fastmap [i] = query_coding_invalid_sequence; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3621 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3622 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3623 return result; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3624 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3625 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3626 result = Fmake_range_table (Qstart_closed_end_closed); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3627 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3628 p = XSTRING_DATA (query_skip_chars); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3629 pend = p + XSTRING_LENGTH (query_skip_chars); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3630 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3631 while (p != pend) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3632 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3633 c = itext_ichar (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3634 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3635 INC_IBYTEPTR (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3636 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3637 if (c == '\\') |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3638 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3639 if (p == pend) break; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3640 c = itext_ichar (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3641 INC_IBYTEPTR (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3642 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3643 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3644 if (p != pend && *p == '-') |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3645 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3646 Ichar cend; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3647 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3648 /* Skip over the dash. */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3649 p++; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3650 if (p == pend) break; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3651 cend = itext_ichar (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3652 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3653 Fput_range_table (make_int (c), make_int (cend), Qsucceeded, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3654 result); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3655 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3656 while (c <= cend && c < fastmap_len) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3657 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3658 fastmap[c] = query_coding_succeeded; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3659 c++; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3660 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3661 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3662 INC_IBYTEPTR (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3663 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3664 else |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3665 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3666 if (c < fastmap_len) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3667 fastmap[c] = query_coding_succeeded; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3668 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3669 Fput_range_table (make_int (c), make_int (c), Qsucceeded, result); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3670 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3671 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3672 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3673 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3674 p = XSTRING_DATA (invalid_sequences_skip_chars); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3675 pend = p + XSTRING_LENGTH (invalid_sequences_skip_chars); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3676 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3677 while (p != pend) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3678 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3679 c = itext_ichar (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3680 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3681 INC_IBYTEPTR (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3682 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3683 if (c == '\\') |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3684 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3685 if (p == pend) break; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3686 c = itext_ichar (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3687 INC_IBYTEPTR (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3688 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3689 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3690 if (p != pend && *p == '-') |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3691 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3692 Ichar cend; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3693 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3694 /* Skip over the dash. */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3695 p++; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3696 if (p == pend) break; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3697 cend = itext_ichar (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3698 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3699 Fput_range_table (make_int (c), make_int (cend), Qinvalid_sequence, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3700 result); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3701 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3702 while (c <= cend && c < fastmap_len) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3703 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3704 fastmap[c] = query_coding_invalid_sequence; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3705 c++; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3706 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3707 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3708 INC_IBYTEPTR (p); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3709 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3710 else |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3711 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3712 if (c < fastmap_len) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3713 fastmap[c] = query_coding_invalid_sequence; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3714 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3715 Fput_range_table (make_int (c), make_int (c), Qinvalid_sequence, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3716 result); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3717 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3718 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3719 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3720 Fputhash (string, result, Vfixed_width_query_ranges_cache); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3721 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3722 return result; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3723 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3724 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3725 static Lisp_Object |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3726 fixed_width_query (Lisp_Object codesys, struct buffer *buf, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3727 Charbpos end, int flags) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3728 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3729 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3730 Charbpos pos_byte = BYTE_BUF_PT (buf); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3731 Lisp_Object skip_chars_range_table, from_unicode, checked_unicode, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3732 result = Qnil; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3733 enum query_coding_failure_reasons failed_reason, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3734 previous_failed_reason = query_coding_succeeded; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3735 Binbyte fastmap[0xff]; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3736 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3737 from_unicode = XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3738 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3739 skip_chars_range_table = |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3740 fixed_width_skip_chars_data_given_strings |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3741 ((flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3742 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3743 (codesys) : |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3744 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys)), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3745 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3746 (flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3747 build_ascstring("") : |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3748 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3749 fastmap, (int)(sizeof (fastmap))); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3750 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3751 if (flags & QUERY_METHOD_HIGHLIGHT && |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3752 /* If we're being called really early, live without highlights getting |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3753 cleared properly: */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3754 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3755 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3756 /* It's okay to call Lisp here, the only non-stack object we may have |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3757 allocated up to this point is skip_chars_range_table, and that's |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3758 reachable from its entry in Vfixed_width_query_ranges_cache. */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3759 call3 (Qquery_coding_clear_highlights, make_int (pos), make_int (end), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3760 wrap_buffer (buf)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3761 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3762 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3763 while (pos < end) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3764 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3765 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3766 if ((ch < (int) (sizeof(fastmap))) ? |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3767 (fastmap[ch] == query_coding_succeeded) : |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3768 (EQ (Qsucceeded, Fget_range_table (make_int (ch), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3769 skip_chars_range_table, Qnil)))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3770 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3771 pos++; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3772 INC_BYTEBPOS (buf, pos_byte); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3773 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3774 else |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3775 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3776 fail_range_start = pos; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3777 while ((pos < end) && |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3778 ((!(flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES) && |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3779 EQ (Qinvalid_sequence, Fget_range_table |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3780 (make_int (ch), skip_chars_range_table, Qnil)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3781 && (failed_reason = query_coding_invalid_sequence)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3782 || ((NILP ((checked_unicode = |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3783 Fgethash (Fchar_to_unicode (make_char (ch)), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3784 from_unicode, Qnil)))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3785 && (failed_reason = query_coding_unencodable))) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3786 && (previous_failed_reason == query_coding_succeeded |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3787 || previous_failed_reason == failed_reason)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3788 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3789 pos++; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3790 INC_BYTEBPOS (buf, pos_byte); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3791 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3792 previous_failed_reason = failed_reason; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3793 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3794 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3795 if (fail_range_start == pos) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3796 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3797 /* The character can actually be encoded; move on. */ |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3798 pos++; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3799 INC_BYTEBPOS (buf, pos_byte); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3800 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3801 else |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3802 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3803 assert (previous_failed_reason == query_coding_invalid_sequence |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3804 || previous_failed_reason == query_coding_unencodable); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3805 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3806 if (flags & QUERY_METHOD_ERRORP) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3807 { |
|
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3808 signal_error_2 |
|
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3809 (Qtext_conversion_error, |
|
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3810 "Cannot encode using coding system", |
|
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3811 make_string_from_buffer (buf, fail_range_start, |
|
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3812 pos - fail_range_start), |
|
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3813 XCODING_SYSTEM_NAME (codesys)); |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3814 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3815 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3816 if (NILP (result)) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3817 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3818 result = Fmake_range_table (Qstart_closed_end_open); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3819 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3820 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3821 fail_range_end = pos; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3822 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3823 Fput_range_table (make_int (fail_range_start), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3824 make_int (fail_range_end), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3825 (previous_failed_reason |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3826 == query_coding_unencodable ? |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3827 Qunencodable : Qinvalid_sequence), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3828 result); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3829 previous_failed_reason = query_coding_succeeded; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3830 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3831 if (flags & QUERY_METHOD_HIGHLIGHT) |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3832 { |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3833 Lisp_Object extent |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3834 = Fmake_extent (make_int (fail_range_start), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3835 make_int (fail_range_end), |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3836 wrap_buffer (buf)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3837 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3838 Fset_extent_priority |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3839 (extent, make_int (2 + mouse_highlight_priority)); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3840 Fset_extent_face (extent, Qquery_coding_warning_face); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3841 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3842 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3843 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3844 } |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3845 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3846 return result; |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3847 } |
| 771 | 3848 |
| 3849 | |
| 3850 /************************************************************************/ | |
| 3851 /* Initialization */ | |
| 3852 /************************************************************************/ | |
| 3853 | |
| 3854 void | |
| 3855 syms_of_mule_coding (void) | |
| 3856 { | |
| 3857 DEFSUBR (Fdecode_shift_jis_char); | |
| 3858 DEFSUBR (Fencode_shift_jis_char); | |
| 3859 DEFSUBR (Fdecode_big5_char); | |
| 3860 DEFSUBR (Fencode_big5_char); | |
| 3861 | |
| 3862 DEFSYMBOL (Qbig5); | |
| 3863 DEFSYMBOL (Qshift_jis); | |
| 3864 DEFSYMBOL (Qccl); | |
| 3865 DEFSYMBOL (Qiso2022); | |
| 3866 | |
| 3867 DEFSYMBOL (Qcharset_g0); | |
| 3868 DEFSYMBOL (Qcharset_g1); | |
| 3869 DEFSYMBOL (Qcharset_g2); | |
| 3870 DEFSYMBOL (Qcharset_g3); | |
| 3871 DEFSYMBOL (Qforce_g0_on_output); | |
| 3872 DEFSYMBOL (Qforce_g1_on_output); | |
| 3873 DEFSYMBOL (Qforce_g2_on_output); | |
| 3874 DEFSYMBOL (Qforce_g3_on_output); | |
| 3875 DEFSYMBOL (Qno_iso6429); | |
| 3876 DEFSYMBOL (Qinput_charset_conversion); | |
| 3877 DEFSYMBOL (Qoutput_charset_conversion); | |
| 3878 | |
| 3879 DEFSYMBOL (Qshort); | |
| 3880 DEFSYMBOL (Qno_ascii_eol); | |
| 3881 DEFSYMBOL (Qno_ascii_cntl); | |
| 3882 DEFSYMBOL (Qseven); | |
| 3883 DEFSYMBOL (Qlock_shift); | |
| 3884 | |
| 3885 DEFSYMBOL (Qiso_7); | |
| 3886 DEFSYMBOL (Qiso_8_designate); | |
| 3887 DEFSYMBOL (Qiso_8_1); | |
| 3888 DEFSYMBOL (Qiso_8_2); | |
| 3889 DEFSYMBOL (Qiso_lock_shift); | |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3890 |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3891 DEFSYMBOL (Qinvalid_sequences_skip_chars); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3892 DEFSYMBOL (Qquery_skip_chars); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3893 DEFSYMBOL (Qfixed_width); |
| 771 | 3894 } |
| 3895 | |
| 3896 void | |
| 3897 coding_system_type_create_mule_coding (void) | |
| 3898 { | |
| 3899 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022, "iso2022-coding-system-p"); | |
| 3900 CODING_SYSTEM_HAS_METHOD (iso2022, mark); | |
| 3901 CODING_SYSTEM_HAS_METHOD (iso2022, convert); | |
| 3902 CODING_SYSTEM_HAS_METHOD (iso2022, finalize_coding_stream); | |
| 3903 CODING_SYSTEM_HAS_METHOD (iso2022, init_coding_stream); | |
| 3904 CODING_SYSTEM_HAS_METHOD (iso2022, rewind_coding_stream); | |
| 3905 CODING_SYSTEM_HAS_METHOD (iso2022, init); | |
| 3906 CODING_SYSTEM_HAS_METHOD (iso2022, print); | |
| 3907 CODING_SYSTEM_HAS_METHOD (iso2022, finalize); | |
| 3908 CODING_SYSTEM_HAS_METHOD (iso2022, putprop); | |
| 3909 CODING_SYSTEM_HAS_METHOD (iso2022, getprop); | |
| 3910 | |
| 3911 INITIALIZE_DETECTOR (iso2022); | |
| 3912 DETECTOR_HAS_METHOD (iso2022, detect); | |
| 3913 DETECTOR_HAS_METHOD (iso2022, finalize_detection_state); | |
| 3914 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_7); | |
| 3915 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_designate); | |
| 3916 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_1); | |
| 3917 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_2); | |
| 3918 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_lock_shift); | |
| 3919 | |
| 3920 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (ccl, "ccl-coding-system-p"); | |
| 3921 CODING_SYSTEM_HAS_METHOD (ccl, mark); | |
| 3922 CODING_SYSTEM_HAS_METHOD (ccl, convert); | |
| 3923 CODING_SYSTEM_HAS_METHOD (ccl, init); | |
| 3924 CODING_SYSTEM_HAS_METHOD (ccl, init_coding_stream); | |
| 3925 CODING_SYSTEM_HAS_METHOD (ccl, rewind_coding_stream); | |
| 3926 CODING_SYSTEM_HAS_METHOD (ccl, putprop); | |
| 3927 CODING_SYSTEM_HAS_METHOD (ccl, getprop); | |
| 3928 | |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3929 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width, |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3930 "fixed-width-coding-system-p"); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3931 CODING_SYSTEM_HAS_METHOD (fixed_width, mark); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3932 CODING_SYSTEM_HAS_METHOD (fixed_width, convert); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3933 CODING_SYSTEM_HAS_METHOD (fixed_width, query); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3934 CODING_SYSTEM_HAS_METHOD (fixed_width, init); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3935 CODING_SYSTEM_HAS_METHOD (fixed_width, init_coding_stream); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3936 CODING_SYSTEM_HAS_METHOD (fixed_width, rewind_coding_stream); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3937 CODING_SYSTEM_HAS_METHOD (fixed_width, putprop); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3938 CODING_SYSTEM_HAS_METHOD (fixed_width, getprop); |
|
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3939 |
| 771 | 3940 INITIALIZE_CODING_SYSTEM_TYPE (shift_jis, "shift-jis-coding-system-p"); |
| 3941 CODING_SYSTEM_HAS_METHOD (shift_jis, convert); | |
| 3942 | |
| 3943 INITIALIZE_DETECTOR (shift_jis); | |
| 3944 DETECTOR_HAS_METHOD (shift_jis, detect); | |
| 3945 INITIALIZE_DETECTOR_CATEGORY (shift_jis, shift_jis); | |
| 3946 | |
| 3947 INITIALIZE_CODING_SYSTEM_TYPE (big5, "big5-coding-system-p"); | |
| 3948 CODING_SYSTEM_HAS_METHOD (big5, convert); | |
| 3949 | |
| 3950 INITIALIZE_DETECTOR (big5); | |
| 3951 DETECTOR_HAS_METHOD (big5, detect); | |
| 3952 INITIALIZE_DETECTOR_CATEGORY (big5, big5); | |
| 3953 } | |
| 3954 | |
| 3955 void | |
| 3956 reinit_coding_system_type_create_mule_coding (void) | |
| 3957 { | |
| 3958 REINITIALIZE_CODING_SYSTEM_TYPE (iso2022); | |
| 3959 REINITIALIZE_CODING_SYSTEM_TYPE (ccl); | |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3960 REINITIALIZE_CODING_SYSTEM_TYPE (fixed_width); |
| 771 | 3961 REINITIALIZE_CODING_SYSTEM_TYPE (shift_jis); |
| 3962 REINITIALIZE_CODING_SYSTEM_TYPE (big5); | |
| 3963 } | |
| 3964 | |
| 3965 void | |
| 3966 reinit_vars_of_mule_coding (void) | |
| 3967 { | |
| 3968 } | |
| 3969 | |
| 3970 void | |
| 3971 vars_of_mule_coding (void) | |
| 3972 { | |
|
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3973 /* This needs to be Qeq, there's a corner case where |
|
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3974 Qequal won't work. */ |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3975 Vfixed_width_query_ranges_cache |
|
5191
71ee43b8a74d
Add #'equalp as a hash test by default; add #'define-hash-table-test, GNU API
Aidan Kehoe <kehoea@parhasard.net>
parents:
5169
diff
changeset
|
3976 = make_lisp_hash_table (32, HASH_TABLE_KEY_WEAK, Qeq); |
|
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3977 staticpro (&Vfixed_width_query_ranges_cache); |
| 771 | 3978 } |
