Mercurial > hg > xemacs-beta
annotate src/mule-coding.c @ 4976:16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
-------------------- ChangeLog entries follow: --------------------
src/ChangeLog addition:
2010-02-04 Ben Wing <ben@xemacs.org>
* alloc.c (release_breathing_space):
* alloc.c (resize_string):
* alloc.c (sweep_lcrecords_1):
* alloc.c (SWEEP_FIXED_TYPE_BLOCK_1):
* alloc.c (ADDITIONAL_FREE_compiled_function):
* alloc.c (compact_string_chars):
* alloc.c (ADDITIONAL_FREE_string):
* alloc.c (sweep_strings):
* alloca.c (xemacs_c_alloca):
* alsaplay.c (alsa_play_sound_file):
* buffer.c (init_initial_directory):
* buffer.h:
* buffer.h (BUFFER_FREE):
* console-stream.c (stream_delete_console):
* console-tty.c (free_tty_console_struct):
* data.c (Fnumber_to_string):
* device-gtk.c (gtk_init_device):
* device-gtk.c (free_gtk_device_struct):
* device-gtk.c (gtk_delete_device):
* device-msw.c (mswindows_delete_device):
* device-msw.c (msprinter_delete_device):
* device-tty.c (free_tty_device_struct):
* device-tty.c (tty_delete_device):
* device-x.c (x_init_device):
* device-x.c (free_x_device_struct):
* device-x.c (x_delete_device):
* dialog-msw.c (handle_directory_dialog_box):
* dialog-x.c (dbox_descriptor_to_widget_value):
* dired-msw.c (Fmswindows_insert_directory):
* dired.c (free_user_cache):
* dired.c (user_name_completion_unwind):
* doc.c (unparesseuxify_doc_string):
* doc.c (Fsubstitute_command_keys):
* doprnt.c (emacs_doprnt_1):
* dumper.c (pdump_load_finish):
* dumper.c (pdump_file_free):
* dumper.c (pdump_file_unmap):
* dynarr.c:
* dynarr.c (Dynarr_free):
* editfns.c (uncache_home_directory):
* editfns.c (Fset_time_zone_rule):
* elhash.c:
* elhash.c (pdump_reorganize_hash_table):
* elhash.c (maphash_unwind):
* emacs.c (make_arg_list_1):
* emacs.c (free_argc_argv):
* emacs.c (sort_args):
* emacs.c (Frunning_temacs_p):
* emodules.c (attempt_module_delete):
* eval.c (free_pointer):
* event-Xt.c (unselect_filedesc):
* event-Xt.c (emacs_Xt_select_process):
* event-gtk.c (unselect_filedesc):
* event-gtk.c (dragndrop_data_received):
* event-msw.c (winsock_closer):
* event-msw.c (mswindows_dde_callback):
* event-msw.c (mswindows_wnd_proc):
* event-stream.c (finalize_command_builder):
* event-stream.c (free_command_builder):
* extents.c (free_gap_array):
* extents.c (free_extent_list):
* extents.c (free_soe):
* extents.c (extent_fragment_delete):
* extents.c (extent_priority_sort_function):
* file-coding.c (make_coding_system_1):
* file-coding.c (coding_finalizer):
* file-coding.c (set_coding_stream_coding_system):
* file-coding.c (chain_finalize_coding_stream_1):
* file-coding.c (chain_finalize):
* file-coding.c (free_detection_state):
* file-coding.c (coding_category_symbol_to_id):
* fileio.c:
* fileio.c (Ffile_name_directory):
* fileio.c (if):
* fileio.c (Ffile_symlink_p):
* filelock.c (FREE_LOCK_INFO):
* filelock.c (current_lock_owner):
* font-mgr.c (Ffc_name_unparse):
* font-mgr.c (Ffc_pattern_duplicate):
* frame-gtk.c (gtk_delete_frame):
* frame-msw.c (mswindows_delete_frame):
* frame-msw.c (msprinter_delete_frame):
* frame-x.c (x_cde_destroy_callback):
* frame-x.c (Fcde_start_drag_internal):
* frame-x.c (x_cde_transfer_callback):
* frame-x.c (x_delete_frame):
* frame.c (update_frame_title):
* frame.c (Fset_frame_pointer):
* gc.c (register_for_finalization):
* gccache-gtk.c (free_gc_cache):
* gccache-gtk.c (gc_cache_lookup):
* gccache-x.c (free_gc_cache):
* gccache-x.c (gc_cache_lookup):
* glyphs-eimage.c:
* glyphs-eimage.c (jpeg_instantiate_unwind):
* glyphs-eimage.c (gif_instantiate_unwind):
* glyphs-eimage.c (png_instantiate_unwind):
* glyphs-eimage.c (png_instantiate):
* glyphs-eimage.c (tiff_instantiate_unwind):
* glyphs-gtk.c (convert_EImage_to_GDKImage):
* glyphs-gtk.c (gtk_finalize_image_instance):
* glyphs-gtk.c (gtk_init_image_instance_from_eimage):
* glyphs-gtk.c (gtk_xpm_instantiate):
* glyphs-msw.c (convert_EImage_to_DIBitmap):
* glyphs-msw.c (mswindows_init_image_instance_from_eimage):
* glyphs-msw.c (mswindows_initialize_image_instance_mask):
* glyphs-msw.c (xpm_to_eimage):
* glyphs-msw.c (mswindows_xpm_instantiate):
* glyphs-msw.c (xbm_create_bitmap_from_data):
* glyphs-msw.c (mswindows_finalize_image_instance):
* glyphs-x.c (convert_EImage_to_XImage):
* glyphs-x.c (x_finalize_image_instance):
* glyphs-x.c (x_init_image_instance_from_eimage):
* glyphs-x.c (x_xpm_instantiate):
* gui-x.c (free_popup_widget_value_tree):
* hash.c (free_hash_table):
* hash.c (grow_hash_table):
* hash.c (pregrow_hash_table_if_necessary):
* imgproc.c (build_EImage_quantable):
* insdel.c (uninit_buffer_text):
* intl-win32.c (convert_multibyte_to_internal_malloc):
* intl.c:
* intl.c (Fset_current_locale):
* keymap.c:
* keymap.c (where_is_recursive_mapper):
* keymap.c (where_is_internal):
* lisp.h:
* lisp.h (xfree):
* lstream.c (Lstream_close):
* lstream.c (resizing_buffer_closer):
* mule-coding.c:
* mule-coding.c (iso2022_finalize_detection_state):
* nt.c:
* nt.c (mswindows_get_long_filename):
* nt.c (nt_get_resource):
* nt.c (init_mswindows_environment):
* nt.c (get_cached_volume_information):
* nt.c (mswindows_opendir):
* nt.c (mswindows_closedir):
* nt.c (mswindows_readdir):
* nt.c (mswindows_stat):
* nt.c (mswindows_getdcwd):
* nt.c (Fmswindows_long_file_name):
* ntplay.c (nt_play_sound_file):
* ntplay.c (play_sound_data_1):
* number-gmp.c (gmp_free):
* number-gmp.c (init_number_gmp):
* number-mp.c (bignum_to_string):
* number-mp.c (BIGNUM_TO_TYPE):
* number.c (bignum_print):
* number.c (bignum_convfree):
* number.c (ratio_print):
* number.c (bigfloat_print):
* number.c (bigfloat_finalize):
* objects-gtk.c (gtk_finalize_color_instance):
* objects-gtk.c (gtk_finalize_font_instance):
* objects-msw.c (mswindows_finalize_color_instance):
* objects-msw.c (mswindows_finalize_font_instance):
* objects-tty.c (tty_finalize_color_instance):
* objects-tty.c (tty_finalize_font_instance):
* objects-tty.c (tty_font_list):
* objects-x.c (x_finalize_color_instance):
* objects-x.c (x_finalize_font_instance):
* process.c:
* process.c (finalize_process):
* realpath.c:
* redisplay.c (add_propagation_runes):
* regex.c:
* regex.c (xfree):
* regex.c (REGEX_FREE_STACK):
* regex.c (FREE_STACK_RETURN):
* regex.c (regex_compile):
* regex.c (regexec):
* regex.c (regfree):
* scrollbar-gtk.c (gtk_free_scrollbar_instance):
* scrollbar-gtk.c (gtk_release_scrollbar_instance):
* scrollbar-msw.c (mswindows_free_scrollbar_instance):
* scrollbar-msw.c (unshow_that_mofo):
* scrollbar-x.c (x_free_scrollbar_instance):
* scrollbar-x.c (x_release_scrollbar_instance):
* select-gtk.c (emacs_gtk_selection_handle):
* select-msw.c (mswindows_own_selection):
* select-x.c:
* select-x.c (x_handle_selection_request):
* select-x.c (unexpect_property_change):
* select-x.c (x_handle_property_notify):
* select-x.c (receive_incremental_selection):
* select-x.c (x_get_window_property_as_lisp_data):
* select-x.c (Fx_get_cutbuffer_internal):
* specifier.c (finalize_specifier):
* syntax.c (uninit_buffer_syntax_cache):
* sysdep.c (qxe_allocating_getcwd):
* sysdep.c (qxe_lstat):
* sysdep.c (copy_in_passwd):
* sysdep.c (qxe_ctime):
* sysdep.c (closedir):
* sysdep.c (DIRSIZ):
* termcap.c (tgetent):
* termcap.c (tprint):
* tests.c (Ftest_data_format_conversion):
* text.c (new_dfc_convert_copy_data):
* text.h (eifree):
* text.h (eito_alloca):
* text.h (eito_external):
* toolbar-msw.c (mswindows_output_toolbar):
* ui-gtk.c (CONVERT_RETVAL):
* ui-gtk.c (__allocate_object_storage):
* unicode.c (free_from_unicode_table):
* unicode.c (free_to_unicode_table):
* unicode.c (free_charset_unicode_tables):
* win32.c (mswindows_read_link_1):
Rename: xfree(VAL, TYPE)->xfree(VAL)
Command used:
gr 'xfree *\((.*),.*\);' 'xfree (\1);' *.[ch]
Followed by grepping for 'xfree.*,' and fixing anything left.
Rationale: Having to specify the TYPE argument is annoying and
error-prone. It was originally put in to work around warnings
due to strict aliasing but years and years ago I rewrote it
in a way that doesn't use the TYPE argument at all and no one
has complained since then. (And anyway, XEmacs is far from
ever being in compliance with strict aliasing and would require
far-reaching changes to get that way.)
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Thu, 04 Feb 2010 07:28:14 -0600 |
parents | 0d4c9d0f6a8d |
children | 3d91f0b64469 |
rev | line source |
---|---|
771 | 1 /* Conversion functions for I18N encodings, but not Unicode (in separate file). |
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4 Copyright (C) 2000, 2001, 2002 Ben Wing. | |
5 | |
6 This file is part of XEmacs. | |
7 | |
8 XEmacs is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with XEmacs; see the file COPYING. If not, write to | |
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 Boston, MA 02111-1307, USA. */ | |
22 | |
23 /* Synched up with: Mule 2.3. Not in FSF. */ | |
24 | |
25 /* For previous history, see file-coding.c. | |
26 | |
27 September 10, 2001: Extracted from file-coding.c by Ben Wing. | |
28 | |
29 Later in September: Finished abstraction of detection system, rewrote | |
30 all the detectors to include multiple levels of likelihood. | |
31 */ | |
32 | |
33 #include <config.h> | |
34 #include "lisp.h" | |
35 | |
36 #include "charset.h" | |
37 #include "mule-ccl.h" | |
38 #include "file-coding.h" | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
39 #include "elhash.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
40 #include "rangetab.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
41 #include "buffer.h" |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
42 #include "extents.h" |
771 | 43 |
44 Lisp_Object Qshift_jis, Qiso2022, Qbig5, Qccl; | |
45 | |
46 Lisp_Object Qcharset_g0, Qcharset_g1, Qcharset_g2, Qcharset_g3; | |
47 Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; | |
48 Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; | |
49 Lisp_Object Qno_iso6429; | |
50 Lisp_Object Qinput_charset_conversion, Qoutput_charset_conversion; | |
51 Lisp_Object Qshort, Qno_ascii_eol, Qno_ascii_cntl, Qseven, Qlock_shift; | |
52 | |
53 Lisp_Object Qiso_7, Qiso_8_designate, Qiso_8_1, Qiso_8_2, Qiso_lock_shift; | |
54 | |
4691
3ba90c659d01
Move Qfrom_unicode to general-slots.h, fix the native Win32 build.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4690
diff
changeset
|
55 Lisp_Object Qquery_skip_chars, Qinvalid_sequences_skip_chars; |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
56 Lisp_Object Qfixed_width; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
57 |
771 | 58 |
59 /************************************************************************/ | |
60 /* Shift-JIS methods */ | |
61 /************************************************************************/ | |
62 | |
63 /* Shift-JIS; Hankaku (half-width) KANA is also supported. */ | |
64 DEFINE_CODING_SYSTEM_TYPE (shift_jis); | |
65 | |
66 /* Shift-JIS is a coding system encoding three character sets: ASCII, right | |
67 half of JISX0201-Kana, and JISX0208. An ASCII character is encoded | |
68 as is. A character of JISX0201-Kana (DIMENSION1_CHARS94 character set) is | |
69 encoded by "position-code + 0x80". A character of JISX0208 | |
70 (DIMENSION2_CHARS94 character set) is encoded in 2-byte but two | |
71 position-codes are divided and shifted so that it fit in the range | |
72 below. | |
73 | |
74 --- CODE RANGE of Shift-JIS --- | |
75 (character set) (range) | |
76 ASCII 0x00 .. 0x7F | |
77 JISX0201-Kana 0xA0 .. 0xDF | |
78 JISX0208 (1st byte) 0x80 .. 0x9F and 0xE0 .. 0xEF | |
79 (2nd byte) 0x40 .. 0x7E and 0x80 .. 0xFC | |
80 ------------------------------- | |
81 | |
82 */ | |
83 | |
84 /* Is this the first byte of a Shift-JIS two-byte char? */ | |
85 | |
826 | 86 inline static int |
87 byte_shift_jis_two_byte_1_p (int c) | |
88 { | |
89 return (c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF); | |
90 } | |
771 | 91 |
92 /* Is this the second byte of a Shift-JIS two-byte char? */ | |
93 | |
826 | 94 inline static int |
95 byte_shift_jis_two_byte_2_p (int c) | |
96 { | |
97 return (c >= 0x40 && c <= 0x7E) || (c >= 0x80 && c <= 0xFC); | |
98 } | |
99 | |
100 inline static int | |
101 byte_shift_jis_katakana_p (int c) | |
102 { | |
103 return c >= 0xA1 && c <= 0xDF; | |
104 } | |
771 | 105 |
3439 | 106 inline static void |
107 dynarr_add_2022_one_dimension (Lisp_Object charset, Ibyte c, | |
108 unsigned char charmask, | |
109 unsigned_char_dynarr *dst) | |
110 { | |
111 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
112 { | |
113 encode_unicode_char (charset, c & charmask, 0, | |
4096 | 114 dst, UNICODE_UTF_8, 0, 0); |
3439 | 115 } |
116 else | |
117 { | |
118 Dynarr_add (dst, c & charmask); | |
119 } | |
120 } | |
121 | |
122 inline static void | |
123 dynarr_add_2022_two_dimensions (Lisp_Object charset, Ibyte c, | |
124 unsigned int ch, | |
125 unsigned char charmask, | |
126 unsigned_char_dynarr *dst) | |
127 { | |
128 if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
129 { | |
130 encode_unicode_char (charset, | |
131 ch & charmask, | |
132 c & charmask, dst, | |
4096 | 133 UNICODE_UTF_8, 0, 0); |
3439 | 134 } |
135 else | |
136 { | |
137 Dynarr_add (dst, ch & charmask); | |
138 Dynarr_add (dst, c & charmask); | |
139 } | |
140 } | |
141 | |
771 | 142 /* Convert Shift-JIS data to internal format. */ |
143 | |
144 static Bytecount | |
145 shift_jis_convert (struct coding_stream *str, const UExtbyte *src, | |
146 unsigned_char_dynarr *dst, Bytecount n) | |
147 { | |
148 unsigned int ch = str->ch; | |
149 Bytecount orign = n; | |
150 | |
151 if (str->direction == CODING_DECODE) | |
152 { | |
153 while (n--) | |
154 { | |
155 UExtbyte c = *src++; | |
156 | |
157 if (ch) | |
158 { | |
159 /* Previous character was first byte of Shift-JIS Kanji char. */ | |
826 | 160 if (byte_shift_jis_two_byte_2_p (c)) |
771 | 161 { |
867 | 162 Ibyte e1, e2; |
771 | 163 |
164 Dynarr_add (dst, LEADING_BYTE_JAPANESE_JISX0208); | |
165 DECODE_SHIFT_JIS (ch, c, e1, e2); | |
166 Dynarr_add (dst, e1); | |
167 Dynarr_add (dst, e2); | |
168 } | |
169 else | |
170 { | |
171 DECODE_ADD_BINARY_CHAR (ch, dst); | |
172 DECODE_ADD_BINARY_CHAR (c, dst); | |
173 } | |
174 ch = 0; | |
175 } | |
176 else | |
177 { | |
826 | 178 if (byte_shift_jis_two_byte_1_p (c)) |
771 | 179 ch = c; |
826 | 180 else if (byte_shift_jis_katakana_p (c)) |
771 | 181 { |
182 Dynarr_add (dst, LEADING_BYTE_KATAKANA_JISX0201); | |
183 Dynarr_add (dst, c); | |
184 } | |
185 else | |
186 DECODE_ADD_BINARY_CHAR (c, dst); | |
187 } | |
188 } | |
189 | |
190 if (str->eof) | |
191 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
192 } | |
193 else | |
194 { | |
195 while (n--) | |
196 { | |
867 | 197 Ibyte c = *src++; |
826 | 198 if (byte_ascii_p (c)) |
771 | 199 { |
200 Dynarr_add (dst, c); | |
201 ch = 0; | |
202 } | |
867 | 203 else if (ibyte_leading_byte_p (c)) |
771 | 204 ch = (c == LEADING_BYTE_KATAKANA_JISX0201 || |
205 c == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
206 c == LEADING_BYTE_JAPANESE_JISX0208) ? c : 0; | |
207 else if (ch) | |
208 { | |
209 if (ch == LEADING_BYTE_KATAKANA_JISX0201) | |
210 { | |
211 Dynarr_add (dst, c); | |
212 ch = 0; | |
213 } | |
214 else if (ch == LEADING_BYTE_JAPANESE_JISX0208_1978 || | |
215 ch == LEADING_BYTE_JAPANESE_JISX0208) | |
216 ch = c; | |
217 else | |
218 { | |
219 UExtbyte j1, j2; | |
220 ENCODE_SHIFT_JIS (ch, c, j1, j2); | |
221 Dynarr_add (dst, j1); | |
222 Dynarr_add (dst, j2); | |
223 ch = 0; | |
224 } | |
225 } | |
226 } | |
227 } | |
228 | |
229 str->ch = ch; | |
230 | |
231 return orign; | |
232 } | |
233 | |
234 DEFUN ("decode-shift-jis-char", Fdecode_shift_jis_char, 1, 1, 0, /* | |
235 Decode a JISX0208 character of Shift-JIS coding-system. | |
236 CODE is the character code in Shift-JIS as a cons of type bytes. | |
237 Return the corresponding character. | |
238 */ | |
239 (code)) | |
240 { | |
241 int c1, c2, s1, s2; | |
242 | |
243 CHECK_CONS (code); | |
244 CHECK_INT (XCAR (code)); | |
245 CHECK_INT (XCDR (code)); | |
246 s1 = XINT (XCAR (code)); | |
247 s2 = XINT (XCDR (code)); | |
826 | 248 if (byte_shift_jis_two_byte_1_p (s1) && |
249 byte_shift_jis_two_byte_2_p (s2)) | |
771 | 250 { |
251 DECODE_SHIFT_JIS (s1, s2, c1, c2); | |
867 | 252 return make_char (make_ichar (Vcharset_japanese_jisx0208, |
831 | 253 c1 & 0x7F, c2 & 0x7F)); |
771 | 254 } |
255 else | |
256 return Qnil; | |
257 } | |
258 | |
259 DEFUN ("encode-shift-jis-char", Fencode_shift_jis_char, 1, 1, 0, /* | |
260 Encode a JISX0208 character CHARACTER to SHIFT-JIS coding-system. | |
261 Return the corresponding character code in SHIFT-JIS as a cons of two bytes. | |
262 */ | |
263 (character)) | |
264 { | |
265 Lisp_Object charset; | |
266 int c1, c2, s1, s2; | |
267 | |
268 CHECK_CHAR_COERCE_INT (character); | |
867 | 269 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
771 | 270 if (EQ (charset, Vcharset_japanese_jisx0208)) |
271 { | |
272 ENCODE_SHIFT_JIS (c1 | 0x80, c2 | 0x80, s1, s2); | |
273 return Fcons (make_int (s1), make_int (s2)); | |
274 } | |
275 else | |
276 return Qnil; | |
277 } | |
278 | |
279 | |
280 /************************************************************************/ | |
281 /* Shift-JIS detector */ | |
282 /************************************************************************/ | |
283 | |
284 DEFINE_DETECTOR (shift_jis); | |
285 DEFINE_DETECTOR_CATEGORY (shift_jis, shift_jis); | |
286 | |
287 struct shift_jis_detector | |
288 { | |
289 int seen_jisx0208_char_in_c1; | |
290 int seen_jisx0208_char_in_upper; | |
291 int seen_jisx0201_char; | |
292 unsigned int seen_iso2022_esc:1; | |
293 unsigned int seen_bad_first_byte:1; | |
294 unsigned int seen_bad_second_byte:1; | |
295 /* temporary */ | |
296 unsigned int in_second_byte:1; | |
297 unsigned int first_byte_was_c1:1; | |
298 }; | |
299 | |
300 static void | |
301 shift_jis_detect (struct detection_state *st, const UExtbyte *src, | |
302 Bytecount n) | |
303 { | |
304 struct shift_jis_detector *data = DETECTION_STATE_DATA (st, shift_jis); | |
305 | |
306 while (n--) | |
307 { | |
308 UExtbyte c = *src++; | |
309 if (!data->in_second_byte) | |
310 { | |
311 if (c >= 0x80 && c <= 0x9F) | |
312 data->first_byte_was_c1 = 1; | |
313 if (c >= 0xA0 && c <= 0xDF) | |
314 data->seen_jisx0201_char++; | |
315 else if ((c >= 0x80 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) | |
316 data->in_second_byte = 1; | |
317 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
318 data->seen_iso2022_esc = 1; | |
319 else if (c >= 0x80) | |
320 data->seen_bad_first_byte = 1; | |
321 } | |
322 else | |
323 { | |
324 if ((c >= 0x40 && c <= 0x7E) || (c >= 0x80 && c <= 0xFC)) | |
325 { | |
326 if (data->first_byte_was_c1 || (c >= 0x80 && c <= 0x9F)) | |
327 data->seen_jisx0208_char_in_c1++; | |
328 else | |
329 data->seen_jisx0208_char_in_upper++; | |
330 } | |
331 else | |
332 data->seen_bad_second_byte = 1; | |
333 data->in_second_byte = 0; | |
334 data->first_byte_was_c1 = 0; | |
335 } | |
336 } | |
337 | |
338 if (data->seen_bad_second_byte) | |
339 DET_RESULT (st, shift_jis) = DET_NEARLY_IMPOSSIBLE; | |
340 else if (data->seen_bad_first_byte) | |
341 DET_RESULT (st, shift_jis) = DET_QUITE_IMPROBABLE; | |
342 else if (data->seen_iso2022_esc) | |
343 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_UNLIKELY; | |
344 else if (data->seen_jisx0208_char_in_c1 >= 20 || | |
345 (data->seen_jisx0208_char_in_c1 >= 10 && | |
346 data->seen_jisx0208_char_in_upper >= 10)) | |
347 DET_RESULT (st, shift_jis) = DET_QUITE_PROBABLE; | |
348 else if (data->seen_jisx0208_char_in_c1 > 3 || | |
349 data->seen_jisx0208_char_in_upper >= 10 || | |
350 /* Since the range is limited compared to what is often seen | |
351 is typical Latin-X charsets, the fact that we've seen a | |
352 bunch of them and none that are invalid is reasonably | |
353 strong statistical evidence of this encoding, or at least | |
354 not of the common Latin-X ones. */ | |
355 data->seen_jisx0201_char >= 100) | |
356 DET_RESULT (st, shift_jis) = DET_SOMEWHAT_LIKELY; | |
357 else if (data->seen_jisx0208_char_in_c1 > 0 || | |
358 data->seen_jisx0208_char_in_upper > 0 || | |
359 data->seen_jisx0201_char > 0) | |
360 DET_RESULT (st, shift_jis) = DET_SLIGHTLY_LIKELY; | |
361 else | |
362 DET_RESULT (st, shift_jis) = DET_AS_LIKELY_AS_UNLIKELY; | |
363 } | |
364 | |
365 | |
366 /************************************************************************/ | |
367 /* Big5 methods */ | |
368 /************************************************************************/ | |
369 | |
2819 | 370 /* BIG5 (used for Mandarin in Taiwan). */ |
771 | 371 DEFINE_CODING_SYSTEM_TYPE (big5); |
372 | |
373 /* BIG5 is a coding system encoding two character sets: ASCII and | |
374 Big5. An ASCII character is encoded as is. Big5 is a two-byte | |
375 character set and is encoded in two-byte. | |
376 | |
377 --- CODE RANGE of BIG5 --- | |
378 (character set) (range) | |
379 ASCII 0x00 .. 0x7F | |
380 Big5 (1st byte) 0xA1 .. 0xFE | |
381 (2nd byte) 0x40 .. 0x7E and 0xA1 .. 0xFE | |
382 -------------------------- | |
383 | |
384 Since the number of characters in Big5 is larger than maximum | |
385 characters in Emacs' charset (96x96), it can't be handled as one | |
386 charset. So, in XEmacs, Big5 is divided into two: `charset-big5-1' | |
387 and `charset-big5-2'. Both <type>s are DIMENSION2_CHARS94. The former | |
388 contains frequently used characters and the latter contains less | |
389 frequently used characters. */ | |
390 | |
826 | 391 inline static int |
392 byte_big5_two_byte_1_p (int c) | |
393 { | |
394 return c >= 0xA1 && c <= 0xFE; | |
395 } | |
771 | 396 |
397 /* Is this the second byte of a Shift-JIS two-byte char? */ | |
398 | |
826 | 399 inline static int |
400 byte_big5_two_byte_2_p (int c) | |
401 { | |
402 return (c >= 0x40 && c <= 0x7E) || (c >= 0xA1 && c <= 0xFE); | |
403 } | |
771 | 404 |
405 /* Number of Big5 characters which have the same code in 1st byte. */ | |
406 | |
407 #define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40) | |
408 | |
409 /* Code conversion macros. These are macros because they are used in | |
410 inner loops during code conversion. | |
411 | |
412 Note that temporary variables in macros introduce the classic | |
413 dynamic-scoping problems with variable names. We use capital- | |
414 lettered variables in the assumption that XEmacs does not use | |
415 capital letters in variables except in a very formalized way | |
416 (e.g. Qstring). */ | |
417 | |
418 /* Convert Big5 code (b1, b2) into its internal string representation | |
419 (lb, c1, c2). */ | |
420 | |
421 /* There is a much simpler way to split the Big5 charset into two. | |
422 For the moment I'm going to leave the algorithm as-is because it | |
423 claims to separate out the most-used characters into a single | |
424 charset, which perhaps will lead to optimizations in various | |
425 places. | |
426 | |
427 The way the algorithm works is something like this: | |
428 | |
429 Big5 can be viewed as a 94x157 charset, where the row is | |
430 encoded into the bytes 0xA1 .. 0xFE and the column is encoded | |
431 into the bytes 0x40 .. 0x7E and 0xA1 .. 0xFE. As for frequency, | |
432 the split between low and high column numbers is apparently | |
433 meaningless; ascending rows produce less and less frequent chars. | |
434 Therefore, we assign the lower half of rows (0xA1 .. 0xC8) to | |
435 the first charset, and the upper half (0xC9 .. 0xFE) to the | |
436 second. To do the conversion, we convert the character into | |
437 a single number where 0 .. 156 is the first row, 157 .. 313 | |
438 is the second, etc. That way, the characters are ordered by | |
439 decreasing frequency. Then we just chop the space in two | |
440 and coerce the result into a 94x94 space. | |
441 */ | |
442 | |
443 #define DECODE_BIG5(b1, b2, lb, c1, c2) do \ | |
444 { \ | |
445 int B1 = b1, B2 = b2; \ | |
446 int I \ | |
447 = (B1 - 0xA1) * BIG5_SAME_ROW + B2 - (B2 < 0x7F ? 0x40 : 0x62); \ | |
448 \ | |
449 if (B1 < 0xC9) \ | |
450 { \ | |
451 lb = LEADING_BYTE_CHINESE_BIG5_1; \ | |
452 } \ | |
453 else \ | |
454 { \ | |
455 lb = LEADING_BYTE_CHINESE_BIG5_2; \ | |
456 I -= (BIG5_SAME_ROW) * (0xC9 - 0xA1); \ | |
457 } \ | |
458 c1 = I / (0xFF - 0xA1) + 0xA1; \ | |
459 c2 = I % (0xFF - 0xA1) + 0xA1; \ | |
460 } while (0) | |
461 | |
462 /* Convert the internal string representation of a Big5 character | |
463 (lb, c1, c2) into Big5 code (b1, b2). */ | |
464 | |
465 #define ENCODE_BIG5(lb, c1, c2, b1, b2) do \ | |
466 { \ | |
467 int I = ((c1) - 0xA1) * (0xFF - 0xA1) + ((c2) - 0xA1); \ | |
468 \ | |
469 if (lb == LEADING_BYTE_CHINESE_BIG5_2) \ | |
470 { \ | |
471 I += BIG5_SAME_ROW * (0xC9 - 0xA1); \ | |
472 } \ | |
473 b1 = I / BIG5_SAME_ROW + 0xA1; \ | |
474 b2 = I % BIG5_SAME_ROW; \ | |
475 b2 += b2 < 0x3F ? 0x40 : 0x62; \ | |
476 } while (0) | |
477 | |
478 /* Convert Big5 data to internal format. */ | |
479 | |
480 static Bytecount | |
481 big5_convert (struct coding_stream *str, const UExtbyte *src, | |
482 unsigned_char_dynarr *dst, Bytecount n) | |
483 { | |
484 unsigned int ch = str->ch; | |
485 Bytecount orign = n; | |
486 | |
487 if (str->direction == CODING_DECODE) | |
488 { | |
489 while (n--) | |
490 { | |
491 UExtbyte c = *src++; | |
492 if (ch) | |
493 { | |
494 /* Previous character was first byte of Big5 char. */ | |
826 | 495 if (byte_big5_two_byte_2_p (c)) |
771 | 496 { |
867 | 497 Ibyte b1, b2, b3; |
771 | 498 DECODE_BIG5 (ch, c, b1, b2, b3); |
499 Dynarr_add (dst, b1); | |
500 Dynarr_add (dst, b2); | |
501 Dynarr_add (dst, b3); | |
502 } | |
503 else | |
504 { | |
505 DECODE_ADD_BINARY_CHAR (ch, dst); | |
506 DECODE_ADD_BINARY_CHAR (c, dst); | |
507 } | |
508 ch = 0; | |
509 } | |
510 else | |
511 { | |
826 | 512 if (byte_big5_two_byte_1_p (c)) |
771 | 513 ch = c; |
514 else | |
515 DECODE_ADD_BINARY_CHAR (c, dst); | |
516 } | |
517 } | |
518 | |
519 if (str->eof) | |
520 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
521 } | |
522 else | |
523 { | |
524 while (n--) | |
525 { | |
867 | 526 Ibyte c = *src++; |
826 | 527 if (byte_ascii_p (c)) |
771 | 528 { |
529 /* ASCII. */ | |
530 Dynarr_add (dst, c); | |
531 } | |
867 | 532 else if (ibyte_leading_byte_p (c)) |
771 | 533 { |
534 if (c == LEADING_BYTE_CHINESE_BIG5_1 || | |
535 c == LEADING_BYTE_CHINESE_BIG5_2) | |
536 { | |
537 /* A recognized leading byte. */ | |
538 ch = c; | |
539 continue; /* not done with this character. */ | |
540 } | |
541 /* otherwise just ignore this character. */ | |
542 } | |
543 else if (ch == LEADING_BYTE_CHINESE_BIG5_1 || | |
544 ch == LEADING_BYTE_CHINESE_BIG5_2) | |
545 { | |
546 /* Previous char was a recognized leading byte. */ | |
547 ch = (ch << 8) | c; | |
548 continue; /* not done with this character. */ | |
549 } | |
550 else if (ch) | |
551 { | |
552 /* Encountering second byte of a Big5 character. */ | |
553 UExtbyte b1, b2; | |
554 | |
555 ENCODE_BIG5 (ch >> 8, ch & 0xFF, c, b1, b2); | |
556 Dynarr_add (dst, b1); | |
557 Dynarr_add (dst, b2); | |
558 } | |
559 | |
560 ch = 0; | |
561 } | |
562 } | |
563 | |
564 str->ch = ch; | |
565 return orign; | |
566 } | |
567 | |
867 | 568 Ichar |
771 | 569 decode_big5_char (int b1, int b2) |
570 { | |
826 | 571 if (byte_big5_two_byte_1_p (b1) && |
572 byte_big5_two_byte_2_p (b2)) | |
771 | 573 { |
574 int leading_byte; | |
575 Lisp_Object charset; | |
576 int c1, c2; | |
577 | |
578 DECODE_BIG5 (b1, b2, leading_byte, c1, c2); | |
826 | 579 charset = charset_by_leading_byte (leading_byte); |
867 | 580 return make_ichar (charset, c1 & 0x7F, c2 & 0x7F); |
771 | 581 } |
582 else | |
583 return -1; | |
584 } | |
585 | |
586 DEFUN ("decode-big5-char", Fdecode_big5_char, 1, 1, 0, /* | |
587 Convert Big Five character codes in CODE into a character. | |
588 CODE is a cons of two integers specifying the codepoints in Big Five. | |
589 Return the corresponding character, or nil if the codepoints are out of range. | |
590 | |
591 The term `decode' is used because the codepoints can be viewed as the | |
592 representation of the character in the external Big Five encoding, and thus | |
593 converting them to a character is analogous to any other operation that | |
594 decodes an external representation. | |
595 */ | |
596 (code)) | |
597 { | |
867 | 598 Ichar ch; |
771 | 599 |
600 CHECK_CONS (code); | |
601 CHECK_INT (XCAR (code)); | |
602 CHECK_INT (XCDR (code)); | |
603 ch = decode_big5_char (XINT (XCAR (code)), XINT (XCDR (code))); | |
604 if (ch == -1) | |
605 return Qnil; | |
606 else | |
607 return make_char (ch); | |
608 } | |
609 | |
610 DEFUN ("encode-big5-char", Fencode_big5_char, 1, 1, 0, /* | |
611 Convert the specified Big Five character into its codepoints. | |
612 The codepoints are returned as a cons of two integers, specifying the | |
613 Big Five codepoints. See `decode-big5-char' for the reason why the | |
614 term `encode' is used for this operation. | |
615 */ | |
616 (character)) | |
617 { | |
618 Lisp_Object charset; | |
619 int c1, c2, b1, b2; | |
620 | |
621 CHECK_CHAR_COERCE_INT (character); | |
867 | 622 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
771 | 623 if (EQ (charset, Vcharset_chinese_big5_1) || |
624 EQ (charset, Vcharset_chinese_big5_2)) | |
625 { | |
626 ENCODE_BIG5 (XCHARSET_LEADING_BYTE (charset), c1 | 0x80, c2 | 0x80, | |
627 b1, b2); | |
628 return Fcons (make_int (b1), make_int (b2)); | |
629 } | |
630 else | |
631 return Qnil; | |
632 } | |
633 | |
634 | |
635 /************************************************************************/ | |
636 /* Big5 detector */ | |
637 /************************************************************************/ | |
638 | |
639 DEFINE_DETECTOR (big5); | |
640 DEFINE_DETECTOR_CATEGORY (big5, big5); | |
641 | |
642 struct big5_detector | |
643 { | |
644 int seen_big5_char; | |
985 | 645 int seen_euc_char; |
771 | 646 unsigned int seen_iso2022_esc:1; |
647 unsigned int seen_bad_first_byte:1; | |
648 unsigned int seen_bad_second_byte:1; | |
649 | |
650 /* temporary */ | |
651 unsigned int in_second_byte:1; | |
652 }; | |
653 | |
654 static void | |
655 big5_detect (struct detection_state *st, const UExtbyte *src, | |
656 Bytecount n) | |
657 { | |
658 struct big5_detector *data = DETECTION_STATE_DATA (st, big5); | |
659 | |
660 while (n--) | |
661 { | |
662 UExtbyte c = *src++; | |
663 if (!data->in_second_byte) | |
664 { | |
665 if (c >= 0xA1 && c <= 0xFE) | |
666 data->in_second_byte = 1; | |
667 else if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) | |
668 data->seen_iso2022_esc = 1; | |
669 else if (c >= 0x80) | |
670 data->seen_bad_first_byte = 1; | |
671 } | |
672 else | |
673 { | |
674 data->in_second_byte = 0; | |
985 | 675 if (c >= 0xA1 && c <= 0xFE) |
676 data->seen_euc_char++; | |
677 else if (c >= 0x40 && c <= 0x7E) | |
771 | 678 data->seen_big5_char++; |
679 else | |
680 data->seen_bad_second_byte = 1; | |
681 } | |
682 } | |
683 | |
684 if (data->seen_bad_second_byte) | |
685 DET_RESULT (st, big5) = DET_NEARLY_IMPOSSIBLE; | |
686 else if (data->seen_bad_first_byte) | |
687 DET_RESULT (st, big5) = DET_QUITE_IMPROBABLE; | |
688 else if (data->seen_iso2022_esc) | |
689 DET_RESULT (st, big5) = DET_SOMEWHAT_UNLIKELY; | |
690 else if (data->seen_big5_char >= 4) | |
691 DET_RESULT (st, big5) = DET_SOMEWHAT_LIKELY; | |
985 | 692 else if (data->seen_euc_char) |
693 DET_RESULT (st, big5) = DET_SLIGHTLY_LIKELY; | |
771 | 694 else |
695 DET_RESULT (st, big5) = DET_AS_LIKELY_AS_UNLIKELY; | |
696 } | |
697 | |
698 | |
699 /************************************************************************/ | |
700 /* ISO2022 methods */ | |
701 /************************************************************************/ | |
702 | |
703 /* Any ISO-2022-compliant coding system. Includes JIS, EUC, CTEXT | |
704 (Compound Text, the encoding of selections in X Windows). See below for | |
705 a complete description of ISO-2022. */ | |
706 | |
707 /* Flags indicating what we've seen so far when parsing an | |
708 ISO2022 escape sequence. */ | |
709 enum iso_esc_flag | |
710 { | |
711 /* Partial sequences */ | |
712 ISO_ESC_NOTHING, /* Nothing has been seen. */ | |
713 ISO_ESC, /* We've seen ESC. */ | |
714 ISO_ESC_2_4, /* We've seen ESC $. This indicates | |
715 that we're designating a multi-byte, rather | |
716 than a single-byte, character set. */ | |
3439 | 717 ISO_ESC_2_5, /* We've seen ESC %. This indicates an escape to a |
718 Unicode coding system; the only one of these | |
719 we're prepared to deal with is UTF-8, which has | |
720 the next character as G. */ | |
771 | 721 ISO_ESC_2_8, /* We've seen ESC 0x28, i.e. ESC (. |
722 This means designate a 94-character | |
723 character set into G0. */ | |
724 ISO_ESC_2_9, /* We've seen ESC 0x29 -- designate a | |
725 94-character character set into G1. */ | |
726 ISO_ESC_2_10, /* We've seen ESC 0x2A. */ | |
727 ISO_ESC_2_11, /* We've seen ESC 0x2B. */ | |
728 ISO_ESC_2_12, /* We've seen ESC 0x2C -- designate a | |
729 96-character character set into G0. | |
730 (This is not ISO2022-standard. | |
731 The following 96-character | |
732 control sequences are standard, | |
733 though.) */ | |
734 ISO_ESC_2_13, /* We've seen ESC 0x2D -- designate a | |
735 96-character character set into G1. | |
736 */ | |
737 ISO_ESC_2_14, /* We've seen ESC 0x2E. */ | |
738 ISO_ESC_2_15, /* We've seen ESC 0x2F. */ | |
739 ISO_ESC_2_4_8, /* We've seen ESC $ 0x28 -- designate | |
740 a 94^N character set into G0. */ | |
741 ISO_ESC_2_4_9, /* We've seen ESC $ 0x29. */ | |
742 ISO_ESC_2_4_10, /* We've seen ESC $ 0x2A. */ | |
743 ISO_ESC_2_4_11, /* We've seen ESC $ 0x2B. */ | |
744 ISO_ESC_2_4_12, /* We've seen ESC $ 0x2C. */ | |
745 ISO_ESC_2_4_13, /* We've seen ESC $ 0x2D. */ | |
746 ISO_ESC_2_4_14, /* We've seen ESC $ 0x2E. */ | |
747 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */ | |
748 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This | |
749 starts a directionality-control | |
750 sequence. The next character | |
751 must be 0, 1, 2, or ]. */ | |
752 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next character must be ]. */ | |
753 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next character must be ]. */ | |
754 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next character must be ]. */ | |
755 | |
756 /* Full sequences. */ | |
757 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */ | |
758 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */ | |
759 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */ | |
760 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */ | |
761 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */ | |
762 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality | |
763 sequence. */ | |
764 ISO_ESC_LITERAL /* We've seen a literal character ala | |
765 escape-quoting. */ | |
766 }; | |
767 | |
768 enum iso_error | |
769 { | |
770 ISO_ERROR_BAD_FINAL, | |
771 ISO_ERROR_UNKWOWN_ESC_SEQUENCE, | |
772 ISO_ERROR_INVALID_CODE_POINT_CHARACTER, | |
773 }; | |
774 | |
775 | |
776 /* Flags indicating current state while converting code. */ | |
777 | |
778 /************ Used during encoding and decoding: ************/ | |
779 /* If set, the current directionality is right-to-left. Otherwise, it's | |
780 left-to-right. */ | |
781 #define ISO_STATE_R2L (1 << 0) | |
782 | |
783 /************ Used during encoding: ************/ | |
784 /* If set, we just saw a CR. */ | |
785 #define ISO_STATE_CR (1 << 1) | |
786 | |
787 /************ Used during decoding: ************/ | |
788 /* If set, we're currently parsing an escape sequence and the upper 16 bits | |
789 should be looked at to indicate what partial escape sequence we've seen | |
790 so far. Otherwise, we're running through actual text. */ | |
791 #define ISO_STATE_ESCAPE (1 << 2) | |
792 /* If set, G2 is invoked into GL, but only for the next character. */ | |
793 #define ISO_STATE_SS2 (1 << 3) | |
794 /* If set, G3 is invoked into GL, but only for the next character. If both | |
795 ISO_STATE_SS2 and ISO_STATE_SS3 are set, ISO_STATE_SS2 overrides; but | |
796 this probably indicates an error in the text encoding. */ | |
797 #define ISO_STATE_SS3 (1 << 4) | |
798 /* If set, we're currently processing a composite character (i.e. a | |
799 character constructed by overstriking two or more characters). */ | |
800 #define ISO_STATE_COMPOSITE (1 << 5) | |
801 | |
3439 | 802 /* If set, we're processing UTF-8 encoded data within ISO-2022 |
803 processing. */ | |
804 #define ISO_STATE_UTF_8 (1 << 6) | |
805 | |
771 | 806 /* ISO_STATE_LOCK is the mask of flags that remain on until explicitly |
807 turned off when in the ISO2022 encoder/decoder. Other flags are turned | |
808 off at the end of processing each character or escape sequence. */ | |
809 # define ISO_STATE_LOCK \ | |
3439 | 810 (ISO_STATE_COMPOSITE | ISO_STATE_R2L | ISO_STATE_UTF_8) |
771 | 811 |
812 typedef struct charset_conversion_spec | |
813 { | |
814 Lisp_Object from_charset; | |
815 Lisp_Object to_charset; | |
816 } charset_conversion_spec; | |
817 | |
818 typedef struct | |
819 { | |
820 Dynarr_declare (charset_conversion_spec); | |
821 } charset_conversion_spec_dynarr; | |
822 | |
823 struct iso2022_coding_system | |
824 { | |
825 /* What are the charsets to be initially designated to G0, G1, | |
826 G2, G3? If t, no charset is initially designated. If nil, | |
827 no charset is initially designated and no charset is allowed | |
828 to be designated. */ | |
829 Lisp_Object initial_charset[4]; | |
830 | |
831 /* If true, a designation escape sequence needs to be sent on output | |
832 for the charset in G[0-3] before that charset is used. */ | |
833 unsigned char force_charset_on_output[4]; | |
834 | |
835 charset_conversion_spec_dynarr *input_conv; | |
836 charset_conversion_spec_dynarr *output_conv; | |
837 | |
838 unsigned int shoort :1; /* C makes you speak Dutch */ | |
839 unsigned int no_ascii_eol :1; | |
840 unsigned int no_ascii_cntl :1; | |
841 unsigned int seven :1; | |
842 unsigned int lock_shift :1; | |
843 unsigned int no_iso6429 :1; | |
844 unsigned int escape_quoted :1; | |
845 }; | |
846 | |
847 #define CODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ | |
848 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->initial_charset[g]) | |
849 #define CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ | |
850 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->force_charset_on_output[g]) | |
851 #define CODING_SYSTEM_ISO2022_SHORT(codesys) \ | |
852 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->shoort) | |
853 #define CODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ | |
854 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_ascii_eol) | |
855 #define CODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ | |
856 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_ascii_cntl) | |
857 #define CODING_SYSTEM_ISO2022_SEVEN(codesys) \ | |
858 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->seven) | |
859 #define CODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ | |
860 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->lock_shift) | |
861 #define CODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \ | |
862 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->no_iso6429) | |
863 #define CODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \ | |
864 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->escape_quoted) | |
865 #define CODING_SYSTEM_ISO2022_INPUT_CONV(codesys) \ | |
866 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->input_conv) | |
867 #define CODING_SYSTEM_ISO2022_OUTPUT_CONV(codesys) \ | |
868 (CODING_SYSTEM_TYPE_DATA (codesys, iso2022)->output_conv) | |
869 | |
870 #define XCODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ | |
871 CODING_SYSTEM_ISO2022_INITIAL_CHARSET (XCODING_SYSTEM (codesys), g) | |
872 #define XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ | |
873 CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (XCODING_SYSTEM (codesys), g) | |
874 #define XCODING_SYSTEM_ISO2022_SHORT(codesys) \ | |
875 CODING_SYSTEM_ISO2022_SHORT (XCODING_SYSTEM (codesys)) | |
876 #define XCODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ | |
877 CODING_SYSTEM_ISO2022_NO_ASCII_EOL (XCODING_SYSTEM (codesys)) | |
878 #define XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ | |
879 CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (XCODING_SYSTEM (codesys)) | |
880 #define XCODING_SYSTEM_ISO2022_SEVEN(codesys) \ | |
881 CODING_SYSTEM_ISO2022_SEVEN (XCODING_SYSTEM (codesys)) | |
882 #define XCODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ | |
883 CODING_SYSTEM_ISO2022_LOCK_SHIFT (XCODING_SYSTEM (codesys)) | |
884 #define XCODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \ | |
885 CODING_SYSTEM_ISO2022_NO_ISO6429 (XCODING_SYSTEM (codesys)) | |
886 #define XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \ | |
887 CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (XCODING_SYSTEM (codesys)) | |
888 #define XCODING_SYSTEM_ISO2022_INPUT_CONV(codesys) \ | |
889 CODING_SYSTEM_ISO2022_INPUT_CONV (XCODING_SYSTEM (codesys)) | |
890 #define XCODING_SYSTEM_ISO2022_OUTPUT_CONV(codesys) \ | |
891 CODING_SYSTEM_ISO2022_OUTPUT_CONV (XCODING_SYSTEM (codesys)) | |
892 | |
893 /* Additional information used by the ISO2022 decoder and detector. */ | |
894 struct iso2022_coding_stream | |
895 { | |
896 /* CHARSET holds the character sets currently assigned to the G0 | |
897 through G3 variables. It is initialized from the array | |
898 INITIAL_CHARSET in CODESYS. */ | |
899 Lisp_Object charset[4]; | |
900 | |
901 /* Which registers are currently invoked into the left (GL) and | |
902 right (GR) halves of the 8-bit encoding space? */ | |
903 int register_left, register_right; | |
904 | |
905 /* FLAGS holds flags indicating the current state of the encoding. Some of | |
906 these flags are actually part of the state-dependent data and should be | |
907 moved there. */ | |
908 unsigned int flags; | |
909 | |
910 /**************** for decoding ****************/ | |
911 | |
912 /* ISO_ESC holds a value indicating part of an escape sequence | |
913 that has already been seen. */ | |
914 enum iso_esc_flag esc; | |
915 | |
916 /* This records the bytes we've seen so far in an escape sequence, | |
917 in case the sequence is invalid (we spit out the bytes unchanged). */ | |
918 unsigned char esc_bytes[8]; | |
919 | |
920 /* Index for next byte to store in ISO escape sequence. */ | |
921 int esc_bytes_index; | |
922 | |
923 #ifdef ENABLE_COMPOSITE_CHARS | |
924 /* Stuff seen so far when composing a string. */ | |
925 unsigned_char_dynarr *composite_chars; | |
926 #endif | |
927 | |
928 /* If we saw an invalid designation sequence for a particular | |
929 register, we flag it here and switch to ASCII. The next time we | |
930 see a valid designation for this register, we turn off the flag | |
931 and do the designation normally, but pretend the sequence was | |
932 invalid. The effect of all this is that (most of the time) the | |
933 escape sequences for both the switch to the unknown charset, and | |
934 the switch back to the known charset, get inserted literally into | |
935 the buffer and saved out as such. The hope is that we can | |
936 preserve the escape sequences so that the resulting written out | |
937 file makes sense. If we don't do any of this, the designation | |
938 to the invalid charset will be preserved but that switch back | |
939 to the known charset will probably get eaten because it was | |
940 the same charset that was already present in the register. */ | |
941 unsigned char invalid_designated[4]; | |
942 | |
943 /* We try to do similar things as above for direction-switching | |
944 sequences. If we encountered a direction switch while an | |
945 invalid designation was present, or an invalid designation | |
946 just after a direction switch (i.e. no valid designation | |
947 encountered yet), we insert the direction-switch escape | |
948 sequence literally into the output stream, and later on | |
949 insert the corresponding direction-restoring escape sequence | |
950 literally also. */ | |
951 unsigned int switched_dir_and_no_valid_charset_yet :1; | |
952 unsigned int invalid_switch_dir :1; | |
953 | |
954 /* Tells the decoder to output the escape sequence literally | |
955 even though it was valid. Used in the games we play to | |
956 avoid lossage when we encounter invalid designations. */ | |
957 unsigned int output_literally :1; | |
958 /* We encountered a direction switch followed by an invalid | |
959 designation. We didn't output the direction switch | |
960 literally because we didn't know about the invalid designation; | |
961 but we have to do so now. */ | |
962 unsigned int output_direction_sequence :1; | |
963 | |
964 /**************** for encoding ****************/ | |
965 | |
966 /* Whether we need to explicitly designate the charset in the | |
967 G? register before using it. It is initialized from the | |
968 array FORCE_CHARSET_ON_OUTPUT in CODESYS. */ | |
969 unsigned char force_charset_on_output[4]; | |
970 | |
971 /* Other state variables that need to be preserved across | |
972 invocations. */ | |
973 Lisp_Object current_charset; | |
974 int current_half; | |
975 int current_char_boundary; | |
3439 | 976 |
977 /* Used for handling UTF-8. */ | |
978 unsigned char counter; | |
4096 | 979 unsigned char indicated_length; |
771 | 980 }; |
981 | |
1204 | 982 static const struct memory_description ccs_description_1[] = |
771 | 983 { |
984 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, from_charset) }, | |
985 { XD_LISP_OBJECT, offsetof (charset_conversion_spec, to_charset) }, | |
986 { XD_END } | |
987 }; | |
988 | |
1204 | 989 static const struct sized_memory_description ccs_description = |
771 | 990 { |
991 sizeof (charset_conversion_spec), | |
992 ccs_description_1 | |
993 }; | |
994 | |
1204 | 995 static const struct memory_description ccsd_description_1[] = |
771 | 996 { |
997 XD_DYNARR_DESC (charset_conversion_spec_dynarr, &ccs_description), | |
998 { XD_END } | |
999 }; | |
1000 | |
1204 | 1001 static const struct sized_memory_description ccsd_description = |
771 | 1002 { |
1003 sizeof (charset_conversion_spec_dynarr), | |
1004 ccsd_description_1 | |
1005 }; | |
1006 | |
1204 | 1007 static const struct memory_description iso2022_coding_system_description[] = { |
1008 { XD_LISP_OBJECT_ARRAY, offsetof (struct iso2022_coding_system, | |
1009 initial_charset), 4 }, | |
2367 | 1010 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, input_conv), |
2551 | 1011 1, { &ccsd_description } }, |
2367 | 1012 { XD_BLOCK_PTR, offsetof (struct iso2022_coding_system, output_conv), |
2551 | 1013 1, { &ccsd_description } }, |
771 | 1014 { XD_END } |
1015 }; | |
1016 | |
1204 | 1017 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022); |
1018 | |
771 | 1019 /* The following note taken directly from FSF 21.0.103. */ |
1020 | |
1021 /* The following note describes the coding system ISO2022 briefly. | |
1022 Since the intention of this note is to help understand the | |
1023 functions in this file, some parts are NOT ACCURATE or are OVERLY | |
1024 SIMPLIFIED. For thorough understanding, please refer to the | |
1025 original document of ISO2022. This is equivalent to the standard | |
1026 ECMA-35, obtainable from <URL:http://www.ecma.ch/> (*). | |
1027 | |
1028 ISO2022 provides many mechanisms to encode several character sets | |
1029 in 7-bit and 8-bit environments. For 7-bit environments, all text | |
1030 is encoded using bytes less than 128. This may make the encoded | |
1031 text a little bit longer, but the text passes more easily through | |
1032 several types of gateway, some of which strip off the MSB (Most | |
1033 Significant Bit). | |
1034 | |
1035 There are two kinds of character sets: control character sets and | |
1036 graphic character sets. The former contain control characters such | |
1037 as `newline' and `escape' to provide control functions (control | |
1038 functions are also provided by escape sequences). The latter | |
1039 contain graphic characters such as 'A' and '-'. Emacs recognizes | |
1040 two control character sets and many graphic character sets. | |
1041 | |
1042 Graphic character sets are classified into one of the following | |
1043 four classes, according to the number of bytes (DIMENSION) and | |
1044 number of characters in one dimension (CHARS) of the set: | |
1045 - DIMENSION1_CHARS94 | |
1046 - DIMENSION1_CHARS96 | |
1047 - DIMENSION2_CHARS94 | |
1048 - DIMENSION2_CHARS96 | |
1049 | |
1050 In addition, each character set is assigned an identification tag, | |
1051 unique for each set, called the "final character" (denoted as <F> | |
1052 hereafter). The <F> of each character set is decided by ECMA(*) | |
1053 when it is registered in ISO. The code range of <F> is 0x30..0x7F | |
1054 (0x30..0x3F are for private use only). | |
1055 | |
1056 Note (*): ECMA = European Computer Manufacturers Association | |
1057 | |
1058 Here are examples of graphic character sets [NAME(<F>)]: | |
1059 o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ... | |
1060 o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ... | |
1061 o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ... | |
1062 o DIMENSION2_CHARS96 -- none for the moment | |
1063 | |
1064 A code area (1 byte=8 bits) is divided into 4 areas, C0, GL, C1, and GR. | |
1065 C0 [0x00..0x1F] -- control character plane 0 | |
1066 GL [0x20..0x7F] -- graphic character plane 0 | |
1067 C1 [0x80..0x9F] -- control character plane 1 | |
1068 GR [0xA0..0xFF] -- graphic character plane 1 | |
1069 | |
1070 A control character set is directly designated and invoked to C0 or | |
1071 C1 by an escape sequence. The most common case is that: | |
1072 - ISO646's control character set is designated/invoked to C0, and | |
1073 - ISO6429's control character set is designated/invoked to C1, | |
1074 and usually these designations/invocations are omitted in encoded | |
1075 text. In a 7-bit environment, only C0 can be used, and a control | |
1076 character for C1 is encoded by an appropriate escape sequence to | |
1077 fit into the environment. All control characters for C1 are | |
1078 defined to have corresponding escape sequences. | |
1079 | |
1080 A graphic character set is at first designated to one of four | |
1081 graphic registers (G0 through G3), then these graphic registers are | |
1082 invoked to GL or GR. These designations and invocations can be | |
1083 done independently. The most common case is that G0 is invoked to | |
1084 GL, G1 is invoked to GR, and ASCII is designated to G0. Usually | |
1085 these invocations and designations are omitted in encoded text. | |
1086 In a 7-bit environment, only GL can be used. | |
1087 | |
1088 When a graphic character set of CHARS94 is invoked to GL, codes | |
1089 0x20 and 0x7F of the GL area work as control characters SPACE and | |
1090 DEL respectively, and codes 0xA0 and 0xFF of the GR area should not | |
1091 be used. | |
1092 | |
1093 There are two ways of invocation: locking-shift and single-shift. | |
1094 With locking-shift, the invocation lasts until the next different | |
1095 invocation, whereas with single-shift, the invocation affects the | |
1096 following character only and doesn't affect the locking-shift | |
1097 state. Invocations are done by the following control characters or | |
1098 escape sequences: | |
1099 | |
1100 ---------------------------------------------------------------------- | |
1101 abbrev function cntrl escape seq description | |
1102 ---------------------------------------------------------------------- | |
1103 SI/LS0 (shift-in) 0x0F none invoke G0 into GL | |
1104 SO/LS1 (shift-out) 0x0E none invoke G1 into GL | |
1105 LS2 (locking-shift-2) none ESC 'n' invoke G2 into GL | |
1106 LS3 (locking-shift-3) none ESC 'o' invoke G3 into GL | |
1107 LS1R (locking-shift-1 right) none ESC '~' invoke G1 into GR (*) | |
1108 LS2R (locking-shift-2 right) none ESC '}' invoke G2 into GR (*) | |
1109 LS3R (locking-shift 3 right) none ESC '|' invoke G3 into GR (*) | |
1110 SS2 (single-shift-2) 0x8E ESC 'N' invoke G2 for one char | |
1111 SS3 (single-shift-3) 0x8F ESC 'O' invoke G3 for one char | |
1112 ---------------------------------------------------------------------- | |
1113 (*) These are not used by any known coding system. | |
1114 | |
1115 Control characters for these functions are defined by macros | |
1116 ISO_CODE_XXX in `coding.h'. | |
1117 | |
1118 Designations are done by the following escape sequences: | |
1119 ---------------------------------------------------------------------- | |
1120 escape sequence description | |
1121 ---------------------------------------------------------------------- | |
1122 ESC '(' <F> designate DIMENSION1_CHARS94<F> to G0 | |
1123 ESC ')' <F> designate DIMENSION1_CHARS94<F> to G1 | |
1124 ESC '*' <F> designate DIMENSION1_CHARS94<F> to G2 | |
1125 ESC '+' <F> designate DIMENSION1_CHARS94<F> to G3 | |
1126 ESC ',' <F> designate DIMENSION1_CHARS96<F> to G0 (*) | |
1127 ESC '-' <F> designate DIMENSION1_CHARS96<F> to G1 | |
1128 ESC '.' <F> designate DIMENSION1_CHARS96<F> to G2 | |
1129 ESC '/' <F> designate DIMENSION1_CHARS96<F> to G3 | |
1130 ESC '$' '(' <F> designate DIMENSION2_CHARS94<F> to G0 (**) | |
1131 ESC '$' ')' <F> designate DIMENSION2_CHARS94<F> to G1 | |
1132 ESC '$' '*' <F> designate DIMENSION2_CHARS94<F> to G2 | |
1133 ESC '$' '+' <F> designate DIMENSION2_CHARS94<F> to G3 | |
1134 ESC '$' ',' <F> designate DIMENSION2_CHARS96<F> to G0 (*) | |
1135 ESC '$' '-' <F> designate DIMENSION2_CHARS96<F> to G1 | |
1136 ESC '$' '.' <F> designate DIMENSION2_CHARS96<F> to G2 | |
1137 ESC '$' '/' <F> designate DIMENSION2_CHARS96<F> to G3 | |
1138 ---------------------------------------------------------------------- | |
1139 | |
1140 In this list, "DIMENSION1_CHARS94<F>" means a graphic character set | |
1141 of dimension 1, chars 94, and final character <F>, etc... | |
1142 | |
1143 Note (*): Although these designations are not allowed in ISO2022, | |
1144 Emacs accepts them on decoding, and produces them on encoding | |
1145 CHARS96 character sets in a coding system which is characterized as | |
1146 7-bit environment, non-locking-shift, and non-single-shift. | |
1147 | |
1148 Note (**): If <F> is '@', 'A', or 'B', the intermediate character | |
1149 '(' can be omitted. We refer to this as "short-form" hereafter. | |
1150 | |
1151 Now you may notice that there are a lot of ways of encoding the | |
1152 same multilingual text in ISO2022. Actually, there exist many | |
1153 coding systems such as Compound Text (used in X11's inter client | |
1154 communication, ISO-2022-JP (used in Japanese Internet), ISO-2022-KR | |
1155 (used in Korean Internet), EUC (Extended UNIX Code, used in Asian | |
1156 localized platforms), and all of these are variants of ISO2022. | |
1157 | |
1158 In addition to the above, Emacs handles two more kinds of escape | |
1159 sequences: ISO6429's direction specification and Emacs' private | |
1160 sequence for specifying character composition. | |
1161 | |
1162 ISO6429's direction specification takes the following form: | |
1163 o CSI ']' -- end of the current direction | |
1164 o CSI '0' ']' -- end of the current direction | |
1165 o CSI '1' ']' -- start of left-to-right text | |
1166 o CSI '2' ']' -- start of right-to-left text | |
1167 The control character CSI (0x9B: control sequence introducer) is | |
1168 abbreviated to the escape sequence ESC '[' in a 7-bit environment. | |
1169 | |
1170 Character composition specification takes the following form: | |
1171 o ESC '0' -- start relative composition | |
1172 o ESC '1' -- end composition | |
1173 o ESC '2' -- start rule-base composition (*) | |
1174 o ESC '3' -- start relative composition with alternate chars (**) | |
1175 o ESC '4' -- start rule-base composition with alternate chars (**) | |
1176 Since these are not standard escape sequences of any ISO standard, | |
1177 the use of them with these meanings is restricted to Emacs only. | |
1178 | |
1179 (*) This form is used only in Emacs 20.5 and older versions, | |
1180 but the newer versions can safely decode it. | |
1181 (**) This form is used only in Emacs 21.1 and newer versions, | |
1182 and the older versions can't decode it. | |
1183 | |
1184 Here's a list of example usages of these composition escape | |
1185 sequences (categorized by `enum composition_method'). | |
1186 | |
1187 COMPOSITION_RELATIVE: | |
1188 ESC 0 CHAR [ CHAR ] ESC 1 | |
1189 COMPOSITION_WITH_RULE: | |
1190 ESC 2 CHAR [ RULE CHAR ] ESC 1 | |
1191 COMPOSITION_WITH_ALTCHARS: | |
1192 ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 | |
1193 COMPOSITION_WITH_RULE_ALTCHARS: | |
1194 ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */ | |
1195 | |
1196 static void | |
1197 reset_iso2022_decode (Lisp_Object coding_system, | |
1198 struct iso2022_coding_stream *data) | |
1199 { | |
1200 int i; | |
1201 #ifdef ENABLE_COMPOSITE_CHARS | |
1202 unsigned_char_dynarr *old_composite_chars = data->composite_chars; | |
1203 #endif | |
1204 | |
1205 xzero (*data); | |
1206 | |
1207 for (i = 0; i < 4; i++) | |
1208 { | |
1209 if (!NILP (coding_system)) | |
1210 data->charset[i] = | |
1211 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
1212 else | |
1213 data->charset[i] = Qt; | |
1214 } | |
1215 data->esc = ISO_ESC_NOTHING; | |
1216 data->register_right = 1; | |
1217 #ifdef ENABLE_COMPOSITE_CHARS | |
1218 if (old_composite_chars) | |
1219 { | |
1220 data->composite_chars = old_composite_chars; | |
1221 Dynarr_reset (data->composite_chars); | |
1222 } | |
1223 #endif | |
1224 } | |
1225 | |
1226 static void | |
1227 reset_iso2022_encode (Lisp_Object coding_system, | |
1228 struct iso2022_coding_stream *data) | |
1229 { | |
1230 int i; | |
1231 | |
1232 xzero (*data); | |
1233 | |
1234 for (i = 0; i < 4; i++) | |
1235 { | |
1236 data->charset[i] = | |
1237 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, i); | |
1238 data->force_charset_on_output[i] = | |
1239 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (coding_system, i); | |
1240 } | |
1241 data->register_right = 1; | |
1242 data->current_charset = Qnil; | |
1243 data->current_char_boundary = 1; | |
1244 } | |
1245 | |
1246 static void | |
1247 iso2022_init_coding_stream (struct coding_stream *str) | |
1248 { | |
1249 if (str->direction == CODING_DECODE) | |
1250 reset_iso2022_decode (str->codesys, | |
1251 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
1252 else | |
1253 reset_iso2022_encode (str->codesys, | |
1254 CODING_STREAM_TYPE_DATA (str, iso2022)); | |
1255 } | |
1256 | |
1257 static void | |
1258 iso2022_rewind_coding_stream (struct coding_stream *str) | |
1259 { | |
1260 iso2022_init_coding_stream (str); | |
1261 } | |
1262 | |
1263 static int | |
1264 fit_to_be_escape_quoted (unsigned char c) | |
1265 { | |
1266 switch (c) | |
1267 { | |
1268 case ISO_CODE_ESC: | |
1269 case ISO_CODE_CSI: | |
1270 case ISO_CODE_SS2: | |
1271 case ISO_CODE_SS3: | |
1272 case ISO_CODE_SO: | |
1273 case ISO_CODE_SI: | |
1274 return 1; | |
1275 | |
1276 default: | |
1277 return 0; | |
1278 } | |
1279 } | |
1280 | |
1281 static Lisp_Object | |
867 | 1282 charset_by_attributes_or_create_one (int type, Ibyte final, int dir) |
771 | 1283 { |
826 | 1284 Lisp_Object charset = charset_by_attributes (type, final, dir); |
771 | 1285 |
1286 if (NILP (charset)) | |
1287 { | |
1288 int chars, dim; | |
1289 | |
1290 switch (type) | |
1291 { | |
1292 case CHARSET_TYPE_94: | |
1293 chars = 94; dim = 1; | |
1294 break; | |
1295 case CHARSET_TYPE_96: | |
1296 chars = 96; dim = 1; | |
1297 break; | |
1298 case CHARSET_TYPE_94X94: | |
1299 chars = 94; dim = 2; | |
1300 break; | |
1301 case CHARSET_TYPE_96X96: | |
1302 chars = 96; dim = 2; | |
1303 break; | |
1304 default: | |
2500 | 1305 ABORT (); chars = 0; dim = 0; |
771 | 1306 } |
1307 | |
1308 charset = Fmake_charset (Qunbound, Qnil, | |
1309 nconc2 (list6 (Qfinal, make_char (final), | |
1310 Qchars, make_int (chars), | |
1311 Qdimension, make_int (dim)), | |
1312 list2 (Qdirection, | |
1313 dir == CHARSET_LEFT_TO_RIGHT ? | |
1314 Ql2r : Qr2l))); | |
1315 } | |
1316 | |
1317 return charset; | |
1318 } | |
1319 | |
1320 /* Parse one byte of an ISO2022 escape sequence. | |
1321 If the result is an invalid escape sequence, return 0 and | |
1322 do not change anything in STR. Otherwise, if the result is | |
1323 an incomplete escape sequence, update ISO2022.ESC and | |
1324 ISO2022.ESC_BYTES and return -1. Otherwise, update | |
1325 all the state variables (but not ISO2022.ESC_BYTES) and | |
1326 return 1. | |
1327 | |
1328 If CHECK_INVALID_CHARSETS is non-zero, check for designation | |
1329 or invocation of an invalid character set and treat that as | |
1330 an unrecognized escape sequence. | |
1331 | |
2367 | 1332 */ |
771 | 1333 |
1334 static int | |
1335 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_coding_stream *iso, | |
1336 unsigned char c, unsigned int *flags, | |
1337 int check_invalid_charsets) | |
1338 { | |
1339 /* (1) If we're at the end of a designation sequence, CS is the | |
1340 charset being designated and REG is the register to designate | |
1341 it to. | |
1342 | |
1343 (2) If we're at the end of a locking-shift sequence, REG is | |
1344 the register to invoke and HALF (0 == left, 1 == right) is | |
1345 the half to invoke it into. | |
1346 | |
1347 (3) If we're at the end of a single-shift sequence, REG is | |
1348 the register to invoke. */ | |
1349 Lisp_Object cs = Qnil; | |
1350 int reg, half; | |
1351 | |
1352 /* NOTE: This code does goto's all over the fucking place. | |
1353 The reason for this is that we're basically implementing | |
1354 a state machine here, and hierarchical languages like C | |
1355 don't really provide a clean way of doing this. */ | |
1356 | |
1357 if (! (*flags & ISO_STATE_ESCAPE)) | |
1358 /* At beginning of escape sequence; we need to reset our | |
1359 escape-state variables. */ | |
1360 iso->esc = ISO_ESC_NOTHING; | |
1361 | |
1362 iso->output_literally = 0; | |
1363 iso->output_direction_sequence = 0; | |
1364 | |
1365 switch (iso->esc) | |
1366 { | |
1367 case ISO_ESC_NOTHING: | |
1368 iso->esc_bytes_index = 0; | |
1369 switch (c) | |
1370 { | |
1371 case ISO_CODE_ESC: /* Start escape sequence */ | |
1372 *flags |= ISO_STATE_ESCAPE; | |
1373 iso->esc = ISO_ESC; | |
1374 goto not_done; | |
1375 | |
1376 case ISO_CODE_CSI: /* ISO6429 (specifying directionality) */ | |
1377 *flags |= ISO_STATE_ESCAPE; | |
1378 iso->esc = ISO_ESC_5_11; | |
1379 goto not_done; | |
1380 | |
1381 case ISO_CODE_SO: /* locking shift 1 */ | |
1382 reg = 1; half = 0; | |
1383 goto locking_shift; | |
1384 case ISO_CODE_SI: /* locking shift 0 */ | |
1385 reg = 0; half = 0; | |
1386 goto locking_shift; | |
1387 | |
1388 case ISO_CODE_SS2: /* single shift */ | |
1389 reg = 2; | |
1390 goto single_shift; | |
1391 case ISO_CODE_SS3: /* single shift */ | |
1392 reg = 3; | |
1393 goto single_shift; | |
1394 | |
1395 default: /* Other control characters */ | |
1396 error: | |
1397 *flags &= ISO_STATE_LOCK; | |
1398 return 0; | |
1399 } | |
1400 | |
1401 case ISO_ESC: | |
3439 | 1402 |
1403 /* The only available ISO 2022 sequence in UTF-8 mode is ESC % @, to | |
1404 exit from it. If we see any other escape sequence, pass it through | |
1405 in the error handler. */ | |
1406 if (*flags & ISO_STATE_UTF_8 && '%' != c) | |
1407 { | |
1408 return 0; | |
1409 } | |
1410 | |
771 | 1411 switch (c) |
1412 { | |
1413 /**** single shift ****/ | |
1414 | |
1415 case 'N': /* single shift 2 */ | |
1416 reg = 2; | |
1417 goto single_shift; | |
1418 case 'O': /* single shift 3 */ | |
1419 reg = 3; | |
1420 goto single_shift; | |
1421 | |
1422 /**** locking shift ****/ | |
1423 | |
1424 case '~': /* locking shift 1 right */ | |
1425 reg = 1; half = 1; | |
1426 goto locking_shift; | |
1427 case 'n': /* locking shift 2 */ | |
1428 reg = 2; half = 0; | |
1429 goto locking_shift; | |
1430 case '}': /* locking shift 2 right */ | |
1431 reg = 2; half = 1; | |
1432 goto locking_shift; | |
1433 case 'o': /* locking shift 3 */ | |
1434 reg = 3; half = 0; | |
1435 goto locking_shift; | |
1436 case '|': /* locking shift 3 right */ | |
1437 reg = 3; half = 1; | |
1438 goto locking_shift; | |
1439 | |
1440 /**** composite ****/ | |
1441 | |
1442 #ifdef ENABLE_COMPOSITE_CHARS | |
1443 case '0': | |
1444 iso->esc = ISO_ESC_START_COMPOSITE; | |
1445 *flags = (*flags & ISO_STATE_LOCK) | | |
1446 ISO_STATE_COMPOSITE; | |
1447 return 1; | |
1448 | |
1449 case '1': | |
1450 iso->esc = ISO_ESC_END_COMPOSITE; | |
1451 *flags = (*flags & ISO_STATE_LOCK) & | |
1452 ~ISO_STATE_COMPOSITE; | |
1453 return 1; | |
1454 #else | |
1455 case '0': case '1': case '2': case '3': case '4': | |
1456 /* We simply return a flag indicating that some composite | |
1457 escape was seen. The caller will use the particular | |
1458 character to encode the appropriate "composite hack" | |
1459 character out of Vcharset_composite, so that we will | |
1460 preserve these values on output. */ | |
1461 iso->esc = ISO_ESC_START_COMPOSITE; | |
1462 *flags &= ISO_STATE_LOCK; | |
1463 return 1; | |
1464 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1465 | |
1466 /**** directionality ****/ | |
1467 | |
1468 case '[': | |
1469 iso->esc = ISO_ESC_5_11; | |
1470 goto not_done; | |
1471 | |
1472 /**** designation ****/ | |
1473 | |
1474 case '$': /* multibyte charset prefix */ | |
1475 iso->esc = ISO_ESC_2_4; | |
1476 goto not_done; | |
1477 | |
3439 | 1478 case '%': /* Prefix to an escape to or from Unicode. */ |
1479 iso->esc = ISO_ESC_2_5; | |
1480 goto not_done; | |
1481 | |
771 | 1482 default: |
1483 if (0x28 <= c && c <= 0x2F) | |
1484 { | |
1485 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_8); | |
1486 goto not_done; | |
1487 } | |
1488 | |
1489 /* This function is called with CODESYS equal to nil when | |
1490 doing coding-system detection. */ | |
1491 if (!NILP (codesys) | |
1492 && XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
1493 && fit_to_be_escape_quoted (c)) | |
1494 { | |
1495 iso->esc = ISO_ESC_LITERAL; | |
1496 *flags &= ISO_STATE_LOCK; | |
1497 return 1; | |
1498 } | |
1499 | |
1500 /* bzzzt! */ | |
1501 goto error; | |
1502 } | |
1503 | |
3439 | 1504 /* ISO-IR 196 UTF-8 support. */ |
1505 case ISO_ESC_2_5: | |
1506 if ('G' == c) | |
1507 { | |
1508 /* Activate UTF-8 mode. */ | |
1509 *flags &= ISO_STATE_LOCK; | |
1510 *flags |= ISO_STATE_UTF_8; | |
1511 iso->esc = ISO_ESC_NOTHING; | |
1512 return 1; | |
1513 } | |
1514 else if ('@' == c) | |
1515 { | |
1516 /* Deactive UTF-8 mode. */ | |
1517 *flags &= ISO_STATE_LOCK; | |
1518 *flags &= ~(ISO_STATE_UTF_8); | |
1519 iso->esc = ISO_ESC_NOTHING; | |
1520 return 1; | |
1521 } | |
1522 else | |
1523 { | |
1524 /* Oops, we don't support the other UTF-? coding systems within | |
1525 ISO 2022, only in their own context. */ | |
1526 goto error; | |
1527 } | |
771 | 1528 /**** directionality ****/ |
1529 | |
1530 case ISO_ESC_5_11: /* ISO6429 direction control */ | |
1531 if (c == ']') | |
1532 { | |
1533 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1534 goto directionality; | |
1535 } | |
1536 if (c == '0') iso->esc = ISO_ESC_5_11_0; | |
1537 else if (c == '1') iso->esc = ISO_ESC_5_11_1; | |
1538 else if (c == '2') iso->esc = ISO_ESC_5_11_2; | |
1539 else goto error; | |
1540 goto not_done; | |
1541 | |
1542 case ISO_ESC_5_11_0: | |
1543 if (c == ']') | |
1544 { | |
1545 *flags &= (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1546 goto directionality; | |
1547 } | |
1548 goto error; | |
1549 | |
1550 case ISO_ESC_5_11_1: | |
1551 if (c == ']') | |
1552 { | |
1553 *flags = (ISO_STATE_LOCK & ~ISO_STATE_R2L); | |
1554 goto directionality; | |
1555 } | |
1556 goto error; | |
1557 | |
1558 case ISO_ESC_5_11_2: | |
1559 if (c == ']') | |
1560 { | |
1561 *flags = (*flags & ISO_STATE_LOCK) | ISO_STATE_R2L; | |
1562 goto directionality; | |
1563 } | |
1564 goto error; | |
1565 | |
1566 directionality: | |
1567 iso->esc = ISO_ESC_DIRECTIONALITY; | |
1568 /* Various junk here to attempt to preserve the direction sequences | |
1569 literally in the text if they would otherwise be swallowed due | |
1570 to invalid designations that don't show up as actual charset | |
1571 changes in the text. */ | |
1572 if (iso->invalid_switch_dir) | |
1573 { | |
1574 /* We already inserted a direction switch literally into the | |
1575 text. We assume (#### this may not be right) that the | |
1576 next direction switch is the one going the other way, | |
1577 and we need to output that literally as well. */ | |
1578 iso->output_literally = 1; | |
1579 iso->invalid_switch_dir = 0; | |
1580 } | |
1581 else | |
1582 { | |
1583 int jj; | |
1584 | |
1585 /* If we are in the thrall of an invalid designation, | |
1586 then stick the directionality sequence literally into the | |
1587 output stream so it ends up in the original text again. */ | |
1588 for (jj = 0; jj < 4; jj++) | |
1589 if (iso->invalid_designated[jj]) | |
1590 break; | |
1591 if (jj < 4) | |
1592 { | |
1593 iso->output_literally = 1; | |
1594 iso->invalid_switch_dir = 1; | |
1595 } | |
1596 else | |
1597 /* Indicate that we haven't yet seen a valid designation, | |
1598 so that if a switch-dir is directly followed by an | |
1599 invalid designation, both get inserted literally. */ | |
1600 iso->switched_dir_and_no_valid_charset_yet = 1; | |
1601 } | |
1602 return 1; | |
1603 | |
1604 | |
1605 /**** designation ****/ | |
1606 | |
1607 case ISO_ESC_2_4: | |
1608 if (0x28 <= c && c <= 0x2F) | |
1609 { | |
1610 iso->esc = (enum iso_esc_flag) (c - 0x28 + ISO_ESC_2_4_8); | |
1611 goto not_done; | |
1612 } | |
1613 if (0x40 <= c && c <= 0x42) | |
1614 { | |
1615 cs = charset_by_attributes_or_create_one (CHARSET_TYPE_94X94, c, | |
1616 *flags & ISO_STATE_R2L ? | |
1617 CHARSET_RIGHT_TO_LEFT : | |
1618 CHARSET_LEFT_TO_RIGHT); | |
1619 reg = 0; | |
1620 goto designated; | |
1621 } | |
1622 goto error; | |
1623 | |
1624 default: | |
1625 { | |
1626 int type = -1; | |
1627 | |
1628 if (iso->esc >= ISO_ESC_2_8 && | |
1629 iso->esc <= ISO_ESC_2_15) | |
1630 { | |
1631 type = ((iso->esc >= ISO_ESC_2_12) ? | |
1632 CHARSET_TYPE_96 : CHARSET_TYPE_94); | |
1633 reg = (iso->esc - ISO_ESC_2_8) & 3; | |
1634 } | |
1635 else if (iso->esc >= ISO_ESC_2_4_8 && | |
1636 iso->esc <= ISO_ESC_2_4_15) | |
1637 { | |
1638 type = ((iso->esc >= ISO_ESC_2_4_12) ? | |
1639 CHARSET_TYPE_96X96 : CHARSET_TYPE_94X94); | |
1640 reg = (iso->esc - ISO_ESC_2_4_8) & 3; | |
1641 } | |
1642 else | |
1643 { | |
1644 /* Can this ever be reached? -slb */ | |
2500 | 1645 ABORT (); |
771 | 1646 goto error; |
1647 } | |
1648 | |
1649 if (c < '0' || c > '~' || | |
1650 (c > 0x5F && (type == CHARSET_TYPE_94X94 || | |
1651 type == CHARSET_TYPE_96X96))) | |
1652 goto error; /* bad final byte */ | |
1653 | |
1654 cs = charset_by_attributes_or_create_one (type, c, | |
1655 *flags & ISO_STATE_R2L ? | |
1656 CHARSET_RIGHT_TO_LEFT : | |
1657 CHARSET_LEFT_TO_RIGHT); | |
1658 goto designated; | |
1659 } | |
1660 } | |
1661 | |
1662 not_done: | |
1663 iso->esc_bytes[iso->esc_bytes_index++] = (unsigned char) c; | |
1664 return -1; | |
1665 | |
1666 single_shift: | |
1667 if (check_invalid_charsets && !CHARSETP (iso->charset[reg])) | |
1668 /* can't invoke something that ain't there. */ | |
1669 goto error; | |
1670 iso->esc = ISO_ESC_SINGLE_SHIFT; | |
1671 *flags &= ISO_STATE_LOCK; | |
1672 if (reg == 2) | |
1673 *flags |= ISO_STATE_SS2; | |
1674 else | |
1675 *flags |= ISO_STATE_SS3; | |
1676 return 1; | |
1677 | |
1678 locking_shift: | |
1679 if (check_invalid_charsets && | |
1680 !CHARSETP (iso->charset[reg])) | |
1681 /* can't invoke something that ain't there. */ | |
1682 goto error; | |
1683 if (half) | |
1684 iso->register_right = reg; | |
1685 else | |
1686 iso->register_left = reg; | |
1687 *flags &= ISO_STATE_LOCK; | |
1688 iso->esc = ISO_ESC_LOCKING_SHIFT; | |
1689 return 1; | |
1690 | |
1691 designated: | |
1692 if (NILP (cs) && check_invalid_charsets) | |
1693 { | |
2500 | 1694 ABORT (); |
771 | 1695 /* #### This should never happen now that we automatically create |
1696 temporary charsets as necessary. We should probably remove | |
1697 this code. --ben */ | |
1698 iso->invalid_designated[reg] = 1; | |
1699 iso->charset[reg] = Vcharset_ascii; | |
1700 iso->esc = ISO_ESC_DESIGNATE; | |
1701 *flags &= ISO_STATE_LOCK; | |
1702 iso->output_literally = 1; | |
1703 if (iso->switched_dir_and_no_valid_charset_yet) | |
1704 { | |
1705 /* We encountered a switch-direction followed by an | |
1706 invalid designation. Ensure that the switch-direction | |
1707 gets outputted; otherwise it will probably get eaten | |
1708 when the text is written out again. */ | |
1709 iso->switched_dir_and_no_valid_charset_yet = 0; | |
1710 iso->output_direction_sequence = 1; | |
1711 /* And make sure that the switch-dir going the other | |
1712 way gets outputted, as well. */ | |
1713 iso->invalid_switch_dir = 1; | |
1714 } | |
1715 return 1; | |
1716 } | |
1717 /* This function is called with CODESYS equal to nil when | |
1718 doing coding-system detection. */ | |
1719 if (!NILP (codesys)) | |
1720 { | |
1721 charset_conversion_spec_dynarr *dyn = | |
1722 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys); | |
1723 | |
1724 if (dyn) | |
1725 { | |
1726 int i; | |
1727 | |
1728 for (i = 0; i < Dynarr_length (dyn); i++) | |
1729 { | |
1730 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
1731 if (EQ (cs, spec->from_charset)) | |
1732 cs = spec->to_charset; | |
1733 } | |
1734 } | |
1735 } | |
1736 | |
1737 iso->charset[reg] = cs; | |
1738 iso->esc = ISO_ESC_DESIGNATE; | |
1739 *flags &= ISO_STATE_LOCK; | |
1740 if (iso->invalid_designated[reg]) | |
1741 { | |
1742 iso->invalid_designated[reg] = 0; | |
1743 iso->output_literally = 1; | |
1744 } | |
1745 if (iso->switched_dir_and_no_valid_charset_yet) | |
1746 iso->switched_dir_and_no_valid_charset_yet = 0; | |
1747 return 1; | |
1748 } | |
1749 | |
1750 /* If FLAGS is a null pointer or specifies right-to-left motion, | |
1751 output a switch-dir-to-left-to-right sequence to DST. | |
1752 Also update FLAGS if it is not a null pointer. | |
1753 If INTERNAL_P is set, we are outputting in internal format and | |
1754 need to handle the CSI differently. */ | |
1755 | |
1756 static void | |
1757 restore_left_to_right_direction (Lisp_Object codesys, | |
1758 unsigned_char_dynarr *dst, | |
1759 unsigned int *flags, | |
1760 int internal_p) | |
1761 { | |
1762 if (!flags || (*flags & ISO_STATE_R2L)) | |
1763 { | |
1764 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
1765 { | |
1766 Dynarr_add (dst, ISO_CODE_ESC); | |
1767 Dynarr_add (dst, '['); | |
1768 } | |
1769 else if (internal_p) | |
1770 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
1771 else | |
1772 Dynarr_add (dst, ISO_CODE_CSI); | |
1773 Dynarr_add (dst, '0'); | |
1774 Dynarr_add (dst, ']'); | |
1775 if (flags) | |
1776 *flags &= ~ISO_STATE_R2L; | |
1777 } | |
1778 } | |
1779 | |
1780 /* If FLAGS is a null pointer or specifies a direction different from | |
1781 DIRECTION (which should be either CHARSET_RIGHT_TO_LEFT or | |
1782 CHARSET_LEFT_TO_RIGHT), output the appropriate switch-dir escape | |
1783 sequence to DST. Also update FLAGS if it is not a null pointer. | |
1784 If INTERNAL_P is set, we are outputting in internal format and | |
1785 need to handle the CSI differently. */ | |
1786 | |
1787 static void | |
1788 ensure_correct_direction (int direction, Lisp_Object codesys, | |
1789 unsigned_char_dynarr *dst, unsigned int *flags, | |
1790 int internal_p) | |
1791 { | |
1792 if ((!flags || (*flags & ISO_STATE_R2L)) && | |
1793 direction == CHARSET_LEFT_TO_RIGHT) | |
1794 restore_left_to_right_direction (codesys, dst, flags, internal_p); | |
1795 else if (!XCODING_SYSTEM_ISO2022_NO_ISO6429 (codesys) | |
1796 && (!flags || !(*flags & ISO_STATE_R2L)) && | |
1797 direction == CHARSET_RIGHT_TO_LEFT) | |
1798 { | |
1799 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
1800 { | |
1801 Dynarr_add (dst, ISO_CODE_ESC); | |
1802 Dynarr_add (dst, '['); | |
1803 } | |
1804 else if (internal_p) | |
1805 DECODE_ADD_BINARY_CHAR (ISO_CODE_CSI, dst); | |
1806 else | |
1807 Dynarr_add (dst, ISO_CODE_CSI); | |
1808 Dynarr_add (dst, '2'); | |
1809 Dynarr_add (dst, ']'); | |
1810 if (flags) | |
1811 *flags |= ISO_STATE_R2L; | |
1812 } | |
1813 } | |
1814 | |
4096 | 1815 /* Note that this name conflicts with a function in unicode.c. */ |
1816 static void | |
1817 decode_unicode_char (int ucs, unsigned_char_dynarr *dst) | |
1818 { | |
1819 Ibyte work[MAX_ICHAR_LEN]; | |
1820 int len; | |
1821 Lisp_Object chr; | |
1822 | |
1823 chr = Funicode_to_char(make_int(ucs), Qnil); | |
1824 assert (!NILP(chr)); | |
1825 len = set_itext_ichar (work, XCHAR(chr)); | |
1826 Dynarr_add_many (dst, work, len); | |
1827 } | |
1828 | |
1829 #define DECODE_ERROR_OCTET(octet, dst) \ | |
1830 decode_unicode_char ((octet) + UNICODE_ERROR_OCTET_RANGE_START, dst) | |
1831 | |
1832 static inline void | |
1833 indicate_invalid_utf_8 (unsigned char indicated_length, | |
1834 unsigned char counter, | |
1835 int ch, unsigned_char_dynarr *dst) | |
1836 { | |
1837 Binbyte stored = indicated_length - counter; | |
1838 Binbyte mask = "\x00\x00\xC0\xE0\xF0\xF8\xFC"[indicated_length]; | |
1839 | |
1840 while (stored > 0) | |
1841 { | |
1842 DECODE_ERROR_OCTET (((ch >> (6 * (stored - 1))) & 0x3f) | mask, | |
1843 dst); | |
1844 mask = 0x80, stored--; | |
1845 } | |
1846 } | |
1847 | |
771 | 1848 /* Convert ISO2022-format data to internal format. */ |
1849 | |
1850 static Bytecount | |
1851 iso2022_decode (struct coding_stream *str, const UExtbyte *src, | |
1852 unsigned_char_dynarr *dst, Bytecount n) | |
1853 { | |
1854 unsigned int ch = str->ch; | |
1855 #ifdef ENABLE_COMPOSITE_CHARS | |
1856 unsigned_char_dynarr *real_dst = dst; | |
1857 #endif | |
1858 struct iso2022_coding_stream *data = | |
1859 CODING_STREAM_TYPE_DATA (str, iso2022); | |
1860 unsigned int flags = data->flags; | |
1861 Bytecount orign = n; | |
1862 | |
1863 #ifdef ENABLE_COMPOSITE_CHARS | |
1864 if (flags & ISO_STATE_COMPOSITE) | |
1865 dst = data->composite_chars; | |
1866 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1867 | |
1868 while (n--) | |
1869 { | |
1870 UExtbyte c = *src++; | |
1871 if (flags & ISO_STATE_ESCAPE) | |
1872 { /* Within ESC sequence */ | |
1873 int retval = parse_iso2022_esc (str->codesys, data, | |
1874 c, &flags, 1); | |
1875 | |
1876 if (retval) | |
1877 { | |
1878 switch (data->esc) | |
1879 { | |
1880 #ifdef ENABLE_COMPOSITE_CHARS | |
1881 case ISO_ESC_START_COMPOSITE: | |
1882 if (data->composite_chars) | |
1883 Dynarr_reset (data->composite_chars); | |
1884 else | |
1885 data->composite_chars = Dynarr_new (unsigned_char); | |
1886 dst = data->composite_chars; | |
1887 break; | |
1888 case ISO_ESC_END_COMPOSITE: | |
1889 { | |
867 | 1890 Ibyte comstr[MAX_ICHAR_LEN]; |
771 | 1891 Bytecount len; |
4967 | 1892 Ichar emch = lookup_composite_char (Dynarr_begin (dst), |
771 | 1893 Dynarr_length (dst)); |
1894 dst = real_dst; | |
867 | 1895 len = set_itext_ichar (comstr, emch); |
771 | 1896 Dynarr_add_many (dst, comstr, len); |
1897 break; | |
1898 } | |
1899 #else | |
1900 case ISO_ESC_START_COMPOSITE: | |
1901 { | |
867 | 1902 Ibyte comstr[MAX_ICHAR_LEN]; |
771 | 1903 Bytecount len; |
867 | 1904 Ichar emch = make_ichar (Vcharset_composite, c - '0' + ' ', |
771 | 1905 0); |
867 | 1906 len = set_itext_ichar (comstr, emch); |
771 | 1907 Dynarr_add_many (dst, comstr, len); |
1908 break; | |
1909 } | |
1910 #endif /* ENABLE_COMPOSITE_CHARS */ | |
1911 | |
1912 case ISO_ESC_LITERAL: | |
1913 DECODE_ADD_BINARY_CHAR (c, dst); | |
1914 break; | |
1915 | |
1916 default: | |
1917 /* Everything else handled already */ | |
1918 break; | |
1919 } | |
1920 } | |
1921 | |
1922 /* Attempted error recovery. */ | |
1923 if (data->output_direction_sequence) | |
1924 ensure_correct_direction (flags & ISO_STATE_R2L ? | |
1925 CHARSET_RIGHT_TO_LEFT : | |
1926 CHARSET_LEFT_TO_RIGHT, | |
1927 str->codesys, dst, 0, 1); | |
1928 /* More error recovery. */ | |
1929 if (!retval || data->output_literally) | |
1930 { | |
1931 /* Output the (possibly invalid) sequence */ | |
1932 int i; | |
1933 for (i = 0; i < data->esc_bytes_index; i++) | |
1934 DECODE_ADD_BINARY_CHAR (data->esc_bytes[i], dst); | |
1935 flags &= ISO_STATE_LOCK; | |
1936 if (!retval) | |
1937 n++, src--;/* Repeat the loop with the same character. */ | |
1938 else | |
1939 { | |
1940 /* No sense in reprocessing the final byte of the | |
1941 escape sequence; it could mess things up anyway. | |
1942 Just add it now. */ | |
1943 DECODE_ADD_BINARY_CHAR (c, dst); | |
1944 } | |
1945 } | |
1946 ch = 0; | |
1947 } | |
3439 | 1948 else if (flags & ISO_STATE_UTF_8) |
1949 { | |
1950 unsigned char counter = data->counter; | |
4096 | 1951 unsigned char indicated_length = data->indicated_length; |
3439 | 1952 |
1953 if (ISO_CODE_ESC == c) | |
1954 { | |
1955 /* Allow the escape sequence parser to end the UTF-8 state. */ | |
1956 flags |= ISO_STATE_ESCAPE; | |
1957 data->esc = ISO_ESC; | |
1958 data->esc_bytes_index = 1; | |
1959 continue; | |
1960 } | |
1961 | |
4096 | 1962 if (0 == counter) |
1963 { | |
1964 if (0 == (c & 0x80)) | |
1965 { | |
1966 /* ASCII. */ | |
1967 decode_unicode_char (c, dst); | |
1968 } | |
1969 else if (0 == (c & 0x40)) | |
1970 { | |
1971 /* Highest bit set, second highest not--there's | |
1972 something wrong. */ | |
1973 DECODE_ERROR_OCTET (c, dst); | |
1974 } | |
1975 else if (0 == (c & 0x20)) | |
1976 { | |
1977 ch = c & 0x1f; | |
1978 counter = 1; | |
1979 indicated_length = 2; | |
1980 } | |
1981 else if (0 == (c & 0x10)) | |
1982 { | |
1983 ch = c & 0x0f; | |
1984 counter = 2; | |
1985 indicated_length = 3; | |
1986 } | |
1987 else if (0 == (c & 0x08)) | |
1988 { | |
1989 ch = c & 0x0f; | |
1990 counter = 3; | |
1991 indicated_length = 4; | |
1992 } | |
1993 /* We support lengths longer than 4 here, since we want to | |
1994 represent UTF-8 error chars as distinct from the | |
1995 corresponding ISO 8859-1 characters in escape-quoted. | |
1996 | |
1997 However, we can't differentiate UTF-8 error chars as | |
1998 written to disk, and UTF-8 errors in escape-quoted. This | |
1999 is not a big problem; | |
2000 non-Unicode-chars-encoded-as-UTF-8-in-ISO-2022 is not | |
2001 deployed, in practice, so if such a sequence of octets | |
2002 occurs, XEmacs generated it. */ | |
2003 else if (0 == (c & 0x04)) | |
2004 { | |
2005 ch = c & 0x03; | |
2006 counter = 4; | |
2007 indicated_length = 5; | |
2008 } | |
2009 else if (0 == (c & 0x02)) | |
2010 { | |
2011 ch = c & 0x01; | |
2012 counter = 5; | |
2013 indicated_length = 6; | |
2014 } | |
2015 else | |
2016 { | |
2017 /* #xFF is not a valid leading byte in any form of | |
2018 UTF-8. */ | |
2019 DECODE_ERROR_OCTET (c, dst); | |
2020 | |
2021 } | |
2022 } | |
2023 else | |
2024 { | |
2025 /* counter != 0 */ | |
2026 if ((0 == (c & 0x80)) || (0 != (c & 0x40))) | |
2027 { | |
2028 indicate_invalid_utf_8(indicated_length, | |
2029 counter, | |
2030 ch, dst); | |
2031 if (c & 0x80) | |
2032 { | |
2033 DECODE_ERROR_OCTET (c, dst); | |
2034 } | |
2035 else | |
2036 { | |
2037 /* The character just read is ASCII. Treat it as | |
2038 such. */ | |
2039 decode_unicode_char (c, dst); | |
2040 } | |
2041 ch = 0; | |
2042 counter = 0; | |
2043 } | |
2044 else | |
2045 { | |
2046 ch = (ch << 6) | (c & 0x3f); | |
2047 counter--; | |
2048 | |
2049 /* Just processed the final byte. Emit the character. */ | |
2050 if (!counter) | |
2051 { | |
2052 /* Don't accept over-long sequences, or surrogates. */ | |
2053 if ((ch < 0x80) || | |
2054 ((ch < 0x800) && indicated_length > 2) || | |
2055 ((ch < 0x10000) && indicated_length > 3) || | |
2056 /* We accept values above #x110000 in | |
2057 escape-quoted, though not in UTF-8. */ | |
2058 /* (ch > 0x110000) || */ | |
2059 valid_utf_16_surrogate(ch)) | |
2060 { | |
2061 indicate_invalid_utf_8(indicated_length, | |
2062 counter, | |
2063 ch, dst); | |
2064 } | |
2065 else | |
2066 { | |
2067 decode_unicode_char (ch, dst); | |
2068 } | |
2069 ch = 0; | |
2070 } | |
2071 } | |
2072 } | |
2073 | |
2074 if (str->eof && ch) | |
2075 { | |
2076 DECODE_ERROR_OCTET (ch, dst); | |
2077 ch = 0; | |
2078 } | |
3439 | 2079 |
2080 data->counter = counter; | |
4096 | 2081 data->indicated_length = indicated_length; |
3439 | 2082 } |
826 | 2083 else if (byte_c0_p (c) || byte_c1_p (c)) |
771 | 2084 { /* Control characters */ |
2085 | |
2086 /***** Error-handling *****/ | |
2087 | |
2088 /* If we were in the middle of a character, dump out the | |
2089 partial character. */ | |
2090 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2091 | |
2092 /* If we just saw a single-shift character, dump it out. | |
2093 This may dump out the wrong sort of single-shift character, | |
2094 but least it will give an indication that something went | |
2095 wrong. */ | |
2096 if (flags & ISO_STATE_SS2) | |
2097 { | |
2098 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS2, dst); | |
2099 flags &= ~ISO_STATE_SS2; | |
2100 } | |
2101 if (flags & ISO_STATE_SS3) | |
2102 { | |
2103 DECODE_ADD_BINARY_CHAR (ISO_CODE_SS3, dst); | |
2104 flags &= ~ISO_STATE_SS3; | |
2105 } | |
2106 | |
2107 /***** Now handle the control characters. *****/ | |
2108 | |
2109 flags &= ISO_STATE_LOCK; | |
2110 | |
2111 if (!parse_iso2022_esc (str->codesys, data, c, &flags, 1)) | |
2112 DECODE_ADD_BINARY_CHAR (c, dst); | |
2113 } | |
2114 else | |
2115 { /* Graphic characters */ | |
2116 Lisp_Object charset; | |
2117 int lb; | |
2118 int reg; | |
2119 | |
2120 /* Now determine the charset. */ | |
2121 reg = ((flags & ISO_STATE_SS2) ? 2 | |
2122 : (flags & ISO_STATE_SS3) ? 3 | |
826 | 2123 : !byte_ascii_p (c) ? data->register_right |
771 | 2124 : data->register_left); |
2125 charset = data->charset[reg]; | |
2126 | |
2127 /* Error checking: */ | |
2128 if (! CHARSETP (charset) | |
2129 || data->invalid_designated[reg] | |
2130 || (((c & 0x7F) == ' ' || (c & 0x7F) == ISO_CODE_DEL) | |
2131 && XCHARSET_CHARS (charset) == 94)) | |
2132 /* Mrmph. We are trying to invoke a register that has no | |
2133 or an invalid charset in it, or trying to add a character | |
2134 outside the range of the charset. Insert that char literally | |
2135 to preserve it for the output. */ | |
2136 { | |
2137 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2138 DECODE_ADD_BINARY_CHAR (c, dst); | |
2139 } | |
2140 | |
2141 else | |
2142 { | |
2143 /* Things are probably hunky-dorey. */ | |
2144 | |
2145 /* Fetch reverse charset, maybe. */ | |
2146 if (((flags & ISO_STATE_R2L) && | |
2147 XCHARSET_DIRECTION (charset) == CHARSET_LEFT_TO_RIGHT) | |
2148 || | |
2149 (!(flags & ISO_STATE_R2L) && | |
2150 XCHARSET_DIRECTION (charset) == CHARSET_RIGHT_TO_LEFT)) | |
2151 { | |
2152 Lisp_Object new_charset = | |
2153 XCHARSET_REVERSE_DIRECTION_CHARSET (charset); | |
2154 if (!NILP (new_charset)) | |
2155 charset = new_charset; | |
2156 } | |
2157 | |
2158 lb = XCHARSET_LEADING_BYTE (charset); | |
2159 switch (XCHARSET_REP_BYTES (charset)) | |
2160 { | |
2161 case 1: /* ASCII */ | |
2162 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2163 Dynarr_add (dst, c & 0x7F); | |
2164 break; | |
2165 | |
2166 case 2: /* one-byte official */ | |
2167 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2168 Dynarr_add (dst, lb); | |
2169 Dynarr_add (dst, c | 0x80); | |
2170 break; | |
2171 | |
2172 case 3: /* one-byte private or two-byte official */ | |
2173 if (XCHARSET_PRIVATE_P (charset)) | |
2174 { | |
2175 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2176 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_1); | |
2177 Dynarr_add (dst, lb); | |
2178 Dynarr_add (dst, c | 0x80); | |
2179 } | |
2180 else | |
2181 { | |
2182 if (ch) | |
2183 { | |
2184 Dynarr_add (dst, lb); | |
2185 Dynarr_add (dst, ch | 0x80); | |
2186 Dynarr_add (dst, c | 0x80); | |
2187 ch = 0; | |
2188 } | |
2189 else | |
2190 ch = c; | |
2191 } | |
2192 break; | |
2193 | |
2194 default: /* two-byte private */ | |
2195 if (ch) | |
2196 { | |
2197 Dynarr_add (dst, PRE_LEADING_BYTE_PRIVATE_2); | |
2198 Dynarr_add (dst, lb); | |
2199 Dynarr_add (dst, ch | 0x80); | |
2200 Dynarr_add (dst, c | 0x80); | |
2201 ch = 0; | |
2202 } | |
2203 else | |
2204 ch = c; | |
2205 } | |
2206 } | |
2207 | |
2208 if (!ch) | |
2209 flags &= ISO_STATE_LOCK; | |
2210 } | |
2211 | |
2212 } | |
2213 | |
2214 if (str->eof) | |
2215 DECODE_OUTPUT_PARTIAL_CHAR (ch, dst); | |
2216 | |
2217 data->flags = flags; | |
2218 str->ch = ch; | |
2219 return orign; | |
2220 } | |
2221 | |
2222 | |
2223 /***** ISO2022 encoder *****/ | |
2224 | |
2225 /* Designate CHARSET into register REG. */ | |
2226 | |
2227 static void | |
2228 iso2022_designate (Lisp_Object charset, int reg, | |
2229 struct coding_stream *str, unsigned_char_dynarr *dst) | |
2230 { | |
2231 static const char inter94[] = "()*+"; | |
2232 static const char inter96[] = ",-./"; | |
2233 int type; | |
2234 unsigned char final; | |
2235 struct iso2022_coding_stream *data = | |
2236 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2237 Lisp_Object old_charset = data->charset[reg]; | |
2238 | |
2239 data->charset[reg] = charset; | |
2240 if (!CHARSETP (charset)) | |
2241 /* charset might be an initial nil or t. */ | |
2242 return; | |
2243 type = XCHARSET_TYPE (charset); | |
2244 final = XCHARSET_FINAL (charset); | |
2245 if (!data->force_charset_on_output[reg] && | |
2246 CHARSETP (old_charset) && | |
2247 XCHARSET_TYPE (old_charset) == type && | |
2248 XCHARSET_FINAL (old_charset) == final) | |
2249 return; | |
2250 | |
2251 data->force_charset_on_output[reg] = 0; | |
2252 | |
2253 { | |
2254 charset_conversion_spec_dynarr *dyn = | |
2255 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (str->codesys); | |
2256 | |
2257 if (dyn) | |
2258 { | |
2259 int i; | |
2260 | |
2261 for (i = 0; i < Dynarr_length (dyn); i++) | |
2262 { | |
2263 struct charset_conversion_spec *spec = Dynarr_atp (dyn, i); | |
2264 if (EQ (charset, spec->from_charset)) | |
2265 charset = spec->to_charset; | |
2266 } | |
2267 } | |
2268 } | |
2269 | |
2270 Dynarr_add (dst, ISO_CODE_ESC); | |
3439 | 2271 |
771 | 2272 switch (type) |
2273 { | |
2274 case CHARSET_TYPE_94: | |
2275 Dynarr_add (dst, inter94[reg]); | |
2276 break; | |
2277 case CHARSET_TYPE_96: | |
2278 Dynarr_add (dst, inter96[reg]); | |
2279 break; | |
2280 case CHARSET_TYPE_94X94: | |
2281 Dynarr_add (dst, '$'); | |
2282 if (reg != 0 | |
2283 || !(XCODING_SYSTEM_ISO2022_SHORT (str->codesys)) | |
2284 || final < '@' | |
2285 || final > 'B') | |
2286 Dynarr_add (dst, inter94[reg]); | |
2287 break; | |
2288 case CHARSET_TYPE_96X96: | |
2289 Dynarr_add (dst, '$'); | |
2290 Dynarr_add (dst, inter96[reg]); | |
2291 break; | |
2292 } | |
2293 Dynarr_add (dst, final); | |
2294 } | |
2295 | |
2296 static void | |
2297 ensure_normal_shift (struct coding_stream *str, unsigned_char_dynarr *dst) | |
2298 { | |
2299 struct iso2022_coding_stream *data = | |
2300 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2301 | |
2302 if (data->register_left != 0) | |
2303 { | |
2304 Dynarr_add (dst, ISO_CODE_SI); | |
2305 data->register_left = 0; | |
2306 } | |
2307 } | |
2308 | |
2309 static void | |
2310 ensure_shift_out (struct coding_stream *str, unsigned_char_dynarr *dst) | |
2311 { | |
2312 struct iso2022_coding_stream *data = | |
2313 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2314 | |
2315 if (data->register_left != 1) | |
2316 { | |
2317 Dynarr_add (dst, ISO_CODE_SO); | |
2318 data->register_left = 1; | |
2319 } | |
2320 } | |
2321 | |
2322 /* Convert internally-formatted data to ISO2022 format. */ | |
2323 | |
2324 static Bytecount | |
867 | 2325 iso2022_encode (struct coding_stream *str, const Ibyte *src, |
771 | 2326 unsigned_char_dynarr *dst, Bytecount n) |
2327 { | |
2328 unsigned char charmask; | |
867 | 2329 Ibyte c; |
771 | 2330 unsigned char char_boundary; |
2331 unsigned int ch = str->ch; | |
2332 Lisp_Object codesys = str->codesys; | |
2333 int i; | |
2334 Lisp_Object charset; | |
2335 int half; | |
2336 struct iso2022_coding_stream *data = | |
2337 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2338 unsigned int flags = data->flags; | |
2339 Bytecount orign = n; | |
2340 | |
2341 #ifdef ENABLE_COMPOSITE_CHARS | |
2342 /* flags for handling composite chars. We do a little switcheroo | |
2343 on the source while we're outputting the composite char. */ | |
2344 Bytecount saved_n = 0; | |
867 | 2345 const Ibyte *saved_src = NULL; |
771 | 2346 int in_composite = 0; |
2347 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2348 | |
2349 char_boundary = data->current_char_boundary; | |
2350 charset = data->current_charset; | |
2351 half = data->current_half; | |
2352 | |
2353 #ifdef ENABLE_COMPOSITE_CHARS | |
2354 back_to_square_n: | |
2355 #endif | |
2356 while (n--) | |
2357 { | |
2358 c = *src++; | |
2359 | |
826 | 2360 if (byte_ascii_p (c)) |
771 | 2361 { /* Processing ASCII character */ |
2362 ch = 0; | |
2363 | |
3439 | 2364 if (flags & ISO_STATE_UTF_8) |
2365 { | |
2366 Dynarr_add (dst, ISO_CODE_ESC); | |
2367 Dynarr_add (dst, '%'); | |
2368 Dynarr_add (dst, '@'); | |
2369 flags &= ~(ISO_STATE_UTF_8); | |
2370 } | |
2371 | |
771 | 2372 restore_left_to_right_direction (codesys, dst, &flags, 0); |
2373 | |
2374 /* Make sure G0 contains ASCII */ | |
2375 if ((c > ' ' && c < ISO_CODE_DEL) || | |
2376 !XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL (codesys)) | |
2377 { | |
2378 ensure_normal_shift (str, dst); | |
2379 iso2022_designate (Vcharset_ascii, 0, str, dst); | |
2380 } | |
2381 | |
2382 /* If necessary, restore everything to the default state | |
2383 at end-of-line */ | |
2384 if (!(XCODING_SYSTEM_ISO2022_NO_ASCII_EOL (codesys))) | |
2385 { | |
2386 /* NOTE: CRLF encoding happens *BEFORE* other encoding. | |
2387 Thus, even though we're working with internal-format | |
2388 data, there may be CR's or CRLF sequences representing | |
2389 newlines. */ | |
2390 if (c == '\r' || (c == '\n' && !(flags & ISO_STATE_CR))) | |
2391 { | |
2392 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
2393 | |
2394 ensure_normal_shift (str, dst); | |
2395 | |
2396 for (i = 0; i < 4; i++) | |
2397 { | |
2398 Lisp_Object initial_charset = | |
2399 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
2400 iso2022_designate (initial_charset, i, str, dst); | |
2401 } | |
2402 } | |
2403 if (c == '\r') | |
2404 flags |= ISO_STATE_CR; | |
2405 else | |
2406 flags &= ~ISO_STATE_CR; | |
2407 } | |
2408 | |
2409 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
2410 && fit_to_be_escape_quoted (c)) | |
2411 Dynarr_add (dst, ISO_CODE_ESC); | |
2412 Dynarr_add (dst, c); | |
2413 char_boundary = 1; | |
2414 } | |
867 | 2415 else if (ibyte_leading_byte_p (c) || ibyte_leading_byte_p (ch)) |
771 | 2416 { /* Processing Leading Byte */ |
2417 ch = 0; | |
826 | 2418 charset = charset_by_leading_byte (c); |
2419 if (leading_byte_prefix_p (c)) | |
3439 | 2420 { |
2421 ch = c; | |
2422 } | |
2423 else if (XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
2424 { | |
2425 assert (!EQ (charset, Vcharset_control_1) | |
2426 && !EQ (charset, Vcharset_composite)); | |
2427 | |
2428 /* If the character set is to be encoded as UTF-8, the escape | |
2429 is always the same. */ | |
2430 if (!(flags & ISO_STATE_UTF_8)) | |
2431 { | |
2432 Dynarr_add (dst, ISO_CODE_ESC); | |
2433 Dynarr_add (dst, '%'); | |
2434 Dynarr_add (dst, 'G'); | |
2435 flags |= ISO_STATE_UTF_8; | |
2436 } | |
2437 } | |
771 | 2438 else if (!EQ (charset, Vcharset_control_1) |
2439 && !EQ (charset, Vcharset_composite)) | |
2440 { | |
2441 int reg; | |
2442 | |
3439 | 2443 /* End the UTF-8 state. */ |
2444 if (flags & ISO_STATE_UTF_8) | |
2445 { | |
2446 Dynarr_add (dst, ISO_CODE_ESC); | |
2447 Dynarr_add (dst, '%'); | |
2448 Dynarr_add (dst, '@'); | |
2449 flags &= ~(ISO_STATE_UTF_8); | |
2450 } | |
2451 | |
771 | 2452 ensure_correct_direction (XCHARSET_DIRECTION (charset), |
2453 codesys, dst, &flags, 0); | |
2454 | |
2455 /* Now determine which register to use. */ | |
2456 reg = -1; | |
2457 for (i = 0; i < 4; i++) | |
2458 { | |
2459 if (EQ (charset, data->charset[i]) || | |
2460 EQ (charset, | |
2461 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i))) | |
2462 { | |
2463 reg = i; | |
2464 break; | |
2465 } | |
2466 } | |
2467 | |
2468 if (reg == -1) | |
2469 { | |
2470 if (XCHARSET_GRAPHIC (charset) != 0) | |
2471 { | |
2472 if (!NILP (data->charset[1]) && | |
2473 (!XCODING_SYSTEM_ISO2022_SEVEN (codesys) || | |
2474 XCODING_SYSTEM_ISO2022_LOCK_SHIFT (codesys))) | |
2475 reg = 1; | |
2476 else if (!NILP (data->charset[2])) | |
2477 reg = 2; | |
2478 else if (!NILP (data->charset[3])) | |
2479 reg = 3; | |
2480 else | |
2481 reg = 0; | |
2482 } | |
2483 else | |
2484 reg = 0; | |
2485 } | |
2486 | |
2487 iso2022_designate (charset, reg, str, dst); | |
2488 | |
2489 /* Now invoke that register. */ | |
2490 switch (reg) | |
2491 { | |
2492 case 0: | |
2493 ensure_normal_shift (str, dst); | |
2494 half = 0; | |
2495 break; | |
2496 | |
2497 case 1: | |
2498 if (XCODING_SYSTEM_ISO2022_SEVEN (codesys)) | |
2499 { | |
2500 ensure_shift_out (str, dst); | |
2501 half = 0; | |
2502 } | |
2503 else | |
2504 half = 1; | |
2505 break; | |
2506 | |
2507 case 2: | |
2508 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
2509 { | |
2510 Dynarr_add (dst, ISO_CODE_ESC); | |
2511 Dynarr_add (dst, 'N'); | |
2512 half = 0; | |
2513 } | |
2514 else | |
2515 { | |
2516 Dynarr_add (dst, ISO_CODE_SS2); | |
2517 half = 1; | |
2518 } | |
2519 break; | |
2520 | |
2521 case 3: | |
2522 if (XCODING_SYSTEM_ISO2022_SEVEN (str->codesys)) | |
2523 { | |
2524 Dynarr_add (dst, ISO_CODE_ESC); | |
2525 Dynarr_add (dst, 'O'); | |
2526 half = 0; | |
2527 } | |
2528 else | |
2529 { | |
2530 Dynarr_add (dst, ISO_CODE_SS3); | |
2531 half = 1; | |
2532 } | |
2533 break; | |
2534 | |
2535 default: | |
2500 | 2536 ABORT (); |
771 | 2537 } |
2538 } | |
2539 char_boundary = 0; | |
2540 } | |
2541 else | |
2542 { /* Processing Non-ASCII character */ | |
2543 charmask = (half == 0 ? 0x7F : 0xFF); | |
2544 char_boundary = 1; | |
2545 if (EQ (charset, Vcharset_control_1)) | |
2546 { | |
2547 if (XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED (codesys) | |
2548 && fit_to_be_escape_quoted (c)) | |
2549 Dynarr_add (dst, ISO_CODE_ESC); | |
2550 /* you asked for it ... */ | |
2551 Dynarr_add (dst, c - 0x20); | |
2552 } | |
2553 #ifndef ENABLE_COMPOSITE_CHARS | |
2554 else if (EQ (charset, Vcharset_composite)) | |
2555 { | |
2556 if (c >= 160 || c <= 164) /* Someone might have stuck in | |
2557 something else */ | |
2558 { | |
2559 Dynarr_add (dst, ISO_CODE_ESC); | |
2560 Dynarr_add (dst, c - 160 + '0'); | |
2561 } | |
2562 } | |
2563 #endif | |
2564 else | |
2565 { | |
2566 switch (XCHARSET_REP_BYTES (charset)) | |
2567 { | |
2568 case 2: | |
3439 | 2569 dynarr_add_2022_one_dimension (charset, c, |
2570 charmask, dst); | |
771 | 2571 break; |
2572 case 3: | |
2573 if (XCHARSET_PRIVATE_P (charset)) | |
2574 { | |
3439 | 2575 dynarr_add_2022_one_dimension (charset, c, |
2576 charmask, dst); | |
771 | 2577 ch = 0; |
2578 } | |
2579 else if (ch) | |
2580 { | |
2581 #ifdef ENABLE_COMPOSITE_CHARS | |
2582 if (EQ (charset, Vcharset_composite)) | |
2583 { | |
3439 | 2584 /* #### Hasn't been written to handle composite |
2585 characters yet. */ | |
2586 assert(!XCHARSET_ENCODE_AS_UTF_8 (charset)) | |
771 | 2587 if (in_composite) |
2588 { | |
2589 /* #### Bother! We don't know how to | |
2590 handle this yet. */ | |
2591 Dynarr_add (dst, '~'); | |
2592 } | |
2593 else | |
2594 { | |
867 | 2595 Ichar emch = make_ichar (Vcharset_composite, |
771 | 2596 ch & 0x7F, c & 0x7F); |
2597 Lisp_Object lstr = composite_char_string (emch); | |
2598 saved_n = n; | |
2599 saved_src = src; | |
2600 in_composite = 1; | |
2601 src = XSTRING_DATA (lstr); | |
2602 n = XSTRING_LENGTH (lstr); | |
2603 Dynarr_add (dst, ISO_CODE_ESC); | |
2604 Dynarr_add (dst, '0'); /* start composing */ | |
2605 } | |
2606 } | |
2607 else | |
2608 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2609 { | |
3439 | 2610 dynarr_add_2022_two_dimensions (charset, c, ch, |
2611 charmask, dst); | |
771 | 2612 } |
2613 ch = 0; | |
2614 } | |
2615 else | |
2616 { | |
2617 ch = c; | |
2618 char_boundary = 0; | |
2619 } | |
2620 break; | |
2621 case 4: | |
2622 if (ch) | |
2623 { | |
3439 | 2624 dynarr_add_2022_two_dimensions (charset, c, ch, |
2625 charmask, dst); | |
771 | 2626 ch = 0; |
2627 } | |
2628 else | |
2629 { | |
2630 ch = c; | |
2631 char_boundary = 0; | |
2632 } | |
2633 break; | |
2634 default: | |
2500 | 2635 ABORT (); |
771 | 2636 } |
2637 } | |
2638 } | |
2639 } | |
2640 | |
2641 #ifdef ENABLE_COMPOSITE_CHARS | |
2642 if (in_composite) | |
2643 { | |
2644 n = saved_n; | |
2645 src = saved_src; | |
2646 in_composite = 0; | |
2647 Dynarr_add (dst, ISO_CODE_ESC); | |
2648 Dynarr_add (dst, '1'); /* end composing */ | |
2649 goto back_to_square_n; /* Wheeeeeeeee ..... */ | |
2650 } | |
2651 #endif /* ENABLE_COMPOSITE_CHARS */ | |
2652 | |
2653 if (char_boundary && str->eof) | |
2654 { | |
2655 restore_left_to_right_direction (codesys, dst, &flags, 0); | |
2656 ensure_normal_shift (str, dst); | |
2657 for (i = 0; i < 4; i++) | |
2658 { | |
2659 Lisp_Object initial_charset = | |
2660 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i); | |
2661 iso2022_designate (initial_charset, i, str, dst); | |
2662 } | |
2663 } | |
2664 | |
2665 data->flags = flags; | |
2666 str->ch = ch; | |
2667 data->current_char_boundary = char_boundary; | |
2668 data->current_charset = charset; | |
2669 data->current_half = half; | |
2670 | |
2671 /* Verbum caro factum est! */ | |
2672 return orign; | |
2673 } | |
2674 | |
2675 static Bytecount | |
2676 iso2022_convert (struct coding_stream *str, | |
2677 const UExtbyte *src, | |
2678 unsigned_char_dynarr *dst, Bytecount n) | |
2679 { | |
2680 if (str->direction == CODING_DECODE) | |
2681 return iso2022_decode (str, src, dst, n); | |
2682 else | |
2683 return iso2022_encode (str, src, dst, n); | |
2684 } | |
2685 | |
2686 static void | |
2687 iso2022_mark (Lisp_Object codesys) | |
2688 { | |
2689 int i; | |
2690 | |
2691 for (i = 0; i < 4; i++) | |
2692 mark_object (XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); | |
2693 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)) | |
2694 { | |
2695 for (i = 0; | |
2696 i < Dynarr_length (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys)); | |
2697 i++) | |
2698 { | |
2699 struct charset_conversion_spec *ccs = | |
2700 Dynarr_atp (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), i); | |
2701 mark_object (ccs->from_charset); | |
2702 mark_object (ccs->to_charset); | |
2703 } | |
2704 } | |
2705 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)) | |
2706 { | |
2707 for (i = 0; | |
2708 i < Dynarr_length (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys)); | |
2709 i++) | |
2710 { | |
2711 struct charset_conversion_spec *ccs = | |
2712 Dynarr_atp (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), i); | |
2713 mark_object (ccs->from_charset); | |
2714 mark_object (ccs->to_charset); | |
2715 } | |
2716 } | |
2717 } | |
2718 | |
2719 static void | |
2720 iso2022_finalize (Lisp_Object cs) | |
2721 { | |
2722 if (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)) | |
2723 { | |
2724 Dynarr_free (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs)); | |
2725 XCODING_SYSTEM_ISO2022_INPUT_CONV (cs) = 0; | |
2726 } | |
2727 if (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)) | |
2728 { | |
2729 Dynarr_free (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs)); | |
2730 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs) = 0; | |
2731 } | |
2732 } | |
2733 | |
2734 /* Given a list of charset conversion specs as specified in a Lisp | |
2735 program, parse it into STORE_HERE. */ | |
2736 | |
2737 static void | |
2738 parse_charset_conversion_specs (charset_conversion_spec_dynarr *store_here, | |
2739 Lisp_Object spec_list) | |
2740 { | |
2367 | 2741 EXTERNAL_LIST_LOOP_2 (car, spec_list) |
771 | 2742 { |
2743 Lisp_Object from, to; | |
2744 struct charset_conversion_spec spec; | |
2745 | |
2746 if (!CONSP (car) || !CONSP (XCDR (car)) || !NILP (XCDR (XCDR (car)))) | |
2747 invalid_argument ("Invalid charset conversion spec", car); | |
2748 from = Fget_charset (XCAR (car)); | |
2749 to = Fget_charset (XCAR (XCDR (car))); | |
2750 if (XCHARSET_TYPE (from) != XCHARSET_TYPE (to)) | |
2751 invalid_operation_2 | |
2752 ("Attempted conversion between different charset types", | |
2753 from, to); | |
2754 spec.from_charset = from; | |
2755 spec.to_charset = to; | |
2756 | |
2757 Dynarr_add (store_here, spec); | |
2758 } | |
2759 } | |
2760 | |
2761 /* Given a dynarr LOAD_HERE of internally-stored charset conversion | |
2762 specs, return the equivalent as the Lisp programmer would see it. | |
2763 | |
2764 If LOAD_HERE is 0, return Qnil. */ | |
2765 | |
2766 static Lisp_Object | |
2767 unparse_charset_conversion_specs (charset_conversion_spec_dynarr *load_here, | |
2768 int names) | |
2769 { | |
2770 int i; | |
2771 Lisp_Object result; | |
2772 | |
2773 if (!load_here) | |
2774 return Qnil; | |
2775 for (i = 0, result = Qnil; i < Dynarr_length (load_here); i++) | |
2776 { | |
2777 struct charset_conversion_spec *ccs = Dynarr_atp (load_here, i); | |
2778 if (names) | |
2779 result = Fcons (list2 (XCHARSET_NAME (ccs->from_charset), | |
2780 XCHARSET_NAME (ccs->to_charset)), result); | |
2781 else | |
2782 result = Fcons (list2 (ccs->from_charset, ccs->to_charset), result); | |
2783 } | |
2784 | |
2785 return Fnreverse (result); | |
2786 } | |
2787 | |
2788 static int | |
2789 iso2022_putprop (Lisp_Object codesys, | |
2790 Lisp_Object key, | |
2791 Lisp_Object value) | |
2792 { | |
2793 #define FROB_INITIAL_CHARSET(charset_num) \ | |
2794 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, charset_num) = \ | |
2795 ((EQ (value, Qt) || EQ (value, Qnil)) ? value : Fget_charset (value)) | |
2796 | |
2797 if (EQ (key, Qcharset_g0)) FROB_INITIAL_CHARSET (0); | |
2798 else if (EQ (key, Qcharset_g1)) FROB_INITIAL_CHARSET (1); | |
2799 else if (EQ (key, Qcharset_g2)) FROB_INITIAL_CHARSET (2); | |
2800 else if (EQ (key, Qcharset_g3)) FROB_INITIAL_CHARSET (3); | |
2801 | |
2802 #define FROB_FORCE_CHARSET(charset_num) \ | |
2803 XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (codesys, charset_num) = \ | |
2804 !NILP (value) | |
2805 | |
2806 else if (EQ (key, Qforce_g0_on_output)) FROB_FORCE_CHARSET (0); | |
2807 else if (EQ (key, Qforce_g1_on_output)) FROB_FORCE_CHARSET (1); | |
2808 else if (EQ (key, Qforce_g2_on_output)) FROB_FORCE_CHARSET (2); | |
2809 else if (EQ (key, Qforce_g3_on_output)) FROB_FORCE_CHARSET (3); | |
2810 | |
2811 #define FROB_BOOLEAN_PROPERTY(prop) \ | |
2812 XCODING_SYSTEM_ISO2022_##prop (codesys) = !NILP (value) | |
2813 | |
2814 else if (EQ (key, Qshort)) FROB_BOOLEAN_PROPERTY (SHORT); | |
2815 else if (EQ (key, Qno_ascii_eol)) FROB_BOOLEAN_PROPERTY (NO_ASCII_EOL); | |
2816 else if (EQ (key, Qno_ascii_cntl)) FROB_BOOLEAN_PROPERTY (NO_ASCII_CNTL); | |
2817 else if (EQ (key, Qseven)) FROB_BOOLEAN_PROPERTY (SEVEN); | |
2818 else if (EQ (key, Qlock_shift)) FROB_BOOLEAN_PROPERTY (LOCK_SHIFT); | |
2819 else if (EQ (key, Qno_iso6429)) FROB_BOOLEAN_PROPERTY (NO_ISO6429); | |
2820 else if (EQ (key, Qescape_quoted)) FROB_BOOLEAN_PROPERTY (ESCAPE_QUOTED); | |
2821 | |
2822 else if (EQ (key, Qinput_charset_conversion)) | |
2823 { | |
2824 XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys) = | |
2825 Dynarr_new (charset_conversion_spec); | |
2826 parse_charset_conversion_specs | |
2827 (XCODING_SYSTEM_ISO2022_INPUT_CONV (codesys), value); | |
2828 } | |
2829 else if (EQ (key, Qoutput_charset_conversion)) | |
2830 { | |
2831 XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys) = | |
2832 Dynarr_new (charset_conversion_spec); | |
2833 parse_charset_conversion_specs | |
2834 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (codesys), value); | |
2835 } | |
2836 else | |
2837 return 0; | |
2838 | |
2839 return 1; | |
2840 } | |
2841 | |
2842 static void | |
2286 | 2843 iso2022_finalize_coding_stream ( |
2844 #ifdef ENABLE_COMPOSITE_CHARS | |
2845 struct coding_stream *str | |
2846 #else | |
2847 struct coding_stream *UNUSED (str) | |
2848 #endif | |
2849 ) | |
771 | 2850 { |
2851 #ifdef ENABLE_COMPOSITE_CHARS | |
2852 struct iso2022_coding_stream *data = | |
2853 CODING_STREAM_TYPE_DATA (str, iso2022); | |
2854 | |
2855 if (data->composite_chars) | |
2856 Dynarr_free (data->composite_chars); | |
2857 #endif | |
2858 } | |
2859 | |
2860 static void | |
2861 iso2022_init (Lisp_Object codesys) | |
2862 { | |
2863 int i; | |
2864 for (i = 0; i < 4; i++) | |
2865 XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i) = Qnil; | |
2866 } | |
2867 | |
2868 static Lisp_Object | |
2869 coding_system_charset (Lisp_Object coding_system, int gnum) | |
2870 { | |
2871 Lisp_Object cs | |
2872 = XCODING_SYSTEM_ISO2022_INITIAL_CHARSET (coding_system, gnum); | |
2873 | |
2874 return CHARSETP (cs) ? XCHARSET_NAME (cs) : Qnil; | |
2875 } | |
2876 | |
2877 static Lisp_Object | |
2878 iso2022_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
2879 { | |
2880 if (EQ (prop, Qcharset_g0)) | |
2881 return coding_system_charset (coding_system, 0); | |
2882 else if (EQ (prop, Qcharset_g1)) | |
2883 return coding_system_charset (coding_system, 1); | |
2884 else if (EQ (prop, Qcharset_g2)) | |
2885 return coding_system_charset (coding_system, 2); | |
2886 else if (EQ (prop, Qcharset_g3)) | |
2887 return coding_system_charset (coding_system, 3); | |
2888 | |
2889 #define FORCE_CHARSET(charset_num) \ | |
2890 (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT \ | |
2891 (coding_system, charset_num) ? Qt : Qnil) | |
2892 | |
2893 else if (EQ (prop, Qforce_g0_on_output)) | |
2894 return FORCE_CHARSET (0); | |
2895 else if (EQ (prop, Qforce_g1_on_output)) | |
2896 return FORCE_CHARSET (1); | |
2897 else if (EQ (prop, Qforce_g2_on_output)) | |
2898 return FORCE_CHARSET (2); | |
2899 else if (EQ (prop, Qforce_g3_on_output)) | |
2900 return FORCE_CHARSET (3); | |
2901 | |
2902 #define LISP_BOOLEAN(prop) \ | |
2903 (XCODING_SYSTEM_ISO2022_##prop (coding_system) ? Qt : Qnil) | |
2904 | |
2905 else if (EQ (prop, Qshort)) return LISP_BOOLEAN (SHORT); | |
2906 else if (EQ (prop, Qno_ascii_eol)) return LISP_BOOLEAN (NO_ASCII_EOL); | |
2907 else if (EQ (prop, Qno_ascii_cntl)) return LISP_BOOLEAN (NO_ASCII_CNTL); | |
2908 else if (EQ (prop, Qseven)) return LISP_BOOLEAN (SEVEN); | |
2909 else if (EQ (prop, Qlock_shift)) return LISP_BOOLEAN (LOCK_SHIFT); | |
2910 else if (EQ (prop, Qno_iso6429)) return LISP_BOOLEAN (NO_ISO6429); | |
2911 else if (EQ (prop, Qescape_quoted)) return LISP_BOOLEAN (ESCAPE_QUOTED); | |
2912 | |
2913 else if (EQ (prop, Qinput_charset_conversion)) | |
2914 return | |
2915 unparse_charset_conversion_specs | |
2916 (XCODING_SYSTEM_ISO2022_INPUT_CONV (coding_system), 0); | |
2917 else if (EQ (prop, Qoutput_charset_conversion)) | |
2918 return | |
2919 unparse_charset_conversion_specs | |
2920 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (coding_system), 0); | |
2921 else | |
2922 return Qunbound; | |
2923 } | |
2924 | |
2925 static void | |
2286 | 2926 iso2022_print (Lisp_Object cs, Lisp_Object printcharfun, |
2927 int UNUSED (escapeflag)) | |
771 | 2928 { |
2929 int i; | |
2930 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2931 write_ascstring (printcharfun, "("); |
771 | 2932 for (i = 0; i < 4; i++) |
2933 { | |
2934 Lisp_Object charset = coding_system_charset (cs, i); | |
2935 if (i > 0) | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2936 write_ascstring (printcharfun, ", "); |
771 | 2937 write_fmt_string (printcharfun, "g%d=", i); |
800 | 2938 print_internal (CHARSETP (charset) ? XCHARSET_NAME (charset) : charset, printcharfun, 0); |
771 | 2939 if (XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (cs, i)) |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2940 write_ascstring (printcharfun, "(force)"); |
771 | 2941 } |
2942 | |
3084 | 2943 #define FROB(prop) \ |
2944 if (!NILP (iso2022_getprop (cs, prop))) \ | |
2945 { \ | |
2946 write_fmt_string_lisp (printcharfun, ", %s", 1, prop); \ | |
771 | 2947 } |
2948 | |
2949 FROB (Qshort); | |
2950 FROB (Qno_ascii_eol); | |
2951 FROB (Qno_ascii_cntl); | |
2952 FROB (Qseven); | |
2953 FROB (Qlock_shift); | |
2954 FROB (Qno_iso6429); | |
2955 FROB (Qescape_quoted); | |
2956 | |
2957 { | |
2958 Lisp_Object val = | |
2959 unparse_charset_conversion_specs | |
2960 (XCODING_SYSTEM_ISO2022_INPUT_CONV (cs), 1); | |
2961 if (!NILP (val)) | |
2962 { | |
800 | 2963 write_fmt_string_lisp (printcharfun, ", input-charset-conversion=%s", 1, val); |
771 | 2964 } |
2965 val = | |
2966 unparse_charset_conversion_specs | |
2967 (XCODING_SYSTEM_ISO2022_OUTPUT_CONV (cs), 1); | |
2968 if (!NILP (val)) | |
2969 { | |
800 | 2970 write_fmt_string_lisp (printcharfun, ", output-charset-conversion=%s", 1, val); |
771 | 2971 } |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
2972 write_ascstring (printcharfun, ")"); |
771 | 2973 } |
2974 } | |
2975 | |
2976 | |
2977 /************************************************************************/ | |
2978 /* ISO2022 detector */ | |
2979 /************************************************************************/ | |
2980 | |
2981 DEFINE_DETECTOR (iso2022); | |
2982 /* ISO2022 system using only seven-bit bytes, no locking shift */ | |
2983 DEFINE_DETECTOR_CATEGORY (iso2022, iso_7); | |
2984 /* ISO2022 system using eight-bit bytes, no locking shift, no single shift, | |
2985 using designation to switch charsets */ | |
2986 DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_designate); | |
2987 /* ISO2022 system using eight-bit bytes, no locking shift, no designation | |
2988 sequences, one-dimension characters in the upper half. */ | |
2989 DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_1); | |
2990 /* ISO2022 system using eight-bit bytes, no locking shift, no designation | |
2991 sequences, two-dimension characters in the upper half. */ | |
2992 DEFINE_DETECTOR_CATEGORY (iso2022, iso_8_2); | |
2993 /* ISO2022 system using locking shift */ | |
2994 DEFINE_DETECTOR_CATEGORY (iso2022, iso_lock_shift); | |
2995 | |
2996 struct iso2022_detector | |
2997 { | |
2998 int initted; | |
2999 struct iso2022_coding_stream *iso; | |
3000 unsigned int flags; | |
3001 | |
3002 /* for keeping temporary track of high-byte groups */ | |
3003 int high_byte_count; | |
3004 unsigned int saw_single_shift_just_now:1; | |
3005 | |
3006 /* running state; we set the likelihoods at the end */ | |
3007 unsigned int seen_high_byte:1; | |
3008 unsigned int seen_single_shift:1; | |
3009 unsigned int seen_locking_shift:1; | |
3010 unsigned int seen_designate:1; | |
3011 unsigned int bad_single_byte_sequences; | |
3012 unsigned int bad_multibyte_escape_sequences; | |
3013 unsigned int good_multibyte_escape_sequences; | |
3014 int even_high_byte_groups; | |
985 | 3015 int longest_even_high_byte; |
771 | 3016 int odd_high_byte_groups; |
3017 }; | |
3018 | |
3019 static void | |
3020 iso2022_detect (struct detection_state *st, const UExtbyte *src, | |
3021 Bytecount n) | |
3022 { | |
3023 Bytecount orign = n; | |
3024 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
3025 | |
3026 /* #### There are serious deficiencies in the recognition mechanism | |
3027 here. This needs to be much smarter if it's going to cut it. | |
3028 The sequence "\xff\x0f" is currently detected as LOCK_SHIFT while | |
3029 it should be detected as Latin-1. | |
3030 All the ISO2022 stuff in this file should be synced up with the | |
3031 code from FSF Emacs-21.0, in which Mule should be more or less stable. | |
3032 Perhaps we should wait till R2L works in FSF Emacs? */ | |
3033 | |
3034 /* We keep track of running state on our own, and set the categories at the | |
3035 end; that way we can reflect the correct state each time we finish, but | |
3036 not get confused by those results the next time around. */ | |
3037 | |
3038 if (!data->initted) | |
3039 { | |
3040 xzero (*data); | |
3041 data->iso = xnew_and_zero (struct iso2022_coding_stream); | |
3042 reset_iso2022_decode (Qnil, data->iso); | |
3043 data->initted = 1; | |
3044 } | |
3045 | |
3046 while (n--) | |
3047 { | |
3048 UExtbyte c = *src++; | |
3049 if (c >= 0x80) | |
3050 data->seen_high_byte = 1; | |
3051 if (c >= 0xA0) | |
3052 data->high_byte_count++; | |
3053 else | |
3054 { | |
3055 if (data->high_byte_count && | |
3056 !data->saw_single_shift_just_now) | |
3057 { | |
3058 if (data->high_byte_count & 1) | |
3059 data->odd_high_byte_groups++; | |
3060 else | |
985 | 3061 { |
3062 data->even_high_byte_groups++; | |
3063 if (data->longest_even_high_byte < data->high_byte_count) | |
3064 data->longest_even_high_byte = data->high_byte_count; | |
3065 } | |
771 | 3066 } |
3067 data->high_byte_count = 0; | |
3068 data->saw_single_shift_just_now = 0; | |
3069 } | |
3070 if (!(data->flags & ISO_STATE_ESCAPE) | |
826 | 3071 && (byte_c0_p (c) || byte_c1_p (c))) |
771 | 3072 { /* control chars */ |
3073 switch (c) | |
3074 { | |
3075 /* Allow and ignore control characters that you might | |
3076 reasonably see in a text file */ | |
3077 case '\r': | |
3078 case '\n': | |
3079 case '\t': | |
3080 case 7: /* bell */ | |
3081 case 8: /* backspace */ | |
3082 case 11: /* vertical tab */ | |
3083 case 12: /* form feed */ | |
3084 case 26: /* MS-DOS C-z junk */ | |
3085 case 31: /* '^_' -- for info */ | |
3086 goto label_continue_loop; | |
3087 | |
3088 default: | |
3089 break; | |
3090 } | |
3091 } | |
3092 | |
826 | 3093 if ((data->flags & ISO_STATE_ESCAPE) || byte_c0_p (c) |
3094 || byte_c1_p (c)) | |
771 | 3095 { |
3096 switch (parse_iso2022_esc (Qnil, data->iso, c, | |
3097 &data->flags, 0)) | |
3098 { | |
3099 case 1: /* done */ | |
3100 if (data->iso->esc_bytes_index > 0) | |
3101 data->good_multibyte_escape_sequences++; | |
3102 switch (data->iso->esc) | |
3103 { | |
3104 case ISO_ESC_DESIGNATE: | |
3105 data->seen_designate = 1; | |
3106 break; | |
3107 case ISO_ESC_LOCKING_SHIFT: | |
3108 data->seen_locking_shift = 1; | |
3109 break; | |
3110 case ISO_ESC_SINGLE_SHIFT: | |
3111 data->saw_single_shift_just_now = 1; | |
3112 data->seen_single_shift = 1; | |
3113 break; | |
3114 default: | |
3115 break; | |
3116 } | |
3117 break; | |
3118 | |
3119 case -1: /* not done */ | |
3120 break; | |
3121 | |
3122 case 0: /* error */ | |
3123 if (data->iso->esc == ISO_ESC_NOTHING) | |
3124 data->bad_single_byte_sequences++; | |
3125 else | |
3126 data->bad_multibyte_escape_sequences++; | |
3127 } | |
3128 } | |
3129 label_continue_loop:; | |
3130 } | |
3131 | |
985 | 3132 if (data->high_byte_count && |
3133 !data->saw_single_shift_just_now) | |
3134 { | |
3135 if (data->high_byte_count & 1) | |
3136 data->odd_high_byte_groups++; | |
3137 else | |
3138 { | |
3139 data->even_high_byte_groups++; | |
3140 if (data->longest_even_high_byte < data->high_byte_count) | |
3141 data->longest_even_high_byte = data->high_byte_count; | |
3142 } | |
3143 } | |
3144 | |
771 | 3145 if (data->bad_multibyte_escape_sequences > 2 || |
3146 (data->bad_multibyte_escape_sequences > 0 && | |
3147 data->good_multibyte_escape_sequences / | |
3148 data->bad_multibyte_escape_sequences < 10)) | |
3149 /* Just making it up ... */ | |
3150 SET_DET_RESULTS (st, iso2022, DET_NEARLY_IMPOSSIBLE); | |
3151 else if (data->bad_single_byte_sequences > 5 || | |
3152 (data->bad_single_byte_sequences > 0 && | |
3153 (data->good_multibyte_escape_sequences + | |
3154 data->even_high_byte_groups + | |
3155 data->odd_high_byte_groups) / | |
3156 data->bad_single_byte_sequences < 10)) | |
3157 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3158 else if (data->seen_locking_shift) | |
3159 { | |
3160 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
3161 DET_RESULT (st, iso_lock_shift) = DET_QUITE_PROBABLE; | |
3162 } | |
3163 else if (!data->seen_high_byte) | |
3164 { | |
3165 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3166 if (data->good_multibyte_escape_sequences) | |
3167 DET_RESULT (st, iso_7) = DET_QUITE_PROBABLE; | |
3168 else if (data->seen_single_shift) | |
3169 DET_RESULT (st, iso_7) = DET_SOMEWHAT_LIKELY; | |
3170 else | |
3171 { | |
3172 /* If we've just seen pure 7-bit data, no escape sequences, | |
3173 then we can't give much likelihood; but if we've seen enough | |
3174 of this data, we can assume some unlikelihood of any 8-bit | |
3175 encoding */ | |
3176 if (orign + st->bytes_seen >= 1000) | |
3177 DET_RESULT (st, iso_7) = DET_AS_LIKELY_AS_UNLIKELY; | |
3178 else | |
3179 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
3180 } | |
3181 } | |
3182 else if (data->seen_designate) | |
3183 { | |
3184 SET_DET_RESULTS (st, iso2022, DET_QUITE_IMPROBABLE); | |
3185 if (data->seen_single_shift) | |
3186 /* #### Does this really make sense? */ | |
3187 DET_RESULT (st, iso_8_designate) = DET_SOMEWHAT_UNLIKELY; | |
3188 else | |
3189 DET_RESULT (st, iso_8_designate) = DET_QUITE_PROBABLE; | |
3190 } | |
3191 else if (data->odd_high_byte_groups > 0 && | |
3192 data->even_high_byte_groups == 0) | |
3193 { | |
3194 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3195 if (data->seen_single_shift) | |
3196 DET_RESULT (st, iso_8_1) = DET_QUITE_PROBABLE; | |
3197 else | |
3198 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
3199 } | |
3200 else if (data->odd_high_byte_groups == 0 && | |
3201 data->even_high_byte_groups > 0) | |
3202 { | |
985 | 3203 #if 0 |
771 | 3204 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); |
3205 if (data->even_high_byte_groups > 10) | |
3206 { | |
3207 if (data->seen_single_shift) | |
3208 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
3209 else | |
3210 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
3211 if (data->even_high_byte_groups < 50) | |
3212 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_UNLIKELY; | |
3213 /* else it stays at quite improbable */ | |
3214 } | |
985 | 3215 #else |
3216 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3217 if (data->seen_single_shift) | |
3218 DET_RESULT (st, iso_8_2) = DET_QUITE_PROBABLE; | |
3219 else if (data->even_high_byte_groups > 10) | |
3220 DET_RESULT (st, iso_8_2) = DET_SOMEWHAT_LIKELY; | |
3221 else if (data->longest_even_high_byte > 6) | |
3222 DET_RESULT (st, iso_8_2) = DET_SLIGHTLY_LIKELY; | |
3223 #endif | |
771 | 3224 } |
3225 else if (data->odd_high_byte_groups > 0 && | |
3226 data->even_high_byte_groups > 0) | |
3393 | 3227 { |
3228 /* Well, this could be a Latin-1 text, with most high-byte | |
3229 characters single, but sometimes two are together, though | |
3230 this happens not as often. This is common for Western | |
3231 European languages like German, French, Danish, Swedish, etc. | |
3232 Then we would either have a rather small file and | |
3233 even_high_byte_groups would be low. | |
3234 Or we would have a larger file and the ratio of odd to even | |
3235 groups would be very high. */ | |
3236 SET_DET_RESULTS (st, iso2022, DET_SOMEWHAT_UNLIKELY); | |
3237 if (data->even_high_byte_groups <= 3 || | |
3238 data->odd_high_byte_groups >= 10 * data->even_high_byte_groups) | |
3239 DET_RESULT (st, iso_8_1) = DET_SOMEWHAT_LIKELY; | |
3240 } | |
771 | 3241 else |
3242 SET_DET_RESULTS (st, iso2022, DET_AS_LIKELY_AS_UNLIKELY); | |
3243 } | |
3244 | |
3245 static void | |
3246 iso2022_finalize_detection_state (struct detection_state *st) | |
3247 { | |
3248 struct iso2022_detector *data = DETECTION_STATE_DATA (st, iso2022); | |
3249 if (data->iso) | |
4976
16112448d484
Rename xfree(FOO, TYPE) -> xfree(FOO)
Ben Wing <ben@xemacs.org>
parents:
4967
diff
changeset
|
3250 xfree (data->iso); |
771 | 3251 } |
3252 | |
3253 | |
3254 /************************************************************************/ | |
3255 /* CCL methods */ | |
3256 /************************************************************************/ | |
3257 | |
3258 /* Converter written in CCL. */ | |
3259 | |
3260 struct ccl_coding_system | |
3261 { | |
3262 /* For a CCL coding system, these specify the CCL programs used for | |
3263 decoding (input) and encoding (output). */ | |
3264 Lisp_Object decode; | |
3265 Lisp_Object encode; | |
3266 }; | |
3267 | |
3268 #define CODING_SYSTEM_CCL_DECODE(codesys) \ | |
3269 (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->decode) | |
3270 #define CODING_SYSTEM_CCL_ENCODE(codesys) \ | |
3271 (CODING_SYSTEM_TYPE_DATA (codesys, ccl)->encode) | |
3272 #define XCODING_SYSTEM_CCL_DECODE(codesys) \ | |
3273 CODING_SYSTEM_CCL_DECODE (XCODING_SYSTEM (codesys)) | |
3274 #define XCODING_SYSTEM_CCL_ENCODE(codesys) \ | |
3275 CODING_SYSTEM_CCL_ENCODE (XCODING_SYSTEM (codesys)) | |
3276 | |
3277 struct ccl_coding_stream | |
3278 { | |
3279 /* state of the running CCL program */ | |
3280 struct ccl_program ccl; | |
3281 }; | |
3282 | |
1204 | 3283 static const struct memory_description ccl_coding_system_description[] = { |
3284 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, decode) }, | |
3285 { XD_LISP_OBJECT, offsetof (struct ccl_coding_system, encode) }, | |
771 | 3286 { XD_END } |
3287 }; | |
3288 | |
1204 | 3289 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (ccl); |
3290 | |
771 | 3291 static void |
3292 ccl_mark (Lisp_Object codesys) | |
3293 { | |
3294 mark_object (XCODING_SYSTEM_CCL_DECODE (codesys)); | |
3295 mark_object (XCODING_SYSTEM_CCL_ENCODE (codesys)); | |
3296 } | |
3297 | |
3298 static Bytecount | |
3299 ccl_convert (struct coding_stream *str, const UExtbyte *src, | |
3300 unsigned_char_dynarr *dst, Bytecount n) | |
3301 { | |
3302 struct ccl_coding_stream *data = | |
3303 CODING_STREAM_TYPE_DATA (str, ccl); | |
3304 Bytecount orign = n; | |
3305 | |
3306 data->ccl.last_block = str->eof; | |
3307 /* When applying a CCL program to a stream, SRC must not be NULL -- this | |
3308 is a special signal to the driver that read and write operations are | |
3309 not allowed. The code does not actually look at what SRC points to if | |
3310 N == 0. | |
3311 */ | |
3312 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", | |
3313 dst, n, 0, | |
3314 str->direction == CODING_DECODE ? CCL_MODE_DECODING : | |
3315 CCL_MODE_ENCODING); | |
3316 return orign; | |
3317 } | |
3318 | |
3319 static void | |
3320 ccl_init_coding_stream (struct coding_stream *str) | |
3321 { | |
3322 struct ccl_coding_stream *data = | |
3323 CODING_STREAM_TYPE_DATA (str, ccl); | |
3324 | |
3325 setup_ccl_program (&data->ccl, | |
3326 str->direction == CODING_DECODE ? | |
3327 XCODING_SYSTEM_CCL_DECODE (str->codesys) : | |
3328 XCODING_SYSTEM_CCL_ENCODE (str->codesys)); | |
3329 } | |
3330 | |
3331 static void | |
3332 ccl_rewind_coding_stream (struct coding_stream *str) | |
3333 { | |
3334 ccl_init_coding_stream (str); | |
3335 } | |
3336 | |
3337 static void | |
3338 ccl_init (Lisp_Object codesys) | |
3339 { | |
3340 XCODING_SYSTEM_CCL_DECODE (codesys) = Qnil; | |
3341 XCODING_SYSTEM_CCL_ENCODE (codesys) = Qnil; | |
3342 } | |
3343 | |
3344 static int | |
3345 ccl_putprop (Lisp_Object codesys, Lisp_Object key, Lisp_Object value) | |
3346 { | |
3347 if (EQ (key, Qdecode)) | |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3348 XCODING_SYSTEM_CCL_DECODE (codesys) = get_ccl_program (value); |
771 | 3349 else if (EQ (key, Qencode)) |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3350 XCODING_SYSTEM_CCL_ENCODE (codesys) = get_ccl_program (value); |
771 | 3351 return 1; |
3352 } | |
3353 | |
3354 static Lisp_Object | |
3355 ccl_getprop (Lisp_Object coding_system, Lisp_Object prop) | |
3356 { | |
3357 if (EQ (prop, Qdecode)) | |
3358 return XCODING_SYSTEM_CCL_DECODE (coding_system); | |
3359 else if (EQ (prop, Qencode)) | |
3360 return XCODING_SYSTEM_CCL_ENCODE (coding_system); | |
3361 else | |
3362 return Qunbound; | |
3363 } | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3364 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3365 /************************************************************************/ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3366 /* FIXED_WIDTH methods */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3367 /************************************************************************/ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3368 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3369 struct fixed_width_coding_system |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3370 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3371 /* For a fixed_width coding system, these specify the CCL programs |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3372 used for decoding (input) and encoding (output). */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3373 Lisp_Object decode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3374 Lisp_Object encode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3375 Lisp_Object from_unicode; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3376 Lisp_Object invalid_sequences_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3377 Lisp_Object query_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3378 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3379 /* This is not directly accessible from Lisp; it is a concatenation of the |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3380 previous two strings, used for simplicity of implementation. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3381 Lisp_Object invalid_and_query_skip_chars; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3382 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3383 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3384 #define CODING_SYSTEM_FIXED_WIDTH_DECODE(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3385 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->decode) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3386 #define CODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3387 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->encode) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3388 #define CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3389 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->from_unicode) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3390 #define CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3391 (CODING_SYSTEM_TYPE_DATA (codesys, \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3392 fixed_width)->invalid_sequences_skip_chars) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3393 #define CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3394 (CODING_SYSTEM_TYPE_DATA (codesys, fixed_width)->query_skip_chars) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3395 #define CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3396 (CODING_SYSTEM_TYPE_DATA (codesys, \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3397 fixed_width)->invalid_and_query_skip_chars) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3398 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3399 #define XCODING_SYSTEM_FIXED_WIDTH_DECODE(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3400 CODING_SYSTEM_FIXED_WIDTH_DECODE (XCODING_SYSTEM (codesys)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3401 #define XCODING_SYSTEM_FIXED_WIDTH_ENCODE(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3402 CODING_SYSTEM_FIXED_WIDTH_ENCODE (XCODING_SYSTEM (codesys)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3403 #define XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3404 (CODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (XCODING_SYSTEM (codesys))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3405 #define XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3406 (CODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3407 (XCODING_SYSTEM (codesys))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3408 #define XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3409 (CODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (XCODING_SYSTEM (codesys))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3410 #define XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3411 (CODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS \ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3412 (XCODING_SYSTEM(codesys))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3413 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3414 struct fixed_width_coding_stream |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3415 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3416 /* state of the running CCL program */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3417 struct ccl_program ccl; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3418 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3419 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3420 static const struct memory_description |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3421 fixed_width_coding_system_description[] = { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3422 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, decode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3423 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, encode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3424 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3425 from_unicode) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3426 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3427 invalid_sequences_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3428 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3429 query_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3430 { XD_LISP_OBJECT, offsetof (struct fixed_width_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3431 invalid_and_query_skip_chars) }, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3432 { XD_END } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3433 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3434 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3435 DEFINE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3436 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3437 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3438 fixed_width_mark (Lisp_Object codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3439 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3440 mark_object (XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3441 mark_object (XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3442 mark_object (XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3443 mark_object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3444 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3445 mark_object (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) ); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3446 mark_object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3447 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3448 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3449 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3450 static Bytecount |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3451 fixed_width_convert (struct coding_stream *str, const UExtbyte *src, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3452 unsigned_char_dynarr *dst, Bytecount n) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3453 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3454 struct fixed_width_coding_stream *data = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3455 CODING_STREAM_TYPE_DATA (str, fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3456 Bytecount orign = n; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3457 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3458 data->ccl.last_block = str->eof; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3459 /* When applying a CCL program to a stream, SRC must not be NULL -- this |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3460 is a special signal to the driver that read and write operations are |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3461 not allowed. The code does not actually look at what SRC points to if |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3462 N == 0. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3463 ccl_driver (&data->ccl, src ? src : (const unsigned char *) "", |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3464 dst, n, 0, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3465 str->direction == CODING_DECODE ? CCL_MODE_DECODING : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3466 CCL_MODE_ENCODING); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3467 return orign; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3468 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3469 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3470 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3471 fixed_width_init_coding_stream (struct coding_stream *str) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3472 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3473 struct fixed_width_coding_stream *data = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3474 CODING_STREAM_TYPE_DATA (str, fixed_width); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3475 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3476 setup_ccl_program (&data->ccl, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3477 str->direction == CODING_DECODE ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3478 XCODING_SYSTEM_FIXED_WIDTH_DECODE (str->codesys) : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3479 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (str->codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3480 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3481 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3482 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3483 fixed_width_rewind_coding_stream (struct coding_stream *str) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3484 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3485 fixed_width_init_coding_stream (str); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3486 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3487 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3488 static void |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3489 fixed_width_init (Lisp_Object codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3490 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3491 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3492 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3493 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3494 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3495 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3496 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS(codesys) = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3497 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3498 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3499 static int |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3500 fixed_width_putprop (Lisp_Object codesys, Lisp_Object key, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3501 Lisp_Object value) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3502 { |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3503 if (EQ (key, Qdecode)) |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3504 { |
4745
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3505 XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys) = get_ccl_program (value); |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3506 } |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3507 else if (EQ (key, Qencode)) |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3508 { |
0c54de4c4b9d
Resolve the unregistered-CCL-programs-get-garbage-collected problem correctly
Aidan Kehoe <kehoea@parhasard.net>
parents:
4703
diff
changeset
|
3509 XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys) = get_ccl_program (value); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3510 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3511 else if (EQ (key, Qfrom_unicode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3512 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3513 CHECK_HASH_TABLE (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3514 XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3515 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3516 else if (EQ (key, Qinvalid_sequences_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3517 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3518 CHECK_STRING (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3519 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3520 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3521 value = Fcopy_sequence (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3522 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3523 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3524 = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3525 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3526 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3527 = concat2 (value, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3528 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3529 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3530 else if (EQ (key, Qquery_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3531 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3532 CHECK_STRING (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3533 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3534 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3535 value = Fcopy_sequence (value); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3536 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3537 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys) = value; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3538 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3539 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS (codesys) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3540 = concat2 (value, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3541 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3542 (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3543 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3544 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3545 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3546 return 0; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3547 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3548 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3549 return 1; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3550 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3551 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3552 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3553 fixed_width_getprop (Lisp_Object codesys, Lisp_Object prop) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3554 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3555 if (EQ (prop, Qdecode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3556 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3557 return XCODING_SYSTEM_FIXED_WIDTH_DECODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3558 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3559 else if (EQ (prop, Qencode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3560 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3561 return XCODING_SYSTEM_FIXED_WIDTH_ENCODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3562 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3563 else if (EQ (prop, Qfrom_unicode)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3564 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3565 return XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3566 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3567 else if (EQ (prop, Qinvalid_sequences_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3568 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3569 /* Make sure Lisp can't make our data inconsistent: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3570 return |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3571 Fcopy_sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3572 (XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3573 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3574 else if (EQ (prop, Qquery_skip_chars)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3575 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3576 return |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3577 Fcopy_sequence (XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS (codesys)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3578 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3579 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3580 return Qunbound; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3581 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3582 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3583 static Lisp_Object Vfixed_width_query_ranges_cache; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3584 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3585 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3586 fixed_width_skip_chars_data_given_strings (Lisp_Object string, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3587 Lisp_Object query_skip_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3588 Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3589 invalid_sequences_skip_chars, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3590 Binbyte *fastmap, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3591 int fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3592 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3593 Lisp_Object result = Fgethash (string, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3594 Vfixed_width_query_ranges_cache, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3595 Qnil); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3596 REGISTER Ibyte *p, *pend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3597 REGISTER Ichar c; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3598 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3599 memset (fastmap, query_coding_unencodable, fastmap_len); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3600 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3601 if (!NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3602 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3603 int i; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3604 Lisp_Object ranged; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3605 assert (RANGE_TABLEP (result)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3606 for (i = 0; i < fastmap_len; ++i) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3607 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3608 ranged = Fget_range_table (make_int (i), result, Qnil); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3609 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3610 if (EQ (ranged, Qsucceeded)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3611 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3612 fastmap [i] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3613 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3614 else if (EQ (ranged, Qinvalid_sequence)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3615 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3616 fastmap [i] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3617 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3618 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3619 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3620 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3621 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3622 result = Fmake_range_table (Qstart_closed_end_closed); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3623 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3624 p = XSTRING_DATA (query_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3625 pend = p + XSTRING_LENGTH (query_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3626 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3627 while (p != pend) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3628 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3629 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3630 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3631 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3632 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3633 if (c == '\\') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3634 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3635 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3636 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3637 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3638 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3639 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3640 if (p != pend && *p == '-') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3641 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3642 Ichar cend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3643 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3644 /* Skip over the dash. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3645 p++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3646 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3647 cend = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3648 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3649 Fput_range_table (make_int (c), make_int (cend), Qsucceeded, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3650 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3651 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3652 while (c <= cend && c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3653 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3654 fastmap[c] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3655 c++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3656 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3657 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3658 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3659 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3660 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3661 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3662 if (c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3663 fastmap[c] = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3664 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3665 Fput_range_table (make_int (c), make_int (c), Qsucceeded, result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3666 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3667 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3668 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3669 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3670 p = XSTRING_DATA (invalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3671 pend = p + XSTRING_LENGTH (invalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3672 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3673 while (p != pend) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3674 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3675 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3676 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3677 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3678 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3679 if (c == '\\') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3680 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3681 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3682 c = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3683 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3684 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3685 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3686 if (p != pend && *p == '-') |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3687 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3688 Ichar cend; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3689 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3690 /* Skip over the dash. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3691 p++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3692 if (p == pend) break; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3693 cend = itext_ichar (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3694 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3695 Fput_range_table (make_int (c), make_int (cend), Qinvalid_sequence, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3696 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3697 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3698 while (c <= cend && c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3699 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3700 fastmap[c] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3701 c++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3702 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3703 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3704 INC_IBYTEPTR (p); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3705 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3706 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3707 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3708 if (c < fastmap_len) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3709 fastmap[c] = query_coding_invalid_sequence; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3710 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3711 Fput_range_table (make_int (c), make_int (c), Qinvalid_sequence, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3712 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3713 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3714 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3715 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3716 Fputhash (string, result, Vfixed_width_query_ranges_cache); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3717 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3718 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3719 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3720 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3721 static Lisp_Object |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3722 fixed_width_query (Lisp_Object codesys, struct buffer *buf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3723 Charbpos end, int flags) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3724 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3725 Charbpos pos = BUF_PT (buf), fail_range_start, fail_range_end; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3726 Charbpos pos_byte = BYTE_BUF_PT (buf); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3727 Lisp_Object skip_chars_range_table, from_unicode, checked_unicode, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3728 result = Qnil; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3729 enum query_coding_failure_reasons failed_reason, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3730 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3731 Binbyte fastmap[0xff]; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3732 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3733 from_unicode = XCODING_SYSTEM_FIXED_WIDTH_FROM_UNICODE (codesys); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3734 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3735 skip_chars_range_table = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3736 fixed_width_skip_chars_data_given_strings |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3737 ((flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3738 XCODING_SYSTEM_FIXED_WIDTH_INVALID_AND_QUERY_SKIP_CHARS |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3739 (codesys) : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3740 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3741 XCODING_SYSTEM_FIXED_WIDTH_QUERY_SKIP_CHARS(codesys), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3742 (flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES ? |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3743 build_ascstring("") : |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3744 XCODING_SYSTEM_FIXED_WIDTH_INVALID_SEQUENCES_SKIP_CHARS (codesys)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3745 fastmap, (int)(sizeof (fastmap))); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3746 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3747 if (flags & QUERY_METHOD_HIGHLIGHT && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3748 /* If we're being called really early, live without highlights getting |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3749 cleared properly: */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3750 !(UNBOUNDP (XSYMBOL (Qquery_coding_clear_highlights)->function))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3751 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3752 /* It's okay to call Lisp here, the only non-stack object we may have |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3753 allocated up to this point is skip_chars_range_table, and that's |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3754 reachable from its entry in Vfixed_width_query_ranges_cache. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3755 call3 (Qquery_coding_clear_highlights, make_int (pos), make_int (end), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3756 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3757 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3758 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3759 while (pos < end) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3760 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3761 Ichar ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3762 if ((ch < (int) (sizeof(fastmap))) ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3763 (fastmap[ch] == query_coding_succeeded) : |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3764 (EQ (Qsucceeded, Fget_range_table (make_int (ch), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3765 skip_chars_range_table, Qnil)))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3766 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3767 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3768 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3769 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3770 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3771 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3772 fail_range_start = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3773 while ((pos < end) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3774 ((!(flags & QUERY_METHOD_IGNORE_INVALID_SEQUENCES) && |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3775 EQ (Qinvalid_sequence, Fget_range_table |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3776 (make_int (ch), skip_chars_range_table, Qnil)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3777 && (failed_reason = query_coding_invalid_sequence)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3778 || ((NILP ((checked_unicode = |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3779 Fgethash (Fchar_to_unicode (make_char (ch)), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3780 from_unicode, Qnil)))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3781 && (failed_reason = query_coding_unencodable))) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3782 && (previous_failed_reason == query_coding_succeeded |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3783 || previous_failed_reason == failed_reason)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3784 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3785 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3786 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3787 ch = BYTE_BUF_FETCH_CHAR (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3788 previous_failed_reason = failed_reason; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3789 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3790 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3791 if (fail_range_start == pos) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3792 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3793 /* The character can actually be encoded; move on. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3794 pos++; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3795 INC_BYTEBPOS (buf, pos_byte); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3796 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3797 else |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3798 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3799 assert (previous_failed_reason == query_coding_invalid_sequence |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3800 || previous_failed_reason == query_coding_unencodable); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3801 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3802 if (flags & QUERY_METHOD_ERRORP) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3803 { |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3804 signal_error_2 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3805 (Qtext_conversion_error, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3806 "Cannot encode using coding system", |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3807 make_string_from_buffer (buf, fail_range_start, |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3808 pos - fail_range_start), |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4745
diff
changeset
|
3809 XCODING_SYSTEM_NAME (codesys)); |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3810 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3811 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3812 if (NILP (result)) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3813 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3814 result = Fmake_range_table (Qstart_closed_end_open); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3815 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3816 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3817 fail_range_end = pos; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3818 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3819 Fput_range_table (make_int (fail_range_start), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3820 make_int (fail_range_end), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3821 (previous_failed_reason |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3822 == query_coding_unencodable ? |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3823 Qunencodable : Qinvalid_sequence), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3824 result); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3825 previous_failed_reason = query_coding_succeeded; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3826 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3827 if (flags & QUERY_METHOD_HIGHLIGHT) |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3828 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3829 Lisp_Object extent |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3830 = Fmake_extent (make_int (fail_range_start), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3831 make_int (fail_range_end), |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3832 wrap_buffer (buf)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3833 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3834 Fset_extent_priority |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3835 (extent, make_int (2 + mouse_highlight_priority)); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3836 Fset_extent_face (extent, Qquery_coding_warning_face); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3837 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3838 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3839 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3840 } |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3841 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3842 return result; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3843 } |
771 | 3844 |
3845 | |
3846 /************************************************************************/ | |
3847 /* Initialization */ | |
3848 /************************************************************************/ | |
3849 | |
3850 void | |
3851 syms_of_mule_coding (void) | |
3852 { | |
3853 DEFSUBR (Fdecode_shift_jis_char); | |
3854 DEFSUBR (Fencode_shift_jis_char); | |
3855 DEFSUBR (Fdecode_big5_char); | |
3856 DEFSUBR (Fencode_big5_char); | |
3857 | |
3858 DEFSYMBOL (Qbig5); | |
3859 DEFSYMBOL (Qshift_jis); | |
3860 DEFSYMBOL (Qccl); | |
3861 DEFSYMBOL (Qiso2022); | |
3862 | |
3863 DEFSYMBOL (Qcharset_g0); | |
3864 DEFSYMBOL (Qcharset_g1); | |
3865 DEFSYMBOL (Qcharset_g2); | |
3866 DEFSYMBOL (Qcharset_g3); | |
3867 DEFSYMBOL (Qforce_g0_on_output); | |
3868 DEFSYMBOL (Qforce_g1_on_output); | |
3869 DEFSYMBOL (Qforce_g2_on_output); | |
3870 DEFSYMBOL (Qforce_g3_on_output); | |
3871 DEFSYMBOL (Qno_iso6429); | |
3872 DEFSYMBOL (Qinput_charset_conversion); | |
3873 DEFSYMBOL (Qoutput_charset_conversion); | |
3874 | |
3875 DEFSYMBOL (Qshort); | |
3876 DEFSYMBOL (Qno_ascii_eol); | |
3877 DEFSYMBOL (Qno_ascii_cntl); | |
3878 DEFSYMBOL (Qseven); | |
3879 DEFSYMBOL (Qlock_shift); | |
3880 | |
3881 DEFSYMBOL (Qiso_7); | |
3882 DEFSYMBOL (Qiso_8_designate); | |
3883 DEFSYMBOL (Qiso_8_1); | |
3884 DEFSYMBOL (Qiso_8_2); | |
3885 DEFSYMBOL (Qiso_lock_shift); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3886 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3887 DEFSYMBOL (Qinvalid_sequences_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3888 DEFSYMBOL (Qquery_skip_chars); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3889 DEFSYMBOL (Qfixed_width); |
771 | 3890 } |
3891 | |
3892 void | |
3893 coding_system_type_create_mule_coding (void) | |
3894 { | |
3895 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (iso2022, "iso2022-coding-system-p"); | |
3896 CODING_SYSTEM_HAS_METHOD (iso2022, mark); | |
3897 CODING_SYSTEM_HAS_METHOD (iso2022, convert); | |
3898 CODING_SYSTEM_HAS_METHOD (iso2022, finalize_coding_stream); | |
3899 CODING_SYSTEM_HAS_METHOD (iso2022, init_coding_stream); | |
3900 CODING_SYSTEM_HAS_METHOD (iso2022, rewind_coding_stream); | |
3901 CODING_SYSTEM_HAS_METHOD (iso2022, init); | |
3902 CODING_SYSTEM_HAS_METHOD (iso2022, print); | |
3903 CODING_SYSTEM_HAS_METHOD (iso2022, finalize); | |
3904 CODING_SYSTEM_HAS_METHOD (iso2022, putprop); | |
3905 CODING_SYSTEM_HAS_METHOD (iso2022, getprop); | |
3906 | |
3907 INITIALIZE_DETECTOR (iso2022); | |
3908 DETECTOR_HAS_METHOD (iso2022, detect); | |
3909 DETECTOR_HAS_METHOD (iso2022, finalize_detection_state); | |
3910 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_7); | |
3911 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_designate); | |
3912 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_1); | |
3913 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_8_2); | |
3914 INITIALIZE_DETECTOR_CATEGORY (iso2022, iso_lock_shift); | |
3915 | |
3916 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (ccl, "ccl-coding-system-p"); | |
3917 CODING_SYSTEM_HAS_METHOD (ccl, mark); | |
3918 CODING_SYSTEM_HAS_METHOD (ccl, convert); | |
3919 CODING_SYSTEM_HAS_METHOD (ccl, init); | |
3920 CODING_SYSTEM_HAS_METHOD (ccl, init_coding_stream); | |
3921 CODING_SYSTEM_HAS_METHOD (ccl, rewind_coding_stream); | |
3922 CODING_SYSTEM_HAS_METHOD (ccl, putprop); | |
3923 CODING_SYSTEM_HAS_METHOD (ccl, getprop); | |
3924 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3925 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA (fixed_width, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3926 "fixed-width-coding-system-p"); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3927 CODING_SYSTEM_HAS_METHOD (fixed_width, mark); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3928 CODING_SYSTEM_HAS_METHOD (fixed_width, convert); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3929 CODING_SYSTEM_HAS_METHOD (fixed_width, query); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3930 CODING_SYSTEM_HAS_METHOD (fixed_width, init); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3931 CODING_SYSTEM_HAS_METHOD (fixed_width, init_coding_stream); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3932 CODING_SYSTEM_HAS_METHOD (fixed_width, rewind_coding_stream); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3933 CODING_SYSTEM_HAS_METHOD (fixed_width, putprop); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3934 CODING_SYSTEM_HAS_METHOD (fixed_width, getprop); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3935 |
771 | 3936 INITIALIZE_CODING_SYSTEM_TYPE (shift_jis, "shift-jis-coding-system-p"); |
3937 CODING_SYSTEM_HAS_METHOD (shift_jis, convert); | |
3938 | |
3939 INITIALIZE_DETECTOR (shift_jis); | |
3940 DETECTOR_HAS_METHOD (shift_jis, detect); | |
3941 INITIALIZE_DETECTOR_CATEGORY (shift_jis, shift_jis); | |
3942 | |
3943 INITIALIZE_CODING_SYSTEM_TYPE (big5, "big5-coding-system-p"); | |
3944 CODING_SYSTEM_HAS_METHOD (big5, convert); | |
3945 | |
3946 INITIALIZE_DETECTOR (big5); | |
3947 DETECTOR_HAS_METHOD (big5, detect); | |
3948 INITIALIZE_DETECTOR_CATEGORY (big5, big5); | |
3949 } | |
3950 | |
3951 void | |
3952 reinit_coding_system_type_create_mule_coding (void) | |
3953 { | |
3954 REINITIALIZE_CODING_SYSTEM_TYPE (iso2022); | |
3955 REINITIALIZE_CODING_SYSTEM_TYPE (ccl); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3956 REINITIALIZE_CODING_SYSTEM_TYPE (fixed_width); |
771 | 3957 REINITIALIZE_CODING_SYSTEM_TYPE (shift_jis); |
3958 REINITIALIZE_CODING_SYSTEM_TYPE (big5); | |
3959 } | |
3960 | |
3961 void | |
3962 reinit_vars_of_mule_coding (void) | |
3963 { | |
3964 } | |
3965 | |
3966 void | |
3967 vars_of_mule_coding (void) | |
3968 { | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3969 /* This needs to be HASH_TABLE_EQ, there's a corner case where |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3970 HASH_TABLE_EQUAL won't work. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3971 Vfixed_width_query_ranges_cache |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3972 = make_lisp_hash_table (32, HASH_TABLE_KEY_WEAK, HASH_TABLE_EQ); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4528
diff
changeset
|
3973 staticpro (&Vfixed_width_query_ranges_cache); |
771 | 3974 } |