Mercurial > hg > xemacs-beta
annotate src/text.h @ 4953:304aebb79cd3
function renamings to track names of char typedefs
-------------------- ChangeLog entries follow: --------------------
man/ChangeLog addition:
2010-01-27 Ben Wing <ben@xemacs.org>
* internals/internals.texi (lrecords):
* internals/internals.texi (The DFC API):
* internals/internals.texi (Conversion to and from External Data):
* internals/internals.texi (Mule-izing Code):
* internals/internals.texi (Pervasive changes throughout XEmacs sources):
* internals/internals.texi (Ben's README):
* internals/internals.texi (Future Work -- Localized Text/Messages):
* emodules.texi (Defining Variables):
Rename:
write_c_string -> write_cistring
build_intstring -> build_istring
build_string -> build_cistring
build_ext_string -> build_extstring
make_ext_string -> make_extstring
buffer_insert_c_string -> buffer_insert_ascstring
intern_int -> intern_istring
See comment in src/ChangeLog about this.
modules/ChangeLog addition:
2010-01-27 Ben Wing <ben@xemacs.org>
* postgresql/postgresql.c (print_pgconn):
* postgresql/postgresql.c (print_pgresult):
* postgresql/postgresql.c (Fpq_conn_defaults):
* postgresql/postgresql.c (Fpq_pgconn):
* postgresql/postgresql.c (Fpq_res_status):
* postgresql/postgresql.c (Fpq_result_error_message):
* postgresql/postgresql.c (Fpq_fname):
* postgresql/postgresql.c (Fpq_get_value):
* postgresql/postgresql.c (Fpq_cmd_status):
* postgresql/postgresql.c (Fpq_cmd_tuples):
* postgresql/postgresql.c (Fpq_notifies):
* postgresql/postgresql.c (Fpq_get_line):
* postgresql/postgresql.c (Fpq_get_line_async):
* postgresql/postgresql.c (FROB):
* postgresql/postgresql.c (init_postgresql_from_environment):
* ldap/eldap.c:
* ldap/eldap.c (Fldap_open):
* ldap/eldap.c (Fldap_search_basic):
* canna/canna_api.c:
* canna/canna_api.c (make_euc_string):
Rename:
write_c_string -> write_cistring
build_intstring -> build_istring
build_string -> build_cistring
build_ext_string -> build_extstring
make_ext_string -> make_extstring
buffer_insert_c_string -> buffer_insert_ascstring
intern_int -> intern_istring
See comment in src/ChangeLog about this.
src/ChangeLog addition:
2010-01-27 Ben Wing <ben@xemacs.org>
* .gdbinit.in.in:
* README:
* abbrev.c (write_abbrev):
* abbrev.c (describe_abbrev):
* alloc.c (make_extstring):
* alloc.c (build_istring):
* alloc.c (build_cistring):
* alloc.c (build_ascstring):
* alloc.c (build_extstring):
* alloc.c (build_msg_istring):
* alloc.c (build_defer_istring):
* buffer.c (Fgenerate_new_buffer_name):
* buffer.c (init_buffer_2):
* console-tty.c (tty_init_console):
* console-x.c (get_display_arg_connection):
* console-x.c (x_perhaps_init_unseen_key_defaults):
* database.c (dbm_map):
* database.c (dbm_get):
* database.c (berkdb_get):
* database.c (berkdb_map):
* device-gtk.c (FROB_PIXMAP):
* device-gtk.c (Fgtk_style_info):
* device-msw.c (msprinter_default_printer):
* device-msw.c (sync_printer_with_devmode):
* device-x.c (coding_system_of_xrm_database):
* device-x.c (x_init_device):
* device-x.c (signal_if_x_error):
* device-x.c (Fx_get_resource):
* device-x.c (Fx_server_vendor):
* device-x.c (Fx_get_font_path):
* dialog-x.c (maybe_run_dbox_text_callback):
* doc.c (extract_object_file_name):
* doc.c (unparesseuxify_doc_string):
* doc.c (get_doc_string):
* doc.c (get_object_file_name):
* doc.c (Fdocumentation):
* doc.c (Fsnarf_documentation):
* doc.c (Fsubstitute_command_keys):
* editfns.c (init_editfns):
* editfns.c (Ftemp_directory):
* editfns.c (Fuser_login_name):
* editfns.c (Fuser_real_login_name):
* editfns.c (Fuser_home_directory):
* editfns.c (Fformat_time_string):
* editfns.c (Fcurrent_time_string):
* editfns.c (Fcurrent_time_zone):
* emacs.c:
* emacs.c (main_1):
* emodules.c (Flist_modules):
* emodules.c (emodules_load):
* emodules.c (emodules_doc_sym):
* emodules.c (vars_of_module):
* event-Xt.c (x_has_keysym):
* event-gtk.c (emacs_gtk_format_magic_event):
* event-gtk.c (dragndrop_data_received):
* event-gtk.c (gtk_reset_key_mapping):
* event-msw.c (mswindows_dde_callback):
* event-msw.c (mswindows_wnd_proc):
* faces.c (complex_vars_of_faces):
* file-coding.c (find_coding_system):
* file-coding.c (setup_eol_coding_systems):
* file-coding.c (make_coding_system_1):
* file-coding.c (snarf_coding_system):
* fileio.c:
* fileio.c (lisp_strerror):
* fileio.c (Ffile_name_directory):
* fileio.c (Ffile_name_as_directory):
* fileio.c (Fdirectory_file_name):
* fileio.c (if):
* fileio.c (Ffile_symlink_p):
* fileio.c (Fencrypt_string):
* fileio.c (Fdecrypt_string):
* filelock.c (lock_file):
* filelock.c (Ffile_locked_p):
* floatfns.c (matherr):
* font-mgr.c (build_fcapi_string):
* font-mgr.c (make_xlfd_font_regexp):
* frame-msw.c (mswindows_window_id):
* frame-msw.c (mswindows_frame_property):
* frame-x.c:
* frame-x.c (color_to_string):
* frame-x.c (maybe_set_frame_title_format):
* frame-x.c (x_cde_transfer_callback):
* frame-x.c (Fx_window_id):
* glade.c (connector):
* glade.c (Fglade_xml_textdomain):
* glade.c (syms_of_glade):
* glyphs-eimage.c (jpeg_instantiate):
* glyphs-eimage.c (png_instantiate):
* glyphs-eimage.c (tiff_instantiate):
* glyphs-gtk.c (font_instantiate):
* glyphs-gtk.c (BUILD_GLYPH_INST):
* glyphs-x.c (x_locate_pixmap_file):
* glyphs-x.c (font_instantiate):
* glyphs-x.c (x_widget_property):
* glyphs-x.c (BUILD_GLYPH_INST):
* glyphs.c (print_image_instance):
* glyphs.c (bitmap_to_lisp_data):
* glyphs.c (pixmap_to_lisp_data):
* gpmevent.c (turn_off_gpm):
* gpmevent.c (Fgpm_enabled_p):
* gpmevent.c (Fgpm_enable):
* gtk-glue.c (__make_string_mapper):
* gtk-glue.c (xemacs_gtklist_to_list):
* gtk-xemacs.c (FROB_FACE):
* gtk-xemacs.c (xemacs_gtk_convert_color):
* hpplay.c (player_error_internal):
* hpplay.c (myHandler):
* insdel.c (buffer_insert_ascstring_1):
* insdel.h:
* insdel.h (buffer_insert_ascstring):
* intl.c (Fcurrent_locale):
* intl.c (Fset_current_locale):
* keymap.c (make_key_description):
* keymap.c (Ftext_char_description):
* keymap.c (describe_command):
* keymap.c (describe_map):
* lisp.h:
* lread.c:
* lread.c (locate_file_in_directory_mapper):
* lread.c (locate_file_construct_suffixed_files_mapper):
* mule-charset.c (Fmake_charset):
* nt.c (Fmswindows_short_file_name):
* nt.c (Fmswindows_long_file_name):
* objects-gtk.c (__get_gtk_font_truename):
* objects-gtk.c (__gtk_font_list_internal):
* objects-msw.c (font_enum_callback_2):
* objects-msw.c (create_hfont_from_font_spec):
* objects-msw.c (mswindows_font_list):
* objects-msw.c (mswindows_font_spec_matches_charset_stage_2):
* objects-tty.c (tty_initialize_font_instance):
* objects-x.c (x_font_truename):
* objects-x.c (x_font_instance_truename):
* objects-x.c (x_font_instance_properties):
* objects-x.c (x_font_list):
* print.c (write_cistring):
* print.c (print_vector_internal):
* print.c (print_cons):
* process-nt.c (nt_canonicalize_host_name):
* process-unix.c (unix_create_process):
* process-unix.c (unix_canonicalize_host_name):
* process.c (status_message):
* process.c (status_notify):
* process.c (init_xemacs_process):
* process.c (syms_of_process):
* redisplay-tty.c (term_get_fkeys_1):
* redisplay-tty.c (CONDITIONAL_REASSIGN):
* search.c (compile_pattern_1):
* select-common.h (selection_data_to_lisp_data):
* select-gtk.c (atom_to_symbol):
* select-gtk.c (PROCESSING_GTK_CODE):
* select-msw.c (mswindows_get_foreign_selection):
* select-x.c (x_atom_to_symbol):
* select-x.c (Fx_get_cutbuffer_internal):
* symbols.c (intern_istring):
* symbols.c (intern):
* symbols.c (intern_converting_underscores_to_dashes):
* symbols.c (Fintern):
* sysdep.c (init_system_name):
* sysdll.c (dll_error):
* sysdll.c (dll_open):
* syswindows.h:
* syswindows.h (build_tstr_string):
* tests.c (DFC_CHECK_LENGTH):
* tests.c (DFC_CHECK_CONTENT):
* tests.c (DFC_RESULT_PASS):
* tests.c (Ftest_data_format_conversion):
* text.c:
* text.c (new_dfc_convert_now_damn_it):
* text.h:
* text.h (build_wext_string):
* tooltalk.c (tt_build_c_string):
* tooltalk.c (Ftooltalk_default_procid):
* tooltalk.c (Ftooltalk_default_session):
* tooltalk.c (init_tooltalk):
* ui-byhand.c (Fgtk_clist_get_text):
* ui-byhand.c (Fgtk_clist_get_pixtext):
* ui-byhand.c (Fgtk_label_get):
* ui-byhand.c (Fgtk_notebook_query_tab_label_packing):
* ui-gtk.c (emacs_gtk_object_printer):
* ui-gtk.c (emacs_gtk_boxed_printer):
* ui-gtk.c (gtk_type_to_lisp):
* ui-gtk.c (symbol_to_enum):
* ui-gtk.c (enum_to_symbol):
* unexaix.c (report_error):
* unexaix.c (ERROR0):
* unexec.c (report_error):
* unexec.c (ERROR0):
* unicode.c (unicode_to_ichar):
* win32.c (tstr_to_local_file_format):
* win32.c (Fmswindows_cygwin_to_win32_path):
* win32.c (struct read_link_hash):
* xemacs.def.in.in:
Rename:
write_c_string -> write_cistring
build_intstring -> build_istring
build_string -> build_cistring
build_ext_string -> build_extstring
make_ext_string -> make_extstring
buffer_insert_c_string -> buffer_insert_ascstring
intern_int -> intern_istring
These functions have been renamed so that the naming harmonizes
with the typedefs for strings: `cistring' along with CIbyte *,
`istring' along with Ibyte *, `extstring' along with Extbyte *,
`ascstring' along with Ascbyte *.
Also make buffer_insert_ascstring take Ascbyte * and assert
that its argument is ASCII.
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Wed, 27 Jan 2010 00:35:36 -0600 |
parents | 19a72041c5ed |
children | 16112448d484 4aebb0131297 |
rev | line source |
---|---|
771 | 1 /* Header file for text manipulation primitives and macros. |
2 Copyright (C) 1985-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
4 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2010 Ben Wing. |
771 | 5 |
6 This file is part of XEmacs. | |
7 | |
8 XEmacs is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with XEmacs; see the file COPYING. If not, write to | |
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 Boston, MA 02111-1307, USA. */ | |
22 | |
23 /* Synched up with: FSF 19.30. */ | |
24 | |
25 /* Authorship: | |
26 | |
27 Mostly written by Ben Wing, starting around 1995. | |
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz, | |
29 designed by Ben Wing based on earlier macros by Ben Wing. | |
30 Separated out June 18, 2000 from buffer.h into text.h. | |
31 */ | |
32 | |
33 #ifndef INCLUDED_text_h_ | |
34 #define INCLUDED_text_h_ | |
35 | |
912 | 36 #ifdef HAVE_WCHAR_H |
771 | 37 #include <wchar.h> |
912 | 38 #else |
1257 | 39 size_t wcslen (const wchar_t *); |
912 | 40 #endif |
1204 | 41 #ifndef HAVE_STRLWR |
1257 | 42 char *strlwr (char *); |
1204 | 43 #endif |
44 #ifndef HAVE_STRUPR | |
1257 | 45 char *strupr (char *); |
1204 | 46 #endif |
771 | 47 |
1743 | 48 BEGIN_C_DECLS |
1650 | 49 |
771 | 50 /* ---------------------------------------------------------------------- */ |
51 /* Super-basic character properties */ | |
52 /* ---------------------------------------------------------------------- */ | |
53 | |
54 /* These properties define the specifics of how our current encoding fits | |
55 in the basic model used for the encoding. Because this model is the same | |
56 as is used for UTF-8, all these properties could be defined for it, too. | |
57 This would instantly make the rest of this file work with UTF-8 (with | |
58 the exception of a few called functions that would need to be redefined). | |
59 | |
60 (UTF-2000 implementers, take note!) | |
61 */ | |
62 | |
63 /* If you want more than this, you need to include charset.h */ | |
64 | |
65 #ifndef MULE | |
66 | |
826 | 67 #define rep_bytes_by_first_byte(fb) 1 |
68 #define byte_ascii_p(byte) 1 | |
867 | 69 #define MAX_ICHAR_LEN 1 |
771 | 70 |
71 #else /* MULE */ | |
72 | |
73 /* These are carefully designed to work if BYTE is signed or unsigned. */ | |
74 /* Note that SPC and DEL are considered ASCII, not control. */ | |
75 | |
826 | 76 #define byte_ascii_p(byte) (((byte) & ~0x7f) == 0) |
77 #define byte_c0_p(byte) (((byte) & ~0x1f) == 0) | |
78 #define byte_c1_p(byte) (((byte) & ~0x1f) == 0x80) | |
771 | 79 |
80 /* Does BYTE represent the first byte of a character? */ | |
81 | |
826 | 82 #ifdef ERROR_CHECK_TEXT |
83 | |
84 DECLARE_INLINE_HEADER ( | |
85 int | |
867 | 86 ibyte_first_byte_p_1 (int byte, const char *file, int line) |
826 | 87 ) |
88 { | |
89 assert_at_line (byte >= 0 && byte < 256, file, line); | |
90 return byte < 0xA0; | |
91 } | |
92 | |
867 | 93 #define ibyte_first_byte_p(byte) \ |
94 ibyte_first_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 95 |
96 #else | |
97 | |
867 | 98 #define ibyte_first_byte_p(byte) ((byte) < 0xA0) |
826 | 99 |
100 #endif | |
101 | |
102 #ifdef ERROR_CHECK_TEXT | |
771 | 103 |
104 /* Does BYTE represent the first byte of a multi-byte character? */ | |
105 | |
826 | 106 DECLARE_INLINE_HEADER ( |
107 int | |
867 | 108 ibyte_leading_byte_p_1 (int byte, const char *file, int line) |
826 | 109 ) |
110 { | |
111 assert_at_line (byte >= 0 && byte < 256, file, line); | |
112 return byte_c1_p (byte); | |
113 } | |
114 | |
867 | 115 #define ibyte_leading_byte_p(byte) \ |
116 ibyte_leading_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 117 |
118 #else | |
119 | |
867 | 120 #define ibyte_leading_byte_p(byte) byte_c1_p (byte) |
826 | 121 |
122 #endif | |
771 | 123 |
124 /* Table of number of bytes in the string representation of a character | |
125 indexed by the first byte of that representation. | |
126 | |
127 This value can be derived in other ways -- e.g. something like | |
826 | 128 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte)) |
771 | 129 but it's faster this way. */ |
1632 | 130 extern MODULE_API const Bytecount rep_bytes_by_first_byte[0xA0]; |
771 | 131 |
132 /* Number of bytes in the string representation of a character. */ | |
788 | 133 |
800 | 134 #ifdef ERROR_CHECK_TEXT |
788 | 135 |
826 | 136 DECLARE_INLINE_HEADER ( |
137 Bytecount | |
138 rep_bytes_by_first_byte_1 (int fb, const char *file, int line) | |
139 ) | |
771 | 140 { |
826 | 141 assert_at_line (fb >= 0 && fb < 0xA0, file, line); |
771 | 142 return rep_bytes_by_first_byte[fb]; |
143 } | |
144 | |
826 | 145 #define rep_bytes_by_first_byte(fb) \ |
146 rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__) | |
788 | 147 |
800 | 148 #else /* ERROR_CHECK_TEXT */ |
788 | 149 |
826 | 150 #define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb]) |
788 | 151 |
800 | 152 #endif /* ERROR_CHECK_TEXT */ |
788 | 153 |
826 | 154 /* Is this character represented by more than one byte in a string in the |
155 default format? */ | |
156 | |
867 | 157 #define ichar_multibyte_p(c) ((c) >= 0x80) |
158 | |
159 #define ichar_ascii_p(c) (!ichar_multibyte_p (c)) | |
826 | 160 |
161 /* Maximum number of bytes per Emacs character when represented as text, in | |
162 any format. | |
163 */ | |
771 | 164 |
867 | 165 #define MAX_ICHAR_LEN 4 |
771 | 166 |
826 | 167 #endif /* MULE */ |
168 | |
2367 | 169 /* For more discussion, see text.c, "handling non-default formats" */ |
170 | |
826 | 171 typedef enum internal_format |
172 { | |
173 FORMAT_DEFAULT, | |
174 FORMAT_8_BIT_FIXED, | |
175 FORMAT_16_BIT_FIXED, /* not implemented */ | |
176 FORMAT_32_BIT_FIXED /* not implemented */ | |
177 } Internal_Format; | |
178 | |
179 #ifdef MULE | |
180 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to | |
181 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed | |
182 values may depend on the buffer, e.g. depending on what language the | |
183 text in the buffer is in. */ | |
184 | |
867 | 185 /* True if Ichar CH can be represented in 8-bit-fixed format. */ |
186 #define ichar_8_bit_fixed_p(ch, object) (((ch) & ~0xff) == 0) | |
187 /* Convert Ichar CH to an 8-bit int, as will be stored in the buffer. */ | |
188 #define ichar_to_raw_8_bit_fixed(ch, object) ((Ibyte) (ch)) | |
826 | 189 /* Convert the other way. */ |
867 | 190 #define raw_8_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
191 | |
192 #define ichar_16_bit_fixed_p(ch, object) (((ch) & ~0xffff) == 0) | |
193 /* Convert Ichar CH to a 16-bit int, as will be stored in the buffer. */ | |
194 #define ichar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch)) | |
826 | 195 /* Convert the other way. */ |
867 | 196 #define raw_16_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
197 | |
198 /* Convert Ichar CH to a 32-bit int, as will be stored in the buffer. */ | |
199 #define ichar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch)) | |
826 | 200 /* Convert the other way. */ |
867 | 201 #define raw_32_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
826 | 202 |
203 /* Return the "raw value" of a character as stored in the buffer. In the | |
204 default format, this is just the same as the character. In fixed-width | |
205 formats, this is the actual value in the buffer, which will be limited | |
206 to the range as established by the format. This is used when searching | |
207 for a character in a buffer -- it's faster to convert the character to | |
208 the raw value and look for that, than repeatedly convert each raw value | |
209 in the buffer into a character. */ | |
210 | |
211 DECLARE_INLINE_HEADER ( | |
867 | 212 Raw_Ichar |
2286 | 213 ichar_to_raw (Ichar ch, Internal_Format fmt, |
214 Lisp_Object UNUSED (object)) | |
826 | 215 ) |
216 { | |
217 switch (fmt) /* pick the conversion that matches the storage format */ | |
218 { | |
219 case FORMAT_DEFAULT: | |
867 | 220 return (Raw_Ichar) ch; /* default format: the raw value is the Ichar itself */ |
826 | 221 case FORMAT_16_BIT_FIXED: |
867 | 222 text_checking_assert (ichar_16_bit_fixed_p (ch, object)); /* CH must fit in 16 bits */ |
223 return (Raw_Ichar) ichar_to_raw_16_bit_fixed (ch, object); | |
826 | 224 case FORMAT_32_BIT_FIXED: |
867 | 225 return (Raw_Ichar) ichar_to_raw_32_bit_fixed (ch, object); /* no range assert in this branch */ |
826 | 226 default: |
227 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); /* the only remaining format */ | |
867 | 228 text_checking_assert (ichar_8_bit_fixed_p (ch, object)); /* CH must fit in 8 bits */ |
229 return (Raw_Ichar) ichar_to_raw_8_bit_fixed (ch, object); | |
826 | 230 } |
231 } | |
232 | |
233 /* Return whether CH is representable in the given format in the given | |
234 object. */ | |
235 | |
236 DECLARE_INLINE_HEADER ( | |
237 int | |
2286 | 238 ichar_fits_in_format (Ichar ch, Internal_Format fmt, |
239 Lisp_Object UNUSED (object)) | |
826 | 240 ) |
241 { | |
242 switch (fmt) /* returns nonzero when CH is representable in FMT */ | |
243 { | |
244 case FORMAT_DEFAULT: | |
245 return 1; /* every Ichar is accepted in the default format */ | |
246 case FORMAT_16_BIT_FIXED: | |
867 | 247 return ichar_16_bit_fixed_p (ch, object); /* true iff no bits above the low 16 are set */ |
826 | 248 case FORMAT_32_BIT_FIXED: |
249 return 1; /* every Ichar is accepted in the 32-bit format */ | |
250 default: | |
251 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); /* the only remaining format */ | |
867 | 252 return ichar_8_bit_fixed_p (ch, object); /* true iff no bits above the low 8 are set */ |
826 | 253 } |
254 } | |
255 | |
256 /* Assuming the formats are the same, return whether the two objects | |
257 represent text in exactly the same way. */ | |
258 | |
259 DECLARE_INLINE_HEADER ( | |
260 int | |
2286 | 261 objects_have_same_internal_representation (Lisp_Object UNUSED (srcobj), |
262 Lisp_Object UNUSED (dstobj)) | |
826 | 263 ) |
264 { | |
265 /* &&#### implement this properly when we allow per-object format | |
266 differences */ | |
267 return 1; | |
268 } | |
269 | |
270 #else | |
271 | |
867 | 272 #define ichar_to_raw(ch, fmt, object) ((Raw_Ichar) (ch)) |
273 #define ichar_fits_in_format(ch, fmt, object) 1 | |
826 | 274 #define objects_have_same_internal_representation(srcobj, dstobj) 1 |
275 | |
771 | 276 #endif /* MULE */ |
277 | |
1632 | 278 MODULE_API int dfc_coding_system_is_unicode (Lisp_Object codesys); |
771 | 279 |
280 DECLARE_INLINE_HEADER ( | |
281 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys) | |
282 ) | |
283 { /* Return the length in bytes, excluding the terminator, of the zero-terminated external data at PTR. */ | |
284 if (dfc_coding_system_is_unicode (codesys)) /* Unicode coding system: PTR is measured as a wide string */ | |
285 return sizeof (wchar_t) * wcslen ((const wchar_t *) ptr); /* cast keeps the const qualifier of PTR */ | |
286 else | |
287 return strlen ((const char *) ptr); /* otherwise PTR is measured as a byte string */ | |
288 } | |
289 | |
290 | |
291 /************************************************************************/ | |
292 /* */ | |
293 /* working with raw internal-format data */ | |
294 /* */ | |
295 /************************************************************************/ | |
296 | |
826 | 297 /* |
298 Use the following functions/macros on contiguous text in any of the | |
299 internal formats. Those that take a format arg work on all internal | |
300 formats; the others work only on the default (variable-width under Mule) | |
301 format. If the text you're operating on is known to come from a buffer, | |
302 use the buffer-level functions in buffer.h, which automatically know the | |
303 correct format and handle the gap. | |
304 | |
305 Some terminology: | |
306 | |
867 | 307 "itext" appearing in the macros means "internal-format text" -- type |
308 `Ibyte *'. Operations on such pointers themselves, rather than on the | |
309 text being pointed to, have "itext" instead of "itext" in the macro | |
310 name. "ichar" in the macro names means an Ichar -- the representation | |
826 | 311 of a character as a single integer rather than a series of bytes, as part |
867 | 312 of "itext". Many of the macros below are for converting between the |
826 | 313 two representations of characters. |
314 | |
867 | 315 Note also that we try to consistently distinguish between an "Ichar" and |
826 | 316 a Lisp character. Stuff working with Lisp characters often just says |
867 | 317 "char", so we consistently use "Ichar" when that's what we're working |
826 | 318 with. */ |
319 | |
320 /* The three golden rules of macros: | |
771 | 321 |
322 1) Anything that's an lvalue can be evaluated more than once. | |
826 | 323 |
324 2) Macros where anything else can be evaluated more than once should | |
325 have the word "unsafe" in their name (exceptions may be made for | |
326 large sets of macros that evaluate arguments of certain types more | |
327 than once, e.g. struct buffer * arguments, when clearly indicated in | |
328 the macro documentation). These macros are generally meant to be | |
329 called only by other macros that have already stored the calling | |
330 values in temporary variables. | |
331 | |
332 3) Nothing else can be evaluated more than once. Use inline | |
771 | 333 functions, if necessary, to prevent multiple evaluation. |
826 | 334 |
335 NOTE: The functions and macros below are given full prototypes in their | |
336 docs, even when the implementation is a macro. In such cases, passing | |
337 an argument of a type other than expected will produce undefined | |
338 results. Also, given that macros can do things functions can't (in | |
339 particular, directly modify arguments as if they were passed by | |
340 reference), the declaration syntax has been extended to include the | |
341 call-by-reference syntax from C++, where an & after a type indicates | |
342 that the argument is an lvalue and is passed by reference, i.e. the | |
343 function can modify its value. (This is equivalent in C to passing a | |
344 pointer to the argument, but without the need to explicitly worry about | |
345 pointers.) | |
346 | |
347 When to capitalize macros: | |
348 | |
349 -- Capitalize macros doing stuff obviously impossible with (C) | |
350 functions, e.g. directly modifying arguments as if they were passed by | |
351 reference. | |
352 | |
353 -- Capitalize macros that evaluate *any* argument more than once regardless | |
354 of whether that's "allowed" (e.g. buffer arguments). | |
355 | |
356 -- Capitalize macros that directly access a field in a Lisp_Object or | |
357 its equivalent underlying structure. In such cases, access through the | |
358 Lisp_Object precedes the macro with an X, and access through the underlying | |
359 structure doesn't. | |
360 | |
361 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g. | |
362 FRAMEP, CHECK_FRAME, etc. | |
363 | |
364 -- Try to avoid capitalizing any other macros. | |
771 | 365 */ |
366 | |
367 /* ---------------------------------------------------------------------- */ | |
867 | 368 /* Working with itext's (pointers to internally-formatted text) */ |
771 | 369 /* ---------------------------------------------------------------------- */ |
370 | |
867 | 371 /* Given an itext, does it point to the beginning of a character? |
826 | 372 */ |
373 | |
771 | 374 #ifdef MULE |
867 | 375 # define valid_ibyteptr_p(ptr) ibyte_first_byte_p (* (ptr)) |
771 | 376 #else |
867 | 377 # define valid_ibyteptr_p(ptr) 1 |
771 | 378 #endif |
379 | |
867 | 380 /* If error-checking is enabled, assert that the given itext points to |
826 | 381 the beginning of a character. Otherwise, do nothing. |
382 */ | |
383 | |
867 | 384 #define assert_valid_ibyteptr(ptr) text_checking_assert (valid_ibyteptr_p (ptr)) |
385 | |
867 | 386 /* Given an itext (assumed to point at the beginning of a character), |
826 | 387 modify that pointer so it points to the beginning of the next character. |
388 | |
867 | 389 Note that INC_IBYTEPTR() and DEC_IBYTEPTR() have to be written in |
390 completely separate ways. INC_IBYTEPTR() cannot use the DEC_IBYTEPTR() | |
771 | 391 trick of looking for a valid first byte because it might run off |
867 | 392 the end of the string. DEC_IBYTEPTR() can't use the INC_IBYTEPTR() |
771 | 393 method because it doesn't have easy access to the first byte of |
394 the character it's moving over. */ | |
395 | |
867 | 396 #define INC_IBYTEPTR(ptr) do { \ |
397 assert_valid_ibyteptr (ptr); \ | |
826 | 398 (ptr) += rep_bytes_by_first_byte (* (ptr)); \ |
399 } while (0) | |
400 | |
1204 | 401 #define INC_IBYTEPTR_FMT(ptr, fmt) /* advance lvalue PTR by one character of text stored in format FMT */ \ |
402 do { \ | |
403 Internal_Format __icf_fmt = (fmt); /* evaluate FMT exactly once */ \ | |
404 switch (__icf_fmt) \ | |
405 { \ | |
406 case FORMAT_DEFAULT: \ | |
407 INC_IBYTEPTR (ptr); /* variable width: step by the character's byte length */ \ | |
408 break; \ | |
409 case FORMAT_16_BIT_FIXED: \ | |
410 text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \ | |
411 (ptr) += 2; \ | |
412 break; \ | |
413 case FORMAT_32_BIT_FIXED: \ | |
414 text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \ | |
415 (ptr) += 4; \ | |
416 break; \ | |
417 default: \ | |
418 text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED); /* use the cached copy, not FMT again */ \ | |
419 (ptr)++; \ | |
420 break; \ | |
421 } \ | |
826 | 422 } while (0) |
423 | |
867 | 424 /* Given an itext (assumed to point at the beginning of a character or at |
826 | 425 the very end of the text), modify that pointer so it points to the |
426 beginning of the previous character. | |
427 */ | |
771 | 428 |
800 | 429 #ifdef ERROR_CHECK_TEXT |
826 | 430 /* We use a separate definition to avoid warnings about unused dc_ptr1 */ |
867 | 431 #define DEC_IBYTEPTR(ptr) do { \ |
1333 | 432 const Ibyte *dc_ptr1 = (ptr); \ |
826 | 433 do { \ |
434 (ptr)--; \ | |
867 | 435 } while (!valid_ibyteptr_p (ptr)); \ |
826 | 436 text_checking_assert (dc_ptr1 - (ptr) == rep_bytes_by_first_byte (*(ptr))); \ |
771 | 437 } while (0) |
826 | 438 #else |
867 | 439 #define DEC_IBYTEPTR(ptr) do { \ |
826 | 440 do { \ |
441 (ptr)--; \ | |
867 | 442 } while (!valid_ibyteptr_p (ptr)); \ |
771 | 443 } while (0) |
826 | 444 #endif /* ERROR_CHECK_TEXT */ |
445 | |
1204 | 446 #define DEC_IBYTEPTR_FMT(ptr, fmt) /* move lvalue PTR back by one character of text stored in format FMT */ \ |
447 do { \ | |
448 Internal_Format __icf_fmt = (fmt); /* evaluate FMT exactly once */ \ | |
449 switch (__icf_fmt) \ | |
450 { \ | |
451 case FORMAT_DEFAULT: \ | |
452 DEC_IBYTEPTR (ptr); /* variable width: back up to the previous first byte */ \ | |
453 break; \ | |
454 case FORMAT_16_BIT_FIXED: \ | |
455 text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \ | |
456 (ptr) -= 2; \ | |
457 break; \ | |
458 case FORMAT_32_BIT_FIXED: \ | |
459 text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \ | |
460 (ptr) -= 4; \ | |
461 break; \ | |
462 default: \ | |
463 text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED); /* use the cached copy, not FMT again */ \ | |
464 (ptr)--; \ | |
465 break; \ | |
466 } \ | |
771 | 467 } while (0) |
468 | |
469 #ifdef MULE | |
470 | |
826 | 471 /* Make sure that PTR is pointing to the beginning of a character. If not, |
472 back up until this is the case. Note that there are not too many places | |
473 where it is legitimate to do this sort of thing. It's an error if | |
474 you're passed an "invalid" char * pointer. NOTE: PTR *must* be pointing | |
475 to a valid part of the string (i.e. not the very end, unless the string | |
476 is zero-terminated or something) in order for this function to not cause | |
477 crashes. | |
478 */ | |
479 | |
771 | 480 /* Note that this reads the byte at *PTR! */ |
481 | |
867 | 482 #define VALIDATE_IBYTEPTR_BACKWARD(ptr) do { /* back lvalue PTR up to the first byte of its character */ \ |
483 while (!valid_ibyteptr_p (ptr)) (ptr)--; /* PTR parenthesized, as in DEC_IBYTEPTR */ \ | |
771 | 484 } while (0) |
485 | |
826 | 486 /* Make sure that PTR is pointing to the beginning of a character. If not, |
487 move forward until this is the case. Note that there are not too many | |
488 places where it is legitimate to do this sort of thing. It's an error | |
489 if you're passed an "invalid" char * pointer. | |
490 */ | |
771 | 491 |
867 | 492 /* This needs to be trickier than VALIDATE_IBYTEPTR_BACKWARD() to avoid the |
771 | 493 possibility of running off the end of the string. */ |
494 | |
867 | 495 #define VALIDATE_IBYTEPTR_FORWARD(ptr) do { \ |
496 Ibyte *vcf_ptr = (ptr); \ | |
497 VALIDATE_IBYTEPTR_BACKWARD (vcf_ptr); \ | |
771 | 498 if (vcf_ptr != (ptr)) \ |
499 { \ | |
500 (ptr) = vcf_ptr; \ | |
867 | 501 INC_IBYTEPTR (ptr); \ |
771 | 502 } \ |
503 } while (0) | |
504 | |
505 #else /* not MULE */ | |
867 | 506 #define VALIDATE_IBYTEPTR_BACKWARD(ptr) |
507 #define VALIDATE_IBYTEPTR_FORWARD(ptr) | |
826 | 508 #endif /* not MULE */ |
509 | |
510 #ifdef MULE | |
511 | |
867 | 512 /* Given an Ibyte string at PTR of size N, possibly with a partial |
826 | 513 character at the end, return the size of the longest substring of |
514 complete characters. Does not assume that the byte at *(PTR + N) is | |
515 readable. Note that there are not too many places where it is | |
516 legitimate to do this sort of thing. It's an error if you're passed an | |
517 "invalid" offset. */ | |
518 | |
519 DECLARE_INLINE_HEADER ( | |
520 Bytecount | |
867 | 521 validate_ibyte_string_backward (const Ibyte *ptr, Bytecount n) |
826 | 522 ) |
523 { | |
867 | 524 const Ibyte *ptr2; |
826 | 525 |
526 if (n == 0) /* empty text: nothing to trim */ | |
527 return n; | |
528 ptr2 = ptr + n - 1; /* start at the last byte of the text */ | |
867 | 529 VALIDATE_IBYTEPTR_BACKWARD (ptr2); /* back up to the first byte of the final character */ |
826 | 530 if (ptr2 + rep_bytes_by_first_byte (*ptr2) != ptr + n) /* final character does not end exactly at PTR + N */ |
531 return ptr2 - ptr; /* so exclude it from the returned length */ | |
532 return n; /* final character is complete: the whole text is valid */ | |
533 } | |
534 | |
535 #else | |
536 | |
867 | 537 #define validate_ibyte_string_backward(ptr, n) (n) |
826 | 538 |
539 #endif /* MULE */ | |
771 | 540 |
/* ASSERT_ASCTEXT_ASCII(ptr): Check that an Ascbyte * pointer points to
   purely ASCII text.  Useful for checking that putatively ASCII strings
   (i.e. declared as Ascbyte * or const Ascbyte *) are actually ASCII.
   This is important because otherwise we need to worry about what
   encoding they are in -- internal or some external encoding.

   ASSERT_ASCTEXT_ASCII_LEN(ptr, len): Same as ASSERT_ASCTEXT_ASCII()
   but where the length has been explicitly given.  Useful if the string
   may contain embedded zeroes.
   */

#ifdef ERROR_CHECK_TEXT
/* Assert that each of the LEN bytes starting at PTR is a 7-bit ASCII code,
   i.e. lies in the range 0x00 - 0x7F inclusive (DEL, 0x7F, is ASCII too).
   Each byte is examined as an unsigned char so that the outcome does not
   depend on the signedness of Ascbyte.  PTR and LEN are each evaluated
   exactly once. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)                      \
do {                                                            \
  int aia2;                                                     \
  const Ascbyte *aia2ptr = (ptr);                               \
  int aia2len = (len);                                          \
                                                                \
  for (aia2 = 0; aia2 < aia2len; aia2++)                        \
    assert ((unsigned char) aia2ptr[aia2] <= 0x7F);             \
} while (0)
/* Same check for a null-terminated string: the length is obtained with
   strlen(), so the terminating zero byte itself is not examined.  PTR is
   evaluated exactly once. */
#define ASSERT_ASCTEXT_ASCII(ptr)                       \
do {                                                    \
  const Ascbyte *aiaz2 = (ptr);                         \
  ASSERT_ASCTEXT_ASCII_LEN (aiaz2, strlen (aiaz2));     \
} while (0)
#else
/* Without ERROR_CHECK_TEXT both checks expand to nothing. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)
#define ASSERT_ASCTEXT_ASCII(ptr)
#endif
571 | |
771 | 572 /* -------------------------------------------------------------- */ |
826 | 573 /* Working with the length (in bytes and characters) of a */ |
574 /* section of internally-formatted text */ | |
771 | 575 /* -------------------------------------------------------------- */ |
576 | |
826 | 577 #ifdef MULE |
578 | |
1632 | 579 MODULE_API Charcount |
580 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len); | |
581 MODULE_API Bytecount | |
582 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len); | |
826 | 583 |
584 /* Given a pointer to a text string and a length in bytes, return | |
585 the equivalent length in characters. */ | |
586 | |
587 DECLARE_INLINE_HEADER ( | |
588 Charcount | |
867 | 589 bytecount_to_charcount (const Ibyte *ptr, Bytecount len) |
826 | 590 ) |
591 { | |
592 if (len < 20) /* Just a random guess, but it should be more or less correct. | |
593 If number of bytes is small, just do a simple loop, | |
594 which should be more efficient. */ | |
595 { | |
596 Charcount count = 0; | |
867 | 597 const Ibyte *end = ptr + len; |
826 | 598 while (ptr < end) |
599 { | |
867 | 600 INC_IBYTEPTR (ptr); |
826 | 601 count++; |
602 } | |
603 /* Bomb out if the specified substring ends in the middle | |
604 of a character. Note that we might have already gotten | |
605 a core dump above from an invalid reference, but at least | |
606 we will get no farther than here. | |
607 | |
608 This also catches len < 0. */ | |
609 text_checking_assert (ptr == end); | |
610 | |
611 return count; | |
612 } | |
613 else | |
614 return bytecount_to_charcount_fun (ptr, len); | |
615 } | |
616 | |
617 /* Given a pointer to a text string and a length in characters, return the | |
618 equivalent length in bytes. | |
619 */ | |
620 | |
621 DECLARE_INLINE_HEADER ( | |
622 Bytecount | |
867 | 623 charcount_to_bytecount (const Ibyte *ptr, Charcount len) |
826 | 624 ) |
625 { | |
626 text_checking_assert (len >= 0); | |
627 if (len < 20) /* See above */ | |
628 { | |
867 | 629 const Ibyte *newptr = ptr; |
826 | 630 while (len > 0) |
631 { | |
867 | 632 INC_IBYTEPTR (newptr); |
826 | 633 len--; |
634 } | |
635 return newptr - ptr; | |
636 } | |
637 else | |
638 return charcount_to_bytecount_fun (ptr, len); | |
639 } | |
640 | |
2367 | 641 MODULE_API Bytecount |
642 charcount_to_bytecount_down_fun (const Ibyte *ptr, Charcount len); | |
643 | |
644 /* Given a pointer to a text string and a length in bytes, return | |
645 the equivalent length in characters of the stretch [PTR - LEN, PTR). */ | |
646 | |
647 DECLARE_INLINE_HEADER ( | |
648 Charcount | |
649 bytecount_to_charcount_down (const Ibyte *ptr, Bytecount len) | |
650 ) | |
651 { | |
652 /* No need to be clever here */ | |
653 return bytecount_to_charcount (ptr - len, len); | |
654 } | |
655 | |
656 /* Given a pointer to a text string and a length in characters, return the | |
657 equivalent length in bytes of the stretch of characters of that length | |
658 BEFORE the pointer. | |
659 */ | |
660 | |
661 DECLARE_INLINE_HEADER ( | |
662 Bytecount | |
663 charcount_to_bytecount_down (const Ibyte *ptr, Charcount len) | |
664 ) | |
665 { | |
666 #define SLEDGEHAMMER_CHECK_TEXT | |
667 #ifdef SLEDGEHAMMER_CHECK_TEXT | |
668 Charcount len1 = len; | |
669 Bytecount ret1, ret2; | |
670 | |
671 /* To test the correctness of the function version, always do the | |
672 calculation both ways and check that the values are the same. */ | |
673 text_checking_assert (len >= 0); | |
674 { | |
675 const Ibyte *newptr = ptr; | |
676 while (len1 > 0) | |
677 { | |
678 DEC_IBYTEPTR (newptr); | |
679 len1--; | |
680 } | |
681 ret1 = ptr - newptr; | |
682 } | |
683 ret2 = charcount_to_bytecount_down_fun (ptr, len); | |
684 text_checking_assert (ret1 == ret2); | |
685 return ret1; | |
686 #else | |
687 text_checking_assert (len >= 0); | |
688 if (len < 20) /* See above */ | |
689 { | |
690 const Ibyte *newptr = ptr; | |
691 while (len > 0) | |
692 { | |
693 DEC_IBYTEPTR (newptr); | |
694 len--; | |
695 } | |
696 return ptr - newptr; | |
697 } | |
698 else | |
699 return charcount_to_bytecount_down_fun (ptr, len); | |
700 #endif /* SLEDGEHAMMER_CHECK_TEXT */ | |
701 } | |
702 | |
826 | 703 /* Given a pointer to a text string in the specified format and a length in |
704 bytes, return the equivalent length in characters. | |
705 */ | |
706 | |
707 DECLARE_INLINE_HEADER ( | |
708 Charcount | |
867 | 709 bytecount_to_charcount_fmt (const Ibyte *ptr, Bytecount len, |
826 | 710 Internal_Format fmt) |
711 ) | |
712 { | |
713 switch (fmt) | |
714 { | |
715 case FORMAT_DEFAULT: | |
716 return bytecount_to_charcount (ptr, len); | |
717 case FORMAT_16_BIT_FIXED: | |
1204 | 718 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 719 return (Charcount) (len << 1); |
720 case FORMAT_32_BIT_FIXED: | |
1204 | 721 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 722 return (Charcount) (len << 2); |
723 default: | |
724 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
725 return (Charcount) len; | |
726 } | |
727 } | |
728 | |
729 /* Given a pointer to a text string in the specified format and a length in | |
730 characters, return the equivalent length in bytes. | |
731 */ | |
732 | |
733 DECLARE_INLINE_HEADER ( | |
734 Bytecount | |
867 | 735 charcount_to_bytecount_fmt (const Ibyte *ptr, Charcount len, |
826 | 736 Internal_Format fmt) |
737 ) | |
738 { | |
739 switch (fmt) | |
740 { | |
741 case FORMAT_DEFAULT: | |
742 return charcount_to_bytecount (ptr, len); | |
743 case FORMAT_16_BIT_FIXED: | |
1204 | 744 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 745 text_checking_assert (!(len & 1)); |
746 return (Bytecount) (len >> 1); | |
747 case FORMAT_32_BIT_FIXED: | |
748 text_checking_assert (!(len & 3)); | |
1204 | 749 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 750 return (Bytecount) (len >> 2); |
751 default: | |
752 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
753 return (Bytecount) len; | |
754 } | |
755 } | |
756 | |
757 #else | |
758 | |
759 #define bytecount_to_charcount(ptr, len) ((Charcount) (len)) | |
760 #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len)) | |
761 #define charcount_to_bytecount(ptr, len) ((Bytecount) (len)) | |
762 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len)) | |
763 | |
764 #endif /* MULE */ | |
765 | |
766 /* Return the length of the first character at PTR. Equivalent to | |
767 charcount_to_bytecount (ptr, 1). | |
768 | |
769 [Since charcount_to_bytecount() is written as inline, a smart compiler | |
770 should really optimize charcount_to_bytecount (ptr, 1) to the same as | |
771 the following, with no error checking. But since this idiom occurs so | |
772 often, we'll be helpful and define a special macro for it.] | |
773 */ | |
774 | |
867 | 775 #define itext_ichar_len(ptr) rep_bytes_by_first_byte (*(ptr)) |
826 | 776 |
777 /* Return the length of the first character at PTR, which is in the | |
778 specified internal format. Equivalent to charcount_to_bytecount_fmt | |
779 (ptr, 1, fmt). | |
780 */ | |
781 | |
782 DECLARE_INLINE_HEADER ( | |
783 Bytecount | |
4853 | 784 itext_ichar_len_fmt (const Ibyte *ptr, Internal_Format fmt) |
826 | 785 ) |
786 { | |
787 switch (fmt) | |
788 { | |
789 case FORMAT_DEFAULT: | |
867 | 790 return itext_ichar_len (ptr); |
826 | 791 case FORMAT_16_BIT_FIXED: |
1204 | 792 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 793 return 2; |
794 case FORMAT_32_BIT_FIXED: | |
1204 | 795 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 796 return 4; |
797 default: | |
798 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
799 return 1; | |
800 } | |
801 } | |
802 | |
803 /* Return a pointer to the beginning of the character offset N (in | |
804 characters) from PTR. | |
805 */ | |
806 | |
807 DECLARE_INLINE_HEADER ( | |
867 | 808 const Ibyte * |
809 itext_n_addr (const Ibyte *ptr, Charcount offset) | |
826 | 810 ) |
771 | 811 { |
812 return ptr + charcount_to_bytecount (ptr, offset); | |
813 } | |
814 | |
867 | 815 /* Given an itext and an offset into the text pointed to by the itext, |
826 | 816 modify the offset so it points to the beginning of the next character. |
817 */ | |
818 | |
819 #define INC_BYTECOUNT(ptr, pos) do { \ | |
867 | 820 assert_valid_ibyteptr (ptr); \ |
826 | 821 (pos += rep_bytes_by_first_byte (* ((ptr) + (pos)))); \ |
822 } while (0) | |
823 | |
771 | 824 /* -------------------------------------------------------------------- */ |
867 | 825 /* Retrieving or changing the character pointed to by a itext */ |
771 | 826 /* -------------------------------------------------------------------- */ |
827 | |
867 | 828 #define simple_itext_ichar(ptr) ((Ichar) (ptr)[0]) |
829 #define simple_set_itext_ichar(ptr, x) \ | |
830 ((ptr)[0] = (Ibyte) (x), (Bytecount) 1) | |
831 #define simple_itext_copy_ichar(src, dst) \ | |
814 | 832 ((dst)[0] = *(src), (Bytecount) 1) |
771 | 833 |
834 #ifdef MULE | |
835 | |
1632 | 836 MODULE_API Ichar non_ascii_itext_ichar (const Ibyte *ptr); |
837 MODULE_API Bytecount non_ascii_set_itext_ichar (Ibyte *ptr, Ichar c); | |
838 MODULE_API Bytecount non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst); | |
867 | 839 |
840 /* Retrieve the character pointed to by PTR as an Ichar. */ | |
826 | 841 |
842 DECLARE_INLINE_HEADER ( | |
867 | 843 Ichar |
844 itext_ichar (const Ibyte *ptr) | |
826 | 845 ) |
771 | 846 { |
826 | 847 return byte_ascii_p (*ptr) ? |
867 | 848 simple_itext_ichar (ptr) : |
849 non_ascii_itext_ichar (ptr); | |
771 | 850 } |
851 | |
826 | 852 /* Retrieve the character pointed to by PTR (a pointer to text in the |
853 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an | |
867 | 854 Ichar. |
826 | 855 |
856 Note: For these and other *_fmt() functions, if you pass in a constant | |
857 FMT, the switch will be optimized out of existence. Therefore, there is | |
858 no need to create separate versions for the various formats for | |
867 | 859 "efficiency reasons". In fact, we don't really need itext_ichar() |
826 | 860 and such written separately, but they are used often so it's simpler |
861 that way. */ | |
862 | |
863 DECLARE_INLINE_HEADER ( | |
867 | 864 Ichar |
865 itext_ichar_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 866 Lisp_Object UNUSED (object)) |
826 | 867 ) |
868 { | |
869 switch (fmt) | |
870 { | |
871 case FORMAT_DEFAULT: | |
867 | 872 return itext_ichar (ptr); |
826 | 873 case FORMAT_16_BIT_FIXED: |
1204 | 874 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 875 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 876 case FORMAT_32_BIT_FIXED: |
1204 | 877 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 878 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 879 default: |
880 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 881 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 882 } |
883 } | |
884 | |
885 /* Return the character at PTR (which is in format FMT), suitable for | |
886 comparison with an ASCII character. This guarantees that if the | |
887 character at PTR is ASCII (range 0 - 127), that character will be | |
888 returned; otherwise, some character outside of the ASCII range will be | |
889 returned, but not necessarily the character actually at PTR. This will | |
867 | 890 be faster than itext_ichar_fmt() for some formats -- in particular, |
826 | 891 FORMAT_DEFAULT. */ |
892 | |
893 DECLARE_INLINE_HEADER ( | |
867 | 894 Ichar |
895 itext_ichar_ascii_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 896 Lisp_Object UNUSED (object)) |
826 | 897 ) |
898 { | |
899 switch (fmt) | |
900 { | |
901 case FORMAT_DEFAULT: | |
867 | 902 return (Ichar) *ptr; |
826 | 903 case FORMAT_16_BIT_FIXED: |
1204 | 904 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 905 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 906 case FORMAT_32_BIT_FIXED: |
1204 | 907 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 908 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 909 default: |
910 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 911 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 912 } |
913 } | |
914 | |
915 /* Return the "raw value" of the character at PTR, in format FMT. This is | |
916 useful when searching for a character; convert the character using | |
867 | 917 ichar_to_raw(). */ |
826 | 918 |
919 DECLARE_INLINE_HEADER ( | |
867 | 920 Raw_Ichar |
921 itext_ichar_raw_fmt (const Ibyte *ptr, Internal_Format fmt) | |
826 | 922 ) |
923 { | |
924 switch (fmt) | |
925 { | |
926 case FORMAT_DEFAULT: | |
867 | 927 return (Raw_Ichar) itext_ichar (ptr); |
826 | 928 case FORMAT_16_BIT_FIXED: |
1204 | 929 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 930 return (Raw_Ichar) (* (UINT_16_BIT *) ptr); |
826 | 931 case FORMAT_32_BIT_FIXED: |
1204 | 932 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 933 return (Raw_Ichar) (* (UINT_32_BIT *) ptr); |
826 | 934 default: |
935 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 936 return (Raw_Ichar) (*ptr); |
826 | 937 } |
938 } | |
939 | |
867 | 940 /* Store the character CH (an Ichar) as internally-formatted text starting |
826 | 941 at PTR. Return the number of bytes stored. |
942 */ | |
943 | |
944 DECLARE_INLINE_HEADER ( | |
945 Bytecount | |
867 | 946 set_itext_ichar (Ibyte *ptr, Ichar x) |
826 | 947 ) |
771 | 948 { |
867 | 949 return !ichar_multibyte_p (x) ? |
950 simple_set_itext_ichar (ptr, x) : | |
951 non_ascii_set_itext_ichar (ptr, x); | |
771 | 952 } |
953 | |
867 | 954 /* Store the character CH (an Ichar) as internally-formatted text of |
826 | 955 format FMT starting at PTR, which comes from OBJECT. Return the number |
956 of bytes stored. | |
957 */ | |
958 | |
959 DECLARE_INLINE_HEADER ( | |
960 Bytecount | |
867 | 961 set_itext_ichar_fmt (Ibyte *ptr, Ichar x, Internal_Format fmt, |
2286 | 962 Lisp_Object UNUSED (object)) |
826 | 963 ) |
771 | 964 { |
826 | 965 switch (fmt) |
966 { | |
967 case FORMAT_DEFAULT: | |
867 | 968 return set_itext_ichar (ptr, x); |
826 | 969 case FORMAT_16_BIT_FIXED: |
867 | 970 text_checking_assert (ichar_16_bit_fixed_p (x, object)); |
1204 | 971 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 972 * (UINT_16_BIT *) ptr = ichar_to_raw_16_bit_fixed (x, object); |
826 | 973 return 2; |
974 case FORMAT_32_BIT_FIXED: | |
1204 | 975 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 976 * (UINT_32_BIT *) ptr = ichar_to_raw_32_bit_fixed (x, object); |
826 | 977 return 4; |
978 default: | |
979 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 980 text_checking_assert (ichar_8_bit_fixed_p (x, object)); |
981 *ptr = ichar_to_raw_8_bit_fixed (x, object); | |
826 | 982 return 1; |
983 } | |
984 } | |
985 | |
986 /* Retrieve the character pointed to by SRC and store it as | |
987 internally-formatted text in DST. | |
988 */ | |
989 | |
990 DECLARE_INLINE_HEADER ( | |
991 Bytecount | |
867 | 992 itext_copy_ichar (const Ibyte *src, Ibyte *dst) |
826 | 993 ) |
994 { | |
995 return byte_ascii_p (*src) ? | |
867 | 996 simple_itext_copy_ichar (src, dst) : |
997 non_ascii_itext_copy_ichar (src, dst); | |
771 | 998 } |
999 | |
1000 #else /* not MULE */ | |
1001 | |
867 | 1002 # define itext_ichar(ptr) simple_itext_ichar (ptr) |
1003 # define itext_ichar_fmt(ptr, fmt, object) itext_ichar (ptr) | |
1004 # define itext_ichar_ascii_fmt(ptr, fmt, object) itext_ichar (ptr) | |
1005 # define itext_ichar_raw_fmt(ptr, fmt) itext_ichar (ptr) | |
1006 # define set_itext_ichar(ptr, x) simple_set_itext_ichar (ptr, x) | |
1007 # define set_itext_ichar_fmt(ptr, x, fmt, obj) set_itext_ichar (ptr, x) | |
1008 # define itext_copy_ichar(src, dst) simple_itext_copy_ichar (src, dst) | |
771 | 1009 |
1010 #endif /* not MULE */ | |
1011 | |
826 | 1012 /* Retrieve the character at offset N (in characters) from PTR, as an |
867 | 1013 Ichar. |
826 | 1014 */ |
1015 | |
867 | 1016 #define itext_ichar_n(ptr, offset) \ |
1017 itext_ichar (itext_n_addr (ptr, offset)) | |
771 | 1018 |
1019 | |
1020 /************************************************************************/ | |
1021 /* */ | |
826 | 1022 /* working with Lisp strings */ |
1023 /* */ | |
1024 /************************************************************************/ | |
1025 | |
1026 #define string_char_length(s) \ | |
1027 string_index_byte_to_char (s, XSTRING_LENGTH (s)) | |
1028 #define string_byte(s, i) (XSTRING_DATA (s)[i] + 0) | |
1029 /* In case we ever allow strings to be in a different format ... */ | |
1030 #define set_string_byte(s, i, c) (XSTRING_DATA (s)[i] = (c)) | |
1031 | |
/* Assert that X is a valid character index into string S, i.e. that
   0 <= X <= (length of S in characters).  "UNSAFE" because X is evaluated
   more than once. */
#define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= string_char_length (s));     \
} while (0)

/* Assert that X is a valid byte index into string S, i.e. that
   0 <= X <= (length of S in bytes) and that the address of byte X
   satisfies valid_ibyteptr_p().  "UNSAFE" because X is evaluated more
   than once. */
#define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s));         \
  text_checking_assert (valid_ibyteptr_p (string_byte_addr (s, (x))));  \
} while (0)
1040 | |
1041 /* Convert offset I in string S to a pointer to text there. */ | |
1042 #define string_byte_addr(s, i) (&(XSTRING_DATA (s)[i])) | |
1043 /* Convert pointer to text in string S into the byte offset to that text. */ | |
1044 #define string_addr_to_byte(s, ptr) ((Bytecount) ((ptr) - XSTRING_DATA (s))) | |
867 | 1045 /* Return the Ichar at *CHARACTER* offset I. */ |
1046 #define string_ichar(s, i) itext_ichar (string_char_addr (s, i)) | |
826 | 1047 |
1048 #ifdef ERROR_CHECK_TEXT | |
1049 #define SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1050 #endif | |
1051 | |
1052 #ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1053 void sledgehammer_check_ascii_begin (Lisp_Object str); | |
1054 #else | |
1055 #define sledgehammer_check_ascii_begin(str) | |
1056 #endif | |
1057 | |
1058 /* Make an alloca'd copy of a Lisp string */ | |
1059 #define LISP_STRING_TO_ALLOCA(s, lval) \ | |
1060 do { \ | |
1315 | 1061 Ibyte **_lta_ = (Ibyte **) &(lval); \ |
826 | 1062 Lisp_Object _lta_2 = (s); \ |
2367 | 1063 *_lta_ = alloca_ibytes (1 + XSTRING_LENGTH (_lta_2)); \ |
826 | 1064 memcpy (*_lta_, XSTRING_DATA (_lta_2), 1 + XSTRING_LENGTH (_lta_2)); \ |
1065 } while (0) | |
1066 | |
1067 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta); | |
1068 | |
1069 /* Convert a byte index into a string into a char index. */ | |
1070 DECLARE_INLINE_HEADER ( | |
1071 Charcount | |
4853 | 1072 string_index_byte_to_char (Lisp_Object s, Bytecount idx) |
826 | 1073 ) |
1074 { | |
1075 Charcount retval; | |
1076 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx); | |
1077 #ifdef MULE | |
1078 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1079 retval = (Charcount) idx; | |
1080 else | |
1081 retval = (XSTRING_ASCII_BEGIN (s) + | |
1082 bytecount_to_charcount (XSTRING_DATA (s) + | |
1083 XSTRING_ASCII_BEGIN (s), | |
1084 idx - XSTRING_ASCII_BEGIN (s))); | |
1085 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1086 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx)); | |
1087 # endif | |
1088 #else | |
1089 retval = (Charcount) idx; | |
1090 #endif | |
1091 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will | |
1092 call string_index_byte_to_char(). */ | |
1093 return retval; | |
1094 } | |
1095 | |
1096 /* Convert a char index into a string into a byte index. */ | |
1097 DECLARE_INLINE_HEADER ( | |
1098 Bytecount | |
4853 | 1099 string_index_char_to_byte (Lisp_Object s, Charcount idx) |
826 | 1100 ) |
1101 { | |
1102 Bytecount retval; | |
1103 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx); | |
1104 #ifdef MULE | |
1105 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s)) | |
1106 retval = (Bytecount) idx; | |
1107 else | |
1108 retval = (XSTRING_ASCII_BEGIN (s) + | |
1109 charcount_to_bytecount (XSTRING_DATA (s) + | |
1110 XSTRING_ASCII_BEGIN (s), | |
1111 idx - XSTRING_ASCII_BEGIN (s))); | |
1112 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1113 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx)); | |
1114 # endif | |
1115 #else | |
1116 retval = (Bytecount) idx; | |
1117 #endif | |
1118 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval); | |
1119 return retval; | |
1120 } | |
1121 | |
1122 /* Convert a substring length (starting at byte offset OFF) from bytes to | |
1123 chars. */ | |
1124 DECLARE_INLINE_HEADER ( | |
1125 Charcount | |
4853 | 1126 string_offset_byte_to_char_len (Lisp_Object s, Bytecount off, Bytecount len) |
826 | 1127 ) |
1128 { | |
1129 Charcount retval; | |
1130 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1131 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len); | |
1132 #ifdef MULE | |
1133 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1134 retval = (Charcount) len; | |
1135 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1136 retval = | |
1137 XSTRING_ASCII_BEGIN (s) - (Charcount) off + | |
1138 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1139 len - (XSTRING_ASCII_BEGIN (s) - off)); | |
1140 else | |
1141 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len); | |
1142 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1143 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len)); | |
1144 # endif | |
1145 #else | |
1146 retval = (Charcount) len; | |
1147 #endif | |
1148 return retval; | |
1149 } | |
1150 | |
1151 /* Convert a substring length (starting at byte offset OFF) from chars to | |
1152 bytes. */ | |
1153 DECLARE_INLINE_HEADER ( | |
1154 Bytecount | |
4853 | 1155 string_offset_char_to_byte_len (Lisp_Object s, Bytecount off, Charcount len) |
826 | 1156 ) |
1157 { | |
1158 Bytecount retval; | |
1159 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1160 #ifdef MULE | |
1161 /* casts to avoid errors from combining Bytecount/Charcount and warnings | |
1162 from signed/unsigned comparisons */ | |
1163 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1164 retval = (Bytecount) len; | |
1165 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1166 retval = | |
1167 XSTRING_ASCII_BEGIN (s) - off + | |
1168 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1169 len - (XSTRING_ASCII_BEGIN (s) - | |
1170 (Charcount) off)); | |
1171 else | |
1172 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len); | |
1173 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1174 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len)); | |
1175 # endif | |
1176 #else | |
1177 retval = (Bytecount) len; | |
1178 #endif | |
1179 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval); | |
1180 return retval; | |
1181 } | |
1182 | |
1183 DECLARE_INLINE_HEADER ( | |
867 | 1184 const Ibyte * |
826 | 1185 string_char_addr (Lisp_Object s, Charcount idx) |
1186 ) | |
1187 { | |
1188 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx); | |
1189 } | |
1190 | |
1191 /* WARNING: If you modify an existing string, you must call | |
1192 bump_string_modiff() afterwards. */ | |
1193 #ifdef MULE | |
867 | 1194 void set_string_char (Lisp_Object s, Charcount i, Ichar c); |
826 | 1195 #else |
1196 #define set_string_char(s, i, c) set_string_byte (s, i, c) | |
1197 #endif /* not MULE */ | |
1198 | |
1199 /* Return index to character before the one at IDX. */ | |
1200 DECLARE_INLINE_HEADER ( | |
1201 Bytecount | |
1202 prev_string_index (Lisp_Object s, Bytecount idx) | |
1203 ) | |
1204 { | |
867 | 1205 const Ibyte *ptr = string_byte_addr (s, idx); |
1206 DEC_IBYTEPTR (ptr); | |
826 | 1207 return string_addr_to_byte (s, ptr); |
1208 } | |
1209 | |
1210 /* Return index to character after the one at IDX. */ | |
1211 DECLARE_INLINE_HEADER ( | |
1212 Bytecount | |
1213 next_string_index (Lisp_Object s, Bytecount idx) | |
1214 ) | |
1215 { | |
867 | 1216 const Ibyte *ptr = string_byte_addr (s, idx); |
1217 INC_IBYTEPTR (ptr); | |
826 | 1218 return string_addr_to_byte (s, ptr); |
1219 } | |
1220 | |
1221 | |
1222 /************************************************************************/ | |
1223 /* */ | |
771 | 1224 /* working with Eistrings */ |
1225 /* */ | |
1226 /************************************************************************/ | |
1227 | |
1228 /* | |
1229 #### NOTE: This is a work in progress. Neither the API nor especially | |
1230 the implementation is finished. | |
1231 | |
1232 NOTE: An Eistring is a structure that makes it easy to work with | |
1233 internally-formatted strings of data. It provides operations similar | |
1234 in feel to the standard strcpy(), strcat(), strlen(), etc., but | |
1235 | |
1236 (a) it is Mule-correct | |
1237 (b) it does dynamic allocation so you never have to worry about size | |
793 | 1238 restrictions |
851 | 1239 (c) it comes in an ALLOCA() variety (all allocation is stack-local, |
793 | 1240 so there is no need to explicitly clean up) as well as a malloc() |
1241 variety | |
1242 (d) it knows its own length, so it does not suffer from standard null | |
1243 byte brain-damage -- but it null-terminates the data anyway, so | |
1244 it can be passed to standard routines | |
1245 (e) it provides a much more powerful set of operations and knows about | |
771 | 1246 all the standard places where string data might reside: Lisp_Objects, |
867 | 1247 other Eistrings, Ibyte * data with or without an explicit length, |
1248 ASCII strings, Ichars, etc. | |
793 | 1249 (f) it provides easy operations to convert to/from externally-formatted |
1250 data, and is easier to use than the standard TO_INTERNAL_FORMAT | |
771 | 1251 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal |
1252 and external version of its data, but the external version is only | |
1253 initialized or changed when you call eito_external().) | |
1254 | |
793 | 1255 The idea is to make it as easy to write Mule-correct string manipulation |
1256 code as it is to write normal string manipulation code. We also make | |
1257 the API sufficiently general that it can handle multiple internal data | |
1258 formats (e.g. some fixed-width optimizing formats and a default variable | |
1259 width format) and allows for *ANY* data format we might choose in the | |
1260 future for the default format, including UCS2. (In other words, we can't | |
1261 assume that the internal format is ASCII-compatible and we can't assume | |
1262 it doesn't have embedded null bytes. We do assume, however, that any | |
1263 chosen format will have the concept of null-termination.) All of this is | |
1264 hidden from the user. | |
771 | 1265 |
1266 #### It is really too bad that we don't have a real object-oriented | |
1267 language, or at least a language with polymorphism! | |
1268 | |
1269 | |
1270 ********************************************** | |
1271 * Declaration * | |
1272 ********************************************** | |
1273 | |
1274 To declare an Eistring, either put one of the following in the local | |
1275 variable section: | |
1276 | |
1277 DECLARE_EISTRING (name); | |
2367 | 1278 Declare a new Eistring and initialize it to the empty string. This |
1279 is a standard local variable declaration and can go anywhere in the | |
1280 variable declaration section. NAME itself is declared as an | |
1281 Eistring *, and its storage declared on the stack. | |
771 | 1282 |
1283 DECLARE_EISTRING_MALLOC (name); | |
2367 | 1284 Declare and initialize a new Eistring, which uses malloc()ed |
1285 instead of ALLOCA()ed data. This is a standard local variable | |
1286 declaration and can go anywhere in the variable declaration | |
1287 section. Once you initialize the Eistring, you will have to free | |
1288 it using eifree() to avoid memory leaks. You will need to use this | |
1289 form if you are passing an Eistring to any function that modifies | |
1290 it (otherwise, the modified data may be in stack space and get | |
1291 overwritten when the function returns). | |
771 | 1292 |
1293 or use | |
1294 | |
793 | 1295 Eistring ei; |
1296 void eiinit (Eistring *ei); | |
1297 void eiinit_malloc (Eistring *einame); | |
771 | 1298 If you need to put an Eistring elsewhere than in a local variable |
1299 declaration (e.g. in a structure), declare it as shown and then | |
1300 call one of the init macros. | |
1301 | |
1302 Also note: | |
1303 | |
793 | 1304 void eifree (Eistring *ei); |
771 | 1305 If you declared an Eistring to use malloc() to hold its data, |
1306 or converted it to the heap using eito_malloc(), then this | |
1307 releases any data in it and afterwards resets the Eistring | |
1308 using eiinit_malloc(). Otherwise, it just resets the Eistring | |
1309 using eiinit(). | |
1310 | |
1311 | |
1312 ********************************************** | |
1313 * Conventions * | |
1314 ********************************************** | |
1315 | |
1316 - The names of the functions have been chosen, where possible, to | |
1317 match the names of str*() functions in the standard C API. | |
1318 - | |
1319 | |
1320 | |
1321 ********************************************** | |
1322 * Initialization * | |
1323 ********************************************** | |
1324 | |
1325 void eireset (Eistring *eistr); | |
1326 Initialize the Eistring to the empty string. | |
1327 | |
1328 void eicpy_* (Eistring *eistr, ...); | |
1329 Initialize the Eistring from somewhere: | |
1330 | |
1331 void eicpy_ei (Eistring *eistr, Eistring *eistr2); | |
1332 ... from another Eistring. | |
1333 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string); | |
1334 ... from a Lisp_Object string. | |
867 | 1335 void eicpy_ch (Eistring *eistr, Ichar ch); |
1336 ... from an Ichar (this can be a conventional C character). | |
771 | 1337 |
1338 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string, | |
1339 Bytecount off, Charcount charoff, | |
1340 Bytecount len, Charcount charlen); | |
1341 ... from a section of a Lisp_Object string. | |
1342 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf, | |
1343 Bytecount off, Charcount charoff, | |
1344 Bytecount len, Charcount charlen); | |
1345 ... from a section of a Lisp_Object buffer. | |
867 | 1346 void eicpy_raw (Eistring *eistr, const Ibyte *data, Bytecount len); |
771 | 1347 ... from raw internal-format data in the default internal format. |
867 | 1348 void eicpy_rawz (Eistring *eistr, const Ibyte *data); |
771 | 1349 ... from raw internal-format data in the default internal format |
1350 that is "null-terminated" (the meaning of this depends on the nature | |
1351 of the default internal format). | |
867 | 1352 void eicpy_raw_fmt (Eistring *eistr, const Ibyte *data, Bytecount len, |
826 | 1353 Internal_Format intfmt, Lisp_Object object); |
771 | 1354 ... from raw internal-format data in the specified format. |
867 | 1355 void eicpy_rawz_fmt (Eistring *eistr, const Ibyte *data, |
826 | 1356 Internal_Format intfmt, Lisp_Object object); |
771 | 1357 ... from raw internal-format data in the specified format that is |
1358 "null-terminated" (the meaning of this depends on the nature of | |
1359 the specific format). | |
2421 | 1360 void eicpy_ascii (Eistring *eistr, const Ascbyte *ascstr); |
771 | 1361 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1362 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
2421 | 1363 void eicpy_ascii_len (Eistring *eistr, const Ascbyte *ascstr, Bytecount len); |
771 | 1364 ... from an ASCII string, with length specified. Non-ASCII characters |
2500 | 1365 in the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1366 void eicpy_ext (Eistring *eistr, const Extbyte *extdata, |
1318 | 1367 Lisp_Object codesys); |
771 | 1368 ... from external null-terminated data, with coding system specified. |
1369 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata, | |
1318 | 1370 Bytecount extlen, Lisp_Object codesys); |
771 | 1371 ... from external data, with length and coding system specified. |
1372 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream); | |
1373 ... from an lstream; reads data till eof. Data must be in default | |
1374 internal format; otherwise, interpose a decoding lstream. | |
1375 | |
1376 | |
1377 ********************************************** | |
1378 * Getting the data out of the Eistring * | |
1379 ********************************************** | |
1380 | |
867 | 1381 Ibyte *eidata (Eistring *eistr); |
771 | 1382 Return a pointer to the raw data in an Eistring. This is NOT |
1383 a copy. | |
1384 | |
1385 Lisp_Object eimake_string (Eistring *eistr); | |
1386 Make a Lisp string out of the Eistring. | |
1387 | |
1388 Lisp_Object eimake_string_off (Eistring *eistr, | |
1389 Bytecount off, Charcount charoff, | |
1390 Bytecount len, Charcount charlen); | |
1391 Make a Lisp string out of a section of the Eistring. | |
1392 | |
867 | 1393 void eicpyout_alloca (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1394 LVALUE: Bytecount len_out); |
851 | 1395 Make an ALLOCA() copy of the data in the Eistring, using the |
1396 default internal format. Due to the nature of ALLOCA(), this | |
771 | 1397 must be a macro, with all lvalues passed in as parameters. |
793 | 1398 (More specifically, not all compilers correctly handle using |
851 | 1399 ALLOCA() as the argument to a function call -- GCC on x86 |
1400 didn't used to, for example.) A pointer to the ALLOCA()ed data | |
793 | 1401 is stored in PTR_OUT, and the length of the data (not including |
1402 the terminating zero) is stored in LEN_OUT. | |
771 | 1403 |
867 | 1404 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1405 LVALUE: Bytecount len_out, |
826 | 1406 Internal_Format intfmt, Lisp_Object object); |
771 | 1407 Like eicpyout_alloca(), but converts to the specified internal |
1408 format. (No formats other than FORMAT_DEFAULT are currently | |
1409 implemented, and you get an assertion failure if you try.) | |
1410 | |
867 | 1411 Ibyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out); |
771 | 1412 Make a malloc() copy of the data in the Eistring, using the |
1413 default internal format. This is a real function. No lvalues | |
1414 passed in. Returns the new data, and stores the length (not | |
1415 including the terminating zero) using INTLEN_OUT, unless it's | |
1416 a NULL pointer. | |
1417 | |
867 | 1418 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *intlen_out, |
826 | 1419 Internal_Format intfmt, Lisp_Object object); |
771 | 1420 Like eicpyout_malloc(), but converts to the specified internal |
1421 format. (No formats other than FORMAT_DEFAULT are currently | |
1422 implemented, and you get an assertion failure if you try.) | |
1423 | |
1424 | |
1425 ********************************************** | |
1426 * Moving to the heap * | |
1427 ********************************************** | |
1428 | |
1429 void eito_malloc (Eistring *eistr); | |
1430 Move this Eistring to the heap. Its data will be stored in a | |
1431 malloc()ed block rather than the stack. Subsequent changes to | |
1432 this Eistring will realloc() the block as necessary. Use this | |
1433 when you want the Eistring to remain in scope past the end of | |
1434 this function call. You will have to manually free the data | |
1435 in the Eistring using eifree(). | |
1436 | |
1437 void eito_alloca (Eistring *eistr); | |
1438 Move this Eistring back to the stack, if it was moved to the | |
1439 heap with eito_malloc(). This will automatically free any | |
1440 heap-allocated data. | |
1441 | |
1442 | |
1443 | |
1444 ********************************************** | |
1445 * Retrieving the length * | |
1446 ********************************************** | |
1447 | |
1448 Bytecount eilen (Eistring *eistr); | |
1449 Return the length of the internal data, in bytes. See also | |
1450 eiextlen(), below. | |
1451 Charcount eicharlen (Eistring *eistr); | |
1452 Return the length of the internal data, in characters. | |
1453 | |
1454 | |
1455 ********************************************** | |
1456 * Working with positions * | |
1457 ********************************************** | |
1458 | |
1459 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos); | |
1460 Convert a char offset to a byte offset. | |
1461 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos); | |
1462 Convert a byte offset to a char offset. | |
1463 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos); | |
1464 Increment the given position by one character. | |
1465 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1466 Increment the given position by N characters. | |
1467 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos); | |
1468 Decrement the given position by one character. | |
1469 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1470 Decrement the given position by N characters. | |
1471 | |
1472 | |
1473 ********************************************** | |
1474 * Getting the character at a position * | |
1475 ********************************************** | |
1476 | |
867 | 1477 Ichar eigetch (Eistring *eistr, Bytecount bytepos); |
771 | 1478 Return the character at a particular byte offset. |
867 | 1479 Ichar eigetch_char (Eistring *eistr, Charcount charpos); |
771 | 1480 Return the character at a particular character offset. |
1481 | |
1482 | |
1483 ********************************************** | |
1484 * Setting the character at a position * | |
1485 ********************************************** | |
1486 | |
867 | 1487 Ichar eisetch (Eistring *eistr, Bytecount bytepos, Ichar chr); |
771 | 1488 Set the character at a particular byte offset. |
867 | 1489 Ichar eisetch_char (Eistring *eistr, Charcount charpos, Ichar chr); |
771 | 1490 Set the character at a particular character offset. |
1491 | |
1492 | |
1493 ********************************************** | |
1494 * Concatenation * | |
1495 ********************************************** | |
1496 | |
1497 void eicat_* (Eistring *eistr, ...); | |
1498 Concatenate onto the end of the Eistring, with data coming from the | |
1499 same places as above: | |
1500 | |
1501 void eicat_ei (Eistring *eistr, Eistring *eistr2); | |
1502 ... from another Eistring. | |
2421 | 1503 void eicat_ascii (Eistring *eistr, Ascbyte *ascstr); |
771 | 1504 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1505 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
867 | 1506 void eicat_raw (ei, const Ibyte *data, Bytecount len); |
771 | 1507 ... from raw internal-format data in the default internal format. |
867 | 1508 void eicat_rawz (ei, const Ibyte *data); |
771 | 1509 ... from raw internal-format data in the default internal format |
1510 that is "null-terminated" (the meaning of this depends on the nature | |
1511 of the default internal format). | |
1512 void eicat_lstr (ei, Lisp_Object lisp_string); | |
1513 ... from a Lisp_Object string. | |
867 | 1514 void eicat_ch (ei, Ichar ch); |
1515 ... from an Ichar. | |
771 | 1516 |
1517 (All except the first variety are convenience functions. | |
1518 In the general case, create another Eistring from the source.) | |
1519 | |
1520 | |
1521 ********************************************** | |
1522 * Replacement * | |
1523 ********************************************** | |
1524 | |
1525 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1526 Bytecount len, Charcount charlen, ...); | |
1527 Replace a section of the Eistring, specifically: | |
1528 | |
1529 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1530 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1531 ... with another Eistring. | |
2421 | 1532 void eisub_ascii (Eistring *eistr, Bytecount off, Charcount charoff, |
1533 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
771 | 1534 ... with an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1535 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1536 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff, |
867 | 1537 Bytecount len, Charcount charlen, Ichar ch); |
1538 ... with an Ichar. | |
771 | 1539 |
1540 void eidel (Eistring *eistr, Bytecount off, Charcount charoff, | |
1541 Bytecount len, Charcount charlen); | |
1542 Delete a section of the Eistring. | |
1543 | |
1544 | |
1545 ********************************************** | |
1546 * Converting to an external format * | |
1547 ********************************************** | |
1548 | |
1318 | 1549 void eito_external (Eistring *eistr, Lisp_Object codesys); |
771 | 1550 Convert the Eistring to an external format and store the result |
1551 in the string. NOTE: Further changes to the Eistring will *NOT* | |
1552 change the external data stored in the string. You will have to | |
1553 call eito_external() again in such a case if you want the external | |
1554 data. | |
1555 | |
1556 Extbyte *eiextdata (Eistring *eistr); | |
1557 Return a pointer to the external data stored in the Eistring as | |
1558 a result of a prior call to eito_external(). | |
1559 | |
1560 Bytecount eiextlen (Eistring *eistr); | |
1561 Return the length in bytes of the external data stored in the | |
1562 Eistring as a result of a prior call to eito_external(). | |
1563 | |
1564 | |
1565 ********************************************** | |
1566 * Searching in the Eistring for a character * | |
1567 ********************************************** | |
1568 | |
867 | 1569 Bytecount eichr (Eistring *eistr, Ichar chr); |
1570 Charcount eichr_char (Eistring *eistr, Ichar chr); | |
1571 Bytecount eichr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1572 Charcount charoff); |
867 | 1573 Charcount eichr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1574 Charcount charoff); |
867 | 1575 Bytecount eirchr (Eistring *eistr, Ichar chr); |
1576 Charcount eirchr_char (Eistring *eistr, Ichar chr); | |
1577 Bytecount eirchr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1578 Charcount charoff); |
867 | 1579 Charcount eirchr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1580 Charcount charoff); |
1581 | |
1582 | |
1583 ********************************************** | |
1584 * Searching in the Eistring for a string * | |
1585 ********************************************** | |
1586 | |
1587 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2); | |
1588 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1589 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1590 Charcount charoff); | |
1591 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1592 Bytecount off, Charcount charoff); | |
1593 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2); | |
1594 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1595 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1596 Charcount charoff); | |
1597 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1598 Bytecount off, Charcount charoff); | |
1599 | |
2421 | 1600 Bytecount eistr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1601 Charcount eistr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1602 Bytecount eistr_ascii_off (Eistring *eistr, Ascbyte *ascstr, Bytecount off, | |
771 | 1603 Charcount charoff); |
2421 | 1604 Charcount eistr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1605 Bytecount off, Charcount charoff); |
2421 | 1606 Bytecount eirstr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1607 Charcount eirstr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1608 Bytecount eirstr_ascii_off (Eistring *eistr, Ascbyte *ascstr, | |
771 | 1609 Bytecount off, Charcount charoff); |
2421 | 1610 Charcount eirstr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1611 Bytecount off, Charcount charoff); |
1612 | |
1613 | |
1614 ********************************************** | |
1615 * Comparison * | |
1616 ********************************************** | |
1617 | |
1618 int eicmp_* (Eistring *eistr, ...); | |
1619 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1620 Bytecount len, Charcount charlen, ...); | |
1621 int eicasecmp_* (Eistring *eistr, ...); | |
1622 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1623 Bytecount len, Charcount charlen, ...); | |
1624 int eicasecmp_i18n_* (Eistring *eistr, ...); | |
1625 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1626 Bytecount len, Charcount charlen, ...); | |
1627 | |
1628 Compare the Eistring with the other data. Return value same as | |
1629 from strcmp. The `*' is either `ei' for another Eistring (in | |
1630 which case `...' is an Eistring), or `ascii' for a pure-ASCII string | |
1631 (in which case `...' is a pointer to that string). For anything | |
1632 more complex, first create an Eistring out of the source. | |
1633 Comparison is either simple (`eicmp_...'), ASCII case-folding | |
1634 (`eicasecmp_...'), or multilingual case-folding | |
1635 (`eicasecmp_i18n_...'). | |
1636 | |
1637 | |
1638 More specifically, the prototypes are: | |
1639 | |
1640 int eicmp_ei (Eistring *eistr, Eistring *eistr2); | |
1641 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1642 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1643 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2); | |
1644 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1645 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1646 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2); | |
1647 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off, | |
1648 Charcount charoff, Bytecount len, | |
1649 Charcount charlen, Eistring *eistr2); | |
1650 | |
2421 | 1651 int eicmp_ascii (Eistring *eistr, Ascbyte *ascstr); |
1652 int eicmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
1653 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
1654 int eicasecmp_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1655 int eicasecmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1656 Bytecount len, Charcount charlen, |
2421 | 1657 Ascbyte *ascstr); |
1658 int eicasecmp_i18n_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1659 int eicasecmp_i18n_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1660 Bytecount len, Charcount charlen, |
2421 | 1661 Ascbyte *ascstr); |
771 | 1662 |
1663 | |
1664 ********************************************** | |
1665 * Case-changing the Eistring * | |
1666 ********************************************** | |
1667 | |
1668 void eilwr (Eistring *eistr); | |
1669 Convert all characters in the Eistring to lowercase. | |
1670 void eiupr (Eistring *eistr); | |
1671 Convert all characters in the Eistring to uppercase. | |
1672 */ | |
1673 | |
1674 | |
1675 /* Principles for writing Eistring functions: | |
1676 | |
1677 (1) Unfortunately, we have to write most of the Eistring functions | |
851 | 1678 as macros, because of the use of ALLOCA(). The principle used |
771 | 1679 below to assure no conflict in local variables is to prefix all |
1680 local variables with "ei" plus a number, which should be unique | |
1681 among macros. In practice, when finding a new number, find the | |
1682 highest so far used, and add 1. | |
1683 | |
1684 (2) We also suffix the Eistring fields with an _ to avoid problems | |
1685 with macro parameters of the same name. (And as the standard | |
1686 signal not to access these fields directly.) | |
1687 | |
1688 (3) We maintain both the length in bytes and chars of the data in | |
1689 the Eistring at all times, for convenient retrieval by outside | |
1690 functions. That means when writing functions that manipulate | |
1691 Eistrings, you too need to keep both lengths up to date for all | |
1692 data that you work with. | |
1693 | |
1694 (4) When writing a new type of operation (e.g. substitution), you | |
1695 will often find yourself working with outside data, and thus | |
1696 have a series of related API's, for different forms that the | |
1697 outside data is in. Generally, you will want to choose a | |
1698 subset of the forms supported by eicpy_*, which has to be | |
1699 totally general because that's the fundamental way to get data | |
1700 into an Eistring, and once the data is into the string, it | |
1701 would be possible to create a whole series of Ei operations that work on | |
1702 nothing but Eistrings. Although theoretically nice, in | |
1703 practice it's a hassle, so we suggest that you provide | |
1704 convenience functions. In particular, there are two paths you | |
1705 can take. One is minimalist -- it only allows other Eistrings | |
867 | 1706 and ASCII data, and Ichars if the particular operation makes |
771 | 1707 sense with a character. The other provides interfaces for the |
1708 most commonly-used forms -- Eistring, ASCII data, Lisp string, | |
1709 raw internal-format string with length, raw internal-format | |
867 | 1710 string without, and possibly Ichar. (In the function names, |
771 | 1711 these are designated `ei', `ascii', `lstr', `raw', `rawz', and |
1712 `ch', respectively.) | |
1713 | |
1714 (5) When coding a new type of operation, such as was discussed in | |
1715 previous section, the correct approach is to declare a worker | |
1716 function that does the work of everything, and is called by the | |
1717 other "container" macros that handle the different outside data | |
1718 forms. The data coming into the worker function, which | |
1719 typically ends in `_1', is in the form of three parameters: | |
1720 DATA, LEN, CHARLEN. (See point [3] about having two lengths and | |
1721 keeping them in sync.) | |
1722 | |
1723 (6) Handling argument evaluation in macros: We take great care | |
1724 never to evaluate any argument more than once in any macro, | |
1725 except the initial Eistring parameter. This can and will be | |
1726 evaluated multiple times, but it should pretty much always just | |
1727 be a simple variable. This means, for example, that if an | |
1728 Eistring is the second (not first) argument of a macro, it | |
1729 doesn't fall under the "initial Eistring" exemption, so it | |
1730 needs protection against multi-evaluation. (Take the address of | |
1731 the Eistring structure, store in a temporary variable, and use | |
1732 temporary variable for all access to the Eistring.) | |
1733 Essentially, we want it to appear as if these Eistring macros | |
1734 are functions -- we would like to declare them as functions but | |
851 | 1735 they use ALLOCA(), so we can't (and we can't make them inline |
1736 functions either -- ALLOCA() is explicitly disallowed in inline | |
771 | 1737 functions.) |
1738 | |
1739 (7) Note that our rules regarding multiple evaluation are *more* | |
1740 strict than the rules listed above under the heading "working | |
1741 with raw internal-format data". | |
1742 */ | |
1743 | |
1744 | |
1745 /* ----- Declaration ----- */ | |
1746 | |
1747 typedef struct | |
1748 { | |
1749 /* Data for the Eistring, stored in the default internal format. | |
1750 Always includes terminating null. */ | |
867 | 1751 Ibyte *data_; |
771 | 1752 /* Total number of bytes allocated in DATA (including null). */ |
1753 Bytecount max_size_allocated_; | |
1754 Bytecount bytelen_; | |
1755 Charcount charlen_; | |
1756 int mallocp_; | |
1757 | |
1758 Extbyte *extdata_; | |
1759 Bytecount extlen_; | |
1760 } Eistring; | |
1761 | |
1762 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init; | |
1763 | |
1764 #define DECLARE_EISTRING(name) \ | |
1765 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \ | |
1766 Eistring *name = & __ ## name ## __storage__ | |
1767 #define DECLARE_EISTRING_MALLOC(name) \ | |
1768 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \ | |
1769 Eistring *name = & __ ## name ## __storage__ | |
1770 | |
1771 #define eiinit(ei) \ | |
1772 do { \ | |
793 | 1773 *(ei) = the_eistring_zero_init; \ |
771 | 1774 } while (0) |
1775 | |
1776 #define eiinit_malloc(ei) \ | |
1777 do { \ | |
793 | 1778 *(ei) = the_eistring_malloc_zero_init; \ |
771 | 1779 } while (0) |
1780 | |
1781 | |
1782 /* ----- Utility ----- */ | |
1783 | |
/* Fill in whichever of LEN and CHARLEN was passed as -1, deriving it from
   the other one and the text at PTR.  LEN is checked first; CHARLEN is
   only looked at when LEN was already known.  LEN and CHARLEN must be
   lvalues.  PTR evaluated at most once, others multiply. */
#define eifixup_bytechar(ptr, len, charlen)                             \
do {                                                                    \
  if ((len) == -1)                                                      \
    (len) = charcount_to_bytecount (ptr, charlen);                      \
  else if ((charlen) == -1)                                             \
    (charlen) = bytecount_to_charcount (ptr, len);                      \
} while (0)

/* Fill in LEN from CHARLEN in case it is given as -1.  LEN must be an
   lvalue.  PTR evaluated at most once, others multiply. */
#define eifixup_byte(ptr, len, charlen)                                 \
do {                                                                    \
  if ((len) == -1)                                                      \
    (len) = charcount_to_bytecount (ptr, charlen);                      \
} while (0)

/* Fill in CHARLEN from LEN in case it is given as -1.  CHARLEN must be
   an lvalue.  PTR evaluated at most once, others multiply. */
#define eifixup_char(ptr, len, charlen)                                 \
do {                                                                    \
  if ((charlen) == -1)                                                  \
    (charlen) = bytecount_to_charcount (ptr, len);                      \
} while (0)
1809 | |
1810 | |
1811 | |
/* Record NEWBYTELEN / NEWCHARLEN as the new lengths of EI and make sure
   the data block can hold NEWBYTELEN bytes plus a zero terminator.
   Existing data is preserved as much as possible, including the existing
   zero terminator.  If NEWZ is non-zero, a new zero terminator is stored
   at the new end -- but note that all of the (re)allocation and
   termination work is skipped when the byte length does not change.
   All args but EI are evalled only once. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz)                      \
do {                                                                    \
  int ei1oldeibytelen = (ei)->bytelen_;                                 \
                                                                        \
  (ei)->charlen_ = (newcharlen);                                        \
  (ei)->bytelen_ = (newbytelen);                                        \
                                                                        \
  if ((ei)->bytelen_ != ei1oldeibytelen)                                \
    {                                                                   \
      int ei1newsize;                                                   \
                                                                        \
      /* Grow by a factor of 1.5 (never to less than 32 bytes) until    \
         the text and its terminator fit. */                            \
      for (ei1newsize = (ei)->max_size_allocated_;                      \
           ei1newsize < (ei)->bytelen_ + 1; )                           \
        {                                                               \
          ei1newsize = (int) (ei1newsize * 1.5);                        \
          if (ei1newsize < 32)                                          \
            ei1newsize = 32;                                            \
        }                                                               \
      if (ei1newsize != (ei)->max_size_allocated_)                      \
        {                                                               \
          if (!(ei)->mallocp_)                                          \
            {                                                           \
              /* No realloc for ALLOCA()ed storage: grab a bigger       \
                 block and copy the old text plus terminator over. */   \
              Ibyte *ei1oldeidata = (ei)->data_;                        \
              (ei)->data_ = alloca_ibytes (ei1newsize);                 \
              if (ei1oldeidata)                                         \
                memcpy ((ei)->data_, ei1oldeidata,                      \
                        ei1oldeibytelen + 1);                           \
            }                                                           \
          else                                                          \
            /* xrealloc keeps as much of the old data as fits. */       \
            (ei)->data_ = (Ibyte *) xrealloc ((ei)->data_, ei1newsize); \
          (ei)->max_size_allocated_ = ei1newsize;                       \
        }                                                               \
      if (newz)                                                         \
        (ei)->data_[(ei)->bytelen_] = '\0';                             \
    }                                                                   \
} while (0)
1853 | |
/* Size EI for BYTELEN bytes / CHARLEN chars via EI_ALLOC() (asking for a
   zero terminator), then copy that many bytes from DATA into it.  DATA
   is evaluated once, after the allocation step. */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen)                   \
do {                                                                    \
  EI_ALLOC (ei, bytelen, charlen, 1);                                   \
  memcpy ((ei)->data_, data, (ei)->bytelen_);                           \
} while (0)
1859 | |
1860 /* ----- Initialization ----- */ | |
1861 | |
/* Set EI to a copy of the text of the Eistring EICPY.  EICPY is
   evaluated once. */
#define eicpy_ei(ei, eicpy)                                             \
do {                                                                    \
  const Eistring *ei2 = (eicpy);                                        \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_);     \
} while (0)

/* Set EI to a copy of the text of the Lisp string LISP_STRING.
   LISP_STRING is evaluated once. */
#define eicpy_lstr(ei, lisp_string)                                     \
do {                                                                    \
  Lisp_Object ei3 = (lisp_string);                                      \
                                                                        \
  EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3),      \
                     string_char_length (ei3));                         \
} while (0)
1874 | |
/* Set EI to a copy of a section of the Lisp string LISP_STRING.  The
   section starts at byte offset OFF / char offset CHAROFF and covers LEN
   bytes / CHARLEN chars.  OFF may be given as -1, in which case it is
   derived from CHAROFF; either one of LEN and CHARLEN may be given as
   -1, in which case it is derived from the other.  All arguments except
   EI are evaluated exactly once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen)     \
do {                                                                    \
  Lisp_Object ei23lstr = (lisp_string);                                 \
  Bytecount ei23off = (off);                                            \
  Charcount ei23charoff = (charoff);                                    \
  Bytecount ei23len = (len);                                            \
  Charcount ei23charlen = (charlen);                                    \
  const Ibyte *ei23data = XSTRING_DATA (ei23lstr);                      \
                                                                        \
  /* Only the byte offset is needed to locate the section ... */        \
  eifixup_byte (ei23data, ei23off, ei23charoff);                        \
  /* ... but both lengths are stored in the Eistring. */                \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen);          \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen);     \
} while (0)
1891 | |
/* Set EI to a copy of the LEN bytes of internal-format text at PTR.
   FMT must be FORMAT_DEFAULT (checked with assert()); OBJECT is not
   used.  PTR, LEN and FMT are each evaluated once. */
#define eicpy_raw_fmt(ei, ptr, len, fmt, object)                        \
do {                                                                    \
  const Ibyte *ei12ptr = (ptr);                                         \
  Internal_Format ei12fmt = (fmt);                                      \
  int ei12len = (len);                                                  \
                                                                        \
  assert (ei12fmt == FORMAT_DEFAULT);                                   \
  EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len,                              \
                     bytecount_to_charcount (ei12ptr, ei12len));        \
} while (0)

/* eicpy_raw_fmt() for text in the default internal format. */
#define eicpy_raw(ei, ptr, len)                                         \
  eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil)
1904 | |
/* Set EI to a copy of the null-terminated internal-format text at PTR;
   the length is taken with qxestrlen().  FMT must be FORMAT_DEFAULT
   (checked with assert()).  PTR and FMT are each evaluated exactly once:
   the captured copies, not the macro arguments, are handed on to
   eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt, object)                            \
do {                                                                    \
  const Ibyte *ei12p1ptr = (ptr);                                       \
  Internal_Format ei12p1fmt = (fmt);                                    \
                                                                        \
  assert (ei12p1fmt == FORMAT_DEFAULT);                                 \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt,       \
                 object);                                               \
} while (0)

/* eicpy_rawz_fmt() for text in the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil)
771 | 1914 |
/* Set EI to the single character CH: CH is encoded into a scratch
   buffer with set_itext_ichar() and the resulting bytes are copied in
   with a character length of 1. */
#define eicpy_ch(ei, ch)                                                \
do {                                                                    \
  Ibyte ei12p2[MAX_ICHAR_LEN];                                          \
  Bytecount ei12p2len = set_itext_ichar (ei12p2, ch);                   \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1);                         \
} while (0)
1921 | |
/* Set EI from the null-terminated ASCII string ASCSTR.  The string is
   checked with ASSERT_ASCTEXT_ASCII() and then copied in through
   eicpy_ext() using the `binary' coding system.  ASCSTR is evaluated
   once. */
#define eicpy_ascii(ei, ascstr)                                         \
do {                                                                    \
  const Ascbyte *ei4 = (ascstr);                                        \
                                                                        \
  ASSERT_ASCTEXT_ASCII (ei4);                                           \
  eicpy_ext (ei, ei4, Qbinary);                                         \
} while (0)

/* Set EI from the C_LEN bytes of ASCII text at ASCSTR.  The text is
   checked with ASSERT_ASCTEXT_ASCII_LEN() and then copied in through
   eicpy_ext_len() using the `binary' coding system.  ASCSTR and C_LEN
   are each evaluated once. */
#define eicpy_ascii_len(ei, ascstr, c_len)                              \
do {                                                                    \
  const Ascbyte *ei6 = (ascstr);                                        \
  int ei6len = (c_len);                                                 \
                                                                        \
  ASSERT_ASCTEXT_ASCII_LEN (ei6, ei6len);                               \
  eicpy_ext_len (ei, ei6, ei6len, Qbinary);                             \
} while (0)
1938 | |
/* Set EI from EXTLEN bytes of external data at EXTDATA, decoded
   according to CODESYS.  The conversion macro stores the converted text
   and its byte length directly into EI; the allocated size and the
   character length are then brought up to date.  EXTDATA and EXTLEN are
   each evaluated once.
   NOTE(review): the previous data_ pointer is overwritten without being
   released, and mallocp_ is not consulted -- confirm how this is meant
   to interact with malloc()ed Eistrings. */
#define eicpy_ext_len(ei, extdata, extlen, codesys)                     \
do {                                                                    \
  const Extbyte *ei7 = (extdata);                                       \
  Bytecount ei7len = (extlen);                                          \
                                                                        \
  SIZED_EXTERNAL_TO_SIZED_C_STRING (ei7, ei7len, (ei)->data_,           \
                                    (ei)->bytelen_, codesys);           \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1;                       \
  (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \
} while (0)

/* Set EI from the null-terminated external data at EXTDATA, decoded
   according to CODESYS.  The length is obtained from
   dfc_external_data_len().  EXTDATA and CODESYS are each evaluated
   exactly once: CODESYS is captured in a temporary because it is needed
   both for measuring and for converting. */
#define eicpy_ext(ei, extdata, codesys)                                 \
do {                                                                    \
  const Extbyte *ei8 = (extdata);                                       \
  Lisp_Object ei8codesys = (codesys);                                   \
                                                                        \
  eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, ei8codesys),      \
                 ei8codesys);                                           \
} while (0)
1957 | |
/* Placeholders for the buffer and lstream initializers.  The expansion
   is deliberately not valid C, so any use fails to compile. */
#define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen)         \
  NOT YET IMPLEMENTED

#define eicpy_lstream(eistr, lstream)                                   \
  NOT YET IMPLEMENTED

/* Make EISTR the empty string, by copying in a zero-length
   null-terminated text. */
#define eireset(eistr) eicpy_rawz (eistr, (Ibyte *) "")
771 | 1965 |
1966 /* ----- Getting the data out of the Eistring ----- */ | |
1967 | |
/* Pointer to the text stored in EI itself (not a copy). */
#define eidata(ei) ((ei)->data_)

/* Build a Lisp string from the whole text of EI, via make_string(). */
#define eimake_string(ei) make_string (eidata (ei), eilen (ei))
1971 | |
/* Build a Lisp string from a section of EISTR.  The section starts at
   byte offset OFF / char offset CHAROFF and covers LEN bytes / CHARLEN
   chars; OFF and LEN may each be given as -1, in which case they are
   derived from CHAROFF and CHARLEN respectively.  All arguments except
   EISTR are evaluated once.

   NOTE: this expands to a statement that ends in `return', so it
   returns the new string from the *enclosing* function.  It is therefore
   only usable as the final statement of a function returning
   Lisp_Object, not as an expression. */
#define eimake_string_off(eistr, off, charoff, len, charlen)            \
do {                                                                    \
  Bytecount ei24off = (off);                                            \
  Charcount ei24charoff = (charoff);                                    \
  Bytecount ei24len = (len);                                            \
  Charcount ei24charlen = (charlen);                                    \
                                                                        \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff);                  \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen);        \
                                                                        \
  return make_string ((eistr)->data_ + ei24off, ei24len);               \
} while (0)
1985 | |
1986 #define eicpyout_alloca(eistr, ptrout, lenout) \ | |
826 | 1987 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil) |
771 | 1988 #define eicpyout_malloc(eistr, lenout) \ |
826 | 1989 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil) |
867 | 1990 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, |
826 | 1991 Internal_Format fmt, Lisp_Object object); |
/* Store in the lvalue PTROUT an ALLOCA()ed copy of the text of EISTR
   (terminating null included) and in the lvalue LENOUT its length in
   bytes (terminating null not included).  FMT must be FORMAT_DEFAULT
   (checked with assert()); OBJECT is not used.  PTROUT, LENOUT and FMT
   are each evaluated once -- the two lvalues by taking their address,
   which is why the temporary for PTROUT must be an `Ibyte **'. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object)         \
do {                                                                    \
  Internal_Format ei23fmt = (fmt);                                      \
  Ibyte **ei23ptrout = &(ptrout);                                       \
  Bytecount *ei23lenout = &(lenout);                                    \
                                                                        \
  assert (ei23fmt == FORMAT_DEFAULT);                                   \
                                                                        \
  *ei23lenout = (eistr)->bytelen_;                                      \
  *ei23ptrout = alloca_ibytes ((eistr)->bytelen_ + 1);                  \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1);          \
} while (0)
2004 | |
2005 /* ----- Moving to the heap ----- */ | |
2006 | |
/* Release EI and reset it.  For a malloc()ed Eistring, the internal and
   external data blocks are freed (when present) and the structure is
   reset with eiinit_malloc(); otherwise it is simply reset with
   eiinit(). */
#define eifree(ei)                                                      \
do {                                                                    \
  if (!(ei)->mallocp_)                                                  \
    eiinit (ei);                                                        \
  else                                                                  \
    {                                                                   \
      if ((ei)->data_)                                                  \
        xfree ((ei)->data_, Ibyte *);                                   \
      if ((ei)->extdata_)                                               \
        xfree ((ei)->extdata_, Extbyte *);                              \
      eiinit_malloc (ei);                                               \
    }                                                                   \
} while (0)
2020 | |
2021 int eifind_large_enough_buffer (int oldbufsize, int needed_size); | |
2022 void eito_malloc_1 (Eistring *ei); | |
2023 | |
2024 #define eito_malloc(ei) eito_malloc_1 (ei) | |
2025 | |
/* Move EI back to ALLOCA()ed storage if it is currently malloc()ed;
   otherwise do nothing.  The internal data (with its terminating null)
   and the external data (if any) are copied into ALLOCA()ed blocks and
   the malloc()ed originals are freed.

   The whole body is guarded by a conditional rather than bailing out
   with `return': inside a macro, a `return' would leave the *calling*
   function. */
#define eito_alloca(ei)                                                 \
do {                                                                    \
  if ((ei)->mallocp_)                                                   \
    {                                                                   \
      (ei)->mallocp_ = 0;                                               \
      if ((ei)->data_)                                                  \
        {                                                               \
          Ibyte *ei13newdata;                                           \
                                                                        \
          (ei)->max_size_allocated_ =                                   \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1);         \
          ei13newdata = alloca_ibytes ((ei)->max_size_allocated_);      \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1);        \
          xfree ((ei)->data_, Ibyte *);                                 \
          (ei)->data_ = ei13newdata;                                    \
        }                                                               \
                                                                        \
      if ((ei)->extdata_)                                               \
        {                                                               \
          Extbyte *ei13newextdata =                                     \
            alloca_extbytes ((ei)->extlen_ + 2);                        \
                                                                        \
          memcpy (ei13newextdata, (ei)->extdata_, (ei)->extlen_);       \
          /* Double null-terminate in case of Unicode data */           \
          ei13newextdata[(ei)->extlen_] = '\0';                         \
          ei13newextdata[(ei)->extlen_ + 1] = '\0';                     \
          xfree ((ei)->extdata_, Extbyte *);                            \
          (ei)->extdata_ = ei13newextdata;                              \
        }                                                               \
    }                                                                   \
} while (0)
2055 | |
2056 | |
2057 /* ----- Retrieving the length ----- */ | |
2058 | |
2059 #define eilen(ei) ((ei)->bytelen_) | |
2060 #define eicharlen(ei) ((ei)->charlen_) | |
2061 | |
2062 | |
2063 /* ----- Working with positions ----- */ | |
2064 | |
2065 #define eicharpos_to_bytepos(ei, charpos) \ | |
2066 charcount_to_bytecount ((ei)->data_, charpos) | |
2067 #define eibytepos_to_charpos(ei, bytepos) \ | |
2068 bytecount_to_charcount ((ei)->data_, bytepos) | |
2069 | |
2070 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr, | |
2071 Bytecount bytepos, | |
2072 Charcount n)) | |
2073 { | |
867 | 2074 Ibyte *pos = eistr->data_ + bytepos; |
814 | 2075 Charcount i; |
771 | 2076 |
800 | 2077 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2078 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2079 /* We could check N more correctly now, but that would require a |
2080 call to bytecount_to_charcount(), which would be needlessly | |
2081 expensive (it would convert O(N) algorithms into O(N^2) algorithms | |
800 | 2082 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are |
867 | 2083 guaranteed to catch it either inside INC_IBYTEPTR() or in the check |
771 | 2084 below. */ |
2085 for (i = 0; i < n; i++) | |
867 | 2086 INC_IBYTEPTR (pos); |
800 | 2087 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_); |
771 | 2088 return pos - eistr->data_; |
2089 } | |
2090 | |
/* Advance BYTEPOS in EI by one character (eiincpos) or by N characters
   (eiincpos_n); the value is the new byte position.  There must be no
   space between the macro name and its parameter list, or the macro
   becomes object-like and every use fails to expand correctly. */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
2093 | |
2094 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr, | |
2095 Bytecount bytepos, | |
2096 Charcount n)) | |
2097 { | |
867 | 2098 Ibyte *pos = eistr->data_ + bytepos; |
771 | 2099 int i; |
2100 | |
800 | 2101 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2102 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2103 /* We could check N more correctly now, but ... see above. */ |
2104 for (i = 0; i < n; i++) | |
867 | 2105 DEC_IBYTEPTR (pos); |
800 | 2106 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_); |
771 | 2107 return pos - eistr->data_; |
2108 } | |
2109 | |
/* Move BYTEPOS in EI back by one character (eidecpos) or by N characters
   (eidecpos_n); the value is the new byte position.  There must be no
   space between the macro name and its parameter list, or the macro
   becomes object-like and every use fails to expand correctly. */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
2112 | |
2113 | |
2114 /* ----- Getting the character at a position ----- */ | |
2115 | |
2116 #define eigetch(ei, bytepos) \ | |
867 | 2117 itext_ichar ((ei)->data_ + (bytepos)) |
2118 #define eigetch_char(ei, charpos) itext_ichar_n ((ei)->data_, charpos) | |
771 | 2119 |
2120 | |
2121 /* ----- Setting the character at a position ----- */ | |
2122 | |
2123 #define eisetch(ei, bytepos, chr) \ | |
2124 eisub_ch (ei, bytepos, -1, -1, 1, chr) | |
2125 #define eisetch_char(ei, charpos, chr) \ | |
2126 eisub_ch (ei, -1, charpos, -1, 1, chr) | |
2127 | |
2128 | |
2129 /* ----- Concatenation ----- */ | |
2130 | |
2131 #define eicat_1(ei, data, bytelen, charlen) \ | |
2132 do { \ | |
2133 int ei14oldeibytelen = (ei)->bytelen_; \ | |
2134 int ei14bytelen = (bytelen); \ | |
2135 EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen, \ | |
2136 (ei)->charlen_ + (charlen), 1); \ | |
2137 memcpy ((ei)->data_ + ei14oldeibytelen, (data), \ | |
2138 ei14bytelen); \ | |
2139 } while (0) | |
2140 | |
2141 #define eicat_ei(ei, ei2) \ | |
2142 do { \ | |
2143 const Eistring *ei9 = (ei2); \ | |
2144 eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_); \ | |
2145 } while (0) | |
2146 | |
2421 | 2147 #define eicat_ascii(ei, ascstr) \ |
771 | 2148 do { \ |
2421 | 2149 const Ascbyte *ei15 = (ascstr); \ |
771 | 2150 int ei15len = strlen (ei15); \ |
2151 \ | |
2367 | 2152 ASSERT_ASCTEXT_ASCII_LEN (ei15, ei15len); \ |
771 | 2153 eicat_1 (ei, ei15, ei15len, \ |
867 | 2154 bytecount_to_charcount ((Ibyte *) ei15, ei15len)); \ |
771 | 2155 } while (0) |
2156 | |
2157 #define eicat_raw(ei, data, len) \ | |
2158 do { \ | |
2159 int ei16len = (len); \ | |
867 | 2160 const Ibyte *ei16data = (data); \ |
771 | 2161 eicat_1 (ei, ei16data, ei16len, \ |
2162 bytecount_to_charcount (ei16data, ei16len)); \ | |
2163 } while (0) | |
2164 | |
2165 #define eicat_rawz(ei, ptr) \ | |
2166 do { \ | |
867 | 2167 const Ibyte *ei16p5ptr = (ptr); \ |
771 | 2168 eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr)); \ |
2169 } while (0) | |
2170 | |
2171 #define eicat_lstr(ei, lisp_string) \ | |
2172 do { \ | |
2173 Lisp_Object ei17 = (lisp_string); \ | |
2174 eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \ | |
826 | 2175 string_char_length (ei17)); \ |
771 | 2176 } while (0) |
2177 | |
2178 #define eicat_ch(ei, ch) \ | |
2179 do { \ | |
1333 | 2180 Ibyte ei22ch[MAX_ICHAR_LEN]; \ |
867 | 2181 Bytecount ei22len = set_itext_ichar (ei22ch, ch); \ |
771 | 2182 eicat_1 (ei, ei22ch, ei22len, 1); \ |
2183 } while (0) | |
2184 | |
2185 | |
2186 /* ----- Replacement ----- */ | |
2187 | |
2188 /* Replace the section of an Eistring at (OFF, LEN) with the data at | |
2189 SRC of length SRCLEN. All positions have corresponding character values, | |
2190 and either can be -1 -- it will be computed from the other. */ | |
2191 | |
2192 #define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \ | |
2193 do { \ | |
2194 int ei18off = (off); \ | |
2195 int ei18charoff = (charoff); \ | |
2196 int ei18len = (len); \ | |
2197 int ei18charlen = (charlen); \ | |
867 | 2198 Ibyte *ei18src = (Ibyte *) (src); \ |
771 | 2199 int ei18srclen = (srclen); \ |
2200 int ei18srccharlen = (srccharlen); \ | |
2201 \ | |
2202 int ei18oldeibytelen = (ei)->bytelen_; \ | |
2203 \ | |
2204 eifixup_bytechar ((ei)->data_, ei18off, ei18charoff); \ | |
2205 eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen); \ | |
2206 eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen); \ | |
2207 \ | |
2208 EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len, \ | |
2209 (ei)->charlen_ + ei18srccharlen - ei18charlen, 0); \ | |
2210 if (ei18len != ei18srclen) \ | |
2211 memmove ((ei)->data_ + ei18off + ei18srclen, \ | |
2212 (ei)->data_ + ei18off + ei18len, \ | |
2213 /* include zero terminator. */ \ | |
2214 ei18oldeibytelen - (ei18off + ei18len) + 1); \ | |
2215 if (ei18srclen > 0) \ | |
2216 memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen); \ | |
2217 } while (0) | |
2218 | |
2219 #define eisub_ei(ei, off, charoff, len, charlen, ei2) \ | |
2220 do { \ | |
1333 | 2221 const Eistring *ei19 = (ei2); \ |
771 | 2222 eisub_1 (ei, off, charoff, len, charlen, ei19->data_, ei19->bytelen_, \ |
2223 ei19->charlen_); \ | |
2224 } while (0) | |
2225 | |
2421 | 2226 #define eisub_ascii(ei, off, charoff, len, charlen, ascstr) \ |
771 | 2227 do { \ |
2421 | 2228 const Ascbyte *ei20 = (ascstr); \ |
771 | 2229 int ei20len = strlen (ei20); \ |
2367 | 2230 ASSERT_ASCTEXT_ASCII_LEN (ei20, ei20len); \ |
771 | 2231 eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1); \ |
2232 } while (0) | |
2233 | |
2234 #define eisub_ch(ei, off, charoff, len, charlen, ch) \ | |
2235 do { \ | |
1333 | 2236 Ibyte ei21ch[MAX_ICHAR_LEN]; \ |
867 | 2237 Bytecount ei21len = set_itext_ichar (ei21ch, ch); \ |
771 | 2238 eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1); \ |
2239 } while (0) | |
2240 | |
2241 #define eidel(ei, off, charoff, len, charlen) \ | |
2242 eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0) | |
2243 | |
2244 | |
2245 /* ----- Converting to an external format ----- */ | |
2246 | |
1333 | 2247 #define eito_external(ei, codesys) \ |
771 | 2248 do { \ |
2249 if ((ei)->mallocp_) \ | |
2250 { \ | |
2251 if ((ei)->extdata_) \ | |
2252 { \ | |
1726 | 2253 xfree ((ei)->extdata_, Extbyte *); \ |
771 | 2254 (ei)->extdata_ = 0; \ |
2255 } \ | |
2256 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \ | |
2257 MALLOC, ((ei)->extdata_, (ei)->extlen_), \ | |
1333 | 2258 codesys); \ |
771 | 2259 } \ |
2260 else \ | |
2261 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \ | |
2262 ALLOCA, ((ei)->extdata_, (ei)->extlen_), \ | |
1318 | 2263 codesys); \ |
771 | 2264 } while (0) |
2265 | |
2266 #define eiextdata(ei) ((ei)->extdata_) | |
2267 #define eiextlen(ei) ((ei)->extlen_) | |
2268 | |
2269 | |
2270 /* ----- Searching in the Eistring for a character ----- */ | |
2271 | |
2272 #define eichr(eistr, chr) \ | |
2273 NOT YET IMPLEMENTED | |
2274 #define eichr_char(eistr, chr) \ | |
2275 NOT YET IMPLEMENTED | |
2276 #define eichr_off(eistr, chr, off, charoff) \ | |
2277 NOT YET IMPLEMENTED | |
2278 #define eichr_off_char(eistr, chr, off, charoff) \ | |
2279 NOT YET IMPLEMENTED | |
2280 #define eirchr(eistr, chr) \ | |
2281 NOT YET IMPLEMENTED | |
2282 #define eirchr_char(eistr, chr) \ | |
2283 NOT YET IMPLEMENTED | |
2284 #define eirchr_off(eistr, chr, off, charoff) \ | |
2285 NOT YET IMPLEMENTED | |
2286 #define eirchr_off_char(eistr, chr, off, charoff) \ | |
2287 NOT YET IMPLEMENTED | |
2288 | |
2289 | |
2290 /* ----- Searching in the Eistring for a string ----- */ | |
2291 | |
2292 #define eistr_ei(eistr, eistr2) \ | |
2293 NOT YET IMPLEMENTED | |
2294 #define eistr_ei_char(eistr, eistr2) \ | |
2295 NOT YET IMPLEMENTED | |
2296 #define eistr_ei_off(eistr, eistr2, off, charoff) \ | |
2297 NOT YET IMPLEMENTED | |
2298 #define eistr_ei_off_char(eistr, eistr2, off, charoff) \ | |
2299 NOT YET IMPLEMENTED | |
2300 #define eirstr_ei(eistr, eistr2) \ | |
2301 NOT YET IMPLEMENTED | |
2302 #define eirstr_ei_char(eistr, eistr2) \ | |
2303 NOT YET IMPLEMENTED | |
2304 #define eirstr_ei_off(eistr, eistr2, off, charoff) \ | |
2305 NOT YET IMPLEMENTED | |
2306 #define eirstr_ei_off_char(eistr, eistr2, off, charoff) \ | |
2307 NOT YET IMPLEMENTED | |
2308 | |
2421 | 2309 #define eistr_ascii(eistr, ascstr) \ |
771 | 2310 NOT YET IMPLEMENTED |
2421 | 2311 #define eistr_ascii_char(eistr, ascstr) \ |
771 | 2312 NOT YET IMPLEMENTED |
2421 | 2313 #define eistr_ascii_off(eistr, ascstr, off, charoff) \ |
771 | 2314 NOT YET IMPLEMENTED |
2421 | 2315 #define eistr_ascii_off_char(eistr, ascstr, off, charoff) \ |
771 | 2316 NOT YET IMPLEMENTED |
2421 | 2317 #define eirstr_ascii(eistr, ascstr) \ |
771 | 2318 NOT YET IMPLEMENTED |
2421 | 2319 #define eirstr_ascii_char(eistr, ascstr) \ |
771 | 2320 NOT YET IMPLEMENTED |
2421 | 2321 #define eirstr_ascii_off(eistr, ascstr, off, charoff) \ |
771 | 2322 NOT YET IMPLEMENTED |
2421 | 2323 #define eirstr_ascii_off_char(eistr, ascstr, off, charoff) \ |
771 | 2324 NOT YET IMPLEMENTED |
2325 | |
2326 | |
2327 /* ----- Comparison ----- */ | |
2328 | |
2329 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, | |
867 | 2330 Bytecount len, Charcount charlen, const Ibyte *data, |
2526 | 2331 const Eistring *ei2, int is_ascii, int fold_case); |
771 | 2332 |
2333 #define eicmp_ei(eistr, eistr2) \ | |
2334 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0) | |
2335 #define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \ | |
2336 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0) | |
2337 #define eicasecmp_ei(eistr, eistr2) \ | |
2338 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1) | |
2339 #define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \ | |
2340 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1) | |
2341 #define eicasecmp_i18n_ei(eistr, eistr2) \ | |
2342 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2) | |
2343 #define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \ | |
2344 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2) | |
2345 | |
/* Compare an Eistring (or the section of it given by OFF/CHAROFF and
   LEN/CHARLEN) against the zero-terminated ASCII string ASCSTR.  All of
   these forward to eicmp_1() with IS_ASCII set to 1; the last argument
   selects the variant: 0 for the plain eicmp_* forms, 1 for eicasecmp_*,
   2 for eicasecmp_i18n_*.  ASCSTR is parenthesized so that the cast
   applies to the whole argument expression, not just its first operand. */
#define eicmp_ascii(eistr, ascstr)					\
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 0)
#define eicmp_off_ascii(eistr, off, charoff, len, charlen, ascstr)	\
  eicmp_1 (eistr, off, charoff, len, charlen,				\
	   (const Ibyte *) (ascstr), 0, 1, 0)
#define eicasecmp_ascii(eistr, ascstr)					\
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_off_ascii(eistr, off, charoff, len, charlen, ascstr)	\
  eicmp_1 (eistr, off, charoff, len, charlen,				\
	   (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_i18n_ascii(eistr, ascstr)				\
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 2)
#define eicasecmp_i18n_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen,				\
	   (const Ibyte *) (ascstr), 0, 1, 2)
771 | 2358 |
2359 | |
2360 /* ----- Case-changing the Eistring ----- */ | |
2361 | |
867 | 2362 int eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata, |
771 | 2363 int downp); |
2364 | |
2365 #define EI_CASECHANGE(ei, downp) \ | |
2366 do { \ | |
867 | 2367 int ei11new_allocmax = (ei)->charlen_ * MAX_ICHAR_LEN + 1; \ |
1333 | 2368 Ibyte *ei11storage = \ |
2367 | 2369 (Ibyte *) alloca_ibytes (ei11new_allocmax); \ |
771 | 2370 int ei11newlen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, \ |
2371 ei11storage, downp); \ | |
2372 \ | |
2373 if (ei11newlen) \ | |
2374 { \ | |
2375 (ei)->max_size_allocated_ = ei11new_allocmax; \ | |
1333 | 2376 (ei)->data_ = ei11storage; \ |
771 | 2377 (ei)->bytelen_ = ei11newlen; \ |
2378 /* charlen is the same. */ \ | |
2379 } \ | |
2380 } while (0) | |
2381 | |
2382 #define eilwr(ei) EI_CASECHANGE (ei, 1) | |
2383 #define eiupr(ei) EI_CASECHANGE (ei, 0) | |
2384 | |
1743 | 2385 END_C_DECLS |
1650 | 2386 |
771 | 2387 |
2388 /************************************************************************/ | |
2389 /* */ | |
2390 /* Converting between internal and external format */ | |
2391 /* */ | |
2392 /************************************************************************/ | |
2393 /* | |
1318 | 2394 The macros below are used for converting data between different formats. |
2395 Generally, the data is textual, and the formats are related to | |
2396 internationalization (e.g. converting between internal-format text and | |
2397 UTF-8) -- but the mechanism is general, and could be used for anything, | |
2398 e.g. decoding gzipped data. | |
2399 | |
2400 In general, conversion involves a source of data, a sink, the existing | |
2401 format of the source data, and the desired format of the sink. The | |
2402 macros below, however, always require that either the source or sink is | |
2403 internal-format text. Therefore, in practice the conversions below | |
2404 involve source, sink, an external format (specified by a coding system), | |
2405 and the direction of conversion (internal->external or vice-versa). | |
2406 | |
2407 Sources and sinks can be raw data (sized or unsized -- when unsized, | |
2408 input data is assumed to be null-terminated [double null-terminated for | |
2409 Unicode-format data], and on output the length is not stored anywhere), | |
2410 Lisp strings, Lisp buffers, lstreams, and opaque data objects. When the | |
2411 output is raw data, the result can be allocated either with alloca() or | |
2412 malloc(). (There is currently no provision for writing into a fixed | |
2413 buffer. If you want this, use alloca() output and then copy the data -- | |
2414 but be careful with the size! Unless you are very sure of the encoding | |
2415 being used, upper bounds for the size are not in general computable.) | |
2416 The obvious restrictions on source and sink types apply (e.g. Lisp | |
2417 strings are a source and sink only for internal data). | |
2418 | |
2419 All raw data outputted will contain an extra null byte (two bytes for | |
2420 Unicode -- currently, in fact, all output data, whether internal or | |
2421 external, is double-null-terminated, but you can't count on this; see | |
2422 below). This means that enough space is allocated to contain the extra | |
2423 nulls; however, these nulls are not reflected in the returned output | |
2424 size. | |
2425 | |
2426 The most basic macros are TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. | |
2427 These can be used to convert between any kinds of sources or sinks. | |
2428 However, 99% of conversions involve raw data or Lisp strings as both | |
2429 source and sink, and usually data is output as alloca() rather than | |
2430 malloc(). For this reason, convenience macros are defined for many types | |
2431 of conversions involving raw data and/or Lisp strings, especially when | |
2432 the output is an alloca()ed string. (When the destination is a | |
2433 Lisp_String, there are other functions that should be used instead -- | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
2434 build_extstring() and make_extstring(), for example.) The convenience |
1318 | 2435 macros are of two types -- the older kind that store the result into a |
2436 specified variable, and the newer kind that return the result. The newer | |
2437 kind of macros don't exist when the output is sized data, because that | |
2438 would have two return values. NOTE: All convenience macros are | |
2439 ultimately defined in terms of TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. | |
2440 Thus, any comments below about the workings of these macros also apply to | |
2441 all convenience macros. | |
2442 | |
2443 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
2444 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
771 | 2445 |
2446 Typical use is | |
2447 | |
2367 | 2448 TO_EXTERNAL_FORMAT (LISP_STRING, str, C_STRING_MALLOC, ptr, Qfile_name); |
2449 | |
2450 which means that the contents of the lisp string `str' are written | |
2451 to a malloc'ed memory area which will be pointed to by `ptr', after the | |
2452 function returns. The conversion will be done using the `file-name' | |
2453 coding system (which will be controlled by the user indirectly by | |
2454 setting or binding the variable `file-name-coding-system'). | |
2455 | |
2456 Some sources and sinks require two C variables to specify. We use | |
2457 some preprocessor magic to allow different source and sink types, and | |
2458 even different numbers of arguments to specify different types of | |
2459 sources and sinks. | |
2460 | |
2461 So we can have a call that looks like | |
2462 | |
2463 TO_INTERNAL_FORMAT (DATA, (ptr, len), | |
2464 MALLOC, (ptr, len), | |
2465 coding_system); | |
2466 | |
2467 The parenthesized argument pairs are required to make the | |
2468 preprocessor magic work. | |
771 | 2469 |
2470 NOTE: GC is inhibited during the entire operation of these macros. This | |
2471 is because frequently the data to be converted comes from strings but | |
2472 gets passed in as just DATA, and GC may move around the string data. If | |
2473 we didn't inhibit GC, there'd have to be a lot of messy recoding, | |
2474 alloca-copying of strings and other annoying stuff. | |
2475 | |
2476 The source or sink can be specified in one of these ways: | |
2477 | |
2478 DATA, (ptr, len), // input data is a fixed buffer of size len | |
851 | 2479 ALLOCA, (ptr, len), // output data is in a ALLOCA()ed buffer of size len |
771 | 2480 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len |
2481 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output | |
2482 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output | |
2483 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr)) | |
2484 // on input (the Unicode version is used when correct) | |
2485 LISP_STRING, string, // input or output is a Lisp_Object of type string | |
2486 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer | |
2487 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream | |
2488 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque | |
2489 | |
2490 When specifying the sink, use lvalues, since the macro will assign to them, | |
2491 except when the sink is an lstream or a lisp buffer. | |
2492 | |
2367 | 2493 For the sink types `ALLOCA' and `C_STRING_ALLOCA', the resulting text is |
2494 stored in a stack-allocated buffer, which is automatically freed on | |
2495 returning from the function. However, the sink types `MALLOC' and | |
2496 `C_STRING_MALLOC' return `xmalloc()'ed memory. The caller is responsible | |
2497 for freeing this memory using `xfree()'. | |
2498 | |
771 | 2499 The macros accept the kinds of sources and sinks appropriate for |
2500 internal and external data representation. See the type_checking_assert | |
2501 macros below for the actual allowed types. | |
2502 | |
2503 Since some sources and sinks use one argument (a Lisp_Object) to | |
2504 specify them, while others take a (pointer, length) pair, we use | |
2505 some C preprocessor trickery to allow pair arguments to be specified | |
2506 by parenthesizing them, as in the examples above. | |
2507 | |
2508 Anything prefixed by dfc_ (`data format conversion') is private. | |
2509 They are only used to implement these macros. | |
2510 | |
2511 [[Using C_STRING* is appropriate for using with external APIs that | |
2512 take null-terminated strings. For internal data, we should try to | |
2513 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'. | |
2514 | |
2515 Sometime in the future we might allow output to C_STRING_ALLOCA or | |
2516 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not | |
2517 TO_INTERNAL_FORMAT().]] | |
2518 | |
2519 The above comments are not true. Frequently (most of the time, in | |
2520 fact), external strings come as zero-terminated entities, where the | |
2521 zero-termination is the only way to find out the length. Even in | |
2522 cases where you can get the length, most of the time the system will | |
2523 still use the null to signal the end of the string, and there will | |
2524 still be no way to either send in or receive a string with embedded | |
2525 nulls. In such situations, it's pointless to track the length | |
2526 because null bytes can never be in the string. We have a lot of | |
2527 operations that make it easy to operate on zero-terminated strings, | |
2528 and forcing the user to deal with the length everywhere would only | |
2529 make the code uglier and more complicated, for no gain. --ben | |
2530 | |
2531 There is no problem using the same lvalue for source and sink. | |
2532 | |
2533 Also, when pointers are required, the code (currently at least) is | |
2534 lax and allows any pointer types, either in the source or the sink. | |
2535 This makes it possible, e.g., to deal with internal format data held | |
2536 in char *'s or external format data held in WCHAR * (i.e. Unicode). | |
2537 | |
2538 Finally, whenever storage allocation is called for, extra space is | |
2539 allocated for a terminating zero, and such a zero is stored in the | |
2540 appropriate place, regardless of whether the source data was | |
2541 specified using a length or was specified as zero-terminated. This | |
2542 allows you to freely pass the resulting data, no matter how | |
2543 obtained, to a routine that expects zero termination (modulo, of | |
2544 course, that any embedded zeros in the resulting text will cause | |
2545 truncation). In fact, currently two terminating zeros are allocated | |
2546 and stored after the data result. This is to allow for the | |
2547 possibility of storing a Unicode value on output, which needs the | |
2548 two zeros. Currently, however, the two zeros are stored regardless | |
2549 of whether the conversion is internal or external and regardless of | |
2550 whether the external coding system is in fact Unicode. This | |
2551 behavior may change in the future, and you cannot rely on this -- | |
2552 the most you can rely on is that sink data in Unicode format will | |
2553 have two terminating nulls, which combine to form one Unicode null | |
2367 | 2554 character. |
2555 | |
2556 NOTE: You might ask, why are these not written as functions that | |
2557 *RETURN* the converted string, since that would allow them to be used | |
2558 much more conveniently, without having to constantly declare temporary | |
2559 variables? The answer is that in fact I originally did write the | |
2560 routines that way, but that required either | |
2561 | |
2562 (a) calling alloca() inside of a function call, or | |
2563 (b) using expressions separated by commas and a global temporary variable, or | |
2564 (c) using the GCC extension ({ ... }). | |
2565 | |
2566 Turned out that all of the above had bugs, all caused by GCC (hence the | |
2567 comments about "those GCC wankers" and "ream gcc up the ass"). As for | |
2568 (a), some versions of GCC (especially on Intel platforms) had | |
2569 buggy implementations of alloca() that couldn't handle being called | |
2570 inside of a function call -- they just decremented the stack right in the | |
2571 middle of pushing args. Oops, crash with stack trashing, very bad. (b) | |
2572 was an attempt to fix (a), and that led to further GCC crashes, esp. when | |
2573 you had two such calls in a single subexpression, because GCC couldn't be | |
2574 counted upon to follow even a minimally reasonable order of execution. | |
2575 True, you can't count on one argument being evaluated before another, but | |
2576 GCC would actually interleave them so that the temp var got stomped on by | |
2577 one while the other was accessing it. So I tried (c), which was | |
2578 problematic because that GCC extension has more bugs in it than a | |
2579 termite's nest. | |
2580 | |
2581 So reluctantly I converted to the current way. Now, that was awhile ago | |
2582 (c. 1994), and it appears that the bug involving alloca in function calls | |
2583 has long since been fixed. More recently, I defined the new-dfc routines | |
2584 down below, which DO allow exactly such convenience of returning your | |
2585 args rather than store them in temp variables, and I also wrote a | |
2586 configure check to see whether alloca() causes crashes inside of function | |
2587 calls, and if so use the portable alloca() implementation in alloca.c. | |
2588 If you define TEST_NEW_DFC, the old routines get written in terms of the | |
2589 new ones, and I've had a beta put out with this on, and it | |
2590 appears to cause no problems -- so we should consider | |
2591 switching, and feel no compunctions about writing further such function- | |
2592 like alloca() routines in lieu of statement-like ones. --ben */ | |
771 | 2593 |
2594 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \ | |
2595 do { \ | |
2596 dfc_conversion_type dfc_simplified_source_type; \ | |
2597 dfc_conversion_type dfc_simplified_sink_type; \ | |
2598 dfc_conversion_data dfc_source; \ | |
2599 dfc_conversion_data dfc_sink; \ | |
2600 Lisp_Object dfc_codesys = (codesys); \ | |
2601 \ | |
2602 type_checking_assert \ | |
2603 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
2604 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
2605 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \ | |
2606 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
2607 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
2608 && \ | |
2609 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
2610 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
2611 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
2612 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
2613 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
2614 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \ | |
2615 \ | |
2616 DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \ | |
2617 DFC_SINK_##sink_type##_TO_ARGS (sink); \ | |
2618 \ | |
2619 dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \ | |
2620 dfc_codesys, \ | |
2621 dfc_simplified_sink_type, &dfc_sink); \ | |
2622 \ | |
2623 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \ | |
2624 } while (0) | |
2625 | |
2626 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \ | |
2627 do { \ | |
2628 dfc_conversion_type dfc_simplified_source_type; \ | |
2629 dfc_conversion_type dfc_simplified_sink_type; \ | |
2630 dfc_conversion_data dfc_source; \ | |
2631 dfc_conversion_data dfc_sink; \ | |
2632 Lisp_Object dfc_codesys = (codesys); \ | |
2633 \ | |
2634 type_checking_assert \ | |
2635 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
2636 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
2637 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
2638 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
2639 && \ | |
2640 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
2641 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
2642 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
2643 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
2644 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \ | |
2645 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
2646 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \ | |
2647 \ | |
2648 DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \ | |
2649 DFC_SINK_##sink_type##_TO_ARGS (sink); \ | |
2650 \ | |
2651 dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, \ | |
2652 dfc_codesys, \ | |
2653 dfc_simplified_sink_type, &dfc_sink); \ | |
2654 \ | |
2655 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \ | |
2656 } while (0) | |
2657 | |
814 | 2658 #ifdef __cplusplus |
771 | 2659 |
814 | 2660 /* Error if you try to use a union here: "member `struct {anonymous |
2661 union}::{anonymous} {anonymous union}::data' with constructor not allowed | |
2662 in union" (Bytecount is a class) */ | |
2663 | |
2664 typedef struct | |
2665 #else | |
771 | 2666 typedef union |
814 | 2667 #endif |
771 | 2668 { |
2669 struct { const void *ptr; Bytecount len; } data; | |
2670 Lisp_Object lisp_object; | |
2671 } dfc_conversion_data; | |
2672 | |
2673 enum dfc_conversion_type | |
2674 { | |
2675 DFC_TYPE_DATA, | |
2676 DFC_TYPE_ALLOCA, | |
2677 DFC_TYPE_MALLOC, | |
2678 DFC_TYPE_C_STRING, | |
2679 DFC_TYPE_C_STRING_ALLOCA, | |
2680 DFC_TYPE_C_STRING_MALLOC, | |
2681 DFC_TYPE_LISP_STRING, | |
2682 DFC_TYPE_LISP_LSTREAM, | |
2683 DFC_TYPE_LISP_OPAQUE, | |
2684 DFC_TYPE_LISP_BUFFER | |
2685 }; | |
2686 typedef enum dfc_conversion_type dfc_conversion_type; | |
2687 | |
1743 | 2688 BEGIN_C_DECLS |
1650 | 2689 |
771 | 2690 /* WARNING: These use a static buffer. This can lead to disaster if |
2691 these functions are not used *very* carefully. Another reason to only use | |
2692 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */ | |
1632 | 2693 MODULE_API void |
771 | 2694 dfc_convert_to_external_format (dfc_conversion_type source_type, |
2695 dfc_conversion_data *source, | |
1318 | 2696 Lisp_Object codesys, |
771 | 2697 dfc_conversion_type sink_type, |
2698 dfc_conversion_data *sink); | |
1632 | 2699 MODULE_API void |
771 | 2700 dfc_convert_to_internal_format (dfc_conversion_type source_type, |
2701 dfc_conversion_data *source, | |
1318 | 2702 Lisp_Object codesys, |
771 | 2703 dfc_conversion_type sink_type, |
2704 dfc_conversion_data *sink); | |
2705 /* CPP Trickery */ | |
2706 #define DFC_CPP_CAR(x,y) (x) | |
2707 #define DFC_CPP_CDR(x,y) (y) | |
2708 | |
2709 /* Convert `source' to args for dfc_convert_to_external_format() */ | |
2710 #define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \ | |
2711 dfc_source.data.ptr = DFC_CPP_CAR val; \ | |
2712 dfc_source.data.len = DFC_CPP_CDR val; \ | |
2713 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2714 } while (0) | |
/* Source is a zero-terminated C string VAL holding internal-format text:
   record the pointer, take the length from strlen(), and hand it on as
   plain DATA.  CODESYS is not used.  The pointer is only read, so the
   cast keeps it const. */
#define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do {		\
  dfc_source.data.len =							\
    strlen ((const char *) (dfc_source.data.ptr = (val)));		\
  dfc_simplified_source_type = DFC_TYPE_DATA;				\
} while (0)
2720 #define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \ | |
2721 Lisp_Object dfc_slsta = (val); \ | |
2722 type_checking_assert (STRINGP (dfc_slsta)); \ | |
2723 dfc_source.lisp_object = dfc_slsta; \ | |
2724 dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \ | |
2725 } while (0) | |
/* LISP_LSTREAM source: VAL is evaluated once into a local, checked with
   type_checking_assert (LSTREAMP (...)), and passed on as a Lisp object
   with simplified type DFC_TYPE_LISP_LSTREAM.  CODESYS is unused. */
2726 #define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \ | |
2727 Lisp_Object dfc_sllta = (val); \ | |
2728 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
2729 dfc_source.lisp_object = dfc_sllta; \ | |
2730 dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \ | |
2731 } while (0) | |
/* LISP_OPAQUE source: the opaque object's data pointer and size (via
   OPAQUE_DATA / OPAQUE_SIZE) are passed on as plain DFC_TYPE_DATA.
   CODESYS is unused.
   NOTE(review): unlike the LISP_STRING and LISP_LSTREAM variants there is
   no explicit type_checking_assert here; presumably XOPAQUE() performs
   its own check in error-checking builds -- confirm. */
2732 #define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \ | |
2733 Lisp_Opaque *dfc_slota = XOPAQUE (val); \ | |
2734 dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \ | |
2735 dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \ | |
2736 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2737 } while (0) | |
2738 | |
2739 /* Convert `source' to args for dfc_convert_to_internal_format() */ | |
/* Source preparation for the to-internal direction.  Every variant except
   C_STRING simply forwards to its DFC_EXT_SOURCE_* twin.  C_STRING differs
   because the input is external text: its length is obtained from
   dfc_external_data_len (ptr, codesys) rather than strlen(), which is the
   one place in this family where CODESYS is actually used. */
2740 #define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \ | |
2741 DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys) | |
2742 #define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \ | |
2743 dfc_source.data.len = dfc_external_data_len (dfc_source.data.ptr = (val), \ | |
2744 codesys); \ | |
2745 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2746 } while (0) | |
2747 #define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \ | |
2748 DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys) | |
2749 #define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \ | |
2750 DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys) | |
2751 #define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \ | |
2752 DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys) | |
2753 | |
2754 /* Convert `sink' to args for dfc_convert_to_*_format() */ | |
/* Sink kinds whose result is first produced as raw data: each of these
   only sets dfc_simplified_sink_type to DFC_TYPE_DATA and ignores VAL.
   The user's sink lvalue is assigned afterwards by the matching
   DFC_*_USE_CONVERTED_DATA macro.  Note the expansions are bare
   assignment expressions with no trailing semicolon. */
2755 #define DFC_SINK_ALLOCA_TO_ARGS(val) \ | |
2756 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2757 #define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \ | |
2758 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2759 #define DFC_SINK_MALLOC_TO_ARGS(val) \ | |
2760 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2761 #define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \ | |
2762 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2763 #define DFC_SINK_LISP_STRING_TO_ARGS(val) \ | |
2764 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2765 #define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \ | |
2766 dfc_simplified_sink_type = DFC_TYPE_DATA | |
/* LISP_LSTREAM sink: VAL is evaluated once, checked with
   type_checking_assert (LSTREAMP (...)), and stored in
   dfc_sink.lisp_object with simplified type DFC_TYPE_LISP_LSTREAM. */
2767 #define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \ | |
2768 Lisp_Object dfc_sllta = (val); \ | |
2769 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
2770 dfc_sink.lisp_object = dfc_sllta; \ | |
2771 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
2772 } while (0) | |
/* LISP_BUFFER sink: wraps the buffer in a buffer output stream created at
   the buffer's current point (BUF_PT) and hands that stream on as a
   DFC_TYPE_LISP_LSTREAM sink.  The stream created here is the one that
   DFC_LISP_BUFFER_USE_CONVERTED_DATA later passes to Lstream_delete(). */
2773 #define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \ | |
2774 struct buffer *dfc_slbta = XBUFFER (val); \ | |
2775 dfc_sink.lisp_object = \ | |
2776 make_lisp_buffer_output_stream \ | |
2777 (dfc_slbta, BUF_PT (dfc_slbta), 0); \ | |
2778 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
2779 } while (0) | |
2780 | |
2781 /* Assign to the `sink' lvalue(s) using the converted data. */ | |
2782 /* + 2 because we double zero-extended to account for Unicode conversion */ | |
/* Copy the converted bytes out of dfc_sink.data into storage owned by the
   caller and assign the caller's sink lvalue(s).

   - ALLOCA variants copy into ALLOCA() storage, MALLOC variants into
     xmalloc() storage (NOTE(review): the caller presumably has to free
     the latter -- confirm against callers).
   - The plain variants take SINK as a parenthesized (pointer-lvalue,
     length-lvalue) pair and also store dfc_sink.data.len; the C_STRING
     variants take a single pointer lvalue and store no length.
   - All four copy data.len + 2 bytes so that the two terminating zero
     bytes following the data travel with it. */
2783 typedef union { char c; void *p; } *dfc_aliasing_voidpp; | |
2784 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
851 | 2785 void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2); \ |
771 | 2786 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ |
2367 | 2787 VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret; \ |
771 | 2788 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ |
2789 } while (0) | |
2790 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
2791 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \ | |
2792 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ | |
2367 | 2793 VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret; \ |
771 | 2794 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ |
2795 } while (0) | |
2796 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
851 | 2797 void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2); \ |
771 | 2798 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ |
2367 | 2799 VOIDP_CAST (sink) = dfc_sink_ret; \ |
771 | 2800 } while (0) |
2801 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
2802 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \ | |
2803 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ | |
2367 | 2804 VOIDP_CAST (sink) = dfc_sink_ret; \ |
771 | 2805 } while (0) |
/* Lisp-object sinks.
   - LISP_STRING / LISP_OPAQUE: build a new string / opaque object from the
     converted bytes and assign it to the SINK lvalue.
   - LISP_LSTREAM: expands to nothing; the data was already written to the
     stream during conversion.
   - LISP_BUFFER: SINK is not used; only the temporary output stream held
     in dfc_sink.lisp_object is deleted. */
2806 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \ | |
867 | 2807 sink = make_string ((Ibyte *) dfc_sink.data.ptr, dfc_sink.data.len) |
771 | 2808 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \ |
2809 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len) | |
2810 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */ | |
2811 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \ | |
2812 Lstream_delete (XLSTREAM (dfc_sink.lisp_object)) | |
2813 | |
1318 | 2814 /* #define TEST_NEW_DFC */ |
2815 | |
771 | 2816 /* Convenience macros for extremely common invocations */ |
/* Statement-style conversions that store an alloca()-space result in OUT.
   Two interchangeable implementations:
   - TEST_NEW_DFC defined: built on the value-returning NEW_* macros; the
     result is stored through a cast of OUT's address to Extbyte ** (or
     Ibyte **), so OUT must be a pointer lvalue.
   - otherwise: expand to TO_EXTERNAL_FORMAT / TO_INTERNAL_FORMAT with a
     C_STRING_ALLOCA sink.  The SIZED_* forms pass (in, inlen) as a DATA
     source instead of relying on NUL termination. */
1318 | 2817 #ifdef TEST_NEW_DFC |
2818 #define C_STRING_TO_EXTERNAL(in, out, codesys) \ | |
2819 do { * (Extbyte **) &(out) = \ | |
2820 NEW_C_STRING_TO_EXTERNAL (in, codesys); } while (0) | |
2821 #define SIZED_C_STRING_TO_EXTERNAL(in, inlen, out, codesys) \ | |
2822 do { * (Extbyte **) &(out) = \ | |
2823 NEW_SIZED_C_STRING_TO_EXTERNAL (in, inlen, codesys); } while (0) | |
2824 #define EXTERNAL_TO_C_STRING(in, out, codesys) \ | |
2825 do { * (Ibyte **) &(out) = \ | |
2826 NEW_EXTERNAL_TO_C_STRING (in, codesys); } while (0) | |
2827 #define SIZED_EXTERNAL_TO_C_STRING(in, inlen, out, codesys) \ | |
2828 do { * (Ibyte **) &(out) = \ | |
2829 NEW_SIZED_EXTERNAL_TO_C_STRING (in, inlen, codesys); } while (0) | |
2830 #define LISP_STRING_TO_EXTERNAL(in, out, codesys) \ | |
2831 do { * (Extbyte **) &(out) = \ | |
2832 NEW_LISP_STRING_TO_EXTERNAL (in, codesys); } while (0) | |
2833 #else | |
2834 #define C_STRING_TO_EXTERNAL(in, out, codesys) \ | |
2835 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, codesys) | |
2836 #define SIZED_C_STRING_TO_EXTERNAL(in, inlen, out, codesys) \ | |
2837 TO_EXTERNAL_FORMAT (DATA, (in, inlen), C_STRING_ALLOCA, out, codesys) | |
2838 #define EXTERNAL_TO_C_STRING(in, out, codesys) \ | |
2839 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, codesys) | |
2840 #define SIZED_EXTERNAL_TO_C_STRING(in, inlen, out, codesys) \ | |
2841 TO_INTERNAL_FORMAT (DATA, (in, inlen), C_STRING_ALLOCA, out, codesys) | |
2842 #define LISP_STRING_TO_EXTERNAL(in, out, codesys) \ | |
2843 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, codesys) | |
2844 #endif /* TEST_NEW_DFC */ | |
2845 | |
/* As the macros above, but the result length is returned too: the sink is
   the ALLOCA kind and takes the pair (out, outlen).  These have a single
   implementation regardless of TEST_NEW_DFC. */
2846 #define C_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \ | |
2847 TO_EXTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys) | |
2848 #define SIZED_C_STRING_TO_SIZED_EXTERNAL(in, inlen, out, outlen, codesys) \ | |
2849 TO_EXTERNAL_FORMAT (DATA, (in, inlen), ALLOCA, (out, outlen), codesys) | |
2850 #define EXTERNAL_TO_SIZED_C_STRING(in, out, outlen, codesys) \ | |
2851 TO_INTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys) | |
2852 #define SIZED_EXTERNAL_TO_SIZED_C_STRING(in, inlen, out, outlen, codesys) \ | |
2853 TO_INTERNAL_FORMAT (DATA, (in, inlen), ALLOCA, (out, outlen), codesys) | |
2854 #define LISP_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \ | |
2855 TO_EXTERNAL_FORMAT (LISP_STRING, in, ALLOCA, (out, outlen), codesys) | |
2856 | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
2857 /* In place of EXTERNAL_TO_LISP_STRING(), use build_extstring() and/or |
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
2858 make_extstring(). */ |
1318 | 2859 |
/* *_MALLOC twins of the alloca-based convenience macros: same argument
   lists, but the sink kind is C_STRING_MALLOC (or, under TEST_NEW_DFC, the
   NEW_*_MALLOC value-returning macros are used).
   NOTE(review): the result is heap storage the caller presumably must
   release -- confirm which deallocator pairs with xmalloc(). */
2860 #ifdef TEST_NEW_DFC | |
2861 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \ | |
2862 do { * (Extbyte **) &(out) = \ | |
2863 NEW_C_STRING_TO_EXTERNAL_MALLOC (in, codesys); } while (0) | |
2367 | 2864 #define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys) \ |
2865 do { * (Extbyte **) &(out) = \ | |
2866 NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC (in, inlen, codesys); } \ | |
2867 while (0) | |
1318 | 2868 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys) \ |
2869 do { * (Ibyte **) &(out) = \ | |
2870 NEW_EXTERNAL_TO_C_STRING_MALLOC (in, codesys); } while (0) | |
2367 | 2871 #define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys) \ |
2872 do { * (Ibyte **) &(out) = \ | |
2873 NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC (in, inlen, codesys); } \ | |
2874 while (0) | |
1318 | 2875 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \ |
2876 do { * (Extbyte **) &(out) = \ | |
2877 NEW_LISP_STRING_TO_EXTERNAL_MALLOC (in, codesys); } while (0) | |
2878 #else | |
2879 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \ | |
2880 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys) | |
2367 | 2881 #define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys) \ |
2882 TO_EXTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys) | |
1318 | 2883 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys) \ |
2884 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys) | |
2367 | 2885 #define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys) \ |
2886 TO_INTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys) | |
1318 | 2887 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \ |
2888 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, codesys) | |
2889 #endif /* TEST_NEW_DFC */ | |
2890 | |
/* Sized *_MALLOC variants: the sink is the MALLOC kind and takes the pair
   (out, outlen), so both the pointer and the byte length are returned. */
2367 | 2891 #define C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \ |
2892 TO_EXTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys) | |
2893 #define SIZED_C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, inlen, out, outlen, \ | |
2894 codesys) \ | |
2895 TO_EXTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys) | |
2896 #define EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, out, outlen, codesys) \ | |
2897 TO_INTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys) | |
2898 #define SIZED_EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, inlen, out, outlen, \ | |
2899 codesys) \ | |
2900 TO_INTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys) | |
2901 #define LISP_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \ | |
2902 TO_EXTERNAL_FORMAT (LISP_STRING, in, MALLOC, (out, outlen), codesys) | |
2903 | |
/* Kind of source handed to the new_dfc_convert_*() functions.  As used by
   the NEW_* macros in this header: the plain EXTERNAL / INTERNAL kinds are
   passed together with a size of -1, the SIZED_* kinds with an explicit
   length, and DFC_LISP_STRING with a Lisp object converted by
   LISP_TO_VOID() and a size of -1. */
1318 | 2904 enum new_dfc_src_type |
2905 { | |
2906 DFC_EXTERNAL, | |
2907 DFC_SIZED_EXTERNAL, | |
2908 DFC_INTERNAL, | |
2909 DFC_SIZED_INTERNAL, | |
2910 DFC_LISP_STRING | |
2911 }; | |
2912 | |
/* Prototypes for the functions behind the NEW_* macros (definitions are
   not in this header).
   - new_dfc_convert_malloc(): used directly by the NEW_*_MALLOC macros,
     which cast its void * result to Ibyte * or Extbyte *.
   - new_dfc_convert_size() / new_dfc_convert_copy_data(): used as a pair
     by NEW_DFC_CONVERT_1_ALLOCA; both receive the stringified source
     expression as SRCTEXT, and the value of the former is what gets passed
     to the alloca wrapper whose result becomes ALLOCA_DATA. */
1632 | 2913 MODULE_API void *new_dfc_convert_malloc (const void *src, Bytecount src_size, |
2914 enum new_dfc_src_type type, | |
2915 Lisp_Object codesys); | |
2367 | 2916 MODULE_API Bytecount new_dfc_convert_size (const char *srctext, |
2917 const void *src, | |
1632 | 2918 Bytecount src_size, |
2919 enum new_dfc_src_type type, | |
2920 Lisp_Object codesys); | |
2367 | 2921 MODULE_API void *new_dfc_convert_copy_data (const char *srctext, |
2922 void *alloca_data); | |
1318 | 2923 |
1743 | 2924 END_C_DECLS |
1650 | 2925 |
1318 | 2926 /* Version of EXTERNAL_TO_C_STRING that *RETURNS* the translated string, |
2927 still in alloca() space. Requires some trickiness to do this, but gets | |
2928 it done! */ | |
2929 | |
2930 /* NOTE: If you make two invocations of the dfc functions below in the same | |
2931 subexpression and use the exact same expression for the source in both | |
2932 cases, you will lose. In this unlikely case, you will get an abort, and | |
2933 need to rewrite the code. | |
2934 */ | |
2935 | |
2936 /* We need to use ALLOCA_FUNCALL_OK here. Some compilers have been known | |
2937 to choke when alloca() occurs as a funcall argument, and so we check | |
2938 this in configure. Rewriting the expressions below to use a temporary | |
2939 variable, so that the call to alloca() is outside of | |
2382 | 2940 new_dfc_convert_copy_data(), won't help because the entire NEW_DFC call |
1318 | 2941 could be inside of a function call. */ |
2942 | |
/* Two-step alloca conversion in a single expression:
   new_dfc_convert_size() is evaluated first (it is the argument of
   ALLOCA_FUNCALL_OK), and the block obtained for that size is then given
   to new_dfc_convert_copy_data().  Both calls receive #src -- the source
   expression as text -- which is why two invocations with textually
   identical sources in one subexpression are disallowed (see the NOTE
   above).  SRC must therefore not be wrapped in extra parentheses by
   callers that forward to this macro, or the text would change. */
2943 #define NEW_DFC_CONVERT_1_ALLOCA(src, src_size, type, codesys) \ | |
2367 | 2944 new_dfc_convert_copy_data \ |
1318 | 2945 (#src, ALLOCA_FUNCALL_OK (new_dfc_convert_size (#src, src, src_size, \ |
2946 type, codesys))) | |
2947 | |
/* Value-returning conversion macros.

   Each one yields the converted text as an expression: Ibyte * for the
   EXTERNAL_TO_C_STRING direction, Extbyte * for the *_TO_EXTERNAL
   direction.  The plain forms go through NEW_DFC_CONVERT_1_ALLOCA (result
   lives in alloca() space); the *_MALLOC forms call
   new_dfc_convert_malloc() instead.  Unsized sources pass -1 as the
   length; LISP_STRING sources are passed via LISP_TO_VOID().

   Every expansion is wrapped in parentheses so that the cast applies to
   the whole call even when the macro result is followed by a postfix
   operator such as [i].  SRC is deliberately forwarded WITHOUT added
   parentheses, because NEW_DFC_CONVERT_1_ALLOCA stringifies it.  */
#define NEW_EXTERNAL_TO_C_STRING(src, codesys)				\
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_EXTERNAL, codesys))
#define NEW_EXTERNAL_TO_C_STRING_MALLOC(src, codesys)			\
  ((Ibyte *) new_dfc_convert_malloc (src, -1, DFC_EXTERNAL, codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING(src, len, codesys)		\
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_EXTERNAL,	\
				       codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC(src, len, codesys)	\
  ((Ibyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_EXTERNAL,	\
				     codesys))
#define NEW_C_STRING_TO_EXTERNAL(src, codesys)				\
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_INTERNAL, codesys))
#define NEW_C_STRING_TO_EXTERNAL_MALLOC(src, codesys)			\
  ((Extbyte *) new_dfc_convert_malloc (src, -1, DFC_INTERNAL, codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL(src, len, codesys)		\
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_INTERNAL,	\
					 codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC(src, len, codesys)	\
  ((Extbyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_INTERNAL,	\
				       codesys))
#define NEW_LISP_STRING_TO_EXTERNAL(src, codesys)			\
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (LISP_TO_VOID (src), -1,	\
					 DFC_LISP_STRING, codesys))
#define NEW_LISP_STRING_TO_EXTERNAL_MALLOC(src, codesys)		\
  ((Extbyte *) new_dfc_convert_malloc (LISP_TO_VOID (src), -1,		\
				       DFC_LISP_STRING, codesys))
771 | 2970 |
2367 | 2971 /* Wexttext functions. The type of Wexttext is selected at compile time |
2972 and will sometimes be wchar_t, sometimes char. */ | |
2973 | |
/* Comparison of a wide string against an Ascbyte string; these are what
   wext_strcmp_ascii / wext_strncmp_ascii map to when WEXTTEXT_IS_WIDE is
   defined.  Prototypes only.
   NOTE(review): the return convention is presumably that of strcmp() /
   strncmp(), which the non-wide build substitutes -- confirm. */
2974 int wcscmp_ascii (const wchar_t *s1, const Ascbyte *s2); | |
2975 int wcsncmp_ascii (const wchar_t *s1, const Ascbyte *s2, Charcount len); | |
2976 | |
/* Wexttext primitives.  Each wext_* name maps to the wide-character
   routine when WEXTTEXT_IS_WIDE is defined and to the ordinary char
   routine otherwise, so code written against wext_* compiles either way.

   WEXTTEXT_ZTERM_SIZE   size in bytes of one terminating zero element
   WEXTSTRING(lit)       spell a string literal in the Wexttext type
   build_wext_string     make a Lisp string from Wexttext via
			 build_extstring()
   WEXTTEXT_TO_8_BIT     view/convert Wexttext as Extbyte text

   Macro arguments that are cast are parenthesized so that an argument
   such as `p + 1' is offset in units of its own type before the cast.  */
#ifdef WEXTTEXT_IS_WIDE /* defined under MS Windows i.e. WIN32_NATIVE */
#define WEXTTEXT_ZTERM_SIZE (sizeof (wchar_t))
/* Extra indirection needed in case of manifest constant as arg */
#define WEXTSTRING_1(arg) L##arg
#define WEXTSTRING(arg) WEXTSTRING_1(arg)
#define wext_strlen wcslen
#define wext_strcmp wcscmp
#define wext_strncmp wcsncmp
#define wext_strcmp_ascii wcscmp_ascii
#define wext_strncmp_ascii wcsncmp_ascii
#define wext_strcpy wcscpy
#define wext_strncpy wcsncpy
#define wext_strchr wcschr
#define wext_strrchr wcsrchr
#define wext_strdup wcsdup
#define wext_atol(str) wcstol (str, 0, 10)
#define wext_sprintf wsprintfW /* Huh? both wsprintfA and wsprintfW? */
#define wext_getenv _wgetenv
#define build_wext_string(str, cs) build_extstring ((Extbyte *) (str), cs)
#define WEXTTEXT_TO_8_BIT(arg) WEXTTEXT_TO_MULTIBYTE(arg)
#ifdef WIN32_NATIVE
int XCDECL wext_retry_open (const Wexttext *path, int oflag, ...);
#else
#error Cannot handle Wexttext yet on this system
#endif
#define wext_access _waccess
#define wext_stat _wstat
#else
#define WEXTTEXT_ZTERM_SIZE (sizeof (char))
#define WEXTSTRING(arg) arg
#define wext_strlen strlen
#define wext_strcmp strcmp
#define wext_strncmp strncmp
#define wext_strcmp_ascii strcmp
#define wext_strncmp_ascii strncmp
#define wext_strcpy strcpy
#define wext_strncpy strncpy
#define wext_strchr strchr
#define wext_strrchr strrchr
#define wext_strdup xstrdup
#define wext_atol(str) atol (str)
#define wext_sprintf sprintf
#define wext_getenv getenv
#define build_wext_string build_extstring
#define wext_retry_open retry_open
#define wext_access access
#define wext_stat stat
#define WEXTTEXT_TO_8_BIT(arg) ((Extbyte *) (arg))
#endif
3026 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3027 /* Standins for various encodings. |
1318 | 3028 |
3029 About encodings in X: | |
3030 | |
3031 X works with 5 different encodings: | |
3032 | |
3033 -- "Host Portable Character Encoding" == printable ASCII + space, tab, | |
3034 newline | |
3035 | |
3036 -- STRING encoding == ASCII + Latin-1 + tab, newline | |
3037 | |
3038 -- Locale-specific encoding | |
3039 | |
3040 -- Compound text == STRING encoding + ISO-2022 escape sequences to | |
3041 switch between different locale-specific encodings. | |
3042 | |
3043 -- ANSI C wide-character encoding | |
3044 | |
3045 The Host Portable Character Encoding (HPCE) is used for atom names, font | |
3046 names, color names, keysyms, geometry strings, resource manager quarks, | |
3047 display names, locale names, and various other things. When describing | |
3048 such strings, the X manual typically says "If the ... is not in the Host | |
3049 Portable Character Encoding, the result is implementation dependent." | |
3050 | |
3051 The wide-character encoding is used only in the Xwc* functions, which | |
3052 are provided as equivalents to Xmb* functions. | |
3053 | |
3054 STRING and compound text are used in the value of string properties and | |
3055 selection data, both of which are values with an associated type atom, | |
3056 which can be STRING or COMPOUND_TEXT. It can also be a locale name, as | |
3057 specified in setlocale() (#### as usual, there is no normalization | |
3058 whatsoever of these names). | |
3059 | |
3060 X also defines a type called "TEXT", which is used only as a requested | |
3061 type, and produces data in a type "convenient to the owner". However, | |
3062 there is some indication that X expects this to be the locale-specific | |
3063 encoding. | |
3064 | |
3065 According to the glossary, the locale is used in | |
3066 | |
3067 -- Encoding and processing of input method text | |
3068 -- Encoding of resource files and values | |
3069 -- Encoding and imaging of text strings | |
3070 -- Encoding and decoding for inter-client text communication | |
3071 | |
3072 The functions XmbTextListToTextProperty and XmbTextPropertyToTextList | |
3073 (and Xwc* equivalents) can be used to convert between the | |
3074 locale-specific encoding (XTextStyle), STRING (XStringStyle), and | |
3075 compound text (XCompoundTextStyle), as well as XStdICCTextStyle, which | |
3076 converts to STRING if possible, and if not, COMPOUND_TEXT. This is | |
3077 used, for example, in XmbSetWMProperties, in the window_name and | |
3078 icon_name properties (WM_NAME and WM_ICON_NAME), which are in the | |
3079 locale-specific encoding on input, and are stored as STRING if possible, | |
3080 COMPOUND_TEXT otherwise. | |
3081 */ | |
771 | 3082 |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3083 #ifdef WEXTTEXT_IS_WIDE |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Wide-Wexttext build: stand-in for the command-argument encoding. */
3084 #define Qcommand_argument_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Wide-Wexttext build: stand-in for the environment-variable encoding. */
3085 #define Qenvironment_variable_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3086 #else |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Non-wide build: command arguments use the Qnative stand-in. */
3087 #define Qcommand_argument_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Non-wide build: environment variables use the Qnative stand-in. */
3088 #define Qenvironment_variable_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3089 #endif |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative) for host names on Unix. */
3090 #define Qunix_host_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative) for service names on Unix. */
3091 #define Qunix_service_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative); Qtime_zone_encoding is defined in terms
   of this one. */
3092 #define Qtime_function_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Follows whatever Qtime_function_encoding is defined as. */
3093 #define Qtime_zone_encoding Qtime_function_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in for host names on MS Windows: Qmswindows_multibyte, not
   Qnative as in the Unix counterpart. */
3094 #define Qmswindows_host_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in for service names on MS Windows: Qmswindows_multibyte. */
3095 #define Qmswindows_service_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative) for user names. */
3096 #define Quser_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative); Qjpeg_error_message_encoding is defined
   in terms of this one. */
3097 #define Qerror_message_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Follows whatever Qerror_message_encoding is defined as. */
3098 #define Qjpeg_error_message_encoding Qerror_message_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative) for text exchanged with ToolTalk. */
3099 #define Qtooltalk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative) for text exchanged with GTK. */
3100 #define Qgtk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3101 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative); the DLL function-name and variable-name
   encodings are both defined in terms of this one. */
3102 #define Qdll_symbol_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Follows whatever Qdll_symbol_encoding is defined as. */
3103 #define Qdll_function_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Follows whatever Qdll_symbol_encoding is defined as. */
3104 #define Qdll_variable_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Unlike the other DLL stand-ins this one maps to Qfile_name, not to
   Qdll_symbol_encoding. */
3105 #define Qdll_filename_encoding Qfile_name |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
/* Stand-in (currently Qnative) for strings passed to/from emodules. */
3106 #define Qemodule_string_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3107 |
771 | 3108 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext? |
3109 Almost certainly the former. Use a standin for now. */ | |
/* Provisional choice; see the open question in the comment just above. */
3110 #define Qlwlib_encoding Qnative | |
3111 | |
1318 | 3112 /* The Host Portable Character Encoding. */ |
/* Qx_hpc_encoding is the single knob for every X string kind listed
   below: each Qx_*_encoding name expands to Qx_hpc_encoding, which is
   currently Qnative.  Changing this one definition retargets all of
   them. */
3113 #define Qx_hpc_encoding Qnative | |
3114 | |
3115 #define Qx_atom_name_encoding Qx_hpc_encoding | |
3116 #define Qx_font_name_encoding Qx_hpc_encoding | |
3117 #define Qx_color_name_encoding Qx_hpc_encoding | |
3118 #define Qx_keysym_encoding Qx_hpc_encoding | |
3119 #define Qx_geometry_encoding Qx_hpc_encoding | |
3120 #define Qx_resource_name_encoding Qx_hpc_encoding | |
3121 #define Qx_application_class_encoding Qx_hpc_encoding | |
771 | 3122 /* the following probably must agree with Qcommand_argument_encoding and |
3123 Qenvironment_variable_encoding */ | |
1318 | 3124 #define Qx_display_name_encoding Qx_hpc_encoding |
3125 #define Qx_xpm_data_encoding Qx_hpc_encoding | |
4834
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4790
diff
changeset
|
/* X error messages: another alias of Qx_hpc_encoding. */
3126 #define Qx_error_message_encoding Qx_hpc_encoding |
1318 | 3127 |
2367 | 3128 /* !!#### Verify these! */ |
/* Both are provisional Qnative stand-ins, flagged above as unverified. */
3129 #define Qxt_widget_arg_encoding Qnative | |
3130 #define Qdt_dnd_encoding Qnative | |
3131 | |
1318 | 3132 /* RedHat 6.2 contains a locale called "Francais" with the C-cedilla |
3133 encoded in ISO2022! */ | |
/* Qctext rather than Qnative, for the reason given in the comment just
   above (a locale name containing ISO2022-encoded text). */
3134 #define Qlocale_name_encoding Qctext | |
771 | 3135 |
/* Coding system GET_STRERROR (below) uses to convert the text returned
   by strerror(). */
3136 #define Qstrerror_encoding Qnative | |
3137 | |
1318 | 3138 /* !!#### This exists to remind us that our hexify routine is totally |
3139 un-Muleized. */ | |
/* Qascii on purpose: a marker for un-Muleized code, per the comment
   above. */
3140 #define Qdnd_hexify_encoding Qascii | |
3141 | |
/* Store in VAR the message text for error number NUM.

   NUM is evaluated exactly once.  If strerror() yields a message, it is
   converted with EXTERNAL_TO_C_STRING using Qstrerror_encoding and VAR is
   set to the converted text.  If strerror() returns a null pointer, VAR
   is instead pointed at a 99-byte block from alloca_ibytes() holding
   "Unknown error <NUM>" (the fixed text plus a decimal int fits well
   within 99 bytes).

   VAR must be a pointer lvalue.  NOTE(review): in both branches the
   result presumably lives in alloca() space of the calling function and
   so must not outlive it -- confirm against EXTERNAL_TO_C_STRING.

   The locals carry a trailing-underscore prefix rather than leading
   double underscores, which are reserved for the implementation.  */
#define GET_STRERROR(var, num)						\
do {									\
  int get_strerror_num_ = (num);					\
  Extbyte *get_strerror_msg_ = strerror (get_strerror_num_);		\
									\
  if (!get_strerror_msg_)						\
    {									\
      var = alloca_ibytes (99);						\
      qxesprintf (var, "Unknown error %d", get_strerror_num_);		\
    }									\
  else									\
    EXTERNAL_TO_C_STRING (get_strerror_msg_, var, Qstrerror_encoding);	\
} while (0)
3155 | |
3156 #endif /* INCLUDED_text_h_ */ |