Mercurial > hg > xemacs-beta
annotate src/text.h @ 4952:19a72041c5ed
Mule-izing, various fixes related to char * arguments
-------------------- ChangeLog entries follow: --------------------
modules/ChangeLog addition:
2010-01-26 Ben Wing <ben@xemacs.org>
* postgresql/postgresql.c:
* postgresql/postgresql.c (CHECK_LIVE_CONNECTION):
* postgresql/postgresql.c (print_pgresult):
* postgresql/postgresql.c (Fpq_conn_defaults):
* postgresql/postgresql.c (Fpq_connectdb):
* postgresql/postgresql.c (Fpq_connect_start):
* postgresql/postgresql.c (Fpq_result_status):
* postgresql/postgresql.c (Fpq_res_status):
Mule-ize large parts of it.
2010-01-26 Ben Wing <ben@xemacs.org>
* ldap/eldap.c (print_ldap):
* ldap/eldap.c (allocate_ldap):
Use write_ascstring().
src/ChangeLog addition:
2010-01-26 Ben Wing <ben@xemacs.org>
* alloc.c:
* alloc.c (build_ascstring):
* alloc.c (build_msg_cistring):
* alloc.c (staticpro_1):
* alloc.c (staticpro_name):
* alloc.c (staticpro_nodump_1):
* alloc.c (staticpro_nodump_name):
* alloc.c (unstaticpro_nodump_1):
* alloc.c (mcpro_1):
* alloc.c (mcpro_name):
* alloc.c (object_memory_usage_stats):
* alloc.c (common_init_alloc_early):
* alloc.c (init_alloc_once_early):
* buffer.c (print_buffer):
* buffer.c (vars_of_buffer):
* buffer.c (common_init_complex_vars_of_buffer):
* buffer.c (init_initial_directory):
* bytecode.c (invalid_byte_code):
* bytecode.c (print_compiled_function):
* bytecode.c (mark_compiled_function):
* chartab.c (print_table_entry):
* chartab.c (print_char_table):
* config.h.in:
* console-gtk.c:
* console-gtk.c (gtk_device_to_console_connection):
* console-gtk.c (gtk_semi_canonicalize_console_connection):
* console-gtk.c (gtk_canonicalize_console_connection):
* console-gtk.c (gtk_semi_canonicalize_device_connection):
* console-gtk.c (gtk_canonicalize_device_connection):
* console-stream.c (stream_init_frame_1):
* console-stream.c (vars_of_console_stream):
* console-stream.c (init_console_stream):
* console-x.c (x_semi_canonicalize_console_connection):
* console-x.c (x_semi_canonicalize_device_connection):
* console-x.c (x_canonicalize_device_connection):
* console-x.h:
* data.c (eq_with_ebola_notice):
* data.c (Fsubr_interactive):
* data.c (Fnumber_to_string):
* data.c (digit_to_number):
* device-gtk.c (gtk_init_device):
* device-msw.c (print_devmode):
* device-x.c (x_event_name):
* dialog-msw.c (handle_directory_dialog_box):
* dialog-msw.c (handle_file_dialog_box):
* dialog-msw.c (vars_of_dialog_mswindows):
* doc.c (weird_doc):
* doc.c (Fsnarf_documentation):
* doc.c (vars_of_doc):
* dumper.c (pdump):
* dynarr.c:
* dynarr.c (Dynarr_realloc):
* editfns.c (Fuser_real_login_name):
* editfns.c (get_home_directory):
* elhash.c (print_hash_table_data):
* elhash.c (print_hash_table):
* emacs.c (main_1):
* emacs.c (vars_of_emacs):
* emodules.c:
* emodules.c (_emodules_list):
* emodules.c (Fload_module):
* emodules.c (Funload_module):
* emodules.c (Flist_modules):
* emodules.c (find_make_module):
* emodules.c (attempt_module_delete):
* emodules.c (emodules_load):
* emodules.c (emodules_doc_subr):
* emodules.c (emodules_doc_sym):
* emodules.c (syms_of_module):
* emodules.c (vars_of_module):
* emodules.h:
* eval.c (print_subr):
* eval.c (signal_call_debugger):
* eval.c (build_error_data):
* eval.c (signal_error):
* eval.c (maybe_signal_error):
* eval.c (signal_continuable_error):
* eval.c (maybe_signal_continuable_error):
* eval.c (signal_error_2):
* eval.c (maybe_signal_error_2):
* eval.c (signal_continuable_error_2):
* eval.c (maybe_signal_continuable_error_2):
* eval.c (signal_ferror):
* eval.c (maybe_signal_ferror):
* eval.c (signal_continuable_ferror):
* eval.c (maybe_signal_continuable_ferror):
* eval.c (signal_ferror_with_frob):
* eval.c (maybe_signal_ferror_with_frob):
* eval.c (signal_continuable_ferror_with_frob):
* eval.c (maybe_signal_continuable_ferror_with_frob):
* eval.c (syntax_error):
* eval.c (syntax_error_2):
* eval.c (maybe_syntax_error):
* eval.c (sferror):
* eval.c (sferror_2):
* eval.c (maybe_sferror):
* eval.c (invalid_argument):
* eval.c (invalid_argument_2):
* eval.c (maybe_invalid_argument):
* eval.c (invalid_constant):
* eval.c (invalid_constant_2):
* eval.c (maybe_invalid_constant):
* eval.c (invalid_operation):
* eval.c (invalid_operation_2):
* eval.c (maybe_invalid_operation):
* eval.c (invalid_change):
* eval.c (invalid_change_2):
* eval.c (maybe_invalid_change):
* eval.c (invalid_state):
* eval.c (invalid_state_2):
* eval.c (maybe_invalid_state):
* eval.c (wtaerror):
* eval.c (stack_overflow):
* eval.c (out_of_memory):
* eval.c (print_multiple_value):
* eval.c (issue_call_trapping_problems_warning):
* eval.c (backtrace_specials):
* eval.c (backtrace_unevalled_args):
* eval.c (Fbacktrace):
* eval.c (warn_when_safe):
* event-Xt.c (modwarn):
* event-Xt.c (modbarf):
* event-Xt.c (check_modifier):
* event-Xt.c (store_modifier):
* event-Xt.c (emacs_Xt_format_magic_event):
* event-Xt.c (describe_event):
* event-gtk.c (dragndrop_data_received):
* event-gtk.c (store_modifier):
* event-gtk.c (gtk_reset_modifier_mapping):
* event-msw.c (dde_eval_string):
* event-msw.c (Fdde_alloc_advise_item):
* event-msw.c (mswindows_dde_callback):
* event-msw.c (FROB):
* event-msw.c (emacs_mswindows_format_magic_event):
* event-stream.c (external_debugging_print_event):
* event-stream.c (execute_help_form):
* event-stream.c (vars_of_event_stream):
* events.c (print_event_1):
* events.c (print_event):
* events.c (event_equal):
* extents.c (print_extent_1):
* extents.c (print_extent):
* extents.c (vars_of_extents):
* faces.c (print_face):
* faces.c (complex_vars_of_faces):
* file-coding.c:
* file-coding.c (print_coding_system):
* file-coding.c (print_coding_system_in_print_method):
* file-coding.c (default_query_method):
* file-coding.c (find_coding_system):
* file-coding.c (make_coding_system_1):
* file-coding.c (chain_print):
* file-coding.c (undecided_print):
* file-coding.c (gzip_print):
* file-coding.c (vars_of_file_coding):
* file-coding.c (complex_vars_of_file_coding):
* fileio.c:
* fileio.c (report_file_type_error):
* fileio.c (report_error_with_errno):
* fileio.c (report_file_error):
* fileio.c (barf_or_query_if_file_exists):
* fileio.c (vars_of_fileio):
* floatfns.c (matherr):
* fns.c (print_bit_vector):
* fns.c (Fmapconcat):
* fns.c (add_suffix_to_symbol):
* fns.c (add_prefix_to_symbol):
* frame-gtk.c:
* frame-gtk.c (Fgtk_window_id):
* frame-x.c (def):
* frame-x.c (x_cde_transfer_callback):
* frame.c:
* frame.c (Fmake_frame):
* gc.c (show_gc_cursor_and_message):
* gc.c (vars_of_gc):
* glyphs-eimage.c (png_instantiate):
* glyphs-eimage.c (tiff_instantiate):
* glyphs-gtk.c (gtk_print_image_instance):
* glyphs-msw.c (mswindows_print_image_instance):
* glyphs-x.c (x_print_image_instance):
* glyphs-x.c (update_widget_face):
* glyphs.c (make_string_from_file):
* glyphs.c (print_image_instance):
* glyphs.c (signal_image_error):
* glyphs.c (signal_image_error_2):
* glyphs.c (signal_double_image_error):
* glyphs.c (signal_double_image_error_2):
* glyphs.c (xbm_mask_file_munging):
* glyphs.c (pixmap_to_lisp_data):
* glyphs.h:
* gui.c (gui_item_display_flush_left):
* hpplay.c (player_error_internal):
* hpplay.c (myHandler):
* intl-win32.c:
* intl-win32.c (langcode_to_lang):
* intl-win32.c (sublangcode_to_lang):
* intl-win32.c (Fmswindows_get_locale_info):
* intl-win32.c (lcid_to_locale_mule_or_no):
* intl-win32.c (mswindows_multibyte_to_unicode_print):
* intl-win32.c (complex_vars_of_intl_win32):
* keymap.c:
* keymap.c (print_keymap):
* keymap.c (ensure_meta_prefix_char_keymapp):
* keymap.c (Fkey_description):
* keymap.c (Ftext_char_description):
* lisp.h:
* lisp.h (struct):
* lisp.h (DECLARE_INLINE_HEADER):
* lread.c (Fload_internal):
* lread.c (locate_file):
* lread.c (read_escape):
* lread.c (read_raw_string):
* lread.c (read1):
* lread.c (read_list):
* lread.c (read_compiled_function):
* lread.c (init_lread):
* lrecord.h:
* marker.c (print_marker):
* marker.c (marker_equal):
* menubar-msw.c (displayable_menu_item):
* menubar-x.c (command_builder_operate_menu_accelerator):
* menubar.c (vars_of_menubar):
* minibuf.c (reinit_complex_vars_of_minibuf):
* minibuf.c (complex_vars_of_minibuf):
* mule-charset.c (Fmake_charset):
* mule-charset.c (complex_vars_of_mule_charset):
* mule-coding.c (iso2022_print):
* mule-coding.c (fixed_width_query):
* number.c (bignum_print):
* number.c (ratio_print):
* number.c (bigfloat_print):
* number.c (bigfloat_finalize):
* objects-msw.c:
* objects-msw.c (mswindows_color_to_string):
* objects-msw.c (mswindows_color_list):
* objects-tty.c:
* objects-tty.c (tty_font_list):
* objects-tty.c (tty_find_charset_font):
* objects-xlike-inc.c (xft_find_charset_font):
* objects-xlike-inc.c (endif):
* print.c:
* print.c (write_istring):
* print.c (write_ascstring):
* print.c (Fterpri):
* print.c (Fprint):
* print.c (print_error_message):
* print.c (print_vector_internal):
* print.c (print_cons):
* print.c (print_string):
* print.c (printing_unreadable_object):
* print.c (print_internal):
* print.c (print_float):
* print.c (print_symbol):
* process-nt.c (mswindows_report_winsock_error):
* process-nt.c (nt_canonicalize_host_name):
* process-unix.c (unix_canonicalize_host_name):
* process.c (print_process):
* process.c (report_process_error):
* process.c (report_network_error):
* process.c (make_process_internal):
* process.c (Fstart_process_internal):
* process.c (status_message):
* process.c (putenv_internal):
* process.c (vars_of_process):
* process.h:
* profile.c (vars_of_profile):
* rangetab.c (print_range_table):
* realpath.c (vars_of_realpath):
* redisplay.c (vars_of_redisplay):
* search.c (wordify):
* search.c (Freplace_match):
* sheap.c (sheap_adjust_h):
* sound.c (report_sound_error):
* sound.c (Fplay_sound_file):
* specifier.c (print_specifier):
* symbols.c (Fsubr_name):
* symbols.c (do_symval_forwarding):
* symbols.c (set_default_buffer_slot_variable):
* symbols.c (set_default_console_slot_variable):
* symbols.c (store_symval_forwarding):
* symbols.c (default_value):
* symbols.c (defsymbol_massage_name_1):
* symbols.c (defsymbol_massage_name_nodump):
* symbols.c (defsymbol_massage_name):
* symbols.c (defsymbol_massage_multiword_predicate_nodump):
* symbols.c (defsymbol_massage_multiword_predicate):
* symbols.c (defsymbol_nodump):
* symbols.c (defsymbol):
* symbols.c (defkeyword):
* symbols.c (defkeyword_massage_name):
* symbols.c (check_module_subr):
* symbols.c (deferror_1):
* symbols.c (deferror):
* symbols.c (deferror_massage_name):
* symbols.c (deferror_massage_name_and_message):
* symbols.c (defvar_magic):
* symeval.h:
* symeval.h (DEFVAR_SYMVAL_FWD):
* sysdep.c:
* sysdep.c (init_system_name):
* sysdll.c:
* sysdll.c (MAYBE_PREPEND_UNDERSCORE):
* sysdll.c (dll_function):
* sysdll.c (dll_variable):
* sysdll.c (dll_error):
* sysdll.c (dll_open):
* sysdll.c (dll_close):
* sysdll.c (image_for_address):
* sysdll.c (my_find_image):
* sysdll.c (search_linked_libs):
* sysdll.h:
* sysfile.h:
* sysfile.h (DEFAULT_DIRECTORY_FALLBACK):
* syswindows.h:
* tests.c (DFC_CHECK_LENGTH):
* tests.c (DFC_CHECK_CONTENT):
* tests.c (Ftest_hash_tables):
* text.c (vars_of_text):
* text.h:
* tooltalk.c (tt_opnum_string):
* tooltalk.c (tt_message_arg_ival_string):
* tooltalk.c (Ftooltalk_default_procid):
* tooltalk.c (Ftooltalk_default_session):
* tooltalk.c (init_tooltalk):
* tooltalk.c (vars_of_tooltalk):
* ui-gtk.c (Fdll_load):
* ui-gtk.c (type_to_marshaller_type):
* ui-gtk.c (Fgtk_import_function_internal):
* ui-gtk.c (emacs_gtk_object_printer):
* ui-gtk.c (emacs_gtk_boxed_printer):
* unicode.c (unicode_to_ichar):
* unicode.c (unicode_print):
* unicode.c (unicode_query):
* unicode.c (vars_of_unicode):
* unicode.c (complex_vars_of_unicode):
* win32.c:
* win32.c (mswindows_report_process_error):
* window.c (print_window):
* xemacs.def.in.in:
BASIC IDEA: Further fixing up uses of char * and CIbyte *
to reflect their actual semantics; Mule-izing some code;
redoing of the not-yet-working code to handle message translation.
Clean up code to handle message-translation (not yet working).
Create separate versions of build_msg_string() for working with
Ibyte *, CIbyte *, and Ascbyte * arguments. Assert that Ascbyte *
arguments are pure-ASCII. Make build_msg_string() be the same
as build_msg_ascstring(). Create same three versions of GETTEXT()
and DEFER_GETTEXT(). Also create build_defer_string() and
variants for the equivalent of DEFER_GETTEXT() when building a
string. Remove old CGETTEXT(). Clean up code where GETTEXT(),
DEFER_GETTEXT(), build_msg_string(), etc. was being called and
introduce some new calls to build_msg_string(), etc. Remove
GETTEXT() from calls to weird_doc() -- we assume that the
message snarfer knows about weird_doc(). Remove uses of
DEFER_GETTEXT() from error messages in sysdep.c and instead use
special comments /* @@@begin-snarf@@@ */ and /* @@@end-snarf@@@ */
that the message snarfer presumably knows about.
Create build_ascstring() and use it in many instances in place
of build_string(). The purpose of having Ascbyte * variants is
to make the code more self-documenting in terms of what sort of
semantics is expected for char * strings. In fact in the process
of looking for uses of build_string(), much improperly Mule-ized
code was discovered.
Mule-ize a lot of code as described in previous paragraph,
e.g. in sysdep.c.
Make the error functions take Ascbyte * strings and fix up a
couple of places where non-pure-ASCII strings were being passed in
(file-coding.c, mule-coding.c, unicode.c). (It's debatable whether
we really need to make the error functions work this way. It
helps catch places where code is written in a way that message
translation won't work, but we may well never implement message
translation.)
Make staticpro() and friends take Ascbyte * strings instead of
raw char * strings. Create a const_Ascbyte_ptr dynarr type
to describe what's held by staticpro_names[] and friends,
create pdump descriptions for const_Ascbyte_ptr dynarrs, and
use them in place of specially-crafted staticpro descriptions.
Mule-ize certain other functions (e.g. x_event_name) by correcting
raw use of char * to Ascbyte *, Rawbyte * or another such type,
and raw use of char[] buffers to another type (usually Ascbyte[]).
Change many uses of write_c_string() to write_msg_string(),
write_ascstring(), etc.
Mule-ize emodules.c, emodules.h, sysdll.h.
Fix some un-Mule-ized code in intl-win32.c.
A comment in event-Xt.c about the limitations of the message
snarfer (make-msgfile or whatever) is presumably incorrect --
the snarfer should be smart enough to handle function calls spread
over more than one line. Clean up code in event-Xt.c that was
written awkwardly for this reason.
In config.h.in, instead of NEED_ERROR_CHECK_TYPES_INLINES,
create a more general XEMACS_DEFS_NEEDS_INLINE_DECLS to
indicate when inlined functions need to be declared in
xemacs.def.in.in, and make use of it in xemacs.def.in.in.
We need to do this because postgresql.c now calls qxestrdup(),
which is an inline function.
Make nconc2() and other such functions MODULE_API and put
them in xemacs.def.in.in since postgresql.c now uses them.
Clean up indentation in lread.c and a few other places.
In text.h, document ASSERT_ASCTEXT_ASCII() and
ASSERT_ASCTEXT_ASCII_LEN(), group together the stand-in
encodings and add some more for DLL symbols, function and
variable names, etc.
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Tue, 26 Jan 2010 23:22:30 -0600 |
parents | d35e231d347d |
children | 304aebb79cd3 |
rev | line source |
---|---|
771 | 1 /* Header file for text manipulation primitives and macros. |
2 Copyright (C) 1985-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
4 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2010 Ben Wing. |
771 | 5 |
6 This file is part of XEmacs. | |
7 | |
8 XEmacs is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with XEmacs; see the file COPYING. If not, write to | |
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 Boston, MA 02111-1307, USA. */ | |
22 | |
23 /* Synched up with: FSF 19.30. */ | |
24 | |
25 /* Authorship: | |
26 | |
27 Mostly written by Ben Wing, starting around 1995. | |
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz, | |
29 designed by Ben Wing based on earlier macros by Ben Wing. | |
30 Separated out June 18, 2000 from buffer.h into text.h. | |
31 */ | |
32 | |
33 #ifndef INCLUDED_text_h_ | |
34 #define INCLUDED_text_h_ | |
35 | |
912 | 36 #ifdef HAVE_WCHAR_H |
771 | 37 #include <wchar.h> |
912 | 38 #else |
1257 | 39 size_t wcslen (const wchar_t *); |
912 | 40 #endif |
1204 | 41 #ifndef HAVE_STRLWR |
1257 | 42 char *strlwr (char *); |
1204 | 43 #endif |
44 #ifndef HAVE_STRUPR | |
1257 | 45 char *strupr (char *); |
1204 | 46 #endif |
771 | 47 |
1743 | 48 BEGIN_C_DECLS |
1650 | 49 |
771 | 50 /* ---------------------------------------------------------------------- */ |
51 /* Super-basic character properties */ | |
52 /* ---------------------------------------------------------------------- */ | |
53 | |
54 /* These properties define the specifics of how our current encoding fits | |
55 in the basic model used for the encoding. Because this model is the same | |
56 as is used for UTF-8, all these properties could be defined for it, too. | |
57 This would instantly make the rest of this file work with UTF-8 (with | |
58 the exception of a few called functions that would need to be redefined). | |
59 | |
60 (UTF-2000 implementers, take note!) | |
61 */ | |
62 | |
63 /* If you want more than this, you need to include charset.h */ | |
64 | |
65 #ifndef MULE | |
66 | |
826 | 67 #define rep_bytes_by_first_byte(fb) 1 |
68 #define byte_ascii_p(byte) 1 | |
867 | 69 #define MAX_ICHAR_LEN 1 |
771 | 70 |
71 #else /* MULE */ | |
72 | |
73 /* These are carefully designed to work if BYTE is signed or unsigned. */ | |
74 /* Note that SPC and DEL are considered ASCII, not control. */ | |
75 | |
826 | 76 #define byte_ascii_p(byte) (((byte) & ~0x7f) == 0) |
77 #define byte_c0_p(byte) (((byte) & ~0x1f) == 0) | |
78 #define byte_c1_p(byte) (((byte) & ~0x1f) == 0x80) | |
771 | 79 |
80 /* Does BYTE represent the first byte of a character? */ | |
81 | |
826 | 82 #ifdef ERROR_CHECK_TEXT |
83 | |
84 DECLARE_INLINE_HEADER ( | |
85 int | |
867 | 86 ibyte_first_byte_p_1 (int byte, const char *file, int line) |
826 | 87 ) |
88 { | |
89 assert_at_line (byte >= 0 && byte < 256, file, line); | |
90 return byte < 0xA0; | |
91 } | |
92 | |
867 | 93 #define ibyte_first_byte_p(byte) \ |
94 ibyte_first_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 95 |
96 #else | |
97 | |
867 | 98 #define ibyte_first_byte_p(byte) ((byte) < 0xA0) |
826 | 99 |
100 #endif | |
101 | |
102 #ifdef ERROR_CHECK_TEXT | |
771 | 103 |
104 /* Does BYTE represent the first byte of a multi-byte character? */ | |
105 | |
826 | 106 DECLARE_INLINE_HEADER ( |
107 int | |
867 | 108 ibyte_leading_byte_p_1 (int byte, const char *file, int line) |
826 | 109 ) |
110 { | |
111 assert_at_line (byte >= 0 && byte < 256, file, line); | |
112 return byte_c1_p (byte); | |
113 } | |
114 | |
867 | 115 #define ibyte_leading_byte_p(byte) \ |
116 ibyte_leading_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 117 |
118 #else | |
119 | |
867 | 120 #define ibyte_leading_byte_p(byte) byte_c1_p (byte) |
826 | 121 |
122 #endif | |
771 | 123 |
124 /* Table of number of bytes in the string representation of a character | |
125 indexed by the first byte of that representation. | |
126 | |
127 This value can be derived in other ways -- e.g. something like | |
826 | 128 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte)) |
771 | 129 but it's faster this way. */ |
1632 | 130 extern MODULE_API const Bytecount rep_bytes_by_first_byte[0xA0]; |
771 | 131 |
132 /* Number of bytes in the string representation of a character. */ | |
788 | 133 |
800 | 134 #ifdef ERROR_CHECK_TEXT |
788 | 135 |
826 | 136 DECLARE_INLINE_HEADER ( |
137 Bytecount | |
138 rep_bytes_by_first_byte_1 (int fb, const char *file, int line) /* FILE and LINE name the call site reported if the assertion fails */ | |
139 ) | |
771 | 140 { |
826 | 141 assert_at_line (fb >= 0 && fb < 0xA0, file, line); /* FB must be a valid index into the 0xA0-entry table */ |
771 | 142 return rep_bytes_by_first_byte[fb]; |
143 } | |
144 | |
826 | 145 #define rep_bytes_by_first_byte(fb) \ |
146 rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__) | |
788 | 147 |
800 | 148 #else /* ERROR_CHECK_TEXT */ |
788 | 149 |
826 | 150 #define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb]) |
788 | 151 |
800 | 152 #endif /* ERROR_CHECK_TEXT */ |
788 | 153 |
826 | 154 /* Is this character represented by more than one byte in a string in the |
155 default format? */ | |
156 | |
867 | 157 #define ichar_multibyte_p(c) ((c) >= 0x80) |
158 | |
159 #define ichar_ascii_p(c) (!ichar_multibyte_p (c)) | |
826 | 160 |
161 /* Maximum number of bytes per Emacs character when represented as text, in | |
162 any format. | |
163 */ | |
771 | 164 |
867 | 165 #define MAX_ICHAR_LEN 4 |
771 | 166 |
826 | 167 #endif /* not MULE */ |
168 | |
2367 | 169 /* For more discussion, see text.c, "handling non-default formats" */ |
170 | |
826 | 171 typedef enum internal_format |
172 { | |
173 FORMAT_DEFAULT, | |
174 FORMAT_8_BIT_FIXED, | |
175 FORMAT_16_BIT_FIXED, /* not implemented */ | |
176 FORMAT_32_BIT_FIXED /* not implemented */ | |
177 } Internal_Format; | |
178 | |
179 #ifdef MULE | |
180 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to | |
181 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed | |
182 values may depend on the buffer, e.g. depending on what language the | |
183 text in the buffer is in. */ | |
184 | |
867 | 185 /* True if Ichar CH can be represented in 8-bit-fixed format. */ |
186 #define ichar_8_bit_fixed_p(ch, object) (((ch) & ~0xff) == 0) | |
187 /* Convert Ichar CH to an 8-bit int, as will be stored in the buffer. */ | |
188 #define ichar_to_raw_8_bit_fixed(ch, object) ((Ibyte) (ch)) | |
826 | 189 /* Convert the other way. */ |
867 | 190 #define raw_8_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
191 | |
192 #define ichar_16_bit_fixed_p(ch, object) (((ch) & ~0xffff) == 0) | |
193 /* Convert Ichar CH to a 16-bit int, as will be stored in the buffer. */ | |
194 #define ichar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch)) | |
826 | 195 /* Convert the other way. */ |
867 | 196 #define raw_16_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
197 | |
198 /* Convert Ichar CH to a 32-bit int, as will be stored in the buffer. */ | |
199 #define ichar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch)) | |
826 | 200 /* Convert the other way. */ |
867 | 201 #define raw_32_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
826 | 202 |
203 /* Return the "raw value" of a character as stored in the buffer. In the | |
204 default format, this is just the same as the character. In fixed-width | |
205 formats, this is the actual value in the buffer, which will be limited | |
206 to the range as established by the format. This is used when searching | |
207 for a character in a buffer -- it's faster to convert the character to | |
208 the raw value and look for that, than repeatedly convert each raw value | |
209 in the buffer into a character. */ | |
210 | |
211 DECLARE_INLINE_HEADER ( | |
867 | 212 Raw_Ichar |
2286 | 213 ichar_to_raw (Ichar ch, Internal_Format fmt, |
214 Lisp_Object UNUSED (object)) | |
826 | 215 ) |
216 { | |
217 switch (fmt) | |
218 { | |
219 case FORMAT_DEFAULT: | |
867 | 220 return (Raw_Ichar) ch; |
826 | 221 case FORMAT_16_BIT_FIXED: |
867 | 222 text_checking_assert (ichar_16_bit_fixed_p (ch, object)); |
223 return (Raw_Ichar) ichar_to_raw_16_bit_fixed (ch, object); | |
826 | 224 case FORMAT_32_BIT_FIXED: |
867 | 225 return (Raw_Ichar) ichar_to_raw_32_bit_fixed (ch, object); |
826 | 226 default: |
227 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 228 text_checking_assert (ichar_8_bit_fixed_p (ch, object)); |
229 return (Raw_Ichar) ichar_to_raw_8_bit_fixed (ch, object); | |
826 | 230 } |
231 } | |
232 | |
233 /* Return whether CH is representable in the given format in the given | |
234 object. */ | |
235 | |
236 DECLARE_INLINE_HEADER ( | |
237 int | |
2286 | 238 ichar_fits_in_format (Ichar ch, Internal_Format fmt, |
239 Lisp_Object UNUSED (object)) | |
826 | 240 ) |
241 { | |
242 switch (fmt) | |
243 { | |
244 case FORMAT_DEFAULT: | |
245 return 1; | |
246 case FORMAT_16_BIT_FIXED: | |
867 | 247 return ichar_16_bit_fixed_p (ch, object); |
826 | 248 case FORMAT_32_BIT_FIXED: |
249 return 1; | |
250 default: | |
251 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 252 return ichar_8_bit_fixed_p (ch, object); |
826 | 253 } |
254 } | |
255 | |
256 /* Assuming the formats are the same, return whether the two objects | |
257 represent text in exactly the same way. */ | |
258 | |
259 DECLARE_INLINE_HEADER ( | |
260 int | |
2286 | 261 objects_have_same_internal_representation (Lisp_Object UNUSED (srcobj), |
262 Lisp_Object UNUSED (dstobj)) | |
826 | 263 ) |
264 { | |
265 /* &&#### implement this properly when we allow per-object format | |
266 differences */ | |
267 return 1; | |
268 } | |
269 | |
270 #else | |
271 | |
867 | 272 #define ichar_to_raw(ch, fmt, object) ((Raw_Ichar) (ch)) |
273 #define ichar_fits_in_format(ch, fmt, object) 1 | |
826 | 274 #define objects_have_same_internal_representation(srcobj, dstobj) 1 |
275 | |
771 | 276 #endif /* MULE */ |
277 | |
1632 | 278 MODULE_API int dfc_coding_system_is_unicode (Lisp_Object codesys); |
771 | 279 |
771 | 280 DECLARE_INLINE_HEADER ( |
281 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys) /* length in bytes, terminator excluded, of the zero-terminated external data at PTR */ | |
282 ) | |
283 { | |
284 if (dfc_coding_system_is_unicode (codesys)) | |
285 return sizeof (wchar_t) * wcslen ((const wchar_t *) ptr); /* wide-character count scaled to bytes; cast keeps PTR's const */ | |
286 else | |
287 return strlen ((const char *) ptr); /* cast keeps PTR's const */ | |
288 } | |
289 | |
290 | |
291 /************************************************************************/ | |
292 /* */ | |
293 /* working with raw internal-format data */ | |
294 /* */ | |
295 /************************************************************************/ | |
296 | |
826 | 297 /* |
298 Use the following functions/macros on contiguous text in any of the | |
299 internal formats. Those that take a format arg work on all internal | |
300 formats; the others work only on the default (variable-width under Mule) | |
301 format. If the text you're operating on is known to come from a buffer, | |
302 use the buffer-level functions in buffer.h, which automatically know the | |
303 correct format and handle the gap. | |
304 | |
305 Some terminology: | |
306 | |
867 | 307 "itext" appearing in the macros means "internal-format text" -- type |
308 `Ibyte *'. Operations on such pointers themselves, rather than on the | |
309 text being pointed to, have "ibyteptr" instead of "itext" in the macro | |
310 name. "ichar" in the macro names means an Ichar -- the representation | |
826 | 311 of a character as a single integer rather than a series of bytes, as part |
867 | 312 of "itext". Many of the macros below are for converting between the |
826 | 313 two representations of characters. |
314 | |
867 | 315 Note also that we try to consistently distinguish between an "Ichar" and |
826 | 316 a Lisp character. Stuff working with Lisp characters often just says |
867 | 317 "char", so we consistently use "Ichar" when that's what we're working |
826 | 318 with. */ |
319 | |
320 /* The three golden rules of macros: | |
771 | 321 |
322 1) Anything that's an lvalue can be evaluated more than once. | |
826 | 323 |
324 2) Macros where anything else can be evaluated more than once should | |
325 have the word "unsafe" in their name (exceptions may be made for | |
326 large sets of macros that evaluate arguments of certain types more | |
327 than once, e.g. struct buffer * arguments, when clearly indicated in | |
328 the macro documentation). These macros are generally meant to be | |
329 called only by other macros that have already stored the calling | |
330 values in temporary variables. | |
331 | |
332 3) Nothing else can be evaluated more than once. Use inline | |
771 | 333 functions, if necessary, to prevent multiple evaluation. |
826 | 334 |
335 NOTE: The functions and macros below are given full prototypes in their | |
336 docs, even when the implementation is a macro. In such cases, passing | |
337 an argument of a type other than expected will produce undefined | |
338 results. Also, given that macros can do things functions can't (in | |
339 particular, directly modify arguments as if they were passed by | |
340 reference), the declaration syntax has been extended to include the | |
341 call-by-reference syntax from C++, where an & after a type indicates | |
342 that the argument is an lvalue and is passed by reference, i.e. the | |
343 function can modify its value. (This is equivalent in C to passing a | |
344 pointer to the argument, but without the need to explicitly worry about | |
345 pointers.) | |
346 | |
347 When to capitalize macros: | |
348 | |
349 -- Capitalize macros doing stuff obviously impossible with (C) | |
350 functions, e.g. directly modifying arguments as if they were passed by | |
351 reference. | |
352 | |
353 -- Capitalize macros that evaluate *any* argument more than once regardless | |
354 of whether that's "allowed" (e.g. buffer arguments). | |
355 | |
356 -- Capitalize macros that directly access a field in a Lisp_Object or | |
357 its equivalent underlying structure. In such cases, access through the | |
358 Lisp_Object precedes the macro with an X, and access through the underlying | |
359 structure doesn't. | |
360 | |
361 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g. | |
362 FRAMEP, CHECK_FRAME, etc. | |
363 | |
364 -- Try to avoid capitalizing any other macros. | |
771 | 365 */ |
366 | |
367 /* ---------------------------------------------------------------------- */ | |
867 | 368 /* Working with itext's (pointers to internally-formatted text) */ |
771 | 369 /* ---------------------------------------------------------------------- */ |
370 | |
867 | 371 /* Given an itext, does it point to the beginning of a character? |
826 | 372 */ |
373 | |
771 | 374 #ifdef MULE |
867 | 375 # define valid_ibyteptr_p(ptr) ibyte_first_byte_p (* (ptr)) |
771 | 376 #else |
867 | 377 # define valid_ibyteptr_p(ptr) 1 |
771 | 378 #endif |
379 | |
867 | 380 /* If error-checking is enabled, assert that the given itext points to |
826 | 381 the beginning of a character. Otherwise, do nothing. |
382 */ | |
383 | |
867 | 384 #define assert_valid_ibyteptr(ptr) text_checking_assert (valid_ibyteptr_p (ptr)) |
385 | |
867 | 386 /* Given an itext (assumed to point at the beginning of a character), |
826 | 387 modify that pointer so it points to the beginning of the next character. |
388 | |
867 | 389 Note that INC_IBYTEPTR() and DEC_IBYTEPTR() have to be written in |
390 completely separate ways. INC_IBYTEPTR() cannot use the DEC_IBYTEPTR() | |
771 | 391 trick of looking for a valid first byte because it might run off |
867 | 392 the end of the string. DEC_IBYTEPTR() can't use the INC_IBYTEPTR() |
771 | 393 method because it doesn't have easy access to the first byte of |
394 the character it's moving over. */ | |
395 | |
867 | 396 #define INC_IBYTEPTR(ptr) do { \ |
397 assert_valid_ibyteptr (ptr); \ | |
826 | 398 (ptr) += rep_bytes_by_first_byte (* (ptr)); \ |
399 } while (0) | |
400 | |
/* Advance PTR, which points into text held in internal format FMT, to
   the beginning of the next character:

     FORMAT_DEFAULT       -- defer to INC_IBYTEPTR()
     FORMAT_16_BIT_FIXED  -- step 2 bytes
     FORMAT_32_BIT_FIXED  -- step 4 bytes
     anything else        -- must be FORMAT_8_BIT_FIXED; step 1 byte

   FMT is evaluated exactly once (it is cached in a local, and the
   checks use the cached copy).  PTR must be a modifiable lvalue and is
   evaluated more than once. */

#define INC_IBYTEPTR_FMT(ptr, fmt)                                        \
do {                                                                      \
  Internal_Format icf_fmt_ = (fmt);                                       \
  switch (icf_fmt_)                                                       \
    {                                                                     \
    case FORMAT_DEFAULT:                                                  \
      INC_IBYTEPTR (ptr);                                                 \
      break;                                                              \
    case FORMAT_16_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) += 2;                                                         \
      break;                                                              \
    case FORMAT_32_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) += 4;                                                         \
      break;                                                              \
    default:                                                              \
      /* Check the cached value, not the raw macro argument. */           \
      text_checking_assert (icf_fmt_ == FORMAT_8_BIT_FIXED);              \
      (ptr)++;                                                            \
      break;                                                              \
    }                                                                     \
} while (0)
423 | |
/* Move PTR (an itext assumed to point at the beginning of a character
   or at the very end of the text) back to the beginning of the previous
   character.  PTR is stepped back one byte at a time until
   valid_ibyteptr_p() accepts it.
 */

#ifndef ERROR_CHECK_TEXT
#define DEC_IBYTEPTR(ptr)                               \
do {                                                    \
  do                                                    \
    (ptr)--;                                            \
  while (!valid_ibyteptr_p (ptr));                      \
} while (0)
#else
/* Separate definition so that non-checking builds do not get warnings
   about an unused starting-position variable.  After backing up, verify
   that the distance moved equals the length implied by the first byte
   of the character now under PTR. */
#define DEC_IBYTEPTR(ptr)                               \
do {                                                    \
  const Ibyte *dc_origin = (ptr);                       \
  do                                                    \
    (ptr)--;                                            \
  while (!valid_ibyteptr_p (ptr));                      \
  text_checking_assert (dc_origin - (ptr) ==            \
                        rep_bytes_by_first_byte (*(ptr))); \
} while (0)
#endif /* ERROR_CHECK_TEXT */
445 | |
/* Move PTR, which points into text held in internal format FMT, back to
   the beginning of the previous character:

     FORMAT_DEFAULT       -- defer to DEC_IBYTEPTR()
     FORMAT_16_BIT_FIXED  -- step back 2 bytes
     FORMAT_32_BIT_FIXED  -- step back 4 bytes
     anything else        -- must be FORMAT_8_BIT_FIXED; step back 1 byte

   FMT is evaluated exactly once (it is cached in a local, and the
   checks use the cached copy).  PTR must be a modifiable lvalue and is
   evaluated more than once. */

#define DEC_IBYTEPTR_FMT(ptr, fmt)                                        \
do {                                                                      \
  Internal_Format dcf_fmt_ = (fmt);                                       \
  switch (dcf_fmt_)                                                       \
    {                                                                     \
    case FORMAT_DEFAULT:                                                  \
      DEC_IBYTEPTR (ptr);                                                 \
      break;                                                              \
    case FORMAT_16_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) -= 2;                                                         \
      break;                                                              \
    case FORMAT_32_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) -= 4;                                                         \
      break;                                                              \
    default:                                                              \
      /* Check the cached value, not the raw macro argument. */           \
      text_checking_assert (dcf_fmt_ == FORMAT_8_BIT_FIXED);              \
      (ptr)--;                                                            \
      break;                                                              \
    }                                                                     \
} while (0)
468 | |
#ifdef MULE

/* Make sure that PTR is pointing to the beginning of a character.  If
   not, back up until this is the case.  Note that there are not too
   many places where it is legitimate to do this sort of thing.  It's an
   error if you're passed an "invalid" char * pointer.

   NOTE: PTR *must* be pointing to a valid part of the string (i.e. not
   the very end, unless the string is zero-terminated or something) in
   order for this not to cause crashes, because the byte at *PTR is
   read.

   PTR must be a modifiable lvalue; it is parenthesized everywhere, so
   an argument such as `*pp' updates the intended object.
 */

#define VALIDATE_IBYTEPTR_BACKWARD(ptr) do {    \
  while (!valid_ibyteptr_p (ptr)) (ptr)--;      \
} while (0)

/* Make sure that PTR is pointing to the beginning of a character.  If
   not, move forward until this is the case.  Note that there are not
   too many places where it is legitimate to do this sort of thing.
   It's an error if you're passed an "invalid" char * pointer.

   This needs to be trickier than VALIDATE_IBYTEPTR_BACKWARD() to avoid
   the possibility of running off the end of the string: first back up
   to the start of the enclosing character, and only if that actually
   moved, step forward one whole character from there.
 */

#define VALIDATE_IBYTEPTR_FORWARD(ptr) do {     \
  Ibyte *vcf_ptr = (ptr);                       \
  VALIDATE_IBYTEPTR_BACKWARD (vcf_ptr);         \
  if (vcf_ptr != (ptr))                         \
    {                                           \
      (ptr) = vcf_ptr;                          \
      INC_IBYTEPTR (ptr);                       \
    }                                           \
} while (0)

#else /* not MULE */
#define VALIDATE_IBYTEPTR_BACKWARD(ptr)
#define VALIDATE_IBYTEPTR_FORWARD(ptr)
#endif /* not MULE */
509 | |
#ifdef MULE

/* Given an Ibyte string at PTR of size N, possibly with a partial
   character at the end, return the size of the longest leading
   substring made up of complete characters.  The byte at *(PTR + N) is
   never read.  Note that there are not too many places where it is
   legitimate to do this sort of thing.  It's an error if you're passed
   an "invalid" offset. */

DECLARE_INLINE_HEADER (
Bytecount
validate_ibyte_string_backward (const Ibyte *ptr, Bytecount n)
)
{
  const Ibyte *limit;
  const Ibyte *last_char;

  if (n == 0)
    return n;

  limit = ptr + n;
  /* Start on the final byte and back up to the first byte of the
     character containing it. */
  last_char = limit - 1;
  VALIDATE_IBYTEPTR_BACKWARD (last_char);

  /* The final character is complete only if its encoded length ends
     exactly at LIMIT; otherwise drop it. */
  if (last_char + rep_bytes_by_first_byte (*last_char) == limit)
    return n;
  return last_char - ptr;
}

#else

#define validate_ibyte_string_backward(ptr, n) (n)

#endif /* MULE */
771 | 540 |
/* ASSERT_ASCTEXT_ASCII(ptr): Check that an Ascbyte * pointer points to
   purely ASCII text.  Useful for checking that putatively ASCII strings
   (i.e. declared as Ascbyte * or const Ascbyte *) are actually ASCII.
   This is important because otherwise we need to worry about what
   encoding they are in -- internal or some external encoding.

   ASSERT_ASCTEXT_ASCII_LEN(ptr, len): Same as ASSERT_ASCTEXT_ASCII()
   but where the length has been explicitly given.  Useful if the string
   may contain embedded zeroes.
*/

2367 | 552 #ifdef ERROR_CHECK_TEXT |
/* Assert that each of the LEN bytes starting at PTR is an ASCII code,
   i.e. lies in the range 0x00 - 0x7F inclusive (DEL, 0x7F, is ASCII and
   is accepted).  Each byte is compared as an unsigned char, so the
   result does not depend on whether plain char is signed.  PTR and LEN
   are each evaluated once. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)                      \
do {                                                            \
  int aia2;                                                     \
  const Ascbyte *aia2ptr = (ptr);                               \
  int aia2len = (len);                                          \
                                                                \
  for (aia2 = 0; aia2 < aia2len; aia2++)                        \
    assert ((unsigned char) aia2ptr[aia2] < 0x80);              \
} while (0)
/* Zero-terminated variant: measure the string at PTR with strlen() and
   hand that many bytes to ASSERT_ASCTEXT_ASCII_LEN().  PTR is evaluated
   once. */
#define ASSERT_ASCTEXT_ASCII(ptr)                               \
do {                                                            \
  const Ascbyte *aia_str_ = (ptr);                              \
  ASSERT_ASCTEXT_ASCII_LEN (aia_str_, strlen (aia_str_));       \
} while (0)
567 #else | |
/* Checking disabled: both assertions expand to nothing, so their
   arguments are not evaluated. */
#define ASSERT_ASCTEXT_ASCII(ptr)
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)
570 #endif | |
571 | |
771 | 572 /* -------------------------------------------------------------- */ |
826 | 573 /* Working with the length (in bytes and characters) of a */ |
574 /* section of internally-formatted text */ | |
771 | 575 /* -------------------------------------------------------------- */ |
576 | |
826 | 577 #ifdef MULE |
578 | |
1632 | 579 MODULE_API Charcount |
580 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len); | |
581 MODULE_API Bytecount | |
582 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len); | |
826 | 583 |
584 /* Given a pointer to a text string and a length in bytes, return | |
585 the equivalent length in characters. */ | |
586 | |
587 DECLARE_INLINE_HEADER ( | |
588 Charcount | |
867 | 589 bytecount_to_charcount (const Ibyte *ptr, Bytecount len) |
826 | 590 ) |
591 { | |
592 if (len < 20) /* Just a random guess, but it should be more or less correct. | |
593 If number of bytes is small, just do a simple loop, | |
594 which should be more efficient. */ | |
595 { | |
596 Charcount count = 0; | |
867 | 597 const Ibyte *end = ptr + len; |
826 | 598 while (ptr < end) |
599 { | |
867 | 600 INC_IBYTEPTR (ptr); |
826 | 601 count++; |
602 } | |
603 /* Bomb out if the specified substring ends in the middle | |
604 of a character. Note that we might have already gotten | |
605 a core dump above from an invalid reference, but at least | |
606 we will get no farther than here. | |
607 | |
608 This also catches len < 0. */ | |
609 text_checking_assert (ptr == end); | |
610 | |
611 return count; | |
612 } | |
613 else | |
614 return bytecount_to_charcount_fun (ptr, len); | |
615 } | |
616 | |
617 /* Given a pointer to a text string and a length in characters, return the | |
618 equivalent length in bytes. | |
619 */ | |
620 | |
621 DECLARE_INLINE_HEADER ( | |
622 Bytecount | |
867 | 623 charcount_to_bytecount (const Ibyte *ptr, Charcount len) |
826 | 624 ) |
625 { | |
626 text_checking_assert (len >= 0); | |
627 if (len < 20) /* See above */ | |
628 { | |
867 | 629 const Ibyte *newptr = ptr; |
826 | 630 while (len > 0) |
631 { | |
867 | 632 INC_IBYTEPTR (newptr); |
826 | 633 len--; |
634 } | |
635 return newptr - ptr; | |
636 } | |
637 else | |
638 return charcount_to_bytecount_fun (ptr, len); | |
639 } | |
640 | |
2367 | 641 MODULE_API Bytecount |
642 charcount_to_bytecount_down_fun (const Ibyte *ptr, Charcount len); | |
643 | |
644 /* Given a pointer to a text string and a length in bytes, return | |
645 the equivalent length in characters of the stretch [PTR - LEN, PTR). */ | |
646 | |
647 DECLARE_INLINE_HEADER ( | |
648 Charcount | |
649 bytecount_to_charcount_down (const Ibyte *ptr, Bytecount len) | |
650 ) | |
651 { | |
652 /* No need to be clever here */ | |
653 return bytecount_to_charcount (ptr - len, len); | |
654 } | |
655 | |
656 /* Given a pointer to a text string and a length in characters, return the | |
657 equivalent length in bytes of the stretch of characters of that length | |
658 BEFORE the pointer. | |
659 */ | |
660 | |
661 DECLARE_INLINE_HEADER ( | |
662 Bytecount | |
663 charcount_to_bytecount_down (const Ibyte *ptr, Charcount len) | |
664 ) | |
665 { | |
666 #define SLEDGEHAMMER_CHECK_TEXT | |
667 #ifdef SLEDGEHAMMER_CHECK_TEXT | |
668 Charcount len1 = len; | |
669 Bytecount ret1, ret2; | |
670 | |
671 /* To test the correctness of the function version, always do the | |
672 calculation both ways and check that the values are the same. */ | |
673 text_checking_assert (len >= 0); | |
674 { | |
675 const Ibyte *newptr = ptr; | |
676 while (len1 > 0) | |
677 { | |
678 DEC_IBYTEPTR (newptr); | |
679 len1--; | |
680 } | |
681 ret1 = ptr - newptr; | |
682 } | |
683 ret2 = charcount_to_bytecount_down_fun (ptr, len); | |
684 text_checking_assert (ret1 == ret2); | |
685 return ret1; | |
686 #else | |
687 text_checking_assert (len >= 0); | |
688 if (len < 20) /* See above */ | |
689 { | |
690 const Ibyte *newptr = ptr; | |
691 while (len > 0) | |
692 { | |
693 DEC_IBYTEPTR (newptr); | |
694 len--; | |
695 } | |
696 return ptr - newptr; | |
697 } | |
698 else | |
699 return charcount_to_bytecount_down_fun (ptr, len); | |
700 #endif /* SLEDGEHAMMER_CHECK_TEXT */ | |
701 } | |
702 | |
826 | 703 /* Given a pointer to a text string in the specified format and a length in |
704 bytes, return the equivalent length in characters. | |
705 */ | |
706 | |
707 DECLARE_INLINE_HEADER ( | |
708 Charcount | |
867 | 709 bytecount_to_charcount_fmt (const Ibyte *ptr, Bytecount len, |
826 | 710 Internal_Format fmt) |
711 ) | |
712 { | |
713 switch (fmt) | |
714 { | |
715 case FORMAT_DEFAULT: | |
716 return bytecount_to_charcount (ptr, len); | |
717 case FORMAT_16_BIT_FIXED: | |
1204 | 718 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 719 return (Charcount) (len << 1); |
720 case FORMAT_32_BIT_FIXED: | |
1204 | 721 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 722 return (Charcount) (len << 2); |
723 default: | |
724 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
725 return (Charcount) len; | |
726 } | |
727 } | |
728 | |
729 /* Given a pointer to a text string in the specified format and a length in | |
730 characters, return the equivalent length in bytes. | |
731 */ | |
732 | |
733 DECLARE_INLINE_HEADER ( | |
734 Bytecount | |
867 | 735 charcount_to_bytecount_fmt (const Ibyte *ptr, Charcount len, |
826 | 736 Internal_Format fmt) |
737 ) | |
738 { | |
739 switch (fmt) | |
740 { | |
741 case FORMAT_DEFAULT: | |
742 return charcount_to_bytecount (ptr, len); | |
743 case FORMAT_16_BIT_FIXED: | |
1204 | 744 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 745 text_checking_assert (!(len & 1)); |
746 return (Bytecount) (len >> 1); | |
747 case FORMAT_32_BIT_FIXED: | |
748 text_checking_assert (!(len & 3)); | |
1204 | 749 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 750 return (Bytecount) (len >> 2); |
751 default: | |
752 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
753 return (Bytecount) len; | |
754 } | |
755 } | |
756 | |
757 #else | |
758 | |
/* Non-MULE build: byte counts and character counts are the same number,
   so each conversion is only a cast of LEN.  PTR and FMT are ignored. */
#define bytecount_to_charcount(ptr, len)            ((Charcount) (len))
#define charcount_to_bytecount(ptr, len)            ((Bytecount) (len))
#define bytecount_to_charcount_fmt(ptr, len, fmt)   ((Charcount) (len))
#define charcount_to_bytecount_fmt(ptr, len, fmt)   ((Bytecount) (len))
763 | |
764 #endif /* MULE */ | |
765 | |
/* Return the length of the first character at PTR.  Equivalent to
   charcount_to_bytecount (ptr, 1).

   [Since charcount_to_bytecount() is written as inline, a smart
   compiler should really optimize charcount_to_bytecount (ptr, 1) to
   the same as the following, with no error checking.  But since this
   idiom occurs so often, we'll be helpful and define a special macro
   for it.]
 */

#define itext_ichar_len(ptr) \
  rep_bytes_by_first_byte (*(ptr))
826 | 776 |
777 /* Return the length of the first character at PTR, which is in the | |
778 specified internal format. Equivalent to charcount_to_bytecount_fmt | |
779 (ptr, 1, fmt). | |
780 */ | |
781 | |
782 DECLARE_INLINE_HEADER ( | |
783 Bytecount | |
4853 | 784 itext_ichar_len_fmt (const Ibyte *ptr, Internal_Format fmt) |
826 | 785 ) |
786 { | |
787 switch (fmt) | |
788 { | |
789 case FORMAT_DEFAULT: | |
867 | 790 return itext_ichar_len (ptr); |
826 | 791 case FORMAT_16_BIT_FIXED: |
1204 | 792 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 793 return 2; |
794 case FORMAT_32_BIT_FIXED: | |
1204 | 795 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 796 return 4; |
797 default: | |
798 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
799 return 1; | |
800 } | |
801 } | |
802 | |
803 /* Return a pointer to the beginning of the character offset N (in | |
804 characters) from PTR. | |
805 */ | |
806 | |
807 DECLARE_INLINE_HEADER ( | |
867 | 808 const Ibyte * |
809 itext_n_addr (const Ibyte *ptr, Charcount offset) | |
826 | 810 ) |
771 | 811 { |
812 return ptr + charcount_to_bytecount (ptr, offset); | |
813 } | |
814 | |
/* Given an itext PTR and a byte offset POS into the text it points to,
   modify POS so that it refers to the beginning of the next character.

   With text checking enabled, both the base PTR and the position
   actually read, PTR + POS, are asserted to be at the beginning of a
   character.  POS must be a modifiable lvalue; PTR and POS are each
   evaluated more than once.
 */

#define INC_BYTECOUNT(ptr, pos) do {                            \
  assert_valid_ibyteptr (ptr);                                  \
  assert_valid_ibyteptr ((ptr) + (pos));                        \
  (pos) += rep_bytes_by_first_byte (* ((ptr) + (pos)));         \
} while (0)
823 | |
771 | 824 /* -------------------------------------------------------------------- */ |
867 | 825 /* Retrieving or changing the character pointed to by a itext */ |
771 | 826 /* -------------------------------------------------------------------- */ |
827 | |
/* Single-byte versions of the character accessors.  Each one touches
   only the first byte; the set and copy forms evaluate to a Bytecount
   of 1. */

/* Read the byte at PTR as an Ichar. */
#define simple_itext_ichar(ptr) \
  ((Ichar) *(ptr))

/* Store X, cast to Ibyte, at PTR. */
#define simple_set_itext_ichar(ptr, x) \
  (*(ptr) = (Ibyte) (x), (Bytecount) 1)

/* Copy the byte at SRC to DST. */
#define simple_itext_copy_ichar(src, dst) \
  (*(dst) = (src)[0], (Bytecount) 1)
771 | 833 |
834 #ifdef MULE | |
835 | |
1632 | 836 MODULE_API Ichar non_ascii_itext_ichar (const Ibyte *ptr); |
837 MODULE_API Bytecount non_ascii_set_itext_ichar (Ibyte *ptr, Ichar c); | |
838 MODULE_API Bytecount non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst); | |
867 | 839 |
840 /* Retrieve the character pointed to by PTR as an Ichar. */ | |
826 | 841 |
842 DECLARE_INLINE_HEADER ( | |
867 | 843 Ichar |
844 itext_ichar (const Ibyte *ptr) | |
826 | 845 ) |
771 | 846 { |
826 | 847 return byte_ascii_p (*ptr) ? |
867 | 848 simple_itext_ichar (ptr) : |
849 non_ascii_itext_ichar (ptr); | |
771 | 850 } |
851 | |
826 | 852 /* Retrieve the character pointed to by PTR (a pointer to text in the |
853 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an | |
867 | 854 Ichar. |
826 | 855 |
856 Note: For these and other *_fmt() functions, if you pass in a constant | |
857 FMT, the switch will be optimized out of existence. Therefore, there is | |
858 no need to create separate versions for the various formats for | |
867 | 859 "efficiency reasons". In fact, we don't really need itext_ichar() |
826 | 860 and such written separately, but they are used often so it's simpler |
861 that way. */ | |
862 | |
863 DECLARE_INLINE_HEADER ( | |
867 | 864 Ichar |
865 itext_ichar_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 866 Lisp_Object UNUSED (object)) |
826 | 867 ) |
868 { | |
869 switch (fmt) | |
870 { | |
871 case FORMAT_DEFAULT: | |
867 | 872 return itext_ichar (ptr); |
826 | 873 case FORMAT_16_BIT_FIXED: |
1204 | 874 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 875 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 876 case FORMAT_32_BIT_FIXED: |
1204 | 877 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 878 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 879 default: |
880 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 881 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 882 } |
883 } | |
884 | |
885 /* Return the character at PTR (which is in format FMT), suitable for | |
886 comparison with an ASCII character. This guarantees that if the | |
887 character at PTR is ASCII (range 0 - 127), that character will be | |
888 returned; otherwise, some character outside of the ASCII range will be | |
889 returned, but not necessarily the character actually at PTR. This will | |
867 | 890 be faster than itext_ichar_fmt() for some formats -- in particular, |
826 | 891 FORMAT_DEFAULT. */ |
892 | |
893 DECLARE_INLINE_HEADER ( | |
867 | 894 Ichar |
895 itext_ichar_ascii_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 896 Lisp_Object UNUSED (object)) |
826 | 897 ) |
898 { | |
899 switch (fmt) | |
900 { | |
901 case FORMAT_DEFAULT: | |
867 | 902 return (Ichar) *ptr; |
826 | 903 case FORMAT_16_BIT_FIXED: |
1204 | 904 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 905 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 906 case FORMAT_32_BIT_FIXED: |
1204 | 907 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 908 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 909 default: |
910 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 911 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 912 } |
913 } | |
914 | |
915 /* Return the "raw value" of the character at PTR, in format FMT. This is | |
916 useful when searching for a character; convert the character using | |
867 | 917 ichar_to_raw(). */ |
826 | 918 |
919 DECLARE_INLINE_HEADER ( | |
867 | 920 Raw_Ichar |
921 itext_ichar_raw_fmt (const Ibyte *ptr, Internal_Format fmt) | |
826 | 922 ) |
923 { | |
924 switch (fmt) | |
925 { | |
926 case FORMAT_DEFAULT: | |
867 | 927 return (Raw_Ichar) itext_ichar (ptr); |
826 | 928 case FORMAT_16_BIT_FIXED: |
1204 | 929 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 930 return (Raw_Ichar) (* (UINT_16_BIT *) ptr); |
826 | 931 case FORMAT_32_BIT_FIXED: |
1204 | 932 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 933 return (Raw_Ichar) (* (UINT_32_BIT *) ptr); |
826 | 934 default: |
935 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 936 return (Raw_Ichar) (*ptr); |
826 | 937 } |
938 } | |
939 | |
867 | 940 /* Store the character CH (an Ichar) as internally-formatted text starting |
826 | 941 at PTR. Return the number of bytes stored. |
942 */ | |
943 | |
944 DECLARE_INLINE_HEADER ( | |
945 Bytecount | |
867 | 946 set_itext_ichar (Ibyte *ptr, Ichar x) |
826 | 947 ) |
771 | 948 { |
867 | 949 return !ichar_multibyte_p (x) ? |
950 simple_set_itext_ichar (ptr, x) : | |
951 non_ascii_set_itext_ichar (ptr, x); | |
771 | 952 } |
953 | |
867 | 954 /* Store the character CH (an Ichar) as internally-formatted text of |
826 | 955 format FMT starting at PTR, which comes from OBJECT. Return the number |
956 of bytes stored. | |
957 */ | |
958 | |
959 DECLARE_INLINE_HEADER ( | |
960 Bytecount | |
867 | 961 set_itext_ichar_fmt (Ibyte *ptr, Ichar x, Internal_Format fmt, |
2286 | 962 Lisp_Object UNUSED (object)) |
826 | 963 ) |
771 | 964 { |
826 | 965 switch (fmt) |
966 { | |
967 case FORMAT_DEFAULT: | |
867 | 968 return set_itext_ichar (ptr, x); |
826 | 969 case FORMAT_16_BIT_FIXED: |
867 | 970 text_checking_assert (ichar_16_bit_fixed_p (x, object)); |
1204 | 971 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 972 * (UINT_16_BIT *) ptr = ichar_to_raw_16_bit_fixed (x, object); |
826 | 973 return 2; |
974 case FORMAT_32_BIT_FIXED: | |
1204 | 975 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 976 * (UINT_32_BIT *) ptr = ichar_to_raw_32_bit_fixed (x, object); |
826 | 977 return 4; |
978 default: | |
979 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 980 text_checking_assert (ichar_8_bit_fixed_p (x, object)); |
981 *ptr = ichar_to_raw_8_bit_fixed (x, object); | |
826 | 982 return 1; |
983 } | |
984 } | |
985 | |
986 /* Retrieve the character pointed to by SRC and store it as | |
987 internally-formatted text in DST. | |
988 */ | |
989 | |
990 DECLARE_INLINE_HEADER ( | |
991 Bytecount | |
867 | 992 itext_copy_ichar (const Ibyte *src, Ibyte *dst) |
826 | 993 ) |
994 { | |
995 return byte_ascii_p (*src) ? | |
867 | 996 simple_itext_copy_ichar (src, dst) : |
997 non_ascii_itext_copy_ichar (src, dst); | |
771 | 998 } |
999 | |
1000 #else /* not MULE */ | |
1001 | |
/* Non-MULE build: every accessor collapses to the single-byte "simple"
   form, and the format and object arguments are ignored. */

/* Readers */
# define itext_ichar(ptr)                         simple_itext_ichar (ptr)
# define itext_ichar_fmt(ptr, fmt, object)        itext_ichar (ptr)
# define itext_ichar_ascii_fmt(ptr, fmt, object)  itext_ichar (ptr)
# define itext_ichar_raw_fmt(ptr, fmt)            itext_ichar (ptr)

/* Writers */
# define set_itext_ichar(ptr, x)                  simple_set_itext_ichar (ptr, x)
# define set_itext_ichar_fmt(ptr, x, fmt, obj)    set_itext_ichar (ptr, x)

/* Copy */
# define itext_copy_ichar(src, dst)               simple_itext_copy_ichar (src, dst)
771 | 1009 |
1010 #endif /* not MULE */ | |
1011 | |
/* Retrieve, as an Ichar, the character that lies OFFSET characters past
   PTR: locate it with itext_n_addr() and decode it with itext_ichar().
 */

#define itext_ichar_n(ptr, offset) itext_ichar (itext_n_addr (ptr, offset))
771 | 1018 |
1019 | |
1020 /************************************************************************/ | |
1021 /* */ | |
826 | 1022 /* working with Lisp strings */ |
1023 /* */ | |
1024 /************************************************************************/ | |
1025 | |
/* Length of Lisp string S in characters: the character index that
   corresponds to its byte length. */
#define string_char_length(s) string_index_byte_to_char (s, XSTRING_LENGTH (s))

/* Byte at byte offset I of S.  The `+ 0' turns the result into an
   rvalue, so this form cannot be assigned to. */
#define string_byte(s, i) (XSTRING_DATA (s)[i] + 0)

/* Store byte C at byte offset I of S.  Kept as a separate macro in case
   we ever allow strings to be in a different format ... */
#define set_string_byte(s, i, c) (XSTRING_DATA (s)[i] = (c))
1031 | |
/* With text checking enabled, assert that X is a valid *character*
   index into Lisp string S: 0 <= X <= character length of S.  "UNSAFE"
   because S and X are each evaluated more than once. */
#define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= string_char_length (s));     \
} while (0)

/* With text checking enabled, assert that X is a valid *byte* index
   into Lisp string S: 0 <= X <= byte length of S, and the address at
   that offset is at the beginning of a character.  "UNSAFE" because S
   and X are each evaluated more than once. */
#define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s));         \
  text_checking_assert (valid_ibyteptr_p (string_byte_addr (s, x)));    \
} while (0)
1040 | |
/* Byte offset I in string S -> pointer to the text there. */
#define string_byte_addr(s, i) \
  (&(XSTRING_DATA (s)[i]))

/* Pointer PTR to text inside string S -> byte offset of that text. */
#define string_addr_to_byte(s, ptr) \
  ((Bytecount) ((ptr) - XSTRING_DATA (s)))

/* The Ichar at *CHARACTER* offset I of string S. */
#define string_ichar(s, i) \
  itext_ichar (string_char_addr (s, i))
826 | 1047 |
/* Text error-checking switches on the ascii-begin consistency check. */
#if defined (ERROR_CHECK_TEXT)
# define SLEDGEHAMMER_CHECK_ASCII_BEGIN
#endif

/* With the check switched off, calls to sledgehammer_check_ascii_begin()
   expand to nothing; otherwise it is a real function defined
   elsewhere. */
#ifndef SLEDGEHAMMER_CHECK_ASCII_BEGIN
# define sledgehammer_check_ascii_begin(str)
#else
void sledgehammer_check_ascii_begin (Lisp_Object str);
#endif
1057 | |
/* Make an alloca'd copy of Lisp string S and store its address in LVAL.
   One byte more than XSTRING_LENGTH (S) is allocated and copied, so the
   byte just past the string data travels along with it.  S is evaluated
   once. */
#define LISP_STRING_TO_ALLOCA(s, lval)                                  \
do {                                                                    \
  Lisp_Object _lta_2 = (s);                                             \
  Ibyte **_lta_ = (Ibyte **) &(lval);                                   \
  Bytecount _lta_size = 1 + XSTRING_LENGTH (_lta_2);                    \
                                                                        \
  *_lta_ = alloca_ibytes (_lta_size);                                   \
  memcpy (*_lta_, XSTRING_DATA (_lta_2), _lta_size);                    \
} while (0)
1066 | |
1067 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta); | |
1068 | |
1069 /* Convert a byte index into a string into a char index. */ | |
1070 DECLARE_INLINE_HEADER ( | |
1071 Charcount | |
4853 | 1072 string_index_byte_to_char (Lisp_Object s, Bytecount idx) |
826 | 1073 ) |
1074 { | |
1075 Charcount retval; | |
1076 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx); | |
1077 #ifdef MULE | |
1078 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1079 retval = (Charcount) idx; | |
1080 else | |
1081 retval = (XSTRING_ASCII_BEGIN (s) + | |
1082 bytecount_to_charcount (XSTRING_DATA (s) + | |
1083 XSTRING_ASCII_BEGIN (s), | |
1084 idx - XSTRING_ASCII_BEGIN (s))); | |
1085 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1086 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx)); | |
1087 # endif | |
1088 #else | |
1089 retval = (Charcount) idx; | |
1090 #endif | |
1091 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will | |
1092 call string_index_byte_to_char(). */ | |
1093 return retval; | |
1094 } | |
1095 | |
1096 /* Convert a char index into a string into a byte index. */ | |
1097 DECLARE_INLINE_HEADER ( | |
1098 Bytecount | |
4853 | 1099 string_index_char_to_byte (Lisp_Object s, Charcount idx) |
826 | 1100 ) |
1101 { | |
1102 Bytecount retval; | |
1103 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx); | |
1104 #ifdef MULE | |
1105 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s)) | |
1106 retval = (Bytecount) idx; | |
1107 else | |
1108 retval = (XSTRING_ASCII_BEGIN (s) + | |
1109 charcount_to_bytecount (XSTRING_DATA (s) + | |
1110 XSTRING_ASCII_BEGIN (s), | |
1111 idx - XSTRING_ASCII_BEGIN (s))); | |
1112 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1113 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx)); | |
1114 # endif | |
1115 #else | |
1116 retval = (Bytecount) idx; | |
1117 #endif | |
1118 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval); | |
1119 return retval; | |
1120 } | |
1121 | |
1122 /* Convert a substring length (starting at byte offset OFF) from bytes to | |
1123 chars. */ | |
1124 DECLARE_INLINE_HEADER ( | |
1125 Charcount | |
4853 | 1126 string_offset_byte_to_char_len (Lisp_Object s, Bytecount off, Bytecount len) |
826 | 1127 ) |
1128 { | |
1129 Charcount retval; | |
1130 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1131 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len); | |
1132 #ifdef MULE | |
1133 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1134 retval = (Charcount) len; | |
1135 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1136 retval = | |
1137 XSTRING_ASCII_BEGIN (s) - (Charcount) off + | |
1138 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1139 len - (XSTRING_ASCII_BEGIN (s) - off)); | |
1140 else | |
1141 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len); | |
1142 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1143 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len)); | |
1144 # endif | |
1145 #else | |
1146 retval = (Charcount) len; | |
1147 #endif | |
1148 return retval; | |
1149 } | |
1150 | |
1151 /* Convert a substring length (starting at byte offset OFF) from chars to | |
1152 bytes. */ | |
1153 DECLARE_INLINE_HEADER ( | |
1154 Bytecount | |
4853 | 1155 string_offset_char_to_byte_len (Lisp_Object s, Bytecount off, Charcount len) |
826 | 1156 ) |
1157 { | |
1158 Bytecount retval; | |
1159 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1160 #ifdef MULE | |
1161 /* casts to avoid errors from combining Bytecount/Charcount and warnings | |
1162 from signed/unsigned comparisons */ | |
1163 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1164 retval = (Bytecount) len; | |
1165 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1166 retval = | |
1167 XSTRING_ASCII_BEGIN (s) - off + | |
1168 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1169 len - (XSTRING_ASCII_BEGIN (s) - | |
1170 (Charcount) off)); | |
1171 else | |
1172 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len); | |
1173 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1174 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len)); | |
1175 # endif | |
1176 #else | |
1177 retval = (Bytecount) len; | |
1178 #endif | |
1179 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval); | |
1180 return retval; | |
1181 } | |
1182 | |
1183 DECLARE_INLINE_HEADER ( | |
867 | 1184 const Ibyte * |
826 | 1185 string_char_addr (Lisp_Object s, Charcount idx) |
1186 ) | |
1187 { | |
1188 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx); | |
1189 } | |
1190 | |
/* Store character C at character index I of string S.

   WARNING: If you modify an existing string, you must call
   bump_string_modiff() afterwards. */
#ifndef MULE
#define set_string_char(s, i, c) set_string_byte (s, i, c)
#else
void set_string_char (Lisp_Object s, Charcount i, Ichar c);
#endif /* MULE */
1198 | |
1199 /* Return index to character before the one at IDX. */ | |
1200 DECLARE_INLINE_HEADER ( | |
1201 Bytecount | |
1202 prev_string_index (Lisp_Object s, Bytecount idx) | |
1203 ) | |
1204 { | |
867 | 1205 const Ibyte *ptr = string_byte_addr (s, idx); |
1206 DEC_IBYTEPTR (ptr); | |
826 | 1207 return string_addr_to_byte (s, ptr); |
1208 } | |
1209 | |
1210 /* Return index to character after the one at IDX. */ | |
1211 DECLARE_INLINE_HEADER ( | |
1212 Bytecount | |
1213 next_string_index (Lisp_Object s, Bytecount idx) | |
1214 ) | |
1215 { | |
867 | 1216 const Ibyte *ptr = string_byte_addr (s, idx); |
1217 INC_IBYTEPTR (ptr); | |
826 | 1218 return string_addr_to_byte (s, ptr); |
1219 } | |
1220 | |
1221 | |
1222 /************************************************************************/ | |
1223 /* */ | |
771 | 1224 /* working with Eistrings */ |
1225 /* */ | |
1226 /************************************************************************/ | |
1227 | |
1228 /* | |
1229 #### NOTE: This is a work in progress. Neither the API nor especially | |
1230 the implementation is finished. | |
1231 | |
1232 NOTE: An Eistring is a structure that makes it easy to work with | |
1233 internally-formatted strings of data. It provides operations similar | |
1234 in feel to the standard strcpy(), strcat(), strlen(), etc., but | |
1235 | |
1236 (a) it is Mule-correct | |
1237 (b) it does dynamic allocation so you never have to worry about size | |
793 | 1238 restrictions |
851 | 1239 (c) it comes in an ALLOCA() variety (all allocation is stack-local, |
793 | 1240 so there is no need to explicitly clean up) as well as a malloc() |
1241 variety | |
1242 (d) it knows its own length, so it does not suffer from standard null | |
1243 byte brain-damage -- but it null-terminates the data anyway, so | |
1244 it can be passed to standard routines | |
1245 (e) it provides a much more powerful set of operations and knows about | |
771 | 1246 all the standard places where string data might reside: Lisp_Objects, |
867 | 1247 other Eistrings, Ibyte * data with or without an explicit length, |
1248 ASCII strings, Ichars, etc. | |
793 | 1249 (f) it provides easy operations to convert to/from externally-formatted |
1250 data, and is easier to use than the standard TO_INTERNAL_FORMAT | |
771 | 1251 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal |
1252 and external version of its data, but the external version is only | |
1253 initialized or changed when you call eito_external().) | |
1254 | |
793 | 1255 The idea is to make it as easy to write Mule-correct string manipulation |
1256 code as it is to write normal string manipulation code. We also make | |
1257 the API sufficiently general that it can handle multiple internal data | |
1258 formats (e.g. some fixed-width optimizing formats and a default variable | |
1259 width format) and allows for *ANY* data format we might choose in the | |
1260 future for the default format, including UCS2. (In other words, we can't | |
1261 assume that the internal format is ASCII-compatible and we can't assume | |
1262 it doesn't have embedded null bytes. We do assume, however, that any | |
1263 chosen format will have the concept of null-termination.) All of this is | |
1264 hidden from the user. | |
771 | 1265 |
1266 #### It is really too bad that we don't have a real object-oriented | |
1267 language, or at least a language with polymorphism! | |
1268 | |
1269 | |
1270 ********************************************** | |
1271 * Declaration * | |
1272 ********************************************** | |
1273 | |
1274 To declare an Eistring, either put one of the following in the local | |
1275 variable section: | |
1276 | |
1277 DECLARE_EISTRING (name); | |
2367 | 1278 Declare a new Eistring and initialize it to the empty string. This
1279 is a standard local variable declaration and can go anywhere in the | |
1280 variable declaration section. NAME itself is declared as an | |
1281 Eistring *, and its storage declared on the stack. | |
771 | 1282 |
1283 DECLARE_EISTRING_MALLOC (name); | |
2367 | 1284 Declare and initialize a new Eistring, which uses malloc()ed |
1285 instead of ALLOCA()ed data. This is a standard local variable | |
1286 declaration and can go anywhere in the variable declaration | |
1287 section. Once you initialize the Eistring, you will have to free | |
1288 it using eifree() to avoid memory leaks. You will need to use this | |
1289 form if you are passing an Eistring to any function that modifies | |
1290 it (otherwise, the modified data may be in stack space and get | |
1291 overwritten when the function returns). | |
771 | 1292 |
1293 or use | |
1294 | |
793 | 1295 Eistring ei; |
1296 void eiinit (Eistring *ei); | |
1297 void eiinit_malloc (Eistring *einame); | |
771 | 1298 If you need to put an Eistring elsewhere than in a local variable |
1299 declaration (e.g. in a structure), declare it as shown and then | |
1300 call one of the init macros. | |
1301 | |
1302 Also note: | |
1303 | |
793 | 1304 void eifree (Eistring *ei); |
771 | 1305 If you declared an Eistring to use malloc() to hold its data, |
1306 or converted it to the heap using eito_malloc(), then this | |
1307 releases any data in it and afterwards resets the Eistring | |
1308 using eiinit_malloc(). Otherwise, it just resets the Eistring | |
1309 using eiinit(). | |
1310 | |
1311 | |
1312 ********************************************** | |
1313 * Conventions * | |
1314 ********************************************** | |
1315 | |
1316 - The names of the functions have been chosen, where possible, to | |
1317 match the names of str*() functions in the standard C API. | |
1318 - | |
1319 | |
1320 | |
1321 ********************************************** | |
1322 * Initialization * | |
1323 ********************************************** | |
1324 | |
1325 void eireset (Eistring *eistr); | |
1326 Initialize the Eistring to the empty string. | |
1327 | |
1328 void eicpy_* (Eistring *eistr, ...); | |
1329 Initialize the Eistring from somewhere: | |
1330 | |
1331 void eicpy_ei (Eistring *eistr, Eistring *eistr2); | |
1332 ... from another Eistring. | |
1333 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string); | |
1334 ... from a Lisp_Object string. | |
867 | 1335 void eicpy_ch (Eistring *eistr, Ichar ch); |
1336 ... from an Ichar (this can be a conventional C character). | |
771 | 1337 |
1338 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string, | |
1339 Bytecount off, Charcount charoff, | |
1340 Bytecount len, Charcount charlen); | |
1341 ... from a section of a Lisp_Object string. | |
1342 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf, | |
1343 Bytecount off, Charcount charoff, | |
1344 Bytecount len, Charcount charlen); | |
1345 ... from a section of a Lisp_Object buffer. | |
867 | 1346 void eicpy_raw (Eistring *eistr, const Ibyte *data, Bytecount len); |
771 | 1347 ... from raw internal-format data in the default internal format. |
867 | 1348 void eicpy_rawz (Eistring *eistr, const Ibyte *data); |
771 | 1349 ... from raw internal-format data in the default internal format |
1350 that is "null-terminated" (the meaning of this depends on the nature | |
1351 of the default internal format). | |
867 | 1352 void eicpy_raw_fmt (Eistring *eistr, const Ibyte *data, Bytecount len, |
826 | 1353 Internal_Format intfmt, Lisp_Object object); |
771 | 1354 ... from raw internal-format data in the specified format. |
867 | 1355 void eicpy_rawz_fmt (Eistring *eistr, const Ibyte *data, |
826 | 1356 Internal_Format intfmt, Lisp_Object object); |
771 | 1357 ... from raw internal-format data in the specified format that is |
1358 "null-terminated" (the meaning of this depends on the nature of | |
1359 the specific format). | |
2421 | 1360 void eicpy_ascii (Eistring *eistr, const Ascbyte *ascstr); |
771 | 1361 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1362 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
2421 | 1363 void eicpy_ascii_len (Eistring *eistr, const Ascbyte *ascstr, len); |
771 | 1364 ... from an ASCII string, with length specified. Non-ASCII characters |
2500 | 1365 in the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1366 void eicpy_ext (Eistring *eistr, const Extbyte *extdata, |
1318 | 1367 Lisp_Object codesys); |
771 | 1368 ... from external null-terminated data, with coding system specified. |
1369 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata, | |
1318 | 1370 Bytecount extlen, Lisp_Object codesys); |
771 | 1371 ... from external data, with length and coding system specified. |
1372 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream); | |
1373 ... from an lstream; reads data till eof. Data must be in default | |
1374 internal format; otherwise, interpose a decoding lstream. | |
1375 | |
1376 | |
1377 ********************************************** | |
1378 * Getting the data out of the Eistring * | |
1379 ********************************************** | |
1380 | |
867 | 1381 Ibyte *eidata (Eistring *eistr); |
771 | 1382 Return a pointer to the raw data in an Eistring. This is NOT |
1383 a copy. | |
1384 | |
1385 Lisp_Object eimake_string (Eistring *eistr); | |
1386 Make a Lisp string out of the Eistring. | |
1387 | |
1388 Lisp_Object eimake_string_off (Eistring *eistr, | |
1389 Bytecount off, Charcount charoff, | |
1390 Bytecount len, Charcount charlen); | |
1391 Make a Lisp string out of a section of the Eistring. | |
1392 | |
867 | 1393 void eicpyout_alloca (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1394 LVALUE: Bytecount len_out); |
851 | 1395 Make an ALLOCA() copy of the data in the Eistring, using the |
1396 default internal format. Due to the nature of ALLOCA(), this | |
771 | 1397 must be a macro, with all lvalues passed in as parameters. |
793 | 1398 (More specifically, not all compilers correctly handle using |
851 | 1399 ALLOCA() as the argument to a function call -- GCC on x86 |
1400 didn't use to, for example.) A pointer to the ALLOCA()ed data | |
793 | 1401 is stored in PTR_OUT, and the length of the data (not including |
1402 the terminating zero) is stored in LEN_OUT. | |
771 | 1403 |
867 | 1404 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1405 LVALUE: Bytecount len_out, |
826 | 1406 Internal_Format intfmt, Lisp_Object object); |
771 | 1407 Like eicpyout_alloca(), but converts to the specified internal |
1408 format. (No formats other than FORMAT_DEFAULT are currently | |
1409 implemented, and you get an assertion failure if you try.) | |
1410 | |
867 | 1411 Ibyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out); |
771 | 1412 Make a malloc() copy of the data in the Eistring, using the |
1413 default internal format. This is a real function. No lvalues | |
1414 passed in. Returns the new data, and stores the length (not | |
1415 including the terminating zero) using INTLEN_OUT, unless it's | |
1416 a NULL pointer. | |
1417 | |
867 | 1418 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt, |
826 | 1419 Bytecount *intlen_out, Lisp_Object object); |
771 | 1420 Like eicpyout_malloc(), but converts to the specified internal |
1421 format. (No formats other than FORMAT_DEFAULT are currently | |
1422 implemented, and you get an assertion failure if you try.) | |
1423 | |
1424 | |
1425 ********************************************** | |
1426 * Moving to the heap * | |
1427 ********************************************** | |
1428 | |
1429 void eito_malloc (Eistring *eistr); | |
1430 Move this Eistring to the heap. Its data will be stored in a | |
1431 malloc()ed block rather than the stack. Subsequent changes to | |
1432 this Eistring will realloc() the block as necessary. Use this | |
1433 when you want the Eistring to remain in scope past the end of | |
1434 this function call. You will have to manually free the data | |
1435 in the Eistring using eifree(). | |
1436 | |
1437 void eito_alloca (Eistring *eistr); | |
1438 Move this Eistring back to the stack, if it was moved to the | |
1439 heap with eito_malloc(). This will automatically free any | |
1440 heap-allocated data. | |
1441 | |
1442 | |
1443 | |
1444 ********************************************** | |
1445 * Retrieving the length * | |
1446 ********************************************** | |
1447 | |
1448 Bytecount eilen (Eistring *eistr); | |
1449 Return the length of the internal data, in bytes. See also | |
1450 eiextlen(), below. | |
1451 Charcount eicharlen (Eistring *eistr); | |
1452 Return the length of the internal data, in characters. | |
1453 | |
1454 | |
1455 ********************************************** | |
1456 * Working with positions * | |
1457 ********************************************** | |
1458 | |
1459 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos); | |
1460 Convert a char offset to a byte offset. | |
1461 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos); | |
1462 Convert a byte offset to a char offset. | |
1463 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos); | |
1464 Increment the given position by one character. | |
1465 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1466 Increment the given position by N characters. | |
1467 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos); | |
1468 Decrement the given position by one character. | |
1469 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1470 Decrement the given position by N characters. | |
1471 | |
1472 | |
1473 ********************************************** | |
1474 * Getting the character at a position * | |
1475 ********************************************** | |
1476 | |
867 | 1477 Ichar eigetch (Eistring *eistr, Bytecount bytepos); |
771 | 1478 Return the character at a particular byte offset. |
867 | 1479 Ichar eigetch_char (Eistring *eistr, Charcount charpos); |
771 | 1480 Return the character at a particular character offset. |
1481 | |
1482 | |
1483 ********************************************** | |
1484 * Setting the character at a position * | |
1485 ********************************************** | |
1486 | |
867 | 1487 Ichar eisetch (Eistring *eistr, Bytecount bytepos, Ichar chr); |
771 | 1488 Set the character at a particular byte offset. |
867 | 1489 Ichar eisetch_char (Eistring *eistr, Charcount charpos, Ichar chr); |
771 | 1490 Set the character at a particular character offset. |
1491 | |
1492 | |
1493 ********************************************** | |
1494 * Concatenation * | |
1495 ********************************************** | |
1496 | |
1497 void eicat_* (Eistring *eistr, ...); | |
1498 Concatenate onto the end of the Eistring, with data coming from the | |
1499 same places as above: | |
1500 | |
1501 void eicat_ei (Eistring *eistr, Eistring *eistr2); | |
1502 ... from another Eistring. | |
2421 | 1503 void eicat_ascii (Eistring *eistr, Ascbyte *ascstr); |
771 | 1504 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1505 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
867 | 1506 void eicat_raw (ei, const Ibyte *data, Bytecount len); |
771 | 1507 ... from raw internal-format data in the default internal format. |
867 | 1508 void eicat_rawz (ei, const Ibyte *data); |
771 | 1509 ... from raw internal-format data in the default internal format |
1510 that is "null-terminated" (the meaning of this depends on the nature | |
1511 of the default internal format). | |
1512 void eicat_lstr (ei, Lisp_Object lisp_string); | |
1513 ... from a Lisp_Object string. | |
867 | 1514 void eicat_ch (ei, Ichar ch); |
1515 ... from an Ichar. | |
771 | 1516 |
1517 (All except the first variety are convenience functions. | |
1518 In the general case, create another Eistring from the source.) | |
1519 | |
1520 | |
1521 ********************************************** | |
1522 * Replacement * | |
1523 ********************************************** | |
1524 | |
1525 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1526 Bytecount len, Charcount charlen, ...); | |
1527 Replace a section of the Eistring, specifically: | |
1528 | |
1529 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1530 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1531 ... with another Eistring. | |
2421 | 1532 void eisub_ascii (Eistring *eistr, Bytecount off, Charcount charoff, |
1533 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
771 | 1534 ... with an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1535 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1536 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff, |
867 | 1537 Bytecount len, Charcount charlen, Ichar ch); |
1538 ... with an Ichar. | |
771 | 1539 |
1540 void eidel (Eistring *eistr, Bytecount off, Charcount charoff, | |
1541 Bytecount len, Charcount charlen); | |
1542 Delete a section of the Eistring. | |
1543 | |
1544 | |
1545 ********************************************** | |
1546 * Converting to an external format * | |
1547 ********************************************** | |
1548 | |
1318 | 1549 void eito_external (Eistring *eistr, Lisp_Object codesys); |
771 | 1550 Convert the Eistring to an external format and store the result |
1551 in the string. NOTE: Further changes to the Eistring will *NOT* | |
1552 change the external data stored in the string. You will have to | |
1553 call eito_external() again in such a case if you want the external | |
1554 data. | |
1555 | |
1556 Extbyte *eiextdata (Eistring *eistr); | |
1557 Return a pointer to the external data stored in the Eistring as | |
1558 a result of a prior call to eito_external(). | |
1559 | |
1560 Bytecount eiextlen (Eistring *eistr); | |
1561 Return the length in bytes of the external data stored in the | |
1562 Eistring as a result of a prior call to eito_external(). | |
1563 | |
1564 | |
1565 ********************************************** | |
1566 * Searching in the Eistring for a character * | |
1567 ********************************************** | |
1568 | |
867 | 1569 Bytecount eichr (Eistring *eistr, Ichar chr); |
1570 Charcount eichr_char (Eistring *eistr, Ichar chr); | |
1571 Bytecount eichr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1572 Charcount charoff); |
867 | 1573 Charcount eichr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1574 Charcount charoff); |
867 | 1575 Bytecount eirchr (Eistring *eistr, Ichar chr); |
1576 Charcount eirchr_char (Eistring *eistr, Ichar chr); | |
1577 Bytecount eirchr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1578 Charcount charoff); |
867 | 1579 Charcount eirchr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1580 Charcount charoff); |
1581 | |
1582 | |
1583 ********************************************** | |
1584 * Searching in the Eistring for a string * | |
1585 ********************************************** | |
1586 | |
1587 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2); | |
1588 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1589 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1590 Charcount charoff); | |
1591 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1592 Bytecount off, Charcount charoff); | |
1593 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2); | |
1594 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1595 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1596 Charcount charoff); | |
1597 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1598 Bytecount off, Charcount charoff); | |
1599 | |
2421 | 1600 Bytecount eistr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1601 Charcount eistr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1602 Bytecount eistr_ascii_off (Eistring *eistr, Ascbyte *ascstr, Bytecount off, | |
771 | 1603 Charcount charoff); |
2421 | 1604 Charcount eistr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1605 Bytecount off, Charcount charoff); |
2421 | 1606 Bytecount eirstr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1607 Charcount eirstr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1608 Bytecount eirstr_ascii_off (Eistring *eistr, Ascbyte *ascstr, | |
771 | 1609 Bytecount off, Charcount charoff); |
2421 | 1610 Charcount eirstr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1611 Bytecount off, Charcount charoff); |
1612 | |
1613 | |
1614 ********************************************** | |
1615 * Comparison * | |
1616 ********************************************** | |
1617 | |
1618 int eicmp_* (Eistring *eistr, ...); | |
1619 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1620 Bytecount len, Charcount charlen, ...); | |
1621 int eicasecmp_* (Eistring *eistr, ...); | |
1622 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1623 Bytecount len, Charcount charlen, ...); | |
1624 int eicasecmp_i18n_* (Eistring *eistr, ...); | |
1625 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1626 Bytecount len, Charcount charlen, ...); | |
1627 | |
1628 Compare the Eistring with the other data. Return value same as | |
1629 from strcmp. The `*' is either `ei' for another Eistring (in | |
1630 which case `...' is an Eistring), or `ascii' for a pure-ASCII string | |
1631 (in which case `...' is a pointer to that string). For anything | |
1632 more complex, first create an Eistring out of the source. | |
1633 Comparison is either simple (`eicmp_...'), ASCII case-folding | |
1634 (`eicasecmp_...'), or multilingual case-folding | |
1635 (`eicasecmp_i18n_...'). | |
1636 | |
1637 | |
1638 More specifically, the prototypes are: | |
1639 | |
1640 int eicmp_ei (Eistring *eistr, Eistring *eistr2); | |
1641 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1642 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1643 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2); | |
1644 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1645 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1646 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2); | |
1647 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off, | |
1648 Charcount charoff, Bytecount len, | |
1649 Charcount charlen, Eistring *eistr2); | |
1650 | |
2421 | 1651 int eicmp_ascii (Eistring *eistr, Ascbyte *ascstr); |
1652 int eicmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
1653 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
1654 int eicasecmp_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1655 int eicasecmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1656 Bytecount len, Charcount charlen, |
2421 | 1657 Ascbyte *ascstr); |
1658 int eicasecmp_i18n_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1659 int eicasecmp_i18n_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1660 Bytecount len, Charcount charlen, |
2421 | 1661 Ascbyte *ascstr); |
771 | 1662 |
1663 | |
1664 ********************************************** | |
1665 * Case-changing the Eistring * | |
1666 ********************************************** | |
1667 | |
1668 void eilwr (Eistring *eistr); | |
1669 Convert all characters in the Eistring to lowercase. | |
1670 void eiupr (Eistring *eistr); | |
1671 Convert all characters in the Eistring to uppercase. | |
1672 */ | |
1673 | |
1674 | |
1675 /* Principles for writing Eistring functions: | |
1676 | |
1677 (1) Unfortunately, we have to write most of the Eistring functions | |
851 | 1678 as macros, because of the use of ALLOCA(). The principle used |
771 | 1679 below to assure no conflict in local variables is to prefix all |
1680 local variables with "ei" plus a number, which should be unique | |
1681 among macros. In practice, when finding a new number, find the | |
1682 highest so far used, and add 1. | |
1683 | |
1684 (2) We also suffix the Eistring fields with an _ to avoid problems | |
1685 with macro parameters of the same name. (And as the standard | |
1686 signal not to access these fields directly.) | |
1687 | |
1688 (3) We maintain both the length in bytes and chars of the data in | |
1689 the Eistring at all times, for convenient retrieval by outside | |
1690 functions. That means when writing functions that manipulate | |
1691 Eistrings, you too need to keep both lengths up to date for all | |
1692 data that you work with. | |
1693 | |
1694 (4) When writing a new type of operation (e.g. substitution), you | |
1695 will often find yourself working with outside data, and thus | |
1696 have a series of related API's, for different forms that the | |
1697 outside data is in. Generally, you will want to choose a | |
1698 subset of the forms supported by eicpy_*, which has to be | |
1699 totally general because that's the fundamental way to get data | |
1700 into an Eistring, and once the data is in the string, it | |
1701 would be possible to create a whole series of Ei operations that work on | |
1702 nothing but Eistrings. Although theoretically nice, in | |
1703 practice it's a hassle, so we suggest that you provide | |
1704 convenience functions. In particular, there are two paths you | |
1705 can take. One is minimalist -- it only allows other Eistrings | |
867 | 1706 and ASCII data, and Ichars if the particular operation makes |
771 | 1707 sense with a character. The other provides interfaces for the |
1708 most commonly-used forms -- Eistring, ASCII data, Lisp string, | |
1709 raw internal-format string with length, raw internal-format | |
867 | 1710 string without, and possibly Ichar. (In the function names, |
771 | 1711 these are designated `ei', `ascii', `lstr', `raw', `rawz', and
1712 `ch', respectively.) | |
1713 | |
1714 (5) When coding a new type of operation, such as was discussed in | |
1715 previous section, the correct approach is to declare a worker | |
1716 function that does the work of everything, and is called by the | |
1717 other "container" macros that handle the different outside data | |
1718 forms. The data coming into the worker function, which | |
1719 typically ends in `_1', is in the form of three parameters: | |
1720 DATA, LEN, CHARLEN. (See point [3] about having two lengths and | |
1721 keeping them in sync.) | |
1722 | |
1723 (6) Handling argument evaluation in macros: We take great care | |
1724 never to evaluate any argument more than once in any macro, | |
1725 except the initial Eistring parameter. This can and will be | |
1726 evaluated multiple times, but it should pretty much always just | |
1727 be a simple variable. This means, for example, that if an | |
1728 Eistring is the second (not first) argument of a macro, it | |
1729 doesn't fall under the "initial Eistring" exemption, so it | |
1730 needs protection against multi-evaluation. (Take the address of | |
1731 the Eistring structure, store in a temporary variable, and use | |
1732 temporary variable for all access to the Eistring.) | |
1733 Essentially, we want it to appear as if these Eistring macros | |
1734 are functions -- we would like to declare them as functions but | |
851 | 1735 they use ALLOCA(), so we can't (and we can't make them inline |
1736 functions either -- ALLOCA() is explicitly disallowed in inline | |
771 | 1737 functions.) |
1738 | |
1739 (7) Note that our rules regarding multiple evaluation are *more* | |
1740 strict than the rules listed above under the heading "working | |
1741 with raw internal-format data". | |
1742 */ | |
1743 | |
1744 | |
1745 /* ----- Declaration ----- */ | |
1746 | |
1747 typedef struct | |
1748 { | |
1749 /* Data for the Eistring, stored in the default internal format. | |
1750 Always includes terminating null. */ | |
867 | 1751 Ibyte *data_; |
771 | 1752 /* Total number of bytes allocated in DATA (including null). */ |
1753 Bytecount max_size_allocated_; | |
1754 Bytecount bytelen_; | |
1755 Charcount charlen_; | |
1756 int mallocp_; | |
1757 | |
1758 Extbyte *extdata_; | |
1759 Bytecount extlen_; | |
1760 } Eistring; | |
1761 | |
1762 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init; | |
1763 | |
/* Declare a local Eistring called NAME.  Each macro expands to two
   declarations: hidden storage named __NAME__storage__, initialized by
   structure copy from the appropriate template object, and NAME itself,
   an Eistring * pointing at that storage.  Because they are
   declarations, they belong in the variable declaration section. */

/* Storage starts out as a copy of the_eistring_zero_init. */
#define DECLARE_EISTRING(name)                                  \
  Eistring __ ## name ## __storage__ = the_eistring_zero_init;  \
  Eistring *name = &__ ## name ## __storage__

/* Storage starts out as a copy of the_eistring_malloc_zero_init. */
#define DECLARE_EISTRING_MALLOC(name)                                  \
  Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init;  \
  Eistring *name = &__ ## name ## __storage__
1770 | |
/* (Re)initialize the Eistring that EI points to by overwriting the whole
   structure with the_eistring_zero_init.  Nothing previously held by
   *EI is released here. */
#define eiinit(ei)                      \
do {                                    \
  *(ei) = the_eistring_zero_init;       \
} while (0)

/* Same as eiinit(), but the structure is overwritten with
   the_eistring_malloc_zero_init instead. */
#define eiinit_malloc(ei)                       \
do {                                            \
  *(ei) = the_eistring_malloc_zero_init;        \
} while (0)
1780 | |
1781 | |
1782 /* ----- Utility ----- */ | |
1783 | |
/* Fill in whichever of LEN and CHARLEN was passed as -1, computing it
   from the other one and the text at PTR.  LEN is checked first; CHARLEN
   is only looked at when LEN was already specified.  LEN and CHARLEN must
   be lvalues.  PTR is evaluated at most once, the others multiply. */
#define eifixup_bytechar(ptr, len, charlen)                     \
do {                                                            \
  if ((len) != -1)                                              \
    {                                                           \
      if ((charlen) == -1)                                      \
        (charlen) = bytecount_to_charcount (ptr, len);          \
    }                                                           \
  else                                                          \
    (len) = charcount_to_bytecount (ptr, charlen);              \
} while (0)
1793 | |
/* Make sure LEN is specified: if it was given as -1, compute it from
   CHARLEN and the text at PTR.  LEN must be an lvalue.  PTR is evaluated
   at most once, the others multiply. */
#define eifixup_byte(ptr, len, charlen)                         \
do {                                                            \
  if ((len) == -1)                                              \
    {                                                           \
      (len) = charcount_to_bytecount (ptr, charlen);            \
    }                                                           \
} while (0)
1801 | |
/* Make sure CHARLEN is specified: if it was given as -1, compute it from
   LEN and the text at PTR.  CHARLEN must be an lvalue.  PTR is evaluated
   at most once, the others multiply. */
#define eifixup_char(ptr, len, charlen)                         \
do {                                                            \
  if ((charlen) == -1)                                          \
    {                                                           \
      (charlen) = bytecount_to_charcount (ptr, len);            \
    }                                                           \
} while (0)
1809 | |
1810 | |
1811 | |
/* Make sure we can hold NEWBYTELEN bytes (which is NEWCHARLEN chars)
   plus a zero terminator.  Preserve existing data as much as possible,
   including the existing zero terminator.  Put a new zero terminator where
   it should go if NEWZ is non-zero.

   The Eistring's bytelen_ and charlen_ fields are always updated to the
   new lengths.  The buffer itself (and the terminator) is only touched
   when the byte length actually changes; in that case the allocated size
   grows by half per step, with a minimum of 32 bytes, until the new
   length plus terminator fits.

   EI is evaluated multiply; NEWBYTELEN and NEWCHARLEN are evaluated
   exactly once and NEWZ at most once.

   All size bookkeeping is done in Bytecount rather than int, so byte
   lengths that do not fit in an int are not truncated, and the growth
   step is plain integer arithmetic (n + n/2, the same value the old
   (int) (n * 1.5) produced) instead of a round trip through double. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz)                          \
do {                                                                        \
  Bytecount ei1oldeibytelen = (ei)->bytelen_;                               \
                                                                            \
  (ei)->charlen_ = (newcharlen);                                            \
  (ei)->bytelen_ = (newbytelen);                                            \
                                                                            \
  if (ei1oldeibytelen != (ei)->bytelen_)                                    \
    {                                                                       \
      Bytecount ei1newsize = (ei)->max_size_allocated_;                     \
      while (ei1newsize < (ei)->bytelen_ + 1)                               \
        {                                                                   \
          ei1newsize += ei1newsize / 2;                                     \
          if (ei1newsize < 32)                                              \
            ei1newsize = 32;                                                \
        }                                                                   \
      if (ei1newsize != (ei)->max_size_allocated_)                          \
        {                                                                   \
          if ((ei)->mallocp_)                                               \
            /* xrealloc always preserves existing data as much as possible */ \
            (ei)->data_ = (Ibyte *) xrealloc ((ei)->data_, ei1newsize);     \
          else                                                              \
            {                                                               \
              /* We don't have realloc, so ALLOCA() more space and copy the \
                 data (old terminator included) into it. */                 \
              Ibyte *ei1oldeidata = (ei)->data_;                            \
              (ei)->data_ = alloca_ibytes (ei1newsize);                     \
              if (ei1oldeidata)                                             \
                memcpy ((ei)->data_, ei1oldeidata, ei1oldeibytelen + 1);    \
            }                                                               \
          (ei)->max_size_allocated_ = ei1newsize;                           \
        }                                                                   \
      if (newz)                                                             \
        (ei)->data_[(ei)->bytelen_] = '\0';                                 \
    }                                                                       \
} while (0)
1853 | |
/* Replace the contents of the Eistring with BYTELEN bytes (CHARLEN
   characters) taken from DATA: size the buffer and place the terminator
   with EI_ALLOC, then copy the bytes in.  The number of bytes copied is
   read back from the Eistring's bytelen_ field.
   NOTE(review): EI_ALLOC leaves data_ untouched when the byte length does
   not change, so for a zero-length copy into an Eistring that has no
   buffer yet, memcpy() receives whatever data_ held (possibly NULL --
   confirm against the zero-init template objects). */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen)   \
do {                                                    \
  EI_ALLOC (ei, bytelen, charlen, 1);                   \
  memcpy ((ei)->data_, data, (ei)->bytelen_);           \
} while (0)
1859 | |
1860 /* ----- Initialization ----- */ | |
1861 | |
/* Initialize the Eistring from another Eistring, EICPY, copying its data
   together with its byte and character lengths.  EICPY is evaluated
   once (captured in a local). */
#define eicpy_ei(ei, eicpy)                                             \
do {                                                                    \
  const Eistring *ei2 = (eicpy);                                        \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_);     \
} while (0)
1867 | |
/* Initialize the Eistring from the whole of a Lisp string.  The byte
   length comes from XSTRING_LENGTH() and the character length from
   string_char_length().  LISP_STRING is evaluated once (captured in a
   local). */
#define eicpy_lstr(ei, lisp_string)                                     \
do {                                                                    \
  Lisp_Object ei3 = (lisp_string);                                      \
                                                                        \
  EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3),      \
                     string_char_length (ei3));                         \
} while (0)
1874 | |
/* Initialize the Eistring from a section of a Lisp string.  The section
   starts at byte offset OFF (character offset CHAROFF) and extends for
   LEN bytes (CHARLEN characters).  OFF may be passed as -1, in which case
   it is computed from CHAROFF; likewise, whichever of LEN and CHARLEN is
   passed as -1 is computed from the other.  Every argument other than EI
   is evaluated exactly once.

   Offsets and lengths are held in Bytecount/Charcount locals (matching
   the documented prototype) rather than int, so large values are not
   truncated. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen)     \
do {                                                                    \
  Lisp_Object ei23lstr = (lisp_string);                                 \
  Bytecount ei23off = (off);                                            \
  Charcount ei23charoff = (charoff);                                    \
  Bytecount ei23len = (len);                                            \
  Charcount ei23charlen = (charlen);                                    \
  const Ibyte *ei23data = XSTRING_DATA (ei23lstr);                      \
                                                                        \
  /* Resolve the starting byte offset first; the length fix-up below   \
     needs a pointer to the start of the section. */                   \
  eifixup_byte (ei23data, ei23off, ei23charoff);                        \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen);          \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen);     \
} while (0)
1891 | |
/* Make EI a copy of the LEN bytes of internal-format text at PTR.  FMT
   must be FORMAT_DEFAULT (asserted); OBJECT is accepted but not used by
   this macro.  The character length is derived with
   bytecount_to_charcount(). */
#define eicpy_raw_fmt(ei, ptr, len, fmt, object) \
do { \
  const Ibyte *ei12ptr = (ptr); \
  Internal_Format ei12fmt = (fmt); \
  int ei12len = (len); \
 \
  assert (ei12fmt == FORMAT_DEFAULT); \
  EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len, \
                     bytecount_to_charcount (ei12ptr, ei12len)); \
} while (0)

/* Same as eicpy_raw_fmt() with the default format and a nil object. */
#define eicpy_raw(ei, ptr, len) \
  eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil)
1904 | |
/* Make EI a copy of the zero-terminated internal-format text at PTR; the
   length is taken with qxestrlen().  FMT must be FORMAT_DEFAULT
   (asserted).  PTR and FMT are each evaluated exactly once: the captured
   copies, not the raw macro arguments, are forwarded to eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt, object) \
do { \
  const Ibyte *ei12p1ptr = (ptr); \
  Internal_Format ei12p1fmt = (fmt); \
 \
  assert (ei12p1fmt == FORMAT_DEFAULT); \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt, \
                 object); \
} while (0)

/* Same as eicpy_rawz_fmt() with the default format and a nil object. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil)
771 | 1914 |
/* Make EI hold exactly the one character CH. */
#define eicpy_ch(ei, ch) \
do { \
  Ibyte ei12p2buf[MAX_ICHAR_LEN]; \
  Bytecount ei12p2buflen = set_itext_ichar (ei12p2buf, ch); \
 \
  EI_ALLOC_AND_COPY (ei, ei12p2buf, ei12p2buflen, 1); \
} while (0)

/* Make EI a copy of the zero-terminated string ASCSTR, which is checked
   with ASSERT_ASCTEXT_ASCII and then converted via eicpy_ext() using
   Qbinary. */
#define eicpy_ascii(ei, ascstr) \
do { \
  const Ascbyte *ei4asc = (ascstr); \
 \
  ASSERT_ASCTEXT_ASCII (ei4asc); \
  eicpy_ext (ei, ei4asc, Qbinary); \
} while (0)

/* Sized variant of eicpy_ascii(): copy C_LEN bytes starting at ASCSTR. */
#define eicpy_ascii_len(ei, ascstr, c_len) \
do { \
  const Ascbyte *ei6asc = (ascstr); \
  int ei6asclen = (c_len); \
 \
  ASSERT_ASCTEXT_ASCII_LEN (ei6asc, ei6asclen); \
  eicpy_ext_len (ei, ei6asc, ei6asclen, Qbinary); \
} while (0)
1938 | |
/* Make EI hold the internal-format conversion of the EXTLEN bytes of
   external data at EXTDATA, decoded with coding system CODESYS.  The
   converter stores its result straight into (ei)->data_ and
   (ei)->bytelen_; the allocation size and the character length are then
   filled in from that result.
   NOTE(review): (ei)->data_ is overwritten without looking at
   (ei)->mallocp_ -- confirm how SIZED_EXTERNAL_TO_SIZED_C_STRING allocates
   before using this on a malloc-mode Eistring. */
#define eicpy_ext_len(ei, extdata, extlen, codesys) \
do { \
  const Extbyte *ei7ext = (extdata); \
  int ei7extlen = (extlen); \
 \
  SIZED_EXTERNAL_TO_SIZED_C_STRING (ei7ext, ei7extlen, (ei)->data_, \
                                    (ei)->bytelen_, codesys); \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1; \
  (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \
} while (0)

/* Unsized variant of eicpy_ext_len(): the length of EXTDATA is obtained
   from dfc_external_data_len(). */
#define eicpy_ext(ei, extdata, codesys) \
do { \
  const Extbyte *ei8ext = (extdata); \
 \
  eicpy_ext_len (ei, ei8ext, dfc_external_data_len (ei8ext, codesys), \
                 codesys); \
} while (0)
1957 | |
/* Placeholders: any use expands to the tokens NOT YET IMPLEMENTED and so
   fails to compile. */
#define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) \
  NOT YET IMPLEMENTED
#define eicpy_lstream(eistr, lstream) NOT YET IMPLEMENTED

/* Reset EISTR to the empty string. */
#define eireset(eistr) eicpy_rawz (eistr, (Ibyte *) "")
771 | 1965 |
1966 /* ----- Getting the data out of the Eistring ----- */ | |
1967 | |
/* Pointer to the internal-format text of EI. */
#define eidata(ei) ((ei)->data_)

/* Build a Lisp string from the whole contents of EI. */
#define eimake_string(ei) make_string (eidata (ei), eilen (ei))

/* Build a Lisp string from the piece of EISTR described by OFF/CHAROFF
   (start) and LEN/CHARLEN (extent).
   WARNING: this macro contains a `return' statement, so it returns the new
   string from the *enclosing function*; it is only usable as the last
   statement of a function returning Lisp_Object.
   NOTE(review): the extent is fixed up with eifixup_byte(), whereas
   eicpy_lstr_off() uses eifixup_bytechar() for the same purpose -- only
   the byte values are consumed here, but confirm this is intended. */
#define eimake_string_off(eistr, off, charoff, len, charlen) \
do { \
  int ei24off = (off); \
  int ei24charoff = (charoff); \
  int ei24len = (len); \
  int ei24charlen = (charlen); \
 \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff); \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen); \
 \
  return make_string ((eistr)->data_ + ei24off, ei24len); \
} while (0)
1985 | |
1986 #define eicpyout_alloca(eistr, ptrout, lenout) \ | |
826 | 1987 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil) |
771 | 1988 #define eicpyout_malloc(eistr, lenout) \ |
826 | 1989 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil) |
867 | 1990 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, |
826 | 1991 Internal_Format fmt, Lisp_Object object); |
/* Copy the text of EISTR, including its terminating zero, into freshly
   alloca()ed storage.  PTROUT (an Ibyte * lvalue) receives the address of
   the copy and LENOUT (a Bytecount lvalue) receives the byte length, not
   counting the terminator.  FMT must be FORMAT_DEFAULT (asserted); OBJECT
   is accepted but not used by this macro.

   The address of PTROUT is held in an `Ibyte **': the previous `Ibyte *'
   declaration made `*ei23ptrout = alloca_ibytes (...)' store a pointer
   into a single byte and handed a byte value to memcpy(). */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object) \
do { \
  Internal_Format ei23fmt = (fmt); \
  Ibyte **ei23ptrout = &(ptrout); \
  Bytecount *ei23lenout = &(lenout); \
 \
  assert (ei23fmt == FORMAT_DEFAULT); \
 \
  *ei23lenout = (eistr)->bytelen_; \
  *ei23ptrout = alloca_ibytes ((eistr)->bytelen_ + 1); \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1); \
} while (0)
2004 | |
2005 /* ----- Moving to the heap ----- */ | |
2006 | |
2007 #define eifree(ei) \ | |
2008 do { \ | |
2009 if ((ei)->mallocp_) \ | |
2010 { \ | |
2011 if ((ei)->data_) \ | |
1726 | 2012 xfree ((ei)->data_, Ibyte *); \ |
771 | 2013 if ((ei)->extdata_) \ |
1726 | 2014 xfree ((ei)->extdata_, Extbyte *); \ |
771 | 2015 eiinit_malloc (ei); \ |
2016 } \ | |
2017 else \ | |
2018 eiinit (ei); \ | |
2019 } while (0) | |
2020 | |
2021 int eifind_large_enough_buffer (int oldbufsize, int needed_size); | |
2022 void eito_malloc_1 (Eistring *ei); | |
2023 | |
2024 #define eito_malloc(ei) eito_malloc_1 (ei) | |
2025 | |
/* Switch EI from malloc mode to stack (alloca) mode.  If EI is already in
   stack mode nothing happens.  Otherwise the internal text (with its
   terminating zero) and the external data are copied into alloca()ed
   buffers and the heap buffers are freed.

   The already-in-stack-mode case is handled with a plain conditional.
   The previous form used `return;' inside the macro, which returned from
   the caller's function instead of merely skipping the conversion. */
#define eito_alloca(ei) \
do { \
  if ((ei)->mallocp_) \
    { \
      (ei)->mallocp_ = 0; \
      if ((ei)->data_) \
        { \
          Ibyte *ei13newdata; \
 \
          (ei)->max_size_allocated_ = \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1); \
          ei13newdata = alloca_ibytes ((ei)->max_size_allocated_); \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1); \
          xfree ((ei)->data_, Ibyte *); \
          (ei)->data_ = ei13newdata; \
        } \
 \
      if ((ei)->extdata_) \
        { \
          Extbyte *ei13newdata = alloca_extbytes ((ei)->extlen_ + 2); \
 \
          memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_); \
          /* Double null-terminate in case of Unicode data */ \
          ei13newdata[(ei)->extlen_] = '\0'; \
          ei13newdata[(ei)->extlen_ + 1] = '\0'; \
          xfree ((ei)->extdata_, Extbyte *); \
          (ei)->extdata_ = ei13newdata; \
        } \
    } \
} while (0)
2055 | |
2056 | |
2057 /* ----- Retrieving the length ----- */ | |
2058 | |
/* Length of EI's text in bytes and in characters, respectively. */
#define eilen(ei) ((ei)->bytelen_)
#define eicharlen(ei) ((ei)->charlen_)


/* ----- Working with positions ----- */

/* Convert between character positions and byte positions, both measured
   from the start of EI's text. */
#define eicharpos_to_bytepos(ei, charpos) \
  charcount_to_bytecount ((ei)->data_, charpos)
#define eibytepos_to_charpos(ei, bytepos) \
  bytecount_to_charcount ((ei)->data_, bytepos)
2069 | |
2070 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr, | |
2071 Bytecount bytepos, | |
2072 Charcount n)) | |
2073 { | |
867 | 2074 Ibyte *pos = eistr->data_ + bytepos; |
814 | 2075 Charcount i; |
771 | 2076 |
800 | 2077 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2078 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2079 /* We could check N more correctly now, but that would require a |
2080 call to bytecount_to_charcount(), which would be needlessly | |
2081 expensive (it would convert O(N) algorithms into O(N^2) algorithms | |
800 | 2082 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are |
867 | 2083 guaranteed to catch it either inside INC_IBYTEPTR() or in the check |
771 | 2084 below. */ |
2085 for (i = 0; i < n; i++) | |
867 | 2086 INC_IBYTEPTR (pos); |
800 | 2087 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_); |
771 | 2088 return pos - eistr->data_; |
2089 } | |
2090 | |
/* Advance BYTEPOS in EI by one character, or by N characters.
   There must be no whitespace between the macro name and its parameter
   list: with a space the preprocessor defines an object-like macro whose
   replacement text starts with "(ei, bytepos)". */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
2093 | |
2094 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr, | |
2095 Bytecount bytepos, | |
2096 Charcount n)) | |
2097 { | |
867 | 2098 Ibyte *pos = eistr->data_ + bytepos; |
771 | 2099 int i; |
2100 | |
800 | 2101 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2102 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2103 /* We could check N more correctly now, but ... see above. */ |
2104 for (i = 0; i < n; i++) | |
867 | 2105 DEC_IBYTEPTR (pos); |
800 | 2106 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_); |
771 | 2107 return pos - eistr->data_; |
2108 } | |
2109 | |
/* Move BYTEPOS in EI back by one character, or by N characters.
   There must be no whitespace between the macro name and its parameter
   list: with a space the preprocessor defines an object-like macro whose
   replacement text starts with "(ei, bytepos)". */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
2112 | |
2113 | |
2114 /* ----- Getting the character at a position ----- */ | |
2115 | |
/* Fetch the character at byte position BYTEPOS, or at character position
   CHARPOS, of EI. */
#define eigetch(ei, bytepos) itext_ichar ((ei)->data_ + (bytepos))
#define eigetch_char(ei, charpos) itext_ichar_n ((ei)->data_, charpos)


/* ----- Setting the character at a position ----- */

/* Replace the single character at BYTEPOS (resp. CHARPOS) with CHR.  Both
   are eisub_ch() calls with a character length of 1; the position not
   supplied and the byte length are passed as -1. */
#define eisetch(ei, bytepos, chr) \
  eisub_ch (ei, bytepos, -1, -1, 1, chr)
#define eisetch_char(ei, charpos, chr) \
  eisub_ch (ei, -1, charpos, -1, 1, chr)
2127 | |
2128 | |
2129 /* ----- Concatenation ----- */ | |
2130 | |
/* Append BYTELEN bytes / CHARLEN characters at DATA to EI.  The old byte
   length is remembered before EI_ALLOC grows the string, so the new text
   lands directly after the old text. */
#define eicat_1(ei, data, bytelen, charlen) \
do { \
  int ei14oldeibytelen = (ei)->bytelen_; \
  int ei14bytelen = (bytelen); \
 \
  EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen, \
            (ei)->charlen_ + (charlen), 1); \
  memcpy ((ei)->data_ + ei14oldeibytelen, (data), ei14bytelen); \
} while (0)

/* Append the Eistring EI2. */
#define eicat_ei(ei, ei2) \
do { \
  const Eistring *ei9src = (ei2); \
 \
  eicat_1 (ei, ei9src->data_, ei9src->bytelen_, ei9src->charlen_); \
} while (0)

/* Append the zero-terminated string ASCSTR (checked with
   ASSERT_ASCTEXT_ASCII_LEN). */
#define eicat_ascii(ei, ascstr) \
do { \
  const Ascbyte *ei15asc = (ascstr); \
  int ei15asclen = strlen (ei15asc); \
 \
  ASSERT_ASCTEXT_ASCII_LEN (ei15asc, ei15asclen); \
  eicat_1 (ei, ei15asc, ei15asclen, \
           bytecount_to_charcount ((Ibyte *) ei15asc, ei15asclen)); \
} while (0)

/* Append LEN bytes of internal-format text at DATA. */
#define eicat_raw(ei, data, len) \
do { \
  int ei16len = (len); \
  const Ibyte *ei16data = (data); \
 \
  eicat_1 (ei, ei16data, ei16len, \
           bytecount_to_charcount (ei16data, ei16len)); \
} while (0)

/* Append the zero-terminated internal-format text at PTR. */
#define eicat_rawz(ei, ptr) \
do { \
  const Ibyte *ei16p5ptr = (ptr); \
 \
  eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr)); \
} while (0)

/* Append the text of the Lisp string LISP_STRING. */
#define eicat_lstr(ei, lisp_string) \
do { \
  Lisp_Object ei17str = (lisp_string); \
 \
  eicat_1 (ei, XSTRING_DATA (ei17str), XSTRING_LENGTH (ei17str), \
           string_char_length (ei17str)); \
} while (0)

/* Append the single character CH. */
#define eicat_ch(ei, ch) \
do { \
  Ibyte ei22buf[MAX_ICHAR_LEN]; \
  Bytecount ei22buflen = set_itext_ichar (ei22buf, ch); \
 \
  eicat_1 (ei, ei22buf, ei22buflen, 1); \
} while (0)
2184 | |
2185 | |
2186 /* ----- Replacement ----- */ | |
2187 | |
/* Replace the section of an Eistring at (OFF, LEN) with the data at SRC
   of length SRCLEN.  All positions and lengths have corresponding
   character values (CHAROFF, CHARLEN, SRCCHARLEN), and either member of a
   pair can be -1 -- it will be computed from the other. */

#define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
do { \
  int ei18off = (off); \
  int ei18charoff = (charoff); \
  int ei18len = (len); \
  int ei18charlen = (charlen); \
  Ibyte *ei18src = (Ibyte *) (src); \
  int ei18srclen = (srclen); \
  int ei18srccharlen = (srccharlen); \
 \
  /* Needed below to size the tail; EI_ALLOC changes (ei)->bytelen_. */ \
  int ei18oldeibytelen = (ei)->bytelen_; \
 \
  eifixup_bytechar ((ei)->data_, ei18off, ei18charoff); \
  eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen); \
  eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen); \
 \
  EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len, \
            (ei)->charlen_ + ei18srccharlen - ei18charlen, 0); \
  /* Slide the text after the replaced section to its new place ... */ \
  if (ei18len != ei18srclen) \
    memmove ((ei)->data_ + ei18off + ei18srclen, \
             (ei)->data_ + ei18off + ei18len, \
             /* include zero terminator. */ \
             ei18oldeibytelen - (ei18off + ei18len) + 1); \
  /* ... then drop the replacement into the gap. */ \
  if (ei18srclen > 0) \
    memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen); \
} while (0)
2218 | |
/* Replace a section of EI with the contents of the Eistring EI2. */
#define eisub_ei(ei, off, charoff, len, charlen, ei2) \
do { \
  const Eistring *ei19src = (ei2); \
 \
  eisub_1 (ei, off, charoff, len, charlen, ei19src->data_, \
           ei19src->bytelen_, ei19src->charlen_); \
} while (0)

/* Replace a section of EI with the zero-terminated string ASCSTR (checked
   with ASSERT_ASCTEXT_ASCII_LEN).  The source character length is passed
   as -1 so that eisub_1() computes it. */
#define eisub_ascii(ei, off, charoff, len, charlen, ascstr) \
do { \
  const Ascbyte *ei20asc = (ascstr); \
  int ei20asclen = strlen (ei20asc); \
 \
  ASSERT_ASCTEXT_ASCII_LEN (ei20asc, ei20asclen); \
  eisub_1 (ei, off, charoff, len, charlen, ei20asc, ei20asclen, -1); \
} while (0)

/* Replace a section of EI with the single character CH. */
#define eisub_ch(ei, off, charoff, len, charlen, ch) \
do { \
  Ibyte ei21buf[MAX_ICHAR_LEN]; \
  Bytecount ei21buflen = set_itext_ichar (ei21buf, ch); \
 \
  eisub_1 (ei, off, charoff, len, charlen, ei21buf, ei21buflen, 1); \
} while (0)

/* Delete a section of EI, i.e. replace it with an empty source. */
#define eidel(ei, off, charoff, len, charlen) \
  eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0)
2243 | |
2244 | |
2245 /* ----- Converting to an external format ----- */ | |
2246 | |
/* Convert EI's text to external format with coding system CODESYS and
   store the result in (ei)->extdata_ / (ei)->extlen_.  A stack-mode
   Eistring gets an ALLOCA sink.  A malloc-mode one gets a MALLOC sink,
   after any previous external data has been freed. */
#define eito_external(ei, codesys) \
do { \
  if (!(ei)->mallocp_) \
    TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
                        ALLOCA, ((ei)->extdata_, (ei)->extlen_), \
                        codesys); \
  else \
    { \
      if ((ei)->extdata_) \
        { \
          xfree ((ei)->extdata_, Extbyte *); \
          (ei)->extdata_ = 0; \
        } \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
                          MALLOC, ((ei)->extdata_, (ei)->extlen_), \
                          codesys); \
    } \
} while (0)

/* The external-format data stored by eito_external() and its length. */
#define eiextdata(ei) ((ei)->extdata_)
#define eiextlen(ei) ((ei)->extlen_)
2268 | |
2269 | |
2270 /* ----- Searching in the Eistring for a character ----- */ | |
2271 | |
/* Placeholders: each of the search macros below expands to the tokens
   NOT YET IMPLEMENTED, so any use fails to compile. */
#define eichr(eistr, chr) NOT YET IMPLEMENTED
#define eichr_char(eistr, chr) NOT YET IMPLEMENTED
#define eichr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eichr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_char(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED


/* ----- Searching in the Eistring for a string ----- */

#define eistr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eistr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED

#define eistr_ascii(eistr, ascstr) NOT YET IMPLEMENTED
#define eistr_ascii_char(eistr, ascstr) NOT YET IMPLEMENTED
#define eistr_ascii_off(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
#define eistr_ascii_off_char(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ascii(eistr, ascstr) NOT YET IMPLEMENTED
#define eirstr_ascii_char(eistr, ascstr) NOT YET IMPLEMENTED
#define eirstr_ascii_off(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ascii_off_char(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
2325 | |
2326 | |
2327 /* ----- Comparison ----- */ | |
2328 | |
2329 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, | |
867 | 2330 Bytecount len, Charcount charlen, const Ibyte *data, |
2526 | 2331 const Eistring *ei2, int is_ascii, int fold_case); |
771 | 2332 |
/* Front ends for eicmp_1().  The `_ei' forms compare against another
   Eistring (DATA is 0, IS_ASCII is 0); the `_ascii' forms compare against
   a C string passed as DATA (EI2 is 0, IS_ASCII is 1).  The `_off' forms
   restrict the comparison to a section of EISTR; the others pass offset 0
   and -1 for the remaining position arguments.  The plain, `casecmp' and
   `casecmp_i18n' families pass 0, 1 and 2 respectively as FOLD_CASE.

   ASCSTR is parenthesized inside the cast so that an argument such as
   `flag ? "a" : "b"' is cast as a whole, rather than having the cast bind
   to its first operand only. */
#define eicmp_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
#define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)
#define eicasecmp_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
#define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)
#define eicasecmp_i18n_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)
#define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)

#define eicmp_ascii(eistr, ascstr) \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 0)
#define eicmp_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, (const Ibyte *) (ascstr), \
           0, 1, 0)
#define eicasecmp_ascii(eistr, ascstr) \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, (const Ibyte *) (ascstr), \
           0, 1, 1)
#define eicasecmp_i18n_ascii(eistr, ascstr) \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 2)
#define eicasecmp_i18n_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, (const Ibyte *) (ascstr), \
           0, 1, 2)
771 | 2358 |
2359 | |
2360 /* ----- Case-changing the Eistring ----- */ | |
2361 | |
867 | 2362 int eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata, |
771 | 2363 int downp); |
2364 | |
/* Change the case of EI's text in place: to lower case when DOWNP is
   nonzero, to upper case otherwise.  The converted text is first produced
   by eistr_casefiddle_1() in an alloca()ed scratch buffer sized for the
   worst case (every character taking MAX_ICHAR_LEN bytes, plus a
   terminator).  Nothing is changed when eistr_casefiddle_1() returns 0.

   For a stack-mode Eistring the scratch buffer simply becomes the new
   data buffer.  For a malloc-mode Eistring the heap buffer is resized and
   the converted text copied into it: installing the alloca()ed buffer
   there would leak the heap block and later pass a stack pointer to
   xrealloc()/xfree(). */
#define EI_CASECHANGE(ei, downp) \
do { \
  int ei11new_allocmax = (ei)->charlen_ * MAX_ICHAR_LEN + 1; \
  Ibyte *ei11storage = \
     (Ibyte *) alloca_ibytes (ei11new_allocmax); \
  int ei11newlen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, \
                                       ei11storage, downp); \
 \
  if (ei11newlen) \
    { \
      if ((ei)->mallocp_) \
        { \
          (ei)->data_ = \
            (Ibyte *) xrealloc ((ei)->data_, ei11new_allocmax); \
          memcpy ((ei)->data_, ei11storage, ei11newlen); \
          (ei)->data_[ei11newlen] = '\0'; \
        } \
      else \
        (ei)->data_ = ei11storage; \
      (ei)->max_size_allocated_ = ei11new_allocmax; \
      (ei)->bytelen_ = ei11newlen; \
      /* charlen is the same. */ \
    } \
} while (0)

#define eilwr(ei) EI_CASECHANGE (ei, 1)
#define eiupr(ei) EI_CASECHANGE (ei, 0)
2384 | |
1743 | 2385 END_C_DECLS |
1650 | 2386 |
771 | 2387 |
2388 /************************************************************************/ | |
2389 /* */ | |
2390 /* Converting between internal and external format */ | |
2391 /* */ | |
2392 /************************************************************************/ | |
2393 /* | |
1318 | 2394 The macros below are used for converting data between different formats. |
2395 Generally, the data is textual, and the formats are related to | |
2396 internationalization (e.g. converting between internal-format text and | |
2397 UTF-8) -- but the mechanism is general, and could be used for anything, | |
2398 e.g. decoding gzipped data. | |
2399 | |
2400 In general, conversion involves a source of data, a sink, the existing | |
2401 format of the source data, and the desired format of the sink. The | |
2402 macros below, however, always require that either the source or sink is | |
2403 internal-format text. Therefore, in practice the conversions below | |
2404 involve source, sink, an external format (specified by a coding system), | |
2405 and the direction of conversion (internal->external or vice-versa). | |
2406 | |
2407 Sources and sinks can be raw data (sized or unsized -- when unsized, | |
2408 input data is assumed to be null-terminated [double null-terminated for | |
2409 Unicode-format data], and on output the length is not stored anywhere), | |
2410 Lisp strings, Lisp buffers, lstreams, and opaque data objects. When the | |
2411 output is raw data, the result can be allocated either with alloca() or | |
2412 malloc(). (There is currently no provision for writing into a fixed | |
2413 buffer. If you want this, use alloca() output and then copy the data -- | |
2414 but be careful with the size! Unless you are very sure of the encoding | |
2415 being used, upper bounds for the size are not in general computable.) | |
2416 The obvious restrictions on source and sink types apply (e.g. Lisp | |
2417 strings are a source and sink only for internal data). | |
2418 | |
2419 All raw data outputted will contain an extra null byte (two bytes for | |
2420 Unicode -- currently, in fact, all output data, whether internal or | |
2421 external, is double-null-terminated, but you can't count on this; see | |
2422 below). This means that enough space is allocated to contain the extra | |
2423 nulls; however, these nulls are not reflected in the returned output | |
2424 size. | |
2425 | |
2426 The most basic macros are TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. | |
2427 These can be used to convert between any kinds of sources or sinks. | |
2428 However, 99% of conversions involve raw data or Lisp strings as both | |
2429 source and sink, and usually data is output as alloca() rather than | |
2430 malloc(). For this reason, convenience macros are defined for many types | |
2431 of conversions involving raw data and/or Lisp strings, especially when | |
2432 the output is an alloca()ed string. (When the destination is a | |
2433 Lisp_String, there are other functions that should be used instead -- | |
2434 build_ext_string() and make_ext_string(), for example.) The convenience | |
2435 macros are of two types -- the older kind that store the result into a | |
2436 specified variable, and the newer kind that return the result. The newer | |
2437 kind of macros don't exist when the output is sized data, because that | |
2438 would have two return values. NOTE: All convenience macros are | |
2439 ultimately defined in terms of TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. | |
2440 Thus, any comments below about the workings of these macros also apply to | |
2441 all convenience macros. | |
2442 | |
2443 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
2444 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
771 | 2445 |
2446 Typical use is | |
2447 | |
2367 | 2448 TO_EXTERNAL_FORMAT (LISP_STRING, str, C_STRING_MALLOC, ptr, Qfile_name); |
2449 | |
2450 which means that the contents of the lisp string `str' are written | |
2451 to a malloc'ed memory area which will be pointed to by `ptr', after the | |
2452 function returns. The conversion will be done using the `file-name' | |
2453 coding system (which will be controlled by the user indirectly by | |
2454 setting or binding the variable `file-name-coding-system'). | |
2455 | |
2456 Some sources and sinks require two C variables to specify. We use | |
2457 some preprocessor magic to allow different source and sink types, and | |
2458 even different numbers of arguments to specify different types of | |
2459 sources and sinks. | |
2460 | |
2461 So we can have a call that looks like | |
2462 | |
2463 TO_INTERNAL_FORMAT (DATA, (ptr, len), | |
2464 MALLOC, (ptr, len), | |
2465 coding_system); | |
2466 | |
2467 The parenthesized argument pairs are required to make the | |
2468 preprocessor magic work. | |
771 | 2469 |
2470 NOTE: GC is inhibited during the entire operation of these macros. This | |
2471 is because frequently the data to be converted comes from strings but | |
2472 gets passed in as just DATA, and GC may move around the string data. If | |
2473 we didn't inhibit GC, there'd have to be a lot of messy recoding, | |
2474 alloca-copying of strings and other annoying stuff. | |
2475 | |
2476 The source or sink can be specified in one of these ways: | |
2477 | |
2478 DATA, (ptr, len), // input data is a fixed buffer of size len | |
851 | 2479 ALLOCA, (ptr, len), // output data is in a ALLOCA()ed buffer of size len |
771 | 2480 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len |
2481 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output | |
2482 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output | |
2483 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr)) | |
2484 // on input (the Unicode version is used when correct) | |
2485 LISP_STRING, string, // input or output is a Lisp_Object of type string | |
2486 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer | |
2487 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream | |
2488 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque | |
2489 | |
2490 When specifying the sink, use lvalues, since the macro will assign to them, | |
2491 except when the sink is an lstream or a lisp buffer. | |
2492 | |
2367 | 2493 For the sink types `ALLOCA' and `C_STRING_ALLOCA', the resulting text is |
2494 stored in a stack-allocated buffer, which is automatically freed on | |
2495 returning from the function. However, the sink types `MALLOC' and | |
2496 `C_STRING_MALLOC' return `xmalloc()'ed memory. The caller is responsible | |
2497 for freeing this memory using `xfree()'. | |
2498 | |
771 | 2499 The macros accept the kinds of sources and sinks appropriate for |
2500 internal and external data representation. See the type_checking_assert | |
2501 macros below for the actual allowed types. | |
2502 | |
2503 Since some sources and sinks use one argument (a Lisp_Object) to | |
2504 specify them, while others take a (pointer, length) pair, we use | |
2505 some C preprocessor trickery to allow pair arguments to be specified | |
2506 by parenthesizing them, as in the examples above. | |
2507 | |
2508 Anything prefixed by dfc_ (`data format conversion') is private. | |
2509 They are only used to implement these macros. | |
2510 | |
2511 [[Using C_STRING* is appropriate for using with external APIs that | |
2512 take null-terminated strings. For internal data, we should try to | |
2513 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'. | |
2514 | |
2515 Sometime in the future we might allow output to C_STRING_ALLOCA or | |
2516 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not | |
2517 TO_INTERNAL_FORMAT().]] | |
2518 | |
2519 The above comments are not true. Frequently (most of the time, in | |
2520 fact), external strings come as zero-terminated entities, where the | |
2521 zero-termination is the only way to find out the length. Even in | |
2522 cases where you can get the length, most of the time the system will | |
2523 still use the null to signal the end of the string, and there will | |
2524 still be no way to either send in or receive a string with embedded | |
2525 nulls. In such situations, it's pointless to track the length | |
2526 because null bytes can never be in the string. We have a lot of | |
2527 operations that make it easy to operate on zero-terminated strings, | |
   and forcing the user to deal with the length everywhere would only
2529 make the code uglier and more complicated, for no gain. --ben | |
2530 | |
2531 There is no problem using the same lvalue for source and sink. | |
2532 | |
2533 Also, when pointers are required, the code (currently at least) is | |
2534 lax and allows any pointer types, either in the source or the sink. | |
2535 This makes it possible, e.g., to deal with internal format data held | |
2536 in char *'s or external format data held in WCHAR * (i.e. Unicode). | |
2537 | |
2538 Finally, whenever storage allocation is called for, extra space is | |
2539 allocated for a terminating zero, and such a zero is stored in the | |
2540 appropriate place, regardless of whether the source data was | |
2541 specified using a length or was specified as zero-terminated. This | |
2542 allows you to freely pass the resulting data, no matter how | |
2543 obtained, to a routine that expects zero termination (modulo, of | |
2544 course, that any embedded zeros in the resulting text will cause | |
2545 truncation). In fact, currently two embedded zeros are allocated | |
2546 and stored after the data result. This is to allow for the | |
2547 possibility of storing a Unicode value on output, which needs the | |
2548 two zeros. Currently, however, the two zeros are stored regardless | |
2549 of whether the conversion is internal or external and regardless of | |
2550 whether the external coding system is in fact Unicode. This | |
2551 behavior may change in the future, and you cannot rely on this -- | |
2552 the most you can rely on is that sink data in Unicode format will | |
2553 have two terminating nulls, which combine to form one Unicode null | |
2367 | 2554 character. |
2555 | |
2556 NOTE: You might ask, why are these not written as functions that | |
2557 *RETURN* the converted string, since that would allow them to be used | |
2558 much more conveniently, without having to constantly declare temporary | |
2559 variables? The answer is that in fact I originally did write the | |
2560 routines that way, but that required either | |
2561 | |
2562 (a) calling alloca() inside of a function call, or | |
2563 (b) using expressions separated by commas and a global temporary variable, or | |
2564 (c) using the GCC extension ({ ... }). | |
2565 | |
2566 Turned out that all of the above had bugs, all caused by GCC (hence the | |
2567 comments about "those GCC wankers" and "ream gcc up the ass"). As for | |
   (a), some versions of GCC (especially on Intel platforms) had
   buggy implementations of alloca() that couldn't handle being called
2570 inside of a function call -- they just decremented the stack right in the | |
2571 middle of pushing args. Oops, crash with stack trashing, very bad. (b) | |
2572 was an attempt to fix (a), and that led to further GCC crashes, esp. when | |
2573 you had two such calls in a single subexpression, because GCC couldn't be | |
2574 counted upon to follow even a minimally reasonable order of execution. | |
2575 True, you can't count on one argument being evaluated before another, but | |
2576 GCC would actually interleave them so that the temp var got stomped on by | |
2577 one while the other was accessing it. So I tried (c), which was | |
2578 problematic because that GCC extension has more bugs in it than a | |
2579 termite's nest. | |
2580 | |
2581 So reluctantly I converted to the current way. Now, that was awhile ago | |
2582 (c. 1994), and it appears that the bug involving alloca in function calls | |
2583 has long since been fixed. More recently, I defined the new-dfc routines | |
2584 down below, which DO allow exactly such convenience of returning your | |
2585 args rather than store them in temp variables, and I also wrote a | |
2586 configure check to see whether alloca() causes crashes inside of function | |
2587 calls, and if so use the portable alloca() implementation in alloca.c. | |
2588 If you define TEST_NEW_DFC, the old routines get written in terms of the | |
2589 new ones, and I've had a beta put out with this on, and | |
2590 this appears to cause no problems -- so we should consider | |
2591 switching, and feel no compunctions about writing further such function- | |
2592 like alloca() routines in lieu of statement-like ones. --ben */ | |
771 | 2593 |
/* Statement-like conversion through dfc_convert_to_external_format().

   SOURCE_TYPE and SINK_TYPE are bare suffixes of the DFC_TYPE_*
   enumerators (e.g. C_STRING, ALLOCA).  They are token-pasted to select
   the matching DFC_EXT_SOURCE_*_TO_ARGS, DFC_SINK_*_TO_ARGS and
   DFC_*_USE_CONVERTED_DATA helper macros, which receive SOURCE and SINK.
   CODESYS is evaluated exactly once, into dfc_codesys.

   The locals declared here (dfc_source, dfc_sink, dfc_codesys and the two
   dfc_simplified_*_type variables) are referenced by name from the helper
   macros, so they must keep these exact names. */
#define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do {                                                                      \
  dfc_conversion_type dfc_simplified_source_type;                         \
  dfc_conversion_type dfc_simplified_sink_type;                           \
  dfc_conversion_data dfc_source;                                         \
  dfc_conversion_data dfc_sink;                                           \
  Lisp_Object dfc_codesys = (codesys);                                    \
                                                                          \
  type_checking_assert                                                    \
    ((DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM ||                  \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE ||                   \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING ||                   \
      DFC_TYPE_##source_type == DFC_TYPE_C_STRING ||                      \
      DFC_TYPE_##source_type == DFC_TYPE_DATA)                            \
     &&                                                                   \
     (DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE ||                     \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM ||                    \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC ||                 \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA ||                 \
      DFC_TYPE_##sink_type == DFC_TYPE_MALLOC ||                          \
      DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA));                          \
                                                                          \
  DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys);           \
  DFC_SINK_##sink_type##_TO_ARGS (sink);                                  \
                                                                          \
  dfc_convert_to_external_format (dfc_simplified_source_type,             \
                                  &dfc_source,                            \
                                  dfc_codesys,                            \
                                  dfc_simplified_sink_type,               \
                                  &dfc_sink);                             \
                                                                          \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink);                            \
} while (0)
2625 | |
/* Statement-like conversion through dfc_convert_to_internal_format().

   SOURCE_TYPE and SINK_TYPE are bare suffixes of the DFC_TYPE_*
   enumerators.  They are token-pasted to select the matching
   DFC_INT_SOURCE_*_TO_ARGS, DFC_SINK_*_TO_ARGS and
   DFC_*_USE_CONVERTED_DATA helper macros, which receive SOURCE and SINK.
   CODESYS is evaluated exactly once, into dfc_codesys.

   The locals declared here are referenced by name from the helper macros,
   so they must keep these exact names. */
#define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do {                                                                      \
  dfc_conversion_type dfc_simplified_source_type;                         \
  dfc_conversion_type dfc_simplified_sink_type;                           \
  dfc_conversion_data dfc_source;                                         \
  dfc_conversion_data dfc_sink;                                           \
  Lisp_Object dfc_codesys = (codesys);                                    \
                                                                          \
  type_checking_assert                                                    \
    ((DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM ||                  \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE ||                   \
      DFC_TYPE_##source_type == DFC_TYPE_C_STRING ||                      \
      DFC_TYPE_##source_type == DFC_TYPE_DATA)                            \
     &&                                                                   \
     (DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER ||                     \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM ||                    \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING ||                     \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC ||                 \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA ||                 \
      DFC_TYPE_##sink_type == DFC_TYPE_MALLOC ||                          \
      DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA));                          \
                                                                          \
  DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys);           \
  DFC_SINK_##sink_type##_TO_ARGS (sink);                                  \
                                                                          \
  dfc_convert_to_internal_format (dfc_simplified_source_type,             \
                                  &dfc_source,                            \
                                  dfc_codesys,                            \
                                  dfc_simplified_sink_type,               \
                                  &dfc_sink);                             \
                                                                          \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink);                            \
} while (0)
2657 | |
814 | 2658 #ifdef __cplusplus |
771 | 2659 |
814 | 2660 /* Error if you try to use a union here: "member `struct {anonymous |
2661 union}::{anonymous} {anonymous union}::data' with constructor not allowed | |
2662 in union" (Bytecount is a class) */ | |
2663 | |
2664 typedef struct | |
2665 #else | |
771 | 2666 typedef union |
814 | 2667 #endif |
771 | 2668 { |
2669 struct { const void *ptr; Bytecount len; } data; | |
2670 Lisp_Object lisp_object; | |
2671 } dfc_conversion_data; | |
2672 | |
/* Source/sink kinds understood by TO_EXTERNAL_FORMAT(),
   TO_INTERNAL_FORMAT() and the dfc_convert_to_*_format() functions.  The
   enumerator order (and therefore the numeric values) is unchanged. */
typedef enum dfc_conversion_type
{
  DFC_TYPE_DATA,
  DFC_TYPE_ALLOCA,
  DFC_TYPE_MALLOC,
  DFC_TYPE_C_STRING,
  DFC_TYPE_C_STRING_ALLOCA,
  DFC_TYPE_C_STRING_MALLOC,
  DFC_TYPE_LISP_STRING,
  DFC_TYPE_LISP_LSTREAM,
  DFC_TYPE_LISP_OPAQUE,
  DFC_TYPE_LISP_BUFFER
} dfc_conversion_type;
2687 | |
1743 | 2688 BEGIN_C_DECLS |
1650 | 2689 |
771 | 2690 /* WARNING: These use a static buffer. This can lead to disaster if |
2691 these functions are not used *very* carefully. Another reason to only use | |
2692 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */ | |
1632 | 2693 MODULE_API void |
771 | 2694 dfc_convert_to_external_format (dfc_conversion_type source_type, |
2695 dfc_conversion_data *source, | |
1318 | 2696 Lisp_Object codesys, |
771 | 2697 dfc_conversion_type sink_type, |
2698 dfc_conversion_data *sink); | |
1632 | 2699 MODULE_API void |
771 | 2700 dfc_convert_to_internal_format (dfc_conversion_type source_type, |
2701 dfc_conversion_data *source, | |
1318 | 2702 Lisp_Object codesys, |
771 | 2703 dfc_conversion_type sink_type, |
2704 dfc_conversion_data *sink); | |
/* CPP trickery: when a macro argument is itself a parenthesized pair such
   as `(ptr, len)', writing `DFC_CPP_CAR arg' / `DFC_CPP_CDR arg' selects
   its first / second element. */
#define DFC_CPP_CAR(first, second) (first)
#define DFC_CPP_CDR(first, second) (second)
2708 | |
/* Convert `source' to args for dfc_convert_to_external_format().

   Each macro fills in dfc_source and dfc_simplified_source_type, which
   must be declared by the enclosing TO_EXTERNAL_FORMAT() expansion.  The
   `codesys' parameter is accepted for uniformity with the
   DFC_INT_SOURCE_* macros; none of the macros in this group uses it. */

/* `val' is a parenthesized (pointer, length) pair. */
#define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do {                  \
  dfc_source.data.ptr = DFC_CPP_CAR val;                                \
  dfc_source.data.len = DFC_CPP_CDR val;                                \
  dfc_simplified_source_type = DFC_TYPE_DATA;                           \
} while (0)

/* `val' is a zero-terminated string; its length comes from strlen().
   The pointer is only read, so the cast keeps it const-qualified. */
#define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do {              \
  dfc_source.data.len =                                                 \
    strlen ((const char *) (dfc_source.data.ptr = (val)));              \
  dfc_simplified_source_type = DFC_TYPE_DATA;                           \
} while (0)

/* `val' must satisfy STRINGP; it is passed on as a Lisp object. */
#define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do {           \
  Lisp_Object dfc_slsta = (val);                                        \
  type_checking_assert (STRINGP (dfc_slsta));                           \
  dfc_source.lisp_object = dfc_slsta;                                   \
  dfc_simplified_source_type = DFC_TYPE_LISP_STRING;                    \
} while (0)

/* `val' must satisfy LSTREAMP; it is passed on as a Lisp object. */
#define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do {          \
  Lisp_Object dfc_sllta = (val);                                        \
  type_checking_assert (LSTREAMP (dfc_sllta));                          \
  dfc_source.lisp_object = dfc_sllta;                                   \
  dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM;                   \
} while (0)

/* `val' is an opaque object; its data and size are passed as plain data. */
#define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do {           \
  Lisp_Opaque *dfc_slota = XOPAQUE (val);                               \
  dfc_source.data.ptr = OPAQUE_DATA (dfc_slota);                        \
  dfc_source.data.len = OPAQUE_SIZE (dfc_slota);                        \
  dfc_simplified_source_type = DFC_TYPE_DATA;                           \
} while (0)
2738 | |
/* Convert `source' to args for dfc_convert_to_internal_format().

   Only the C_STRING case differs from its DFC_EXT_SOURCE_* counterpart:
   the length of the external string is obtained from
   dfc_external_data_len(), which is given the coding system. */
#define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys)                       \
  DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)

#define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do {              \
  dfc_source.data.ptr = (val);                                          \
  dfc_source.data.len =                                                 \
    dfc_external_data_len (dfc_source.data.ptr, codesys);               \
  dfc_simplified_source_type = DFC_TYPE_DATA;                           \
} while (0)

#define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys)                \
  DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)

#define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys)               \
  DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)

#define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys)                \
  DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)
2753 | |
/* Convert `sink' to args for dfc_convert_to_*_format().

   Every sink except the two stream-backed ones is collapsed to
   DFC_TYPE_DATA here and ignores `val'; the matching
   DFC_*_USE_CONVERTED_DATA macro assigns to the sink afterwards.  The
   macros set dfc_simplified_sink_type (and, for streams, dfc_sink), which
   must be declared by the enclosing TO_*_FORMAT() expansion. */
#define DFC_SINK_ALLOCA_TO_ARGS(val)                                    \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val)                           \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_MALLOC_TO_ARGS(val)                                    \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val)                           \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_STRING_TO_ARGS(val)                               \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_OPAQUE_TO_ARGS(val)                               \
  dfc_simplified_sink_type = DFC_TYPE_DATA

/* `val' must satisfy LSTREAMP; it becomes the sink itself. */
#define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do {                         \
  Lisp_Object dfc_sink_stream = (val);                                  \
  type_checking_assert (LSTREAMP (dfc_sink_stream));                    \
  dfc_sink.lisp_object = dfc_sink_stream;                               \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM;                     \
} while (0)

/* `val' is a buffer object; the sink becomes an output stream created on
   that buffer at BUF_PT. */
#define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do {                          \
  struct buffer *dfc_sink_buffer = XBUFFER (val);                       \
  dfc_sink.lisp_object =                                                \
    make_lisp_buffer_output_stream                                      \
      (dfc_sink_buffer, BUF_PT (dfc_sink_buffer), 0);                   \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM;                     \
} while (0)
2780 | |
/* Assign to the `sink' lvalue(s) using the converted data found in
   dfc_sink.

   The ALLOCA/MALLOC variants copy dfc_sink.data.len + 2 bytes; the + 2 is
   because we double zero-extended to account for Unicode conversion.  The
   plain ALLOCA and MALLOC variants take a parenthesized (pointer, length)
   pair of lvalues, the C_STRING_* variants a single pointer lvalue. */
typedef union { char c; void *p; } *dfc_aliasing_voidpp;

#define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do {                        \
  void *dfc_copy = ALLOCA (dfc_sink.data.len + 2);                      \
  memcpy (dfc_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2);          \
  VOIDP_CAST (DFC_CPP_CAR sink) = dfc_copy;                             \
  (DFC_CPP_CDR sink) = dfc_sink.data.len;                               \
} while (0)

#define DFC_MALLOC_USE_CONVERTED_DATA(sink) do {                        \
  void *dfc_copy = xmalloc (dfc_sink.data.len + 2);                     \
  memcpy (dfc_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2);          \
  VOIDP_CAST (DFC_CPP_CAR sink) = dfc_copy;                             \
  (DFC_CPP_CDR sink) = dfc_sink.data.len;                               \
} while (0)

#define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do {               \
  void *dfc_copy = ALLOCA (dfc_sink.data.len + 2);                      \
  memcpy (dfc_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2);          \
  VOIDP_CAST (sink) = dfc_copy;                                         \
} while (0)

#define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do {               \
  void *dfc_copy = xmalloc (dfc_sink.data.len + 2);                     \
  memcpy (dfc_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2);          \
  VOIDP_CAST (sink) = dfc_copy;                                         \
} while (0)

/* Lisp object sinks are built directly from the converted bytes. */
#define DFC_LISP_STRING_USE_CONVERTED_DATA(sink)                        \
  sink = make_string ((Ibyte *) dfc_sink.data.ptr, dfc_sink.data.len)
#define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink)                        \
  sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)

/* Stream sinks need no assignment; the buffer sink only deletes the
   stream that is stored in dfc_sink.lisp_object. */
#define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */
#define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink)                        \
  Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
2813 | |
1318 | 2814 /* #define TEST_NEW_DFC */ |
2815 | |
/* Convenience macros for extremely common invocations.

   The first group stores a converted, alloca()-allocated copy of the
   input in the pointer lvalue `dst'.  With TEST_NEW_DFC defined, these are
   expressed through the value-returning NEW_* macros instead of
   TO_EXTERNAL_FORMAT() / TO_INTERNAL_FORMAT(). */
#ifdef TEST_NEW_DFC
#define C_STRING_TO_EXTERNAL(src, dst, codesys)                         \
  do { * (Extbyte **) &(dst) =                                          \
       NEW_C_STRING_TO_EXTERNAL (src, codesys); } while (0)
#define SIZED_C_STRING_TO_EXTERNAL(src, srclen, dst, codesys)           \
  do { * (Extbyte **) &(dst) =                                          \
       NEW_SIZED_C_STRING_TO_EXTERNAL (src, srclen, codesys); } while (0)
#define EXTERNAL_TO_C_STRING(src, dst, codesys)                         \
  do { * (Ibyte **) &(dst) =                                            \
       NEW_EXTERNAL_TO_C_STRING (src, codesys); } while (0)
#define SIZED_EXTERNAL_TO_C_STRING(src, srclen, dst, codesys)           \
  do { * (Ibyte **) &(dst) =                                            \
       NEW_SIZED_EXTERNAL_TO_C_STRING (src, srclen, codesys); } while (0)
#define LISP_STRING_TO_EXTERNAL(src, dst, codesys)                      \
  do { * (Extbyte **) &(dst) =                                          \
       NEW_LISP_STRING_TO_EXTERNAL (src, codesys); } while (0)
#else
#define C_STRING_TO_EXTERNAL(src, dst, codesys)                         \
  TO_EXTERNAL_FORMAT (C_STRING, src, C_STRING_ALLOCA, dst, codesys)
#define SIZED_C_STRING_TO_EXTERNAL(src, srclen, dst, codesys)           \
  TO_EXTERNAL_FORMAT (DATA, (src, srclen), C_STRING_ALLOCA, dst, codesys)
#define EXTERNAL_TO_C_STRING(src, dst, codesys)                         \
  TO_INTERNAL_FORMAT (C_STRING, src, C_STRING_ALLOCA, dst, codesys)
#define SIZED_EXTERNAL_TO_C_STRING(src, srclen, dst, codesys)           \
  TO_INTERNAL_FORMAT (DATA, (src, srclen), C_STRING_ALLOCA, dst, codesys)
#define LISP_STRING_TO_EXTERNAL(src, dst, codesys)                      \
  TO_EXTERNAL_FORMAT (LISP_STRING, src, C_STRING_ALLOCA, dst, codesys)
#endif /* TEST_NEW_DFC */

/* Sized-output variants: `dst' receives the alloca()-allocated data and
   `dstlen' its length. */
#define C_STRING_TO_SIZED_EXTERNAL(src, dst, dstlen, codesys)           \
  TO_EXTERNAL_FORMAT (C_STRING, src, ALLOCA, (dst, dstlen), codesys)
#define SIZED_C_STRING_TO_SIZED_EXTERNAL(src, srclen, dst, dstlen, codesys) \
  TO_EXTERNAL_FORMAT (DATA, (src, srclen), ALLOCA, (dst, dstlen), codesys)
#define EXTERNAL_TO_SIZED_C_STRING(src, dst, dstlen, codesys)           \
  TO_INTERNAL_FORMAT (C_STRING, src, ALLOCA, (dst, dstlen), codesys)
#define SIZED_EXTERNAL_TO_SIZED_C_STRING(src, srclen, dst, dstlen, codesys) \
  TO_INTERNAL_FORMAT (DATA, (src, srclen), ALLOCA, (dst, dstlen), codesys)
#define LISP_STRING_TO_SIZED_EXTERNAL(src, dst, dstlen, codesys)        \
  TO_EXTERNAL_FORMAT (LISP_STRING, src, ALLOCA, (dst, dstlen), codesys)
2856 | |
2857 /* In place of EXTERNAL_TO_LISP_STRING(), use build_ext_string() and/or | |
2858 make_ext_string(). */ | |
2859 | |
/* malloc()-based counterparts of the convenience macros: the converted
   copy stored in `dst' is obtained with xmalloc() (via the *_MALLOC sink
   types) or, with TEST_NEW_DFC defined, from the NEW_*_MALLOC macros. */
#ifdef TEST_NEW_DFC
#define C_STRING_TO_EXTERNAL_MALLOC(src, dst, codesys)                  \
  do { * (Extbyte **) &(dst) =                                          \
       NEW_C_STRING_TO_EXTERNAL_MALLOC (src, codesys); } while (0)
#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(src, srclen, dst, codesys)    \
  do { * (Extbyte **) &(dst) =                                          \
       NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC (src, srclen, codesys); }  \
  while (0)
#define EXTERNAL_TO_C_STRING_MALLOC(src, dst, codesys)                  \
  do { * (Ibyte **) &(dst) =                                            \
       NEW_EXTERNAL_TO_C_STRING_MALLOC (src, codesys); } while (0)
#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(src, srclen, dst, codesys)    \
  do { * (Ibyte **) &(dst) =                                            \
       NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC (src, srclen, codesys); }  \
  while (0)
#define LISP_STRING_TO_EXTERNAL_MALLOC(src, dst, codesys)               \
  do { * (Extbyte **) &(dst) =                                          \
       NEW_LISP_STRING_TO_EXTERNAL_MALLOC (src, codesys); } while (0)
#else
#define C_STRING_TO_EXTERNAL_MALLOC(src, dst, codesys)                  \
  TO_EXTERNAL_FORMAT (C_STRING, src, C_STRING_MALLOC, dst, codesys)
#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(src, srclen, dst, codesys)    \
  TO_EXTERNAL_FORMAT (DATA, (src, srclen), C_STRING_MALLOC, dst, codesys)
#define EXTERNAL_TO_C_STRING_MALLOC(src, dst, codesys)                  \
  TO_INTERNAL_FORMAT (C_STRING, src, C_STRING_MALLOC, dst, codesys)
#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(src, srclen, dst, codesys)    \
  TO_INTERNAL_FORMAT (DATA, (src, srclen), C_STRING_MALLOC, dst, codesys)
#define LISP_STRING_TO_EXTERNAL_MALLOC(src, dst, codesys)               \
  TO_EXTERNAL_FORMAT (LISP_STRING, src, C_STRING_MALLOC, dst, codesys)
#endif /* TEST_NEW_DFC */

/* Sized-output variants: `dst' receives the xmalloc()ed data and `dstlen'
   its length. */
#define C_STRING_TO_SIZED_EXTERNAL_MALLOC(src, dst, dstlen, codesys)    \
  TO_EXTERNAL_FORMAT (C_STRING, src, MALLOC, (dst, dstlen), codesys)
#define SIZED_C_STRING_TO_SIZED_EXTERNAL_MALLOC(src, srclen, dst, dstlen, \
                                                codesys)                \
  TO_EXTERNAL_FORMAT (DATA, (src, srclen), MALLOC, (dst, dstlen), codesys)
#define EXTERNAL_TO_SIZED_C_STRING_MALLOC(src, dst, dstlen, codesys)    \
  TO_INTERNAL_FORMAT (C_STRING, src, MALLOC, (dst, dstlen), codesys)
#define SIZED_EXTERNAL_TO_SIZED_C_STRING_MALLOC(src, srclen, dst, dstlen, \
                                                codesys)                \
  TO_INTERNAL_FORMAT (DATA, (src, srclen), MALLOC, (dst, dstlen), codesys)
#define LISP_STRING_TO_SIZED_EXTERNAL_MALLOC(src, dst, dstlen, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, src, MALLOC, (dst, dstlen), codesys)
2903 | |
/* Source kinds accepted by the new_dfc_convert_*() functions and passed
   by the NEW_* conversion macros.  Values are spelled out explicitly; they
   match the implicit numbering of the original declaration. */
enum new_dfc_src_type
{
  DFC_EXTERNAL = 0,
  DFC_SIZED_EXTERNAL = 1,
  DFC_INTERNAL = 2,
  DFC_SIZED_INTERNAL = 3,
  DFC_LISP_STRING = 4
};
2912 | |
1632 | 2913 MODULE_API void *new_dfc_convert_malloc (const void *src, Bytecount src_size, |
2914 enum new_dfc_src_type type, | |
2915 Lisp_Object codesys); | |
2367 | 2916 MODULE_API Bytecount new_dfc_convert_size (const char *srctext, |
2917 const void *src, | |
1632 | 2918 Bytecount src_size, |
2919 enum new_dfc_src_type type, | |
2920 Lisp_Object codesys); | |
2367 | 2921 MODULE_API void *new_dfc_convert_copy_data (const char *srctext, |
2922 void *alloca_data); | |
1318 | 2923 |
1743 | 2924 END_C_DECLS |
1650 | 2925 |
1318 | 2926 /* Version of EXTERNAL_TO_C_STRING that *RETURNS* the translated string, |
2927 still in alloca() space. Requires some trickiness to do this, but gets | |
2928 it done! */ | |
2929 | |
2930 /* NOTE: If you make two invocations of the dfc functions below in the same | |
2931 subexpression and use the exact same expression for the source in both | |
2932 cases, you will lose. In this unlikely case, you will get an abort, and | |
2933 need to rewrite the code. | |
2934 */ | |
2935 | |
2936 /* We need to use ALLOCA_FUNCALL_OK here. Some compilers have been known | |
2937 to choke when alloca() occurs as a funcall argument, and so we check | |
2938 this in configure. Rewriting the expressions below to use a temporary | |
2939 variable, so that the call to alloca() is outside of | |
2382 | 2940 new_dfc_convert_copy_data(), won't help because the entire NEW_DFC call |
1318 | 2941 could be inside of a function call. */ |
2942 | |
/* Core of the alloca()-returning conversions: new_dfc_convert_size()
   yields the size that ALLOCA_FUNCALL_OK() is asked for, and the resulting
   block is handed to new_dfc_convert_copy_data(), whose return value is
   the value of the whole expression.  The stringified source expression
   (#src) is passed to both helper calls. */
#define NEW_DFC_CONVERT_1_ALLOCA(src, src_size, type, codesys)            \
  new_dfc_convert_copy_data                                               \
  (#src, ALLOCA_FUNCALL_OK (new_dfc_convert_size (#src, src, src_size,    \
                                                  type, codesys)))

/* Value-returning conversions.  Each expansion is wrapped in parentheses
   so that the cast applies to the complete call even when the macro is
   followed by a postfix operator such as `[i]'.  Unsized variants pass -1
   as the source size.  The *_MALLOC forms go through
   new_dfc_convert_malloc(), the others through NEW_DFC_CONVERT_1_ALLOCA. */
#define NEW_EXTERNAL_TO_C_STRING(src, codesys)                            \
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_EXTERNAL, codesys))
#define NEW_EXTERNAL_TO_C_STRING_MALLOC(src, codesys)                     \
  ((Ibyte *) new_dfc_convert_malloc (src, -1, DFC_EXTERNAL, codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING(src, len, codesys)                 \
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_EXTERNAL,      \
                                       codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC(src, len, codesys)          \
  ((Ibyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_EXTERNAL,        \
                                     codesys))
#define NEW_C_STRING_TO_EXTERNAL(src, codesys)                            \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_INTERNAL, codesys))
#define NEW_C_STRING_TO_EXTERNAL_MALLOC(src, codesys)                     \
  ((Extbyte *) new_dfc_convert_malloc (src, -1, DFC_INTERNAL, codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL(src, len, codesys)                 \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_INTERNAL,    \
                                         codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC(src, len, codesys)          \
  ((Extbyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_INTERNAL,      \
                                       codesys))
#define NEW_LISP_STRING_TO_EXTERNAL(src, codesys)                         \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (LISP_TO_VOID (src), -1,          \
                                         DFC_LISP_STRING, codesys))
#define NEW_LISP_STRING_TO_EXTERNAL_MALLOC(src, codesys)                  \
  ((Extbyte *) new_dfc_convert_malloc (LISP_TO_VOID (src), -1,            \
                                       DFC_LISP_STRING, codesys))
771 | 2970 |
2367 | 2971 /* Wexttext functions. The type of Wexttext is selected at compile time |
2972 and will sometimes be wchar_t, sometimes char. */ | |
2973 | |
2974 int wcscmp_ascii (const wchar_t *s1, const Ascbyte *s2); | |
2975 int wcsncmp_ascii (const wchar_t *s1, const Ascbyte *s2, Charcount len); | |
2976 | |
/* Map the wext_* names onto the wide-character or the plain char library
   routines, depending on whether Wexttext is wide.  Macro arguments that
   sit under a cast are parenthesized so that an argument such as `p + 1'
   is cast as a whole. */
#ifdef WEXTTEXT_IS_WIDE /* defined under MS Windows i.e. WIN32_NATIVE */
#define WEXTTEXT_ZTERM_SIZE sizeof (wchar_t)
/* Extra indirection needed in case of manifest constant as arg */
#define WEXTSTRING_1(arg) L##arg
#define WEXTSTRING(arg) WEXTSTRING_1(arg)
#define wext_strlen wcslen
#define wext_strcmp wcscmp
#define wext_strncmp wcsncmp
#define wext_strcmp_ascii wcscmp_ascii
#define wext_strncmp_ascii wcsncmp_ascii
#define wext_strcpy wcscpy
#define wext_strncpy wcsncpy
#define wext_strchr wcschr
#define wext_strrchr wcsrchr
#define wext_strdup wcsdup
#define wext_atol(str) wcstol (str, 0, 10)
#define wext_sprintf wsprintfW /* Huh? both wsprintfA and wsprintfW? */
#define wext_getenv _wgetenv
#define build_wext_string(str, cs) build_ext_string ((Extbyte *) (str), cs)
#define WEXTTEXT_TO_8_BIT(arg) WEXTTEXT_TO_MULTIBYTE(arg)
#ifdef WIN32_NATIVE
int XCDECL wext_retry_open (const Wexttext *path, int oflag, ...);
#else
#error Cannot handle Wexttext yet on this system
#endif
#define wext_access _waccess
#define wext_stat _wstat
#else
#define WEXTTEXT_ZTERM_SIZE sizeof (char)
#define WEXTSTRING(arg) arg
#define wext_strlen strlen
#define wext_strcmp strcmp
#define wext_strncmp strncmp
#define wext_strcmp_ascii strcmp
#define wext_strncmp_ascii strncmp
#define wext_strcpy strcpy
#define wext_strncpy strncpy
#define wext_strchr strchr
#define wext_strrchr strrchr
#define wext_strdup xstrdup
#define wext_atol(str) atol (str)
#define wext_sprintf sprintf
#define wext_getenv getenv
#define build_wext_string build_ext_string
#define wext_retry_open retry_open
#define wext_access access
#define wext_stat stat
#define WEXTTEXT_TO_8_BIT(arg) ((Extbyte *) (arg))
#endif
3026 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3027 /* Standins for various encodings. |
1318 | 3028 |
3029 About encodings in X: | |
3030 | |
3031 X works with 5 different encodings: | |
3032 | |
3033 -- "Host Portable Character Encoding" == printable ASCII + space, tab, | |
3034 newline | |
3035 | |
3036 -- STRING encoding == ASCII + Latin-1 + tab, newline | |
3037 | |
3038 -- Locale-specific encoding | |
3039 | |
3040 -- Compound text == STRING encoding + ISO-2022 escape sequences to | |
3041 switch between different locale-specific encodings. | |
3042 | |
3043 -- ANSI C wide-character encoding | |
3044 | |
3045 The Host Portable Character Encoding (HPCE) is used for atom names, font | |
3046 names, color names, keysyms, geometry strings, resource manager quarks, | |
3047 display names, locale names, and various other things. When describing | |
3048 such strings, the X manual typically says "If the ... is not in the Host | |
3049 Portable Character Encoding, the result is implementation dependent." | |
3050 | |
3051 The wide-character encoding is used only in the Xwc* functions, which | |
3052 are provided as equivalents to Xmb* functions. | |
3053 | |
3054 STRING and compound text are used in the value of string properties and | |
3055 selection data, both of which are values with an associated type atom, | |
3056 which can be STRING or COMPOUND_TEXT. It can also be a locale name, as | |
3057 specified in setlocale() (#### as usual, there is no normalization | |
3058 whatsoever of these names). | |
3059 | |
3060 X also defines a type called "TEXT", which is used only as a requested | |
3061 type, and produces data in a type "convenient to the owner". However, | |
3062 there is some indication that X expects this to be the locale-specific | |
3063 encoding. | |
3064 | |
3065 According to the glossary, the locale is used in | |
3066 | |
3067 -- Encoding and processing of input method text | |
3068 -- Encoding of resource files and values | |
3069 -- Encoding and imaging of text strings | |
3070 -- Encoding and decoding for inter-client text communication | |
3071 | |
3072 The functions XmbTextListToTextProperty and XmbTextPropertyToTextList | |
3073 (and Xwc* equivalents) can be used to convert between the | |
3074 locale-specific encoding (XTextStyle), STRING (XStringStyle), and | |
3075 compound text (XCompoundTextStyle), as well as XStdICCTextStyle, which | |
3076 converts to STRING if possible, and if not, COMPOUND_TEXT. This is | |
3077 used, for example, in XmbSetWMProperties, in the window_name and | |
3078 icon_name properties (WM_NAME and WM_ICON_NAME), which are in the | |
3079 locale-specific encoding on input, and are stored as STRING if possible, | |
3080 COMPOUND_TEXT otherwise. | |
3081 */ | |
771 | 3082 |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3083 #ifdef WEXTTEXT_IS_WIDE |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3084 #define Qcommand_argument_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3085 #define Qenvironment_variable_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3086 #else |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3087 #define Qcommand_argument_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3088 #define Qenvironment_variable_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3089 #endif |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3090 #define Qunix_host_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3091 #define Qunix_service_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3092 #define Qtime_function_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3093 #define Qtime_zone_encoding Qtime_function_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3094 #define Qmswindows_host_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3095 #define Qmswindows_service_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3096 #define Quser_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3097 #define Qerror_message_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3098 #define Qjpeg_error_message_encoding Qerror_message_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3099 #define Qtooltalk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3100 #define Qgtk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3101 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3102 #define Qdll_symbol_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3103 #define Qdll_function_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3104 #define Qdll_variable_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3105 #define Qdll_filename_encoding Qfile_name |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3106 #define Qemodule_string_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3107 |
771 | 3108 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext? |
3109 Almost certainly the former. Use a standin for now. */ | |
3110 #define Qlwlib_encoding Qnative | |
3111 | |
1318 | 3112 /* The Host Portable Character Encoding. */ |
3113 #define Qx_hpc_encoding Qnative | |
3114 | |
3115 #define Qx_atom_name_encoding Qx_hpc_encoding | |
3116 #define Qx_font_name_encoding Qx_hpc_encoding | |
3117 #define Qx_color_name_encoding Qx_hpc_encoding | |
3118 #define Qx_keysym_encoding Qx_hpc_encoding | |
3119 #define Qx_geometry_encoding Qx_hpc_encoding | |
3120 #define Qx_resource_name_encoding Qx_hpc_encoding | |
3121 #define Qx_application_class_encoding Qx_hpc_encoding | |
771 | 3122 /* the following probably must agree with Qcommand_argument_encoding and |
3123 Qenvironment_variable_encoding */ | |
1318 | 3124 #define Qx_display_name_encoding Qx_hpc_encoding |
3125 #define Qx_xpm_data_encoding Qx_hpc_encoding | |
4834
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4790
diff
changeset
|
3126 #define Qx_error_message_encoding Qx_hpc_encoding |
1318 | 3127 |
2367 | 3128 /* !!#### Verify these! */ |
3129 #define Qxt_widget_arg_encoding Qnative | |
3130 #define Qdt_dnd_encoding Qnative | |
3131 | |
1318 | 3132 /* RedHat 6.2 contains a locale called "Francais" with the C-cedilla |
3133 encoded in ISO2022! */ | |
3134 #define Qlocale_name_encoding Qctext | |
771 | 3135 |
3136 #define Qstrerror_encoding Qnative | |
3137 | |
1318 | 3138 /* !!#### This exists to remind us that our hexify routine is totally |
3139 un-Muleized. */ | |
3140 #define Qdnd_hexify_encoding Qascii | |
3141 | |
/* Store in VAR the message for error number NUM.

   NUM is evaluated once.  If strerror() returns a non-null string, it is
   converted with EXTERNAL_TO_C_STRING() using Qstrerror_encoding.
   Otherwise VAR is pointed at a 99-byte alloca_ibytes() buffer holding
   "Unknown error <NUM>".  The temporaries use plain (non-reserved)
   names. */
#define GET_STRERROR(var, num)                                          \
do {                                                                    \
  int gs_errnum = (num);                                                \
  Extbyte *gs_errtext = strerror (gs_errnum);                           \
                                                                        \
  if (gs_errtext)                                                       \
    EXTERNAL_TO_C_STRING (gs_errtext, var, Qstrerror_encoding);         \
  else                                                                  \
    {                                                                   \
      var = alloca_ibytes (99);                                         \
      qxesprintf (var, "Unknown error %d", gs_errnum);                  \
    }                                                                   \
} while (0)
3155 | |
3156 #endif /* INCLUDED_text_h_ */ |