comparison src/text.h @ 826:6728e641994e

[xemacs-hg @ 2002-05-05 11:30:15 by ben] syntax cache, 8-bit-format, lots of code cleanup README.packages: Update info about --package-path. i.c: Create an inheritable event and pass it on to XEmacs, so that ^C can be handled properly. Intercept ^C and signal the event. "Stop Build" in VC++ now works. bytecomp-runtime.el: Doc string changes. compat.el: Some attempts to redo this to make it truly useful and fix the "multiple versions interacting with each other" problem. Not yet done. Currently doesn't work. files.el: Use with-obsolete-variable to avoid warnings in new revert-buffer code. xemacs.mak: Split up CFLAGS into a version without flags specifying the C library. The problem seems to be that minitar depends on zlib, which depends specifically on libc.lib, not on any of the other C libraries. Unless you compile with libc.lib, you get errors -- specifically, no _errno in the other libraries, which must make it something other than an int. (#### But this doesn't seem to obtain in XEmacs, which also uses zlib, and can be linked with any of the C libraries. Maybe zlib is used differently and doesn't need errno, or maybe XEmacs provides an int errno; ... I don't understand. Makefile.in.in: Fix so that packages are around when testing. 
abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, casetab.h, charset.h, chartab.c, chartab.h, cmds.c, console-msw.h, console-stream.c, console-x.c, console.c, console.h, data.c, device-msw.c, device.c, device.h, dialog-msw.c, dialog-x.c, dired-msw.c, dired.c, doc.c, doprnt.c, dumper.c, editfns.c, elhash.c, emacs.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, events.c, events.h, extents.c, extents.h, faces.c, file-coding.c, file-coding.h, fileio.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, frame.h, glade.c, glyphs-gtk.c, glyphs-msw.c, glyphs-msw.h, glyphs-x.c, glyphs.c, glyphs.h, gui-msw.c, gui-x.c, gui.h, gutter.h, hash.h, indent.c, insdel.c, intl-win32.c, intl.c, keymap.c, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, marker.c, menubar-gtk.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, nas.c, objects-msw.c, objects-x.c, opaque.c, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, process.h, profile.c, rangetab.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, regex.h, scrollbar-msw.c, search.c, select-x.c, specifier.c, specifier.h, symbols.c, symsinit.h, syntax.c, syntax.h, syswindows.h, tests.c, text.c, text.h, tooltalk.c, ui-byhand.c, ui-gtk.c, unicode.c, win32.c, window.c: Another big Ben patch. -- FUNCTIONALITY CHANGES: add partial support for 8-bit-fixed, 16-bit-fixed, and 32-bit-fixed formats. not quite done yet. (in particular, needs functions to actually convert the buffer.) NOTE: lots of changes to regex.c here. also, many new *_fmt() inline funs that take an Internal_Format argument. redo syntax cache code. make the cache per-buffer; keep the cache valid across calls to functions that use it. also keep it valid across insertions/deletions and extent changes, as much as is possible. 
eliminate the junky regex-reentrancy code by passing in the relevant lisp info to the regex routines as local vars. add general mechanism in extents code for signalling extent changes. fix numerous problems with the case-table implementation; yoshiki never properly transferred many algorithms from old-style to new-style case tables. redo char tables to support a default argument, so that mapping only occurs over changed args. change many chartab functions to accept Lisp_Object instead of Lisp_Char_Table *. comment out the code in font-lock.c by default, because font-lock.el no longer uses it. we should consider eliminating it entirely. Don't output bell as ^G in console-stream when not a TTY. add -mswindows-termination-handle to interface with i.c, so we can properly kill a build. add more error-checking to buffer/string macros. add some additional buffer_or_string_() funs. -- INTERFACE CHANGES AFFECTING MORE CODE: switch the arguments of write_c_string and friends to be consistent with write_fmt_string, which must have printcharfun first. change BI_* macros to BYTE_* for increased clarity; similarly for bi_* local vars. change VOID_TO_LISP to be a one-argument function. eliminate no-longer-needed CVOID_TO_LISP. -- char/string macro changes: rename MAKE_CHAR() to make_emchar() for slightly less confusion with make_char(). (The former generates an Emchar, the latter a Lisp object. Conceivably we should rename make_char() -> wrap_char() and similarly for make_int(), make_float().) Similar changes for other *CHAR* macros -- we now consistently use names with `emchar' whenever we are working with Emchars. Any remaining name with just `char' always refers to a Lisp object. rename macros with XSTRING_* to string_* except for those that reference actual fields in the Lisp_String object, following conventions used elsewhere. 
rename set_string_{data,length} macros (the only ones to work with a Lisp_String_* instead of a Lisp_Object) to set_lispstringp_* to make the difference clear. try to be consistent about caps vs. lowercase in macro/inline-fun names for chars and such, which wasn't the case before. we now reserve caps either for XFOO_ macros that reference object fields (e.g. XSTRING_DATA) or for things that have non-function semantics, e.g. directly modifying an arg (BREAKUP_EMCHAR) or evaluating an arg (any arg) more than once. otherwise, use lowercase. here is a summary of most of the macros/inline funs changed by all of the above changes: BYTE_*_P -> byte_*_p XSTRING_BYTE -> string_byte set_string_data/length -> set_lispstringp_data/length XSTRING_CHAR_LENGTH -> string_char_length XSTRING_CHAR -> string_emchar INTBYTE_FIRST_BYTE_P -> intbyte_first_byte_p INTBYTE_LEADING_BYTE_P -> intbyte_leading_byte_p charptr_copy_char -> charptr_copy_emchar LEADING_BYTE_* -> leading_byte_* CHAR_* -> EMCHAR_* *_CHAR_* -> *_EMCHAR_* *_CHAR -> *_EMCHAR CHARSET_BY_ -> charset_by_* BYTE_SHIFT_JIS* -> byte_shift_jis* BYTE_BIG5* -> byte_big5* REP_BYTES_BY_FIRST_BYTE -> rep_bytes_by_first_byte char_to_unicode -> emchar_to_unicode valid_char_p -> valid_emchar_p Change intbyte_strcmp -> qxestrcmp_c (duplicated functionality). -- INTERFACE CHANGES AFFECTING LESS CODE: use DECLARE_INLINE_HEADER in various places. remove '#ifdef emacs' from XEmacs-only files. eliminate CHAR_TABLE_VALUE(), which duplicated the functionality of get_char_table(). add BUFFER_TEXT_LOOP to simplify iterations over buffer text. define typedefs for signed and unsigned types of fixed sizes (INT_32_BIT, UINT_32_BIT, etc.). create ALIGN_FOR_TYPE as a higher-level interface onto ALIGN_SIZE; fix code to use it. add charptr_emchar_len to return the text length of the character pointed to by a ptr; use it in place of charcount_to_bytecount(..., 1). add emchar_len to return the text length of a given character. 
add types Bytexpos and Charxpos to generalize Bytebpos/Bytecount and Charbpos/Charcount, in code (particularly, the extents code and redisplay code) that works with either kind of index. rename redisplay struct params with names such as `charbpos' to e.g. `charpos' when they are e.g. a Charxpos, not a Charbpos. eliminate xxDEFUN in place of DEFUN; no longer necessary with changes awhile back to doc.c. split up big ugly combined list of EXFUNs in lisp.h on a file-by-file basis, since other prototypes are similarly split. rewrite some "*_UNSAFE" macros as inline funs and eliminate the _UNSAFE suffix. move most string code from lisp.h to text.h; the string code and text.h code is now intertwined in such a fashion that they need to be in the same place and partially interleaved. (you can't create forward references for inline funs) automated/lisp-tests.el, automated/symbol-tests.el, automated/test-harness.el: Fix test harness to output FAIL messages to stderr when in batch mode. Fix up some problems in lisp-tests/symbol-tests that were causing spurious failures.
author ben
date Sun, 05 May 2002 11:33:57 +0000
parents a634e3b7acc8
children 5d09ddada9ae
comparison
equal deleted inserted replaced
825:eb3bc15a6e0f 826:6728e641994e
50 50
51 /* If you want more than this, you need to include charset.h */ 51 /* If you want more than this, you need to include charset.h */
52 52
53 #ifndef MULE 53 #ifndef MULE
54 54
55 #define REP_BYTES_BY_FIRST_BYTE(fb) 1 55 #define rep_bytes_by_first_byte(fb) 1
56 #define BYTE_ASCII_P(byte) 1 56 #define byte_ascii_p(byte) 1
57 # define MAX_EMCHAR_LEN 1 57 #define MAX_EMCHAR_LEN 1
58 58
59 #else /* MULE */ 59 #else /* MULE */
60 60
61 /* These are carefully designed to work if BYTE is signed or unsigned. */ 61 /* These are carefully designed to work if BYTE is signed or unsigned. */
62 /* Note that SPC and DEL are considered ASCII, not control. */ 62 /* Note that SPC and DEL are considered ASCII, not control. */
63 63
64 #define BYTE_ASCII_P(byte) (((byte) & ~0x7f) == 0) 64 #define byte_ascii_p(byte) (((byte) & ~0x7f) == 0)
65 #define BYTE_C0_P(byte) (((byte) & ~0x1f) == 0) 65 #define byte_c0_p(byte) (((byte) & ~0x1f) == 0)
66 #define BYTE_C1_P(byte) (((byte) & ~0x1f) == 0x80) 66 #define byte_c1_p(byte) (((byte) & ~0x1f) == 0x80)
67 67
68 /* Does BYTE represent the first byte of a character? */ 68 /* Does BYTE represent the first byte of a character? */
69 69
70 #define INTBYTE_FIRST_BYTE_P(byte) ((byte) < 0xA0) 70 #ifdef ERROR_CHECK_TEXT
71
72 DECLARE_INLINE_HEADER (
73 int
74 intbyte_first_byte_p_1 (int byte, const char *file, int line)
75 )
76 {
77 assert_at_line (byte >= 0 && byte < 256, file, line);
78 return byte < 0xA0;
79 }
80
81 #define intbyte_first_byte_p(byte) \
82 intbyte_first_byte_p_1 (byte, __FILE__, __LINE__)
83
84 #else
85
86 #define intbyte_first_byte_p(byte) ((byte) < 0xA0)
87
88 #endif
89
90 #ifdef ERROR_CHECK_TEXT
71 91
72 /* Does BYTE represent the first byte of a multi-byte character? */ 92 /* Does BYTE represent the first byte of a multi-byte character? */
73 93
74 #define INTBYTE_LEADING_BYTE_P(byte) BYTE_C1_P (byte) 94 DECLARE_INLINE_HEADER (
95 int
96 intbyte_leading_byte_p_1 (int byte, const char *file, int line)
97 )
98 {
99 assert_at_line (byte >= 0 && byte < 256, file, line);
100 return byte_c1_p (byte);
101 }
102
103 #define intbyte_leading_byte_p(byte) \
104 intbyte_leading_byte_p_1 (byte, __FILE__, __LINE__)
105
106 #else
107
108 #define intbyte_leading_byte_p(byte) byte_c1_p (byte)
109
110 #endif
75 111
76 /* Table of number of bytes in the string representation of a character 112 /* Table of number of bytes in the string representation of a character
77 indexed by the first byte of that representation. 113 indexed by the first byte of that representation.
78 114
79 This value can be derived in other ways -- e.g. something like 115 This value can be derived in other ways -- e.g. something like
80 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)) 116 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte))
81 but it's faster this way. */ 117 but it's faster this way. */
82 extern const Bytecount rep_bytes_by_first_byte[0xA0]; 118 extern const Bytecount rep_bytes_by_first_byte[0xA0];
83 119
84 /* Number of bytes in the string representation of a character. */ 120 /* Number of bytes in the string representation of a character. */
85 121
86 #ifdef ERROR_CHECK_TEXT 122 #ifdef ERROR_CHECK_TEXT
87 123
88 INLINE_HEADER Bytecount REP_BYTES_BY_FIRST_BYTE_1 (int fb, const char *file, 124 DECLARE_INLINE_HEADER (
89 int line); 125 Bytecount
90 INLINE_HEADER Bytecount 126 rep_bytes_by_first_byte_1 (int fb, const char *file, int line)
91 REP_BYTES_BY_FIRST_BYTE_1 (int fb, const char *file, int line) 127 )
92 { 128 {
93 assert_at_line (fb < 0xA0, file, line); 129 assert_at_line (fb >= 0 && fb < 0xA0, file, line);
94 return rep_bytes_by_first_byte[fb]; 130 return rep_bytes_by_first_byte[fb];
95 } 131 }
96 132
97 #define REP_BYTES_BY_FIRST_BYTE(fb) \ 133 #define rep_bytes_by_first_byte(fb) \
98 REP_BYTES_BY_FIRST_BYTE_1 (fb, __FILE__, __LINE__) 134 rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__)
99 135
100 #else /* ERROR_CHECK_TEXT */ 136 #else /* ERROR_CHECK_TEXT */
101 137
102 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb]) 138 #define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb])
103 139
104 #endif /* ERROR_CHECK_TEXT */ 140 #endif /* ERROR_CHECK_TEXT */
105 141
106 /* Is this character represented by more than one byte in a string? */ 142 /* Is this character represented by more than one byte in a string in the
107 143 default format? */
108 #define CHAR_MULTIBYTE_P(c) ((c) >= 0x80) 144
109 145 #define emchar_multibyte_p(c) ((c) >= 0x80)
110 #define CHAR_ASCII_P(c) (!CHAR_MULTIBYTE_P (c)) 146
147 #define emchar_ascii_p(c) (!emchar_multibyte_p (c))
148
149 /* Maximum number of bytes per Emacs character when represented as text, in
150 any format.
151 */
111 152
112 #define MAX_EMCHAR_LEN 4 153 #define MAX_EMCHAR_LEN 4
154
155 #endif /* not MULE */
156
157 /* ---------------- Handling non-default formats ----------------- */
158
159 /* We support, at least to some extent, formats other than the default
160 variable-width format, for speed; all of these alternative formats are
161 fixed-width. Currently we only handle these non-default formats in
162 buffers, because access to their text is strictly controlled and thus
163 the details of the format mostly compartmentalized. The only really
164 tricky part is the search code -- the regex, Boyer-Moore, and
165 simple-search algorithms in search.c and regex.c. All other code that
166 knows directly about the buffer representation is the basic code to
167 modify or retrieve the buffer text.
168
169 Supporting fixed-width formats in Lisp strings is harder, but possible
170 -- FSF currently does this, for example. In this case, however,
171 probably only 8-bit-fixed is reasonable for Lisp strings -- getting
172 non-ASCII-compatible fixed-width formats to work is much, much harder
173 because a lot of code assumes that strings are ASCII-compatible
174 (i.e. ASCII + other characters represented exclusively using high-bit
175 bytes) and a lot of code mixes Lisp strings and non-Lisp strings freely.
176
177 The different possible fixed-width formats are 8-bit fixed, 16-bit
178 fixed, and 32-bit fixed. The latter can represent all possible
179 characters, but at a substantial memory penalty. The other two can
180 represent only a subset of the possible characters. How these subsets
181 are defined can be simple or very tricky.
182
183 Currently we support only the default format and the 8-bit fixed format,
184 and in the latter, the only characters we allow are those with the first
185 256 Emchar values (ASCII and Latin 1).
186
187 One reasonable approach for 8-bit fixed is to allow the upper half to
188 represent any 1-byte charset, which is specified on a per-buffer basis.
189 This should work fairly well in practice since most documents are in
190 only one foreign language (possibly with some English mixed in). I
191 think FSF does something like this; or at least, they have something
192 called nonascii-translation-table and use it when converting from
193 8-bit-fixed text ("unibyte text") to default text ("multibyte text").
194 With 16-bit fixed, you could do something like assign chunks of the 64K
195 worth of characters to charsets as they're encountered in documents.
196 This should work well with most Asian documents.
197
198 If/when we switch to using Unicode internally, we might have formats more
199 like this:
200
201 -- UTF-8 or some extension as the default format. Perl uses an
202 extension that handles 64-bit chars and requires as many as 13 bytes per
203 char, vs. the standard of 31-bit chars and 6 bytes max. UTF-8 has the
204 same basic properties as our own variable-width format (see text.c,
205 Internal String Encoding) and so most code would not need to be changed.
206
207 -- UTF-16 as a "pseudo-fixed" format (i.e. 16-bit fixed plus surrogates
208 for representing characters not in the BMP, aka >= 65536). The vast
209 majority of documents will have no surrogates in them so byte/char
210 conversion will be very fast.
211
212 -- an 8-bit fixed format, like currently.
213
214 -- possibly, UCS-4 as a 32-bit fixed format.
215
216 The fixed-width formats essentially treat the buffer as an array of
217 8-bit, 16-bit or 32-bit integers. This means that how they are stored
218 in memory (in particular, big-endian or little-endian) depends on the
219 native format of the machine's processor. It also means we have to
220 worry a bit about alignment (basically, we just need to keep the gap an
221 integral multiple of the character size, and get things aligned properly
222 when converting the buffer between formats).
223 */
224 typedef enum internal_format
225 {
226 FORMAT_DEFAULT,
227 FORMAT_8_BIT_FIXED,
228 FORMAT_16_BIT_FIXED, /* not implemented */
229 FORMAT_32_BIT_FIXED /* not implemented */
230 } Internal_Format;
231
232 #ifdef MULE
233 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to
234 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed
235 values may depend on the buffer, e.g. depending on what language the
236 text in the buffer is in. */
237
238 /* True if Emchar CH can be represented in 8-bit-fixed format. */
239 #define emchar_8_bit_fixed_p(ch, object) (((ch) & ~0xff) == 0)
240 /* Convert Emchar CH to an 8-bit int, as will be stored in the buffer. */
241 #define emchar_to_raw_8_bit_fixed(ch, object) ((Intbyte) (ch))
242 /* Convert the other way. */
243 #define raw_8_bit_fixed_to_emchar(ch, object) ((Emchar) (ch))
244
245 #define emchar_16_bit_fixed_p(ch, object) (((ch) & ~0xffff) == 0)
246 /* Convert Emchar CH to a 16-bit int, as will be stored in the buffer. */
247 #define emchar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch))
248 /* Convert the other way. */
249 #define raw_16_bit_fixed_to_emchar(ch, object) ((Emchar) (ch))
250
251 /* Convert Emchar CH to a 32-bit int, as will be stored in the buffer. */
252 #define emchar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch))
253 /* Convert the other way. */
254 #define raw_32_bit_fixed_to_emchar(ch, object) ((Emchar) (ch))
255
256 /* Return the "raw value" of a character as stored in the buffer. In the
257 default format, this is just the same as the character. In fixed-width
258 formats, this is the actual value in the buffer, which will be limited
259 to the range as established by the format. This is used when searching
260 for a character in a buffer -- it's faster to convert the character to
261 the raw value and look for that, than repeatedly convert each raw value
262 in the buffer into a character. */
263
264 DECLARE_INLINE_HEADER (
265 Raw_Emchar
266 emchar_to_raw (Emchar ch, Internal_Format fmt, Lisp_Object object)
267 )
268 {
269 switch (fmt)
270 {
271 case FORMAT_DEFAULT:
272 return (Raw_Emchar) ch;
273 case FORMAT_16_BIT_FIXED:
274 text_checking_assert (emchar_16_bit_fixed_p (ch, object));
275 return (Raw_Emchar) emchar_to_raw_16_bit_fixed (ch, object);
276 case FORMAT_32_BIT_FIXED:
277 return (Raw_Emchar) emchar_to_raw_32_bit_fixed (ch, object);
278 default:
279 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
280 text_checking_assert (emchar_8_bit_fixed_p (ch, object));
281 return (Raw_Emchar) emchar_to_raw_8_bit_fixed (ch, object);
282 }
283 }
284
285 /* Return whether CH is representable in the given format in the given
286 object. */
287
288 DECLARE_INLINE_HEADER (
289 int
290 emchar_fits_in_format (Emchar ch, Internal_Format fmt, Lisp_Object object)
291 )
292 {
293 switch (fmt)
294 {
295 case FORMAT_DEFAULT:
296 return 1;
297 case FORMAT_16_BIT_FIXED:
298 return emchar_16_bit_fixed_p (ch, object);
299 case FORMAT_32_BIT_FIXED:
300 return 1;
301 default:
302 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
303 return emchar_8_bit_fixed_p (ch, object);
304 }
305 }
306
307 /* Assuming the formats are the same, return whether the two objects
308 represent text in exactly the same way. */
309
310 DECLARE_INLINE_HEADER (
311 int
312 objects_have_same_internal_representation (Lisp_Object srcobj,
313 Lisp_Object dstobj)
314 )
315 {
316 /* &&#### implement this properly when we allow per-object format
317 differences */
318 return 1;
319 }
320
321 #else
322
323 #define emchar_to_raw(ch, fmt, object) ((Raw_Emchar) (ch))
324 #define emchar_fits_in_format(ch, fmt, object) 1
325 #define objects_have_same_internal_representation(srcobj, dstobj) 1
113 326
114 #endif /* MULE */ 327 #endif /* MULE */
115 328
116 int dfc_coding_system_is_unicode (Lisp_Object coding_system); 329 int dfc_coding_system_is_unicode (Lisp_Object coding_system);
117 330
130 /* */ 343 /* */
131 /* working with raw internal-format data */ 344 /* working with raw internal-format data */
132 /* */ 345 /* */
133 /************************************************************************/ 346 /************************************************************************/
134 347
135 /* NOTE: In all the following macros, we follow these rules concerning 348 /*
136 multiple evaluation of the arguments: 349 Use the following functions/macros on contiguous text in any of the
350 internal formats. Those that take a format arg work on all internal
351 formats; the others work only on the default (variable-width under Mule)
352 format. If the text you're operating on is known to come from a buffer,
353 use the buffer-level functions in buffer.h, which automatically know the
354 correct format and handle the gap.
355
356 Some terminology:
357
358 "charptr" appearing in the macros means "internal-format text" -- type
359 `Intbyte *'. Operations on such pointers themselves, rather than on the
360 text being pointed to, have "charptr" instead of "charptr" in the macro
361 name. "emchar" in the macro names means an Emchar -- the representation
362 of a character as a single integer rather than a series of bytes, as part
363 of "charptr". Many of the macros below are for converting between the
364 two representations of characters.
365
366 Note also that we try to consistently distinguish between an "Emchar" and
367 a Lisp character. Stuff working with Lisp characters often just says
368 "char", so we consistently use "Emchar" when that's what we're working
369 with. */
370
371 /* The three golden rules of macros:
137 372
138 1) Anything that's an lvalue can be evaluated more than once. 373 1) Anything that's an lvalue can be evaluated more than once.
139 2) Anything that's a Lisp Object can be evaluated more than once. 374
140 This should probably be changed, but this follows the way 375 2) Macros where anything else can be evaluated more than once should
141 that all the macros in lisp.h do things. 376 have the word "unsafe" in their name (exceptions may be made for
142 3) 'struct buffer *' arguments can be evaluated more than once. 377 large sets of macros that evaluate arguments of certain types more
143 4) Nothing else can be evaluated more than once. Use inline 378 than once, e.g. struct buffer * arguments, when clearly indicated in
379 the macro documentation). These macros are generally meant to be
380 called only by other macros that have already stored the calling
381 values in temporary variables.
382
383 3) Nothing else can be evaluated more than once. Use inline
144 functions, if necessary, to prevent multiple evaluation. 384 functions, if necessary, to prevent multiple evaluation.
145 5) An exception to (4) is that there are some macros below that 385
146 may evaluate their arguments more than once. They are all 386 NOTE: The functions and macros below are given full prototypes in their
147 denoted with the word "unsafe" in their name and are generally 387 docs, even when the implementation is a macro. In such cases, passing
148 meant to be called only by other macros that have already 388 an argument of a type other than expected will produce undefined
149 stored the calling values in temporary variables. 389 results. Also, given that macros can do things functions can't (in
150 390 particular, directly modify arguments as if they were passed by
151 391 reference), the declaration syntax has been extended to include the
152 Use the following functions/macros on contiguous strings of data. 392 call-by-reference syntax from C++, where an & after a type indicates
153 If the text you're operating on is known to come from a buffer, use 393 that the argument is an lvalue and is passed by reference, i.e. the
154 the buffer-level functions below -- they know about the gap and may 394 function can modify its value. (This is equivalent in C to passing a
155 be more efficient. 395 pointer to the argument, but without the need to explicitly worry about
156 396 pointers.)
157 397
158 ---------------------------------------------------------------------------- 398 When to capitalize macros:
159 (A) For working with charptr's (pointers to internally-formatted text): 399
160 ---------------------------------------------------------------------------- 400 -- Capitalize macros doing stuff obviously impossible with (C)
161 401 functions, e.g. directly modifying arguments as if they were passed by
162 VALID_CHARPTR_P (ptr): 402 reference.
163 Given a charptr, does it point to the beginning of a character? 403
164 404 -- Capitalize macros that evaluate *any* argument more than once regardless
165 ASSERT_VALID_CHARPTR (ptr): 405 of whether that's "allowed" (e.g. buffer arguments).
166 If error-checking is enabled, assert that the given charptr 406
167 points to the beginning of a character. Otherwise, do nothing. 407 -- Capitalize macros that directly access a field in a Lisp_Object or
168 408 its equivalent underlying structure. In such cases, access through the
169 INC_CHARPTR (ptr): 409 Lisp_Object precedes the macro with an X, and access through the underlying
170 Given a charptr (assumed to point at the beginning of a character), 410 structure doesn't.
171 modify that pointer so it points to the beginning of the next 411
172 character. 412 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g.
173 413 FRAMEP, CHECK_FRAME, etc.
174 DEC_CHARPTR (ptr): 414
175 Given a charptr (assumed to point at the beginning of a 415 -- Try to avoid capitalizing any other macros.
176 character or at the very end of the text), modify that pointer
177 so it points to the beginning of the previous character.
178
179 VALIDATE_CHARPTR_BACKWARD (ptr):
180 Make sure that PTR is pointing to the beginning of a character.
181 If not, back up until this is the case. Note that there are not
182 too many places where it is legitimate to do this sort of thing.
183 It's an error if you're passed an "invalid" char * pointer.
184 NOTE: PTR *must* be pointing to a valid part of the string (i.e.
185 not the very end, unless the string is zero-terminated or
186 something) in order for this function to not cause crashes.
187
188 VALIDATE_CHARPTR_FORWARD (ptr):
189 Make sure that PTR is pointing to the beginning of a character.
190 If not, move forward until this is the case. Note that there
191 are not too many places where it is legitimate to do this sort
192 of thing. It's an error if you're passed an "invalid" char *
193 pointer.
194
195 ---------------------------------------------------------------------
196 (B) For working with the length (in bytes and characters) of a
197 section of internally-formatted text:
198 ---------------------------------------------------------------------
199
200 bytecount_to_charcount (ptr, nbi):
201 Given a pointer to a text string and a length in bytes,
202 return the equivalent length in characters.
203
204 charcount_to_bytecount (ptr, nch):
205 Given a pointer to a text string and a length in characters,
206 return the equivalent length in bytes.
207
208 charptr_n_addr (ptr, n):
209 Return a pointer to the beginning of the character offset N
210 (in characters) from PTR.
211
212 -------------------------------------------------------------------------
213 (C) For retrieving or changing the character pointed to by a charptr:
214 -------------------------------------------------------------------------
215
216 charptr_emchar (ptr):
217 Retrieve the character pointed to by PTR as an Emchar.
218
219 charptr_emchar_n (ptr, n):
220 Retrieve the character at offset N (in characters) from PTR,
221 as an Emchar.
222
223 set_charptr_emchar (ptr, ch):
224 Store the character CH (an Emchar) as internally-formatted
225 text starting at PTR. Return the number of bytes stored.
226
227 charptr_copy_char (src, dst):
228 Retrieve the character pointed to by SRC and store it as
229 internally-formatted text in DST.
230
231 ----------------------------------
232 (D) For working with Emchars:
233 ----------------------------------
234
235 [Note that there are other functions/macros for working with Emchars
236 in charset.h, for retrieving the charset of an Emchar and such.]
237
238 valid_char_p (ch):
239 Return whether the given Emchar is valid.
240
241 CHARP (ch):
242 Return whether the given Lisp_Object is a character.
243
244 CHECK_CHAR_COERCE_INT (ch):
245 Signal an error if CH is not a valid character or integer Lisp_Object.
246 If CH is an integer Lisp_Object, convert it to a character Lisp_Object,
247 but merely by repackaging, without performing tests for char validity.
248
249 MAX_EMCHAR_LEN:
250 Maximum number of buffer bytes per Emacs character.
251 */ 416 */
252 417
253 /* ---------------------------------------------------------------------- */ 418 /* ---------------------------------------------------------------------- */
254 /* (A) For working with charptr's (pointers to internally-formatted text) */ 419 /* Working with charptr's (pointers to internally-formatted text) */
255 /* ---------------------------------------------------------------------- */ 420 /* ---------------------------------------------------------------------- */
256 421
422 /* Given a charptr, does it point to the beginning of a character?
423 */
424
257 #ifdef MULE 425 #ifdef MULE
258 # define VALID_CHARPTR_P(ptr) INTBYTE_FIRST_BYTE_P (* (unsigned char *) ptr) 426 # define valid_charptr_p(ptr) intbyte_first_byte_p (* (ptr))
259 #else 427 #else
260 # define VALID_CHARPTR_P(ptr) 1 428 # define valid_charptr_p(ptr) 1
261 #endif 429 #endif
262 430
263 #ifdef ERROR_CHECK_TEXT 431 /* If error-checking is enabled, assert that the given charptr points to
264 # define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr)) 432 the beginning of a character. Otherwise, do nothing.
265 #else 433 */
266 # define ASSERT_VALID_CHARPTR(ptr) 434
267 #endif 435 #define assert_valid_charptr(ptr) text_checking_assert (valid_charptr_p (ptr))
268 436
269 /* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in 437 /* Given a charptr (assumed to point at the beginning of a character),
438 modify that pointer so it points to the beginning of the next character.
439
440 Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
270 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR() 441 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
271 trick of looking for a valid first byte because it might run off 442 trick of looking for a valid first byte because it might run off
272 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR() 443 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
273 method because it doesn't have easy access to the first byte of 444 method because it doesn't have easy access to the first byte of
274 the character it's moving over. */ 445 the character it's moving over. */
275 446
276 #define REAL_INC_CHARPTR(ptr) \ 447 #define INC_CHARPTR(ptr) do { \
277 ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))) 448 assert_valid_charptr (ptr); \
278 449 (ptr) += rep_bytes_by_first_byte (* (ptr)); \
279 #define REAL_INC_CHARBYTEBPOS(ptr, pos) \ 450 } while (0)
280 (pos += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))) 451
281 452 #define INC_CHARPTR_FMT(ptr, fmt) \
282 #define REAL_DEC_CHARPTR(ptr) do { \ 453 do { \
283 (ptr)--; \ 454 Internal_Format __icf_fmt = (fmt); \
284 } while (!VALID_CHARPTR_P (ptr)) 455 switch (__icf_fmt) \
456 { \
457 case FORMAT_DEFAULT: \
458 INC_CHARPTR (ptr); \
459 break; \
460 case FORMAT_16_BIT_FIXED: \
461 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT)); \
462 (ptr) += 2; \
463 break; \
464 case FORMAT_32_BIT_FIXED: \
465 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT)); \
466 (ptr) += 4; \
467 break; \
468 default: \
469 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); \
470 (ptr)++; \
471 break; \
472 } \
473 } while (0)
474
475 /* Given a charptr (assumed to point at the beginning of a character or at
476 the very end of the text), modify that pointer so it points to the
477 beginning of the previous character.
478 */
285 479
286 #ifdef ERROR_CHECK_TEXT 480 #ifdef ERROR_CHECK_TEXT
287 #define INC_CHARPTR(ptr) do { \ 481 /* We use a separate definition to avoid warnings about unused dc_ptr1 */
288 ASSERT_VALID_CHARPTR (ptr); \ 482 #define DEC_CHARPTR(ptr) do { \
289 REAL_INC_CHARPTR (ptr); \ 483 const Intbyte *dc_ptr1 = (ptr); \
290 } while (0) 484 do { \
291 485 (ptr)--; \
292 #define INC_CHARBYTEBPOS(ptr, pos) do { \ 486 } while (!valid_charptr_p (ptr)); \
293 ASSERT_VALID_CHARPTR (ptr); \ 487 text_checking_assert (dc_ptr1 - (ptr) == rep_bytes_by_first_byte (*(ptr))); \
294 REAL_INC_CHARBYTEBPOS (ptr, pos); \ 488 } while (0)
295 } while (0) 489 #else
296 490 #define DEC_CHARPTR(ptr) do { \
297 #define DEC_CHARPTR(ptr) do { \ 491 do { \
298 const Intbyte *dc_ptr1 = (ptr); \ 492 (ptr)--; \
299 const Intbyte *dc_ptr2 = dc_ptr1; \ 493 } while (!valid_charptr_p (ptr)); \
300 REAL_DEC_CHARPTR (dc_ptr2); \ 494 } while (0)
301 assert (dc_ptr1 - dc_ptr2 == \ 495 #endif /* ERROR_CHECK_TEXT */
302 REP_BYTES_BY_FIRST_BYTE (*dc_ptr2)); \ 496
303 (ptr) = (Intbyte *) dc_ptr2; \ 497 #define DEC_CHARPTR_FMT(ptr, fmt) \
304 } while (0) 498 do { \
305 499 Internal_Format __icf_fmt = (fmt); \
306 #else /* ! ERROR_CHECK_TEXT */ 500 switch (__icf_fmt) \
307 #define INC_CHARBYTEBPOS(ptr, pos) REAL_INC_CHARBYTEBPOS (ptr, pos) 501 { \
308 #define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr) 502 case FORMAT_DEFAULT: \
309 #define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr) 503 DEC_CHARPTR (ptr); \
310 #endif /* ! ERROR_CHECK_TEXT */ 504 break; \
505 case FORMAT_16_BIT_FIXED: \
506 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT)); \
507 (ptr) -= 2; \
508 break; \
509 case FORMAT_32_BIT_FIXED: \
510 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT)); \
511 (ptr) -= 4; \
512 break; \
513 default: \
514 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); \
515 (ptr)--; \
516 break; \
517 } \
518 } while (0)
311 519
312 #ifdef MULE 520 #ifdef MULE
313 521
522 /* Make sure that PTR is pointing to the beginning of a character. If not,
523 back up until this is the case. Note that there are not too many places
524 where it is legitimate to do this sort of thing. It's an error if
525 you're passed an "invalid" char * pointer. NOTE: PTR *must* be pointing
526 to a valid part of the string (i.e. not the very end, unless the string
527 is zero-terminated or something) in order for this function to not cause
528 crashes.
529 */
530
314 /* Note that this reads the byte at *PTR! */ 531 /* Note that this reads the byte at *PTR! */
315 532
316 #define VALIDATE_CHARPTR_BACKWARD(ptr) do { \ 533 #define VALIDATE_CHARPTR_BACKWARD(ptr) do { \
317 while (!VALID_CHARPTR_P (ptr)) ptr--; \ 534 while (!valid_charptr_p (ptr)) ptr--; \
318 } while (0) 535 } while (0)
319 536
320 /* Given a Intbyte string at PTR of size N, possibly with a partial 537 /* Make sure that PTR is pointing to the beginning of a character. If not,
321 character at the end, return the size of the longest substring of 538 move forward until this is the case. Note that there are not too many
322 complete characters. Does not assume that the byte at *(PTR + N) is 539 places where it is legitimate to do this sort of thing. It's an error
323 readable. */ 540 if you're passed an "invalid" char * pointer.
324 DECLARE_INLINE_HEADER ( 541 */
325 Bytecount
326 validate_intbyte_string_backward (const Intbyte *ptr, Bytecount n)
327 )
328 {
329 const Intbyte *ptr2;
330
331 if (n == 0)
332 return n;
333 ptr2 = ptr + n - 1;
334 VALIDATE_CHARPTR_BACKWARD (ptr2);
335 if (ptr2 + REP_BYTES_BY_FIRST_BYTE (*ptr2) != ptr + n)
336 return ptr2 - ptr;
337 return n;
338 }
339 542
340 /* This needs to be trickier than VALIDATE_CHARPTR_BACKWARD() to avoid the 543 /* This needs to be trickier than VALIDATE_CHARPTR_BACKWARD() to avoid the
341 possibility of running off the end of the string. */ 544 possibility of running off the end of the string. */
342 545
343 #define VALIDATE_CHARPTR_FORWARD(ptr) do { \ 546 #define VALIDATE_CHARPTR_FORWARD(ptr) do { \
351 } while (0) 554 } while (0)
352 555
353 #else /* not MULE */ 556 #else /* not MULE */
354 #define VALIDATE_CHARPTR_BACKWARD(ptr) 557 #define VALIDATE_CHARPTR_BACKWARD(ptr)
355 #define VALIDATE_CHARPTR_FORWARD(ptr) 558 #define VALIDATE_CHARPTR_FORWARD(ptr)
559 #endif /* not MULE */
560
561 #ifdef MULE
562
563 /* Given an Intbyte string at PTR of size N, possibly with a partial
564 character at the end, return the size of the longest substring of
565 complete characters. Does not assume that the byte at *(PTR + N) is
566 readable. Note that there are not too many places where it is
567 legitimate to do this sort of thing. It's an error if you're passed an
568 "invalid" offset. */
569
570 DECLARE_INLINE_HEADER (
571 Bytecount
572 validate_intbyte_string_backward (const Intbyte *ptr, Bytecount n)
573 )
574 {
575 const Intbyte *ptr2;
576
577 if (n == 0)
578 return n;
579 ptr2 = ptr + n - 1;
580 VALIDATE_CHARPTR_BACKWARD (ptr2);
581 if (ptr2 + rep_bytes_by_first_byte (*ptr2) != ptr + n)
582 return ptr2 - ptr;
583 return n;
584 }
585
586 #else
587
356 #define validate_intbyte_string_backward(ptr, n) (n) 588 #define validate_intbyte_string_backward(ptr, n) (n)
357 #endif /* not MULE */ 589
590 #endif /* MULE */
358 591
359 /* -------------------------------------------------------------- */ 592 /* -------------------------------------------------------------- */
360 /* (B) For working with the length (in bytes and characters) of a */ 593 /* Working with the length (in bytes and characters) of a */
361 /* section of internally-formatted text */ 594 /* section of internally-formatted text */
362 /* -------------------------------------------------------------- */ 595 /* -------------------------------------------------------------- */
363 596
364 INLINE_HEADER const Intbyte * 597 #ifdef MULE
365 charptr_n_addr (const Intbyte *ptr, Charcount offset); 598
366 INLINE_HEADER const Intbyte * 599 Charcount bytecount_to_charcount_fun (const Intbyte *ptr, Bytecount len);
600 Bytecount charcount_to_bytecount_fun (const Intbyte *ptr, Charcount len);
601
602 /* Given a pointer to a text string and a length in bytes, return
603 the equivalent length in characters. */
604
605 DECLARE_INLINE_HEADER (
606 Charcount
607 bytecount_to_charcount (const Intbyte *ptr, Bytecount len)
608 )
609 {
610 if (len < 20) /* Just a random guess, but it should be more or less correct.
611 If number of bytes is small, just do a simple loop,
612 which should be more efficient. */
613 {
614 Charcount count = 0;
615 const Intbyte *end = ptr + len;
616 while (ptr < end)
617 {
618 INC_CHARPTR (ptr);
619 count++;
620 }
621 /* Bomb out if the specified substring ends in the middle
622 of a character. Note that we might have already gotten
623 a core dump above from an invalid reference, but at least
624 we will get no farther than here.
625
626 This also catches len < 0. */
627 text_checking_assert (ptr == end);
628
629 return count;
630 }
631 else
632 return bytecount_to_charcount_fun (ptr, len);
633 }
634
635 /* Given a pointer to a text string and a length in characters, return the
636 equivalent length in bytes.
637 */
638
639 DECLARE_INLINE_HEADER (
640 Bytecount
641 charcount_to_bytecount (const Intbyte *ptr, Charcount len)
642 )
643 {
644 text_checking_assert (len >= 0);
645 if (len < 20) /* See above */
646 {
647 const Intbyte *newptr = ptr;
648 while (len > 0)
649 {
650 INC_CHARPTR (newptr);
651 len--;
652 }
653 return newptr - ptr;
654 }
655 else
656 return charcount_to_bytecount_fun (ptr, len);
657 }
658
659 /* Given a pointer to a text string in the specified format and a length in
660 bytes, return the equivalent length in characters.
661 */
662
663 DECLARE_INLINE_HEADER (
664 Charcount
665 bytecount_to_charcount_fmt (const Intbyte *ptr, Bytecount len,
666 Internal_Format fmt)
667 )
668 {
669 switch (fmt)
670 {
671 case FORMAT_DEFAULT:
672 return bytecount_to_charcount (ptr, len);
673 case FORMAT_16_BIT_FIXED:
674 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
675 return (Charcount) (len << 1);
676 case FORMAT_32_BIT_FIXED:
677 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
678 return (Charcount) (len << 2);
679 default:
680 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
681 return (Charcount) len;
682 }
683 }
684
685 /* Given a pointer to a text string in the specified format and a length in
686 characters, return the equivalent length in bytes.
687 */
688
689 DECLARE_INLINE_HEADER (
690 Bytecount
691 charcount_to_bytecount_fmt (const Intbyte *ptr, Charcount len,
692 Internal_Format fmt)
693 )
694 {
695 switch (fmt)
696 {
697 case FORMAT_DEFAULT:
698 return charcount_to_bytecount (ptr, len);
699 case FORMAT_16_BIT_FIXED:
700 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
701 text_checking_assert (!(len & 1));
702 return (Bytecount) (len >> 1);
703 case FORMAT_32_BIT_FIXED:
704 text_checking_assert (!(len & 3));
705 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
706 return (Bytecount) (len >> 2);
707 default:
708 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
709 return (Bytecount) len;
710 }
711 }
712
713 #else
714
715 #define bytecount_to_charcount(ptr, len) ((Charcount) (len))
716 #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len))
717 #define charcount_to_bytecount(ptr, len) ((Bytecount) (len))
718 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len))
719
720 #endif /* MULE */
721
722 /* Return the length of the first character at PTR. Equivalent to
723 charcount_to_bytecount (ptr, 1).
724
725 [Since charcount_to_bytecount() is written as inline, a smart compiler
726 should really optimize charcount_to_bytecount (ptr, 1) to the same as
727 the following, with no error checking. But since this idiom occurs so
728 often, we'll be helpful and define a special macro for it.]
729 */
730
731 #define charptr_emchar_len(ptr) rep_bytes_by_first_byte (*(ptr))
732
733 /* Return the length of the first character at PTR, which is in the
734 specified internal format. Equivalent to charcount_to_bytecount_fmt
735 (ptr, 1, fmt).
736 */
737
738 DECLARE_INLINE_HEADER (
739 Bytecount
740 charptr_emchar_len_fmt (const Intbyte *ptr, Internal_Format fmt)
741 )
742 {
743 switch (fmt)
744 {
745 case FORMAT_DEFAULT:
746 return charptr_emchar_len (ptr);
747 case FORMAT_16_BIT_FIXED:
748 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
749 return 2;
750 case FORMAT_32_BIT_FIXED:
751 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
752 return 4;
753 default:
754 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
755 return 1;
756 }
757 }
758
759 /* Return a pointer to the beginning of the character offset N (in
760 characters) from PTR.
761 */
762
763 DECLARE_INLINE_HEADER (
764 const Intbyte *
367 charptr_n_addr (const Intbyte *ptr, Charcount offset) 765 charptr_n_addr (const Intbyte *ptr, Charcount offset)
766 )
368 { 767 {
369 return ptr + charcount_to_bytecount (ptr, offset); 768 return ptr + charcount_to_bytecount (ptr, offset);
370 } 769 }
371 770
/* Given a charptr PTR and a byte offset POS into the text it points to,
   advance POS so that it refers to the beginning of the next character.

   The character stepped over is the one at PTR + POS, so that is the
   address checked (when text checking is enabled) for being the start
   of a character.  POS must be a modifiable lvalue; both arguments are
   evaluated more than once.
*/

#define INC_BYTECOUNT(ptr, pos) do {				\
  assert_valid_charptr ((ptr) + (pos));				\
  (pos) += rep_bytes_by_first_byte (* ((ptr) + (pos)));	\
} while (0)
779
372 /* -------------------------------------------------------------------- */ 780 /* -------------------------------------------------------------------- */
373 /* (C) For retrieving or changing the character pointed to by a charptr */ 781 /* Retrieving or changing the character pointed to by a charptr */
374 /* -------------------------------------------------------------------- */ 782 /* -------------------------------------------------------------------- */
375 783
376 #define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0]) 784 #define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0])
377 #define simple_set_charptr_emchar(ptr, x) \ 785 #define simple_set_charptr_emchar(ptr, x) \
378 ((ptr)[0] = (Intbyte) (x), (Bytecount) 1) 786 ((ptr)[0] = (Intbyte) (x), (Bytecount) 1)
379 #define simple_charptr_copy_char(src, dst) \ 787 #define simple_charptr_copy_emchar(src, dst) \
380 ((dst)[0] = *(src), (Bytecount) 1) 788 ((dst)[0] = *(src), (Bytecount) 1)
381 789
382 #ifdef MULE 790 #ifdef MULE
383 791
384 Emchar non_ascii_charptr_emchar (const Intbyte *ptr); 792 Emchar non_ascii_charptr_emchar (const Intbyte *ptr);
385 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c); 793 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c);
386 Bytecount non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst); 794 Bytecount non_ascii_charptr_copy_emchar (const Intbyte *src, Intbyte *dst);
387 795
388 INLINE_HEADER Emchar charptr_emchar (const Intbyte *ptr); 796 /* Retrieve the character pointed to by PTR as an Emchar. */
389 INLINE_HEADER Emchar 797
798 DECLARE_INLINE_HEADER (
799 Emchar
390 charptr_emchar (const Intbyte *ptr) 800 charptr_emchar (const Intbyte *ptr)
391 { 801 )
392 return BYTE_ASCII_P (*ptr) ? 802 {
803 return byte_ascii_p (*ptr) ?
393 simple_charptr_emchar (ptr) : 804 simple_charptr_emchar (ptr) :
394 non_ascii_charptr_emchar (ptr); 805 non_ascii_charptr_emchar (ptr);
395 } 806 }
396 807
397 INLINE_HEADER Bytecount set_charptr_emchar (Intbyte *ptr, Emchar x); 808 /* Retrieve the character pointed to by PTR (a pointer to text in the
398 INLINE_HEADER Bytecount 809 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an
810 Emchar.
811
812 Note: For these and other *_fmt() functions, if you pass in a constant
813 FMT, the switch will be optimized out of existence. Therefore, there is
814 no need to create separate versions for the various formats for
815 "efficiency reasons". In fact, we don't really need charptr_emchar()
816 and such written separately, but they are used often so it's simpler
817 that way. */
818
819 DECLARE_INLINE_HEADER (
820 Emchar
821 charptr_emchar_fmt (const Intbyte *ptr, Internal_Format fmt,
822 Lisp_Object object)
823 )
824 {
825 switch (fmt)
826 {
827 case FORMAT_DEFAULT:
828 return charptr_emchar (ptr);
829 case FORMAT_16_BIT_FIXED:
830 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
831 return raw_16_bit_fixed_to_emchar (* (UINT_16_BIT *) ptr, object);
832 case FORMAT_32_BIT_FIXED:
833 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
834 return raw_32_bit_fixed_to_emchar (* (UINT_32_BIT *) ptr, object);
835 default:
836 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
837 return raw_8_bit_fixed_to_emchar (*ptr, object);
838 }
839 }
840
841 /* Return the character at PTR (which is in format FMT), suitable for
842 comparison with an ASCII character. This guarantees that if the
843 character at PTR is ASCII (range 0 - 127), that character will be
844 returned; otherwise, some character outside of the ASCII range will be
845 returned, but not necessarily the character actually at PTR. This will
846 be faster than charptr_emchar_fmt() for some formats -- in particular,
847 FORMAT_DEFAULT. */
848
849 DECLARE_INLINE_HEADER (
850 Emchar
851 charptr_emchar_ascii_fmt (const Intbyte *ptr, Internal_Format fmt,
852 Lisp_Object object)
853 )
854 {
855 switch (fmt)
856 {
857 case FORMAT_DEFAULT:
858 return (Emchar) *ptr;
859 case FORMAT_16_BIT_FIXED:
860 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
861 return raw_16_bit_fixed_to_emchar (* (UINT_16_BIT *) ptr, object);
862 case FORMAT_32_BIT_FIXED:
863 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
864 return raw_32_bit_fixed_to_emchar (* (UINT_32_BIT *) ptr, object);
865 default:
866 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
867 return raw_8_bit_fixed_to_emchar (*ptr, object);
868 }
869 }
870
871 /* Return the "raw value" of the character at PTR, in format FMT. This is
872 useful when searching for a character; convert the character using
873 emchar_to_raw(). */
874
875 DECLARE_INLINE_HEADER (
876 Raw_Emchar
877 charptr_emchar_raw_fmt (const Intbyte *ptr, Internal_Format fmt)
878 )
879 {
880 switch (fmt)
881 {
882 case FORMAT_DEFAULT:
883 return (Raw_Emchar) charptr_emchar (ptr);
884 case FORMAT_16_BIT_FIXED:
885 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
886 return (Raw_Emchar) (* (UINT_16_BIT *) ptr);
887 case FORMAT_32_BIT_FIXED:
888 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
889 return (Raw_Emchar) (* (UINT_32_BIT *) ptr);
890 default:
891 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
892 return (Raw_Emchar) (*ptr);
893 }
894 }
895
896 /* Store the character CH (an Emchar) as internally-formatted text starting
897 at PTR. Return the number of bytes stored.
898 */
899
900 DECLARE_INLINE_HEADER (
901 Bytecount
399 set_charptr_emchar (Intbyte *ptr, Emchar x) 902 set_charptr_emchar (Intbyte *ptr, Emchar x)
400 { 903 )
401 return !CHAR_MULTIBYTE_P (x) ? 904 {
905 return !emchar_multibyte_p (x) ?
402 simple_set_charptr_emchar (ptr, x) : 906 simple_set_charptr_emchar (ptr, x) :
403 non_ascii_set_charptr_emchar (ptr, x); 907 non_ascii_set_charptr_emchar (ptr, x);
404 } 908 }
405 909
406 INLINE_HEADER Bytecount 910 /* Store the character CH (an Emchar) as internally-formatted text of
407 charptr_copy_char (const Intbyte *src, Intbyte *dst); 911 format FMT starting at PTR, which comes from OBJECT. Return the number
408 INLINE_HEADER Bytecount 912 of bytes stored.
409 charptr_copy_char (const Intbyte *src, Intbyte *dst) 913 */
410 { 914
411 return BYTE_ASCII_P (*src) ? 915 DECLARE_INLINE_HEADER (
412 simple_charptr_copy_char (src, dst) : 916 Bytecount
413 non_ascii_charptr_copy_char (src, dst); 917 set_charptr_emchar_fmt (Intbyte *ptr, Emchar x, Internal_Format fmt,
918 Lisp_Object object)
919 )
920 {
921 switch (fmt)
922 {
923 case FORMAT_DEFAULT:
924 return set_charptr_emchar (ptr, x);
925 case FORMAT_16_BIT_FIXED:
926 text_checking_assert (emchar_16_bit_fixed_p (x, object));
927 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
928 * (UINT_16_BIT *) ptr = emchar_to_raw_16_bit_fixed (x, object);
929 return 2;
930 case FORMAT_32_BIT_FIXED:
931 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
932 * (UINT_32_BIT *) ptr = emchar_to_raw_32_bit_fixed (x, object);
933 return 4;
934 default:
935 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
936 text_checking_assert (emchar_8_bit_fixed_p (x, object));
937 *ptr = emchar_to_raw_8_bit_fixed (x, object);
938 return 1;
939 }
940 }
941
942 /* Retrieve the character pointed to by SRC and store it as
943 internally-formatted text in DST.
944 */
945
946 DECLARE_INLINE_HEADER (
947 Bytecount
948 charptr_copy_emchar (const Intbyte *src, Intbyte *dst)
949 )
950 {
951 return byte_ascii_p (*src) ?
952 simple_charptr_copy_emchar (src, dst) :
953 non_ascii_charptr_copy_emchar (src, dst);
414 } 954 }
415 955
416 #else /* not MULE */ 956 #else /* not MULE */
417 957
418 # define charptr_emchar(ptr) simple_charptr_emchar (ptr) 958 # define charptr_emchar(ptr) simple_charptr_emchar (ptr)
419 # define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x) 959 # define charptr_emchar_fmt(ptr, fmt, object) charptr_emchar (ptr)
420 # define charptr_copy_char(src, dst) simple_charptr_copy_char (src, dst) 960 # define charptr_emchar_ascii_fmt(ptr, fmt, object) charptr_emchar (ptr)
961 # define charptr_emchar_raw_fmt(ptr, fmt) charptr_emchar (ptr)
962 # define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x)
963 # define set_charptr_emchar_fmt(ptr, x, fmt, obj) set_charptr_emchar (ptr, x)
964 # define charptr_copy_emchar(src, dst) simple_charptr_copy_emchar (src, dst)
421 965
422 #endif /* not MULE */ 966 #endif /* not MULE */
423 967
968 /* Retrieve the character at offset N (in characters) from PTR, as an
969 Emchar.
970 */
971
424 #define charptr_emchar_n(ptr, offset) \ 972 #define charptr_emchar_n(ptr, offset) \
425 charptr_emchar (charptr_n_addr (ptr, offset)) 973 charptr_emchar (charptr_n_addr (ptr, offset))
426 974
427 975
428 /* ---------------------------- */ 976 /* ---------------------------- */
429 /* (D) For working with Emchars */ 977 /* Working with Emchars */
430 /* ---------------------------- */ 978 /* ---------------------------- */
431 979
980 /* NOTE: There are other functions/macros for working with Emchars in
981 charset.h, for retrieving the charset of an Emchar, the length of an
982 Emchar when converted to text, etc.
983 */
984
432 #ifdef MULE 985 #ifdef MULE
433 986
434 int non_ascii_valid_char_p (Emchar ch); 987 int non_ascii_valid_emchar_p (Emchar ch);
435 988
436 INLINE_HEADER int valid_char_p (Emchar ch); 989 /* Return whether the given Emchar is valid.
437 INLINE_HEADER int 990 */
438 valid_char_p (Emchar ch) 991
439 { 992 DECLARE_INLINE_HEADER (
440 return (! (ch & ~0xFF)) || non_ascii_valid_char_p (ch); 993 int
994 valid_emchar_p (Emchar ch)
995 )
996 {
997 return (! (ch & ~0xFF)) || non_ascii_valid_emchar_p (ch);
441 } 998 }
442 999
443 #else /* not MULE */ 1000 #else /* not MULE */
444 1001
445 #define valid_char_p(ch) (! (ch & ~0xFF)) 1002 #define valid_emchar_p(ch) (! (ch & ~0xFF))
446 1003
447 #endif /* not MULE */ 1004 #endif /* not MULE */
448 1005
449 #define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x))) 1006 #define CHAR_INTP(x) (INTP (x) && valid_emchar_p (XINT (x)))
450 1007
451 #define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x)) 1008 #define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x))
452 1009
453 INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj); 1010 DECLARE_INLINE_HEADER (
454 INLINE_HEADER Emchar 1011 Emchar
455 XCHAR_OR_CHAR_INT (Lisp_Object obj) 1012 XCHAR_OR_CHAR_INT (Lisp_Object obj)
1013 )
456 { 1014 {
457 return CHARP (obj) ? XCHAR (obj) : XINT (obj); 1015 return CHARP (obj) ? XCHAR (obj) : XINT (obj);
458 } 1016 }
1017
1018 /* Signal an error if CH is not a valid character or integer Lisp_Object.
1019 If CH is an integer Lisp_Object, convert it to a character Lisp_Object,
1020 but merely by repackaging, without performing tests for char validity.
1021 */
459 1022
460 #define CHECK_CHAR_COERCE_INT(x) do { \ 1023 #define CHECK_CHAR_COERCE_INT(x) do { \
461 if (CHARP (x)) \ 1024 if (CHARP (x)) \
462 ; \ 1025 ; \
463 else if (CHAR_INTP (x)) \ 1026 else if (CHAR_INTP (x)) \
464 x = make_char (XINT (x)); \ 1027 x = make_char (XINT (x)); \
465 else \ 1028 else \
466 x = wrong_type_argument (Qcharacterp, x); \ 1029 x = wrong_type_argument (Qcharacterp, x); \
467 } while (0) 1030 } while (0)
468 1031
1032
1033
1034 /************************************************************************/
1035 /* */
1036 /* working with Lisp strings */
1037 /* */
1038 /************************************************************************/
1039
1040 #define string_char_length(s) \
1041 string_index_byte_to_char (s, XSTRING_LENGTH (s))
1042 #define string_byte(s, i) (XSTRING_DATA (s)[i] + 0)
1043 /* In case we ever allow strings to be in a different format ... */
1044 #define set_string_byte(s, i, c) (XSTRING_DATA (s)[i] = (c))
1045
/* With text checking enabled, assert that X is a valid *character*
   index into string S, i.e. 0 <= X <= length of S in characters.
   X is evaluated more than once (presumably the reason for the
   _UNSAFE suffix), so do not pass an expression with side effects. */
#define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do {		\
  text_checking_assert ((x) >= 0 && (x) <= string_char_length (s));	\
} while (0)

/* With text checking enabled, assert that X is a valid *byte* index
   into string S: 0 <= X <= length of S in bytes, and the byte at that
   offset begins a character.  X is evaluated more than once. */
#define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do {		\
  text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s));	\
  text_checking_assert (valid_charptr_p (string_byte_addr (s, x)));	\
} while (0)
1054
1055 /* Convert offset I in string S to a pointer to text there. */
1056 #define string_byte_addr(s, i) (&(XSTRING_DATA (s)[i]))
1057 /* Convert pointer to text in string S into the byte offset to that text. */
1058 #define string_addr_to_byte(s, ptr) ((Bytecount) ((ptr) - XSTRING_DATA (s)))
1059 /* Return the Emchar at *CHARACTER* offset I. */
1060 #define string_emchar(s, i) charptr_emchar (string_char_addr (s, i))
1061
1062 #ifdef ERROR_CHECK_TEXT
1063 #define SLEDGEHAMMER_CHECK_ASCII_BEGIN
1064 #endif
1065
1066 #ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
1067 void sledgehammer_check_ascii_begin (Lisp_Object str);
1068 #else
1069 #define sledgehammer_check_ascii_begin(str)
1070 #endif
1071
1072 /* Make an alloca'd copy of a Lisp string */
1073 #define LISP_STRING_TO_ALLOCA(s, lval) \
1074 do { \
1075 Intbyte **_lta_ = (Intbyte **) &(lval); \
1076 Lisp_Object _lta_2 = (s); \
1077 *_lta_ = alloca_array (Intbyte, 1 + XSTRING_LENGTH (_lta_2)); \
1078 memcpy (*_lta_, XSTRING_DATA (_lta_2), 1 + XSTRING_LENGTH (_lta_2)); \
1079 } while (0)
1080
1081 /* Make an alloca'd copy of an Intbyte * */
1082 #define INTBYTE_STRING_TO_ALLOCA(p, lval) \
1083 do { \
1084 Intbyte **_bsta_ = (Intbyte **) &(lval); \
1085 const Intbyte *_bsta_2 = (p); \
1086 Bytecount _bsta_3 = qxestrlen (_bsta_2); \
1087 *_bsta_ = alloca_array (Intbyte, 1 + _bsta_3); \
1088 memcpy (*_bsta_, _bsta_2, 1 + _bsta_3); \
1089 } while (0)
1090
1091 #define alloca_intbytes(num) alloca_array (Intbyte, num)
1092 #define alloca_extbytes(num) alloca_array (Extbyte, num)
1093
1094 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta);
1095
1096 /* Convert a byte index into a string into a char index. */
1097 DECLARE_INLINE_HEADER (
1098 Charcount
1099 string_index_byte_to_char (Lisp_Object s, Bytecount idx)
1100 )
1101 {
1102 Charcount retval;
1103 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx);
1104 #ifdef MULE
1105 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s))
1106 retval = (Charcount) idx;
1107 else
1108 retval = (XSTRING_ASCII_BEGIN (s) +
1109 bytecount_to_charcount (XSTRING_DATA (s) +
1110 XSTRING_ASCII_BEGIN (s),
1111 idx - XSTRING_ASCII_BEGIN (s)));
1112 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
1113 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx));
1114 # endif
1115 #else
1116 retval = (Charcount) idx;
1117 #endif
1118 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will
1119 call string_index_byte_to_char(). */
1120 return retval;
1121 }
1122
1123 /* Convert a char index into a string into a byte index. */
1124 DECLARE_INLINE_HEADER (
1125 Bytecount
1126 string_index_char_to_byte (Lisp_Object s, Charcount idx)
1127 )
1128 {
1129 Bytecount retval;
1130 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx);
1131 #ifdef MULE
1132 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s))
1133 retval = (Bytecount) idx;
1134 else
1135 retval = (XSTRING_ASCII_BEGIN (s) +
1136 charcount_to_bytecount (XSTRING_DATA (s) +
1137 XSTRING_ASCII_BEGIN (s),
1138 idx - XSTRING_ASCII_BEGIN (s)));
1139 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
1140 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx));
1141 # endif
1142 #else
1143 retval = (Bytecount) idx;
1144 #endif
1145 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval);
1146 return retval;
1147 }
1148
1149 /* Convert a substring length (starting at byte offset OFF) from bytes to
1150 chars. */
1151 DECLARE_INLINE_HEADER (
1152 Charcount
1153 string_offset_byte_to_char_len (Lisp_Object s, Bytecount off, Bytecount len)
1154 )
1155 {
1156 Charcount retval;
1157 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
1158 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len);
1159 #ifdef MULE
1160 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
1161 retval = (Charcount) len;
1162 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
1163 retval =
1164 XSTRING_ASCII_BEGIN (s) - (Charcount) off +
1165 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
1166 len - (XSTRING_ASCII_BEGIN (s) - off));
1167 else
1168 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len);
1169 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
1170 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len));
1171 # endif
1172 #else
1173 retval = (Charcount) len;
1174 #endif
1175 return retval;
1176 }
1177
1178 /* Convert a substring length (starting at byte offset OFF) from chars to
1179 bytes. */
1180 DECLARE_INLINE_HEADER (
1181 Bytecount
1182 string_offset_char_to_byte_len (Lisp_Object s, Bytecount off, Charcount len)
1183 )
1184 {
1185 Bytecount retval;
1186 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
1187 #ifdef MULE
1188 /* casts to avoid errors from combining Bytecount/Charcount and warnings
1189 from signed/unsigned comparisons */
1190 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
1191 retval = (Bytecount) len;
1192 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
1193 retval =
1194 XSTRING_ASCII_BEGIN (s) - off +
1195 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
1196 len - (XSTRING_ASCII_BEGIN (s) -
1197 (Charcount) off));
1198 else
1199 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len);
1200 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
1201 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len));
1202 # endif
1203 #else
1204 retval = (Bytecount) len;
1205 #endif
1206 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval);
1207 return retval;
1208 }
1209
1210 DECLARE_INLINE_HEADER (
1211 const Intbyte *
1212 string_char_addr (Lisp_Object s, Charcount idx)
1213 )
1214 {
1215 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx);
1216 }
1217
1218 /* WARNING: If you modify an existing string, you must call
1219 bump_string_modiff() afterwards. */
1220 #ifdef MULE
1221 void set_string_char (Lisp_Object s, Charcount i, Emchar c);
1222 #else
1223 #define set_string_char(s, i, c) set_string_byte (s, i, c)
1224 #endif /* not MULE */
1225
1226 /* Return index to character before the one at IDX. */
1227 DECLARE_INLINE_HEADER (
1228 Bytecount
1229 prev_string_index (Lisp_Object s, Bytecount idx)
1230 )
1231 {
1232 const Intbyte *ptr = string_byte_addr (s, idx);
1233 DEC_CHARPTR (ptr);
1234 return string_addr_to_byte (s, ptr);
1235 }
1236
1237 /* Return index to character after the one at IDX. */
1238 DECLARE_INLINE_HEADER (
1239 Bytecount
1240 next_string_index (Lisp_Object s, Bytecount idx)
1241 )
1242 {
1243 const Intbyte *ptr = string_byte_addr (s, idx);
1244 INC_CHARPTR (ptr);
1245 return string_addr_to_byte (s, ptr);
1246 }
469 1247
470 1248
471 /************************************************************************/ 1249 /************************************************************************/
472 /* */ 1250 /* */
473 /* working with Eistrings */ 1251 /* working with Eistrings */
596 void eicpy_rawz (Eistring *eistr, const Intbyte *data); 1374 void eicpy_rawz (Eistring *eistr, const Intbyte *data);
597 ... from raw internal-format data in the default internal format 1375 ... from raw internal-format data in the default internal format
598 that is "null-terminated" (the meaning of this depends on the nature 1376 that is "null-terminated" (the meaning of this depends on the nature
599 of the default internal format). 1377 of the default internal format).
600 void eicpy_raw_fmt (Eistring *eistr, const Intbyte *data, Bytecount len, 1378 void eicpy_raw_fmt (Eistring *eistr, const Intbyte *data, Bytecount len,
601 Internal_Format intfmt); 1379 Internal_Format intfmt, Lisp_Object object);
602 ... from raw internal-format data in the specified format. 1380 ... from raw internal-format data in the specified format.
603 void eicpy_rawz_fmt (Eistring *eistr, const Intbyte *data, 1381 void eicpy_rawz_fmt (Eistring *eistr, const Intbyte *data,
604 Internal_Format intfmt); 1382 Internal_Format intfmt, Lisp_Object object);
605 ... from raw internal-format data in the specified format that is 1383 ... from raw internal-format data in the specified format that is
606 "null-terminated" (the meaning of this depends on the nature of 1384 "null-terminated" (the meaning of this depends on the nature of
607 the specific format). 1385 the specific format).
608 void eicpy_c (Eistring *eistr, const Char_ASCII *c_string); 1386 void eicpy_c (Eistring *eistr, const Char_ASCII *c_string);
609 ... from an ASCII null-terminated string. Non-ASCII characters in 1387 ... from an ASCII null-terminated string. Non-ASCII characters in
649 is stored in PTR_OUT, and the length of the data (not including 1427 is stored in PTR_OUT, and the length of the data (not including
650 the terminating zero) is stored in LEN_OUT. 1428 the terminating zero) is stored in LEN_OUT.
651 1429
652 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Intbyte *ptr_out, 1430 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Intbyte *ptr_out,
653 LVALUE: Bytecount len_out, 1431 LVALUE: Bytecount len_out,
654 Internal_Format intfmt); 1432 Internal_Format intfmt, Lisp_Object object);
655 Like eicpyout_alloca(), but converts to the specified internal 1433 Like eicpyout_alloca(), but converts to the specified internal
656 format. (No formats other than FORMAT_DEFAULT are currently 1434 format. (No formats other than FORMAT_DEFAULT are currently
657 implemented, and you get an assertion failure if you try.) 1435 implemented, and you get an assertion failure if you try.)
658 1436
659 Intbyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out); 1437 Intbyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out);
662 passed in. Returns the new data, and stores the length (not 1440 passed in. Returns the new data, and stores the length (not
663 including the terminating zero) using INTLEN_OUT, unless it's 1441 including the terminating zero) using INTLEN_OUT, unless it's
664 a NULL pointer. 1442 a NULL pointer.
665 1443
666 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *intlen_out, 1444 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *intlen_out,
667 Internal_Format intfmt); 1445 Internal_Format intfmt, Lisp_Object object);
668 Like eicpyout_malloc(), but converts to the specified internal 1446 Like eicpyout_malloc(), but converts to the specified internal
669 format. (No formats other than FORMAT_DEFAULT are currently 1447 format. (No formats other than FORMAT_DEFAULT are currently
670 implemented, and you get an assertion failure if you try.) 1448 implemented, and you get an assertion failure if you try.)
671 1449
672 1450
1004 int mallocp_; 1782 int mallocp_;
1005 1783
1006 Extbyte *extdata_; 1784 Extbyte *extdata_;
1007 Bytecount extlen_; 1785 Bytecount extlen_;
1008 } Eistring; 1786 } Eistring;
1009
/* Selects the representation of internal-format text handed to the
   *_fmt() operations.  FORMAT_DEFAULT is the default internal format;
   the FIXED_8/16/32 members presumably name fixed-width 8-, 16- and
   32-bit representations -- confirm against the implementation. */
enum internal_format
{
  FORMAT_DEFAULT,
  FORMAT_FIXED_8,
  FORMAT_FIXED_16,
  FORMAT_FIXED_32
};

typedef enum internal_format Internal_Format;
1017 1787
1018 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init; 1788 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init;
1019 1789
1020 #define DECLARE_EISTRING(name) \ 1790 #define DECLARE_EISTRING(name) \
1021 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \ 1791 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \
1144 1914
/* Set Eistring EI from the Lisp string LISP_STRING.  LISP_STRING is
   captured once in a local; its data pointer, byte length and character
   length are then handed to EI_ALLOC_AND_COPY. */
1145 #define eicpy_lstr(ei, lisp_string) \ 1915 #define eicpy_lstr(ei, lisp_string) \
1146 do { \ 1916 do { \
1147 Lisp_Object ei3 = (lisp_string); \ 1917 Lisp_Object ei3 = (lisp_string); \
1148 EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3), \ 1918 EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3), \
1149 XSTRING_CHAR_LENGTH (ei3)); \ 1919 string_char_length (ei3)); \
1150 } while (0) 1920 } while (0)
1151 1921
1152 #define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen) \ 1922 #define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen) \
1153 do { \ 1923 do { \
1154 Lisp_Object ei23lstr = (lisp_string); \ 1924 Lisp_Object ei23lstr = (lisp_string); \
1164 eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen); \ 1934 eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen); \
1165 \ 1935 \
1166 EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen); \ 1936 EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen); \
1167 } while (0) 1937 } while (0)
1168 1938
/* Set Eistring EI from LEN bytes of raw internal-format text at PTR.
   FMT must be FORMAT_DEFAULT (checked with assert).  OBJECT is accepted
   but not referenced by this macro.  PTR, LEN and FMT are each evaluated
   exactly once.  The length is held in a Bytecount, matching the
   documented parameter type, so it is not narrowed to int. */
#define eicpy_raw_fmt(ei, ptr, len, fmt, object)                        \
do {                                                                    \
  const Intbyte *ei12ptr = (ptr);                                       \
  Internal_Format ei12fmt = (fmt);                                      \
  Bytecount ei12len = (len);                                            \
  assert (ei12fmt == FORMAT_DEFAULT);                                   \
  EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len,                              \
                     bytecount_to_charcount (ei12ptr, ei12len));        \
} while (0)

/* Same as eicpy_raw_fmt() for data in the default internal format. */
#define eicpy_raw(ei, ptr, len) \
  eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil)
/* Set Eistring EI from "null-terminated" raw internal-format text at PTR.
   FMT must be FORMAT_DEFAULT (checked with assert); the length is taken
   with qxestrlen() and the work is forwarded to eicpy_raw_fmt().
   PTR and FMT are each evaluated exactly once: the captured format value,
   not the FMT expression, is what gets forwarded. */
#define eicpy_rawz_fmt(ei, ptr, fmt, object)                            \
do {                                                                    \
  const Intbyte *ei12p1ptr = (ptr);                                     \
  Internal_Format ei12p1fmt = (fmt);                                    \
  assert (ei12p1fmt == FORMAT_DEFAULT);                                 \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt,       \
                 object);                                               \
} while (0)

/* Same as eicpy_rawz_fmt() for data in the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil)
1190 1961
1191 #define eicpy_ch(ei, ch) \ 1962 #define eicpy_ch(ei, ch) \
1192 do { \ 1963 do { \
1193 Intbyte ei12p2[MAX_EMCHAR_LEN]; \ 1964 Intbyte ei12p2[MAX_EMCHAR_LEN]; \
1194 Bytecount ei12p2len = set_charptr_emchar (ei12p2, ch); \ 1965 Bytecount ei12p2len = set_charptr_emchar (ei12p2, ch); \
1259 \ 2030 \
1260 return make_string ((eistr)->data_ + ei24off, ei24len); \ 2031 return make_string ((eistr)->data_ + ei24off, ei24len); \
1261 } while (0) 2032 } while (0)
1262 2033
/* eicpyout_alloca() and eicpyout_malloc() forward to their *_fmt
   counterparts, supplying FORMAT_DEFAULT as the format.  Note the argument
   order of eicpyout_malloc_fmt(): the length pointer comes before the
   format. */
1263 #define eicpyout_alloca(eistr, ptrout, lenout) \ 2034 #define eicpyout_alloca(eistr, ptrout, lenout) \
1264 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT) 2035 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil)
1265 #define eicpyout_malloc(eistr, lenout) \ 2036 #define eicpyout_malloc(eistr, lenout) \
1266 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT) 2037 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil)
1267 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, 2038 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out,
1268 Internal_Format fmt); 2039 Internal_Format fmt, Lisp_Object object);
1269 #define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt) \ 2040 #define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object) \
1270 do { \ 2041 do { \
1271 Internal_Format ei23fmt = (fmt); \ 2042 Internal_Format ei23fmt = (fmt); \
1272 Intbyte *ei23ptrout = &(ptrout); \ 2043 Intbyte *ei23ptrout = &(ptrout); \
1273 Bytecount *ei23lenout = &(lenout); \ 2044 Bytecount *ei23lenout = &(lenout); \
1274 \ 2045 \
1447 2218
/* Concatenation counterpart of eicpy_lstr(): LISP_STRING is captured once
   in a local, and its data pointer, byte length and character length are
   handed to eicat_1() together with EI. */
1448 #define eicat_lstr(ei, lisp_string) \ 2219 #define eicat_lstr(ei, lisp_string) \
1449 do { \ 2220 do { \
1450 Lisp_Object ei17 = (lisp_string); \ 2221 Lisp_Object ei17 = (lisp_string); \
1451 eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \ 2222 eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \
1452 XSTRING_CHAR_LENGTH (ei17)); \ 2223 string_char_length (ei17)); \
1453 } while (0) 2224 } while (0)
1454 2225
1455 #define eicat_ch(ei, ch) \ 2226 #define eicat_ch(ei, ch) \
1456 do { \ 2227 do { \
1457 Intbyte ei22ch[MAX_EMCHAR_LEN]; \ 2228 Intbyte ei22ch[MAX_EMCHAR_LEN]; \
2021 int __gsnum__ = (num); \ 2792 int __gsnum__ = (num); \
2022 Extbyte * __gserr__ = strerror (__gsnum__); \ 2793 Extbyte * __gserr__ = strerror (__gsnum__); \
2023 \ 2794 \
2024 if (!__gserr__) \ 2795 if (!__gserr__) \
2025 { \ 2796 { \
2026 var = alloca_intbytes (99); \ 2797 var = alloca_intbytes (99); \
2027 qxesprintf (var, "Unknown error %d", __gsnum__); \ 2798 qxesprintf (var, "Unknown error %d", __gsnum__); \
2028 } \ 2799 } \
2029 else \ 2800 else \
2030 EXTERNAL_TO_C_STRING (__gserr__, var, Qstrerror_encoding); \ 2801 EXTERNAL_TO_C_STRING (__gserr__, var, Qstrerror_encoding); \
2031 } while (0) 2802 } while (0)
2032 2803
2033 /************************************************************************/
2034 /* Lisp string representation convenience functions */
2035 /************************************************************************/
2036
2037 /* Because the representation of internally formatted data is subject
2038 to change, it's bad style to do something like
2039
2040 strcmp (XSTRING_DATA (s), "foo")
2041
2042 Instead, use the portable:
2043
2044 intbyte_strcmp (XSTRING_DATA (s), "foo") or
2045 intbyte_memcmp (XSTRING_DATA (s), "foo", 3)
2046
2047 */
2048
2049 /* Like strcmp, except first arg points at internally formatted data,
2050 while the second points at a string of only ASCII chars. */
2051 DECLARE_INLINE_HEADER (
2052 int
2053 intbyte_strcmp (const Intbyte *bp, const char *ascii_string)
2054 )
2055 {
2056 #ifdef MULE
2057 while (1)
2058 {
2059 int diff;
2060 type_checking_assert (BYTE_ASCII_P (*ascii_string));
2061 if ((diff = charptr_emchar (bp) - *(Intbyte *) ascii_string) != 0)
2062 return diff;
2063 if (*ascii_string == '\0')
2064 return 0;
2065 ascii_string++;
2066 INC_CHARPTR (bp);
2067 }
2068 #else
2069 return strcmp ((char *)bp, ascii_string);
2070 #endif
2071 }
2072
2073 /* Like memcmp, except first arg points at internally formatted data,
2074 while the second points at a string of only ASCII chars. */
2075
2076 DECLARE_INLINE_HEADER (
2077 int
2078 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len)
2079 )
2080 {
2081 #ifdef MULE
2082 while (len--)
2083 {
2084 int diff = charptr_emchar (bp) - *(Intbyte *) ascii_string;
2085 type_checking_assert (BYTE_ASCII_P (*ascii_string));
2086 if (diff != 0)
2087 return diff;
2088 ascii_string++;
2089 INC_CHARPTR (bp);
2090 }
2091 return 0;
2092 #else
2093 return memcmp (bp, ascii_string, len);
2094 #endif
2095 }
2096
2097 #endif /* INCLUDED_text_h_ */ 2804 #endif /* INCLUDED_text_h_ */