771
|
1 /* Header file for text manipulation primitives and macros.
|
|
2 Copyright (C) 1985-1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
788
|
4 Copyright (C) 2000, 2001, 2002 Ben Wing.
|
771
|
5
|
|
6 This file is part of XEmacs.
|
|
7
|
|
8 XEmacs is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published by the
|
|
10 Free Software Foundation; either version 2, or (at your option) any
|
|
11 later version.
|
|
12
|
|
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with XEmacs; see the file COPYING. If not, write to
|
|
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
21 Boston, MA 02111-1307, USA. */
|
|
22
|
|
23 /* Synched up with: FSF 19.30. */
|
|
24
|
|
25 /* Authorship:
|
|
26
|
|
27 Mostly written by Ben Wing, starting around 1995.
|
|
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz,
|
|
29 designed by Ben Wing based on earlier macros by Ben Wing.
|
|
30 Separated out June 18, 2000 from buffer.h into text.h.
|
|
31 */
|
|
32
|
|
33 #ifndef INCLUDED_text_h_
|
|
34 #define INCLUDED_text_h_
|
|
35
|
|
36 #include <wchar.h>
|
|
37
|
|
38 /* ---------------------------------------------------------------------- */
|
|
39 /* Super-basic character properties */
|
|
40 /* ---------------------------------------------------------------------- */
|
|
41
|
|
42 /* These properties define the specifics of how our current encoding fits
|
|
43 in the basic model used for the encoding. Because this model is the same
|
|
44 as is used for UTF-8, all these properties could be defined for it, too.
|
|
45 This would instantly make the rest of this file work with UTF-8 (with
|
|
46 the exception of a few called functions that would need to be redefined).
|
|
47
|
|
48 (UTF-2000 implementers, take note!)
|
|
49 */
|
|
50
|
|
51 /* If you want more than this, you need to include charset.h */
|
|
52
|
|
53 #ifndef MULE
|
|
54
|
826
|
55 #define rep_bytes_by_first_byte(fb) 1
|
|
56 #define byte_ascii_p(byte) 1
|
|
57 #define MAX_EMCHAR_LEN 1
|
771
|
58
|
|
59 #else /* MULE */
|
|
60
|
|
/* Byte classification predicates.  Each masks off the low-order bits that
   may vary within the class and tests what is left.  For a negative
   argument (e.g. a high-bit byte held in a signed char) the sign bits
   survive the mask, so byte_ascii_p() and byte_c0_p() yield 0.
   NOTE(review): byte_c1_p() yields 0 in that case as well, so hand it an
   unsigned value. */
/* SPC and DEL count as ASCII, not as control characters. */

#define byte_ascii_p(byte) (!((byte) & ~0x7f))
#define byte_c0_p(byte) (!((byte) & ~0x1f))
#define byte_c1_p(byte) ((((byte) & ~0x1f) ^ 0x80) == 0)
|
771
|
67
|
|
/* Does BYTE represent the first byte of a character?  It does exactly
   when it is below 0xA0.  With ERROR_CHECK_TEXT, BYTE is additionally
   asserted to lie in the range 0 - 255, and the caller's file and line
   are handed to assert_at_line(). */

#ifndef ERROR_CHECK_TEXT

#define intbyte_first_byte_p(byte) ((byte) < 0xA0)

#else /* ERROR_CHECK_TEXT */

DECLARE_INLINE_HEADER (
int
intbyte_first_byte_p_1 (int byte, const char *file, int line)
)
{
  assert_at_line (0 <= byte && byte <= 255, file, line);
  return 0xA0 > byte;
}

#define intbyte_first_byte_p(byte) \
  intbyte_first_byte_p_1 (byte, __FILE__, __LINE__)

#endif /* ERROR_CHECK_TEXT */
|
|
89
|
|
/* Does BYTE represent the first byte of a multi-byte character?  This is
   the case exactly when byte_c1_p() holds for it.  With ERROR_CHECK_TEXT,
   BYTE is additionally asserted to lie in the range 0 - 255. */

#ifndef ERROR_CHECK_TEXT

#define intbyte_leading_byte_p(byte) byte_c1_p (byte)

#else /* ERROR_CHECK_TEXT */

DECLARE_INLINE_HEADER (
int
intbyte_leading_byte_p_1 (int byte, const char *file, int line)
)
{
  assert_at_line (0 <= byte && byte <= 255, file, line);
  return byte_c1_p (byte);
}

#define intbyte_leading_byte_p(byte) \
  intbyte_leading_byte_p_1 (byte, __FILE__, __LINE__)

#endif /* ERROR_CHECK_TEXT */
|
771
|
111
|
|
112 /* Table of number of bytes in the string representation of a character
|
|
113 indexed by the first byte of that representation.
|
|
114
|
|
115 This value can be derived in other ways -- e.g. something like
|
826
|
116 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte))
|
771
|
117 but it's faster this way. */
|
|
118 extern const Bytecount rep_bytes_by_first_byte[0xA0];
|
|
119
|
|
/* Number of bytes in the string representation of a character whose
   first byte is FB, looked up in the rep_bytes_by_first_byte[] table.
   With ERROR_CHECK_TEXT, FB is first asserted to satisfy
   0 <= FB < 0xA0. */

#ifndef ERROR_CHECK_TEXT

#define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb])

#else /* ERROR_CHECK_TEXT */

DECLARE_INLINE_HEADER (
Bytecount
rep_bytes_by_first_byte_1 (int fb, const char *file, int line)
)
{
  assert_at_line (0 <= fb && fb < 0xA0, file, line);
  /* No `(' follows the name here, so this names the table, not the
     function-like macro defined below. */
  return rep_bytes_by_first_byte[fb];
}

#define rep_bytes_by_first_byte(fb) \
  rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__)

#endif /* ERROR_CHECK_TEXT */
|
788
|
141
|
826
|
/* Does character C take more than one byte when stored in a string in the
   default format?  Exactly the characters at or above 0x80 do. */

#define emchar_multibyte_p(c) ((c) > 0x7f)

/* The complement of the above: C is below 0x80. */

#define emchar_ascii_p(c) ((c) < 0x80)
|
|
148
|
|
149 /* Maximum number of bytes per Emacs character when represented as text, in
|
|
150 any format.
|
|
151 */
|
771
|
152
|
|
153 #define MAX_EMCHAR_LEN 4
|
|
154
|
826
|
155 #endif /* not MULE */
|
|
156
|
|
157 /* ---------------- Handling non-default formats ----------------- */
|
|
158
|
|
159 /* We support, at least to some extent, formats other than the default
|
|
160 variable-width format, for speed; all of these alternative formats are
|
|
161 fixed-width. Currently we only handle these non-default formats in
|
|
162 buffers, because access to their text is strictly controlled and thus
|
|
163 the details of the format mostly compartmentalized. The only really
|
|
164 tricky part is the search code -- the regex, Boyer-Moore, and
|
|
165 simple-search algorithms in search.c and regex.c. All other code that
|
|
166 knows directly about the buffer representation is the basic code to
|
|
167 modify or retrieve the buffer text.
|
|
168
|
|
169 Supporting fixed-width formats in Lisp strings is harder, but possible
|
|
170 -- FSF currently does this, for example. In this case, however,
|
|
171 probably only 8-bit-fixed is reasonable for Lisp strings -- getting
|
|
172 non-ASCII-compatible fixed-width formats to work is much, much harder
|
|
173 because a lot of code assumes that strings are ASCII-compatible
|
|
174 (i.e. ASCII + other characters represented exclusively using high-bit
|
|
175 bytes) and a lot of code mixes Lisp strings and non-Lisp strings freely.
|
|
176
|
|
177 The different possible fixed-width formats are 8-bit fixed, 16-bit
|
|
178 fixed, and 32-bit fixed. The latter can represent all possible
|
|
179 characters, but at a substantial memory penalty. The other two can
|
|
180 represent only a subset of the possible characters. How these subsets
|
|
181 are defined can be simple or very tricky.
|
|
182
|
|
183 Currently we support only the default format and the 8-bit fixed format,
|
|
184 and in the latter, we only allow these to be the first 256 characters in
|
|
185 an Emchar (ASCII and Latin 1).
|
|
186
|
|
187 One reasonable approach for 8-bit fixed is to allow the upper half to
|
|
188 represent any 1-byte charset, which is specified on a per-buffer basis.
|
|
189 This should work fairly well in practice since most documents are in
|
|
190 only one foreign language (possibly with some English mixed in). I
|
|
191 think FSF does something like this; or at least, they have something
|
|
192 called nonascii-translation-table and use it when converting from
|
|
193 8-bit-fixed text ("unibyte text") to default text ("multibyte text").
|
|
194 With 16-bit fixed, you could do something like assign chunks of the 64K
|
|
195 worth of characters to charsets as they're encountered in documents.
|
|
196 This should work well with most Asian documents.
|
|
197
|
|
198 If/when we switch to using Unicode internally, we might have formats more
|
|
199 like this:
|
|
200
|
|
201 -- UTF-8 or some extension as the default format. Perl uses an
|
|
202 extension that handles 64-bit chars and requires as much as 13 bytes per
|
|
203 char, vs. the standard of 31-bit chars and 6 bytes max. UTF-8 has the
|
|
204 same basic properties as our own variable-width format (see text.c,
|
|
205 Internal String Encoding) and so most code would not need to be changed.
|
|
206
|
|
207 -- UTF-16 as a "pseudo-fixed" format (i.e. 16-bit fixed plus surrogates
|
|
208 for representing characters not in the BMP, aka >= 65536). The vast
|
|
209 majority of documents will have no surrogates in them so byte/char
|
|
210 conversion will be very fast.
|
|
211
|
|
212 -- an 8-bit fixed format, like currently.
|
|
213
|
|
214 -- possibly, UCS-4 as a 32-bit fixed format.
|
|
215
|
|
216 The fixed-width formats essentially treat the buffer as an array of
|
|
217 8-bit, 16-bit or 32-bit integers. This means that how they are stored
|
|
218 in memory (in particular, big-endian or little-endian) depends on the
|
|
219 native format of the machine's processor. It also means we have to
|
|
220 worry a bit about alignment (basically, we just need to keep the gap an
|
|
221 integral size of the character size, and get things aligned properly
|
|
222 when converting the buffer between formats).
|
|
223 */
|
|
/* The possible in-memory layouts of text.  FORMAT_DEFAULT is the
   variable-width format; the others are fixed-width. */
typedef enum internal_format
{
  FORMAT_DEFAULT = 0,
  FORMAT_8_BIT_FIXED = 1,
  FORMAT_16_BIT_FIXED = 2,      /* not implemented */
  FORMAT_32_BIT_FIXED = 3       /* not implemented */
} Internal_Format;
|
|
231
|
|
232 #ifdef MULE
|
|
233 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to
|
|
234 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed
|
|
235 values may depend on the buffer, e.g. depending on what language the
|
|
236 text in the buffer is in. */
|
|
237
|
|
/* Conversions between an Emchar and the raw value kept for it in each of
   the fixed-width formats.  Every macro accepts OBJECT, but none of the
   definitions below consults it. */

/* True if Emchar CH can be represented in 8-bit-fixed format, i.e. no bit
   above the low 8 is set. */
#define emchar_8_bit_fixed_p(ch, object) (!((ch) & ~0xff))
/* Convert Emchar CH to an 8-bit int, as will be stored in the buffer. */
#define emchar_to_raw_8_bit_fixed(ch, object) ((Intbyte) (ch))
/* Convert the other way. */
#define raw_8_bit_fixed_to_emchar(ch, object) ((Emchar) (ch))

/* True if no bit above the low 16 is set in Emchar CH. */
#define emchar_16_bit_fixed_p(ch, object) (!((ch) & ~0xffff))
/* Convert Emchar CH to a 16-bit int, as will be stored in the buffer. */
#define emchar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch))
/* Convert the other way. */
#define raw_16_bit_fixed_to_emchar(ch, object) ((Emchar) (ch))

/* Convert Emchar CH to a 32-bit int, as will be stored in the buffer. */
#define emchar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch))
/* Convert the other way. */
#define raw_32_bit_fixed_to_emchar(ch, object) ((Emchar) (ch))
|
|
255
|
|
256 /* Return the "raw value" of a character as stored in the buffer. In the
|
|
257 default format, this is just the same as the character. In fixed-width
|
|
258 formats, this is the actual value in the buffer, which will be limited
|
|
259 to the range as established by the format. This is used when searching
|
|
260 for a character in a buffer -- it's faster to convert the character to
|
|
261 the raw value and look for that, than repeatedly convert each raw value
|
|
262 in the buffer into a character. */
|
|
263
|
|
264 DECLARE_INLINE_HEADER (
|
|
265 Raw_Emchar
|
|
266 emchar_to_raw (Emchar ch, Internal_Format fmt, Lisp_Object object)
|
|
267 )
|
|
268 {
|
|
269 switch (fmt)
|
|
270 {
|
|
271 case FORMAT_DEFAULT:
|
|
272 return (Raw_Emchar) ch;
|
|
273 case FORMAT_16_BIT_FIXED:
|
|
274 text_checking_assert (emchar_16_bit_fixed_p (ch, object));
|
|
275 return (Raw_Emchar) emchar_to_raw_16_bit_fixed (ch, object);
|
|
276 case FORMAT_32_BIT_FIXED:
|
|
277 return (Raw_Emchar) emchar_to_raw_32_bit_fixed (ch, object);
|
|
278 default:
|
|
279 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
280 text_checking_assert (emchar_8_bit_fixed_p (ch, object));
|
|
281 return (Raw_Emchar) emchar_to_raw_8_bit_fixed (ch, object);
|
|
282 }
|
|
283 }
|
|
284
|
|
285 /* Return whether CH is representable in the given format in the given
|
|
286 object. */
|
|
287
|
|
288 DECLARE_INLINE_HEADER (
|
|
289 int
|
|
290 emchar_fits_in_format (Emchar ch, Internal_Format fmt, Lisp_Object object)
|
|
291 )
|
|
292 {
|
|
293 switch (fmt)
|
|
294 {
|
|
295 case FORMAT_DEFAULT:
|
|
296 return 1;
|
|
297 case FORMAT_16_BIT_FIXED:
|
|
298 return emchar_16_bit_fixed_p (ch, object);
|
|
299 case FORMAT_32_BIT_FIXED:
|
|
300 return 1;
|
|
301 default:
|
|
302 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
303 return emchar_8_bit_fixed_p (ch, object);
|
|
304 }
|
|
305 }
|
|
306
|
|
307 /* Assuming the formats are the same, return whether the two objects
|
|
308 represent text in exactly the same way. */
|
|
309
|
|
310 DECLARE_INLINE_HEADER (
|
|
311 int
|
|
312 objects_have_same_internal_representation (Lisp_Object srcobj,
|
|
313 Lisp_Object dstobj)
|
|
314 )
|
|
315 {
|
|
316 /* &&#### implement this properly when we allow per-object format
|
|
317 differences */
|
|
318 return 1;
|
|
319 }
|
|
320
|
|
321 #else
|
|
322
|
|
323 #define emchar_to_raw(ch, fmt, object) ((Raw_Emchar) (ch))
|
|
324 #define emchar_fits_in_format(ch, fmt, object) 1
|
|
325 #define objects_have_same_internal_representation(srcobj, dstobj) 1
|
|
326
|
771
|
327 #endif /* MULE */
|
|
328
|
|
329 int dfc_coding_system_is_unicode (Lisp_Object coding_system);
|
|
330
|
|
331 DECLARE_INLINE_HEADER (
|
|
332 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys)
|
|
333 )
|
|
334 {
|
|
335 if (dfc_coding_system_is_unicode (codesys))
|
|
336 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr);
|
|
337 else
|
|
338 return strlen ((char *) ptr);
|
|
339 }
|
|
340
|
|
341
|
|
342 /************************************************************************/
|
|
343 /* */
|
|
344 /* working with raw internal-format data */
|
|
345 /* */
|
|
346 /************************************************************************/
|
|
347
|
826
|
348 /*
|
|
349 Use the following functions/macros on contiguous text in any of the
|
|
350 internal formats. Those that take a format arg work on all internal
|
|
351 formats; the others work only on the default (variable-width under Mule)
|
|
352 format. If the text you're operating on is known to come from a buffer,
|
|
353 use the buffer-level functions in buffer.h, which automatically know the
|
|
354 correct format and handle the gap.
|
|
355
|
|
356 Some terminology:
|
|
357
|
|
358 "charptr" appearing in the macros means "internal-format text" -- type
|
|
359 `Intbyte *'. Operations on such pointers themselves, rather than on the
|
|
360 text being pointed to, have "charptr" instead of "charptr" in the macro
|
|
361 name. "emchar" in the macro names means an Emchar -- the representation
|
|
362 of a character as a single integer rather than a series of bytes, as part
|
|
363 of "charptr". Many of the macros below are for converting between the
|
|
364 two representations of characters.
|
|
365
|
|
366 Note also that we try to consistently distinguish between an "Emchar" and
|
|
367 a Lisp character. Stuff working with Lisp characters often just says
|
|
368 "char", so we consistently use "Emchar" when that's what we're working
|
|
369 with. */
|
|
370
|
|
371 /* The three golden rules of macros:
|
771
|
372
|
|
373 1) Anything that's an lvalue can be evaluated more than once.
|
826
|
374
|
|
375 2) Macros where anything else can be evaluated more than once should
|
|
376 have the word "unsafe" in their name (exceptions may be made for
|
|
377 large sets of macros that evaluate arguments of certain types more
|
|
378 than once, e.g. struct buffer * arguments, when clearly indicated in
|
|
379 the macro documentation). These macros are generally meant to be
|
|
380 called only by other macros that have already stored the calling
|
|
381 values in temporary variables.
|
|
382
|
|
383 3) Nothing else can be evaluated more than once. Use inline
|
771
|
384 functions, if necessary, to prevent multiple evaluation.
|
826
|
385
|
|
386 NOTE: The functions and macros below are given full prototypes in their
|
|
387 docs, even when the implementation is a macro. In such cases, passing
|
|
388 an argument of a type other than expected will produce undefined
|
|
389 results. Also, given that macros can do things functions can't (in
|
|
390 particular, directly modify arguments as if they were passed by
|
|
391 reference), the declaration syntax has been extended to include the
|
|
392 call-by-reference syntax from C++, where an & after a type indicates
|
|
393 that the argument is an lvalue and is passed by reference, i.e. the
|
|
394 function can modify its value. (This is equivalent in C to passing a
|
|
395 pointer to the argument, but without the need to explicitly worry about
|
|
396 pointers.)
|
|
397
|
|
398 When to capitalize macros:
|
|
399
|
|
400 -- Capitalize macros doing stuff obviously impossible with (C)
|
|
401 functions, e.g. directly modifying arguments as if they were passed by
|
|
402 reference.
|
|
403
|
|
404 -- Capitalize macros that evaluate *any* argument more than once regardless
|
|
405 of whether that's "allowed" (e.g. buffer arguments).
|
|
406
|
|
407 -- Capitalize macros that directly access a field in a Lisp_Object or
|
|
408 its equivalent underlying structure. In such cases, access through the
|
|
409 Lisp_Object precedes the macro with an X, and access through the underlying
|
|
410 structure doesn't.
|
|
411
|
|
412 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g.
|
|
413 FRAMEP, CHECK_FRAME, etc.
|
|
414
|
|
415 -- Try to avoid capitalizing any other macros.
|
771
|
416 */
|
|
417
|
|
418 /* ---------------------------------------------------------------------- */
|
826
|
419 /* Working with charptr's (pointers to internally-formatted text) */
|
771
|
420 /* ---------------------------------------------------------------------- */
|
|
421
|
826
|
/* Given a charptr, does it point to the beginning of a character?
   Without MULE the answer is always yes. */

#ifndef MULE
# define valid_charptr_p(ptr) 1
#else
# define valid_charptr_p(ptr) intbyte_first_byte_p ((ptr)[0])
#endif

/* If error-checking is enabled, assert that the given charptr points to
   the beginning of a character.  Otherwise, do nothing. */

#define assert_valid_charptr(ptr) text_checking_assert (valid_charptr_p (ptr))
|
|
436
|
|
437 /* Given a charptr (assumed to point at the beginning of a character),
|
|
438 modify that pointer so it points to the beginning of the next character.
|
|
439
|
|
440 Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
|
771
|
441 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
|
|
442 trick of looking for a valid first byte because it might run off
|
|
443 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
|
|
444 method because it doesn't have easy access to the first byte of
|
|
445 the character it's moving over. */
|
|
446
|
826
|
/* PTR must be an lvalue; it is evaluated more than once.  The step is
   the byte length recorded for the first byte of the character at PTR. */
#define INC_CHARPTR(ptr) do {                           \
  assert_valid_charptr (ptr);                           \
  (ptr) += rep_bytes_by_first_byte ((ptr)[0]);          \
} while (0)
|
|
451
|
|
/* Like INC_CHARPTR(), but for text in internal format FMT: advance PTR
   (an lvalue, evaluated more than once) by one character.  In the
   fixed-width formats the step is a constant 1, 2 or 4 bytes, and for
   the 16- and 32-bit formats PTR is first checked against ALIGN_PTR()
   when error-checking is enabled.  FMT is evaluated exactly once. */

#define INC_CHARPTR_FMT(ptr, fmt)                                       \
do {                                                                    \
  Internal_Format __icf_fmt = (fmt);                                    \
  switch (__icf_fmt)                                                    \
    {                                                                   \
    case FORMAT_DEFAULT:                                                \
      INC_CHARPTR (ptr);                                                \
      break;                                                            \
    case FORMAT_16_BIT_FIXED:                                           \
      text_checking_assert ((ptr) == ALIGN_PTR (ptr, UINT_16_BIT));     \
      (ptr) += 2;                                                       \
      break;                                                            \
    case FORMAT_32_BIT_FIXED:                                           \
      text_checking_assert ((ptr) == ALIGN_PTR (ptr, UINT_32_BIT));     \
      (ptr) += 4;                                                       \
      break;                                                            \
    default:                                                            \
      /* Test the saved copy so that FMT is not evaluated again. */     \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED);           \
      (ptr)++;                                                          \
      break;                                                            \
    }                                                                   \
} while (0)
|
|
474
|
|
475 /* Given a charptr (assumed to point at the beginning of a character or at
|
|
476 the very end of the text), modify that pointer so it points to the
|
|
477 beginning of the previous character.
|
|
478 */
|
771
|
479
|
800
|
/* PTR must be an lvalue; it is evaluated more than once.  Step back one
   byte, then keep stepping back until PTR rests on the first byte of a
   character. */
#ifndef ERROR_CHECK_TEXT
#define DEC_CHARPTR(ptr) do {                           \
  (ptr)--;                                              \
  while (!valid_charptr_p (ptr))                        \
    (ptr)--;                                            \
} while (0)
#else /* ERROR_CHECK_TEXT */
/* Defined separately so that the unchecked version above carries no
   unused dc_ptr1 to warn about.  After moving, the distance travelled is
   asserted to equal the byte length of the character now at PTR. */
#define DEC_CHARPTR(ptr) do {                                           \
  const Intbyte *dc_ptr1 = (ptr);                                       \
  (ptr)--;                                                              \
  while (!valid_charptr_p (ptr))                                        \
    (ptr)--;                                                            \
  text_checking_assert                                                  \
    (dc_ptr1 - (ptr) == rep_bytes_by_first_byte ((ptr)[0]));            \
} while (0)
#endif /* ERROR_CHECK_TEXT */
|
|
496
|
|
/* Like DEC_CHARPTR(), but for text in internal format FMT: move PTR (an
   lvalue, evaluated more than once) back by one character.  In the
   fixed-width formats the step is a constant 1, 2 or 4 bytes, and for
   the 16- and 32-bit formats PTR is first checked against ALIGN_PTR()
   when error-checking is enabled.  FMT is evaluated exactly once. */

#define DEC_CHARPTR_FMT(ptr, fmt)                                       \
do {                                                                    \
  Internal_Format __icf_fmt = (fmt);                                    \
  switch (__icf_fmt)                                                    \
    {                                                                   \
    case FORMAT_DEFAULT:                                                \
      DEC_CHARPTR (ptr);                                                \
      break;                                                            \
    case FORMAT_16_BIT_FIXED:                                           \
      text_checking_assert ((ptr) == ALIGN_PTR (ptr, UINT_16_BIT));     \
      (ptr) -= 2;                                                       \
      break;                                                            \
    case FORMAT_32_BIT_FIXED:                                           \
      text_checking_assert ((ptr) == ALIGN_PTR (ptr, UINT_32_BIT));     \
      (ptr) -= 4;                                                       \
      break;                                                            \
    default:                                                            \
      /* Test the saved copy so that FMT is not evaluated again. */     \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED);           \
      (ptr)--;                                                          \
      break;                                                            \
    }                                                                   \
} while (0)
|
|
519
|
|
520 #ifdef MULE
|
|
521
|
826
|
522 /* Make sure that PTR is pointing to the beginning of a character. If not,
|
|
523 back up until this is the case. Note that there are not too many places
|
|
524 where it is legitimate to do this sort of thing. It's an error if
|
|
525 you're passed an "invalid" char * pointer. NOTE: PTR *must* be pointing
|
|
526 to a valid part of the string (i.e. not the very end, unless the string
|
|
527 is zero-terminated or something) in order for this function to not cause
|
|
528 crashes.
|
|
529 */
|
|
530
|
771
|
531 /* Note that this reads the byte at *PTR! */
|
|
532
|
|
/* PTR must be an lvalue; it is evaluated more than once.  It is
   parenthesized in the decrement so that an argument such as `*pp' moves
   the pointed-to pointer rather than `pp' itself. */
#define VALIDATE_CHARPTR_BACKWARD(ptr) do {     \
  while (!valid_charptr_p (ptr)) (ptr)--;       \
} while (0)
|
|
536
|
826
|
537 /* Make sure that PTR is pointing to the beginning of a character. If not,
|
|
538 move forward until this is the case. Note that there are not too many
|
|
539 places where it is legitimate to do this sort of thing. It's an error
|
|
540 if you're passed an "invalid" char * pointer.
|
|
541 */
|
771
|
542
|
|
543 /* This needs to be trickier than VALIDATE_CHARPTR_BACKWARD() to avoid the
|
|
544 possibility of running off the end of the string. */
|
|
545
|
|
/* PTR must be an lvalue; it is evaluated more than once.  A copy of PTR
   is backed up to the start of the character it lies within; only if
   that actually moved the copy is PTR set there and then advanced over
   that character. */
#define VALIDATE_CHARPTR_FORWARD(ptr) do {      \
  Intbyte *vcf_ptr = (ptr);                     \
  VALIDATE_CHARPTR_BACKWARD (vcf_ptr);          \
  if ((ptr) != vcf_ptr)                         \
    {                                           \
      (ptr) = vcf_ptr;                          \
      INC_CHARPTR (ptr);                        \
    }                                           \
} while (0)
|
|
555
|
|
556 #else /* not MULE */
|
|
557 #define VALIDATE_CHARPTR_BACKWARD(ptr)
|
|
558 #define VALIDATE_CHARPTR_FORWARD(ptr)
|
826
|
559 #endif /* not MULE */
|
|
560
|
|
561 #ifdef MULE
|
|
562
|
|
563 /* Given a Intbyte string at PTR of size N, possibly with a partial
|
|
564 character at the end, return the size of the longest substring of
|
|
565 complete characters. Does not assume that the byte at *(PTR + N) is
|
|
566 readable. Note that there are not too many places where it is
|
|
567 legitimate to do this sort of thing. It's an error if you're passed an
|
|
568 "invalid" offset. */
|
|
569
|
|
570 DECLARE_INLINE_HEADER (
|
|
571 Bytecount
|
|
572 validate_intbyte_string_backward (const Intbyte *ptr, Bytecount n)
|
|
573 )
|
|
574 {
|
|
575 const Intbyte *ptr2;
|
|
576
|
|
577 if (n == 0)
|
|
578 return n;
|
|
579 ptr2 = ptr + n - 1;
|
|
580 VALIDATE_CHARPTR_BACKWARD (ptr2);
|
|
581 if (ptr2 + rep_bytes_by_first_byte (*ptr2) != ptr + n)
|
|
582 return ptr2 - ptr;
|
|
583 return n;
|
|
584 }
|
|
585
|
|
586 #else
|
|
587
|
771
|
588 #define validate_intbyte_string_backward(ptr, n) (n)
|
826
|
589
|
|
590 #endif /* MULE */
|
771
|
591
|
|
592 /* -------------------------------------------------------------- */
|
826
|
593 /* Working with the length (in bytes and characters) of a */
|
|
594 /* section of internally-formatted text */
|
771
|
595 /* -------------------------------------------------------------- */
|
|
596
|
826
|
597 #ifdef MULE
|
|
598
|
|
599 Charcount bytecount_to_charcount_fun (const Intbyte *ptr, Bytecount len);
|
|
600 Bytecount charcount_to_bytecount_fun (const Intbyte *ptr, Charcount len);
|
|
601
|
|
602 /* Given a pointer to a text string and a length in bytes, return
|
|
603 the equivalent length in characters. */
|
|
604
|
|
605 DECLARE_INLINE_HEADER (
|
|
606 Charcount
|
|
607 bytecount_to_charcount (const Intbyte *ptr, Bytecount len)
|
|
608 )
|
|
609 {
|
|
610 if (len < 20) /* Just a random guess, but it should be more or less correct.
|
|
611 If number of bytes is small, just do a simple loop,
|
|
612 which should be more efficient. */
|
|
613 {
|
|
614 Charcount count = 0;
|
|
615 const Intbyte *end = ptr + len;
|
|
616 while (ptr < end)
|
|
617 {
|
|
618 INC_CHARPTR (ptr);
|
|
619 count++;
|
|
620 }
|
|
621 /* Bomb out if the specified substring ends in the middle
|
|
622 of a character. Note that we might have already gotten
|
|
623 a core dump above from an invalid reference, but at least
|
|
624 we will get no farther than here.
|
|
625
|
|
626 This also catches len < 0. */
|
|
627 text_checking_assert (ptr == end);
|
|
628
|
|
629 return count;
|
|
630 }
|
|
631 else
|
|
632 return bytecount_to_charcount_fun (ptr, len);
|
|
633 }
|
|
634
|
|
635 /* Given a pointer to a text string and a length in characters, return the
|
|
636 equivalent length in bytes.
|
|
637 */
|
|
638
|
|
639 DECLARE_INLINE_HEADER (
|
|
640 Bytecount
|
|
641 charcount_to_bytecount (const Intbyte *ptr, Charcount len)
|
|
642 )
|
|
643 {
|
|
644 text_checking_assert (len >= 0);
|
|
645 if (len < 20) /* See above */
|
|
646 {
|
|
647 const Intbyte *newptr = ptr;
|
|
648 while (len > 0)
|
|
649 {
|
|
650 INC_CHARPTR (newptr);
|
|
651 len--;
|
|
652 }
|
|
653 return newptr - ptr;
|
|
654 }
|
|
655 else
|
|
656 return charcount_to_bytecount_fun (ptr, len);
|
|
657 }
|
|
658
|
|
659 /* Given a pointer to a text string in the specified format and a length in
|
|
660 bytes, return the equivalent length in characters.
|
|
661 */
|
|
662
|
|
663 DECLARE_INLINE_HEADER (
|
|
664 Charcount
|
|
665 bytecount_to_charcount_fmt (const Intbyte *ptr, Bytecount len,
|
|
666 Internal_Format fmt)
|
|
667 )
|
|
668 {
|
|
669 switch (fmt)
|
|
670 {
|
|
671 case FORMAT_DEFAULT:
|
|
672 return bytecount_to_charcount (ptr, len);
|
|
673 case FORMAT_16_BIT_FIXED:
|
|
674 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
|
675 return (Charcount) (len << 1);
|
|
676 case FORMAT_32_BIT_FIXED:
|
|
677 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
|
678 return (Charcount) (len << 2);
|
|
679 default:
|
|
680 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
681 return (Charcount) len;
|
|
682 }
|
|
683 }
|
|
684
|
|
685 /* Given a pointer to a text string in the specified format and a length in
|
|
686 characters, return the equivalent length in bytes.
|
|
687 */
|
|
688
|
|
689 DECLARE_INLINE_HEADER (
|
|
690 Bytecount
|
|
691 charcount_to_bytecount_fmt (const Intbyte *ptr, Charcount len,
|
|
692 Internal_Format fmt)
|
|
693 )
|
|
694 {
|
|
695 switch (fmt)
|
|
696 {
|
|
697 case FORMAT_DEFAULT:
|
|
698 return charcount_to_bytecount (ptr, len);
|
|
699 case FORMAT_16_BIT_FIXED:
|
|
700 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
|
701 text_checking_assert (!(len & 1));
|
|
702 return (Bytecount) (len >> 1);
|
|
703 case FORMAT_32_BIT_FIXED:
|
|
704 text_checking_assert (!(len & 3));
|
|
705 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
|
706 return (Bytecount) (len >> 2);
|
|
707 default:
|
|
708 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
709 return (Bytecount) len;
|
|
710 }
|
|
711 }
|
|
712
|
|
713 #else
|
|
714
|
|
715 #define bytecount_to_charcount(ptr, len) ((Charcount) (len))
|
|
716 #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len))
|
|
717 #define charcount_to_bytecount(ptr, len) ((Bytecount) (len))
|
|
718 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len))
|
|
719
|
|
720 #endif /* MULE */
|
|
721
|
|
/* Return the length of the first character at PTR.  Equivalent to
   charcount_to_bytecount (ptr, 1).

   [Since charcount_to_bytecount() is written as inline, a smart compiler
   should really optimize charcount_to_bytecount (ptr, 1) to the same as
   the following, with no error checking.  But since this idiom occurs so
   often, we'll be helpful and define a special macro for it.]
*/

#define charptr_emchar_len(ptr) rep_bytes_by_first_byte ((ptr)[0])
|
|
732
|
|
733 /* Return the length of the first character at PTR, which is in the
|
|
734 specified internal format. Equivalent to charcount_to_bytecount_fmt
|
|
735 (ptr, 1, fmt).
|
|
736 */
|
|
737
|
|
738 DECLARE_INLINE_HEADER (
|
|
739 Bytecount
|
|
740 charptr_emchar_len_fmt (const Intbyte *ptr, Internal_Format fmt)
|
|
741 )
|
|
742 {
|
|
743 switch (fmt)
|
|
744 {
|
|
745 case FORMAT_DEFAULT:
|
|
746 return charptr_emchar_len (ptr);
|
|
747 case FORMAT_16_BIT_FIXED:
|
|
748 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
|
749 return 2;
|
|
750 case FORMAT_32_BIT_FIXED:
|
|
751 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
|
752 return 4;
|
|
753 default:
|
|
754 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
755 return 1;
|
|
756 }
|
|
757 }
|
|
758
|
|
759 /* Return a pointer to the beginning of the character offset N (in
|
|
760 characters) from PTR.
|
|
761 */
|
|
762
|
|
763 DECLARE_INLINE_HEADER (
|
|
764 const Intbyte *
|
771
|
765 charptr_n_addr (const Intbyte *ptr, Charcount offset)
|
826
|
766 )
|
771
|
767 {
|
|
768 return ptr + charcount_to_bytecount (ptr, offset);
|
|
769 }
|
|
770
|
826
|
771 /* Given a charptr and an offset into the text pointed to by the charptr,
|
|
772 modify the offset so it points to the beginning of the next character.
|
|
773 */
|
|
774
|
|
/* POS must be an lvalue; both arguments are evaluated more than once.
   The assertion is applied to PTR itself; the step is the byte length
   recorded for the byte found at offset POS from PTR. */
#define INC_BYTECOUNT(ptr, pos) do {                    \
  assert_valid_charptr (ptr);                           \
  (pos) += rep_bytes_by_first_byte ((ptr)[(pos)]);      \
} while (0)
|
|
779
|
771
|
780 /* -------------------------------------------------------------------- */
|
826
|
781 /* Retrieving or changing the character pointed to by a charptr */
|
771
|
782 /* -------------------------------------------------------------------- */
|
|
783
|
|
/* Single-byte versions of the character access operations: read the one
   byte at PTR as an Emchar, store X as the one byte at PTR, or copy one
   byte from SRC to DST.  The two that write yield a Bytecount of 1. */
#define simple_charptr_emchar(ptr) ((Emchar) *(ptr))
#define simple_set_charptr_emchar(ptr, x) \
  (*(ptr) = (Intbyte) (x), (Bytecount) 1)
#define simple_charptr_copy_emchar(src, dst) \
  (*(dst) = (src)[0], (Bytecount) 1)
|
771
|
789
|
|
790 #ifdef MULE
|
|
791
|
|
792 Emchar non_ascii_charptr_emchar (const Intbyte *ptr);
|
|
793 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c);
|
826
|
794 Bytecount non_ascii_charptr_copy_emchar (const Intbyte *src, Intbyte *dst);
|
|
795
|
|
796 /* Retrieve the character pointed to by PTR as an Emchar. */
|
|
797
|
|
798 DECLARE_INLINE_HEADER (
|
|
799 Emchar
|
771
|
800 charptr_emchar (const Intbyte *ptr)
|
826
|
801 )
|
771
|
802 {
|
826
|
803 return byte_ascii_p (*ptr) ?
|
771
|
804 simple_charptr_emchar (ptr) :
|
|
805 non_ascii_charptr_emchar (ptr);
|
|
806 }
|
|
807
|
826
|
808 /* Retrieve the character pointed to by PTR (a pointer to text in the
|
|
809 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an
|
|
810 Emchar.
|
|
811
|
|
812 Note: For these and other *_fmt() functions, if you pass in a constant
|
|
813 FMT, the switch will be optimized out of existence. Therefore, there is
|
|
814 no need to create separate versions for the various formats for
|
|
815 "efficiency reasons". In fact, we don't really need charptr_emchar()
|
|
816 and such written separately, but they are used often so it's simpler
|
|
817 that way. */
|
|
818
|
|
819 DECLARE_INLINE_HEADER (
|
|
820 Emchar
|
|
821 charptr_emchar_fmt (const Intbyte *ptr, Internal_Format fmt,
|
|
822 Lisp_Object object)
|
|
823 )
|
|
824 {
|
|
825 switch (fmt)
|
|
826 {
|
|
827 case FORMAT_DEFAULT:
|
|
828 return charptr_emchar (ptr);
|
|
829 case FORMAT_16_BIT_FIXED:
|
|
830 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
|
831 return raw_16_bit_fixed_to_emchar (* (UINT_16_BIT *) ptr, object);
|
|
832 case FORMAT_32_BIT_FIXED:
|
|
833 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
|
834 return raw_32_bit_fixed_to_emchar (* (UINT_32_BIT *) ptr, object);
|
|
835 default:
|
|
836 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
837 return raw_8_bit_fixed_to_emchar (*ptr, object);
|
|
838 }
|
|
839 }
|
|
840
|
|
841 /* Return the character at PTR (which is in format FMT), suitable for
|
|
842 comparison with an ASCII character. This guarantees that if the
|
|
843 character at PTR is ASCII (range 0 - 127), that character will be
|
|
844 returned; otherwise, some character outside of the ASCII range will be
|
|
845 returned, but not necessarily the character actually at PTR. This will
|
|
846 be faster than charptr_emchar_fmt() for some formats -- in particular,
|
|
847 FORMAT_DEFAULT. */
|
|
848
|
|
849 DECLARE_INLINE_HEADER (
|
|
850 Emchar
|
|
851 charptr_emchar_ascii_fmt (const Intbyte *ptr, Internal_Format fmt,
|
|
852 Lisp_Object object)
|
|
853 )
|
|
854 {
|
|
855 switch (fmt)
|
|
856 {
|
|
857 case FORMAT_DEFAULT:
|
|
858 return (Emchar) *ptr;
|
|
859 case FORMAT_16_BIT_FIXED:
|
|
860 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
|
861 return raw_16_bit_fixed_to_emchar (* (UINT_16_BIT *) ptr, object);
|
|
862 case FORMAT_32_BIT_FIXED:
|
|
863 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
|
864 return raw_32_bit_fixed_to_emchar (* (UINT_32_BIT *) ptr, object);
|
|
865 default:
|
|
866 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
867 return raw_8_bit_fixed_to_emchar (*ptr, object);
|
|
868 }
|
|
869 }
|
|
870
|
|
871 /* Return the "raw value" of the character at PTR, in format FMT. This is
|
|
872 useful when searching for a character; convert the character using
|
|
873 emchar_to_raw(). */
|
|
874
|
|
875 DECLARE_INLINE_HEADER (
|
|
876 Raw_Emchar
|
|
877 charptr_emchar_raw_fmt (const Intbyte *ptr, Internal_Format fmt)
|
|
878 )
|
|
879 {
|
|
880 switch (fmt)
|
|
881 {
|
|
882 case FORMAT_DEFAULT:
|
|
883 return (Raw_Emchar) charptr_emchar (ptr);
|
|
884 case FORMAT_16_BIT_FIXED:
|
|
885 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
|
886 return (Raw_Emchar) (* (UINT_16_BIT *) ptr);
|
|
887 case FORMAT_32_BIT_FIXED:
|
|
888 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
|
889 return (Raw_Emchar) (* (UINT_32_BIT *) ptr);
|
|
890 default:
|
|
891 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
892 return (Raw_Emchar) (*ptr);
|
|
893 }
|
|
894 }
|
|
895
|
|
896 /* Store the character CH (an Emchar) as internally-formatted text starting
|
|
897 at PTR. Return the number of bytes stored.
|
|
898 */
|
|
899
|
|
900 DECLARE_INLINE_HEADER (
|
|
901 Bytecount
|
771
|
902 set_charptr_emchar (Intbyte *ptr, Emchar x)
|
826
|
903 )
|
771
|
904 {
|
826
|
905 return !emchar_multibyte_p (x) ?
|
771
|
906 simple_set_charptr_emchar (ptr, x) :
|
|
907 non_ascii_set_charptr_emchar (ptr, x);
|
|
908 }
|
|
909
|
826
|
910 /* Store the character CH (an Emchar) as internally-formatted text of
|
|
911 format FMT starting at PTR, which comes from OBJECT. Return the number
|
|
912 of bytes stored.
|
|
913 */
|
|
914
|
|
915 DECLARE_INLINE_HEADER (
|
|
916 Bytecount
|
|
917 set_charptr_emchar_fmt (Intbyte *ptr, Emchar x, Internal_Format fmt,
|
|
918 Lisp_Object object)
|
|
919 )
|
771
|
920 {
|
826
|
921 switch (fmt)
|
|
922 {
|
|
923 case FORMAT_DEFAULT:
|
|
924 return set_charptr_emchar (ptr, x);
|
|
925 case FORMAT_16_BIT_FIXED:
|
|
926 text_checking_assert (emchar_16_bit_fixed_p (x, object));
|
|
927 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
|
928 * (UINT_16_BIT *) ptr = emchar_to_raw_16_bit_fixed (x, object);
|
|
929 return 2;
|
|
930 case FORMAT_32_BIT_FIXED:
|
|
931 text_checking_assert (ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
|
932 * (UINT_32_BIT *) ptr = emchar_to_raw_32_bit_fixed (x, object);
|
|
933 return 4;
|
|
934 default:
|
|
935 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
936 text_checking_assert (emchar_8_bit_fixed_p (x, object));
|
|
937 *ptr = emchar_to_raw_8_bit_fixed (x, object);
|
|
938 return 1;
|
|
939 }
|
|
940 }
|
|
941
|
|
942 /* Retrieve the character pointed to by SRC and store it as
|
|
943 internally-formatted text in DST.
|
|
944 */
|
|
945
|
|
946 DECLARE_INLINE_HEADER (
|
|
947 Bytecount
|
|
948 charptr_copy_emchar (const Intbyte *src, Intbyte *dst)
|
|
949 )
|
|
950 {
|
|
951 return byte_ascii_p (*src) ?
|
|
952 simple_charptr_copy_emchar (src, dst) :
|
|
953 non_ascii_charptr_copy_emchar (src, dst);
|
771
|
954 }
|
|
955
|
|
956 #else /* not MULE */
|
|
957
|
826
|
/* In the non-Mule build every accessor reduces to its single-byte
   "simple_" form.  The FMT and OBJECT arguments of the *_fmt() variants
   are accepted and ignored. */

/* Reading. */
# define charptr_emchar(ptr) \
  simple_charptr_emchar (ptr)
# define charptr_emchar_fmt(ptr, fmt, object) \
  charptr_emchar (ptr)
# define charptr_emchar_ascii_fmt(ptr, fmt, object) \
  charptr_emchar (ptr)
# define charptr_emchar_raw_fmt(ptr, fmt) \
  charptr_emchar (ptr)

/* Writing. */
# define set_charptr_emchar(ptr, x) \
  simple_set_charptr_emchar (ptr, x)
# define set_charptr_emchar_fmt(ptr, x, fmt, obj) \
  set_charptr_emchar (ptr, x)

/* Copying. */
# define charptr_copy_emchar(src, dst) \
  simple_charptr_copy_emchar (src, dst)
|
771
|
965
|
|
966 #endif /* not MULE */
|
|
967
|
826
|
/* Return, as an Emchar, the character found OFFSET characters past PTR:
   charptr_n_addr() locates it and charptr_emchar() decodes it.
*/

#define charptr_emchar_n(ptr, offset) \
  (charptr_emchar (charptr_n_addr (ptr, offset)))
|
|
974
|
|
975
|
|
976 /* ---------------------------- */
|
826
|
977 /* Working with Emchars */
|
771
|
978 /* ---------------------------- */
|
|
979
|
826
|
980 /* NOTE: There are other functions/macros for working with Emchars in
|
|
981 charset.h, for retrieving the charset of an Emchar, the length of an
|
|
982 Emchar when converted to text, etc.
|
|
983 */
|
|
984
|
771
|
985 #ifdef MULE
|
|
986
|
826
|
987 int non_ascii_valid_emchar_p (Emchar ch);
|
|
988
|
|
989 /* Return whether the given Emchar is valid.
|
|
990 */
|
|
991
|
|
992 DECLARE_INLINE_HEADER (
|
|
993 int
|
|
994 valid_emchar_p (Emchar ch)
|
|
995 )
|
771
|
996 {
|
826
|
997 return (! (ch & ~0xFF)) || non_ascii_valid_emchar_p (ch);
|
771
|
998 }
|
|
999
|
|
1000 #else /* not MULE */
|
|
1001
|
826
|
/* Non-Mule build: CH is valid exactly when it has no bits set above the
   low eight.  CH is parenthesized so that an argument containing an
   operator of lower precedence than `&' (e.g. `a | b' or `c ? x : y')
   is tested as a whole. */
#define valid_emchar_p(ch) (! ((ch) & ~0xFF))
|
771
|
1003
|
|
1004 #endif /* not MULE */
|
|
1005
|
826
|
1006 #define CHAR_INTP(x) (INTP (x) && valid_emchar_p (XINT (x)))
|
771
|
1007
|
|
1008 #define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x))
|
|
1009
|
826
|
1010 DECLARE_INLINE_HEADER (
|
|
1011 Emchar
|
771
|
1012 XCHAR_OR_CHAR_INT (Lisp_Object obj)
|
826
|
1013 )
|
771
|
1014 {
|
|
1015 return CHARP (obj) ? XCHAR (obj) : XINT (obj);
|
|
1016 }
|
|
1017
|
826
|
/* Make sure X (an lvalue holding a Lisp_Object) is a character.
   A character is left untouched.  An integer that satisfies CHAR_INTP()
   -- i.e. one whose value passes valid_emchar_p() -- is repackaged in
   place with make_char().  Anything else is passed to
   wrong_type_argument() with Qcharacterp, and the value that call
   returns is stored back into X.  X is evaluated more than once.
*/

#define CHECK_CHAR_COERCE_INT(x) do {                   \
  if (!CHARP (x))                                       \
    {                                                   \
      if (CHAR_INTP (x))                                \
        x = make_char (XINT (x));                       \
      else                                              \
        x = wrong_type_argument (Qcharacterp, x);       \
    }                                                   \
} while (0)
|
|
1031
|
|
1032
|
|
1033
|
|
1034 /************************************************************************/
|
|
1035 /* */
|
826
|
1036 /* working with Lisp strings */
|
|
1037 /* */
|
|
1038 /************************************************************************/
|
|
1039
|
|
/* Length of Lisp string S in characters: its byte length converted with
   string_index_byte_to_char(). */
#define string_char_length(s) \
  string_index_byte_to_char (s, XSTRING_LENGTH (s))

/* The byte at byte offset I of S.  Adding 0 keeps the result from being
   usable as an lvalue. */
#define string_byte(s, i) (0 + XSTRING_DATA (s)[i])

/* Store byte C at byte offset I of S.  Kept as a macro in case we ever
   allow strings to be in a different format ... */
#define set_string_byte(s, i, c) (XSTRING_DATA (s)[i] = (c))
|
|
1045
|
|
/* Assert (when text checking is enabled) that X is a valid character
   index into Lisp string S, i.e. 0 <= X <= string_char_length (S).
   Both arguments are evaluated more than once; every use of X is
   parenthesized so that an expression argument is compared as a whole. */
#define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= string_char_length (s));     \
} while (0)

/* Assert (when text checking is enabled) that X is a valid byte index
   into Lisp string S: 0 <= X <= XSTRING_LENGTH (S), and the address at
   that offset satisfies valid_charptr_p().  Both arguments are evaluated
   more than once; every use of X is parenthesized so that an expression
   argument is compared as a whole. */
#define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s));         \
  text_checking_assert (valid_charptr_p (string_byte_addr (s, x)));     \
} while (0)
|
|
1054
|
|
/* Address of the text at byte offset I within Lisp string S. */
#define string_byte_addr(s, i) (&XSTRING_DATA (s)[i])

/* Inverse of the above: the byte offset within S of the text that PTR
   points to, as a Bytecount. */
#define string_addr_to_byte(s, ptr) ((Bytecount) ((ptr) - XSTRING_DATA (s)))

/* The Emchar located at *CHARACTER* (not byte) offset I of S. */
#define string_emchar(s, i) charptr_emchar (string_char_addr (s, i))
|
|
1061
|
|
1062 #ifdef ERROR_CHECK_TEXT
|
|
1063 #define SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1064 #endif
|
|
1065
|
|
1066 #ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1067 void sledgehammer_check_ascii_begin (Lisp_Object str);
|
|
1068 #else
|
|
1069 #define sledgehammer_check_ascii_begin(str)
|
|
1070 #endif
|
|
1071
|
|
/* Copy the text of Lisp string S, plus one extra byte following it, into
   a block obtained with alloca_array(), and store the address of that
   block in LVAL.  S is evaluated once; LVAL must be an lvalue. */
#define LISP_STRING_TO_ALLOCA(s, lval)                                  \
do {                                                                    \
  Intbyte **_lta_dst = (Intbyte **) &(lval);                            \
  Lisp_Object _lta_src = (s);                                           \
  *_lta_dst = alloca_array (Intbyte, 1 + XSTRING_LENGTH (_lta_src));    \
  memcpy (*_lta_dst, XSTRING_DATA (_lta_src),                           \
          1 + XSTRING_LENGTH (_lta_src));                               \
} while (0)
|
|
1080
|
|
/* Copy the text at P, whose length is measured with qxestrlen(), plus
   one extra byte following it, into a block obtained with
   alloca_array(), and store the address of that block in LVAL.
   P is evaluated once; LVAL must be an lvalue. */
#define INTBYTE_STRING_TO_ALLOCA(p, lval)               \
do {                                                    \
  Intbyte **_bsta_dst = (Intbyte **) &(lval);           \
  const Intbyte *_bsta_src = (p);                       \
  Bytecount _bsta_len = qxestrlen (_bsta_src);          \
  *_bsta_dst = alloca_array (Intbyte, 1 + _bsta_len);   \
  memcpy (*_bsta_dst, _bsta_src, 1 + _bsta_len);        \
} while (0)

/* Shorthands for alloca_array() with NUM elements of Intbyte or
   Extbyte. */
#define alloca_intbytes(num) alloca_array (Intbyte, num)
#define alloca_extbytes(num) alloca_array (Extbyte, num)
|
|
1093
|
|
1094 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta);
|
|
1095
|
|
1096 /* Convert a byte index into a string into a char index. */
|
|
1097 DECLARE_INLINE_HEADER (
|
|
1098 Charcount
|
|
1099 string_index_byte_to_char (Lisp_Object s, Bytecount idx)
|
|
1100 )
|
|
1101 {
|
|
1102 Charcount retval;
|
|
1103 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx);
|
|
1104 #ifdef MULE
|
|
1105 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1106 retval = (Charcount) idx;
|
|
1107 else
|
|
1108 retval = (XSTRING_ASCII_BEGIN (s) +
|
|
1109 bytecount_to_charcount (XSTRING_DATA (s) +
|
|
1110 XSTRING_ASCII_BEGIN (s),
|
|
1111 idx - XSTRING_ASCII_BEGIN (s)));
|
|
1112 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1113 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx));
|
|
1114 # endif
|
|
1115 #else
|
|
1116 retval = (Charcount) idx;
|
|
1117 #endif
|
|
1118 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will
|
|
1119 call string_index_byte_to_char(). */
|
|
1120 return retval;
|
|
1121 }
|
|
1122
|
|
1123 /* Convert a char index into a string into a byte index. */
|
|
1124 DECLARE_INLINE_HEADER (
|
|
1125 Bytecount
|
|
1126 string_index_char_to_byte (Lisp_Object s, Charcount idx)
|
|
1127 )
|
|
1128 {
|
|
1129 Bytecount retval;
|
|
1130 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx);
|
|
1131 #ifdef MULE
|
|
1132 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s))
|
|
1133 retval = (Bytecount) idx;
|
|
1134 else
|
|
1135 retval = (XSTRING_ASCII_BEGIN (s) +
|
|
1136 charcount_to_bytecount (XSTRING_DATA (s) +
|
|
1137 XSTRING_ASCII_BEGIN (s),
|
|
1138 idx - XSTRING_ASCII_BEGIN (s)));
|
|
1139 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1140 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx));
|
|
1141 # endif
|
|
1142 #else
|
|
1143 retval = (Bytecount) idx;
|
|
1144 #endif
|
|
1145 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval);
|
|
1146 return retval;
|
|
1147 }
|
|
1148
|
|
1149 /* Convert a substring length (starting at byte offset OFF) from bytes to
|
|
1150 chars. */
|
|
1151 DECLARE_INLINE_HEADER (
|
|
1152 Charcount
|
|
1153 string_offset_byte_to_char_len (Lisp_Object s, Bytecount off, Bytecount len)
|
|
1154 )
|
|
1155 {
|
|
1156 Charcount retval;
|
|
1157 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
|
|
1158 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len);
|
|
1159 #ifdef MULE
|
|
1160 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1161 retval = (Charcount) len;
|
|
1162 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1163 retval =
|
|
1164 XSTRING_ASCII_BEGIN (s) - (Charcount) off +
|
|
1165 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
|
|
1166 len - (XSTRING_ASCII_BEGIN (s) - off));
|
|
1167 else
|
|
1168 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len);
|
|
1169 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1170 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len));
|
|
1171 # endif
|
|
1172 #else
|
|
1173 retval = (Charcount) len;
|
|
1174 #endif
|
|
1175 return retval;
|
|
1176 }
|
|
1177
|
|
1178 /* Convert a substring length (starting at byte offset OFF) from chars to
|
|
1179 bytes. */
|
|
1180 DECLARE_INLINE_HEADER (
|
|
1181 Bytecount
|
|
1182 string_offset_char_to_byte_len (Lisp_Object s, Bytecount off, Charcount len)
|
|
1183 )
|
|
1184 {
|
|
1185 Bytecount retval;
|
|
1186 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
|
|
1187 #ifdef MULE
|
|
1188 /* casts to avoid errors from combining Bytecount/Charcount and warnings
|
|
1189 from signed/unsigned comparisons */
|
|
1190 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1191 retval = (Bytecount) len;
|
|
1192 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1193 retval =
|
|
1194 XSTRING_ASCII_BEGIN (s) - off +
|
|
1195 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
|
|
1196 len - (XSTRING_ASCII_BEGIN (s) -
|
|
1197 (Charcount) off));
|
|
1198 else
|
|
1199 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len);
|
|
1200 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1201 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len));
|
|
1202 # endif
|
|
1203 #else
|
|
1204 retval = (Bytecount) len;
|
|
1205 #endif
|
|
1206 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval);
|
|
1207 return retval;
|
|
1208 }
|
|
1209
|
|
1210 DECLARE_INLINE_HEADER (
|
|
1211 const Intbyte *
|
|
1212 string_char_addr (Lisp_Object s, Charcount idx)
|
|
1213 )
|
|
1214 {
|
|
1215 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx);
|
|
1216 }
|
|
1217
|
|
1218 /* WARNING: If you modify an existing string, you must call
|
|
1219 bump_string_modiff() afterwards. */
|
|
1220 #ifdef MULE
|
|
1221 void set_string_char (Lisp_Object s, Charcount i, Emchar c);
|
|
1222 #else
|
|
1223 #define set_string_char(s, i, c) set_string_byte (s, i, c)
|
|
1224 #endif /* not MULE */
|
|
1225
|
|
1226 /* Return index to character before the one at IDX. */
|
|
1227 DECLARE_INLINE_HEADER (
|
|
1228 Bytecount
|
|
1229 prev_string_index (Lisp_Object s, Bytecount idx)
|
|
1230 )
|
|
1231 {
|
|
1232 const Intbyte *ptr = string_byte_addr (s, idx);
|
|
1233 DEC_CHARPTR (ptr);
|
|
1234 return string_addr_to_byte (s, ptr);
|
|
1235 }
|
|
1236
|
|
1237 /* Return index to character after the one at IDX. */
|
|
1238 DECLARE_INLINE_HEADER (
|
|
1239 Bytecount
|
|
1240 next_string_index (Lisp_Object s, Bytecount idx)
|
|
1241 )
|
|
1242 {
|
|
1243 const Intbyte *ptr = string_byte_addr (s, idx);
|
|
1244 INC_CHARPTR (ptr);
|
|
1245 return string_addr_to_byte (s, ptr);
|
|
1246 }
|
|
1247
|
|
1248
|
|
1249 /************************************************************************/
|
|
1250 /* */
|
771
|
1251 /* working with Eistrings */
|
|
1252 /* */
|
|
1253 /************************************************************************/
|
|
1254
|
|
1255 /*
|
|
1256 #### NOTE: This is a work in progress. Neither the API nor especially
|
|
1257 the implementation is finished.
|
|
1258
|
|
1259 NOTE: An Eistring is a structure that makes it easy to work with
|
|
1260 internally-formatted strings of data. It provides operations similar
|
|
1261 in feel to the standard strcpy(), strcat(), strlen(), etc., but
|
|
1262
|
|
1263 (a) it is Mule-correct
|
|
1264 (b) it does dynamic allocation so you never have to worry about size
|
793
|
1265 restrictions
|
|
1266 (c) it comes in an alloca() variety (all allocation is stack-local,
|
|
1267 so there is no need to explicitly clean up) as well as a malloc()
|
|
1268 variety
|
|
1269 (d) it knows its own length, so it does not suffer from standard null
|
|
1270 byte brain-damage -- but it null-terminates the data anyway, so
|
|
1271 it can be passed to standard routines
|
|
1272 (e) it provides a much more powerful set of operations and knows about
|
771
|
1273 all the standard places where string data might reside: Lisp_Objects,
|
|
1274 other Eistrings, Intbyte * data with or without an explicit length,
|
|
1275 ASCII strings, Emchars, etc.
|
793
|
1276 (f) it provides easy operations to convert to/from externally-formatted
|
|
1277 data, and is easier to use than the standard TO_INTERNAL_FORMAT
|
771
|
1278 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal
|
|
1279 and external version of its data, but the external version is only
|
|
1280 initialized or changed when you call eito_external().)
|
|
1281
|
793
|
1282 The idea is to make it as easy to write Mule-correct string manipulation
|
|
1283 code as it is to write normal string manipulation code. We also make
|
|
1284 the API sufficiently general that it can handle multiple internal data
|
|
1285 formats (e.g. some fixed-width optimizing formats and a default variable
|
|
1286 width format) and allows for *ANY* data format we might choose in the
|
|
1287 future for the default format, including UCS2. (In other words, we can't
|
|
1288 assume that the internal format is ASCII-compatible and we can't assume
|
|
1289 it doesn't have embedded null bytes. We do assume, however, that any
|
|
1290 chosen format will have the concept of null-termination.) All of this is
|
|
1291 hidden from the user.
|
771
|
1292
|
|
1293 #### It is really too bad that we don't have a real object-oriented
|
|
1294 language, or at least a language with polymorphism!
|
|
1295
|
|
1296
|
|
1297 **********************************************
|
|
1298 * Declaration *
|
|
1299 **********************************************
|
|
1300
|
|
1301 To declare an Eistring, either put one of the following in the local
|
|
1302 variable section:
|
|
1303
|
|
1304 DECLARE_EISTRING (name);
|
|
1305 Declare a new Eistring. This is a standard local variable declaration
|
|
1306 and can go anywhere in the variable declaration section. NAME itself
|
|
1307 is declared as an Eistring *, and its storage declared on the stack.
|
|
1308
|
|
1309 DECLARE_EISTRING_MALLOC (name);
|
|
1310 Declare a new Eistring, which uses malloc()ed instead of alloca()ed
|
|
1311 data. This is a standard local variable declaration and can go
|
|
1312 anywhere in the variable declaration section. Once you initialize
|
|
1313 the Eistring, you will have to free it using eifree() to avoid
|
793
|
1314 memory leaks. You will need to use this form if you are passing
|
|
1315 an Eistring to any function that modifies it (otherwise, the
|
|
1316 modified data may be in stack space and get overwritten when the
|
|
1317 function returns).
|
771
|
1318
|
|
1319 or use
|
|
1320
|
793
|
1321 Eistring ei;
|
|
1322 void eiinit (Eistring *ei);
|
|
1323 void eiinit_malloc (Eistring *einame);
|
771
|
1324 If you need to put an Eistring elsewhere than in a local variable
|
|
1325 declaration (e.g. in a structure), declare it as shown and then
|
|
1326 call one of the init macros.
|
|
1327
|
|
1328 Also note:
|
|
1329
|
793
|
1330 void eifree (Eistring *ei);
|
771
|
1331 If you declared an Eistring to use malloc() to hold its data,
|
|
1332 or converted it to the heap using eito_malloc(), then this
|
|
1333 releases any data in it and afterwards resets the Eistring
|
|
1334 using eiinit_malloc(). Otherwise, it just resets the Eistring
|
|
1335 using eiinit().
|
|
1336
|
|
1337
|
|
1338 **********************************************
|
|
1339 * Conventions *
|
|
1340 **********************************************
|
|
1341
|
|
1342 - The names of the functions have been chosen, where possible, to
|
|
1343 match the names of str*() functions in the standard C API.
|
|
1344 -
|
|
1345
|
|
1346
|
|
1347 **********************************************
|
|
1348 * Initialization *
|
|
1349 **********************************************
|
|
1350
|
|
1351 void eireset (Eistring *eistr);
|
|
1352 Initialize the Eistring to the empty string.
|
|
1353
|
|
1354 void eicpy_* (Eistring *eistr, ...);
|
|
1355 Initialize the Eistring from somewhere:
|
|
1356
|
|
1357 void eicpy_ei (Eistring *eistr, Eistring *eistr2);
|
|
1358 ... from another Eistring.
|
|
1359 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string);
|
|
1360 ... from a Lisp_Object string.
|
|
1361 void eicpy_ch (Eistring *eistr, Emchar ch);
|
793
|
1362 ... from an Emchar (this can be a conventional C character).
|
771
|
1363
|
|
1364 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string,
|
|
1365 Bytecount off, Charcount charoff,
|
|
1366 Bytecount len, Charcount charlen);
|
|
1367 ... from a section of a Lisp_Object string.
|
|
1368 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf,
|
|
1369 Bytecount off, Charcount charoff,
|
|
1370 Bytecount len, Charcount charlen);
|
|
1371 ... from a section of a Lisp_Object buffer.
|
|
1372 void eicpy_raw (Eistring *eistr, const Intbyte *data, Bytecount len);
|
|
1373 ... from raw internal-format data in the default internal format.
|
|
1374 void eicpy_rawz (Eistring *eistr, const Intbyte *data);
|
|
1375 ... from raw internal-format data in the default internal format
|
|
1376 that is "null-terminated" (the meaning of this depends on the nature
|
|
1377 of the default internal format).
|
|
1378 void eicpy_raw_fmt (Eistring *eistr, const Intbyte *data, Bytecount len,
|
826
|
1379 Internal_Format intfmt, Lisp_Object object);
|
771
|
1380 ... from raw internal-format data in the specified format.
|
|
1381 void eicpy_rawz_fmt (Eistring *eistr, const Intbyte *data,
|
826
|
1382 Internal_Format intfmt, Lisp_Object object);
|
771
|
1383 ... from raw internal-format data in the specified format that is
|
|
1384 "null-terminated" (the meaning of this depends on the nature of
|
|
1385 the specific format).
|
|
1386 void eicpy_c (Eistring *eistr, const Char_ASCII *c_string);
|
|
1387 ... from an ASCII null-terminated string. Non-ASCII characters in
|
|
1388 the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
1389 void eicpy_c_len (Eistring *eistr, const Char_ASCII *c_string, len);
|
|
1390 ... from an ASCII string, with length specified. Non-ASCII characters
|
|
1391 in the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
1392 void eicpy_ext (Eistring *eistr, const Extbyte *extdata,
|
|
1393 Lisp_Object coding_system);
|
|
1394 ... from external null-terminated data, with coding system specified.
|
|
1395 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata,
|
|
1396 Bytecount extlen, Lisp_Object coding_system);
|
|
1397 ... from external data, with length and coding system specified.
|
|
1398 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream);
|
|
1399 ... from an lstream; reads data till eof. Data must be in default
|
|
1400 internal format; otherwise, interpose a decoding lstream.
|
|
1401
|
|
1402
|
|
1403 **********************************************
|
|
1404 * Getting the data out of the Eistring *
|
|
1405 **********************************************
|
|
1406
|
|
1407 Intbyte *eidata (Eistring *eistr);
|
|
1408 Return a pointer to the raw data in an Eistring. This is NOT
|
|
1409 a copy.
|
|
1410
|
|
1411 Lisp_Object eimake_string (Eistring *eistr);
|
|
1412 Make a Lisp string out of the Eistring.
|
|
1413
|
|
1414 Lisp_Object eimake_string_off (Eistring *eistr,
|
|
1415 Bytecount off, Charcount charoff,
|
|
1416 Bytecount len, Charcount charlen);
|
|
1417 Make a Lisp string out of a section of the Eistring.
|
|
1418
|
|
1419 void eicpyout_alloca (Eistring *eistr, LVALUE: Intbyte *ptr_out,
|
|
1420 LVALUE: Bytecount len_out);
|
|
1421 Make an alloca() copy of the data in the Eistring, using the
|
|
1422 default internal format. Due to the nature of alloca(), this
|
|
1423 must be a macro, with all lvalues passed in as parameters.
|
793
|
1424 (More specifically, not all compilers correctly handle using
|
|
1425 alloca() as the argument to a function call -- GCC on x86
|
|
1426 didn't use to, for example.) A pointer to the alloca()ed data
|
|
1427 is stored in PTR_OUT, and the length of the data (not including
|
|
1428 the terminating zero) is stored in LEN_OUT.
|
771
|
1429
|
|
1430 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Intbyte *ptr_out,
|
|
1431 LVALUE: Bytecount len_out,
|
826
|
1432 Internal_Format intfmt, Lisp_Object object);
|
771
|
1433 Like eicpyout_alloca(), but converts to the specified internal
|
|
1434 format. (No formats other than FORMAT_DEFAULT are currently
|
|
1435 implemented, and you get an assertion failure if you try.)
|
|
1436
|
|
1437 Intbyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out);
|
|
1438 Make a malloc() copy of the data in the Eistring, using the
|
|
1439 default internal format. This is a real function. No lvalues
|
|
1440 passed in. Returns the new data, and stores the length (not
|
|
1441 including the terminating zero) using INTLEN_OUT, unless it's
|
|
1442 a NULL pointer.
|
|
1443
|
|
1444 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt,
|
826
|
1445 Bytecount *intlen_out, Lisp_Object object);
|
771
|
1446 Like eicpyout_malloc(), but converts to the specified internal
|
|
1447 format. (No formats other than FORMAT_DEFAULT are currently
|
|
1448 implemented, and you get an assertion failure if you try.)
|
|
1449
|
|
1450
|
|
1451 **********************************************
|
|
1452 * Moving to the heap *
|
|
1453 **********************************************
|
|
1454
|
|
1455 void eito_malloc (Eistring *eistr);
|
|
1456 Move this Eistring to the heap. Its data will be stored in a
|
|
1457 malloc()ed block rather than the stack. Subsequent changes to
|
|
1458 this Eistring will realloc() the block as necessary. Use this
|
|
1459 when you want the Eistring to remain in scope past the end of
|
|
1460 this function call. You will have to manually free the data
|
|
1461 in the Eistring using eifree().
|
|
1462
|
|
1463 void eito_alloca (Eistring *eistr);
|
|
1464 Move this Eistring back to the stack, if it was moved to the
|
|
1465 heap with eito_malloc(). This will automatically free any
|
|
1466 heap-allocated data.
|
|
1467
|
|
1468
|
|
1469
|
|
1470 **********************************************
|
|
1471 * Retrieving the length *
|
|
1472 **********************************************
|
|
1473
|
|
1474 Bytecount eilen (Eistring *eistr);
|
|
1475 Return the length of the internal data, in bytes. See also
|
|
1476 eiextlen(), below.
|
|
1477 Charcount eicharlen (Eistring *eistr);
|
|
1478 Return the length of the internal data, in characters.
|
|
1479
|
|
1480
|
|
1481 **********************************************
|
|
1482 * Working with positions *
|
|
1483 **********************************************
|
|
1484
|
|
1485 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos);
|
|
1486 Convert a char offset to a byte offset.
|
|
1487 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos);
|
|
1488 Convert a byte offset to a char offset.
|
|
1489 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos);
|
|
1490 Increment the given position by one character.
|
|
1491 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
|
|
1492 Increment the given position by N characters.
|
|
1493 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos);
|
|
1494 Decrement the given position by one character.
|
|
1495 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
|
|
1496 Decrement the given position by N characters.
|
|
1497
|
|
1498
|
|
1499 **********************************************
|
|
1500 * Getting the character at a position *
|
|
1501 **********************************************
|
|
1502
|
|
1503 Emchar eigetch (Eistring *eistr, Bytecount bytepos);
|
|
1504 Return the character at a particular byte offset.
|
|
1505 Emchar eigetch_char (Eistring *eistr, Charcount charpos);
|
|
1506 Return the character at a particular character offset.
|
|
1507
|
|
1508
|
|
1509 **********************************************
|
|
1510 * Setting the character at a position *
|
|
1511 **********************************************
|
|
1512
|
|
1513 Emchar eisetch (Eistring *eistr, Bytecount bytepos, Emchar chr);
|
|
1514 Set the character at a particular byte offset.
|
|
1515 Emchar eisetch_char (Eistring *eistr, Charcount charpos, Emchar chr);
|
|
1516 Set the character at a particular character offset.
|
|
1517
|
|
1518
|
|
1519 **********************************************
|
|
1520 * Concatenation *
|
|
1521 **********************************************
|
|
1522
|
|
1523 void eicat_* (Eistring *eistr, ...);
|
|
1524 Concatenate onto the end of the Eistring, with data coming from the
|
|
1525 same places as above:
|
|
1526
|
|
1527 void eicat_ei (Eistring *eistr, Eistring *eistr2);
|
|
1528 ... from another Eistring.
|
|
1529 void eicat_c (Eistring *eistr, Char_ASCII *c_string);
|
|
1530 ... from an ASCII null-terminated string. Non-ASCII characters in
|
|
1531 the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
1532 void eicat_raw (ei, const Intbyte *data, Bytecount len);
|
|
1533 ... from raw internal-format data in the default internal format.
|
|
1534 void eicat_rawz (ei, const Intbyte *data);
|
|
1535 ... from raw internal-format data in the default internal format
|
|
1536 that is "null-terminated" (the meaning of this depends on the nature
|
|
1537 of the default internal format).
|
|
1538 void eicat_lstr (ei, Lisp_Object lisp_string);
|
|
1539 ... from a Lisp_Object string.
|
|
1540 void eicat_ch (ei, Emchar ch);
|
|
1541 ... from an Emchar.
|
|
1542
|
|
1543 (All except the first variety are convenience functions.
|
|
1544 In the general case, create another Eistring from the source.)
|
|
1545
|
|
1546
|
|
1547 **********************************************
|
|
1548 * Replacement *
|
|
1549 **********************************************
|
|
1550
|
|
1551 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1552 Bytecount len, Charcount charlen, ...);
|
|
1553 Replace a section of the Eistring, specifically:
|
|
1554
|
|
1555 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1556 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
1557 ... with another Eistring.
|
|
1558 void eisub_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1559 Bytecount len, Charcount charlen, Char_ASCII *c_string);
|
|
1560 ... with an ASCII null-terminated string. Non-ASCII characters in
|
|
1561 the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
1562 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1563 Bytecount len, Charcount charlen, Emchar ch);
|
|
1564 ... with an Emchar.
|
|
1565
|
|
1566 void eidel (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1567 Bytecount len, Charcount charlen);
|
|
1568 Delete a section of the Eistring.
|
|
1569
|
|
1570
|
|
1571 **********************************************
|
|
1572 * Converting to an external format *
|
|
1573 **********************************************
|
|
1574
|
|
1575 void eito_external (Eistring *eistr, Lisp_Object coding_system);
|
|
1576 Convert the Eistring to an external format and store the result
|
|
1577 in the string. NOTE: Further changes to the Eistring will *NOT*
|
|
1578 change the external data stored in the string. You will have to
|
|
1579 call eito_external() again in such a case if you want the external
|
|
1580 data.
|
|
1581
|
|
1582 Extbyte *eiextdata (Eistring *eistr);
|
|
1583 Return a pointer to the external data stored in the Eistring as
|
|
1584 a result of a prior call to eito_external().
|
|
1585
|
|
1586 Bytecount eiextlen (Eistring *eistr);
|
|
1587 Return the length in bytes of the external data stored in the
|
|
1588 Eistring as a result of a prior call to eito_external().
|
|
1589
|
|
1590
|
|
1591 **********************************************
|
|
1592 * Searching in the Eistring for a character *
|
|
1593 **********************************************
|
|
1594
|
|
1595 Bytecount eichr (Eistring *eistr, Emchar chr);
|
|
1596 Charcount eichr_char (Eistring *eistr, Emchar chr);
|
|
1597 Bytecount eichr_off (Eistring *eistr, Emchar chr, Bytecount off,
|
|
1598 Charcount charoff);
|
|
1599 Charcount eichr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
|
|
1600 Charcount charoff);
|
|
1601 Bytecount eirchr (Eistring *eistr, Emchar chr);
|
|
1602 Charcount eirchr_char (Eistring *eistr, Emchar chr);
|
|
1603 Bytecount eirchr_off (Eistring *eistr, Emchar chr, Bytecount off,
|
|
1604 Charcount charoff);
|
|
1605 Charcount eirchr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
|
|
1606 Charcount charoff);
|
|
1607
|
|
1608
|
|
1609 **********************************************
|
|
1610 * Searching in the Eistring for a string *
|
|
1611 **********************************************
|
|
1612
|
|
1613 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2);
|
|
1614 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2);
|
|
1615 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
|
|
1616 Charcount charoff);
|
|
1617 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2,
|
|
1618 Bytecount off, Charcount charoff);
|
|
1619 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2);
|
|
1620 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2);
|
|
1621 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
|
|
1622 Charcount charoff);
|
|
1623 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2,
|
|
1624 Bytecount off, Charcount charoff);
|
|
1625
|
|
1626 Bytecount eistr_c (Eistring *eistr, Char_ASCII *c_string);
|
|
1627 Charcount eistr_c_char (Eistring *eistr, Char_ASCII *c_string);
|
|
1628 Bytecount eistr_c_off (Eistring *eistr, Char_ASCII *c_string, Bytecount off,
|
|
1629 Charcount charoff);
|
|
1630 Charcount eistr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
|
|
1631 Bytecount off, Charcount charoff);
|
|
1632 Bytecount eirstr_c (Eistring *eistr, Char_ASCII *c_string);
|
|
1633 Charcount eirstr_c_char (Eistring *eistr, Char_ASCII *c_string);
|
|
1634 Bytecount eirstr_c_off (Eistring *eistr, Char_ASCII *c_string,
|
|
1635 Bytecount off, Charcount charoff);
|
|
1636 Charcount eirstr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
|
|
1637 Bytecount off, Charcount charoff);
|
|
1638
|
|
1639
|
|
1640 **********************************************
|
|
1641 * Comparison *
|
|
1642 **********************************************
|
|
1643
|
|
1644 int eicmp_* (Eistring *eistr, ...);
|
|
1645 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1646 Bytecount len, Charcount charlen, ...);
|
|
1647 int eicasecmp_* (Eistring *eistr, ...);
|
|
1648 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1649 Bytecount len, Charcount charlen, ...);
|
|
1650 int eicasecmp_i18n_* (Eistring *eistr, ...);
|
|
1651 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1652 Bytecount len, Charcount charlen, ...);
|
|
1653
|
|
1654 Compare the Eistring with the other data. Return value same as
|
|
1655 from strcmp. The `*' is either `ei' for another Eistring (in
|
|
1656 which case `...' is an Eistring), or `c' for a pure-ASCII string
|
|
1657 (in which case `...' is a pointer to that string). For anything
|
|
1658 more complex, first create an Eistring out of the source.
|
|
1659 Comparison is either simple (`eicmp_...'), ASCII case-folding
|
|
1660 (`eicasecmp_...'), or multilingual case-folding
|
|
1661 (`eicasecmp_i18n_...').
|
|
1662
|
|
1663
|
|
1664 More specifically, the prototypes are:
|
|
1665
|
|
1666 int eicmp_ei (Eistring *eistr, Eistring *eistr2);
|
|
1667 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1668 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
1669 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2);
|
|
1670 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1671 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
1672 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2);
|
|
1673 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off,
|
|
1674 Charcount charoff, Bytecount len,
|
|
1675 Charcount charlen, Eistring *eistr2);
|
|
1676
|
|
1677 int eicmp_c (Eistring *eistr, Char_ASCII *c_string);
|
|
1678 int eicmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1679 Bytecount len, Charcount charlen, Char_ASCII *c_string);
|
|
1680 int eicasecmp_c (Eistring *eistr, Char_ASCII *c_string);
|
|
1681 int eicasecmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1682 Bytecount len, Charcount charlen,
|
|
1683 Char_ASCII *c_string);
|
|
1684 int eicasecmp_i18n_c (Eistring *eistr, Char_ASCII *c_string);
|
|
1685 int eicasecmp_i18n_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1686 Bytecount len, Charcount charlen,
|
|
1687 Char_ASCII *c_string);
|
|
1688
|
|
1689
|
|
1690 **********************************************
|
|
1691 * Case-changing the Eistring *
|
|
1692 **********************************************
|
|
1693
|
|
1694 void eilwr (Eistring *eistr);
|
|
1695 Convert all characters in the Eistring to lowercase.
|
|
1696 void eiupr (Eistring *eistr);
|
|
1697 Convert all characters in the Eistring to uppercase.
|
|
1698 */
|
|
1699
|
|
1700
|
|
1701 /* Principles for writing Eistring functions:
|
|
1702
|
|
1703 (1) Unfortunately, we have to write most of the Eistring functions
|
|
1704 as macros, because of the use of alloca(). The principle used
|
|
1705 below to assure no conflict in local variables is to prefix all
|
|
1706 local variables with "ei" plus a number, which should be unique
|
|
1707 among macros. In practice, when finding a new number, find the
|
|
1708 highest so far used, and add 1.
|
|
1709
|
|
1710 (2) We also suffix the Eistring fields with an _ to avoid problems
|
|
1711 with macro parameters of the same name. (And as the standard
|
|
1712 signal not to access these fields directly.)
|
|
1713
|
|
1714 (3) We maintain both the length in bytes and chars of the data in
|
|
1715 the Eistring at all times, for convenient retrieval by outside
|
|
1716 functions. That means when writing functions that manipulate
|
|
1717 Eistrings, you too need to keep both lengths up to date for all
|
|
1718 data that you work with.
|
|
1719
|
|
1720 (4) When writing a new type of operation (e.g. substitution), you
|
|
1721 will often find yourself working with outside data, and thus
|
|
1722 have a series of related API's, for different forms that the
|
|
1723 outside data is in. Generally, you will want to choose a
|
|
1724 subset of the forms supported by eicpy_*, which has to be
|
|
1725 totally general because that's the fundamental way to get data
|
|
1726 into an Eistring, and once the data is into the string, it
|
|
1727 would be possible to create a whole series of Ei operations that work on
|
|
1728 nothing but Eistrings. Although theoretically nice, in
|
|
1729 practice it's a hassle, so we suggest that you provide
|
|
1730 convenience functions. In particular, there are two paths you
|
|
1731 can take. One is minimalist -- it only allows other Eistrings
|
|
1732 and ASCII data, and Emchars if the particular operation makes
|
|
1733 sense with a character. The other provides interfaces for the
|
|
1734 most commonly-used forms -- Eistring, ASCII data, Lisp string,
|
|
1735 raw internal-format string with length, raw internal-format
|
|
1736 string without, and possibly Emchar. (In the function names,
|
|
1737 these are designated `ei', `c', `lstr', `raw', `rawz', and
|
|
1738 `ch', respectively.)
|
|
1739
|
|
1740 (5) When coding a new type of operation, such as was discussed in
|
|
1741 previous section, the correct approach is to declare a worker
|
|
1742 function that does the work of everything, and is called by the
|
|
1743 other "container" macros that handle the different outside data
|
|
1744 forms. The data coming into the worker function, which
|
|
1745 typically ends in `_1', is in the form of three parameters:
|
|
1746 DATA, LEN, CHARLEN. (See point [3] about having two lengths and
|
|
1747 keeping them in sync.)
|
|
1748
|
|
1749 (6) Handling argument evaluation in macros: We take great care
|
|
1750 never to evaluate any argument more than once in any macro,
|
|
1751 except the initial Eistring parameter. This can and will be
|
|
1752 evaluated multiple times, but it should pretty much always just
|
|
1753 be a simple variable. This means, for example, that if an
|
|
1754 Eistring is the second (not first) argument of a macro, it
|
|
1755 doesn't fall under the "initial Eistring" exemption, so it
|
|
1756 needs protection against multi-evaluation. (Take the address of
|
|
1757 the Eistring structure, store in a temporary variable, and use
|
|
1758 temporary variable for all access to the Eistring.)
|
|
1759 Essentially, we want it to appear as if these Eistring macros
|
|
1760 are functions -- we would like to declare them as functions but
|
|
1761 they use alloca(), so we can't (and we can't make them inline
|
|
1762 functions either -- alloca() is explicitly disallowed in inline
|
|
1763 functions.)
|
|
1764
|
|
1765 (7) Note that our rules regarding multiple evaluation are *more*
|
|
1766 strict than the rules listed above under the heading "working
|
|
1767 with raw internal-format data".
|
|
1768 */
|
|
1769
|
|
1770
|
|
1771 /* ----- Declaration ----- */
|
|
1772
|
|
1773 typedef struct
|
|
1774 {
|
|
1775 /* Data for the Eistring, stored in the default internal format.
|
|
1776 Always includes terminating null. */
|
|
1777 Intbyte *data_;
|
|
1778 /* Total number of bytes allocated in DATA (including null). */
|
|
1779 Bytecount max_size_allocated_;
|
|
1780 Bytecount bytelen_;
|
|
1781 Charcount charlen_;
|
|
1782 int mallocp_;
|
|
1783
|
|
1784 Extbyte *extdata_;
|
|
1785 Bytecount extlen_;
|
|
1786 } Eistring;
|
|
1787
|
|
1788 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init;
|
|
1789
|
|
1790 #define DECLARE_EISTRING(name) \
|
|
1791 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \
|
|
1792 Eistring *name = & __ ## name ## __storage__
|
|
1793 #define DECLARE_EISTRING_MALLOC(name) \
|
|
1794 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \
|
|
1795 Eistring *name = & __ ## name ## __storage__
|
|
1796
|
|
/* Overwrite *EI with the_eistring_zero_init.  Nothing is freed. */
#define eiinit(ei) \
  ((void) (*(ei) = the_eistring_zero_init))

/* Overwrite *EI with the_eistring_malloc_zero_init.  Nothing is freed. */
#define eiinit_malloc(ei) \
  ((void) (*(ei) = the_eistring_malloc_zero_init))
|
|
1806
|
|
1807
|
|
1808 /* ----- Utility ----- */
|
|
1809
|
|
/* Fill in whichever of LEN (bytes) and CHARLEN (characters) was passed
   as -1, deriving it from the other and from the text at PTR.  LEN is
   looked at first.  LEN and CHARLEN must be lvalues and are evaluated
   several times; PTR is evaluated at most once. */
#define eifixup_bytechar(ptr, len, charlen)                     \
do {                                                            \
  if ((len) != -1)                                              \
    {                                                           \
      if ((charlen) == -1)                                      \
        (charlen) = bytecount_to_charcount (ptr, len);          \
    }                                                           \
  else                                                          \
    (len) = charcount_to_bytecount (ptr, charlen);              \
} while (0)
|
|
1819
|
|
/* If LEN (bytes) was passed as -1, compute it from CHARLEN (characters)
   and the text at PTR.  LEN must be an lvalue.  PTR is evaluated at
   most once, the other arguments several times. */
#define eifixup_byte(ptr, len, charlen)                         \
do {                                                            \
  if (-1 == (len))                                              \
    (len) = charcount_to_bytecount (ptr, charlen);              \
} while (0)
|
|
1827
|
|
/* If CHARLEN (characters) was passed as -1, compute it from LEN (bytes)
   and the text at PTR.  CHARLEN must be an lvalue.  PTR is evaluated at
   most once, the other arguments several times. */
#define eifixup_char(ptr, len, charlen)                         \
do {                                                            \
  if (-1 == (charlen))                                          \
    (charlen) = bytecount_to_charcount (ptr, len);              \
} while (0)
|
|
1835
|
|
1836
|
|
1837
|
|
/* Record NEWBYTELEN / NEWCHARLEN as the new lengths of EI and make sure
   its buffer has room for NEWBYTELEN bytes plus a zero terminator.
   Existing data, including the existing terminator, is carried over
   when the buffer has to be replaced.  A fresh terminator is stored at
   the new end only when NEWZ is non-zero and the byte length actually
   changed.  Every argument except EI is evaluated exactly once. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz)                      \
do {                                                                    \
  int ei1prevbytelen = (ei)->bytelen_;                                  \
                                                                        \
  (ei)->charlen_ = (newcharlen);                                        \
  (ei)->bytelen_ = (newbytelen);                                        \
                                                                        \
  if ((ei)->bytelen_ != ei1prevbytelen)                                 \
    {                                                                   \
      int ei1wanted = (ei)->max_size_allocated_;                        \
                                                                        \
      /* Grow by half each round, never settling below 32 bytes. */     \
      while (ei1wanted < (ei)->bytelen_ + 1)                            \
        {                                                               \
          ei1wanted = (int) (ei1wanted * 1.5);                          \
          if (ei1wanted < 32)                                           \
            ei1wanted = 32;                                             \
        }                                                               \
      if (ei1wanted != (ei)->max_size_allocated_)                       \
        {                                                               \
          if (!(ei)->mallocp_)                                          \
            {                                                           \
              /* No realloc for alloca(): get a new block and copy. */  \
              Intbyte *ei1prevdata = (ei)->data_;                       \
              (ei)->data_ = (Intbyte *) alloca (ei1wanted);             \
              if (ei1prevdata)                                          \
                memcpy ((ei)->data_, ei1prevdata, ei1prevbytelen + 1);  \
            }                                                           \
          else                                                          \
            /* xrealloc keeps as much of the old data as fits. */       \
            (ei)->data_ = (Intbyte *) xrealloc ((ei)->data_, ei1wanted); \
          (ei)->max_size_allocated_ = ei1wanted;                        \
        }                                                               \
      if (newz)                                                         \
        (ei)->data_[(ei)->bytelen_] = '\0';                             \
    }                                                                   \
} while (0)
|
|
1879
|
|
/* Resize EI to BYTELEN bytes / CHARLEN characters via EI_ALLOC, then
   copy that many bytes from DATA into it.  DATA is evaluated only after
   the resize has taken place. */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen)   \
do {                                                    \
  EI_ALLOC (ei, bytelen, charlen, 1);                   \
  memcpy ((ei)->data_, data, (ei)->bytelen_);           \
} while (0)
|
|
1885
|
800
|
#ifdef ERROR_CHECK_TEXT
/* Assert that each of the LEN bytes at PTR is an ASCII character, i.e.
   lies in the range 0x00 .. 0x7F inclusive (DEL, 0x7F, is ASCII too).
   Compiles to nothing unless ERROR_CHECK_TEXT is defined. */
#define EI_ASSERT_ASCII(ptr, len)                               \
do {                                                            \
  int ei5;                                                      \
  const Char_ASCII *ei5ptr = (ptr);                             \
  int ei5len = (len);                                           \
                                                                \
  for (ei5 = 0; ei5 < ei5len; ei5++)                            \
    assert (ei5ptr[ei5] >= 0x00 && ei5ptr[ei5] <= 0x7F);        \
} while (0)
/* Same check for a null-terminated string; the terminator itself is
   not examined. */
#define EI_ASSERT_ASCIIZ(ptr)                                   \
do {                                                            \
  const Char_ASCII *ei5p1 = (ptr);                              \
  EI_ASSERT_ASCII (ei5p1, strlen (ei5p1));                      \
} while (0)
#else
#define EI_ASSERT_ASCII(ptr, len)
#define EI_ASSERT_ASCIIZ(ptr)
#endif
|
|
1905
|
|
1906
|
|
1907 /* ----- Initialization ----- */
|
|
1908
|
|
/* Make EI a copy of the Eistring EICPY (bytes and both lengths).
   EICPY is evaluated once. */
#define eicpy_ei(ei, eicpy)                                     \
do {                                                            \
  const Eistring *ei2 = (eicpy);                                \
                                                                \
  EI_ALLOC_AND_COPY (ei, ei2->data_,                            \
                     ei2->bytelen_, ei2->charlen_);             \
} while (0)
|
|
1914
|
|
/* Set EI to the contents of the Lisp string LISP_STRING, which is
   evaluated once. */
#define eicpy_lstr(ei, lisp_string)                             \
do {                                                            \
  Lisp_Object ei3 = (lisp_string);                              \
                                                                \
  EI_ALLOC_AND_COPY (ei,                                        \
                     XSTRING_DATA (ei3),                        \
                     XSTRING_LENGTH (ei3),                      \
                     string_char_length (ei3));                 \
} while (0)
|
|
1921
|
|
/* Set EI to a section of the Lisp string LISP_STRING.  The section
   starts at byte offset OFF (or, when OFF is -1, at character offset
   CHAROFF) and extends for LEN bytes / CHARLEN characters; whichever of
   LEN and CHARLEN is -1 is computed from the other.  Every argument
   except EI is evaluated once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen)     \
do {                                                                    \
  Lisp_Object ei23lstr = (lisp_string);                                 \
  int ei23off = (off);                                                  \
  int ei23charoff = (charoff);                                          \
  int ei23len = (len);                                                  \
  int ei23charlen = (charlen);                                          \
  const Intbyte *ei23data = XSTRING_DATA (ei23lstr);                    \
                                                                        \
  eifixup_byte (ei23data, ei23off, ei23charoff);                        \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen);          \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen);     \
} while (0)
|
|
1938
|
826
|
/* Set EI to the LEN bytes of internal-format text at PTR.  FMT must be
   FORMAT_DEFAULT (asserted); OBJECT is not used.  The character length
   is computed from the data.  PTR, FMT and LEN are each evaluated
   once. */
#define eicpy_raw_fmt(ei, ptr, len, fmt, object)                        \
do {                                                                    \
  const Intbyte *ei12ptr = (ptr);                                       \
  Internal_Format ei12fmt = (fmt);                                      \
  int ei12len = (len);                                                  \
                                                                        \
  assert (ei12fmt == FORMAT_DEFAULT);                                   \
  EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len,                              \
                     bytecount_to_charcount (ei12ptr, ei12len));        \
} while (0)

/* eicpy_raw_fmt for text in the default internal format. */
#define eicpy_raw(ei, ptr, len) \
  eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil)
|
|
1951
|
|
/* Set EI to the null-terminated internal-format text at PTR; the length
   is taken with qxestrlen().  FMT must be FORMAT_DEFAULT (asserted).
   PTR and FMT are each evaluated once: the captured format value, not
   the FMT expression, is what gets handed on to eicpy_raw_fmt. */
#define eicpy_rawz_fmt(ei, ptr, fmt, object)                            \
do {                                                                    \
  const Intbyte *ei12p1ptr = (ptr);                                     \
  Internal_Format ei12p1fmt = (fmt);                                    \
                                                                        \
  assert (ei12p1fmt == FORMAT_DEFAULT);                                 \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr),                  \
                 ei12p1fmt, object);                                    \
} while (0)

/* eicpy_rawz_fmt for text in the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil)
|
771
|
1961
|
|
/* Set EI to the single character CH.  The character is first encoded
   into a scratch buffer with set_charptr_emchar(), which also yields
   its byte length. */
#define eicpy_ch(ei, ch)                                        \
do {                                                            \
  Intbyte ei12p2[MAX_EMCHAR_LEN];                               \
  Bytecount ei12p2len = set_charptr_emchar (ei12p2, ch);        \
                                                                \
  EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1);                 \
} while (0)
|
|
1968
|
|
/* Set EI from the null-terminated ASCII string C_STRING.  The string is
   checked with EI_ASSERT_ASCIIZ and then converted as external data in
   the `binary' coding system. */
#define eicpy_c(ei, c_string)                   \
do {                                            \
  const Char_ASCII *ei4 = (c_string);           \
                                                \
  EI_ASSERT_ASCIIZ (ei4);                       \
  eicpy_ext (ei, ei4, Qbinary);                 \
} while (0)

/* Like eicpy_c, but for the C_LEN bytes at C_STRING instead of a
   null-terminated string. */
#define eicpy_c_len(ei, c_string, c_len)        \
do {                                            \
  const Char_ASCII *ei6 = (c_string);           \
  int ei6len = (c_len);                         \
                                                \
  EI_ASSERT_ASCII (ei6, ei6len);                \
  eicpy_ext_len (ei, ei6, ei6len, Qbinary);     \
} while (0)
|
|
1985
|
|
/* Set EI from EXTLEN bytes of external data at EXTDATA, converted to
   internal format according to CODING_SYSTEM.  The conversion stores
   the new data pointer and byte length directly into EI; the allocated
   size and the character length are then brought up to date.
   NOTE(review): the ALLOCA sink is used whatever the value of
   EI->mallocp_ -- confirm that this is safe for malloc'ed Eistrings. */
#define eicpy_ext_len(ei, extdata, extlen, coding_system)               \
do {                                                                    \
  const Extbyte *ei7 = (extdata);                                       \
  int ei7len = (extlen);                                                \
                                                                        \
  TO_INTERNAL_FORMAT (DATA, (ei7, ei7len),                              \
                      ALLOCA, ((ei)->data_, (ei)->bytelen_),            \
                      coding_system);                                   \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1;                       \
  (ei)->charlen_ =                                                      \
    bytecount_to_charcount ((ei)->data_, (ei)->bytelen_);               \
} while (0)
|
|
1997
|
|
/* Like eicpy_ext_len, with the length of EXTDATA obtained from
   dfc_external_data_len().  EXTDATA is evaluated once; CODING_SYSTEM
   appears twice in the expansion. */
#define eicpy_ext(ei, extdata, coding_system)                           \
do {                                                                    \
  const Extbyte *ei8 = (extdata);                                       \
                                                                        \
  eicpy_ext_len (ei, ei8,                                               \
                 dfc_external_data_len (ei8, coding_system),            \
                 coding_system);                                        \
} while (0)
|
|
2005
|
|
/* Placeholders: copying from a Lisp buffer or an lstream does not exist
   yet.  The expansion is deliberately not valid C, so any use fails to
   compile. */
#define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) NOT YET IMPLEMENTED
#define eicpy_lstream(eistr, lstream) NOT YET IMPLEMENTED

/* Make EISTR the empty string. */
#define eireset(eistr) eicpy_rawz (eistr, (Intbyte *) "")
|
|
2013
|
|
2014 /* ----- Getting the data out of the Eistring ----- */
|
|
2015
|
|
/* The internal-format data pointer of EI. */
#define eidata(ei) ((ei)->data_)

/* Build a Lisp string from the whole contents of EI. */
#define eimake_string(ei) make_string (eidata (ei), eilen (ei))
|
|
2019
|
|
/* Build a Lisp string from a section of EISTR.  The section starts at
   byte offset OFF (or, when OFF is -1, at character offset CHAROFF) and
   is LEN bytes long (or, when LEN is -1, CHARLEN characters long).
   NOTE(review): this expands to a statement that executes `return' in
   the *calling* function, so it is only usable as the final statement
   of a function returning Lisp_Object.  It does not yield a value the
   way eimake_string() does; turning it into a real function would fix
   that but needs a definition outside this header. */
#define eimake_string_off(eistr, off, charoff, len, charlen)            \
do {                                                                    \
  int ei24off = (off);                                                  \
  int ei24charoff = (charoff);                                          \
  int ei24len = (len);                                                  \
  int ei24charlen = (charlen);                                          \
                                                                        \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff);                  \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen);        \
                                                                        \
  return make_string ((eistr)->data_ + ei24off, ei24len);               \
} while (0)
|
|
2033
|
|
2034 #define eicpyout_alloca(eistr, ptrout, lenout) \
|
826
|
2035 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil)
|
771
|
2036 #define eicpyout_malloc(eistr, lenout) \
|
826
|
2037 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil)
|
771
|
2038 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out,
|
826
|
2039 Internal_Format fmt, Lisp_Object object);
|
|
/* Copy the contents of EISTR, terminating null included, into freshly
   alloca()ed space.  PTROUT (an `Intbyte *' lvalue) receives the new
   pointer and LENOUT (a `Bytecount' lvalue) the length in bytes, not
   counting the terminator.  FMT must be FORMAT_DEFAULT (asserted);
   OBJECT is not used.  PTROUT, LENOUT and FMT are evaluated once. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object)         \
do {                                                                    \
  Internal_Format ei23fmt = (fmt);                                      \
  /* Address of the caller's pointer variable, hence `Intbyte **'. */   \
  Intbyte **ei23ptrout = &(ptrout);                                     \
  Bytecount *ei23lenout = &(lenout);                                    \
                                                                        \
  assert (ei23fmt == FORMAT_DEFAULT);                                   \
                                                                        \
  *ei23lenout = (eistr)->bytelen_;                                      \
  *ei23ptrout = alloca_array (Intbyte, (eistr)->bytelen_ + 1);          \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1);          \
} while (0)
|
|
2052
|
|
2053 /* ----- Moving to the heap ----- */
|
|
2054
|
|
/* Release whatever EI holds and reset it to its initial state.  For a
   malloc'ed Eistring both the internal and the external buffer are
   xfree()d; otherwise the structure is simply reinitialized. */
#define eifree(ei)                              \
do {                                            \
  if (!(ei)->mallocp_)                          \
    {                                           \
      eiinit (ei);                              \
    }                                           \
  else                                          \
    {                                           \
      if ((ei)->data_)                          \
        xfree ((ei)->data_);                    \
      if ((ei)->extdata_)                       \
        xfree ((ei)->extdata_);                 \
      eiinit_malloc (ei);                       \
    }                                           \
} while (0)
|
|
2068
|
|
2069 int eifind_large_enough_buffer (int oldbufsize, int needed_size);
|
|
2070 void eito_malloc_1 (Eistring *ei);
|
|
2071
|
|
2072 #define eito_malloc(ei) eito_malloc_1 (ei)
|
|
2073
|
|
/* Convert a malloc'ed Eistring into an alloca()-based one: copy the
   internal data (terminating null included) and any external data into
   alloca()ed space, xfree() the old buffers, and clear mallocp_.  Does
   nothing when EI is not malloc'ed.  (The no-op case is handled with a
   guard rather than a `return', which inside a macro would leave the
   calling function.) */
#define eito_alloca(ei)                                                 \
do {                                                                    \
  if ((ei)->mallocp_)                                                   \
    {                                                                   \
      (ei)->mallocp_ = 0;                                               \
      if ((ei)->data_)                                                  \
        {                                                               \
          Intbyte *ei13newdata;                                         \
                                                                        \
          (ei)->max_size_allocated_ =                                   \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1);         \
          ei13newdata = (Intbyte *) alloca ((ei)->max_size_allocated_); \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1);        \
          xfree ((ei)->data_);                                          \
          (ei)->data_ = ei13newdata;                                    \
        }                                                               \
                                                                        \
      if ((ei)->extdata_)                                               \
        {                                                               \
          Extbyte *ei13newdata = (Extbyte *) alloca ((ei)->extlen_ + 2); \
                                                                        \
          memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_);          \
          /* Double null-terminate in case of Unicode data */           \
          ei13newdata[(ei)->extlen_] = '\0';                            \
          ei13newdata[(ei)->extlen_ + 1] = '\0';                        \
          xfree ((ei)->extdata_);                                       \
          (ei)->extdata_ = ei13newdata;                                 \
        }                                                               \
    }                                                                   \
} while (0)
|
|
2103
|
|
2104
|
|
2105 /* ----- Retrieving the length ----- */
|
|
2106
|
|
/* Length of EI in bytes and in characters, respectively. */
#define eilen(ei)     ((ei)->bytelen_)
#define eicharlen(ei) ((ei)->charlen_)


/* ----- Working with positions ----- */

/* Convert between character and byte positions, counting from the
   start of EI's data. */
#define eicharpos_to_bytepos(ei, charpos) charcount_to_bytecount ((ei)->data_, charpos)
#define eibytepos_to_charpos(ei, bytepos) bytecount_to_charcount ((ei)->data_, bytepos)
|
|
2117
|
|
2118 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr,
|
|
2119 Bytecount bytepos,
|
|
2120 Charcount n))
|
|
2121 {
|
|
2122 Intbyte *pos = eistr->data_ + bytepos;
|
814
|
2123 Charcount i;
|
771
|
2124
|
800
|
2125 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
|
|
2126 text_checking_assert (n >= 0 && n <= eistr->charlen_);
|
771
|
2127 /* We could check N more correctly now, but that would require a
|
|
2128 call to bytecount_to_charcount(), which would be needlessly
|
|
2129 expensive (it would convert O(N) algorithms into O(N^2) algorithms
|
800
|
2130 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are
|
771
|
2131 guaranteed to catch it either inside INC_CHARPTR() or in the check
|
|
2132 below. */
|
|
2133 for (i = 0; i < n; i++)
|
|
2134 INC_CHARPTR (pos);
|
800
|
2135 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
|
771
|
2136 return pos - eistr->data_;
|
|
2137 }
|
|
2138
|
|
/* Byte position one character (eiincpos) or N characters (eiincpos_n)
   after BYTEPOS in EI.  There must be no space between the macro name
   and its parameter list, or the macro becomes object-like. */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
|
|
2141
|
|
2142 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr,
|
|
2143 Bytecount bytepos,
|
|
2144 Charcount n))
|
|
2145 {
|
|
2146 Intbyte *pos = eistr->data_ + bytepos;
|
|
2147 int i;
|
|
2148
|
800
|
2149 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
|
|
2150 text_checking_assert (n >= 0 && n <= eistr->charlen_);
|
771
|
2151 /* We could check N more correctly now, but ... see above. */
|
|
2152 for (i = 0; i < n; i++)
|
|
2153 DEC_CHARPTR (pos);
|
800
|
2154 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
|
771
|
2155 return pos - eistr->data_;
|
|
2156 }
|
|
2157
|
|
/* Byte position one character (eidecpos) or N characters (eidecpos_n)
   before BYTEPOS in EI.  There must be no space between the macro name
   and its parameter list, or the macro becomes object-like. */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
|
|
2160
|
|
2161
|
|
2162 /* ----- Getting the character at a position ----- */
|
|
2163
|
|
/* The character at byte position BYTEPOS of EI. */
#define eigetch(ei, bytepos) charptr_emchar ((ei)->data_ + (bytepos))
/* The character at character position CHARPOS of EI. */
#define eigetch_char(ei, charpos) \
  charptr_emchar_n ((ei)->data_, charpos)
|
|
2167
|
|
2168
|
|
2169 /* ----- Setting the character at a position ----- */
|
|
2170
|
|
/* Replace the one character at byte position BYTEPOS of EI with CHR.
   The unknown offset and length are passed to eisub_ch as -1. */
#define eisetch(ei, bytepos, chr) eisub_ch (ei, bytepos, -1, -1, 1, chr)
/* Same, addressing the character by character position CHARPOS. */
#define eisetch_char(ei, charpos, chr) eisub_ch (ei, -1, charpos, -1, 1, chr)
|
|
2175
|
|
2176
|
|
2177 /* ----- Concatenation ----- */
|
|
2178
|
|
/* Worker for the eicat_* macros: append BYTELEN bytes (CHARLEN
   characters) from DATA to EI.  DATA is evaluated only after EI has
   been enlarged; BYTELEN and CHARLEN are evaluated before. */
#define eicat_1(ei, data, bytelen, charlen)                     \
do {                                                            \
  int ei14oldeibytelen = (ei)->bytelen_;                        \
  int ei14bytelen = (bytelen);                                  \
                                                                \
  EI_ALLOC (ei,                                                 \
            (ei)->bytelen_ + ei14bytelen,                       \
            (ei)->charlen_ + (charlen),                         \
            1);                                                 \
  memcpy ((ei)->data_ + ei14oldeibytelen, (data), ei14bytelen); \
} while (0)
|
|
2188
|
|
/* Append the contents of the Eistring EI2 to EI.  EI2 is evaluated
   once. */
#define eicat_ei(ei, ei2)                                               \
do {                                                                    \
  const Eistring *ei9 = (ei2);                                          \
                                                                        \
  eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_);               \
} while (0)

/* Append the null-terminated ASCII string C_STRING to EI.  The string
   is checked with EI_ASSERT_ASCII. */
#define eicat_c(ei, c_string)                                           \
do {                                                                    \
  const Char_ASCII *ei15 = (c_string);                                  \
  int ei15len = strlen (ei15);                                          \
                                                                        \
  EI_ASSERT_ASCII (ei15, ei15len);                                      \
  eicat_1 (ei, ei15, ei15len,                                           \
           bytecount_to_charcount ((Intbyte *) ei15, ei15len));         \
} while (0)
|
|
2204
|
|
/* Append LEN bytes of internal-format text at DATA to EI; the character
   count is computed from the data. */
#define eicat_raw(ei, data, len)                                \
do {                                                            \
  int ei16len = (len);                                          \
  const Intbyte *ei16data = (data);                             \
                                                                \
  eicat_1 (ei, ei16data, ei16len,                               \
           bytecount_to_charcount (ei16data, ei16len));         \
} while (0)

/* Append the null-terminated internal-format text at PTR to EI; the
   length is taken with qxestrlen(). */
#define eicat_rawz(ei, ptr)                                     \
do {                                                            \
  const Intbyte *ei16p5ptr = (ptr);                             \
                                                                \
  eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr));             \
} while (0)
|
|
2218
|
|
/* Append the contents of the Lisp string LISP_STRING to EI. */
#define eicat_lstr(ei, lisp_string)                             \
do {                                                            \
  Lisp_Object ei17 = (lisp_string);                             \
                                                                \
  eicat_1 (ei,                                                  \
           XSTRING_DATA (ei17),                                 \
           XSTRING_LENGTH (ei17),                               \
           string_char_length (ei17));                          \
} while (0)

/* Append the single character CH to EI. */
#define eicat_ch(ei, ch)                                        \
do {                                                            \
  Intbyte ei22ch[MAX_EMCHAR_LEN];                               \
  Bytecount ei22len = set_charptr_emchar (ei22ch, ch);          \
                                                                \
  eicat_1 (ei, ei22ch, ei22len, 1);                             \
} while (0)
|
|
2232
|
|
2233
|
|
2234 /* ----- Replacement ----- */
|
|
2235
|
|
/* Worker for the eisub_* macros: replace the section of EI that starts
   at OFF and is LEN long with the SRCLEN bytes at SRC.  Each of OFF,
   LEN and SRCLEN has a character-count twin (CHAROFF, CHARLEN,
   SRCCHARLEN); in every pair one member may be given as -1 and is then
   computed from the other.  The tail of the string, zero terminator
   included, is shifted to its new place before the source bytes are
   copied in. */

#define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
do {                                                                    \
  int ei18off = (off);                                                  \
  int ei18charoff = (charoff);                                          \
  int ei18len = (len);                                                  \
  int ei18charlen = (charlen);                                          \
  Intbyte *ei18src = (Intbyte *) (src);                                 \
  int ei18srclen = (srclen);                                            \
  int ei18srccharlen = (srccharlen);                                    \
                                                                        \
  int ei18oldeibytelen = (ei)->bytelen_;                                \
                                                                        \
  eifixup_bytechar ((ei)->data_, ei18off, ei18charoff);                 \
  eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen);       \
  eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen);               \
                                                                        \
  EI_ALLOC (ei,                                                         \
            (ei)->bytelen_ + ei18srclen - ei18len,                      \
            (ei)->charlen_ + ei18srccharlen - ei18charlen,              \
            0);                                                         \
  if (ei18srclen != ei18len)                                            \
    /* The "+ 1" carries the zero terminator along with the tail. */    \
    memmove ((ei)->data_ + ei18off + ei18srclen,                        \
             (ei)->data_ + ei18off + ei18len,                           \
             ei18oldeibytelen - (ei18off + ei18len) + 1);               \
  if (ei18srclen > 0)                                                   \
    memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen);                \
} while (0)
|
|
2266
|
|
/* Replace a section of EI with the contents of the Eistring EI2. */
#define eisub_ei(ei, off, charoff, len, charlen, ei2)                   \
do {                                                                    \
  const Eistring *ei19 = (ei2);                                         \
                                                                        \
  eisub_1 (ei, off, charoff, len, charlen,                              \
           ei19->data_, ei19->bytelen_, ei19->charlen_);                \
} while (0)

/* Replace a section of EI with the null-terminated ASCII string
   C_STRING; the character count is left for eisub_1 to compute. */
#define eisub_c(ei, off, charoff, len, charlen, c_string)               \
do {                                                                    \
  const Char_ASCII *ei20 = (c_string);                                  \
  int ei20len = strlen (ei20);                                          \
                                                                        \
  EI_ASSERT_ASCII (ei20, ei20len);                                      \
  eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1);          \
} while (0)

/* Replace a section of EI with the single character CH. */
#define eisub_ch(ei, off, charoff, len, charlen, ch)                    \
do {                                                                    \
  Intbyte ei21ch[MAX_EMCHAR_LEN];                                       \
  Bytecount ei21len = set_charptr_emchar (ei21ch, ch);                  \
                                                                        \
  eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1);         \
} while (0)

/* Delete a section of EI, i.e. replace it with nothing. */
#define eidel(ei, off, charoff, len, charlen) \
  eisub_1 (ei, off, charoff, len, charlen, NULL, 0, 0)
|
|
2291
|
|
2292
|
|
2293 /* ----- Converting to an external format ----- */
|
|
2294
|
|
/* Convert the text of EI to external format according to CODING_SYSTEM
   and store the result in EI's extdata_/extlen_ fields.  For a
   malloc'ed Eistring any earlier external data is xfree()d first and
   the MALLOC sink is used; otherwise the ALLOCA sink is used. */
#define eito_external(ei, coding_system)                                \
do {                                                                    \
  if (!(ei)->mallocp_)                                                  \
    {                                                                   \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),          \
                          ALLOCA, ((ei)->extdata_, (ei)->extlen_),      \
                          coding_system);                               \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      if ((ei)->extdata_)                                               \
        {                                                               \
          xfree ((ei)->extdata_);                                       \
          (ei)->extdata_ = 0;                                           \
        }                                                               \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),          \
                          MALLOC, ((ei)->extdata_, (ei)->extlen_),      \
                          coding_system);                               \
    }                                                                   \
} while (0)

/* The external data stored by eito_external(), and its length in
   bytes. */
#define eiextdata(ei) ((ei)->extdata_)
#define eiextlen(ei)  ((ei)->extlen_)
|
|
2316
|
|
2317
|
|
2318 /* ----- Searching in the Eistring for a character ----- */
|
|
2319
|
|
/* Placeholders for searching an Eistring for a character, forward
   (eichr*) and in reverse (eirchr*).  None of them exists yet; the
   expansion is deliberately not valid C, so any use fails to compile. */
#define eichr(eistr, chr) NOT YET IMPLEMENTED
#define eichr_char(eistr, chr) NOT YET IMPLEMENTED
#define eichr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eichr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_char(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
|
|
2336
|
|
2337
|
|
2338 /* ----- Searching in the Eistring for a string ----- */
|
|
2339
|
|
/* Placeholders for searching an Eistring for another Eistring.  None of
   them exists yet: each macro expands to the bare tokens
   NOT YET IMPLEMENTED, which is not valid C, so any use is expected to
   fail at compile time. */
#define eistr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eistr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
|
|
2356
|
|
/* Placeholders for searching an Eistring for a C string.  None of them
   exists yet: each macro expands to the bare tokens NOT YET IMPLEMENTED,
   which is not valid C, so any use is expected to fail at compile time. */
#define eistr_c(eistr, c_string) NOT YET IMPLEMENTED
#define eistr_c_char(eistr, c_string) NOT YET IMPLEMENTED
#define eistr_c_off(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eistr_c_off_char(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eirstr_c(eistr, c_string) NOT YET IMPLEMENTED
#define eirstr_c_char(eistr, c_string) NOT YET IMPLEMENTED
#define eirstr_c_off(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eirstr_c_off_char(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
|
|
2373
|
|
2374
|
|
2375 /* ----- Comparison ----- */
|
|
2376
|
|
2377 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
|
|
2378 Bytecount len, Charcount charlen, const Intbyte *data,
|
|
2379 const Eistring *ei2, int is_c, int fold_case);
|
|
2380
|
|
/* Compare an Eistring with another Eistring through eicmp_1(), passing a
   null DATA pointer and IS_C = 0.  The *_off_* forms pass the caller's
   OFF, CHAROFF, LEN and CHARLEN; the short forms are the *_off_* forms
   with 0, -1, -1, -1 for those four arguments.  FOLD_CASE is 0 for
   eicmp_*, 1 for eicasecmp_* and 2 for eicasecmp_i18n_*. */
#define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)
#define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)
#define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)

#define eicmp_ei(eistr, eistr2) \
  eicmp_off_ei (eistr, 0, -1, -1, -1, eistr2)
#define eicasecmp_ei(eistr, eistr2) \
  eicasecmp_off_ei (eistr, 0, -1, -1, -1, eistr2)
#define eicasecmp_i18n_ei(eistr, eistr2) \
  eicasecmp_i18n_off_ei (eistr, 0, -1, -1, -1, eistr2)
|
|
2393
|
|
/* Compare an Eistring with C_STRING through eicmp_1(), passing C_STRING
   as DATA, a null EI2 and IS_C = 1.  The *_off_* forms pass the caller's
   OFF, CHAROFF, LEN and CHARLEN; the short forms are the *_off_* forms
   with 0, -1, -1, -1 for those four arguments.  FOLD_CASE is 0 for
   eicmp_*, 1 for eicasecmp_* and 2 for eicasecmp_i18n_*. */
#define eicmp_off_c(eistr, off, charoff, len, charlen, c_string) \
  eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 0)
#define eicasecmp_off_c(eistr, off, charoff, len, charlen, c_string) \
  eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 1)
#define eicasecmp_i18n_off_c(eistr, off, charoff, len, charlen, c_string) \
  eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 2)

#define eicmp_c(eistr, c_string) \
  eicmp_off_c (eistr, 0, -1, -1, -1, c_string)
#define eicasecmp_c(eistr, c_string) \
  eicasecmp_off_c (eistr, 0, -1, -1, -1, c_string)
#define eicasecmp_i18n_c(eistr, c_string) \
  eicasecmp_i18n_off_c (eistr, 0, -1, -1, -1, c_string)
|
|
2406
|
|
2407
|
|
2408 /* ----- Case-changing the Eistring ----- */
|
|
2409
|
|
2410 int eistr_casefiddle_1 (Intbyte *olddata, Bytecount len, Intbyte *newdata,
|
|
2411 int downp);
|
|
2412
|
|
/* Change the case of the Eistring EI; DOWNP is handed straight to
   eistr_casefiddle_1().

   A scratch buffer of (ei)->charlen_ * MAX_EMCHAR_LEN + 1 bytes is
   obtained with alloca_array() and given to eistr_casefiddle_1() along
   with the current data.  If that call returns nonzero, the Eistring is
   switched over to the scratch buffer: data_, bytelen_ and
   max_size_allocated_ are updated, charlen_ is not.

   NOTE(review): the new storage always comes from alloca_array(), even
   when (ei)->mallocp_ is set -- confirm that malloc-backed Eistrings are
   handled correctly by callers. */
#define EI_CASECHANGE(ei, downp) \
do { \
  int ei11bufsize = (ei)->charlen_ * MAX_EMCHAR_LEN + 1; \
  Intbyte *ei11buf = (Intbyte *) alloca_array (Intbyte, ei11bufsize); \
  int ei11result = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, \
                                       ei11buf, downp); \
  \
  if (ei11result) \
    { \
      /* Adopt the new text; charlen is the same. */ \
      (ei)->data_ = ei11buf; \
      (ei)->bytelen_ = ei11result; \
      (ei)->max_size_allocated_ = ei11bufsize; \
    } \
} while (0)
|
|
2429
|
|
/* Case-change an Eistring via EI_CASECHANGE: eilwr() passes DOWNP = 1 and
   eiupr() passes DOWNP = 0. */
#define eilwr(eistr) EI_CASECHANGE (eistr, 1)
#define eiupr(eistr) EI_CASECHANGE (eistr, 0)
|
|
2432
|
|
2433
|
|
2434 /************************************************************************/
|
|
2435 /* */
|
|
2436 /* Converting between internal and external format */
|
|
2437 /* */
|
|
2438 /************************************************************************/
|
|
2439 /*
|
|
2440 All client code should use only the two macros
|
|
2441
|
|
2442 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
|
|
2443 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
|
|
2444
|
|
2445 Typical use is
|
|
2446
|
|
2447 TO_INTERNAL_FORMAT (DATA, (ptr, len),
|
|
2448 LISP_BUFFER, buffer,
|
|
2449 Qfile_name);
|
|
2450
|
|
2451 NOTE: GC is inhibited during the entire operation of these macros. This
|
|
2452 is because frequently the data to be converted comes from strings but
|
|
2453 gets passed in as just DATA, and GC may move around the string data. If
|
|
2454 we didn't inhibit GC, there'd have to be a lot of messy recoding,
|
|
2455 alloca-copying of strings and other annoying stuff.
|
|
2456
|
|
2457 The source or sink can be specified in one of these ways:
|
|
2458
|
|
2459 DATA, (ptr, len), // input data is a fixed buffer of size len
|
|
2460 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len
|
|
2461 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
|
|
2462 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
|
|
2463 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
|
|
2464 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr))
|
|
2465 // on input (the Unicode version is used when correct)
|
|
2466 LISP_STRING, string, // input or output is a Lisp_Object of type string
|
|
2467 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
|
|
2468 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream
|
|
2469 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque
|
|
2470
|
|
2471 When specifying the sink, use lvalues, since the macro will assign to them,
|
|
2472 except when the sink is an lstream or a lisp buffer.
|
|
2473
|
|
2474 The macros accept the kinds of sources and sinks appropriate for
|
|
2475 internal and external data representation. See the type_checking_assert
|
|
2476 macros below for the actual allowed types.
|
|
2477
|
|
2478 Since some sources and sinks use one argument (a Lisp_Object) to
|
|
2479 specify them, while others take a (pointer, length) pair, we use
|
|
2480 some C preprocessor trickery to allow pair arguments to be specified
|
|
2481 by parenthesizing them, as in the examples above.
|
|
2482
|
|
2483 Anything prefixed by dfc_ (`data format conversion') is private.
|
|
2484 They are only used to implement these macros.
|
|
2485
|
|
2486 [[Using C_STRING* is appropriate for using with external APIs that
|
|
2487 take null-terminated strings. For internal data, we should try to
|
|
2488 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'.
|
|
2489
|
|
2490 Sometime in the future we might allow output to C_STRING_ALLOCA or
|
|
2491 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not
|
|
2492 TO_INTERNAL_FORMAT().]]
|
|
2493
|
|
2494 The above comments are not true. Frequently (most of the time, in
|
|
2495 fact), external strings come as zero-terminated entities, where the
|
|
2496 zero-termination is the only way to find out the length. Even in
|
|
2497 cases where you can get the length, most of the time the system will
|
|
2498 still use the null to signal the end of the string, and there will
|
|
2499 still be no way to either send in or receive a string with embedded
|
|
2500 nulls. In such situations, it's pointless to track the length
|
|
2501 because null bytes can never be in the string. We have a lot of
|
|
2502 operations that make it easy to operate on zero-terminated strings,
|
|
2503 and forcing the user to deal with the length everywhere would only
|
|
2504 make the code uglier and more complicated, for no gain. --ben
|
|
2505
|
|
2506 There is no problem using the same lvalue for source and sink.
|
|
2507
|
|
2508 Also, when pointers are required, the code (currently at least) is
|
|
2509 lax and allows any pointer types, either in the source or the sink.
|
|
2510 This makes it possible, e.g., to deal with internal format data held
|
|
2511 in char *'s or external format data held in WCHAR * (i.e. Unicode).
|
|
2512
|
|
2513 Finally, whenever storage allocation is called for, extra space is
|
|
2514 allocated for a terminating zero, and such a zero is stored in the
|
|
2515 appropriate place, regardless of whether the source data was
|
|
2516 specified using a length or was specified as zero-terminated. This
|
|
2517 allows you to freely pass the resulting data, no matter how
|
|
2518 obtained, to a routine that expects zero termination (modulo, of
|
|
2519 course, that any embedded zeros in the resulting text will cause
|
|
2520 truncation). In fact, currently two terminating zeros are allocated
|
|
2521 and stored after the data result. This is to allow for the
|
|
2522 possibility of storing a Unicode value on output, which needs the
|
|
2523 two zeros. Currently, however, the two zeros are stored regardless
|
|
2524 of whether the conversion is internal or external and regardless of
|
|
2525 whether the external coding system is in fact Unicode. This
|
|
2526 behavior may change in the future, and you cannot rely on this --
|
|
2527 the most you can rely on is that sink data in Unicode format will
|
|
2528 have two terminating nulls, which combine to form one Unicode null
|
|
2529 character. */
|
|
2530
|
|
/* Convert SOURCE to external format with coding system CODESYS and
   deliver the result to SINK.

   SOURCE_TYPE and SINK_TYPE are bare keywords; they are token-pasted to
   select DFC_TYPE_<type>, DFC_EXT_SOURCE_<type>_TO_ARGS,
   DFC_SINK_<type>_TO_ARGS and DFC_<type>_USE_CONVERTED_DATA.
   type_checking_assert restricts the source type to DATA, C_STRING,
   LISP_STRING, LISP_OPAQUE or LISP_LSTREAM and the sink type to ALLOCA,
   MALLOC, C_STRING_ALLOCA, C_STRING_MALLOC, LISP_LSTREAM or LISP_OPAQUE.

   CODESYS is evaluated once, into the local dfc_codesys.  The dfc_*
   locals declared here are the variables that the helper macros read
   and assign. */
#define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
  Lisp_Object dfc_codesys = (codesys); \
  \
  /* Check that this direction accepts the requested types. */ \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
      DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
  \
  /* Fill in dfc_source / dfc_sink and their simplified types. */ \
  DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
  \
  dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, &dfc_sink); \
  \
  /* Hand the converted data to the caller's sink. */ \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
|
|
2562
|
|
/* Convert SOURCE to internal format with coding system CODESYS and
   deliver the result to SINK.

   SOURCE_TYPE and SINK_TYPE are bare keywords; they are token-pasted to
   select DFC_TYPE_<type>, DFC_INT_SOURCE_<type>_TO_ARGS,
   DFC_SINK_<type>_TO_ARGS and DFC_<type>_USE_CONVERTED_DATA.
   type_checking_assert restricts the source type to DATA, C_STRING,
   LISP_OPAQUE or LISP_LSTREAM and the sink type to ALLOCA, MALLOC,
   C_STRING_ALLOCA, C_STRING_MALLOC, LISP_STRING, LISP_LSTREAM or
   LISP_BUFFER.

   CODESYS is evaluated once, into the local dfc_codesys.  The dfc_*
   locals declared here are the variables that the helper macros read
   and assign. */
#define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
  Lisp_Object dfc_codesys = (codesys); \
  \
  /* Check that this direction accepts the requested types. */ \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
      DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
  \
  /* Fill in dfc_source / dfc_sink and their simplified types. */ \
  DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
  \
  dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, &dfc_sink); \
  \
  /* Hand the converted data to the caller's sink. */ \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
|
|
2594
|
814
|
2595 #ifdef __cplusplus
|
771
|
2596
|
814
|
2597 /* Error if you try to use a union here: "member `struct {anonymous
|
|
2598 union}::{anonymous} {anonymous union}::data' with constructor not allowed
|
|
2599 in union" (Bytecount is a class) */
|
|
2600
|
|
2601 typedef struct
|
|
2602 #else
|
771
|
2603 typedef union
|
814
|
2604 #endif
|
771
|
2605 {
|
|
2606 struct { const void *ptr; Bytecount len; } data;
|
|
2607 Lisp_Object lisp_object;
|
|
2608 } dfc_conversion_data;
|
|
2609
|
|
/* Kinds of conversion source and sink.  The part of each name after
   DFC_TYPE_ is the keyword accepted as source_type / sink_type by
   TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT, which paste it onto the
   DFC_TYPE_ prefix. */
typedef enum dfc_conversion_type
{
  DFC_TYPE_DATA,
  DFC_TYPE_ALLOCA,
  DFC_TYPE_MALLOC,
  DFC_TYPE_C_STRING,
  DFC_TYPE_C_STRING_ALLOCA,
  DFC_TYPE_C_STRING_MALLOC,
  DFC_TYPE_LISP_STRING,
  DFC_TYPE_LISP_LSTREAM,
  DFC_TYPE_LISP_OPAQUE,
  DFC_TYPE_LISP_BUFFER
} dfc_conversion_type;
|
|
2624
|
|
2625 /* WARNING: These use a static buffer. This can lead to disaster if
|
|
2626 these functions are not used *very* carefully. Another reason to only use
|
|
2627 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
|
|
2628 void
|
|
2629 dfc_convert_to_external_format (dfc_conversion_type source_type,
|
|
2630 dfc_conversion_data *source,
|
|
2631 Lisp_Object coding_system,
|
|
2632 dfc_conversion_type sink_type,
|
|
2633 dfc_conversion_data *sink);
|
|
2634 void
|
|
2635 dfc_convert_to_internal_format (dfc_conversion_type source_type,
|
|
2636 dfc_conversion_data *source,
|
|
2637 Lisp_Object coding_system,
|
|
2638 dfc_conversion_type sink_type,
|
|
2639 dfc_conversion_data *sink);
|
|
/* CPP trickery: written directly in front of a parenthesized pair, as in
   `DFC_CPP_CAR val' where val is `(a, b)', these pick out the first or
   the second element of the pair. */
#define DFC_CPP_CAR(first, second) (first)
#define DFC_CPP_CDR(first, second) (second)
|
|
2643
|
|
/* Convert `source' to args for dfc_convert_to_external_format() */

/* DATA source: VAL is a parenthesized (ptr, len) pair, which is split
   with DFC_CPP_CAR / DFC_CPP_CDR into dfc_source.data.  CODESYS is not
   used. */
#define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
  dfc_source.data.ptr = DFC_CPP_CAR val; \
  dfc_source.data.len = DFC_CPP_CDR val; \
} while (0)
|
|
/* C_STRING source: VAL is a pointer to zero-terminated data.  It is
   evaluated once and stored in dfc_source.data.ptr; its strlen() becomes
   dfc_source.data.len, and the source is then treated as plain DATA.
   CODESYS is not used.

   The pointer is viewed as `const char *' for strlen(): data.ptr is a
   `const void *', and nothing here needs to drop that qualifier. */
#define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
  dfc_source.data.len = \
    strlen ((const char *) (dfc_source.data.ptr = (val))); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)
|
|
/* LISP_STRING source: VAL is copied into a local, asserted to satisfy
   STRINGP, and stored in dfc_source.lisp_object.  CODESYS is not used. */
#define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \
  Lisp_Object dfc_src_string = (val); \
  \
  type_checking_assert (STRINGP (dfc_src_string)); \
  dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \
  dfc_source.lisp_object = dfc_src_string; \
} while (0)

/* LISP_LSTREAM source: VAL is copied into a local, asserted to satisfy
   LSTREAMP, and stored in dfc_source.lisp_object.  CODESYS is not used. */
#define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \
  Lisp_Object dfc_src_lstream = (val); \
  \
  type_checking_assert (LSTREAMP (dfc_src_lstream)); \
  dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \
  dfc_source.lisp_object = dfc_src_lstream; \
} while (0)

/* LISP_OPAQUE source: the OPAQUE_DATA and OPAQUE_SIZE of XOPAQUE (VAL)
   become dfc_source.data, and the source is then treated as plain DATA.
   CODESYS is not used. */
#define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \
  Lisp_Opaque *dfc_src_opaque = XOPAQUE (val); \
  \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
  dfc_source.data.ptr = OPAQUE_DATA (dfc_src_opaque); \
  dfc_source.data.len = OPAQUE_SIZE (dfc_src_opaque); \
} while (0)
|
|
2673
|
|
/* Convert `source' to args for dfc_convert_to_internal_format() */

/* DATA, LISP_STRING, LISP_LSTREAM and LISP_OPAQUE sources are set up
   exactly as for dfc_convert_to_external_format(). */
#define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)

/* C_STRING source: VAL is evaluated once and stored in
   dfc_source.data.ptr.  The length is obtained from
   dfc_external_data_len(), which is given the pointer and CODESYS
   (rather than from strlen() as in the external-direction macro).  The
   source is then treated as plain DATA. */
#define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
  dfc_source.data.ptr = (val); \
  dfc_source.data.len = \
    dfc_external_data_len (dfc_source.data.ptr, codesys); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)
|
|
2688
|
|
/* Convert `sink' to args for dfc_convert_to_*_format() */

/* For these six sink types the argument is not looked at here: each
   macro only sets dfc_simplified_sink_type to DFC_TYPE_DATA.  The
   caller's sink is assigned afterwards by the matching
   DFC_*_USE_CONVERTED_DATA macro. */
#define DFC_SINK_ALLOCA_TO_ARGS(sink) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_MALLOC_TO_ARGS(sink) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(sink) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_MALLOC_TO_ARGS(sink) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_STRING_TO_ARGS(sink) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_OPAQUE_TO_ARGS(sink) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
|
|
/* LISP_LSTREAM sink: VAL is copied into a local, asserted to satisfy
   LSTREAMP, and stored in dfc_sink.lisp_object. */
#define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \
  Lisp_Object dfc_sink_lstream = (val); \
  \
  type_checking_assert (LSTREAMP (dfc_sink_lstream)); \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
  dfc_sink.lisp_object = dfc_sink_lstream; \
} while (0)
|
|
/* LISP_BUFFER sink: an output stream is created with
   make_lisp_buffer_output_stream() for the buffer XBUFFER (VAL), at that
   buffer's BUF_PT and with a final argument of 0.  It is stored in
   dfc_sink.lisp_object, so the conversion itself sees an lstream sink.
   The stream is deleted again by DFC_LISP_BUFFER_USE_CONVERTED_DATA. */
#define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \
  struct buffer *dfc_sink_buffer = XBUFFER (val); \
  \
  dfc_sink.lisp_object = \
    make_lisp_buffer_output_stream (dfc_sink_buffer, \
                                    BUF_PT (dfc_sink_buffer), 0); \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
} while (0)
|
|
2715
|
|
/* Assign to the `sink' lvalue(s) using the converted data. */

/* The macros that follow copy dfc_sink.data.len + 2 bytes: the two extra
   bytes are the double zero terminator that accounts for Unicode
   conversion.

   dfc_aliasing_voidpp is the pointer type through which those macros
   store the new buffer into the caller's pointer variable: the address of
   that variable is cast to this type and the `p' member is assigned, so
   the variable may have any pointer type. */
typedef union
{
  char c;
  void *p;
} *dfc_aliasing_voidpp;
|
|
/* ALLOCA sink: SINK is a parenthesized (ptr, len) pair of lvalues.  The
   converted data plus its two trailing bytes is copied into an alloca()ed
   block; the pointer lvalue is set to the block (through
   dfc_aliasing_voidpp) and the length lvalue to dfc_sink.data.len. */
#define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = alloca (dfc_sink.data.len + 2); \
  \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_copy; \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)

/* MALLOC sink: as for ALLOCA, but the block comes from xmalloc(). */
#define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = xmalloc (dfc_sink.data.len + 2); \
  \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_copy; \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)
|
|
/* C_STRING_ALLOCA sink: SINK is a single pointer lvalue.  The converted
   data plus its two trailing bytes is copied into an alloca()ed block and
   SINK is set to that block (through dfc_aliasing_voidpp).  The length is
   not reported to the caller. */
#define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = alloca (dfc_sink.data.len + 2); \
  \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_copy; \
} while (0)

/* C_STRING_MALLOC sink: as for C_STRING_ALLOCA, but the block comes from
   xmalloc(). */
#define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = xmalloc (dfc_sink.data.len + 2); \
  \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_copy; \
} while (0)
|
|
/* LISP_STRING sink: SINK is assigned the result of make_string() on the
   converted data and its length. */
#define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
  sink = make_string ((Intbyte *) dfc_sink.data.ptr, dfc_sink.data.len)

/* LISP_OPAQUE sink: SINK is assigned the result of make_opaque() on the
   converted data and its length. */
#define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
  sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)

/* LISP_LSTREAM sink: expands to nothing -- data already used. */
#define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink)

/* LISP_BUFFER sink: delete the lstream held in dfc_sink.lisp_object (the
   one DFC_SINK_LISP_BUFFER_TO_ARGS created).  SINK itself is not used. */
#define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
  Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
|
|
2748
|
|
/* Convenience macros for extremely common invocations.  Each one is a
   fixed choice of source and sink type for TO_EXTERNAL_FORMAT or
   TO_INTERNAL_FORMAT; the plain forms use a C_STRING_ALLOCA sink and the
   *_MALLOC forms a C_STRING_MALLOC sink. */

/* Zero-terminated internal string -> external format. */
#define C_STRING_TO_EXTERNAL(src, dst, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, src, C_STRING_ALLOCA, dst, codesys)
#define C_STRING_TO_EXTERNAL_MALLOC(src, dst, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, src, C_STRING_MALLOC, dst, codesys)

/* Zero-terminated external string -> internal format. */
#define EXTERNAL_TO_C_STRING(src, dst, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, src, C_STRING_ALLOCA, dst, codesys)
#define EXTERNAL_TO_C_STRING_MALLOC(src, dst, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, src, C_STRING_MALLOC, dst, codesys)

/* Lisp string -> external format. */
#define LISP_STRING_TO_EXTERNAL(src, dst, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, src, C_STRING_ALLOCA, dst, codesys)
#define LISP_STRING_TO_EXTERNAL_MALLOC(src, dst, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, src, C_STRING_MALLOC, dst, codesys)
|
|
2762
|
|
/* Standins for various encodings, until we know them better.  Each name
   is simply an alias for an existing coding-system symbol. */
#define Qcommand_argument_encoding        Qnative
#define Qenvironment_variable_encoding    Qnative
#define Qunix_host_name_encoding          Qnative
#define Qunix_service_name_encoding       Qnative
#define Qmswindows_host_name_encoding     Qmswindows_multibyte
#define Qmswindows_service_name_encoding  Qmswindows_multibyte
|
|
2770
|
|
/* Standins for various X encodings, until we know them better */

/* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext?
   Almost certainly the former.  Use a standin for now. */
#define Qlwlib_encoding           Qnative

#define Qx_atom_name_encoding     Qctext

/* Font names are often stored in atoms, so it gets sticky if this is set
   to something different from the atom-name encoding. */
#define Qx_font_name_encoding     Qctext

#define Qx_color_name_encoding    Qctext

/* The following probably must agree with Qcommand_argument_encoding and
   Qenvironment_variable_encoding. */
#define Qx_display_name_encoding  Qnative

/* Encoding assumed for the text returned by strerror(); used by
   GET_STRERROR. */
#define Qstrerror_encoding        Qnative
|
|
2789
|
|
/* Set VAR to the message for error number NUM.

   NUM is evaluated once.  strerror() is called on it; if that yields a
   non-null string, the string is converted with EXTERNAL_TO_C_STRING
   using Qstrerror_encoding, which leaves VAR pointing at alloca()ed
   storage.  If strerror() returns null, VAR is instead pointed at a
   99-byte buffer from alloca_intbytes() holding "Unknown error <NUM>".

   VAR must be an assignable pointer lvalue.
   NOTE(review): since the result lives in alloca storage in the normal
   path (and presumably in the fallback path as well), it should not
   outlive the calling function -- confirm alloca_intbytes().

   The macro-local names deliberately avoid a leading double underscore,
   which is reserved for the C implementation. */
#define GET_STRERROR(var, num) \
do { \
  int gserr_num_ = (num); \
  Extbyte *gserr_msg_ = strerror (gserr_num_); \
  \
  if (!gserr_msg_) \
    { \
      var = alloca_intbytes (99); \
      qxesprintf (var, "Unknown error %d", gserr_num_); \
    } \
  else \
    EXTERNAL_TO_C_STRING (gserr_msg_, var, Qstrerror_encoding); \
} while (0)
|
|
2803
|
|
2804 #endif /* INCLUDED_text_h_ */
|