771
|
1 /* Header file for text manipulation primitives and macros.
|
|
2 Copyright (C) 1985-1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
3063
|
4 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Ben Wing.
|
771
|
5
|
|
6 This file is part of XEmacs.
|
|
7
|
|
8 XEmacs is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published by the
|
|
10 Free Software Foundation; either version 2, or (at your option) any
|
|
11 later version.
|
|
12
|
|
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with XEmacs; see the file COPYING. If not, write to
|
|
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
21 Boston, MA 02111-1307, USA. */
|
|
22
|
|
23 /* Synched up with: FSF 19.30. */
|
|
24
|
|
25 /* Authorship:
|
|
26
|
|
27 Mostly written by Ben Wing, starting around 1995.
|
|
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz,
|
|
29 designed by Ben Wing based on earlier macros by Ben Wing.
|
|
30 Separated out June 18, 2000 from buffer.h into text.h.
|
|
31 */
|
|
32
|
|
33 #ifndef INCLUDED_text_h_
|
|
34 #define INCLUDED_text_h_
|
|
35
|
912
|
36 #ifdef HAVE_WCHAR_H
|
771
|
37 #include <wchar.h>
|
912
|
38 #else
|
1257
|
39 size_t wcslen (const wchar_t *);
|
912
|
40 #endif
|
1204
|
41 #ifndef HAVE_STRLWR
|
1257
|
42 char *strlwr (char *);
|
1204
|
43 #endif
|
|
44 #ifndef HAVE_STRUPR
|
1257
|
45 char *strupr (char *);
|
1204
|
46 #endif
|
771
|
47
|
1743
|
48 BEGIN_C_DECLS
|
1650
|
49
|
771
|
50 /* ---------------------------------------------------------------------- */
|
|
51 /* Super-basic character properties */
|
|
52 /* ---------------------------------------------------------------------- */
|
|
53
|
|
54 /* These properties define the specifics of how our current encoding fits
|
|
55 in the basic model used for the encoding. Because this model is the same
|
|
56 as is used for UTF-8, all these properties could be defined for it, too.
|
|
57 This would instantly make the rest of this file work with UTF-8 (with
|
|
58 the exception of a few called functions that would need to be redefined).
|
|
59
|
|
60 (UTF-2000 implementers, take note!)
|
|
61 */
|
|
62
|
|
63 /* If you want more than this, you need to include charset.h */
|
|
64
|
|
65 #ifndef MULE
|
|
66
|
826
|
/* Without MULE these properties are compile-time constants: every byte
   is a one-byte character and counts as ASCII.  The macro arguments are
   not evaluated. */
#define rep_bytes_by_first_byte(fb) 1
#define byte_ascii_p(byte)          1
#define MAX_ICHAR_LEN               1
|
771
|
70
|
|
71 #else /* MULE */
|
|
72
|
|
73 /* These are carefully designed to work if BYTE is signed or unsigned. */
|
|
74 /* Note that SPC and DEL are considered ASCII, not control. */
|
|
75
|
826
|
/* Byte classification by masking off the low bits:
     byte_ascii_p -- no bits set above the low 7 (0x00 - 0x7F);
     byte_c0_p    -- no bits set above the low 5 (0x00 - 0x1F);
     byte_c1_p    -- the bits above the low 5 are exactly 0x80
                     (0x80 - 0x9F). */
#define byte_ascii_p(byte) (!((byte) & ~0x7f))
#define byte_c0_p(byte)    (!((byte) & ~0x1f))
#define byte_c1_p(byte)    (((byte) & ~0x1f) == 0x80)
|
771
|
79
|
|
80 /* Does BYTE represent the first byte of a character? */
|
|
81
|
826
|
#ifdef ERROR_CHECK_TEXT

/* Checking variant of ibyte_first_byte_p().  Asserts that BYTE is in the
   range 0 .. 255, attributing a failure to FILE and LINE (the location of
   the macro invocation), then returns nonzero if BYTE is below 0xA0. */
DECLARE_INLINE_HEADER (
int
ibyte_first_byte_p_1 (int byte, const char *file, int line)
)
{
  assert_at_line (byte >= 0 && byte < 256, file, line);
  return byte < 0xA0;
}

#define ibyte_first_byte_p(byte) \
  ibyte_first_byte_p_1 (byte, __FILE__, __LINE__)

#else /* not ERROR_CHECK_TEXT */

/* Non-checking variant: a bare comparison against 0xA0. */
#define ibyte_first_byte_p(byte) ((byte) < 0xA0)

#endif /* ERROR_CHECK_TEXT */
|
|
101
|
|
/* Does BYTE represent the first byte of a multi-byte character? */

#ifdef ERROR_CHECK_TEXT

/* Checking variant: asserts that BYTE is in the range 0 .. 255,
   attributing a failure to FILE and LINE, then returns byte_c1_p (BYTE). */
DECLARE_INLINE_HEADER (
int
ibyte_leading_byte_p_1 (int byte, const char *file, int line)
)
{
  assert_at_line (byte >= 0 && byte < 256, file, line);
  return byte_c1_p (byte);
}

#define ibyte_leading_byte_p(byte) \
  ibyte_leading_byte_p_1 (byte, __FILE__, __LINE__)

#else /* not ERROR_CHECK_TEXT */

/* Non-checking variant: just the C1 range test. */
#define ibyte_leading_byte_p(byte) byte_c1_p (byte)

#endif /* ERROR_CHECK_TEXT */
|
771
|
123
|
|
124 /* Table of number of bytes in the string representation of a character
|
|
125 indexed by the first byte of that representation.
|
|
126
|
|
127 This value can be derived in other ways -- e.g. something like
|
826
|
128 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte))
|
771
|
129 but it's faster this way. */
|
1632
|
130 extern MODULE_API const Bytecount rep_bytes_by_first_byte[0xA0];
|
771
|
131
|
|
132 /* Number of bytes in the string representation of a character. */
|
788
|
133
|
800
|
#ifdef ERROR_CHECK_TEXT

/* Checking lookup in the rep_bytes_by_first_byte[] table.  Asserts that
   FB is a legal index (0 <= FB < 0xA0), attributing a failure to FILE and
   LINE, and returns the table entry. */
DECLARE_INLINE_HEADER (
Bytecount
rep_bytes_by_first_byte_1 (int fb, const char *file, int line)
)
{
  assert_at_line (fb >= 0 && fb < 0xA0, file, line);
  return rep_bytes_by_first_byte[fb];
}

#define rep_bytes_by_first_byte(fb) \
  rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__)

#else /* not ERROR_CHECK_TEXT */

/* Non-checking lookup: index the table directly. */
#define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb])

#endif /* ERROR_CHECK_TEXT */
|
788
|
153
|
826
|
/* Is character C represented by more than one byte in a string in the
   default format?  True for any value of 0x80 or above. */

#define ichar_multibyte_p(c) ((c) >= 0x80)

/* The negation of ichar_multibyte_p(). */

#define ichar_ascii_p(c) (!((c) >= 0x80))

/* Maximum number of bytes per Emacs character when represented as text,
   in any format. */

#define MAX_ICHAR_LEN 4
|
771
|
166
|
826
|
167 #endif /* not MULE */
|
|
168
|
2367
|
169 /* For more discussion, see text.c, "handling non-default formats" */
|
|
170
|
826
|
/* The formats in which text can be stored internally.  The numeric
   values are spelled out; they are the ones the compiler would assign
   implicitly. */
typedef enum internal_format
{
  FORMAT_DEFAULT      = 0,
  FORMAT_8_BIT_FIXED  = 1,
  FORMAT_16_BIT_FIXED = 2,	/* not implemented */
  FORMAT_32_BIT_FIXED = 3	/* not implemented */
} Internal_Format;
|
|
178
|
|
179 #ifdef MULE
|
|
180 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to
|
|
181 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed
|
|
182 values may depend on the buffer, e.g. depending on what language the
|
|
183 text in the buffer is in. */
|
|
184
|
867
|
/* In all of the macros below OBJECT is accepted but not used. */

/* True if Ichar CH can be represented in 8-bit-fixed format, i.e. it has
   no bits set above the low 8. */
#define ichar_8_bit_fixed_p(ch, object) (!((ch) & ~0xff))
/* Convert Ichar CH to an 8-bit int, as will be stored in the buffer. */
#define ichar_to_raw_8_bit_fixed(ch, object) ((Ibyte) (ch))
/* Convert an 8-bit raw value back to an Ichar. */
#define raw_8_bit_fixed_to_ichar(ch, object) ((Ichar) (ch))

/* True if Ichar CH can be represented in 16-bit-fixed format, i.e. it has
   no bits set above the low 16. */
#define ichar_16_bit_fixed_p(ch, object) (!((ch) & ~0xffff))
/* Convert Ichar CH to a 16-bit int, as will be stored in the buffer. */
#define ichar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch))
/* Convert a 16-bit raw value back to an Ichar. */
#define raw_16_bit_fixed_to_ichar(ch, object) ((Ichar) (ch))

/* Convert Ichar CH to a 32-bit int, as will be stored in the buffer. */
#define ichar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch))
/* Convert a 32-bit raw value back to an Ichar. */
#define raw_32_bit_fixed_to_ichar(ch, object) ((Ichar) (ch))
|
826
|
202
|
|
203 /* Return the "raw value" of a character as stored in the buffer. In the
|
|
204 default format, this is just the same as the character. In fixed-width
|
|
205 formats, this is the actual value in the buffer, which will be limited
|
|
206 to the range as established by the format. This is used when searching
|
|
207 for a character in a buffer -- it's faster to convert the character to
|
|
208 the raw value and look for that, than repeatedly convert each raw value
|
|
209 in the buffer into a character. */
|
|
210
|
|
211 DECLARE_INLINE_HEADER (
|
867
|
212 Raw_Ichar
|
2286
|
213 ichar_to_raw (Ichar ch, Internal_Format fmt,
|
|
214 Lisp_Object UNUSED (object))
|
826
|
215 )
|
|
216 {
|
|
217 switch (fmt)
|
|
218 {
|
|
219 case FORMAT_DEFAULT:
|
867
|
220 return (Raw_Ichar) ch;
|
826
|
221 case FORMAT_16_BIT_FIXED:
|
867
|
222 text_checking_assert (ichar_16_bit_fixed_p (ch, object));
|
|
223 return (Raw_Ichar) ichar_to_raw_16_bit_fixed (ch, object);
|
826
|
224 case FORMAT_32_BIT_FIXED:
|
867
|
225 return (Raw_Ichar) ichar_to_raw_32_bit_fixed (ch, object);
|
826
|
226 default:
|
|
227 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
867
|
228 text_checking_assert (ichar_8_bit_fixed_p (ch, object));
|
|
229 return (Raw_Ichar) ichar_to_raw_8_bit_fixed (ch, object);
|
826
|
230 }
|
|
231 }
|
|
232
|
|
233 /* Return whether CH is representable in the given format in the given
|
|
234 object. */
|
|
235
|
|
236 DECLARE_INLINE_HEADER (
|
|
237 int
|
2286
|
238 ichar_fits_in_format (Ichar ch, Internal_Format fmt,
|
|
239 Lisp_Object UNUSED (object))
|
826
|
240 )
|
|
241 {
|
|
242 switch (fmt)
|
|
243 {
|
|
244 case FORMAT_DEFAULT:
|
|
245 return 1;
|
|
246 case FORMAT_16_BIT_FIXED:
|
867
|
247 return ichar_16_bit_fixed_p (ch, object);
|
826
|
248 case FORMAT_32_BIT_FIXED:
|
|
249 return 1;
|
|
250 default:
|
|
251 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
867
|
252 return ichar_8_bit_fixed_p (ch, object);
|
826
|
253 }
|
|
254 }
|
|
255
|
|
256 /* Assuming the formats are the same, return whether the two objects
|
|
257 represent text in exactly the same way. */
|
|
258
|
|
259 DECLARE_INLINE_HEADER (
|
|
260 int
|
2286
|
261 objects_have_same_internal_representation (Lisp_Object UNUSED (srcobj),
|
|
262 Lisp_Object UNUSED (dstobj))
|
826
|
263 )
|
|
264 {
|
|
265 /* &&#### implement this properly when we allow per-object format
|
|
266 differences */
|
|
267 return 1;
|
|
268 }
|
|
269
|
|
270 #else
|
|
271
|
867
|
/* Non-MULE versions: the raw value is the character itself, every
   character fits, and all objects share one representation.  FMT and
   OBJECT are not evaluated. */
#define ichar_to_raw(ch, fmt, object)                             ((Raw_Ichar) (ch))
#define ichar_fits_in_format(ch, fmt, object)                     1
#define objects_have_same_internal_representation(srcobj, dstobj) 1
|
|
275
|
771
|
276 #endif /* MULE */
|
|
277
|
1632
|
278 MODULE_API int dfc_coding_system_is_unicode (Lisp_Object codesys);
|
771
|
279
|
|
280 DECLARE_INLINE_HEADER (
|
|
281 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys)
|
|
282 )
|
|
283 {
|
|
284 if (dfc_coding_system_is_unicode (codesys))
|
|
285 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr);
|
|
286 else
|
|
287 return strlen ((char *) ptr);
|
|
288 }
|
|
289
|
|
290
|
|
291 /************************************************************************/
|
|
292 /* */
|
|
293 /* working with raw internal-format data */
|
|
294 /* */
|
|
295 /************************************************************************/
|
|
296
|
826
|
297 /*
|
|
298 Use the following functions/macros on contiguous text in any of the
|
|
299 internal formats. Those that take a format arg work on all internal
|
|
300 formats; the others work only on the default (variable-width under Mule)
|
|
301 format. If the text you're operating on is known to come from a buffer,
|
|
302 use the buffer-level functions in buffer.h, which automatically know the
|
|
303 correct format and handle the gap.
|
|
304
|
|
305 Some terminology:
|
|
306
|
867
|
307 "itext" appearing in the macros means "internal-format text" -- type
|
|
308 `Ibyte *'. Operations on such pointers themselves, rather than on the
|
|
309 text being pointed to, have "ibyteptr" instead of "itext" in the macro
|
|
310 name. "ichar" in the macro names means an Ichar -- the representation
|
826
|
311 of a character as a single integer rather than a series of bytes, as part
|
867
|
312 of "itext". Many of the macros below are for converting between the
|
826
|
313 two representations of characters.
|
|
314
|
867
|
315 Note also that we try to consistently distinguish between an "Ichar" and
|
826
|
316 a Lisp character. Stuff working with Lisp characters often just says
|
867
|
317 "char", so we consistently use "Ichar" when that's what we're working
|
826
|
318 with. */
|
|
319
|
|
320 /* The three golden rules of macros:
|
771
|
321
|
|
322 1) Anything that's an lvalue can be evaluated more than once.
|
826
|
323
|
|
324 2) Macros where anything else can be evaluated more than once should
|
|
325 have the word "unsafe" in their name (exceptions may be made for
|
|
326 large sets of macros that evaluate arguments of certain types more
|
|
327 than once, e.g. struct buffer * arguments, when clearly indicated in
|
|
328 the macro documentation). These macros are generally meant to be
|
|
329 called only by other macros that have already stored the calling
|
|
330 values in temporary variables.
|
|
331
|
|
332 3) Nothing else can be evaluated more than once. Use inline
|
771
|
333 functions, if necessary, to prevent multiple evaluation.
|
826
|
334
|
|
335 NOTE: The functions and macros below are given full prototypes in their
|
|
336 docs, even when the implementation is a macro. In such cases, passing
|
|
337 an argument of a type other than expected will produce undefined
|
|
338 results. Also, given that macros can do things functions can't (in
|
|
339 particular, directly modify arguments as if they were passed by
|
|
340 reference), the declaration syntax has been extended to include the
|
|
341 call-by-reference syntax from C++, where an & after a type indicates
|
|
342 that the argument is an lvalue and is passed by reference, i.e. the
|
|
343 function can modify its value. (This is equivalent in C to passing a
|
|
344 pointer to the argument, but without the need to explicitly worry about
|
|
345 pointers.)
|
|
346
|
|
347 When to capitalize macros:
|
|
348
|
|
349 -- Capitalize macros doing stuff obviously impossible with (C)
|
|
350 functions, e.g. directly modifying arguments as if they were passed by
|
|
351 reference.
|
|
352
|
|
353 -- Capitalize macros that evaluate *any* argument more than once regardless
|
|
354 of whether that's "allowed" (e.g. buffer arguments).
|
|
355
|
|
356 -- Capitalize macros that directly access a field in a Lisp_Object or
|
|
357 its equivalent underlying structure. In such cases, access through the
|
|
358 Lisp_Object precedes the macro with an X, and access through the underlying
|
|
359 structure doesn't.
|
|
360
|
|
361 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g.
|
|
362 FRAMEP, CHECK_FRAME, etc.
|
|
363
|
|
364 -- Try to avoid capitalizing any other macros.
|
771
|
365 */
|
|
366
|
|
367 /* ---------------------------------------------------------------------- */
|
867
|
368 /* Working with itext's (pointers to internally-formatted text) */
|
771
|
369 /* ---------------------------------------------------------------------- */
|
|
370
|
867
|
371 /* Given an itext, does it point to the beginning of a character?
|
826
|
372 */
|
|
373
|
771
|
/* Under MULE a pointer is valid when the byte it addresses satisfies
   ibyte_first_byte_p(); without MULE every pointer is valid and PTR is
   not evaluated. */
#ifdef MULE
# define valid_ibyteptr_p(ptr) ibyte_first_byte_p (* (ptr))
#else
# define valid_ibyteptr_p(ptr) 1
#endif

/* If error-checking is enabled, assert that the given itext points to
   the beginning of a character.  Otherwise, do nothing. */

#define assert_valid_ibyteptr(ptr) \
  text_checking_assert (valid_ibyteptr_p (ptr))
|
|
385
|
|
386 /* Given an itext (assumed to point at the beginning of a character),
|
826
|
387 modify that pointer so it points to the beginning of the next character.
|
|
388
|
867
|
389 Note that INC_IBYTEPTR() and DEC_IBYTEPTR() have to be written in
|
|
390 completely separate ways. INC_IBYTEPTR() cannot use the DEC_IBYTEPTR()
|
771
|
391 trick of looking for a valid first byte because it might run off
|
867
|
392 the end of the string. DEC_IBYTEPTR() can't use the INC_IBYTEPTR()
|
771
|
393 method because it doesn't have easy access to the first byte of
|
|
394 the character it's moving over. */
|
|
395
|
867
|
/* PTR must be an lvalue; it is evaluated more than once.  The step size
   comes from rep_bytes_by_first_byte() applied to the byte PTR currently
   addresses. */
#define INC_IBYTEPTR(ptr)					\
do {								\
  assert_valid_ibyteptr (ptr);					\
  (ptr) += rep_bytes_by_first_byte (* (ptr));			\
} while (0)
|
|
400
|
1204
|
/* Advance PTR (an lvalue, evaluated more than once) past one character
   of text stored in format FMT: via INC_IBYTEPTR() in the default
   format, by 2 or 4 bytes in the 16-bit and 32-bit fixed formats, and by
   one byte otherwise (asserted to be FORMAT_8_BIT_FIXED).

   FMT is evaluated exactly once; every check, including the one in the
   default case, uses the saved copy.  PTR is parenthesized under the
   (void *) casts so that an argument such as `p + 1' is cast as a
   whole. */
#define INC_IBYTEPTR_FMT(ptr, fmt)					      \
do {									      \
  Internal_Format iif_fmt = (fmt);					      \
  switch (iif_fmt)							      \
    {									      \
    case FORMAT_DEFAULT:						      \
      INC_IBYTEPTR (ptr);						      \
      break;								      \
    case FORMAT_16_BIT_FIXED:						      \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) += 2;							      \
      break;								      \
    case FORMAT_32_BIT_FIXED:						      \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) += 4;							      \
      break;								      \
    default:								      \
      text_checking_assert (iif_fmt == FORMAT_8_BIT_FIXED);		      \
      (ptr)++;								      \
      break;								      \
    }									      \
} while (0)
|
|
423
|
867
|
424 /* Given an itext (assumed to point at the beginning of a character or at
|
826
|
425 the very end of the text), modify that pointer so it points to the
|
|
426 beginning of the previous character.
|
|
427 */
|
771
|
428
|
800
|
/* Step PTR (an lvalue, evaluated more than once) backwards one byte at a
   time until valid_ibyteptr_p() accepts it.  At least one byte is always
   stepped over. */
#ifdef ERROR_CHECK_TEXT
/* The checking build has its own definition so that the other one does
   not produce warnings about an unused dc_ptr1.  After moving, it
   asserts that the distance travelled equals the length recorded for the
   character now pointed at. */
#define DEC_IBYTEPTR(ptr)						      \
do {									      \
  const Ibyte *dc_ptr1 = (ptr);						      \
  do									      \
    (ptr)--;								      \
  while (!valid_ibyteptr_p (ptr));					      \
  text_checking_assert (dc_ptr1 - (ptr) == rep_bytes_by_first_byte (*(ptr))); \
} while (0)
#else /* not ERROR_CHECK_TEXT */
#define DEC_IBYTEPTR(ptr)						      \
do {									      \
  do									      \
    (ptr)--;								      \
  while (!valid_ibyteptr_p (ptr));					      \
} while (0)
#endif /* ERROR_CHECK_TEXT */
|
|
445
|
1204
|
/* Move PTR (an lvalue, evaluated more than once) back over one character
   of text stored in format FMT: via DEC_IBYTEPTR() in the default
   format, by 2 or 4 bytes in the 16-bit and 32-bit fixed formats, and by
   one byte otherwise (asserted to be FORMAT_8_BIT_FIXED).

   FMT is evaluated exactly once; every check, including the one in the
   default case, uses the saved copy.  PTR is parenthesized under the
   (void *) casts so that an argument such as `p + 1' is cast as a
   whole. */
#define DEC_IBYTEPTR_FMT(ptr, fmt)					      \
do {									      \
  Internal_Format dif_fmt = (fmt);					      \
  switch (dif_fmt)							      \
    {									      \
    case FORMAT_DEFAULT:						      \
      DEC_IBYTEPTR (ptr);						      \
      break;								      \
    case FORMAT_16_BIT_FIXED:						      \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) -= 2;							      \
      break;								      \
    case FORMAT_32_BIT_FIXED:						      \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) -= 4;							      \
      break;								      \
    default:								      \
      text_checking_assert (dif_fmt == FORMAT_8_BIT_FIXED);		      \
      (ptr)--;								      \
      break;								      \
    }									      \
} while (0)
|
|
468
|
|
469 #ifdef MULE
|
|
470
|
826
|
471 /* Make sure that PTR is pointing to the beginning of a character. If not,
|
|
472 back up until this is the case. Note that there are not too many places
|
|
473 where it is legitimate to do this sort of thing. It's an error if
|
|
474 you're passed an "invalid" char * pointer. NOTE: PTR *must* be pointing
|
|
475 to a valid part of the string (i.e. not the very end, unless the string
|
|
476 is zero-terminated or something) in order for this function to not cause
|
|
477 crashes.
|
|
478 */
|
|
479
|
771
|
480 /* Note that this reads the byte at *PTR! */
|
|
481
|
867
|
/* Back PTR (an lvalue, evaluated more than once) up one byte at a time
   until valid_ibyteptr_p() accepts it.  Does nothing if PTR is already
   valid.  The decrement is applied to the parenthesized argument so that
   an lvalue expression such as `*pp' is decremented itself rather than
   being re-parsed as `*(pp--)'. */
#define VALIDATE_IBYTEPTR_BACKWARD(ptr)				\
do {								\
  while (!valid_ibyteptr_p (ptr))				\
    (ptr)--;							\
} while (0)
|
|
485
|
826
|
486 /* Make sure that PTR is pointing to the beginning of a character. If not,
|
|
487 move forward until this is the case. Note that there are not too many
|
|
488 places where it is legitimate to do this sort of thing. It's an error
|
|
489 if you're passed an "invalid" char * pointer.
|
|
490 */
|
771
|
491
|
867
|
492 /* This needs to be trickier than VALIDATE_IBYTEPTR_BACKWARD() to avoid the
|
771
|
493 possibility of running off the end of the string. */
|
|
494
|
867
|
/* Works by first backing a copy of PTR up to the start of the character
   it lies within.  If the copy did not move, PTR was already valid and
   is left alone; otherwise PTR is set to that character start and then
   advanced one whole character with INC_IBYTEPTR().  PTR must be an
   lvalue and is evaluated more than once. */
#define VALIDATE_IBYTEPTR_FORWARD(ptr)				\
do {								\
  Ibyte *vif_char_start = (ptr);				\
								\
  VALIDATE_IBYTEPTR_BACKWARD (vif_char_start);			\
  if (vif_char_start != (ptr))					\
    {								\
      (ptr) = vif_char_start;					\
      INC_IBYTEPTR (ptr);					\
    }								\
} while (0)
|
|
504
|
|
505 #else /* not MULE */
|
867
|
/* Without MULE both validators expand to nothing; PTR is not
   evaluated. */
#define VALIDATE_IBYTEPTR_BACKWARD(ptr)
#define VALIDATE_IBYTEPTR_FORWARD(ptr)
|
826
|
508 #endif /* not MULE */
|
|
509
|
|
#ifdef MULE

/* Given an Ibyte string at PTR of size N, possibly with a partial
   character at the end, return the size of the longest substring of
   complete characters.  Does not assume that the byte at *(PTR + N) is
   readable.  Note that there are not too many places where it is
   legitimate to do this sort of thing.  It's an error if you're passed an
   "invalid" offset. */

DECLARE_INLINE_HEADER (
Bytecount
validate_ibyte_string_backward (const Ibyte *ptr, Bytecount n)
)
{
  const Ibyte *last_char;

  if (n == 0)
    return n;

  /* Locate the start of the character that contains the final byte. */
  last_char = ptr + n - 1;
  VALIDATE_IBYTEPTR_BACKWARD (last_char);

  /* If that character does not end exactly at PTR + N, leave it out. */
  if (last_char + rep_bytes_by_first_byte (*last_char) != ptr + n)
    return last_char - ptr;

  return n;
}

#else /* not MULE */

/* Without MULE there are no partial characters: the answer is N. */
#define validate_ibyte_string_backward(ptr, n) (n)

#endif /* MULE */
|
771
|
540
|
2367
|
#ifdef ERROR_CHECK_TEXT
/* Assert that each of the first LEN bytes at PTR is ASCII, i.e. in the
   range 0x00 - 0x7F inclusive.  The upper bound includes DEL (0x7F),
   which is an ASCII character; the lower bound rejects bytes that come
   out negative when Ascbyte is a signed type.  PTR and LEN are each
   evaluated once. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)			\
do {								\
  int aia2;							\
  const Ascbyte *aia2ptr = (ptr);				\
  int aia2len = (len);						\
								\
  for (aia2 = 0; aia2 < aia2len; aia2++)			\
    assert (aia2ptr[aia2] >= 0x00 && aia2ptr[aia2] <= 0x7F);	\
} while (0)
/* Same check applied to the whole zero-terminated string at PTR, as
   measured by strlen(). */
#define ASSERT_ASCTEXT_ASCII(ptr)				\
do {								\
  const Ascbyte *aiaz2 = (ptr);					\
  ASSERT_ASCTEXT_ASCII_LEN (aiaz2, strlen (aiaz2));		\
} while (0)
#else /* not ERROR_CHECK_TEXT */
/* Checking disabled: both macros expand to nothing. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)
#define ASSERT_ASCTEXT_ASCII(ptr)
#endif /* ERROR_CHECK_TEXT */
|
|
560
|
771
|
561 /* -------------------------------------------------------------- */
|
826
|
562 /* Working with the length (in bytes and characters) of a */
|
|
563 /* section of internally-formatted text */
|
771
|
564 /* -------------------------------------------------------------- */
|
|
565
|
826
|
566 #ifdef MULE
|
|
567
|
1632
|
568 MODULE_API Charcount
|
|
569 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len);
|
|
570 MODULE_API Bytecount
|
|
571 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len);
|
826
|
572
|
|
573 /* Given a pointer to a text string and a length in bytes, return
|
|
574 the equivalent length in characters. */
|
|
575
|
|
576 DECLARE_INLINE_HEADER (
|
|
577 Charcount
|
867
|
578 bytecount_to_charcount (const Ibyte *ptr, Bytecount len)
|
826
|
579 )
|
|
580 {
|
|
581 if (len < 20) /* Just a random guess, but it should be more or less correct.
|
|
582 If number of bytes is small, just do a simple loop,
|
|
583 which should be more efficient. */
|
|
584 {
|
|
585 Charcount count = 0;
|
867
|
586 const Ibyte *end = ptr + len;
|
826
|
587 while (ptr < end)
|
|
588 {
|
867
|
589 INC_IBYTEPTR (ptr);
|
826
|
590 count++;
|
|
591 }
|
|
592 /* Bomb out if the specified substring ends in the middle
|
|
593 of a character. Note that we might have already gotten
|
|
594 a core dump above from an invalid reference, but at least
|
|
595 we will get no farther than here.
|
|
596
|
|
597 This also catches len < 0. */
|
|
598 text_checking_assert (ptr == end);
|
|
599
|
|
600 return count;
|
|
601 }
|
|
602 else
|
|
603 return bytecount_to_charcount_fun (ptr, len);
|
|
604 }
|
|
605
|
|
606 /* Given a pointer to a text string and a length in characters, return the
|
|
607 equivalent length in bytes.
|
|
608 */
|
|
609
|
|
610 DECLARE_INLINE_HEADER (
|
|
611 Bytecount
|
867
|
612 charcount_to_bytecount (const Ibyte *ptr, Charcount len)
|
826
|
613 )
|
|
614 {
|
|
615 text_checking_assert (len >= 0);
|
|
616 if (len < 20) /* See above */
|
|
617 {
|
867
|
618 const Ibyte *newptr = ptr;
|
826
|
619 while (len > 0)
|
|
620 {
|
867
|
621 INC_IBYTEPTR (newptr);
|
826
|
622 len--;
|
|
623 }
|
|
624 return newptr - ptr;
|
|
625 }
|
|
626 else
|
|
627 return charcount_to_bytecount_fun (ptr, len);
|
|
628 }
|
|
629
|
2367
|
630 MODULE_API Bytecount
|
|
631 charcount_to_bytecount_down_fun (const Ibyte *ptr, Charcount len);
|
|
632
|
|
633 /* Given a pointer to a text string and a length in bytes, return
|
|
634 the equivalent length in characters of the stretch [PTR - LEN, PTR). */
|
|
635
|
|
636 DECLARE_INLINE_HEADER (
|
|
637 Charcount
|
|
638 bytecount_to_charcount_down (const Ibyte *ptr, Bytecount len)
|
|
639 )
|
|
640 {
|
|
641 /* No need to be clever here */
|
|
642 return bytecount_to_charcount (ptr - len, len);
|
|
643 }
|
|
644
|
|
645 /* Given a pointer to a text string and a length in characters, return the
|
|
646 equivalent length in bytes of the stretch of characters of that length
|
|
647 BEFORE the pointer.
|
|
648 */
|
|
649
|
|
650 DECLARE_INLINE_HEADER (
|
|
651 Bytecount
|
|
652 charcount_to_bytecount_down (const Ibyte *ptr, Charcount len)
|
|
653 )
|
|
654 {
|
|
655 #define SLEDGEHAMMER_CHECK_TEXT
|
|
656 #ifdef SLEDGEHAMMER_CHECK_TEXT
|
|
657 Charcount len1 = len;
|
|
658 Bytecount ret1, ret2;
|
|
659
|
|
660 /* To test the correctness of the function version, always do the
|
|
661 calculation both ways and check that the values are the same. */
|
|
662 text_checking_assert (len >= 0);
|
|
663 {
|
|
664 const Ibyte *newptr = ptr;
|
|
665 while (len1 > 0)
|
|
666 {
|
|
667 DEC_IBYTEPTR (newptr);
|
|
668 len1--;
|
|
669 }
|
|
670 ret1 = ptr - newptr;
|
|
671 }
|
|
672 ret2 = charcount_to_bytecount_down_fun (ptr, len);
|
|
673 text_checking_assert (ret1 == ret2);
|
|
674 return ret1;
|
|
675 #else
|
|
676 text_checking_assert (len >= 0);
|
|
677 if (len < 20) /* See above */
|
|
678 {
|
|
679 const Ibyte *newptr = ptr;
|
|
680 while (len > 0)
|
|
681 {
|
|
682 DEC_IBYTEPTR (newptr);
|
|
683 len--;
|
|
684 }
|
|
685 return ptr - newptr;
|
|
686 }
|
|
687 else
|
|
688 return charcount_to_bytecount_down_fun (ptr, len);
|
|
689 #endif /* SLEDGEHAMMER_CHECK_TEXT */
|
|
690 }
|
|
691
|
826
|
692 /* Given a pointer to a text string in the specified format and a length in
|
|
693 bytes, return the equivalent length in characters.
|
|
694 */
|
|
695
|
|
696 DECLARE_INLINE_HEADER (
|
|
697 Charcount
|
867
|
698 bytecount_to_charcount_fmt (const Ibyte *ptr, Bytecount len,
|
826
|
699 Internal_Format fmt)
|
|
700 )
|
|
701 {
|
|
702 switch (fmt)
|
|
703 {
|
|
704 case FORMAT_DEFAULT:
|
|
705 return bytecount_to_charcount (ptr, len);
|
|
706 case FORMAT_16_BIT_FIXED:
|
1204
|
707 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
826
|
708 return (Charcount) (len << 1);
|
|
709 case FORMAT_32_BIT_FIXED:
|
1204
|
710 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
826
|
711 return (Charcount) (len << 2);
|
|
712 default:
|
|
713 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
714 return (Charcount) len;
|
|
715 }
|
|
716 }
|
|
717
|
|
718 /* Given a pointer to a text string in the specified format and a length in
|
|
719 characters, return the equivalent length in bytes.
|
|
720 */
|
|
721
|
|
722 DECLARE_INLINE_HEADER (
|
|
723 Bytecount
|
867
|
724 charcount_to_bytecount_fmt (const Ibyte *ptr, Charcount len,
|
826
|
725 Internal_Format fmt)
|
|
726 )
|
|
727 {
|
|
728 switch (fmt)
|
|
729 {
|
|
730 case FORMAT_DEFAULT:
|
|
731 return charcount_to_bytecount (ptr, len);
|
|
732 case FORMAT_16_BIT_FIXED:
|
1204
|
733 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
826
|
734 text_checking_assert (!(len & 1));
|
|
735 return (Bytecount) (len >> 1);
|
|
736 case FORMAT_32_BIT_FIXED:
|
|
737 text_checking_assert (!(len & 3));
|
1204
|
738 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
826
|
739 return (Bytecount) (len >> 2);
|
|
740 default:
|
|
741 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
742 return (Bytecount) len;
|
|
743 }
|
|
744 }
|
|
745
|
|
746 #else
|
|
747
|
|
/* Without MULE every conversion between byte counts and character
   counts is a cast of LEN; PTR and FMT are not evaluated. */
#define bytecount_to_charcount(ptr, len)          ((Charcount) (len))
#define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len))
#define charcount_to_bytecount(ptr, len)          ((Bytecount) (len))
#define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len))
|
|
752
|
|
753 #endif /* MULE */
|
|
754
|
|
/* Return the length of the first character at PTR.  Equivalent to
   charcount_to_bytecount (ptr, 1).

   [Since charcount_to_bytecount() is written as inline, a smart compiler
   should really optimize charcount_to_bytecount (ptr, 1) to the same as
   the following, with no error checking.  But since this idiom occurs so
   often, we'll be helpful and define a special macro for it.]
*/

#define itext_ichar_len(ptr) \
  rep_bytes_by_first_byte (*(ptr))
|
826
|
765
|
|
766 /* Return the length of the first character at PTR, which is in the
|
|
767 specified internal format. Equivalent to charcount_to_bytecount_fmt
|
|
768 (ptr, 1, fmt).
|
|
769 */
|
|
770
|
|
771 DECLARE_INLINE_HEADER (
|
|
772 Bytecount
|
2333
|
773 itext_ichar_len_fmt (const Ibyte *USED_IF_MULE_OR_CHECK_TEXT (ptr),
|
|
774 Internal_Format fmt)
|
826
|
775 )
|
|
776 {
|
|
777 switch (fmt)
|
|
778 {
|
|
779 case FORMAT_DEFAULT:
|
867
|
780 return itext_ichar_len (ptr);
|
826
|
781 case FORMAT_16_BIT_FIXED:
|
1204
|
782 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
826
|
783 return 2;
|
|
784 case FORMAT_32_BIT_FIXED:
|
1204
|
785 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
826
|
786 return 4;
|
|
787 default:
|
|
788 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
|
789 return 1;
|
|
790 }
|
|
791 }
|
|
792
|
|
793 /* Return a pointer to the beginning of the character offset N (in
|
|
794 characters) from PTR.
|
|
795 */
|
|
796
|
|
797 DECLARE_INLINE_HEADER (
|
867
|
798 const Ibyte *
|
|
799 itext_n_addr (const Ibyte *ptr, Charcount offset)
|
826
|
800 )
|
771
|
801 {
|
|
802 return ptr + charcount_to_bytecount (ptr, offset);
|
|
803 }
|
|
804
|
867
|
805 /* Given an itext and an offset into the text pointed to by the itext,
|
826
|
806 modify the offset so it points to the beginning of the next character.
|
|
807 */
|
|
808
|
|
/* POS must be an lvalue; PTR and POS are each evaluated more than once.
   The assertion is made on PTR itself, while the step size is taken from
   the byte at PTR + POS. */
#define INC_BYTECOUNT(ptr, pos)					\
do {								\
  assert_valid_ibyteptr (ptr);					\
  (pos) += rep_bytes_by_first_byte (* ((ptr) + (pos)));		\
} while (0)
|
|
813
|
771
|
814 /* -------------------------------------------------------------------- */
|
867
|
815 /* Retrieving or changing the character pointed to by an itext */
|
771
|
816 /* -------------------------------------------------------------------- */
|
|
817
|
867
|
/* Single-byte versions of the get / set / copy operations.  Each touches
   only the first byte; the set and copy forms evaluate to a Bytecount
   of 1. */
#define simple_itext_ichar(ptr) \
  ((Ichar) (ptr)[0])
#define simple_set_itext_ichar(ptr, x) \
  ((ptr)[0] = (Ibyte) (x), (Bytecount) 1)
#define simple_itext_copy_ichar(src, dst) \
  ((dst)[0] = *(src), (Bytecount) 1)
|
771
|
823
|
|
824 #ifdef MULE
|
|
825
|
1632
|
826 MODULE_API Ichar non_ascii_itext_ichar (const Ibyte *ptr);
|
|
827 MODULE_API Bytecount non_ascii_set_itext_ichar (Ibyte *ptr, Ichar c);
|
|
828 MODULE_API Bytecount non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst);
|
867
|
829
|
|
830 /* Retrieve the character pointed to by PTR as an Ichar. */
|
826
|
831
|
|
832 DECLARE_INLINE_HEADER (
|
867
|
833 Ichar
|
|
834 itext_ichar (const Ibyte *ptr)
|
826
|
835 )
|
771
|
836 {
|
826
|
837 return byte_ascii_p (*ptr) ?
|
867
|
838 simple_itext_ichar (ptr) :
|
|
839 non_ascii_itext_ichar (ptr);
|
771
|
840 }
|
|
841
|
826
|
842 /* Retrieve the character pointed to by PTR (a pointer to text in the
|
|
843 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an
|
867
|
844 Ichar.
|
826
|
845
|
|
846 Note: For these and other *_fmt() functions, if you pass in a constant
|
|
847 FMT, the switch will be optimized out of existence. Therefore, there is
|
|
848 no need to create separate versions for the various formats for
|
867
|
849 "efficiency reasons". In fact, we don't really need itext_ichar()
|
826
|
850 and such written separately, but they are used often so it's simpler
|
|
851 that way. */
|
|
852
|
|
853 DECLARE_INLINE_HEADER (
|
867
|
854 Ichar
|
|
855 itext_ichar_fmt (const Ibyte *ptr, Internal_Format fmt,
|
2286
|
856 Lisp_Object UNUSED (object))
|
826
|
857 )
|
|
858 {
|
|
859 switch (fmt)
|
|
860 {
|
|
861 case FORMAT_DEFAULT:
|
867
|
862 return itext_ichar (ptr);
|
826
|
863 case FORMAT_16_BIT_FIXED:
|
1204
|
864 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
867
|
865 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object);
|
826
|
866 case FORMAT_32_BIT_FIXED:
|
1204
|
867 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
867
|
868 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object);
|
826
|
869 default:
|
|
870 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
867
|
871 return raw_8_bit_fixed_to_ichar (*ptr, object);
|
826
|
872 }
|
|
873 }
|
|
874
|
|
875 /* Return the character at PTR (which is in format FMT), suitable for
|
|
876 comparison with an ASCII character. This guarantees that if the
|
|
877 character at PTR is ASCII (range 0 - 127), that character will be
|
|
878 returned; otherwise, some character outside of the ASCII range will be
|
|
879 returned, but not necessarily the character actually at PTR. This will
|
867
|
880 be faster than itext_ichar_fmt() for some formats -- in particular,
|
826
|
881 FORMAT_DEFAULT. */
|
|
882
|
|
883 DECLARE_INLINE_HEADER (
|
867
|
884 Ichar
|
|
885 itext_ichar_ascii_fmt (const Ibyte *ptr, Internal_Format fmt,
|
2286
|
886 Lisp_Object UNUSED (object))
|
826
|
887 )
|
|
888 {
|
|
889 switch (fmt)
|
|
890 {
|
|
891 case FORMAT_DEFAULT:
|
867
|
892 return (Ichar) *ptr;
|
826
|
893 case FORMAT_16_BIT_FIXED:
|
1204
|
894 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
867
|
895 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object);
|
826
|
896 case FORMAT_32_BIT_FIXED:
|
1204
|
897 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
867
|
898 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object);
|
826
|
899 default:
|
|
900 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
867
|
901 return raw_8_bit_fixed_to_ichar (*ptr, object);
|
826
|
902 }
|
|
903 }
|
|
904
|
|
905 /* Return the "raw value" of the character at PTR, in format FMT. This is
|
|
906 useful when searching for a character; convert the character using
|
867
|
907 ichar_to_raw(). */
|
826
|
908
|
|
909 DECLARE_INLINE_HEADER (
|
867
|
910 Raw_Ichar
|
|
911 itext_ichar_raw_fmt (const Ibyte *ptr, Internal_Format fmt)
|
826
|
912 )
|
|
913 {
|
|
914 switch (fmt)
|
|
915 {
|
|
916 case FORMAT_DEFAULT:
|
867
|
917 return (Raw_Ichar) itext_ichar (ptr);
|
826
|
918 case FORMAT_16_BIT_FIXED:
|
1204
|
919 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
867
|
920 return (Raw_Ichar) (* (UINT_16_BIT *) ptr);
|
826
|
921 case FORMAT_32_BIT_FIXED:
|
1204
|
922 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
867
|
923 return (Raw_Ichar) (* (UINT_32_BIT *) ptr);
|
826
|
924 default:
|
|
925 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
867
|
926 return (Raw_Ichar) (*ptr);
|
826
|
927 }
|
|
928 }
|
|
929
|
867
|
930 /* Store the character CH (an Ichar) as internally-formatted text starting
|
826
|
931 at PTR. Return the number of bytes stored.
|
|
932 */
|
|
933
|
|
934 DECLARE_INLINE_HEADER (
|
|
935 Bytecount
|
867
|
936 set_itext_ichar (Ibyte *ptr, Ichar x)
|
826
|
937 )
|
771
|
938 {
|
867
|
939 return !ichar_multibyte_p (x) ?
|
|
940 simple_set_itext_ichar (ptr, x) :
|
|
941 non_ascii_set_itext_ichar (ptr, x);
|
771
|
942 }
|
|
943
|
867
|
944 /* Store the character CH (an Ichar) as internally-formatted text of
|
826
|
945 format FMT starting at PTR, which comes from OBJECT. Return the number
|
|
946 of bytes stored.
|
|
947 */
|
|
948
|
|
949 DECLARE_INLINE_HEADER (
|
|
950 Bytecount
|
867
|
951 set_itext_ichar_fmt (Ibyte *ptr, Ichar x, Internal_Format fmt,
|
2286
|
952 Lisp_Object UNUSED (object))
|
826
|
953 )
|
771
|
954 {
|
826
|
955 switch (fmt)
|
|
956 {
|
|
957 case FORMAT_DEFAULT:
|
867
|
958 return set_itext_ichar (ptr, x);
|
826
|
959 case FORMAT_16_BIT_FIXED:
|
867
|
960 text_checking_assert (ichar_16_bit_fixed_p (x, object));
|
1204
|
961 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
|
867
|
962 * (UINT_16_BIT *) ptr = ichar_to_raw_16_bit_fixed (x, object);
|
826
|
963 return 2;
|
|
964 case FORMAT_32_BIT_FIXED:
|
1204
|
965 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
|
867
|
966 * (UINT_32_BIT *) ptr = ichar_to_raw_32_bit_fixed (x, object);
|
826
|
967 return 4;
|
|
968 default:
|
|
969 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
|
867
|
970 text_checking_assert (ichar_8_bit_fixed_p (x, object));
|
|
971 *ptr = ichar_to_raw_8_bit_fixed (x, object);
|
826
|
972 return 1;
|
|
973 }
|
|
974 }
|
|
975
|
|
976 /* Retrieve the character pointed to by SRC and store it as
|
|
977 internally-formatted text in DST.
|
|
978 */
|
|
979
|
|
980 DECLARE_INLINE_HEADER (
|
|
981 Bytecount
|
867
|
982 itext_copy_ichar (const Ibyte *src, Ibyte *dst)
|
826
|
983 )
|
|
984 {
|
|
985 return byte_ascii_p (*src) ?
|
867
|
986 simple_itext_copy_ichar (src, dst) :
|
|
987 non_ascii_itext_copy_ichar (src, dst);
|
771
|
988 }
|
|
989
|
|
990 #else /* not MULE */
|
|
991
|
867
|
/* Non-MULE build: every accessor maps onto the single-byte simple_*
   macros.  The FMT and OBJECT arguments of the *_fmt variants are
   dropped from the expansion and therefore never evaluated. */
# define itext_ichar(ptr)				simple_itext_ichar (ptr)
# define itext_ichar_fmt(ptr, fmt, object)		simple_itext_ichar (ptr)
# define itext_ichar_ascii_fmt(ptr, fmt, object)	simple_itext_ichar (ptr)
# define itext_ichar_raw_fmt(ptr, fmt)			simple_itext_ichar (ptr)
# define set_itext_ichar(ptr, x)			simple_set_itext_ichar (ptr, x)
# define set_itext_ichar_fmt(ptr, x, fmt, obj)		simple_set_itext_ichar (ptr, x)
# define itext_copy_ichar(src, dst)			simple_itext_copy_ichar (src, dst)
|
771
|
999
|
|
1000 #endif /* not MULE */
|
|
1001
|
826
|
/* Return, as an Ichar, the character found OFFSET characters (not bytes)
   past PTR.  The address is located with itext_n_addr() and the character
   there is read with itext_ichar(). */

#define itext_ichar_n(ptr, offset) \
  itext_ichar (itext_n_addr ((ptr), (offset)))
|
771
|
1008
|
|
1009
|
|
1010 /************************************************************************/
|
|
1011 /* */
|
826
|
1012 /* working with Lisp strings */
|
|
1013 /* */
|
|
1014 /************************************************************************/
|
|
1015
|
|
/* Length of Lisp string S in characters: its byte length converted with
   string_index_byte_to_char().  S is evaluated more than once. */
#define string_char_length(s) \
  string_index_byte_to_char (s, XSTRING_LENGTH (s))

/* Value of the byte at byte offset I in S.  Adding 0 keeps the expansion
   from being usable as an lvalue. */
#define string_byte(s, i) (0 + XSTRING_DATA (s)[i])

/* Store C at byte offset I in S.  Kept as a separate macro in case
   strings are ever allowed to be in a different format ... */
#define set_string_byte(s, i, c) (XSTRING_DATA (s)[i] = (c))
|
|
1021
|
|
/* Assert, via text_checking_assert(), that X is a valid character index
   into Lisp string S, i.e. 0 <= X <= string_char_length (S).

   "UNSAFE" because the arguments are evaluated more than once.  Every use
   of X is parenthesized so that an expression argument (for instance a
   conditional expression) cannot re-associate with the surrounding
   `&&' / `<=' operators and silently skip the upper-bound check.

   Note that string_char_length() calls string_index_byte_to_char(), so
   this macro must not be used from inside that function. */
#define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do {		\
  text_checking_assert ((x) >= 0 && (x) <= string_char_length (s));	\
} while (0)
|
|
1025
|
|
/* Assert, via text_checking_assert(), that X is a valid byte index into
   Lisp string S: first that 0 <= X <= XSTRING_LENGTH (S), then that the
   address of byte X in S satisfies valid_ibyteptr_p().

   "UNSAFE" because the arguments are evaluated more than once.  Every use
   of X in the range check is parenthesized so that an expression argument
   (for instance a conditional expression) cannot re-associate with the
   surrounding `&&' / `<=' operators and silently skip the upper-bound
   check. */
#define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do {		\
  text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s));		\
  text_checking_assert (valid_ibyteptr_p (string_byte_addr (s, x)));	\
} while (0)
|
|
1030
|
|
/* Pointer to the text located at byte offset I within string S. */
#define string_byte_addr(s, i) (XSTRING_DATA (s) + (i))
/* Inverse of the above: byte offset within string S of the text that PTR
   points to. */
#define string_addr_to_byte(s, ptr) ((Bytecount) ((ptr) - XSTRING_DATA (s)))
/* The Ichar found at offset I of string S, where I counts *CHARACTERS*,
   not bytes. */
#define string_ichar(s, i) itext_ichar (string_char_addr (s, i))
|
826
|
1037
|
|
/* SLEDGEHAMMER_CHECK_ASCII_BEGIN gets defined whenever ERROR_CHECK_TEXT
   is defined. */
#if defined (ERROR_CHECK_TEXT)
# define SLEDGEHAMMER_CHECK_ASCII_BEGIN
#endif

#if !defined (SLEDGEHAMMER_CHECK_ASCII_BEGIN)
/* Check disabled: calls expand to nothing. */
# define sledgehammer_check_ascii_begin(str)
#else
void sledgehammer_check_ascii_begin (Lisp_Object str);
#endif
|
|
1047
|
|
/* Copy the data of Lisp string S into storage obtained from
   alloca_ibytes() and store the pointer to the copy in LVAL.  One byte
   more than XSTRING_LENGTH (S) is allocated and copied.  LVAL and S are
   each evaluated exactly once. */
#define LISP_STRING_TO_ALLOCA(s, lval)				\
  do								\
    {								\
      Ibyte **_lta_ = (Ibyte **) &(lval);			\
      Lisp_Object _lta_2 = (s);					\
      Bytecount _lta_3 = 1 + XSTRING_LENGTH (_lta_2);		\
      *_lta_ = alloca_ibytes (_lta_3);				\
      memcpy (*_lta_, XSTRING_DATA (_lta_2), _lta_3);		\
    }								\
  while (0)
|
|
1056
|
|
1057 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta);
|
|
1058
|
|
1059 /* Convert a byte index into a string into a char index. */
|
|
1060 DECLARE_INLINE_HEADER (
|
|
1061 Charcount
|
2333
|
1062 string_index_byte_to_char (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
|
|
1063 Bytecount idx)
|
826
|
1064 )
|
|
1065 {
|
|
1066 Charcount retval;
|
|
1067 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx);
|
|
1068 #ifdef MULE
|
|
1069 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1070 retval = (Charcount) idx;
|
|
1071 else
|
|
1072 retval = (XSTRING_ASCII_BEGIN (s) +
|
|
1073 bytecount_to_charcount (XSTRING_DATA (s) +
|
|
1074 XSTRING_ASCII_BEGIN (s),
|
|
1075 idx - XSTRING_ASCII_BEGIN (s)));
|
|
1076 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1077 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx));
|
|
1078 # endif
|
|
1079 #else
|
|
1080 retval = (Charcount) idx;
|
|
1081 #endif
|
|
1082 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will
|
|
1083 call string_index_byte_to_char(). */
|
|
1084 return retval;
|
|
1085 }
|
|
1086
|
|
1087 /* Convert a char index into a string into a byte index. */
|
|
1088 DECLARE_INLINE_HEADER (
|
|
1089 Bytecount
|
2333
|
1090 string_index_char_to_byte (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
|
|
1091 Charcount idx)
|
826
|
1092 )
|
|
1093 {
|
|
1094 Bytecount retval;
|
|
1095 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx);
|
|
1096 #ifdef MULE
|
|
1097 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s))
|
|
1098 retval = (Bytecount) idx;
|
|
1099 else
|
|
1100 retval = (XSTRING_ASCII_BEGIN (s) +
|
|
1101 charcount_to_bytecount (XSTRING_DATA (s) +
|
|
1102 XSTRING_ASCII_BEGIN (s),
|
|
1103 idx - XSTRING_ASCII_BEGIN (s)));
|
|
1104 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1105 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx));
|
|
1106 # endif
|
|
1107 #else
|
|
1108 retval = (Bytecount) idx;
|
|
1109 #endif
|
|
1110 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval);
|
|
1111 return retval;
|
|
1112 }
|
|
1113
|
|
1114 /* Convert a substring length (starting at byte offset OFF) from bytes to
|
|
1115 chars. */
|
|
1116 DECLARE_INLINE_HEADER (
|
|
1117 Charcount
|
2333
|
1118 string_offset_byte_to_char_len (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
|
|
1119 Bytecount USED_IF_MULE_OR_CHECK_TEXT (off),
|
|
1120 Bytecount len)
|
826
|
1121 )
|
|
1122 {
|
|
1123 Charcount retval;
|
|
1124 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
|
|
1125 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len);
|
|
1126 #ifdef MULE
|
|
1127 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1128 retval = (Charcount) len;
|
|
1129 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1130 retval =
|
|
1131 XSTRING_ASCII_BEGIN (s) - (Charcount) off +
|
|
1132 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
|
|
1133 len - (XSTRING_ASCII_BEGIN (s) - off));
|
|
1134 else
|
|
1135 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len);
|
|
1136 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1137 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len));
|
|
1138 # endif
|
|
1139 #else
|
|
1140 retval = (Charcount) len;
|
|
1141 #endif
|
|
1142 return retval;
|
|
1143 }
|
|
1144
|
|
1145 /* Convert a substring length (starting at byte offset OFF) from chars to
|
|
1146 bytes. */
|
|
1147 DECLARE_INLINE_HEADER (
|
|
1148 Bytecount
|
2333
|
1149 string_offset_char_to_byte_len (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
|
|
1150 Bytecount USED_IF_MULE_OR_CHECK_TEXT (off),
|
|
1151 Charcount len)
|
826
|
1152 )
|
|
1153 {
|
|
1154 Bytecount retval;
|
|
1155 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
|
|
1156 #ifdef MULE
|
|
1157 /* casts to avoid errors from combining Bytecount/Charcount and warnings
|
|
1158 from signed/unsigned comparisons */
|
|
1159 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1160 retval = (Bytecount) len;
|
|
1161 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
|
|
1162 retval =
|
|
1163 XSTRING_ASCII_BEGIN (s) - off +
|
|
1164 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
|
|
1165 len - (XSTRING_ASCII_BEGIN (s) -
|
|
1166 (Charcount) off));
|
|
1167 else
|
|
1168 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len);
|
|
1169 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
|
|
1170 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len));
|
|
1171 # endif
|
|
1172 #else
|
|
1173 retval = (Bytecount) len;
|
|
1174 #endif
|
|
1175 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval);
|
|
1176 return retval;
|
|
1177 }
|
|
1178
|
|
1179 DECLARE_INLINE_HEADER (
|
867
|
1180 const Ibyte *
|
826
|
1181 string_char_addr (Lisp_Object s, Charcount idx)
|
|
1182 )
|
|
1183 {
|
|
1184 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx);
|
|
1185 }
|
|
1186
|
|
/* Set the character at character index I of string S to C.

   WARNING: after modifying an existing string you must call
   bump_string_modiff(). */
#ifndef MULE
/* Without MULE this is just a byte store. */
# define set_string_char(s, i, c) set_string_byte (s, i, c)
#else /* MULE */
void set_string_char (Lisp_Object s, Charcount i, Ichar c);
#endif /* MULE */
|
|
1194
|
|
1195 /* Return index to character before the one at IDX. */
|
|
1196 DECLARE_INLINE_HEADER (
|
|
1197 Bytecount
|
|
1198 prev_string_index (Lisp_Object s, Bytecount idx)
|
|
1199 )
|
|
1200 {
|
867
|
1201 const Ibyte *ptr = string_byte_addr (s, idx);
|
|
1202 DEC_IBYTEPTR (ptr);
|
826
|
1203 return string_addr_to_byte (s, ptr);
|
|
1204 }
|
|
1205
|
|
1206 /* Return index to character after the one at IDX. */
|
|
1207 DECLARE_INLINE_HEADER (
|
|
1208 Bytecount
|
|
1209 next_string_index (Lisp_Object s, Bytecount idx)
|
|
1210 )
|
|
1211 {
|
867
|
1212 const Ibyte *ptr = string_byte_addr (s, idx);
|
|
1213 INC_IBYTEPTR (ptr);
|
826
|
1214 return string_addr_to_byte (s, ptr);
|
|
1215 }
|
|
1216
|
|
1217
|
|
1218 /************************************************************************/
|
|
1219 /* */
|
771
|
1220 /* working with Eistrings */
|
|
1221 /* */
|
|
1222 /************************************************************************/
|
|
1223
|
|
1224 /*
|
|
1225 #### NOTE: This is a work in progress. Neither the API nor especially
|
|
1226 the implementation is finished.
|
|
1227
|
|
1228 NOTE: An Eistring is a structure that makes it easy to work with
|
|
1229 internally-formatted strings of data. It provides operations similar
|
|
1230 in feel to the standard strcpy(), strcat(), strlen(), etc., but
|
|
1231
|
|
1232 (a) it is Mule-correct
|
|
1233 (b) it does dynamic allocation so you never have to worry about size
|
793
|
1234 restrictions
|
851
|
1235 (c) it comes in an ALLOCA() variety (all allocation is stack-local,
|
793
|
1236 so there is no need to explicitly clean up) as well as a malloc()
|
|
1237 variety
|
|
1238 (d) it knows its own length, so it does not suffer from standard null
|
|
1239 byte brain-damage -- but it null-terminates the data anyway, so
|
|
1240 it can be passed to standard routines
|
|
1241 (e) it provides a much more powerful set of operations and knows about
|
771
|
1242 all the standard places where string data might reside: Lisp_Objects,
|
867
|
1243 other Eistrings, Ibyte * data with or without an explicit length,
|
|
1244 ASCII strings, Ichars, etc.
|
793
|
1245 (f) it provides easy operations to convert to/from externally-formatted
|
|
1246 data, and is easier to use than the standard TO_INTERNAL_FORMAT
|
771
|
1247 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal
|
|
1248 and external version of its data, but the external version is only
|
|
1249 initialized or changed when you call eito_external().)
|
|
1250
|
793
|
1251 The idea is to make it as easy to write Mule-correct string manipulation
|
|
1252 code as it is to write normal string manipulation code. We also make
|
|
1253 the API sufficiently general that it can handle multiple internal data
|
|
1254 formats (e.g. some fixed-width optimizing formats and a default variable
|
|
1255 width format) and allows for *ANY* data format we might choose in the
|
|
1256 future for the default format, including UCS2. (In other words, we can't
|
|
1257 assume that the internal format is ASCII-compatible and we can't assume
|
|
1258 it doesn't have embedded null bytes. We do assume, however, that any
|
|
1259 chosen format will have the concept of null-termination.) All of this is
|
|
1260 hidden from the user.
|
771
|
1261
|
|
1262 #### It is really too bad that we don't have a real object-oriented
|
|
1263 language, or at least a language with polymorphism!
|
|
1264
|
|
1265
|
|
1266 **********************************************
|
|
1267 * Declaration *
|
|
1268 **********************************************
|
|
1269
|
|
1270 To declare an Eistring, either put one of the following in the local
|
|
1271 variable section:
|
|
1272
|
|
1273 DECLARE_EISTRING (name);
|
2367
|
1274 Declare a new Eistring and initialize it to the empty string. This
|
|
1275 is a standard local variable declaration and can go anywhere in the
|
|
1276 variable declaration section. NAME itself is declared as an
|
|
1277 Eistring *, and its storage declared on the stack.
|
771
|
1278
|
|
1279 DECLARE_EISTRING_MALLOC (name);
|
2367
|
1280 Declare and initialize a new Eistring, which uses malloc()ed
|
|
1281 instead of ALLOCA()ed data. This is a standard local variable
|
|
1282 declaration and can go anywhere in the variable declaration
|
|
1283 section. Once you initialize the Eistring, you will have to free
|
|
1284 it using eifree() to avoid memory leaks. You will need to use this
|
|
1285 form if you are passing an Eistring to any function that modifies
|
|
1286 it (otherwise, the modified data may be in stack space and get
|
|
1287 overwritten when the function returns).
|
771
|
1288
|
|
1289 or use
|
|
1290
|
793
|
1291 Eistring ei;
|
|
1292 void eiinit (Eistring *ei);
|
|
1293 void eiinit_malloc (Eistring *einame);
|
771
|
1294 If you need to put an Eistring elsewhere than in a local variable
|
|
1295 declaration (e.g. in a structure), declare it as shown and then
|
|
1296 call one of the init macros.
|
|
1297
|
|
1298 Also note:
|
|
1299
|
793
|
1300 void eifree (Eistring *ei);
|
771
|
1301 If you declared an Eistring to use malloc() to hold its data,
|
|
1302 or converted it to the heap using eito_malloc(), then this
|
|
1303 releases any data in it and afterwards resets the Eistring
|
|
1304 using eiinit_malloc(). Otherwise, it just resets the Eistring
|
|
1305 using eiinit().
|
|
1306
|
|
1307
|
|
1308 **********************************************
|
|
1309 * Conventions *
|
|
1310 **********************************************
|
|
1311
|
|
1312 - The names of the functions have been chosen, where possible, to
|
|
1313 match the names of str*() functions in the standard C API.
|
|
1314 -
|
|
1315
|
|
1316
|
|
1317 **********************************************
|
|
1318 * Initialization *
|
|
1319 **********************************************
|
|
1320
|
|
1321 void eireset (Eistring *eistr);
|
|
1322 Initialize the Eistring to the empty string.
|
|
1323
|
|
1324 void eicpy_* (Eistring *eistr, ...);
|
|
1325 Initialize the Eistring from somewhere:
|
|
1326
|
|
1327 void eicpy_ei (Eistring *eistr, Eistring *eistr2);
|
|
1328 ... from another Eistring.
|
|
1329 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string);
|
|
1330 ... from a Lisp_Object string.
|
867
|
1331 void eicpy_ch (Eistring *eistr, Ichar ch);
|
|
1332 ... from an Ichar (this can be a conventional C character).
|
771
|
1333
|
|
1334 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string,
|
|
1335 Bytecount off, Charcount charoff,
|
|
1336 Bytecount len, Charcount charlen);
|
|
1337 ... from a section of a Lisp_Object string.
|
|
1338 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf,
|
|
1339 Bytecount off, Charcount charoff,
|
|
1340 Bytecount len, Charcount charlen);
|
|
1341 ... from a section of a Lisp_Object buffer.
|
867
|
1342 void eicpy_raw (Eistring *eistr, const Ibyte *data, Bytecount len);
|
771
|
1343 ... from raw internal-format data in the default internal format.
|
867
|
1344 void eicpy_rawz (Eistring *eistr, const Ibyte *data);
|
771
|
1345 ... from raw internal-format data in the default internal format
|
|
1346 that is "null-terminated" (the meaning of this depends on the nature
|
|
1347 of the default internal format).
|
867
|
1348 void eicpy_raw_fmt (Eistring *eistr, const Ibyte *data, Bytecount len,
|
826
|
1349 Internal_Format intfmt, Lisp_Object object);
|
771
|
1350 ... from raw internal-format data in the specified format.
|
867
|
1351 void eicpy_rawz_fmt (Eistring *eistr, const Ibyte *data,
|
826
|
1352 Internal_Format intfmt, Lisp_Object object);
|
771
|
1353 ... from raw internal-format data in the specified format that is
|
|
1354 "null-terminated" (the meaning of this depends on the nature of
|
|
1355 the specific format).
|
2421
|
1356 void eicpy_ascii (Eistring *eistr, const Ascbyte *ascstr);
|
771
|
1357 ... from an ASCII null-terminated string. Non-ASCII characters in
|
2500
|
1358 the string are *ILLEGAL* (read ABORT() with error-checking defined).
|
2421
|
1359 void eicpy_ascii_len (Eistring *eistr, const Ascbyte *ascstr, len);
|
771
|
1360 ... from an ASCII string, with length specified. Non-ASCII characters
|
2500
|
1361 in the string are *ILLEGAL* (read ABORT() with error-checking defined).
|
771
|
1362 void eicpy_ext (Eistring *eistr, const Extbyte *extdata,
|
1318
|
1363 Lisp_Object codesys);
|
771
|
1364 ... from external null-terminated data, with coding system specified.
|
|
1365 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata,
|
1318
|
1366 Bytecount extlen, Lisp_Object codesys);
|
771
|
1367 ... from external data, with length and coding system specified.
|
|
1368 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream);
|
|
1369 ... from an lstream; reads data till eof. Data must be in default
|
|
1370 internal format; otherwise, interpose a decoding lstream.
|
|
1371
|
|
1372
|
|
1373 **********************************************
|
|
1374 * Getting the data out of the Eistring *
|
|
1375 **********************************************
|
|
1376
|
867
|
1377 Ibyte *eidata (Eistring *eistr);
|
771
|
1378 Return a pointer to the raw data in an Eistring. This is NOT
|
|
1379 a copy.
|
|
1380
|
|
1381 Lisp_Object eimake_string (Eistring *eistr);
|
|
1382 Make a Lisp string out of the Eistring.
|
|
1383
|
|
1384 Lisp_Object eimake_string_off (Eistring *eistr,
|
|
1385 Bytecount off, Charcount charoff,
|
|
1386 Bytecount len, Charcount charlen);
|
|
1387 Make a Lisp string out of a section of the Eistring.
|
|
1388
|
867
|
1389 void eicpyout_alloca (Eistring *eistr, LVALUE: Ibyte *ptr_out,
|
771
|
1390 LVALUE: Bytecount len_out);
|
851
|
1391 Make an ALLOCA() copy of the data in the Eistring, using the
|
|
1392 default internal format. Due to the nature of ALLOCA(), this
|
771
|
1393 must be a macro, with all lvalues passed in as parameters.
|
793
|
1394 (More specifically, not all compilers correctly handle using
|
851
|
1395 ALLOCA() as the argument to a function call -- GCC on x86
|
|
1396 didn't use to, for example.) A pointer to the ALLOCA()ed data
|
793
|
1397 is stored in PTR_OUT, and the length of the data (not including
|
|
1398 the terminating zero) is stored in LEN_OUT.
|
771
|
1399
|
867
|
1400 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Ibyte *ptr_out,
|
771
|
1401 LVALUE: Bytecount len_out,
|
826
|
1402 Internal_Format intfmt, Lisp_Object object);
|
771
|
1403 Like eicpyout_alloca(), but converts to the specified internal
|
|
1404 format. (No formats other than FORMAT_DEFAULT are currently
|
|
1405 implemented, and you get an assertion failure if you try.)
|
|
1406
|
867
|
1407 Ibyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out);
|
771
|
1408 Make a malloc() copy of the data in the Eistring, using the
|
|
1409 default internal format. This is a real function. No lvalues
|
|
1410 passed in. Returns the new data, and stores the length (not
|
|
1411 including the terminating zero) using INTLEN_OUT, unless it's
|
|
1412 a NULL pointer.
|
|
1413
|
867
|
1414 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt,
|
826
|
1415 Bytecount *intlen_out, Lisp_Object object);
|
771
|
1416 Like eicpyout_malloc(), but converts to the specified internal
|
|
1417 format. (No formats other than FORMAT_DEFAULT are currently
|
|
1418 implemented, and you get an assertion failure if you try.)
|
|
1419
|
|
1420
|
|
1421 **********************************************
|
|
1422 * Moving to the heap *
|
|
1423 **********************************************
|
|
1424
|
|
1425 void eito_malloc (Eistring *eistr);
|
|
1426 Move this Eistring to the heap. Its data will be stored in a
|
|
1427 malloc()ed block rather than the stack. Subsequent changes to
|
|
1428 this Eistring will realloc() the block as necessary. Use this
|
|
1429 when you want the Eistring to remain in scope past the end of
|
|
1430 this function call. You will have to manually free the data
|
|
1431 in the Eistring using eifree().
|
|
1432
|
|
1433 void eito_alloca (Eistring *eistr);
|
|
1434 Move this Eistring back to the stack, if it was moved to the
|
|
1435 heap with eito_malloc(). This will automatically free any
|
|
1436 heap-allocated data.
|
|
1437
|
|
1438
|
|
1439
|
|
1440 **********************************************
|
|
1441 * Retrieving the length *
|
|
1442 **********************************************
|
|
1443
|
|
1444 Bytecount eilen (Eistring *eistr);
|
|
1445 Return the length of the internal data, in bytes. See also
|
|
1446 eiextlen(), below.
|
|
1447 Charcount eicharlen (Eistring *eistr);
|
|
1448 Return the length of the internal data, in characters.
|
|
1449
|
|
1450
|
|
1451 **********************************************
|
|
1452 * Working with positions *
|
|
1453 **********************************************
|
|
1454
|
|
1455 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos);
|
|
1456 Convert a char offset to a byte offset.
|
|
1457 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos);
|
|
1458 Convert a byte offset to a char offset.
|
|
1459 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos);
|
|
1460 Increment the given position by one character.
|
|
1461 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
|
|
1462 Increment the given position by N characters.
|
|
1463 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos);
|
|
1464 Decrement the given position by one character.
|
|
1465 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
|
|
1466 Decrement the given position by N characters.
|
|
1467
|
|
1468
|
|
1469 **********************************************
|
|
1470 * Getting the character at a position *
|
|
1471 **********************************************
|
|
1472
|
867
|
1473 Ichar eigetch (Eistring *eistr, Bytecount bytepos);
|
771
|
1474 Return the character at a particular byte offset.
|
867
|
1475 Ichar eigetch_char (Eistring *eistr, Charcount charpos);
|
771
|
1476 Return the character at a particular character offset.
|
|
1477
|
|
1478
|
|
1479 **********************************************
|
|
1480 * Setting the character at a position *
|
|
1481 **********************************************
|
|
1482
|
867
|
1483 Ichar eisetch (Eistring *eistr, Bytecount bytepos, Ichar chr);
|
771
|
1484 Set the character at a particular byte offset.
|
867
|
1485 Ichar eisetch_char (Eistring *eistr, Charcount charpos, Ichar chr);
|
771
|
1486 Set the character at a particular character offset.
|
|
1487
|
|
1488
|
|
1489 **********************************************
|
|
1490 * Concatenation *
|
|
1491 **********************************************
|
|
1492
|
|
1493 void eicat_* (Eistring *eistr, ...);
|
|
1494 Concatenate onto the end of the Eistring, with data coming from the
|
|
1495 same places as above:
|
|
1496
|
|
1497 void eicat_ei (Eistring *eistr, Eistring *eistr2);
|
|
1498 ... from another Eistring.
|
2421
|
1499 void eicat_ascii (Eistring *eistr, Ascbyte *ascstr);
|
771
|
1500 ... from an ASCII null-terminated string. Non-ASCII characters in
|
2500
|
1501 the string are *ILLEGAL* (read ABORT() with error-checking defined).
|
867
|
1502 void eicat_raw (Eistring *eistr, const Ibyte *data, Bytecount len);
|
771
|
1503 ... from raw internal-format data in the default internal format.
|
867
|
1504 void eicat_rawz (Eistring *eistr, const Ibyte *data);
|
771
|
1505 ... from raw internal-format data in the default internal format
|
|
1506 that is "null-terminated" (the meaning of this depends on the nature
|
|
1507 of the default internal format).
|
|
1508 void eicat_lstr (Eistring *eistr, Lisp_Object lisp_string);
|
|
1509 ... from a Lisp_Object string.
|
867
|
1510 void eicat_ch (Eistring *eistr, Ichar ch);
|
|
1511 ... from an Ichar.
|
771
|
1512
|
|
1513 (All except the first variety are convenience functions.
|
|
1514 In the general case, create another Eistring from the source.)
|
|
1515
|
|
1516
|
|
1517 **********************************************
|
|
1518 * Replacement *
|
|
1519 **********************************************
|
|
1520
|
|
1521 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1522 Bytecount len, Charcount charlen, ...);
|
|
1523 Replace a section of the Eistring, specifically:
|
|
1524
|
|
1525 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1526 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
1527 ... with another Eistring.
|
2421
|
1528 void eisub_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1529 Bytecount len, Charcount charlen, Ascbyte *ascstr);
|
771
|
1530 ... with an ASCII null-terminated string. Non-ASCII characters in
|
2500
|
1531 the string are *ILLEGAL* (read ABORT() with error-checking defined).
|
771
|
1532 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff,
|
867
|
1533 Bytecount len, Charcount charlen, Ichar ch);
|
|
1534 ... with an Ichar.
|
771
|
1535
|
|
1536 void eidel (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1537 Bytecount len, Charcount charlen);
|
|
1538 Delete a section of the Eistring.
|
|
1539
|
|
1540
|
|
1541 **********************************************
|
|
1542 * Converting to an external format *
|
|
1543 **********************************************
|
|
1544
|
1318
|
1545 void eito_external (Eistring *eistr, Lisp_Object codesys);
|
771
|
1546 Convert the Eistring to an external format and store the result
|
|
1547 in the string. NOTE: Further changes to the Eistring will *NOT*
|
|
1548 change the external data stored in the string. You will have to
|
|
1549 call eito_external() again in such a case if you want the external
|
|
1550 data.
|
|
1551
|
|
1552 Extbyte *eiextdata (Eistring *eistr);
|
|
1553 Return a pointer to the external data stored in the Eistring as
|
|
1554 a result of a prior call to eito_external().
|
|
1555
|
|
1556 Bytecount eiextlen (Eistring *eistr);
|
|
1557 Return the length in bytes of the external data stored in the
|
|
1558 Eistring as a result of a prior call to eito_external().
|
|
1559
|
|
1560
|
|
1561 **********************************************
|
|
1562 * Searching in the Eistring for a character *
|
|
1563 **********************************************
|
|
1564
|
867
|
1565 Bytecount eichr (Eistring *eistr, Ichar chr);
|
|
1566 Charcount eichr_char (Eistring *eistr, Ichar chr);
|
|
1567 Bytecount eichr_off (Eistring *eistr, Ichar chr, Bytecount off,
|
771
|
1568 Charcount charoff);
|
867
|
1569 Charcount eichr_off_char (Eistring *eistr, Ichar chr, Bytecount off,
|
771
|
1570 Charcount charoff);
|
867
|
1571 Bytecount eirchr (Eistring *eistr, Ichar chr);
|
|
1572 Charcount eirchr_char (Eistring *eistr, Ichar chr);
|
|
1573 Bytecount eirchr_off (Eistring *eistr, Ichar chr, Bytecount off,
|
771
|
1574 Charcount charoff);
|
867
|
1575 Charcount eirchr_off_char (Eistring *eistr, Ichar chr, Bytecount off,
|
771
|
1576 Charcount charoff);
|
|
1577
|
|
1578
|
|
1579 **********************************************
|
|
1580 * Searching in the Eistring for a string *
|
|
1581 **********************************************
|
|
1582
|
|
1583 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2);
|
|
1584 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2);
|
|
1585 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
|
|
1586 Charcount charoff);
|
|
1587 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2,
|
|
1588 Bytecount off, Charcount charoff);
|
|
1589 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2);
|
|
1590 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2);
|
|
1591 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
|
|
1592 Charcount charoff);
|
|
1593 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2,
|
|
1594 Bytecount off, Charcount charoff);
|
|
1595
|
2421
|
1596 Bytecount eistr_ascii (Eistring *eistr, Ascbyte *ascstr);
|
|
1597 Charcount eistr_ascii_char (Eistring *eistr, Ascbyte *ascstr);
|
|
1598 Bytecount eistr_ascii_off (Eistring *eistr, Ascbyte *ascstr, Bytecount off,
|
771
|
1599 Charcount charoff);
|
2421
|
1600 Charcount eistr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr,
|
771
|
1601 Bytecount off, Charcount charoff);
|
2421
|
1602 Bytecount eirstr_ascii (Eistring *eistr, Ascbyte *ascstr);
|
|
1603 Charcount eirstr_ascii_char (Eistring *eistr, Ascbyte *ascstr);
|
|
1604 Bytecount eirstr_ascii_off (Eistring *eistr, Ascbyte *ascstr,
|
771
|
1605 Bytecount off, Charcount charoff);
|
2421
|
1606 Charcount eirstr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr,
|
771
|
1607 Bytecount off, Charcount charoff);
|
|
1608
|
|
1609
|
|
1610 **********************************************
|
|
1611 * Comparison *
|
|
1612 **********************************************
|
|
1613
|
|
1614 int eicmp_* (Eistring *eistr, ...);
|
|
1615 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1616 Bytecount len, Charcount charlen, ...);
|
|
1617 int eicasecmp_* (Eistring *eistr, ...);
|
|
1618 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1619 Bytecount len, Charcount charlen, ...);
|
|
1620 int eicasecmp_i18n_* (Eistring *eistr, ...);
|
|
1621 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1622 Bytecount len, Charcount charlen, ...);
|
|
1623
|
|
1624 Compare the Eistring with the other data. Return value same as
|
|
1625 from strcmp. The `*' is either `ei' for another Eistring (in
|
|
1626 which case `...' is an Eistring), or `ascii' for a pure-ASCII string
|
|
1627 (in which case `...' is a pointer to that string). For anything
|
|
1628 more complex, first create an Eistring out of the source.
|
|
1629 Comparison is either simple (`eicmp_...'), ASCII case-folding
|
|
1630 (`eicasecmp_...'), or multilingual case-folding
|
|
1631 (`eicasecmp_i18n_...').
|
|
1632
|
|
1633
|
|
1634 More specifically, the prototypes are:
|
|
1635
|
|
1636 int eicmp_ei (Eistring *eistr, Eistring *eistr2);
|
|
1637 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1638 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
1639 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2);
|
|
1640 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1641 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
1642 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2);
|
|
1643 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off,
|
|
1644 Charcount charoff, Bytecount len,
|
|
1645 Charcount charlen, Eistring *eistr2);
|
|
1646
|
2421
|
1647 int eicmp_ascii (Eistring *eistr, Ascbyte *ascstr);
|
|
1648 int eicmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
1649 Bytecount len, Charcount charlen, Ascbyte *ascstr);
|
|
1650 int eicasecmp_ascii (Eistring *eistr, Ascbyte *ascstr);
|
|
1651 int eicasecmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
|
771
|
1652 Bytecount len, Charcount charlen,
|
2421
|
1653 Ascbyte *ascstr);
|
|
1654 int eicasecmp_i18n_ascii (Eistring *eistr, Ascbyte *ascstr);
|
|
1655 int eicasecmp_i18n_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
|
771
|
1656 Bytecount len, Charcount charlen,
|
2421
|
1657 Ascbyte *ascstr);
|
771
|
1658
|
|
1659
|
|
1660 **********************************************
|
|
1661 * Case-changing the Eistring *
|
|
1662 **********************************************
|
|
1663
|
|
1664 void eilwr (Eistring *eistr);
|
|
1665 Convert all characters in the Eistring to lowercase.
|
|
1666 void eiupr (Eistring *eistr);
|
|
1667 Convert all characters in the Eistring to uppercase.
|
|
1668 */
|
|
1669
|
|
1670
|
|
1671 /* Principles for writing Eistring functions:
|
|
1672
|
|
1673 (1) Unfortunately, we have to write most of the Eistring functions
|
851
|
1674 as macros, because of the use of ALLOCA(). The principle used
|
771
|
1675 below to assure no conflict in local variables is to prefix all
|
|
1676 local variables with "ei" plus a number, which should be unique
|
|
1677 among macros. In practice, when finding a new number, find the
|
|
1678 highest so far used, and add 1.
|
|
1679
|
|
1680 (2) We also suffix the Eistring fields with an _ to avoid problems
|
|
1681 with macro parameters of the same name. (And as the standard
|
|
1682 signal not to access these fields directly.)
|
|
1683
|
|
1684 (3) We maintain both the length in bytes and chars of the data in
|
|
1685 the Eistring at all times, for convenient retrieval by outside
|
|
1686 functions. That means when writing functions that manipulate
|
|
1687 Eistrings, you too need to keep both lengths up to date for all
|
|
1688 data that you work with.
|
|
1689
|
|
1690 (4) When writing a new type of operation (e.g. substitution), you
|
|
1691 will often find yourself working with outside data, and thus
|
|
1692 have a series of related API's, for different forms that the
|
|
1693 outside data is in. Generally, you will want to choose a
|
|
1694 subset of the forms supported by eicpy_*, which has to be
|
|
1695 totally general because that's the fundamental way to get data
|
|
1696 into an Eistring, and once the data is into the string, it
|
|
1697 would be possible to create a whole series of Ei operations that work on
|
|
1698 nothing but Eistrings. Although theoretically nice, in
|
|
1699 practice it's a hassle, so we suggest that you provide
|
|
1700 convenience functions. In particular, there are two paths you
|
|
1701 can take. One is minimalist -- it only allows other Eistrings
|
867
|
1702 and ASCII data, and Ichars if the particular operation makes
|
771
|
1703 sense with a character. The other provides interfaces for the
|
|
1704 most commonly-used forms -- Eistring, ASCII data, Lisp string,
|
|
1705 raw internal-format string with length, raw internal-format
|
867
|
1706 string without, and possibly Ichar. (In the function names,
|
771
|
1707 these are designated `ei', `ascii', `lstr', `raw', `rawz', and
|
|
1708 `ch', respectively.)
|
|
1709
|
|
1710 (5) When coding a new type of operation, such as was discussed in
|
|
1711 previous section, the correct approach is to declare a worker
|
|
1712 function that does the work of everything, and is called by the
|
|
1713 other "container" macros that handle the different outside data
|
|
1714 forms. The data coming into the worker function, which
|
|
1715 typically ends in `_1', is in the form of three parameters:
|
|
1716 DATA, LEN, CHARLEN. (See point [3] about having two lengths and
|
|
1717 keeping them in sync.)
|
|
1718
|
|
1719 (6) Handling argument evaluation in macros: We take great care
|
|
1720 never to evaluate any argument more than once in any macro,
|
|
1721 except the initial Eistring parameter. This can and will be
|
|
1722 evaluated multiple times, but it should pretty much always just
|
|
1723 be a simple variable. This means, for example, that if an
|
|
1724 Eistring is the second (not first) argument of a macro, it
|
|
1725 doesn't fall under the "initial Eistring" exemption, so it
|
|
1726 needs protection against multi-evaluation. (Take the address of
|
|
1727 the Eistring structure, store in a temporary variable, and use
|
|
1728 temporary variable for all access to the Eistring.)
|
|
1729 Essentially, we want it to appear as if these Eistring macros
|
|
1730 are functions -- we would like to declare them as functions but
|
851
|
1731 they use ALLOCA(), so we can't (and we can't make them inline
|
|
1732 functions either -- ALLOCA() is explicitly disallowed in inline
|
771
|
1733 functions.)
|
|
1734
|
|
1735 (7) Note that our rules regarding multiple evaluation are *more*
|
|
1736 strict than the rules listed above under the heading "working
|
|
1737 with raw internal-format data".
|
|
1738 */
|
|
1739
|
|
1740
|
|
1741 /* ----- Declaration ----- */
|
|
1742
|
|
1743 typedef struct
|
|
1744 {
|
|
1745 /* Data for the Eistring, stored in the default internal format.
|
|
1746 Always includes terminating null. */
|
867
|
1747 Ibyte *data_;
|
771
|
1748 /* Total number of bytes allocated in DATA (including null). */
|
|
1749 Bytecount max_size_allocated_;
|
|
1750 Bytecount bytelen_;
|
|
1751 Charcount charlen_;
|
|
1752 int mallocp_;
|
|
1753
|
|
1754 Extbyte *extdata_;
|
|
1755 Bytecount extlen_;
|
|
1756 } Eistring;
|
|
1757
|
|
1758 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init;
|
|
1759
|
|
1760 #define DECLARE_EISTRING(name) \
|
|
1761 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \
|
|
1762 Eistring *name = & __ ## name ## __storage__
|
|
1763 #define DECLARE_EISTRING_MALLOC(name) \
|
|
1764 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \
|
|
1765 Eistring *name = & __ ## name ## __storage__
|
|
1766
|
|
1767 #define eiinit(ei) \
|
|
1768 do { \
|
793
|
1769 *(ei) = the_eistring_zero_init; \
|
771
|
1770 } while (0)
|
|
1771
|
|
1772 #define eiinit_malloc(ei) \
|
|
1773 do { \
|
793
|
1774 *(ei) = the_eistring_malloc_zero_init; \
|
771
|
1775 } while (0)
|
|
1776
|
|
1777
|
|
1778 /* ----- Utility ----- */
|
|
1779
|
|
/* LEN is a byte count and CHARLEN a character count for the text at
   PTR; both must be lvalues.  If LEN was given as -1, compute it from
   CHARLEN; otherwise, if CHARLEN was given as -1, compute it from LEN.
   PTR is evaluated at most once, the others multiple times. */
#define eifixup_bytechar(ptr, len, charlen)             \
do {                                                    \
  if ((len) == -1)                                      \
    (len) = charcount_to_bytecount (ptr, charlen);      \
  else if ((charlen) == -1)                             \
    (charlen) = bytecount_to_charcount (ptr, len);      \
} while (0)

/* Make sure LEN is specified, in case it was given as -1; CHARLEN is
   only read.  PTR is evaluated at most once, the others multiple
   times. */
#define eifixup_byte(ptr, len, charlen)                 \
do {                                                    \
  if ((len) == -1)                                      \
    (len) = charcount_to_bytecount (ptr, charlen);      \
} while (0)

/* Make sure CHARLEN is specified, in case it was given as -1; LEN is
   only read.  PTR is evaluated at most once, the others multiple
   times. */
#define eifixup_char(ptr, len, charlen)                 \
do {                                                    \
  if ((charlen) == -1)                                  \
    (charlen) = bytecount_to_charcount (ptr, len);      \
} while (0)
|
|
1805
|
|
1806
|
|
1807
|
|
/* Make sure EI can hold NEWBYTELEN bytes (which is NEWCHARLEN chars)
   plus a zero terminator, and record those lengths in EI.  Existing
   data is preserved as much as possible, including the existing zero
   terminator.  A new zero terminator is put where it should go if NEWZ
   is non-zero.  All args but EI are evaluated only once.

   Storage is obtained with xrealloc() when EI is malloc-based and with
   ALLOCA() otherwise; the size grows by a factor of 1.5 at a time, with
   a minimum of 32 bytes.

   An Eistring that has no buffer yet (data_ is NULL) always gets one,
   even when the byte length does not change (e.g. the empty string is
   copied into a fresh Eistring), and that new buffer starts out holding
   the empty string.  Callers that pass NEWZ == 0 and move the old
   terminator themselves therefore always find one to move, and data_ is
   never left NULL after this macro runs. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz)                      \
do {                                                                    \
  int ei1oldeibytelen = (ei)->bytelen_;                                 \
                                                                        \
  (ei)->charlen_ = (newcharlen);                                        \
  (ei)->bytelen_ = (newbytelen);                                        \
                                                                        \
  if (ei1oldeibytelen != (ei)->bytelen_ || !(ei)->data_)                \
    {                                                                   \
      int ei1newsize = (ei)->max_size_allocated_;                       \
      while (ei1newsize < (ei)->bytelen_ + 1)                           \
        {                                                               \
          ei1newsize = (int) (ei1newsize * 1.5);                        \
          if (ei1newsize < 32)                                          \
            ei1newsize = 32;                                            \
        }                                                               \
      if (ei1newsize != (ei)->max_size_allocated_ || !(ei)->data_)      \
        {                                                               \
          Ibyte *ei1oldeidata = (ei)->data_;                            \
          if ((ei)->mallocp_)                                           \
            /* xrealloc preserves existing data as much as possible */  \
            (ei)->data_ = (Ibyte *) xrealloc (ei1oldeidata, ei1newsize);\
          else                                                          \
            {                                                           \
              /* No realloc for ALLOCA(): get more space and copy. */   \
              (ei)->data_ = alloca_ibytes (ei1newsize);                 \
              if (ei1oldeidata)                                         \
                memcpy ((ei)->data_, ei1oldeidata,                      \
                        ei1oldeibytelen + 1);                           \
            }                                                           \
          /* A brand-new buffer represents the empty string. */         \
          if (!ei1oldeidata)                                            \
            (ei)->data_[0] = '\0';                                      \
          (ei)->max_size_allocated_ = ei1newsize;                       \
        }                                                               \
      if (newz)                                                         \
        (ei)->data_[(ei)->bytelen_] = '\0';                             \
    }                                                                   \
} while (0)
|
|
1849
|
|
/* Size EI for BYTELEN bytes / CHARLEN chars (terminator included, see
   EI_ALLOC) and then copy that many bytes from DATA into it. */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen)   \
do {                                                    \
  EI_ALLOC (ei, bytelen, charlen, 1);                   \
  memcpy ((ei)->data_, (data), (ei)->bytelen_);         \
} while (0)
|
|
1855
|
|
1856 /* ----- Initialization ----- */
|
|
1857
|
|
/* Set EI to a copy of the Eistring EICPY.  EICPY is evaluated once. */
#define eicpy_ei(ei, eicpy)                                             \
do {                                                                    \
  const Eistring *ei2 = (eicpy);                                        \
  EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_);     \
} while (0)
|
|
1863
|
|
/* Set EI to a copy of the text of the Lisp string LISP_STRING, which
   is evaluated once. */
#define eicpy_lstr(ei, lisp_string)                                     \
do {                                                                    \
  Lisp_Object ei3 = (lisp_string);                                      \
  EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3),      \
                     string_char_length (ei3));                         \
} while (0)
|
|
1870
|
|
/* Set EI to a copy of a section of the Lisp string LISP_STRING.  The
   section starts at byte offset OFF (or, if OFF is -1, at character
   offset CHAROFF) and is LEN bytes / CHARLEN chars long; of LEN and
   CHARLEN, whichever is -1 is computed from the other.  Every argument
   except EI is evaluated once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen)     \
do {                                                                    \
  Lisp_Object ei23lstr = (lisp_string);                                 \
  int ei23off = (off);                                                  \
  int ei23charoff = (charoff);                                          \
  int ei23len = (len);                                                  \
  int ei23charlen = (charlen);                                          \
  const Ibyte *ei23data = XSTRING_DATA (ei23lstr);                      \
                                                                        \
  eifixup_byte (ei23data, ei23off, ei23charoff);                        \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen);          \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen);     \
} while (0)
|
|
1887
|
826
|
/* Set EI to a copy of the LEN bytes of internal-format text at PTR.
   FMT must be FORMAT_DEFAULT (asserted); OBJECT is not used.  PTR, FMT
   and LEN are each evaluated once. */
#define eicpy_raw_fmt(ei, ptr, len, fmt, object)                        \
do {                                                                    \
  const Ibyte *ei12ptr = (ptr);                                         \
  Internal_Format ei12fmt = (fmt);                                      \
  int ei12len = (len);                                                  \
  assert (ei12fmt == FORMAT_DEFAULT);                                   \
  EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len,                              \
                     bytecount_to_charcount (ei12ptr, ei12len));        \
} while (0)

/* eicpy_raw_fmt() for text in the default internal format. */
#define eicpy_raw(ei, ptr, len)                                 \
  eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil)
|
|
1900
|
|
/* Set EI to a copy of the null-terminated internal-format text at PTR.
   FMT must be FORMAT_DEFAULT (asserted).  PTR and FMT are each
   evaluated exactly once: the saved copy of FMT, not the macro argument
   itself, is what gets handed on to eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt, object)                            \
do {                                                                    \
  const Ibyte *ei12p1ptr = (ptr);                                       \
  Internal_Format ei12p1fmt = (fmt);                                    \
  assert (ei12p1fmt == FORMAT_DEFAULT);                                 \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt,       \
                 object);                                               \
} while (0)

/* eicpy_rawz_fmt() for text in the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil)
|
771
|
1910
|
1333
|
/* Set EI to the one-character string consisting of the Ichar CH. */
#define eicpy_ch(ei, ch)                                        \
do {                                                            \
  Ibyte ei12p2[MAX_ICHAR_LEN];                                  \
  Bytecount ei12p2len = set_itext_ichar (ei12p2, ch);           \
  EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1);                 \
} while (0)
|
|
1917
|
2421
|
/* Set EI from the null-terminated ASCII string ASCSTR.  The string is
   checked with ASSERT_ASCTEXT_ASCII() and then brought in as external
   data with coding system Qbinary. */
#define eicpy_ascii(ei, ascstr)                                         \
do {                                                                    \
  const Ascbyte *ei4 = (ascstr);                                        \
                                                                        \
  ASSERT_ASCTEXT_ASCII (ei4);                                           \
  eicpy_ext (ei, ei4, Qbinary);                                         \
} while (0)

/* Same, for the first C_LEN bytes of ASCSTR (checked with
   ASSERT_ASCTEXT_ASCII_LEN()). */
#define eicpy_ascii_len(ei, ascstr, c_len)                              \
do {                                                                    \
  const Ascbyte *ei6 = (ascstr);                                        \
  int ei6len = (c_len);                                                 \
                                                                        \
  ASSERT_ASCTEXT_ASCII_LEN (ei6, ei6len);                               \
  eicpy_ext_len (ei, ei6, ei6len, Qbinary);                             \
} while (0)
|
|
1934
|
1318
|
/* Set EI from EXTLEN bytes of external data at EXTDATA, converted with
   coding system CODESYS.  The conversion macro stores directly into
   EI's data and byte-length fields; the allocated size and the
   character length are then derived from that result.
   NOTE(review): this path does not go through EI_ALLOC and does not
   look at mallocp_ -- confirm that the storage produced by
   SIZED_EXTERNAL_TO_SIZED_C_STRING is appropriate for malloc-based
   Eistrings. */
#define eicpy_ext_len(ei, extdata, extlen, codesys)                     \
do {                                                                    \
  const Extbyte *ei7 = (extdata);                                       \
  int ei7len = (extlen);                                                \
                                                                        \
  SIZED_EXTERNAL_TO_SIZED_C_STRING (ei7, ei7len, (ei)->data_,           \
                                    (ei)->bytelen_, codesys);           \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1;                       \
  (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \
} while (0)
|
|
1945
|
1318
|
/* Set EI from the terminated external data at EXTDATA, converted with
   coding system CODESYS (a Lisp_Object); the data's length is obtained
   from dfc_external_data_len().  EXTDATA and CODESYS are each evaluated
   exactly once. */
#define eicpy_ext(ei, extdata, codesys)                                 \
do {                                                                    \
  const Extbyte *ei8 = (extdata);                                       \
  Lisp_Object ei8codesys = (codesys);                                   \
                                                                        \
  eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, ei8codesys),      \
                 ei8codesys);                                           \
} while (0)
|
|
1953
|
|
1954 #define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) \
|
|
1955 NOT YET IMPLEMENTED
|
|
1956
|
|
1957 #define eicpy_lstream(eistr, lstream) \
|
|
1958 NOT YET IMPLEMENTED
|
|
1959
|
867
|
1960 #define eireset(eistr) eicpy_rawz (eistr, (Ibyte *) "")
|
771
|
1961
|
|
1962 /* ----- Getting the data out of the Eistring ----- */
|
|
1963
|
|
1964 #define eidata(ei) ((ei)->data_)
|
|
1965
|
|
1966 #define eimake_string(ei) make_string (eidata (ei), eilen (ei))
|
|
1967
|
|
/* Make a Lisp string out of a section of EISTR.  The section starts at
   byte offset OFF (computed from CHAROFF if OFF is -1) and is LEN bytes
   long (computed from CHARLEN if LEN is -1).

   WARNING: this macro does not yield a value.  It expands to a
   statement that executes `return make_string (...)', i.e. it returns
   the new string from the *enclosing function*, so it can only be used
   inside a function returning Lisp_Object. */
#define eimake_string_off(eistr, off, charoff, len, charlen)            \
do {                                                                    \
  int ei24off = (off);                                                  \
  int ei24charoff = (charoff);                                          \
  int ei24len = (len);                                                  \
  int ei24charlen = (charlen);                                          \
                                                                        \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff);                  \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen);        \
                                                                        \
  return make_string ((eistr)->data_ + ei24off, ei24len);               \
} while (0)
|
|
1981
|
|
1982 #define eicpyout_alloca(eistr, ptrout, lenout) \
|
826
|
1983 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil)
|
771
|
1984 #define eicpyout_malloc(eistr, lenout) \
|
826
|
1985 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil)
|
867
|
1986 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out,
|
826
|
1987 Internal_Format fmt, Lisp_Object object);
|
|
/* Copy the text of EISTR, terminating null included, into fresh
   ALLOCA() storage.  PTROUT (an Ibyte * lvalue) receives the address of
   the copy and LENOUT (a Bytecount lvalue) its length in bytes, not
   counting the null.  FMT must be FORMAT_DEFAULT (asserted); OBJECT is
   not used. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object)         \
do {                                                                    \
  Internal_Format ei23fmt = (fmt);                                      \
  /* Addresses of the caller's output variables. */                     \
  Ibyte **ei23ptrout = &(ptrout);                                       \
  Bytecount *ei23lenout = &(lenout);                                    \
                                                                        \
  assert (ei23fmt == FORMAT_DEFAULT);                                   \
                                                                        \
  *ei23lenout = (eistr)->bytelen_;                                      \
  *ei23ptrout = alloca_ibytes ((eistr)->bytelen_ + 1);                  \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1);          \
} while (0)
|
|
2000
|
|
2001 /* ----- Moving to the heap ----- */
|
|
2002
|
|
/* Release EI's storage and return it to its initial state.  For a
   malloc-based Eistring the text and external buffers are xfree()d (if
   present) and EI is re-initialized as malloc-based; otherwise EI is
   simply re-initialized. */
#define eifree(ei)                              \
do {                                            \
  if (!(ei)->mallocp_)                          \
    eiinit (ei);                                \
  else                                          \
    {                                           \
      if ((ei)->data_)                          \
        xfree ((ei)->data_, Ibyte *);           \
      if ((ei)->extdata_)                       \
        xfree ((ei)->extdata_, Extbyte *);      \
      eiinit_malloc (ei);                       \
    }                                           \
} while (0)
|
|
2016
|
|
2017 int eifind_large_enough_buffer (int oldbufsize, int needed_size);
|
|
2018 void eito_malloc_1 (Eistring *ei);
|
|
2019
|
|
2020 #define eito_malloc(ei) eito_malloc_1 (ei)
|
|
2021
|
|
/* Convert a malloc-based Eistring into an ALLOCA()-based one: the text
   and the external data (if present) are copied into ALLOCA() storage
   and the malloc()ed originals are freed.  Does nothing if EI is
   already ALLOCA()-based.  (Being a macro, this must not execute a
   `return' for the no-op case -- that would return from the caller --
   so the work is wrapped in a conditional instead.) */
#define eito_alloca(ei)                                                 \
do {                                                                    \
  if ((ei)->mallocp_)                                                   \
    {                                                                   \
      (ei)->mallocp_ = 0;                                               \
      if ((ei)->data_)                                                  \
        {                                                               \
          Ibyte *ei13newdata;                                           \
                                                                        \
          (ei)->max_size_allocated_ =                                   \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1);         \
          ei13newdata = alloca_ibytes ((ei)->max_size_allocated_);      \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1);        \
          xfree ((ei)->data_, Ibyte *);                                 \
          (ei)->data_ = ei13newdata;                                    \
        }                                                               \
                                                                        \
      if ((ei)->extdata_)                                               \
        {                                                               \
          Extbyte *ei13newdata = alloca_extbytes ((ei)->extlen_ + 2);   \
                                                                        \
          memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_);          \
          /* Double null-terminate in case of Unicode data */           \
          ei13newdata[(ei)->extlen_] = '\0';                            \
          ei13newdata[(ei)->extlen_ + 1] = '\0';                        \
          xfree ((ei)->extdata_, Extbyte *);                            \
          (ei)->extdata_ = ei13newdata;                                 \
        }                                                               \
    }                                                                   \
} while (0)
|
|
2051
|
|
2052
|
|
2053 /* ----- Retrieving the length ----- */
|
|
2054
|
|
2055 #define eilen(ei) ((ei)->bytelen_)
|
|
2056 #define eicharlen(ei) ((ei)->charlen_)
|
|
2057
|
|
2058
|
|
2059 /* ----- Working with positions ----- */
|
|
2060
|
|
2061 #define eicharpos_to_bytepos(ei, charpos) \
|
|
2062 charcount_to_bytecount ((ei)->data_, charpos)
|
|
2063 #define eibytepos_to_charpos(ei, bytepos) \
|
|
2064 bytecount_to_charcount ((ei)->data_, bytepos)
|
|
2065
|
|
2066 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr,
|
|
2067 Bytecount bytepos,
|
|
2068 Charcount n))
|
|
2069 {
|
867
|
2070 Ibyte *pos = eistr->data_ + bytepos;
|
814
|
2071 Charcount i;
|
771
|
2072
|
800
|
2073 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
|
|
2074 text_checking_assert (n >= 0 && n <= eistr->charlen_);
|
771
|
2075 /* We could check N more correctly now, but that would require a
|
|
2076 call to bytecount_to_charcount(), which would be needlessly
|
|
2077 expensive (it would convert O(N) algorithms into O(N^2) algorithms
|
800
|
2078 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are
|
867
|
2079 guaranteed to catch it either inside INC_IBYTEPTR() or in the check
|
771
|
2080 below. */
|
|
2081 for (i = 0; i < n; i++)
|
867
|
2082 INC_IBYTEPTR (pos);
|
800
|
2083 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
|
771
|
2084 return pos - eistr->data_;
|
|
2085 }
|
|
2086
|
|
/* Advance a byte position by one character, or by N characters.  (No
   space may separate the macro name from its parameter list, or the
   macro becomes object-like.) */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
|
|
2089
|
|
2090 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr,
|
|
2091 Bytecount bytepos,
|
|
2092 Charcount n))
|
|
2093 {
|
867
|
2094 Ibyte *pos = eistr->data_ + bytepos;
|
771
|
2095 int i;
|
|
2096
|
800
|
2097 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
|
|
2098 text_checking_assert (n >= 0 && n <= eistr->charlen_);
|
771
|
2099 /* We could check N more correctly now, but ... see above. */
|
|
2100 for (i = 0; i < n; i++)
|
867
|
2101 DEC_IBYTEPTR (pos);
|
800
|
2102 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
|
771
|
2103 return pos - eistr->data_;
|
|
2104 }
|
|
2105
|
|
/* Move a byte position back by one character, or by N characters.  (No
   space may separate the macro name from its parameter list, or the
   macro becomes object-like.) */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
|
|
2108
|
|
2109
|
|
2110 /* ----- Getting the character at a position ----- */
|
|
2111
|
|
2112 #define eigetch(ei, bytepos) \
|
867
|
2113 itext_ichar ((ei)->data_ + (bytepos))
|
|
2114 #define eigetch_char(ei, charpos) itext_ichar_n ((ei)->data_, charpos)
|
771
|
2115
|
|
2116
|
|
2117 /* ----- Setting the character at a position ----- */
|
|
2118
|
|
2119 #define eisetch(ei, bytepos, chr) \
|
|
2120 eisub_ch (ei, bytepos, -1, -1, 1, chr)
|
|
2121 #define eisetch_char(ei, charpos, chr) \
|
|
2122 eisub_ch (ei, -1, charpos, -1, 1, chr)
|
|
2123
|
|
2124
|
|
2125 /* ----- Concatenation ----- */
|
|
2126
|
|
/* Worker for the eicat_*() macros: append the BYTELEN bytes (CHARLEN
   chars) at DATA to EI.  EI is grown first (which also writes the new
   terminator), then the bytes are copied in after the old text.  DATA,
   BYTELEN and CHARLEN are each evaluated once. */
#define eicat_1(ei, data, bytelen, charlen)             \
do {                                                    \
  int ei14oldeibytelen = (ei)->bytelen_;                \
  int ei14bytelen = (bytelen);                          \
  EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen,           \
            (ei)->charlen_ + (charlen), 1);             \
  memcpy ((ei)->data_ + ei14oldeibytelen, (data),       \
          ei14bytelen);                                 \
} while (0)
|
|
2136
|
|
/* Append the Eistring EI2 to EI. */
#define eicat_ei(ei, ei2)                                       \
do {                                                            \
  const Eistring *ei9 = (ei2);                                  \
  eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_);       \
} while (0)

/* Append the null-terminated ASCII string ASCSTR to EI.  The string is
   checked with ASSERT_ASCTEXT_ASCII_LEN(). */
#define eicat_ascii(ei, ascstr)                                         \
do {                                                                    \
  const Ascbyte *ei15 = (ascstr);                                       \
  int ei15len = strlen (ei15);                                          \
                                                                        \
  ASSERT_ASCTEXT_ASCII_LEN (ei15, ei15len);                             \
  eicat_1 (ei, ei15, ei15len,                                           \
           bytecount_to_charcount ((Ibyte *) ei15, ei15len));           \
} while (0)

/* Append the LEN bytes of internal-format text at DATA to EI. */
#define eicat_raw(ei, data, len)                        \
do {                                                    \
  int ei16len = (len);                                  \
  const Ibyte *ei16data = (data);                       \
  eicat_1 (ei, ei16data, ei16len,                       \
           bytecount_to_charcount (ei16data, ei16len)); \
} while (0)

/* Append the null-terminated internal-format text at PTR to EI. */
#define eicat_rawz(ei, ptr)                             \
do {                                                    \
  const Ibyte *ei16p5ptr = (ptr);                       \
  eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr));     \
} while (0)

/* Append the text of the Lisp string LISP_STRING to EI. */
#define eicat_lstr(ei, lisp_string)                             \
do {                                                            \
  Lisp_Object ei17 = (lisp_string);                             \
  eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17),      \
           string_char_length (ei17));                          \
} while (0)

/* Append the single Ichar CH to EI. */
#define eicat_ch(ei, ch)                                \
do {                                                    \
  Ibyte ei22ch[MAX_ICHAR_LEN];                          \
  Bytecount ei22len = set_itext_ichar (ei22ch, ch);     \
  eicat_1 (ei, ei22ch, ei22len, 1);                     \
} while (0)
|
|
2180
|
|
2181
|
|
2182 /* ----- Replacement ----- */
|
|
2183
|
|
/* Worker for the eisub_*() macros and eidel(): replace the section of
   EI at (OFF, LEN) with the data at SRC of length SRCLEN.  Each of the
   byte values OFF, LEN and SRCLEN has a corresponding character value
   (CHAROFF, CHARLEN, SRCCHARLEN); in each pair, either one can be -1 --
   it will be computed from the other.

   EI is resized without writing a new terminator; instead the tail of
   the old text, terminator included, is moved into place when the
   section changes size, and then the replacement bytes are copied
   in. */

#define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
do {                                                                    \
  int ei18off = (off);                                                  \
  int ei18charoff = (charoff);                                          \
  int ei18len = (len);                                                  \
  int ei18charlen = (charlen);                                          \
  Ibyte *ei18src = (Ibyte *) (src);                                     \
  int ei18srclen = (srclen);                                            \
  int ei18srccharlen = (srccharlen);                                    \
                                                                        \
  int ei18oldeibytelen = (ei)->bytelen_;                                \
                                                                        \
  eifixup_bytechar ((ei)->data_, ei18off, ei18charoff);                 \
  eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen);       \
  eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen);               \
                                                                        \
  EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len,                  \
            (ei)->charlen_ + ei18srccharlen - ei18charlen, 0);          \
  if (ei18len != ei18srclen)                                            \
    memmove ((ei)->data_ + ei18off + ei18srclen,                        \
             (ei)->data_ + ei18off + ei18len,                           \
             /* include zero terminator. */                             \
             ei18oldeibytelen - (ei18off + ei18len) + 1);               \
  if (ei18srclen > 0)                                                   \
    memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen);                \
} while (0)
|
|
2214
|
|
/* Replace a section of EI with the Eistring EI2. */
#define eisub_ei(ei, off, charoff, len, charlen, ei2)                   \
do {                                                                    \
  const Eistring *ei19 = (ei2);                                         \
  eisub_1 (ei, off, charoff, len, charlen, ei19->data_, ei19->bytelen_, \
           ei19->charlen_);                                             \
} while (0)

/* Replace a section of EI with the null-terminated ASCII string ASCSTR
   (checked with ASSERT_ASCTEXT_ASCII_LEN()).  The character length of
   the replacement is left for eisub_1() to compute. */
#define eisub_ascii(ei, off, charoff, len, charlen, ascstr)     \
do {                                                            \
  const Ascbyte *ei20 = (ascstr);                               \
  int ei20len = strlen (ei20);                                  \
  ASSERT_ASCTEXT_ASCII_LEN (ei20, ei20len);                     \
  eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1);  \
} while (0)

/* Replace a section of EI with the single Ichar CH. */
#define eisub_ch(ei, off, charoff, len, charlen, ch)                    \
do {                                                                    \
  Ibyte ei21ch[MAX_ICHAR_LEN];                                          \
  Bytecount ei21len = set_itext_ichar (ei21ch, ch);                     \
  eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1);         \
} while (0)

/* Delete a section of EI, i.e. replace it with nothing. */
#define eidel(ei, off, charoff, len, charlen)           \
  eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0)
|
|
2239
|
|
2240
|
|
2241 /* ----- Converting to an external format ----- */
|
|
2242
|
1333
|
/* Convert the text of EI to external format using coding system
   CODESYS, storing the result in EI's external-data fields (see
   eiextdata() and eiextlen()).  For a malloc-based Eistring any
   previous external data is freed first and the new data is
   malloc()ed; otherwise the new data is ALLOCA()ed. */
#define eito_external(ei, codesys)                                      \
do {                                                                    \
  if ((ei)->mallocp_)                                                   \
    {                                                                   \
      if ((ei)->extdata_)                                               \
        {                                                               \
          xfree ((ei)->extdata_, Extbyte *);                            \
          (ei)->extdata_ = 0;                                           \
        }                                                               \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),          \
                          MALLOC, ((ei)->extdata_, (ei)->extlen_),      \
                          codesys);                                     \
    }                                                                   \
  else                                                                  \
    TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),            \
                        ALLOCA, ((ei)->extdata_, (ei)->extlen_),        \
                        codesys);                                       \
} while (0)
|
|
2261
|
|
2262 #define eiextdata(ei) ((ei)->extdata_)
|
|
2263 #define eiextlen(ei) ((ei)->extlen_)
|
|
2264
|
|
2265
|
|
2266 /* ----- Searching in the Eistring for a character ----- */
|
|
2267
|
|
2268 #define eichr(eistr, chr) \
|
|
2269 NOT YET IMPLEMENTED
|
|
2270 #define eichr_char(eistr, chr) \
|
|
2271 NOT YET IMPLEMENTED
|
|
2272 #define eichr_off(eistr, chr, off, charoff) \
|
|
2273 NOT YET IMPLEMENTED
|
|
2274 #define eichr_off_char(eistr, chr, off, charoff) \
|
|
2275 NOT YET IMPLEMENTED
|
|
2276 #define eirchr(eistr, chr) \
|
|
2277 NOT YET IMPLEMENTED
|
|
2278 #define eirchr_char(eistr, chr) \
|
|
2279 NOT YET IMPLEMENTED
|
|
2280 #define eirchr_off(eistr, chr, off, charoff) \
|
|
2281 NOT YET IMPLEMENTED
|
|
2282 #define eirchr_off_char(eistr, chr, off, charoff) \
|
|
2283 NOT YET IMPLEMENTED
|
|
2284
|
|
2285
|
|
2286 /* ----- Searching in the Eistring for a string ----- */
|
|
2287
|
|
2288 #define eistr_ei(eistr, eistr2) \
|
|
2289 NOT YET IMPLEMENTED
|
|
2290 #define eistr_ei_char(eistr, eistr2) \
|
|
2291 NOT YET IMPLEMENTED
|
|
2292 #define eistr_ei_off(eistr, eistr2, off, charoff) \
|
|
2293 NOT YET IMPLEMENTED
|
|
2294 #define eistr_ei_off_char(eistr, eistr2, off, charoff) \
|
|
2295 NOT YET IMPLEMENTED
|
|
2296 #define eirstr_ei(eistr, eistr2) \
|
|
2297 NOT YET IMPLEMENTED
|
|
2298 #define eirstr_ei_char(eistr, eistr2) \
|
|
2299 NOT YET IMPLEMENTED
|
|
2300 #define eirstr_ei_off(eistr, eistr2, off, charoff) \
|
|
2301 NOT YET IMPLEMENTED
|
|
2302 #define eirstr_ei_off_char(eistr, eistr2, off, charoff) \
|
|
2303 NOT YET IMPLEMENTED
|
|
2304
|
2421
|
2305 #define eistr_ascii(eistr, ascstr) \
|
771
|
2306 NOT YET IMPLEMENTED
|
2421
|
2307 #define eistr_ascii_char(eistr, ascstr) \
|
771
|
2308 NOT YET IMPLEMENTED
|
2421
|
2309 #define eistr_ascii_off(eistr, ascstr, off, charoff) \
|
771
|
2310 NOT YET IMPLEMENTED
|
2421
|
2311 #define eistr_ascii_off_char(eistr, ascstr, off, charoff) \
|
771
|
2312 NOT YET IMPLEMENTED
|
2421
|
2313 #define eirstr_ascii(eistr, ascstr) \
|
771
|
2314 NOT YET IMPLEMENTED
|
2421
|
2315 #define eirstr_ascii_char(eistr, ascstr) \
|
771
|
2316 NOT YET IMPLEMENTED
|
2421
|
2317 #define eirstr_ascii_off(eistr, ascstr, off, charoff) \
|
771
|
2318 NOT YET IMPLEMENTED
|
2421
|
2319 #define eirstr_ascii_off_char(eistr, ascstr, off, charoff) \
|
771
|
2320 NOT YET IMPLEMENTED
|
|
2321
|
|
2322
|
|
2323 /* ----- Comparison ----- */
|
|
2324
|
|
2325 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
|
867
|
2326 Bytecount len, Charcount charlen, const Ibyte *data,
|
2526
|
2327 const Eistring *ei2, int is_ascii, int fold_case);
|
771
|
2328
|
|
/* Compare EISTR (or the given section of it) with the Eistring EISTR2.
   These all call eicmp_1() with no raw data pointer, EISTR2 as the
   other string, is_ascii 0, and fold_case 0 (eicmp_*), 1 (eicasecmp_*)
   or 2 (eicasecmp_i18n_*). */
#define eicmp_ei(eistr, eistr2)                                         \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
#define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2)         \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)
#define eicasecmp_ei(eistr, eistr2)                                     \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
#define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2)     \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)
#define eicasecmp_i18n_ei(eistr, eistr2)                                \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)
#define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)
|
|
2341
|
2421
|
/* Compare an Eistring, or a region of one, against an ASCII C string.
   Every form ends up in eicmp_1() with ASCSTR cast to `const Ibyte *' as
   DATA, a null second Eistring and is_ascii == 1; the last argument is the
   fold_case code (0 for eicmp_*, 1 for eicasecmp_*, 2 for
   eicasecmp_i18n_*).  The whole-string forms are the `_off' forms with
   region arguments 0, -1, -1, -1.

   ASCSTR is parenthesized before the cast so that an argument such as
   `cond ? a : b' is converted as a whole; without the parentheses the cast
   would bind to `cond' only. */

/* Region forms. */
#define eicmp_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, \
           (const Ibyte *) (ascstr), 0, 1, 0)
#define eicasecmp_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, \
           (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_i18n_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, \
           (const Ibyte *) (ascstr), 0, 1, 2)

/* Whole-string forms. */
#define eicmp_ascii(eistr, ascstr) \
  eicmp_off_ascii (eistr, 0, -1, -1, -1, ascstr)
#define eicasecmp_ascii(eistr, ascstr) \
  eicasecmp_off_ascii (eistr, 0, -1, -1, -1, ascstr)
#define eicasecmp_i18n_ascii(eistr, ascstr) \
  eicasecmp_i18n_off_ascii (eistr, 0, -1, -1, -1, ascstr)
|
771
|
2354
|
|
2355
|
|
2356 /* ----- Case-changing the Eistring ----- */
|
|
2357
|
867
|
2358 int eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata,
|
771
|
2359 int downp);
|
|
2360
|
|
/* Case-convert the Eistring EI in place; DOWNP is forwarded unchanged to
   eistr_casefiddle_1() (eilwr passes 1, eiupr passes 0).

   A scratch buffer is taken with alloca_ibytes() and filled by
   eistr_casefiddle_1().  When that returns nonzero, EI is repointed at the
   scratch buffer and its byte length and allocated size are updated;
   when it returns zero, EI is left untouched.  EI is evaluated several
   times, so it must not have side effects. */
#define EI_CASECHANGE(ei, downp) \
do { \
  /* Room for every char at its maximum encoded length, plus one byte. */ \
  int ei11cap = (ei)->charlen_ * MAX_ICHAR_LEN + 1; \
  Ibyte *ei11buf = (Ibyte *) alloca_ibytes (ei11cap); \
  int ei11outlen = \
    eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, ei11buf, downp); \
 \
  if (ei11outlen != 0) \
    { \
      (ei)->data_ = ei11buf; \
      (ei)->bytelen_ = ei11outlen; \
      (ei)->max_size_allocated_ = ei11cap; \
      /* charlen_ stays as it was: the character count is unchanged. */ \
    } \
} while (0)
|
|
2377
|
|
/* Case-change an Eistring in place via EI_CASECHANGE:
   eilwr passes downp == 1, eiupr passes downp == 0. */
#define eilwr(ei) EI_CASECHANGE (ei, 1)
#define eiupr(ei) EI_CASECHANGE (ei, 0)
|
|
2380
|
1743
|
2381 END_C_DECLS
|
1650
|
2382
|
771
|
2383
|
|
2384 /************************************************************************/
|
|
2385 /* */
|
|
2386 /* Converting between internal and external format */
|
|
2387 /* */
|
|
2388 /************************************************************************/
|
|
2389 /*
|
1318
|
2390 The macros below are used for converting data between different formats.
|
|
2391 Generally, the data is textual, and the formats are related to
|
|
2392 internationalization (e.g. converting between internal-format text and
|
|
2393 UTF-8) -- but the mechanism is general, and could be used for anything,
|
|
2394 e.g. decoding gzipped data.
|
|
2395
|
|
2396 In general, conversion involves a source of data, a sink, the existing
|
|
2397 format of the source data, and the desired format of the sink. The
|
|
2398 macros below, however, always require that either the source or sink is
|
|
2399 internal-format text. Therefore, in practice the conversions below
|
|
2400 involve source, sink, an external format (specified by a coding system),
|
|
2401 and the direction of conversion (internal->external or vice-versa).
|
|
2402
|
|
2403 Sources and sinks can be raw data (sized or unsized -- when unsized,
|
|
2404 input data is assumed to be null-terminated [double null-terminated for
|
|
2405 Unicode-format data], and on output the length is not stored anywhere),
|
|
2406 Lisp strings, Lisp buffers, lstreams, and opaque data objects. When the
|
|
2407 output is raw data, the result can be allocated either with alloca() or
|
|
2408 malloc(). (There is currently no provision for writing into a fixed
|
|
2409 buffer. If you want this, use alloca() output and then copy the data --
|
|
2410 but be careful with the size! Unless you are very sure of the encoding
|
|
2411 being used, upper bounds for the size are not in general computable.)
|
|
2412 The obvious restrictions on source and sink types apply (e.g. Lisp
|
|
2413 strings are a source and sink only for internal data).
|
|
2414
|
|
2415 All raw data outputted will contain an extra null byte (two bytes for
|
|
2416 Unicode -- currently, in fact, all output data, whether internal or
|
|
2417 external, is double-null-terminated, but you can't count on this; see
|
|
2418 below). This means that enough space is allocated to contain the extra
|
|
2419 nulls; however, these nulls are not reflected in the returned output
|
|
2420 size.
|
|
2421
|
|
2422 The most basic macros are TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT.
|
|
2423 These can be used to convert between any kinds of sources or sinks.
|
|
2424 However, 99% of conversions involve raw data or Lisp strings as both
|
|
2425 source and sink, and usually data is output as alloca() rather than
|
|
2426 malloc(). For this reason, convenience macros are defined for many types
|
|
2427 of conversions involving raw data and/or Lisp strings, especially when
|
|
2428 the output is an alloca()ed string. (When the destination is a
|
|
2429 Lisp_String, there are other functions that should be used instead --
|
|
2430 build_ext_string() and make_ext_string(), for example.) The convenience
|
|
2431 macros are of two types -- the older kind that store the result into a
|
|
2432 specified variable, and the newer kind that return the result. The newer
|
|
2433 kind of macros don't exist when the output is sized data, because that
|
|
2434 would have two return values. NOTE: All convenience macros are
|
|
2435 ultimately defined in terms of TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT.
|
|
2436 Thus, any comments below about the workings of these macros also apply to
|
|
2437 all convenience macros.
|
|
2438
|
|
2439 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, codesys)
|
|
2440 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, codesys)
|
771
|
2441
|
|
2442 Typical use is
|
|
2443
|
2367
|
2444 TO_EXTERNAL_FORMAT (LISP_STRING, str, C_STRING_MALLOC, ptr, Qfile_name);
|
|
2445
|
|
2446 which means that the contents of the lisp string `str' are written
|
|
2447 to a malloc'ed memory area which will be pointed to by `ptr', after the
|
|
2448 function returns. The conversion will be done using the `file-name'
|
|
2449 coding system (which will be controlled by the user indirectly by
|
|
2450 setting or binding the variable `file-name-coding-system').
|
|
2451
|
|
2452 Some sources and sinks require two C variables to specify. We use
|
|
2453 some preprocessor magic to allow different source and sink types, and
|
|
2454 even different numbers of arguments to specify different types of
|
|
2455 sources and sinks.
|
|
2456
|
|
2457 So we can have a call that looks like
|
|
2458
|
|
2459 TO_INTERNAL_FORMAT (DATA, (ptr, len),
|
|
2460 MALLOC, (ptr, len),
|
|
2461 coding_system);
|
|
2462
|
|
2463 The parenthesized argument pairs are required to make the
|
|
2464 preprocessor magic work.
|
771
|
2465
|
|
2466 NOTE: GC is inhibited during the entire operation of these macros. This
|
|
2467 is because frequently the data to be converted comes from strings but
|
|
2468 gets passed in as just DATA, and GC may move around the string data. If
|
|
2469 we didn't inhibit GC, there'd have to be a lot of messy recoding,
|
|
2470 alloca-copying of strings and other annoying stuff.
|
|
2471
|
|
2472 The source or sink can be specified in one of these ways:
|
|
2473
|
|
2474 DATA, (ptr, len), // input data is a fixed buffer of size len
|
851
|
2475 ALLOCA, (ptr, len), // output data is in an ALLOCA()ed buffer of size len
|
771
|
2476 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
|
|
2477 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
|
|
2478 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
|
|
2479 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr))
|
|
2480 // on input (the Unicode version is used when correct)
|
|
2481 LISP_STRING, string, // input or output is a Lisp_Object of type string
|
|
2482 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
|
|
2483 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream
|
|
2484 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque
|
|
2485
|
|
2486 When specifying the sink, use lvalues, since the macro will assign to them,
|
|
2487 except when the sink is an lstream or a lisp buffer.
|
|
2488
|
2367
|
2489 For the sink types `ALLOCA' and `C_STRING_ALLOCA', the resulting text is
|
|
2490 stored in a stack-allocated buffer, which is automatically freed on
|
|
2491 returning from the function. However, the sink types `MALLOC' and
|
|
2492 `C_STRING_MALLOC' return `xmalloc()'ed memory. The caller is responsible
|
|
2493 for freeing this memory using `xfree()'.
|
|
2494
|
771
|
2495 The macros accept the kinds of sources and sinks appropriate for
|
|
2496 internal and external data representation. See the type_checking_assert
|
|
2497 macros below for the actual allowed types.
|
|
2498
|
|
2499 Since some sources and sinks use one argument (a Lisp_Object) to
|
|
2500 specify them, while others take a (pointer, length) pair, we use
|
|
2501 some C preprocessor trickery to allow pair arguments to be specified
|
|
2502 by parenthesizing them, as in the examples above.
|
|
2503
|
|
2504 Anything prefixed by dfc_ (`data format conversion') is private.
|
|
2505 They are only used to implement these macros.
|
|
2506
|
|
2507 [[Using C_STRING* is appropriate for using with external APIs that
|
|
2508 take null-terminated strings. For internal data, we should try to
|
|
2509 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'.
|
|
2510
|
|
2511 Sometime in the future we might allow output to C_STRING_ALLOCA or
|
|
2512 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not
|
|
2513 TO_INTERNAL_FORMAT().]]
|
|
2514
|
|
2515 The above comments are not true. Frequently (most of the time, in
|
|
2516 fact), external strings come as zero-terminated entities, where the
|
|
2517 zero-termination is the only way to find out the length. Even in
|
|
2518 cases where you can get the length, most of the time the system will
|
|
2519 still use the null to signal the end of the string, and there will
|
|
2520 still be no way to either send in or receive a string with embedded
|
|
2521 nulls. In such situations, it's pointless to track the length
|
|
2522 because null bytes can never be in the string. We have a lot of
|
|
2523 operations that make it easy to operate on zero-terminated strings,
|
|
2524 and forcing the user to deal with the length everywhere would only
|
|
2525 make the code uglier and more complicated, for no gain. --ben
|
|
2526
|
|
2527 There is no problem using the same lvalue for source and sink.
|
|
2528
|
|
2529 Also, when pointers are required, the code (currently at least) is
|
|
2530 lax and allows any pointer types, either in the source or the sink.
|
|
2531 This makes it possible, e.g., to deal with internal format data held
|
|
2532 in char *'s or external format data held in WCHAR * (i.e. Unicode).
|
|
2533
|
|
2534 Finally, whenever storage allocation is called for, extra space is
|
|
2535 allocated for a terminating zero, and such a zero is stored in the
|
|
2536 appropriate place, regardless of whether the source data was
|
|
2537 specified using a length or was specified as zero-terminated. This
|
|
2538 allows you to freely pass the resulting data, no matter how
|
|
2539 obtained, to a routine that expects zero termination (modulo, of
|
|
2540 course, that any embedded zeros in the resulting text will cause
|
|
2541 truncation). In fact, currently two terminating zeros are allocated
|
|
2542 and stored after the data result. This is to allow for the
|
|
2543 possibility of storing a Unicode value on output, which needs the
|
|
2544 two zeros. Currently, however, the two zeros are stored regardless
|
|
2545 of whether the conversion is internal or external and regardless of
|
|
2546 whether the external coding system is in fact Unicode. This
|
|
2547 behavior may change in the future, and you cannot rely on this --
|
|
2548 the most you can rely on is that sink data in Unicode format will
|
|
2549 have two terminating nulls, which combine to form one Unicode null
|
2367
|
2550 character.
|
|
2551
|
|
2552 NOTE: You might ask, why are these not written as functions that
|
|
2553 *RETURN* the converted string, since that would allow them to be used
|
|
2554 much more conveniently, without having to constantly declare temporary
|
|
2555 variables? The answer is that in fact I originally did write the
|
|
2556 routines that way, but that required either
|
|
2557
|
|
2558 (a) calling alloca() inside of a function call, or
|
|
2559 (b) using expressions separated by commas and a global temporary variable, or
|
|
2560 (c) using the GCC extension ({ ... }).
|
|
2561
|
|
2562 Turned out that all of the above had bugs, all caused by GCC (hence the
|
|
2563 comments about "those GCC wankers" and "ream gcc up the ass"). As for
|
|
2564 (a), some versions of GCC (especially on Intel platforms) had
|
|
2565 buggy implementations of alloca() that couldn't handle being called
|
|
2566 inside of a function call -- they just decremented the stack right in the
|
|
2567 middle of pushing args. Oops, crash with stack trashing, very bad. (b)
|
|
2568 was an attempt to fix (a), and that led to further GCC crashes, esp. when
|
|
2569 you had two such calls in a single subexpression, because GCC couldn't be
|
|
2570 counted upon to follow even a minimally reasonable order of execution.
|
|
2571 True, you can't count on one argument being evaluated before another, but
|
|
2572 GCC would actually interleave them so that the temp var got stomped on by
|
|
2573 one while the other was accessing it. So I tried (c), which was
|
|
2574 problematic because that GCC extension has more bugs in it than a
|
|
2575 termite's nest.
|
|
2576
|
|
2577 So reluctantly I converted to the current way. Now, that was awhile ago
|
|
2578 (c. 1994), and it appears that the bug involving alloca in function calls
|
|
2579 has long since been fixed. More recently, I defined the new-dfc routines
|
|
2580 down below, which DO allow exactly such convenience of returning your
|
|
2581 args rather than store them in temp variables, and I also wrote a
|
|
2582 configure check to see whether alloca() causes crashes inside of function
|
|
2583 calls, and if so use the portable alloca() implementation in alloca.c.
|
|
2584 If you define TEST_NEW_DFC, the old routines get written in terms of the
|
|
2585 new ones, and I've had a beta put out with this on and it appeared to
|
|
2586 cause no problems -- so we should consider
|
|
2587 switching, and feel no compunctions about writing further such function-
|
|
2588 like alloca() routines in lieu of statement-like ones. --ben */
|
771
|
2589
|
|
/* Convert SOURCE to external format using coding system CODESYS and
   deliver the result to SINK.

   SOURCE_TYPE and SINK_TYPE must be literal type keywords (DATA,
   C_STRING, ALLOCA, ...): they are token-pasted to form DFC_TYPE_xxx,
   DFC_EXT_SOURCE_xxx_TO_ARGS, DFC_SINK_xxx_TO_ARGS and
   DFC_xxx_USE_CONVERTED_DATA.  CODESYS is evaluated once.  The dfc_*
   locals declared here are referenced by name from those helper macros,
   so their names must not change. */
#define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  Lisp_Object dfc_codesys = (codesys); \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
 \
  /* Reject source/sink kinds this direction does not accept. */ \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA \
      || DFC_TYPE_##source_type == DFC_TYPE_C_STRING \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
 \
  /* Describe both ends in dfc_source / dfc_sink. */ \
  DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
 \
  dfc_convert_to_external_format (dfc_simplified_source_type, \
                                  &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, \
                                  &dfc_sink); \
 \
  /* Hand the converted data to the caller's sink lvalue(s). */ \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
|
|
2621
|
|
/* Convert SOURCE from the external format named by coding system CODESYS
   to internal format and deliver the result to SINK.

   SOURCE_TYPE and SINK_TYPE must be literal type keywords (DATA,
   C_STRING, ALLOCA, ...): they are token-pasted to form DFC_TYPE_xxx,
   DFC_INT_SOURCE_xxx_TO_ARGS, DFC_SINK_xxx_TO_ARGS and
   DFC_xxx_USE_CONVERTED_DATA.  CODESYS is evaluated once.  The dfc_*
   locals declared here are referenced by name from those helper macros,
   so their names must not change. */
#define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  Lisp_Object dfc_codesys = (codesys); \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
 \
  /* Reject source/sink kinds this direction does not accept. */ \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA \
      || DFC_TYPE_##source_type == DFC_TYPE_C_STRING \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
 \
  /* Describe both ends in dfc_source / dfc_sink. */ \
  DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
 \
  dfc_convert_to_internal_format (dfc_simplified_source_type, \
                                  &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, \
                                  &dfc_sink); \
 \
  /* Hand the converted data to the caller's sink lvalue(s). */ \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
|
|
2653
|
814
|
2654 #ifdef __cplusplus
|
771
|
2655
|
814
|
2656 /* Error if you try to use a union here: "member `struct {anonymous
|
|
2657 union}::{anonymous} {anonymous union}::data' with constructor not allowed
|
|
2658 in union" (Bytecount is a class) */
|
|
2659
|
|
2660 typedef struct
|
|
2661 #else
|
771
|
2662 typedef union
|
814
|
2663 #endif
|
771
|
2664 {
|
|
2665 struct { const void *ptr; Bytecount len; } data;
|
|
2666 Lisp_Object lisp_object;
|
|
2667 } dfc_conversion_data;
|
|
2668
|
|
/* Kinds of conversion source and sink.  TO_EXTERNAL_FORMAT and
   TO_INTERNAL_FORMAT paste the caller's type keyword onto DFC_TYPE_ to
   name one of these. */
typedef enum dfc_conversion_type
{
  DFC_TYPE_DATA,             /* (ptr, len): fixed buffer */
  DFC_TYPE_ALLOCA,           /* (ptr, len): ALLOCA()ed output */
  DFC_TYPE_MALLOC,           /* (ptr, len): malloc()ed output */
  DFC_TYPE_C_STRING,         /* ptr: zero-terminated input */
  DFC_TYPE_C_STRING_ALLOCA,  /* ptr: ALLOCA()ed output, length not stored */
  DFC_TYPE_C_STRING_MALLOC,  /* ptr: malloc()ed output, length not stored */
  DFC_TYPE_LISP_STRING,      /* Lisp_Object of type string */
  DFC_TYPE_LISP_LSTREAM,     /* Lisp_Object of type lstream */
  DFC_TYPE_LISP_OPAQUE,      /* Lisp_Object of type opaque */
  DFC_TYPE_LISP_BUFFER       /* Lisp buffer; output goes in at point */
} dfc_conversion_type;
|
|
2683
|
1743
|
2684 BEGIN_C_DECLS
|
1650
|
2685
|
771
|
2686 /* WARNING: These use a static buffer. This can lead to disaster if
|
|
2687 these functions are not used *very* carefully. Another reason to only use
|
|
2688 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
|
1632
|
2689 MODULE_API void
|
771
|
2690 dfc_convert_to_external_format (dfc_conversion_type source_type,
|
|
2691 dfc_conversion_data *source,
|
1318
|
2692 Lisp_Object codesys,
|
771
|
2693 dfc_conversion_type sink_type,
|
|
2694 dfc_conversion_data *sink);
|
1632
|
2695 MODULE_API void
|
771
|
2696 dfc_convert_to_internal_format (dfc_conversion_type source_type,
|
|
2697 dfc_conversion_data *source,
|
1318
|
2698 Lisp_Object codesys,
|
771
|
2699 dfc_conversion_type sink_type,
|
|
2700 dfc_conversion_data *sink);
|
|
/* CPP trickery: pick one half of a parenthesized pair.  Written inside
   another macro as `DFC_CPP_CAR pair', where the macro parameter `pair'
   is replaced by `(a, b)', which then supplies this macro's argument
   list. */
#define DFC_CPP_CAR(first, second) (first)
#define DFC_CPP_CDR(first, second) (second)
|
|
2704
|
|
/* Convert `source' to args for dfc_convert_to_external_format().

   Each macro fills in dfc_source and dfc_simplified_source_type, which
   must already be declared at the point of use (TO_EXTERNAL_FORMAT and
   TO_INTERNAL_FORMAT declare them).  CODESYS is accepted so that these
   have the same shape as the DFC_INT_SOURCE_* macros; none of the macros
   here uses it. */

/* VAL is a parenthesized (ptr, len) pair. */
#define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \
  dfc_source.data.ptr = DFC_CPP_CAR val; \
  dfc_source.data.len = DFC_CPP_CDR val; \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)

/* VAL is a zero-terminated string; its length comes from strlen().
   The pointer is stored first and then read back through a
   `const char *', so no const qualifier is cast away. */
#define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
  dfc_source.data.ptr = (val); \
  dfc_source.data.len = strlen ((const char *) dfc_source.data.ptr); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)

/* VAL is a Lisp string, passed through as a Lisp object. */
#define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \
  Lisp_Object dfc_slsta = (val); \
  type_checking_assert (STRINGP (dfc_slsta)); \
  dfc_source.lisp_object = dfc_slsta; \
  dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \
} while (0)

/* VAL is an lstream, passed through as a Lisp object. */
#define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \
  Lisp_Object dfc_sllta = (val); \
  type_checking_assert (LSTREAMP (dfc_sllta)); \
  dfc_source.lisp_object = dfc_sllta; \
  dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \
} while (0)

/* VAL is an opaque object; it is reduced to its (data, size) pair and
   handled as plain DATA. */
#define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \
  Lisp_Opaque *dfc_slota = XOPAQUE (val); \
  dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \
  dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)
|
|
2734
|
|
/* Convert `source' to args for dfc_convert_to_internal_format().
   Only the C_STRING form differs from its DFC_EXT_SOURCE_* counterpart:
   the length of the external string is obtained from
   dfc_external_data_len(), which is given the coding system, instead of
   from strlen().  The remaining forms simply forward. */
#define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
  dfc_source.data.ptr = (val); \
  dfc_source.data.len = \
    dfc_external_data_len (dfc_source.data.ptr, codesys); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)

#define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)
|
|
2749
|
|
/* Convert `sink' to args for dfc_convert_to_*_format().
   Each macro sets dfc_simplified_sink_type (and, for stream-like sinks,
   dfc_sink), which must already be declared at the point of use. */

/* Shared expansion for every sink that is reported to the converter as
   plain DATA; the caller's lvalue(s) are only assigned afterwards, by the
   matching DFC_xxx_USE_CONVERTED_DATA macro. */
#define DFC_SINK_IS_PLAIN_DATA \
  dfc_simplified_sink_type = DFC_TYPE_DATA

#define DFC_SINK_ALLOCA_TO_ARGS(val) DFC_SINK_IS_PLAIN_DATA
#define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) DFC_SINK_IS_PLAIN_DATA
#define DFC_SINK_MALLOC_TO_ARGS(val) DFC_SINK_IS_PLAIN_DATA
#define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) DFC_SINK_IS_PLAIN_DATA
#define DFC_SINK_LISP_STRING_TO_ARGS(val) DFC_SINK_IS_PLAIN_DATA
#define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) DFC_SINK_IS_PLAIN_DATA

/* VAL is an lstream; it is used as the sink directly. */
#define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \
  Lisp_Object dfc_sink_stream = (val); \
  type_checking_assert (LSTREAMP (dfc_sink_stream)); \
  dfc_sink.lisp_object = dfc_sink_stream; \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
} while (0)

/* VAL is a Lisp buffer; an output stream positioned at the buffer's
   point is created and used as an lstream sink. */
#define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \
  struct buffer *dfc_sink_buf = XBUFFER (val); \
  dfc_sink.lisp_object = \
    make_lisp_buffer_output_stream (dfc_sink_buf, \
                                    BUF_PT (dfc_sink_buf), 0); \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
} while (0)
|
|
2776
|
|
/* Assign to the `sink' lvalue(s) using the converted data, which the
   converter has left in dfc_sink.  The copies are len + 2 bytes long:
   the result is double zero-terminated to allow for Unicode output. */
typedef union { char c; void *p; } *dfc_aliasing_voidpp;

/* Copy the converted bytes plus the two terminators into storage
   obtained from ALLOCATE (a function-like name taking a size) and store
   the resulting pointer in LVALUE.  This is a macro so that an
   alloca-style allocator runs in the invoking function. */
#define DFC_COPY_CONVERTED_DATA(allocate, lvalue) do { \
  void *dfc_sink_ret = allocate (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  VOIDP_CAST (lvalue) = dfc_sink_ret; \
} while (0)

/* SINK is a (ptr, len) pair of lvalues. */
#define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \
  DFC_COPY_CONVERTED_DATA (ALLOCA, DFC_CPP_CAR sink); \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)
#define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \
  DFC_COPY_CONVERTED_DATA (xmalloc, DFC_CPP_CAR sink); \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)

/* SINK is a single pointer lvalue; the length is not reported. */
#define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) \
  DFC_COPY_CONVERTED_DATA (ALLOCA, sink)
#define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) \
  DFC_COPY_CONVERTED_DATA (xmalloc, sink)

/* SINK is a Lisp_Object lvalue that receives a new object. */
#define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
  sink = make_string ((Ibyte *) dfc_sink.data.ptr, dfc_sink.data.len)
#define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
  sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)

/* Stream-like sinks: nothing to copy.  For a buffer sink, the output
   stream held in dfc_sink is deleted. */
#define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */
#define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
  Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
|
|
2809
|
1318
|
2810 /* #define TEST_NEW_DFC */
|
|
2811
|
771
|
/* Convenience macros for extremely common invocations.  These store an
   alloca()ed result in OUT (and its length in OUTLEN for the SIZED
   output forms). */

#ifdef TEST_NEW_DFC

/* Experimental: express the statement-style macros through the
   value-returning NEW_* forms. */
#define C_STRING_TO_EXTERNAL(in, out, codesys) \
  do { \
    * (Extbyte **) &(out) = NEW_C_STRING_TO_EXTERNAL (in, codesys); \
  } while (0)
#define SIZED_C_STRING_TO_EXTERNAL(in, inlen, out, codesys) \
  do { \
    * (Extbyte **) &(out) = \
      NEW_SIZED_C_STRING_TO_EXTERNAL (in, inlen, codesys); \
  } while (0)
#define EXTERNAL_TO_C_STRING(in, out, codesys) \
  do { \
    * (Ibyte **) &(out) = NEW_EXTERNAL_TO_C_STRING (in, codesys); \
  } while (0)
#define SIZED_EXTERNAL_TO_C_STRING(in, inlen, out, codesys) \
  do { \
    * (Ibyte **) &(out) = \
      NEW_SIZED_EXTERNAL_TO_C_STRING (in, inlen, codesys); \
  } while (0)
#define LISP_STRING_TO_EXTERNAL(in, out, codesys) \
  do { \
    * (Extbyte **) &(out) = NEW_LISP_STRING_TO_EXTERNAL (in, codesys); \
  } while (0)

#else /* not TEST_NEW_DFC */

#define C_STRING_TO_EXTERNAL(in, out, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, codesys)
#define SIZED_C_STRING_TO_EXTERNAL(in, inlen, out, codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), C_STRING_ALLOCA, out, codesys)
#define EXTERNAL_TO_C_STRING(in, out, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, codesys)
#define SIZED_EXTERNAL_TO_C_STRING(in, inlen, out, codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), C_STRING_ALLOCA, out, codesys)
#define LISP_STRING_TO_EXTERNAL(in, out, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, codesys)

#endif /* TEST_NEW_DFC */

/* Sized output: these have no value-returning counterpart. */
#define C_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys)
#define SIZED_C_STRING_TO_SIZED_EXTERNAL(in, inlen, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), ALLOCA, (out, outlen), codesys)
#define EXTERNAL_TO_SIZED_C_STRING(in, out, outlen, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys)
#define SIZED_EXTERNAL_TO_SIZED_C_STRING(in, inlen, out, outlen, codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), ALLOCA, (out, outlen), codesys)
#define LISP_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, ALLOCA, (out, outlen), codesys)
|
|
2852
|
|
2853 /* In place of EXTERNAL_TO_LISP_STRING(), use build_ext_string() and/or
|
|
2854 make_ext_string(). */
|
|
2855
|
|
/* malloc() counterparts of the convenience macros: the result stored in
   OUT is heap memory that the caller must release. */

#ifdef TEST_NEW_DFC

/* Experimental: express the statement-style macros through the
   value-returning NEW_*_MALLOC forms. */
#define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  do { \
    * (Extbyte **) &(out) = \
      NEW_C_STRING_TO_EXTERNAL_MALLOC (in, codesys); \
  } while (0)
#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys) \
  do { \
    * (Extbyte **) &(out) = \
      NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC (in, inlen, codesys); \
  } while (0)
#define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys) \
  do { \
    * (Ibyte **) &(out) = \
      NEW_EXTERNAL_TO_C_STRING_MALLOC (in, codesys); \
  } while (0)
#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys) \
  do { \
    * (Ibyte **) &(out) = \
      NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC (in, inlen, codesys); \
  } while (0)
#define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  do { \
    * (Extbyte **) &(out) = \
      NEW_LISP_STRING_TO_EXTERNAL_MALLOC (in, codesys); \
  } while (0)

#else /* not TEST_NEW_DFC */

#define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys)
#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys)
#define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys)
#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys)
#define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, codesys)

#endif /* TEST_NEW_DFC */

/* Sized output: these have no value-returning counterpart. */
#define C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys)
#define SIZED_C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, inlen, out, outlen, \
                                                codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys)
#define EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, out, outlen, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys)
#define SIZED_EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, inlen, out, outlen, \
                                                codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys)
#define LISP_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, MALLOC, (out, outlen), codesys)
|
|
2899
|
1318
|
/* Source kinds understood by the new_dfc_* functions.  The comments show
   which NEW_* macros in this header pass each value. */
enum new_dfc_src_type
{
  DFC_EXTERNAL = 0,     /* NEW_EXTERNAL_TO_C_STRING*: size passed as -1 */
  DFC_SIZED_EXTERNAL,   /* NEW_SIZED_EXTERNAL_TO_C_STRING*: explicit size */
  DFC_INTERNAL,         /* NEW_C_STRING_TO_EXTERNAL*: size passed as -1 */
  DFC_SIZED_INTERNAL,   /* NEW_SIZED_C_STRING_TO_EXTERNAL*: explicit size */
  DFC_LISP_STRING       /* NEW_LISP_STRING_TO_EXTERNAL*: a Lisp string */
};
|
|
2908
|
1632
|
2909 MODULE_API void *new_dfc_convert_malloc (const void *src, Bytecount src_size,
|
|
2910 enum new_dfc_src_type type,
|
|
2911 Lisp_Object codesys);
|
2367
|
2912 MODULE_API Bytecount new_dfc_convert_size (const char *srctext,
|
|
2913 const void *src,
|
1632
|
2914 Bytecount src_size,
|
|
2915 enum new_dfc_src_type type,
|
|
2916 Lisp_Object codesys);
|
2367
|
2917 MODULE_API void *new_dfc_convert_copy_data (const char *srctext,
|
|
2918 void *alloca_data);
|
1318
|
2919
|
1743
|
2920 END_C_DECLS
|
1650
|
2921
|
1318
|
2922 /* Version of EXTERNAL_TO_C_STRING that *RETURNS* the translated string,
|
|
2923 still in alloca() space. Requires some trickiness to do this, but gets
|
|
2924 it done! */
|
|
2925
|
|
2926 /* NOTE: If you make two invocations of the dfc functions below in the same
|
|
2927 subexpression and use the exact same expression for the source in both
|
|
2928 cases, you will lose. In this unlikely case, you will get an abort, and
|
|
2929 need to rewrite the code.
|
|
2930 */
|
|
2931
|
|
2932 /* We need to use ALLOCA_FUNCALL_OK here. Some compilers have been known
|
|
2933 to choke when alloca() occurs as a funcall argument, and so we check
|
|
2934 this in configure. Rewriting the expressions below to use a temporary
|
|
2935 variable, so that the call to alloca() is outside of
|
2382
|
2936 new_dfc_convert_copy_data(), won't help because the entire NEW_DFC call
|
1318
|
2937 could be inside of a function call. */
|
|
2938
|
|
/* Common core of the non-malloc NEW_* conversion macros.

   The source expression is stringified (#src) and that text is handed to
   both new_dfc_convert_size() and new_dfc_convert_copy_data().  The value
   returned by new_dfc_convert_size() goes through ALLOCA_FUNCALL_OK, and
   the result of that becomes the alloca_data argument of
   new_dfc_convert_copy_data(), whose return value is the value of the
   whole expression. */
#define NEW_DFC_CONVERT_1_ALLOCA(src, src_size, type, codesys)          \
  new_dfc_convert_copy_data                                             \
    (#src,                                                              \
     ALLOCA_FUNCALL_OK                                                  \
       (new_dfc_convert_size (#src, src, src_size, type, codesys)))
|
|
2943
|
|
/* Expression forms of the data-format conversions: each macro evaluates
   to a pointer to the converted text instead of storing it through an
   output argument.

   The plain variants go through NEW_DFC_CONVERT_1_ALLOCA; the *_MALLOC
   variants call new_dfc_convert_malloc() (NOTE(review): presumably the
   caller must free that result -- confirm in the implementation).
   Unsized variants pass -1 as the source size.  The result is cast to
   Ibyte * by the *_TO_C_STRING macros and to Extbyte * by the
   *_TO_EXTERNAL macros.

   SRC is deliberately passed on unparenthesized: the alloca variants
   stringify it (#src) and hand that exact text to the dfc functions.

   Every expansion is wrapped in parentheses so that the cast cannot be
   separated from its operand by a neighbouring postfix operator, e.g.
   NEW_EXTERNAL_TO_C_STRING (s, cs)[0]. */
#define NEW_EXTERNAL_TO_C_STRING(src, codesys)                          \
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_EXTERNAL, codesys))
#define NEW_EXTERNAL_TO_C_STRING_MALLOC(src, codesys)                   \
  ((Ibyte *) new_dfc_convert_malloc (src, -1, DFC_EXTERNAL, codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING(src, len, codesys)               \
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_EXTERNAL,    \
                                       codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC(src, len, codesys)        \
  ((Ibyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_EXTERNAL,      \
                                     codesys))
#define NEW_C_STRING_TO_EXTERNAL(src, codesys)                          \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_INTERNAL, codesys))
#define NEW_C_STRING_TO_EXTERNAL_MALLOC(src, codesys)                   \
  ((Extbyte *) new_dfc_convert_malloc (src, -1, DFC_INTERNAL, codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL(src, len, codesys)               \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_INTERNAL,  \
                                         codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC(src, len, codesys)        \
  ((Extbyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_INTERNAL,    \
                                       codesys))
#define NEW_LISP_STRING_TO_EXTERNAL(src, codesys)                       \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (LISP_TO_VOID (src), -1,        \
                                         DFC_LISP_STRING, codesys))
#define NEW_LISP_STRING_TO_EXTERNAL_MALLOC(src, codesys)                \
  ((Extbyte *) new_dfc_convert_malloc (LISP_TO_VOID (src), -1,          \
                                       DFC_LISP_STRING, codesys))
|
771
|
2966
|
2367
|
2967 /* Standins for various encodings. */
|
|
/* Command-line arguments and environment variables: Qnative unless
   WEXTTEXT_IS_WIDE is defined, in which case Qmswindows_unicode. */
#ifndef WEXTTEXT_IS_WIDE
# define Qcommand_argument_encoding Qnative
# define Qenvironment_variable_encoding Qnative
#else /* WEXTTEXT_IS_WIDE */
# define Qcommand_argument_encoding Qmswindows_unicode
# define Qenvironment_variable_encoding Qmswindows_unicode
#endif /* WEXTTEXT_IS_WIDE */

/* Host and service names, one pair of stand-ins per platform family. */
#define Qunix_host_name_encoding Qnative
#define Qunix_service_name_encoding Qnative
#define Qmswindows_host_name_encoding Qmswindows_multibyte
#define Qmswindows_service_name_encoding Qmswindows_multibyte
|
|
2979
|
2367
|
2980 /* Wexttext functions. The type of Wexttext is selected at compile time
|
|
2981 and will sometimes be wchar_t, sometimes char. */
|
|
2982
|
|
2983 int wcscmp_ascii (const wchar_t *s1, const Ascbyte *s2);
|
|
2984 int wcsncmp_ascii (const wchar_t *s1, const Ascbyte *s2, Charcount len);
|
|
2985
|
|
/* Mapping of the wext_* names onto either the wide-character or the
   narrow-character routines, depending on WEXTTEXT_IS_WIDE.

   WEXTTEXT_ZTERM_SIZE is the size of one terminating zero element,
   WEXTSTRING (lit) spells a string literal of the selected width, and
   WEXTTEXT_TO_8_BIT (arg) yields an Extbyte-based view of ARG.

   Macro arguments that are cast are parenthesized, so that the cast
   applies to the whole argument expression rather than just to its first
   operand (e.g. build_wext_string (p + 1, cs)). */
#ifdef WEXTTEXT_IS_WIDE /* defined under MS Windows i.e. WIN32_NATIVE */
#define WEXTTEXT_ZTERM_SIZE sizeof (wchar_t)
/* Extra indirection needed in case of manifest constant as arg */
#define WEXTSTRING_1(arg) L##arg
#define WEXTSTRING(arg) WEXTSTRING_1(arg)
#define wext_strlen wcslen
#define wext_strcmp wcscmp
#define wext_strncmp wcsncmp
#define wext_strcmp_ascii wcscmp_ascii
#define wext_strncmp_ascii wcsncmp_ascii
#define wext_strcpy wcscpy
#define wext_strncpy wcsncpy
#define wext_strchr wcschr
#define wext_strrchr wcsrchr
#define wext_strdup wcsdup
#define wext_atol(str) wcstol (str, 0, 10)
#define wext_sprintf wsprintfW /* Huh?  both wsprintfA and wsprintfW? */
#define wext_getenv _wgetenv
#define build_wext_string(str, cs) build_ext_string ((Extbyte *) (str), cs)
#define WEXTTEXT_TO_8_BIT(arg) WEXTTEXT_TO_MULTIBYTE(arg)
#ifdef WIN32_NATIVE
int XCDECL wext_retry_open (const Wexttext *path, int oflag, ...);
#else
#error Cannot handle Wexttext yet on this system
#endif
#define wext_access _waccess
#define wext_stat _wstat
#else /* not WEXTTEXT_IS_WIDE */
#define WEXTTEXT_ZTERM_SIZE sizeof (char)
#define WEXTSTRING(arg) arg
#define wext_strlen strlen
#define wext_strcmp strcmp
#define wext_strncmp strncmp
#define wext_strcmp_ascii strcmp
#define wext_strncmp_ascii strncmp
#define wext_strcpy strcpy
#define wext_strncpy strncpy
#define wext_strchr strchr
#define wext_strrchr strrchr
#define wext_strdup xstrdup
#define wext_atol(str) atol (str)
#define wext_sprintf sprintf
#define wext_getenv getenv
#define build_wext_string build_ext_string
#define wext_retry_open retry_open
#define wext_access access
#define wext_stat stat
#define WEXTTEXT_TO_8_BIT(arg) ((Extbyte *) (arg))
#endif /* WEXTTEXT_IS_WIDE */
|
|
3035
|
|
3036 /* Standins for various X encodings.
|
1318
|
3037
|
|
3038 About encodings in X:
|
|
3039
|
|
3040 X works with 5 different encodings:
|
|
3041
|
|
3042 -- "Host Portable Character Encoding" == printable ASCII + space, tab,
|
|
3043 newline
|
|
3044
|
|
3045 -- STRING encoding == ASCII + Latin-1 + tab, newline
|
|
3046
|
|
3047 -- Locale-specific encoding
|
|
3048
|
|
3049 -- Compound text == STRING encoding + ISO-2022 escape sequences to
|
|
3050 switch between different locale-specific encodings.
|
|
3051
|
|
3052 -- ANSI C wide-character encoding
|
|
3053
|
|
3054 The Host Portable Character Encoding (HPCE) is used for atom names, font
|
|
3055 names, color names, keysyms, geometry strings, resource manager quarks,
|
|
3056 display names, locale names, and various other things. When describing
|
|
3057 such strings, the X manual typically says "If the ... is not in the Host
|
|
3058 Portable Character Encoding, the result is implementation dependent."
|
|
3059
|
|
3060 The wide-character encoding is used only in the Xwc* functions, which
|
|
3061 are provided as equivalents to Xmb* functions.
|
|
3062
|
|
3063 STRING and compound text are used in the value of string properties and
|
|
3064 selection data, both of which are values with an associated type atom,
|
|
3065 which can be STRING or COMPOUND_TEXT. It can also be a locale name, as
|
|
3066 specified in setlocale() (#### as usual, there is no normalization
|
|
3067 whatsoever of these names).
|
|
3068
|
|
3069 X also defines a type called "TEXT", which is used only as a requested
|
|
3070 type, and produces data in a type "convenient to the owner". However,
|
|
3071 there is some indication that X expects this to be the locale-specific
|
|
3072 encoding.
|
|
3073
|
|
3074 According to the glossary, the locale is used in
|
|
3075
|
|
3076 -- Encoding and processing of input method text
|
|
3077 -- Encoding of resource files and values
|
|
3078 -- Encoding and imaging of text strings
|
|
3079 -- Encoding and decoding for inter-client text communication
|
|
3080
|
|
3081 The functions XmbTextListToTextProperty and XmbTextPropertyToTextList
|
|
3082 (and Xwc* equivalents) can be used to convert between the
|
|
3083 locale-specific encoding (XTextStyle), STRING (XStringStyle), and
|
|
3084 compound text (XCompoundTextStyle), as well as XStdICCTextStyle, which
|
|
3085 converts to STRING if possible, and if not, COMPOUND_TEXT. This is
|
|
3086 used, for example, in XmbSetWMProperties, in the window_name and
|
|
3087 icon_name properties (WM_NAME and WM_ICON_NAME), which are in the
|
|
3088 locale-specific encoding on input, and are stored as STRING if possible,
|
|
3089 COMPOUND_TEXT otherwise.
|
|
3090 */
|
771
|
3091
|
|
3092 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext?
|
|
3093 Almost certainly the former. Use a standin for now. */
|
|
#define Qlwlib_encoding Qnative

/* The Host Portable Character Encoding. */
#define Qx_hpc_encoding Qnative

/* Everything below that X specifies in terms of the Host Portable
   Character Encoding is routed through Qx_hpc_encoding, so the choice can
   be changed in one place. */
#define Qx_atom_name_encoding Qx_hpc_encoding
#define Qx_font_name_encoding Qx_hpc_encoding
#define Qx_color_name_encoding Qx_hpc_encoding
#define Qx_keysym_encoding Qx_hpc_encoding
#define Qx_geometry_encoding Qx_hpc_encoding
#define Qx_resource_name_encoding Qx_hpc_encoding
#define Qx_application_class_encoding Qx_hpc_encoding
/* the following probably must agree with Qcommand_argument_encoding and
   Qenvironment_variable_encoding */
#define Qx_display_name_encoding Qx_hpc_encoding
#define Qx_xpm_data_encoding Qx_hpc_encoding

/* !!#### Verify these! */
#define Qxt_widget_arg_encoding Qnative
#define Qdt_dnd_encoding Qnative
/* Encoding for strings coming from Offix drag-n-drop.  Defined exactly
   once, so that there are no duplicate definitions to drift apart. */
#define Qoffix_dnd_encoding Qnative

/* RedHat 6.2 contains a locale called "Francais" with the C-cedilla
   encoded in ISO2022! */
#define Qlocale_name_encoding Qctext

#define Qstrerror_encoding Qnative

/* !!#### This exists to remind us that our hexify routine is totally
   un-Muleized. */
#define Qdnd_hexify_encoding Qascii
|
|
3128
|
771
|
/* Store in VAR the message for error number NUM.

   NUM is evaluated once.  strerror() is asked for the message; when it
   returns one, the text is run through EXTERNAL_TO_C_STRING with
   Qstrerror_encoding and the result lands in VAR.  When strerror()
   returns a null pointer, VAR is instead pointed at a 99-byte
   alloca_ibytes() buffer that is filled with "Unknown error <NUM>".

   VAR must be an assignable lvalue.  The macro expands to a single
   do { ... } while (0) statement. */
#define GET_STRERROR(var, num)                                          \
do {                                                                    \
  int __gsnum__ = (num);                                                \
  Extbyte * __gserr__ = strerror (__gsnum__);                           \
                                                                        \
  if (__gserr__)                                                        \
    {                                                                   \
      EXTERNAL_TO_C_STRING (__gserr__, var, Qstrerror_encoding);        \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      var = alloca_ibytes (99);                                         \
      qxesprintf (var, "Unknown error %d", __gsnum__);                  \
    }                                                                   \
} while (0)
|
|
3142
|
|
3143 #endif /* INCLUDED_text_h_ */
|