771
|
1 /* Header file for text manipulation primitives and macros.
|
|
2 Copyright (C) 1985-1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
788
|
4 Copyright (C) 2000, 2001, 2002 Ben Wing.
|
771
|
5
|
|
6 This file is part of XEmacs.
|
|
7
|
|
8 XEmacs is free software; you can redistribute it and/or modify it
|
|
9 under the terms of the GNU General Public License as published by the
|
|
10 Free Software Foundation; either version 2, or (at your option) any
|
|
11 later version.
|
|
12
|
|
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with XEmacs; see the file COPYING. If not, write to
|
|
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
21 Boston, MA 02111-1307, USA. */
|
|
22
|
|
23 /* Synched up with: FSF 19.30. */
|
|
24
|
|
25 /* Authorship:
|
|
26
|
|
27 Mostly written by Ben Wing, starting around 1995.
|
|
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz,
|
|
29 designed by Ben Wing based on earlier macros by Ben Wing.
|
|
30 Separated out June 18, 2000 from buffer.h into text.h.
|
|
31 */
|
|
32
|
|
33 #ifndef INCLUDED_text_h_
|
|
34 #define INCLUDED_text_h_
|
|
35
|
|
36 #include <wchar.h>
|
|
37
|
|
38 /* ---------------------------------------------------------------------- */
|
|
39 /* Super-basic character properties */
|
|
40 /* ---------------------------------------------------------------------- */
|
|
41
|
|
42 /* These properties define the specifics of how our current encoding fits
|
|
43 in the basic model used for the encoding. Because this model is the same
|
|
44 as is used for UTF-8, all these properties could be defined for it, too.
|
|
45 This would instantly make the rest of this file work with UTF-8 (with
|
|
46 the exception of a few called functions that would need to be redefined).
|
|
47
|
|
48 (UTF-2000 implementers, take note!)
|
|
49 */
|
|
50
|
|
51 /* If you want more than this, you need to include charset.h */
|
|
52
|
|
53 #ifndef MULE
|
|
54
|
|
/* Non-MULE build: each of these expands to the constant 1.  The two
   function-like macros ignore their argument, so the argument expression
   is never evaluated. */
#define REP_BYTES_BY_FIRST_BYTE(fb) 1
#define BYTE_ASCII_P(byte) 1
# define MAX_EMCHAR_LEN 1
|
|
58
|
|
59 #else /* MULE */
|
|
60
|
|
61 /* These are carefully designed to work if BYTE is signed or unsigned. */
|
|
62 /* Note that SPC and DEL are considered ASCII, not control. */
|
|
63
|
|
/* Byte classification.  Each macro evaluates BYTE exactly once and gives
   the same answer whether BYTE arrives as an unsigned value (0..255) or as
   a sign-extended signed char (-128..127). */
#define BYTE_ASCII_P(byte) (((byte) & ~0x7f) == 0)
#define BYTE_C0_P(byte)    (((byte) & ~0x1f) == 0)
/* Compare only the top three bits of the low byte.  Masking with ~0x1f
   would keep the sign-extension bits of a negative signed char, and the
   result could then never equal 0x80. */
#define BYTE_C1_P(byte)    (((byte) & 0xe0) == 0x80)

/* Does BYTE represent the first byte of a character?
   NOTE: this compares the raw value, so a negative (sign-extended) BYTE
   always tests true; pass an unsigned byte. */

#define INTBYTE_FIRST_BYTE_P(byte) ((byte) < 0xA0)

/* Does BYTE represent the first byte of a multi-byte character? */

#define INTBYTE_LEADING_BYTE_P(byte) BYTE_C1_P (byte)
|
|
75
|
|
76 /* Table of number of bytes in the string representation of a character
|
|
77 indexed by the first byte of that representation.
|
|
78
|
|
79 This value can be derived in other ways -- e.g. something like
|
|
80 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))
|
|
81 but it's faster this way. */
|
|
82 extern const Bytecount rep_bytes_by_first_byte[0xA0];
|
|
83
|
|
84 /* Number of bytes in the string representation of a character. */
|
788
|
85
|
800
|
86 #ifdef ERROR_CHECK_TEXT
|
788
|
87
|
814
|
88 INLINE_HEADER Bytecount REP_BYTES_BY_FIRST_BYTE_1 (int fb, const char *file,
|
|
89 int line);
|
|
90 INLINE_HEADER Bytecount
|
788
|
91 REP_BYTES_BY_FIRST_BYTE_1 (int fb, const char *file, int line)
|
771
|
92 {
|
788
|
93 assert_at_line (fb < 0xA0, file, line);
|
771
|
94 return rep_bytes_by_first_byte[fb];
|
|
95 }
|
|
96
|
788
|
97 #define REP_BYTES_BY_FIRST_BYTE(fb) \
|
|
98 REP_BYTES_BY_FIRST_BYTE_1 (fb, __FILE__, __LINE__)
|
|
99
|
800
|
100 #else /* ERROR_CHECK_TEXT */
|
788
|
101
|
|
102 #define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb])
|
|
103
|
800
|
104 #endif /* ERROR_CHECK_TEXT */
|
788
|
105
|
771
|
/* Is this character represented by more than one byte in a string?
   True for any character code of 0x80 or above. */

#define CHAR_MULTIBYTE_P(c) ((c) >= 0x80)

/* Exact complement of CHAR_MULTIBYTE_P. */
#define CHAR_ASCII_P(c) (!CHAR_MULTIBYTE_P (c))

#define MAX_EMCHAR_LEN 4
|
|
113
|
|
114 #endif /* MULE */
|
|
115
|
|
116 int dfc_coding_system_is_unicode (Lisp_Object coding_system);
|
|
117
|
|
118 DECLARE_INLINE_HEADER (
|
|
119 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys)
|
|
120 )
|
|
121 {
|
|
122 if (dfc_coding_system_is_unicode (codesys))
|
|
123 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr);
|
|
124 else
|
|
125 return strlen ((char *) ptr);
|
|
126 }
|
|
127
|
|
128
|
|
129 /************************************************************************/
|
|
130 /* */
|
|
131 /* working with raw internal-format data */
|
|
132 /* */
|
|
133 /************************************************************************/
|
|
134
|
|
135 /* NOTE: In all the following macros, we follow these rules concerning
|
|
136 multiple evaluation of the arguments:
|
|
137
|
|
138 1) Anything that's an lvalue can be evaluated more than once.
|
|
139 2) Anything that's a Lisp Object can be evaluated more than once.
|
|
140 This should probably be changed, but this follows the way
|
|
141 that all the macros in lisp.h do things.
|
|
142 3) 'struct buffer *' arguments can be evaluated more than once.
|
|
143 4) Nothing else can be evaluated more than once. Use inline
|
|
144 functions, if necessary, to prevent multiple evaluation.
|
|
145 5) An exception to (4) is that there are some macros below that
|
|
146 may evaluate their arguments more than once. They are all
|
|
147 denoted with the word "unsafe" in their name and are generally
|
|
148 meant to be called only by other macros that have already
|
|
149 stored the calling values in temporary variables.
|
|
150
|
|
151
|
|
152 Use the following functions/macros on contiguous strings of data.
|
|
153 If the text you're operating on is known to come from a buffer, use
|
|
154 the buffer-level functions below -- they know about the gap and may
|
|
155 be more efficient.
|
|
156
|
|
157
|
|
158 ----------------------------------------------------------------------------
|
|
159 (A) For working with charptr's (pointers to internally-formatted text):
|
|
160 ----------------------------------------------------------------------------
|
|
161
|
|
162 VALID_CHARPTR_P (ptr):
|
|
163 Given a charptr, does it point to the beginning of a character?
|
|
164
|
|
165 ASSERT_VALID_CHARPTR (ptr):
|
|
166 If error-checking is enabled, assert that the given charptr
|
|
167 points to the beginning of a character. Otherwise, do nothing.
|
|
168
|
|
169 INC_CHARPTR (ptr):
|
|
170 Given a charptr (assumed to point at the beginning of a character),
|
|
171 modify that pointer so it points to the beginning of the next
|
|
172 character.
|
|
173
|
|
174 DEC_CHARPTR (ptr):
|
|
175 Given a charptr (assumed to point at the beginning of a
|
|
176 character or at the very end of the text), modify that pointer
|
|
177 so it points to the beginning of the previous character.
|
|
178
|
|
179 VALIDATE_CHARPTR_BACKWARD (ptr):
|
|
180 Make sure that PTR is pointing to the beginning of a character.
|
|
181 If not, back up until this is the case. Note that there are not
|
|
182 too many places where it is legitimate to do this sort of thing.
|
|
183 It's an error if you're passed an "invalid" char * pointer.
|
|
184 NOTE: PTR *must* be pointing to a valid part of the string (i.e.
|
|
185 not the very end, unless the string is zero-terminated or
|
|
186 something) in order for this function to not cause crashes.
|
|
187
|
|
188 VALIDATE_CHARPTR_FORWARD (ptr):
|
|
189 Make sure that PTR is pointing to the beginning of a character.
|
|
190 If not, move forward until this is the case. Note that there
|
|
191 are not too many places where it is legitimate to do this sort
|
|
192 of thing. It's an error if you're passed an "invalid" char *
|
|
193 pointer.
|
|
194
|
|
195 ---------------------------------------------------------------------
|
|
196 (B) For working with the length (in bytes and characters) of a
|
|
197 section of internally-formatted text:
|
|
198 ---------------------------------------------------------------------
|
|
199
|
|
200 bytecount_to_charcount (ptr, nbi):
|
|
201 Given a pointer to a text string and a length in bytes,
|
|
202 return the equivalent length in characters.
|
|
203
|
|
204 charcount_to_bytecount (ptr, nch):
|
|
205 Given a pointer to a text string and a length in characters,
|
|
206 return the equivalent length in bytes.
|
|
207
|
|
208 charptr_n_addr (ptr, n):
|
|
209 Return a pointer to the beginning of the character offset N
|
|
210 (in characters) from PTR.
|
|
211
|
|
212 -------------------------------------------------------------------------
|
|
213 (C) For retrieving or changing the character pointed to by a charptr:
|
|
214 -------------------------------------------------------------------------
|
|
215
|
|
216 charptr_emchar (ptr):
|
|
217 Retrieve the character pointed to by PTR as an Emchar.
|
|
218
|
|
219 charptr_emchar_n (ptr, n):
|
|
220 Retrieve the character at offset N (in characters) from PTR,
|
|
221 as an Emchar.
|
|
222
|
|
223 set_charptr_emchar (ptr, ch):
|
|
224 Store the character CH (an Emchar) as internally-formatted
|
|
225 text starting at PTR. Return the number of bytes stored.
|
|
226
|
|
227 charptr_copy_char (src, dst):
|
|
228 Retrieve the character pointed to by SRC and store it as
|
|
229 internally-formatted text in DST.
|
|
230
|
|
231 ----------------------------------
|
|
232 (D) For working with Emchars:
|
|
233 ----------------------------------
|
|
234
|
|
235 [Note that there are other functions/macros for working with Emchars
|
|
236 in charset.h, for retrieving the charset of an Emchar and such.]
|
|
237
|
|
238 valid_char_p (ch):
|
|
239 Return whether the given Emchar is valid.
|
|
240
|
|
241 CHARP (ch):
|
|
242 Return whether the given Lisp_Object is a character.
|
|
243
|
|
244 CHECK_CHAR_COERCE_INT (ch):
|
|
245 Signal an error if CH is not a valid character or integer Lisp_Object.
|
|
246 If CH is an integer Lisp_Object, convert it to a character Lisp_Object,
|
|
247 but merely by repackaging, without performing tests for char validity.
|
|
248
|
|
249 MAX_EMCHAR_LEN:
|
|
250 Maximum number of buffer bytes per Emacs character.
|
|
251 */
|
|
252
|
|
253 /* ---------------------------------------------------------------------- */
|
|
254 /* (A) For working with charptr's (pointers to internally-formatted text) */
|
|
255 /* ---------------------------------------------------------------------- */
|
|
256
|
|
/* Does PTR point at the first byte of a character?  Under MULE the byte
   at PTR is read as an unsigned char and tested with
   INTBYTE_FIRST_BYTE_P; without MULE the answer is always 1 and PTR is
   not evaluated.  PTR is parenthesized so that an argument such as
   `p + 1' is dereferenced as a whole. */
#ifdef MULE
# define VALID_CHARPTR_P(ptr) INTBYTE_FIRST_BYTE_P (* (unsigned char *) (ptr))
#else
# define VALID_CHARPTR_P(ptr) 1
#endif

/* With ERROR_CHECK_TEXT, assert that PTR satisfies VALID_CHARPTR_P;
   otherwise expand to nothing. */
#ifdef ERROR_CHECK_TEXT
# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
#else
# define ASSERT_VALID_CHARPTR(ptr)
#endif
|
|
268
|
|
269 /* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
|
|
270 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
|
|
271 trick of looking for a valid first byte because it might run off
|
|
272 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
|
|
273 method because it doesn't have easy access to the first byte of
|
|
274 the character it's moving over. */
|
|
275
|
|
/* Advance PTR past the character it points at, using the length that
   REP_BYTES_BY_FIRST_BYTE gives for the byte at PTR.  PTR is evaluated
   twice. */
#define REAL_INC_CHARPTR(ptr) \
  ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))))

/* Add the byte length of the character at PTR to POS; PTR itself is not
   modified. */
#define REAL_INC_CHARBYTEBPOS(ptr, pos) \
  ((pos) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))

/* Step PTR back one byte at a time, at least once, until it satisfies
   VALID_CHARPTR_P. */
#define REAL_DEC_CHARPTR(ptr) do {      \
  (ptr)--;                              \
} while (!VALID_CHARPTR_P (ptr))
|
|
285
|
800
|
/* Public increment/decrement macros.  With ERROR_CHECK_TEXT the increment
   forms first assert that PTR satisfies VALID_CHARPTR_P, and DEC_CHARPTR
   asserts that the distance it moved equals the encoded length of the
   character it landed on.  Without ERROR_CHECK_TEXT they are plain aliases
   for the REAL_ versions. */
#ifdef ERROR_CHECK_TEXT
#define INC_CHARPTR(ptr) do {           \
  ASSERT_VALID_CHARPTR (ptr);           \
  REAL_INC_CHARPTR (ptr);               \
} while (0)

#define INC_CHARBYTEBPOS(ptr, pos) do { \
  ASSERT_VALID_CHARPTR (ptr);           \
  REAL_INC_CHARBYTEBPOS (ptr, pos);     \
} while (0)

#define DEC_CHARPTR(ptr) do {                   \
  const Intbyte *dc_ptr1 = (ptr);               \
  const Intbyte *dc_ptr2 = dc_ptr1;             \
  REAL_DEC_CHARPTR (dc_ptr2);                   \
  assert (dc_ptr1 - dc_ptr2 ==                  \
          REP_BYTES_BY_FIRST_BYTE (*dc_ptr2));  \
  (ptr) = (Intbyte *) dc_ptr2;                  \
} while (0)

#else /* ! ERROR_CHECK_TEXT */
#define INC_CHARBYTEBPOS(ptr, pos) REAL_INC_CHARBYTEBPOS (ptr, pos)
#define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr)
#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)
#endif /* ! ERROR_CHECK_TEXT */
|
771
|
311
|
|
312 #ifdef MULE
|
|
313
|
|
/* Note that this reads the byte at *PTR!
   Back PTR up one byte at a time until it satisfies VALID_CHARPTR_P; PTR
   is left alone if it already does.  PTR must be a modifiable lvalue and
   is evaluated repeatedly. */

#define VALIDATE_CHARPTR_BACKWARD(ptr) do {     \
  while (!VALID_CHARPTR_P (ptr)) (ptr)--;       \
} while (0)
|
|
319
|
|
320 /* Given a Intbyte string at PTR of size N, possibly with a partial
|
|
321 character at the end, return the size of the longest substring of
|
|
322 complete characters. Does not assume that the byte at *(PTR + N) is
|
|
323 readable. */
|
|
324 DECLARE_INLINE_HEADER (
|
|
325 Bytecount
|
814
|
326 validate_intbyte_string_backward (const Intbyte *ptr, Bytecount n)
|
771
|
327 )
|
|
328 {
|
814
|
329 const Intbyte *ptr2;
|
771
|
330
|
|
331 if (n == 0)
|
|
332 return n;
|
|
333 ptr2 = ptr + n - 1;
|
|
334 VALIDATE_CHARPTR_BACKWARD (ptr2);
|
|
335 if (ptr2 + REP_BYTES_BY_FIRST_BYTE (*ptr2) != ptr + n)
|
|
336 return ptr2 - ptr;
|
|
337 return n;
|
|
338 }
|
|
339
|
|
/* This needs to be trickier than VALIDATE_CHARPTR_BACKWARD() to avoid the
   possibility of running off the end of the string.  It first backs a
   copy of PTR up to a character start; only if that moved the copy is PTR
   set to that start and then advanced one whole character with
   INC_CHARPTR. */

#define VALIDATE_CHARPTR_FORWARD(ptr) do {      \
  Intbyte *vcf_ptr = (ptr);                     \
  VALIDATE_CHARPTR_BACKWARD (vcf_ptr);          \
  if (vcf_ptr != (ptr))                         \
    {                                           \
      (ptr) = vcf_ptr;                          \
      INC_CHARPTR (ptr);                        \
    }                                           \
} while (0)
|
|
352
|
|
353 #else /* not MULE */
|
|
/* Non-MULE versions: the two pointer validators expand to nothing, and
   the string validator yields N unchanged without evaluating PTR. */
#define VALIDATE_CHARPTR_BACKWARD(ptr)
#define VALIDATE_CHARPTR_FORWARD(ptr)
#define validate_intbyte_string_backward(ptr, n) (n)
|
|
357 #endif /* not MULE */
|
|
358
|
|
359 /* -------------------------------------------------------------- */
|
|
360 /* (B) For working with the length (in bytes and characters) of a */
|
|
361 /* section of internally-formatted text */
|
|
362 /* -------------------------------------------------------------- */
|
|
363
|
|
364 INLINE_HEADER const Intbyte *
|
|
365 charptr_n_addr (const Intbyte *ptr, Charcount offset);
|
|
366 INLINE_HEADER const Intbyte *
|
|
367 charptr_n_addr (const Intbyte *ptr, Charcount offset)
|
|
368 {
|
|
369 return ptr + charcount_to_bytecount (ptr, offset);
|
|
370 }
|
|
371
|
|
372 /* -------------------------------------------------------------------- */
|
|
373 /* (C) For retrieving or changing the character pointed to by a charptr */
|
|
374 /* -------------------------------------------------------------------- */
|
|
375
|
|
/* Single-byte versions of the charptr accessors: each touches only the
   one byte at the given pointer.  The two storing forms yield
   (Bytecount) 1 as their value. */
#define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0])
#define simple_set_charptr_emchar(ptr, x) \
  ((ptr)[0] = (Intbyte) (x), (Bytecount) 1)
#define simple_charptr_copy_char(src, dst) \
  ((dst)[0] = *(src), (Bytecount) 1)
|
771
|
381
|
|
382 #ifdef MULE
|
|
383
|
|
384 Emchar non_ascii_charptr_emchar (const Intbyte *ptr);
|
|
385 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c);
|
|
386 Bytecount non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst);
|
|
387
|
|
388 INLINE_HEADER Emchar charptr_emchar (const Intbyte *ptr);
|
|
389 INLINE_HEADER Emchar
|
|
390 charptr_emchar (const Intbyte *ptr)
|
|
391 {
|
|
392 return BYTE_ASCII_P (*ptr) ?
|
|
393 simple_charptr_emchar (ptr) :
|
|
394 non_ascii_charptr_emchar (ptr);
|
|
395 }
|
|
396
|
|
397 INLINE_HEADER Bytecount set_charptr_emchar (Intbyte *ptr, Emchar x);
|
|
398 INLINE_HEADER Bytecount
|
|
399 set_charptr_emchar (Intbyte *ptr, Emchar x)
|
|
400 {
|
|
401 return !CHAR_MULTIBYTE_P (x) ?
|
|
402 simple_set_charptr_emchar (ptr, x) :
|
|
403 non_ascii_set_charptr_emchar (ptr, x);
|
|
404 }
|
|
405
|
|
406 INLINE_HEADER Bytecount
|
|
407 charptr_copy_char (const Intbyte *src, Intbyte *dst);
|
|
408 INLINE_HEADER Bytecount
|
|
409 charptr_copy_char (const Intbyte *src, Intbyte *dst)
|
|
410 {
|
|
411 return BYTE_ASCII_P (*src) ?
|
|
412 simple_charptr_copy_char (src, dst) :
|
|
413 non_ascii_charptr_copy_char (src, dst);
|
|
414 }
|
|
415
|
|
416 #else /* not MULE */
|
|
417
|
|
/* Without MULE the charptr accessors are just the single-byte versions. */
# define charptr_emchar(ptr) simple_charptr_emchar (ptr)
# define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x)
# define charptr_copy_char(src, dst) simple_charptr_copy_char (src, dst)
|
|
421
|
|
422 #endif /* not MULE */
|
|
423
|
|
/* Retrieve the character at offset OFFSET (in characters) from PTR, as an
   Emchar: locate it with charptr_n_addr, then read it with
   charptr_emchar. */
#define charptr_emchar_n(ptr, offset) \
  charptr_emchar (charptr_n_addr (ptr, offset))
|
|
426
|
|
427
|
|
428 /* ---------------------------- */
|
|
429 /* (D) For working with Emchars */
|
|
430 /* ---------------------------- */
|
|
431
|
|
#ifdef MULE

int non_ascii_valid_char_p (Emchar ch);

/* Return nonzero if CH is a valid character: either it has no bits set
   above the low eight, or non_ascii_valid_char_p() accepts it. */
INLINE_HEADER int valid_char_p (Emchar ch);
INLINE_HEADER int
valid_char_p (Emchar ch)
{
  return (! (ch & ~0xFF)) || non_ascii_valid_char_p (ch);
}

#else /* not MULE */

/* Valid means no bits set above the low eight.  CH is parenthesized so
   that an argument such as `a | b' is masked as a whole. */
#define valid_char_p(ch) (! ((ch) & ~0xFF))

#endif /* not MULE */
|
|
448
|
|
/* Is X an integer Lisp_Object whose value passes valid_char_p?  X is
   evaluated twice. */
#define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x)))

/* Is X either a character object or an integer accepted by CHAR_INTP? */
#define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x))
|
|
452
|
|
453 INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj);
|
|
454 INLINE_HEADER Emchar
|
|
455 XCHAR_OR_CHAR_INT (Lisp_Object obj)
|
|
456 {
|
|
457 return CHARP (obj) ? XCHAR (obj) : XINT (obj);
|
|
458 }
|
|
459
|
|
/* Coerce the lvalue X to a character Lisp_Object.  A character is left
   alone; an integer accepted by CHAR_INTP is replaced with
   make_char (XINT (X)); anything else is replaced with the result of
   wrong_type_argument (Qcharacterp, X).  X is evaluated several times. */
#define CHECK_CHAR_COERCE_INT(x) do {           \
  if (CHARP (x))                                \
    ;                                           \
  else if (CHAR_INTP (x))                       \
    (x) = make_char (XINT (x));                 \
  else                                          \
    (x) = wrong_type_argument (Qcharacterp, x); \
} while (0)
|
|
468
|
|
469
|
|
470
|
|
471 /************************************************************************/
|
|
472 /* */
|
|
473 /* working with Eistrings */
|
|
474 /* */
|
|
475 /************************************************************************/
|
|
476
|
|
477 /*
|
|
478 #### NOTE: This is a work in progress. Neither the API nor especially
|
|
479 the implementation is finished.
|
|
480
|
|
481 NOTE: An Eistring is a structure that makes it easy to work with
|
|
482 internally-formatted strings of data. It provides operations similar
|
|
483 in feel to the standard strcpy(), strcat(), strlen(), etc., but
|
|
484
|
|
485 (a) it is Mule-correct
|
|
486 (b) it does dynamic allocation so you never have to worry about size
|
793
|
487 restrictions
|
|
488 (c) it comes in an alloca() variety (all allocation is stack-local,
|
|
489 so there is no need to explicitly clean up) as well as a malloc()
|
|
490 variety
|
|
491 (d) it knows its own length, so it does not suffer from standard null
|
|
492 byte brain-damage -- but it null-terminates the data anyway, so
|
|
493 it can be passed to standard routines
|
|
494 (e) it provides a much more powerful set of operations and knows about
|
771
|
495 all the standard places where string data might reside: Lisp_Objects,
|
|
496 other Eistrings, Intbyte * data with or without an explicit length,
|
|
497 ASCII strings, Emchars, etc.
|
793
|
498 (f) it provides easy operations to convert to/from externally-formatted
|
|
499 data, and is easier to use than the standard TO_INTERNAL_FORMAT
|
771
|
500 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal
|
|
501 and external version of its data, but the external version is only
|
|
502 initialized or changed when you call eito_external().)
|
|
503
|
793
|
504 The idea is to make it as easy to write Mule-correct string manipulation
|
|
505 code as it is to write normal string manipulation code. We also make
|
|
506 the API sufficiently general that it can handle multiple internal data
|
|
507 formats (e.g. some fixed-width optimizing formats and a default variable
|
|
508 width format) and allows for *ANY* data format we might choose in the
|
|
509 future for the default format, including UCS2. (In other words, we can't
|
|
510 assume that the internal format is ASCII-compatible and we can't assume
|
|
511 it doesn't have embedded null bytes. We do assume, however, that any
|
|
512 chosen format will have the concept of null-termination.) All of this is
|
|
513 hidden from the user.
|
771
|
514
|
|
515 #### It is really too bad that we don't have a real object-oriented
|
|
516 language, or at least a language with polymorphism!
|
|
517
|
|
518
|
|
519 **********************************************
|
|
520 * Declaration *
|
|
521 **********************************************
|
|
522
|
|
523 To declare an Eistring, either put one of the following in the local
|
|
524 variable section:
|
|
525
|
|
526 DECLARE_EISTRING (name);
|
|
527 Declare a new Eistring. This is a standard local variable declaration
|
|
528 and can go anywhere in the variable declaration section. NAME itself
|
|
529 is declared as an Eistring *, and its storage declared on the stack.
|
|
530
|
|
531 DECLARE_EISTRING_MALLOC (name);
|
|
532 Declare a new Eistring, which uses malloc()ed instead of alloca()ed
|
|
533 data. This is a standard local variable declaration and can go
|
|
534 anywhere in the variable declaration section. Once you initialize
|
|
535 the Eistring, you will have to free it using eifree() to avoid
|
793
|
536 memory leaks. You will need to use this form if you are passing
|
|
537 an Eistring to any function that modifies it (otherwise, the
|
|
538 modified data may be in stack space and get overwritten when the
|
|
539 function returns).
|
771
|
540
|
|
541 or use
|
|
542
|
793
|
543 Eistring ei;
|
|
544 void eiinit (Eistring *ei);
|
|
545 void eiinit_malloc (Eistring *einame);
|
771
|
546 If you need to put an Eistring elsewhere than in a local variable
|
|
547 declaration (e.g. in a structure), declare it as shown and then
|
|
548 call one of the init macros.
|
|
549
|
|
550 Also note:
|
|
551
|
793
|
552 void eifree (Eistring *ei);
|
771
|
553 If you declared an Eistring to use malloc() to hold its data,
|
|
554 or converted it to the heap using eito_malloc(), then this
|
|
555 releases any data in it and afterwards resets the Eistring
|
|
556 using eiinit_malloc(). Otherwise, it just resets the Eistring
|
|
557 using eiinit().
|
|
558
|
|
559
|
|
560 **********************************************
|
|
561 * Conventions *
|
|
562 **********************************************
|
|
563
|
|
564 - The names of the functions have been chosen, where possible, to
|
|
565 match the names of str*() functions in the standard C API.
|
|
566 -
|
|
567
|
|
568
|
|
569 **********************************************
|
|
570 * Initialization *
|
|
571 **********************************************
|
|
572
|
|
573 void eireset (Eistring *eistr);
|
|
574 Initialize the Eistring to the empty string.
|
|
575
|
|
576 void eicpy_* (Eistring *eistr, ...);
|
|
577 Initialize the Eistring from somewhere:
|
|
578
|
|
579 void eicpy_ei (Eistring *eistr, Eistring *eistr2);
|
|
580 ... from another Eistring.
|
|
581 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string);
|
|
582 ... from a Lisp_Object string.
|
|
583 void eicpy_ch (Eistring *eistr, Emchar ch);
|
793
|
584 ... from an Emchar (this can be a conventional C character).
|
771
|
585
|
|
586 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string,
|
|
587 Bytecount off, Charcount charoff,
|
|
588 Bytecount len, Charcount charlen);
|
|
589 ... from a section of a Lisp_Object string.
|
|
590 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf,
|
|
591 Bytecount off, Charcount charoff,
|
|
592 Bytecount len, Charcount charlen);
|
|
593 ... from a section of a Lisp_Object buffer.
|
|
594 void eicpy_raw (Eistring *eistr, const Intbyte *data, Bytecount len);
|
|
595 ... from raw internal-format data in the default internal format.
|
|
596 void eicpy_rawz (Eistring *eistr, const Intbyte *data);
|
|
597 ... from raw internal-format data in the default internal format
|
|
598 that is "null-terminated" (the meaning of this depends on the nature
|
|
599 of the default internal format).
|
|
600 void eicpy_raw_fmt (Eistring *eistr, const Intbyte *data, Bytecount len,
|
|
601 Internal_Format intfmt);
|
|
602 ... from raw internal-format data in the specified format.
|
|
603 void eicpy_rawz_fmt (Eistring *eistr, const Intbyte *data,
|
|
604 Internal_Format intfmt);
|
|
605 ... from raw internal-format data in the specified format that is
|
|
606 "null-terminated" (the meaning of this depends on the nature of
|
|
607 the specific format).
|
|
608 void eicpy_c (Eistring *eistr, const Char_ASCII *c_string);
|
|
609 ... from an ASCII null-terminated string. Non-ASCII characters in
|
|
610 the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
611 void eicpy_c_len (Eistring *eistr, const Char_ASCII *c_string, len);
|
|
612 ... from an ASCII string, with length specified. Non-ASCII characters
|
|
613 in the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
614 void eicpy_ext (Eistring *eistr, const Extbyte *extdata,
|
|
615 Lisp_Object coding_system);
|
|
616 ... from external null-terminated data, with coding system specified.
|
|
617 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata,
|
|
618 Bytecount extlen, Lisp_Object coding_system);
|
|
619 ... from external data, with length and coding system specified.
|
|
620 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream);
|
|
621 ... from an lstream; reads data till eof. Data must be in default
|
|
622 internal format; otherwise, interpose a decoding lstream.
|
|
623
|
|
624
|
|
625 **********************************************
|
|
626 * Getting the data out of the Eistring *
|
|
627 **********************************************
|
|
628
|
|
629 Intbyte *eidata (Eistring *eistr);
|
|
630 Return a pointer to the raw data in an Eistring. This is NOT
|
|
631 a copy.
|
|
632
|
|
633 Lisp_Object eimake_string (Eistring *eistr);
|
|
634 Make a Lisp string out of the Eistring.
|
|
635
|
|
636 Lisp_Object eimake_string_off (Eistring *eistr,
|
|
637 Bytecount off, Charcount charoff,
|
|
638 Bytecount len, Charcount charlen);
|
|
639 Make a Lisp string out of a section of the Eistring.
|
|
640
|
|
641 void eicpyout_alloca (Eistring *eistr, LVALUE: Intbyte *ptr_out,
|
|
642 LVALUE: Bytecount len_out);
|
|
643 Make an alloca() copy of the data in the Eistring, using the
|
|
644 default internal format. Due to the nature of alloca(), this
|
|
645 must be a macro, with all lvalues passed in as parameters.
|
793
|
646 (More specifically, not all compilers correctly handle using
|
|
647 alloca() as the argument to a function call -- GCC on x86
|
|
648 didn't use to, for example.) A pointer to the alloca()ed data
|
|
649 is stored in PTR_OUT, and the length of the data (not including
|
|
650 the terminating zero) is stored in LEN_OUT.
|
771
|
651
|
|
652 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Intbyte *ptr_out,
|
|
653 LVALUE: Bytecount len_out,
|
|
654 Internal_Format intfmt);
|
|
655 Like eicpyout_alloca(), but converts to the specified internal
|
|
656 format. (No formats other than FORMAT_DEFAULT are currently
|
|
657 implemented, and you get an assertion failure if you try.)
|
|
658
|
|
659 Intbyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out);
|
|
660 Make a malloc() copy of the data in the Eistring, using the
|
|
661 default internal format. This is a real function. No lvalues
|
|
662 passed in. Returns the new data, and stores the length (not
|
|
663 including the terminating zero) using INTLEN_OUT, unless it's
|
|
664 a NULL pointer.
|
|
665
|
|
666 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt,
|
|
667 Bytecount *intlen_out);
|
|
668 Like eicpyout_malloc(), but converts to the specified internal
|
|
669 format. (No formats other than FORMAT_DEFAULT are currently
|
|
670 implemented, and you get an assertion failure if you try.)
|
|
671
|
|
672
|
|
673 **********************************************
|
|
674 * Moving to the heap *
|
|
675 **********************************************
|
|
676
|
|
677 void eito_malloc (Eistring *eistr);
|
|
678 Move this Eistring to the heap. Its data will be stored in a
|
|
679 malloc()ed block rather than the stack. Subsequent changes to
|
|
680 this Eistring will realloc() the block as necessary. Use this
|
|
681 when you want the Eistring to remain in scope past the end of
|
|
682 this function call. You will have to manually free the data
|
|
683 in the Eistring using eifree().
|
|
684
|
|
685 void eito_alloca (Eistring *eistr);
|
|
686 Move this Eistring back to the stack, if it was moved to the
|
|
687 heap with eito_malloc(). This will automatically free any
|
|
688 heap-allocated data.
|
|
689
|
|
690
|
|
691
|
|
692 **********************************************
|
|
693 * Retrieving the length *
|
|
694 **********************************************
|
|
695
|
|
696 Bytecount eilen (Eistring *eistr);
|
|
697 Return the length of the internal data, in bytes. See also
|
|
698 eiextlen(), below.
|
|
699 Charcount eicharlen (Eistring *eistr);
|
|
700 Return the length of the internal data, in characters.
|
|
701
|
|
702
|
|
703 **********************************************
|
|
704 * Working with positions *
|
|
705 **********************************************
|
|
706
|
|
707 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos);
|
|
708 Convert a char offset to a byte offset.
|
|
709 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos);
|
|
710 Convert a byte offset to a char offset.
|
|
711 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos);
|
|
712 Increment the given position by one character.
|
|
713 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
|
|
714 Increment the given position by N characters.
|
|
715 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos);
|
|
716 Decrement the given position by one character.
|
|
717 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
|
|
718 Decrement the given position by N characters.
|
|
719
|
|
720
|
|
721 **********************************************
|
|
722 * Getting the character at a position *
|
|
723 **********************************************
|
|
724
|
|
725 Emchar eigetch (Eistring *eistr, Bytecount bytepos);
|
|
726 Return the character at a particular byte offset.
|
|
727 Emchar eigetch_char (Eistring *eistr, Charcount charpos);
|
|
728 Return the character at a particular character offset.
|
|
729
|
|
730
|
|
731 **********************************************
|
|
732 * Setting the character at a position *
|
|
733 **********************************************
|
|
734
|
|
735 Emchar eisetch (Eistring *eistr, Bytecount bytepos, Emchar chr);
|
|
736 Set the character at a particular byte offset.
|
|
737 Emchar eisetch_char (Eistring *eistr, Charcount charpos, Emchar chr);
|
|
738 Set the character at a particular character offset.
|
|
739
|
|
740
|
|
741 **********************************************
|
|
742 * Concatenation *
|
|
743 **********************************************
|
|
744
|
|
745 void eicat_* (Eistring *eistr, ...);
|
|
746 Concatenate onto the end of the Eistring, with data coming from the
|
|
747 same places as above:
|
|
748
|
|
749 void eicat_ei (Eistring *eistr, Eistring *eistr2);
|
|
750 ... from another Eistring.
|
|
751 void eicat_c (Eistring *eistr, Char_ASCII *c_string);
|
|
752 ... from an ASCII null-terminated string. Non-ASCII characters in
|
|
753 the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
754 void eicat_raw (ei, const Intbyte *data, Bytecount len);
|
|
755 ... from raw internal-format data in the default internal format.
|
|
756 void eicat_rawz (ei, const Intbyte *data);
|
|
757 ... from raw internal-format data in the default internal format
|
|
758 that is "null-terminated" (the meaning of this depends on the nature
|
|
759 of the default internal format).
|
|
760 void eicat_lstr (ei, Lisp_Object lisp_string);
|
|
761 ... from a Lisp_Object string.
|
|
762 void eicat_ch (ei, Emchar ch);
|
|
763 ... from an Emchar.
|
|
764
|
|
765 (All except the first variety are convenience functions.
|
|
766 In the general case, create another Eistring from the source.)
|
|
767
|
|
768
|
|
769 **********************************************
|
|
770 * Replacement *
|
|
771 **********************************************
|
|
772
|
|
773 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
774 Bytecount len, Charcount charlen, ...);
|
|
775 Replace a section of the Eistring, specifically:
|
|
776
|
|
777 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
778 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
779 ... with another Eistring.
|
|
780 void eisub_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
781 Bytecount len, Charcount charlen, Char_ASCII *c_string);
|
|
782 ... with an ASCII null-terminated string. Non-ASCII characters in
|
|
783 the string are *ILLEGAL* (read abort() with error-checking defined).
|
|
784 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
785 Bytecount len, Charcount charlen, Emchar ch);
|
|
786 ... with an Emchar.
|
|
787
|
|
788 void eidel (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
789 Bytecount len, Charcount charlen);
|
|
790 Delete a section of the Eistring.
|
|
791
|
|
792
|
|
793 **********************************************
|
|
794 * Converting to an external format *
|
|
795 **********************************************
|
|
796
|
|
797 void eito_external (Eistring *eistr, Lisp_Object coding_system);
|
|
798 Convert the Eistring to an external format and store the result
|
|
799 in the string. NOTE: Further changes to the Eistring will *NOT*
|
|
800 change the external data stored in the string. You will have to
|
|
801 call eito_external() again in such a case if you want the external
|
|
802 data.
|
|
803
|
|
804 Extbyte *eiextdata (Eistring *eistr);
|
|
805 Return a pointer to the external data stored in the Eistring as
|
|
806 a result of a prior call to eito_external().
|
|
807
|
|
808 Bytecount eiextlen (Eistring *eistr);
|
|
809 Return the length in bytes of the external data stored in the
|
|
810 Eistring as a result of a prior call to eito_external().
|
|
811
|
|
812
|
|
813 **********************************************
|
|
814 * Searching in the Eistring for a character *
|
|
815 **********************************************
|
|
816
|
|
817 Bytecount eichr (Eistring *eistr, Emchar chr);
|
|
818 Charcount eichr_char (Eistring *eistr, Emchar chr);
|
|
819 Bytecount eichr_off (Eistring *eistr, Emchar chr, Bytecount off,
|
|
820 Charcount charoff);
|
|
821 Charcount eichr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
|
|
822 Charcount charoff);
|
|
823 Bytecount eirchr (Eistring *eistr, Emchar chr);
|
|
824 Charcount eirchr_char (Eistring *eistr, Emchar chr);
|
|
825 Bytecount eirchr_off (Eistring *eistr, Emchar chr, Bytecount off,
|
|
826 Charcount charoff);
|
|
827 Charcount eirchr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
|
|
828 Charcount charoff);
|
|
829
|
|
830
|
|
831 **********************************************
|
|
832 * Searching in the Eistring for a string *
|
|
833 **********************************************
|
|
834
|
|
835 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2);
|
|
836 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2);
|
|
837 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
|
|
838 Charcount charoff);
|
|
839 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2,
|
|
840 Bytecount off, Charcount charoff);
|
|
841 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2);
|
|
842 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2);
|
|
843 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
|
|
844 Charcount charoff);
|
|
845 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2,
|
|
846 Bytecount off, Charcount charoff);
|
|
847
|
|
848 Bytecount eistr_c (Eistring *eistr, Char_ASCII *c_string);
|
|
849 Charcount eistr_c_char (Eistring *eistr, Char_ASCII *c_string);
|
|
850 Bytecount eistr_c_off (Eistring *eistr, Char_ASCII *c_string, Bytecount off,
|
|
851 Charcount charoff);
|
|
852 Charcount eistr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
|
|
853 Bytecount off, Charcount charoff);
|
|
854 Bytecount eirstr_c (Eistring *eistr, Char_ASCII *c_string);
|
|
855 Charcount eirstr_c_char (Eistring *eistr, Char_ASCII *c_string);
|
|
856 Bytecount eirstr_c_off (Eistring *eistr, Char_ASCII *c_string,
|
|
857 Bytecount off, Charcount charoff);
|
|
858 Charcount eirstr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
|
|
859 Bytecount off, Charcount charoff);
|
|
860
|
|
861
|
|
862 **********************************************
|
|
863 * Comparison *
|
|
864 **********************************************
|
|
865
|
|
866 int eicmp_* (Eistring *eistr, ...);
|
|
867 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
868 Bytecount len, Charcount charlen, ...);
|
|
869 int eicasecmp_* (Eistring *eistr, ...);
|
|
870 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
871 Bytecount len, Charcount charlen, ...);
|
|
872 int eicasecmp_i18n_* (Eistring *eistr, ...);
|
|
873 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
874 Bytecount len, Charcount charlen, ...);
|
|
875
|
|
876 Compare the Eistring with the other data. Return value same as
|
|
877 from strcmp. The `*' is either `ei' for another Eistring (in
|
|
878 which case `...' is an Eistring), or `c' for a pure-ASCII string
|
|
879 (in which case `...' is a pointer to that string). For anything
|
|
880 more complex, first create an Eistring out of the source.
|
|
881 Comparison is either simple (`eicmp_...'), ASCII case-folding
|
|
882 (`eicasecmp_...'), or multilingual case-folding
|
|
883 (`eicasecmp_i18n_...').
|
|
884
|
|
885
|
|
886 More specifically, the prototypes are:
|
|
887
|
|
888 int eicmp_ei (Eistring *eistr, Eistring *eistr2);
|
|
889 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
890 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
891 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2);
|
|
892 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
893 Bytecount len, Charcount charlen, Eistring *eistr2);
|
|
894 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2);
|
|
895 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off,
|
|
896 Charcount charoff, Bytecount len,
|
|
897 Charcount charlen, Eistring *eistr2);
|
|
898
|
|
899 int eicmp_c (Eistring *eistr, Char_ASCII *c_string);
|
|
900 int eicmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
901 Bytecount len, Charcount charlen, Char_ASCII *c_string);
|
|
902 int eicasecmp_c (Eistring *eistr, Char_ASCII *c_string);
|
|
903 int eicasecmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
904 Bytecount len, Charcount charlen,
|
|
905 Char_ASCII *c_string);
|
|
906 int eicasecmp_i18n_c (Eistring *eistr, Char_ASCII *c_string);
|
|
907 int eicasecmp_i18n_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
|
|
908 Bytecount len, Charcount charlen,
|
|
909 Char_ASCII *c_string);
|
|
910
|
|
911
|
|
912 **********************************************
|
|
913 * Case-changing the Eistring *
|
|
914 **********************************************
|
|
915
|
|
916 void eilwr (Eistring *eistr);
|
|
917 Convert all characters in the Eistring to lowercase.
|
|
918 void eiupr (Eistring *eistr);
|
|
919 Convert all characters in the Eistring to uppercase.
|
|
920 */
|
|
921
|
|
922
|
|
923 /* Principles for writing Eistring functions:
|
|
924
|
|
925 (1) Unfortunately, we have to write most of the Eistring functions
|
|
926 as macros, because of the use of alloca(). The principle used
|
|
927 below to assure no conflict in local variables is to prefix all
|
|
928 local variables with "ei" plus a number, which should be unique
|
|
929 among macros. In practice, when finding a new number, find the
|
|
930 highest so far used, and add 1.
|
|
931
|
|
932 (2) We also suffix the Eistring fields with an _ to avoid problems
|
|
933 with macro parameters of the same name. (And as the standard
|
|
934 signal not to access these fields directly.)
|
|
935
|
|
936 (3) We maintain both the length in bytes and chars of the data in
|
|
937 the Eistring at all times, for convenient retrieval by outside
|
|
938 functions. That means when writing functions that manipulate
|
|
939 Eistrings, you too need to keep both lengths up to date for all
|
|
940 data that you work with.
|
|
941
|
|
942 (4) When writing a new type of operation (e.g. substitution), you
|
|
943 will often find yourself working with outside data, and thus
|
|
944 have a series of related API's, for different forms that the
|
|
945 outside data is in. Generally, you will want to choose a
|
|
946 subset of the forms supported by eicpy_*, which has to be
|
|
947 totally general because that's the fundamental way to get data
|
|
948 into an Eistring, and once the data is into the string, it
|
|
949 would be possible to create a whole series of Ei operations that work on
|
|
950 nothing but Eistrings. Although theoretically nice, in
|
|
951 practice it's a hassle, so we suggest that you provide
|
|
952 convenience functions. In particular, there are two paths you
|
|
953 can take. One is minimalist -- it only allows other Eistrings
|
|
954 and ASCII data, and Emchars if the particular operation makes
|
|
955 sense with a character. The other provides interfaces for the
|
|
956 most commonly-used forms -- Eistring, ASCII data, Lisp string,
|
|
957 raw internal-format string with length, raw internal-format
|
|
958 string without, and possibly Emchar. (In the function names,
|
|
959 these are designated `ei', `c', `lstr', `raw', `rawz', and
|
|
960 `ch', respectively.)
|
|
961
|
|
962 (5) When coding a new type of operation, such as was discussed in
|
|
963 previous section, the correct approach is to declare a worker
|
|
964 function that does the work of everything, and is called by the
|
|
965 other "container" macros that handle the different outside data
|
|
966 forms. The data coming into the worker function, which
|
|
967 typically ends in `_1', is in the form of three parameters:
|
|
968 DATA, LEN, CHARLEN. (See point [3] about having two lengths and
|
|
969 keeping them in sync.)
|
|
970
|
|
971 (6) Handling argument evaluation in macros: We take great care
|
|
972 never to evaluate any argument more than once in any macro,
|
|
973 except the initial Eistring parameter. This can and will be
|
|
974 evaluated multiple times, but it should pretty much always just
|
|
975 be a simple variable. This means, for example, that if an
|
|
976 Eistring is the second (not first) argument of a macro, it
|
|
977 doesn't fall under the "initial Eistring" exemption, so it
|
|
978 needs protection against multi-evaluation. (Take the address of
|
|
979 the Eistring structure, store in a temporary variable, and use
|
|
980 temporary variable for all access to the Eistring.)
|
|
981 Essentially, we want it to appear as if these Eistring macros
|
|
982 are functions -- we would like to declare them as functions but
|
|
983 they use alloca(), so we can't (and we can't make them inline
|
|
984 functions either -- alloca() is explicitly disallowed in inline
|
|
985 functions.)
|
|
986
|
|
987 (7) Note that our rules regarding multiple evaluation are *more*
|
|
988 strict than the rules listed above under the heading "working
|
|
989 with raw internal-format data".
|
|
990 */
|
|
991
|
|
992
|
|
993 /* ----- Declaration ----- */
|
|
994
|
|
995 typedef struct
|
|
996 {
|
|
997 /* Data for the Eistring, stored in the default internal format.
|
|
998 Always includes terminating null. */
|
|
999 Intbyte *data_;
|
|
1000 /* Total number of bytes allocated in DATA (including null). */
|
|
1001 Bytecount max_size_allocated_;
|
|
1002 Bytecount bytelen_;
|
|
1003 Charcount charlen_;
|
|
1004 int mallocp_;
|
|
1005
|
|
1006 Extbyte *extdata_;
|
|
1007 Bytecount extlen_;
|
|
1008 } Eistring;
|
|
1009
|
|
/* Identifies an internal text representation for the *_fmt() Eistring
   macros.  The macros visible in this file assert that the format is
   FORMAT_DEFAULT. */
typedef enum internal_format
{
  FORMAT_DEFAULT,
  FORMAT_FIXED_8,
  FORMAT_FIXED_16,
  FORMAT_FIXED_32
} Internal_Format;
|
|
1017
|
|
1018 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init;
|
|
1019
|
|
1020 #define DECLARE_EISTRING(name) \
|
|
1021 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \
|
|
1022 Eistring *name = & __ ## name ## __storage__
|
|
1023 #define DECLARE_EISTRING_MALLOC(name) \
|
|
1024 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \
|
|
1025 Eistring *name = & __ ## name ## __storage__
|
|
1026
|
|
/* Overwrite *EI with the_eistring_zero_init.  Nothing is freed. */
#define eiinit(ei) \
do { \
  *(ei) = the_eistring_zero_init; \
} while (0)

/* Overwrite *EI with the_eistring_malloc_zero_init.  Nothing is freed. */
#define eiinit_malloc(ei) \
do { \
  *(ei) = the_eistring_malloc_zero_init; \
} while (0)
|
|
1036
|
|
1037
|
|
1038 /* ----- Utility ----- */
|
|
1039
|
|
/* Make sure both LEN and CHARLEN are specified, in case one is given
   as -1: a LEN of -1 is computed from CHARLEN, otherwise a CHARLEN of
   -1 is computed from LEN.  LEN and CHARLEN must be lvalues.  PTR is
   evaluated at most once, the others multiple times. */
#define eifixup_bytechar(ptr, len, charlen) \
do { \
  if ((len) == -1) \
    (len) = charcount_to_bytecount (ptr, charlen); \
  else if ((charlen) == -1) \
    (charlen) = bytecount_to_charcount (ptr, len); \
} while (0)

/* Make sure LEN is specified, in case it is given as -1 (it is then
   computed from CHARLEN).  PTR is evaluated at most once, the others
   multiple times. */
#define eifixup_byte(ptr, len, charlen) \
do { \
  if ((len) == -1) \
    (len) = charcount_to_bytecount (ptr, charlen); \
} while (0)

/* Make sure CHARLEN is specified, in case it is given as -1 (it is
   then computed from LEN).  PTR is evaluated at most once, the others
   multiple times. */
#define eifixup_char(ptr, len, charlen) \
do { \
  if ((charlen) == -1) \
    (charlen) = bytecount_to_charcount (ptr, len); \
} while (0)
|
|
1065
|
|
1066
|
|
1067
|
|
/* Make sure we can hold NEWBYTELEN bytes (which is NEWCHARLEN chars)
   plus a zero terminator, and record both lengths in EI.  Preserve
   existing data as much as possible, including the existing zero
   terminator.  Put a new zero terminator where it should go if NEWZ
   is non-zero.  Nothing is allocated or terminated when the byte
   length does not change.  All args but EI are evalled only once. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz) \
do { \
  int ei1oldeibytelen = (ei)->bytelen_; \
  \
  (ei)->charlen_ = (newcharlen); \
  (ei)->bytelen_ = (newbytelen); \
  \
  if (ei1oldeibytelen != (ei)->bytelen_) \
    { \
      /* Grow by a factor of 1.5, starting at 32 bytes, until the \
         text and its terminator fit. */ \
      int ei1newsize = (ei)->max_size_allocated_; \
      while (ei1newsize < (ei)->bytelen_ + 1) \
        { \
          ei1newsize = (int) (ei1newsize * 1.5); \
          if (ei1newsize < 32) \
            ei1newsize = 32; \
        } \
      if (ei1newsize != (ei)->max_size_allocated_) \
        { \
          if ((ei)->mallocp_) \
            /* xrealloc always preserves existing data as much as \
               possible */ \
            (ei)->data_ = (Intbyte *) xrealloc ((ei)->data_, ei1newsize); \
          else \
            { \
              /* We don't have realloc, so alloca() more space and \
                 copy the data (and its terminator) into it. */ \
              Intbyte *ei1oldeidata = (ei)->data_; \
              (ei)->data_ = (Intbyte *) alloca (ei1newsize); \
              if (ei1oldeidata) \
                memcpy ((ei)->data_, ei1oldeidata, ei1oldeibytelen + 1); \
            } \
          (ei)->max_size_allocated_ = ei1newsize; \
        } \
      if (newz) \
        (ei)->data_[(ei)->bytelen_] = '\0'; \
    } \
} while (0)
|
|
1109
|
|
/* Resize EI via EI_ALLOC to hold BYTELEN bytes / CHARLEN chars (asking
   for a terminator), then copy that many bytes from DATA into it. */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen) \
do { \
  EI_ALLOC (ei, bytelen, charlen, 1); \
  memcpy ((ei)->data_, data, (ei)->bytelen_); \
} while (0)
|
|
1115
|
800
|
#ifdef ERROR_CHECK_TEXT
/* Assert that each of the LEN bytes at PTR is an ASCII character,
   i.e. lies in the range 0x00 - 0x7F inclusive.  (0x7F, DEL, is a
   valid ASCII character and must not trip the assertion.)  Both
   arguments are evaluated once. */
#define EI_ASSERT_ASCII(ptr, len) \
do { \
  int ei5; \
  const Char_ASCII *ei5ptr = (ptr); \
  int ei5len = (len); \
  \
  for (ei5 = 0; ei5 < ei5len; ei5++) \
    assert (ei5ptr[ei5] >= 0x00 && ei5ptr[ei5] <= 0x7F); \
} while (0)
/* Same check for a null-terminated string; the length comes from
   strlen(). */
#define EI_ASSERT_ASCIIZ(ptr) \
do { \
  const Char_ASCII *ei5p1 = (ptr); \
  EI_ASSERT_ASCII (ei5p1, strlen (ei5p1)); \
} while (0)
#else
/* Without ERROR_CHECK_TEXT the checks expand to nothing. */
#define EI_ASSERT_ASCII(ptr, len)
#define EI_ASSERT_ASCIIZ(ptr)
#endif
|
|
1135
|
|
1136
|
|
1137 /* ----- Initialization ----- */
|
|
1138
|
|
/* Set the contents of EI to a copy of the Eistring EICPY.  EICPY is
   evaluated once. */
#define eicpy_ei(ei, eicpy) \
do { \
  const Eistring *ei2 = (eicpy); \
  EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_); \
} while (0)

/* Set the contents of EI to a copy of the text of LISP_STRING.
   LISP_STRING is evaluated once. */
#define eicpy_lstr(ei, lisp_string) \
do { \
  Lisp_Object ei3 = (lisp_string); \
  EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3), \
                     XSTRING_CHAR_LENGTH (ei3)); \
} while (0)
|
|
1151
|
|
/* Set the contents of EI to a copy of a section of LISP_STRING.  The
   section starts at (OFF, CHAROFF) and has length (LEN, CHARLEN);
   within each byte/char pair, one member may be given as -1 and is
   then computed from the other.  All arguments but EI are evaluated
   once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen) \
do { \
  Lisp_Object ei23lstr = (lisp_string); \
  int ei23off = (off); \
  int ei23charoff = (charoff); \
  int ei23len = (len); \
  int ei23charlen = (charlen); \
  const Intbyte *ei23data = XSTRING_DATA (ei23lstr); \
  \
  /* Only the byte offset is needed to locate the section ... */ \
  eifixup_byte (ei23data, ei23off, ei23charoff); \
  /* ... but both lengths are needed to size the Eistring. */ \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen); \
  \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen); \
} while (0)
|
|
1168
|
|
/* Set the contents of EI to a copy of the LEN bytes of raw
   internal-format data at PTR, in format FMT.  Asserts that FMT is
   FORMAT_DEFAULT.  The character length is computed from the data.
   All arguments but EI are evaluated once. */
#define eicpy_raw_fmt(ei, ptr, len, fmt) \
do { \
  const Intbyte *ei12ptr = (ptr); \
  Internal_Format ei12fmt = (fmt); \
  int ei12len = (len); \
  assert (ei12fmt == FORMAT_DEFAULT); \
  EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len, \
                     bytecount_to_charcount (ei12ptr, ei12len)); \
} while (0)

/* eicpy_raw_fmt() with the default internal format. */
#define eicpy_raw(ei, ptr, len) eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT)
|
|
1180
|
|
/* Set the contents of EI to a copy of the null-terminated raw
   internal-format data at PTR, in format FMT.  Asserts that FMT is
   FORMAT_DEFAULT; the length is obtained with qxestrlen().  All
   arguments but EI are evaluated once -- in particular the captured
   copy of FMT, not FMT itself, is handed on to eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt) \
do { \
  const Intbyte *ei12p1ptr = (ptr); \
  Internal_Format ei12p1fmt = (fmt); \
  assert (ei12p1fmt == FORMAT_DEFAULT); \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt); \
} while (0)

/* eicpy_rawz_fmt() with the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT)
|
|
1190
|
|
/* Set the contents of EI to the single character CH.  CH is encoded
   into a scratch buffer with set_charptr_emchar() and then copied;
   it is evaluated once. */
#define eicpy_ch(ei, ch) \
do { \
  Intbyte ei12p2[MAX_EMCHAR_LEN]; \
  Bytecount ei12p2len = set_charptr_emchar (ei12p2, ch); \
  EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1); \
} while (0)
|
|
1197
|
|
/* Set the contents of EI from the null-terminated ASCII string
   C_STRING.  The string is checked with EI_ASSERT_ASCIIZ and then
   converted through eicpy_ext() using the Qbinary coding system.
   C_STRING is evaluated once. */
#define eicpy_c(ei, c_string) \
do { \
  const Char_ASCII *ei4 = (c_string); \
  \
  EI_ASSERT_ASCIIZ (ei4); \
  eicpy_ext (ei, ei4, Qbinary); \
} while (0)

/* Like eicpy_c(), but for C_LEN bytes of ASCII data at C_STRING that
   need not be null-terminated.  Both are evaluated once. */
#define eicpy_c_len(ei, c_string, c_len) \
do { \
  const Char_ASCII *ei6 = (c_string); \
  int ei6len = (c_len); \
  \
  EI_ASSERT_ASCII (ei6, ei6len); \
  eicpy_ext_len (ei, ei6, ei6len, Qbinary); \
} while (0)
|
|
1214
|
|
/* Set the contents of EI by converting the EXTLEN bytes of external
   data at EXTDATA, encoded in CODING_SYSTEM, to internal format.  The
   conversion stores its result straight into the data and byte-length
   fields of EI; the allocated size and the character length are then
   derived from that result.  EXTDATA and EXTLEN are evaluated once.

   NOTE(review): the conversion always uses the ALLOCA sink and
   overwrites (ei)->data_ without consulting (ei)->mallocp_ -- confirm
   whether this is safe for an Eistring whose data is malloc()ed. */
#define eicpy_ext_len(ei, extdata, extlen, coding_system) \
do { \
  const Extbyte *ei7 = (extdata); \
  int ei7len = (extlen); \
  \
  TO_INTERNAL_FORMAT (DATA, (ei7, ei7len), \
                      ALLOCA, ((ei)->data_, (ei)->bytelen_), \
                      coding_system); \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1; \
  (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \
} while (0)
|
|
1226
|
|
/* Set the contents of EI by converting the external data at EXTDATA,
   encoded in CODING_SYSTEM, to internal format.  The length of the
   data is obtained from dfc_external_data_len().  EXTDATA and
   CODING_SYSTEM are each evaluated exactly once; the coding system is
   captured in a local because it is needed both to measure the data
   and to convert it. */
#define eicpy_ext(ei, extdata, coding_system) \
do { \
  const Extbyte *ei8 = (extdata); \
  Lisp_Object ei8cs = (coding_system); \
  \
  eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, ei8cs), \
                 ei8cs); \
} while (0)
|
|
1234
|
|
/* Placeholder: copying from a Lisp buffer is not implemented.  The
   expansion is deliberately not valid C, so any use fails to compile. */
#define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) \
  NOT YET IMPLEMENTED

/* Placeholder: copying from an lstream is not implemented.  Any use
   fails to compile. */
#define eicpy_lstream(eistr, lstream) \
  NOT YET IMPLEMENTED

/* Reset EISTR to the empty string, by copying "" into it with
   eicpy_rawz(). */
#define eireset(eistr) eicpy_rawz (eistr, (Intbyte *) "")
|
|
1242
|
|
1243 /* ----- Getting the data out of the Eistring ----- */
|
|
1244
|
|
/* The internal-format data pointer of EI. */
#define eidata(ei) ((ei)->data_)

/* Make a Lisp string from the data and byte length of EI, via
   make_string().  EI is evaluated twice. */
#define eimake_string(ei) make_string (eidata (ei), eilen (ei))
|
|
1248
|
|
/* Make a Lisp string from the section of EISTR starting at
   (OFF, CHAROFF) with length (LEN, CHARLEN).  For each pair, a byte
   value of -1 is computed from the character value.

   NOTE: because this is a statement macro it cannot yield a value;
   instead it expands to a `return' statement, so it RETURNS THE NEW
   STRING FROM THE ENCLOSING FUNCTION.  Use it only as the final
   statement of a function returning Lisp_Object.  All arguments but
   EISTR are evaluated once. */
#define eimake_string_off(eistr, off, charoff, len, charlen) \
do { \
  int ei24off = (off); \
  int ei24charoff = (charoff); \
  int ei24len = (len); \
  int ei24charlen = (charlen); \
  \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff); \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen); \
  \
  return make_string ((eistr)->data_ + ei24off, ei24len); \
} while (0)
|
|
1262
|
|
1263 #define eicpyout_alloca(eistr, ptrout, lenout) \
|
|
1264 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT)
|
|
1265 #define eicpyout_malloc(eistr, lenout) \
|
|
1266 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT)
|
|
1267 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out,
|
|
1268 Internal_Format fmt);
|
|
/* Copy the data of EISTR, including its terminating null, into newly
   alloca()ed storage.  PTROUT must be an lvalue of type `Intbyte *'
   and receives the address of the copy; LENOUT must be an lvalue of
   type Bytecount and receives the length in bytes (not counting the
   null).  Asserts that FMT is FORMAT_DEFAULT.  PTROUT, LENOUT and FMT
   are evaluated once; their addresses are taken so that the stores go
   through pointers. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt) \
do { \
  Internal_Format ei23fmt = (fmt); \
  Intbyte **ei23ptrout = &(ptrout); \
  Bytecount *ei23lenout = &(lenout); \
  \
  assert (ei23fmt == FORMAT_DEFAULT); \
  \
  *ei23lenout = (eistr)->bytelen_; \
  *ei23ptrout = alloca_array (Intbyte, (eistr)->bytelen_ + 1); \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1); \
} while (0)
|
|
1281
|
|
1282 /* ----- Moving to the heap ----- */
|
|
1283
|
|
/* Release EI and return it to its initial state.  For a malloc-style
   Eistring the internal and external buffers are xfree()d (when
   non-null) and EI is re-initialized with eiinit_malloc(); otherwise
   nothing is freed and EI is re-initialized with eiinit(). */
#define eifree(ei) \
do { \
  if (!(ei)->mallocp_) \
    eiinit (ei); \
  else \
    { \
      if ((ei)->data_) \
        xfree ((ei)->data_); \
      if ((ei)->extdata_) \
        xfree ((ei)->extdata_); \
      eiinit_malloc (ei); \
    } \
} while (0)
|
|
1297
|
|
1298 int eifind_large_enough_buffer (int oldbufsize, int needed_size);
|
|
1299 void eito_malloc_1 (Eistring *ei);
|
|
1300
|
|
1301 #define eito_malloc(ei) eito_malloc_1 (ei)
|
|
1302
|
|
/* Convert a malloc-style Eistring into an alloca-style one: each
   non-null buffer is copied into alloca()ed storage and the heap copy
   is xfree()d.  An Eistring that is already alloca-style is left
   untouched.

   This is a statement macro expanded inside the caller's function, so
   it must not contain a `return' -- that would silently return from
   the caller.  The "nothing to do" case is therefore handled by simply
   guarding the whole body. */
#define eito_alloca(ei) \
do { \
  if ((ei)->mallocp_) \
    { \
      (ei)->mallocp_ = 0; \
      if ((ei)->data_) \
        { \
          Intbyte *ei13newdata; \
          \
          (ei)->max_size_allocated_ = \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1); \
          ei13newdata = (Intbyte *) alloca ((ei)->max_size_allocated_); \
          /* Copy the text together with its terminating null. */ \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1); \
          xfree ((ei)->data_); \
          (ei)->data_ = ei13newdata; \
        } \
      \
      if ((ei)->extdata_) \
        { \
          Extbyte *ei13newdata = (Extbyte *) alloca ((ei)->extlen_ + 2); \
          \
          memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_); \
          /* Double null-terminate in case of Unicode data */ \
          ei13newdata[(ei)->extlen_] = '\0'; \
          ei13newdata[(ei)->extlen_ + 1] = '\0'; \
          xfree ((ei)->extdata_); \
          (ei)->extdata_ = ei13newdata; \
        } \
    } \
} while (0)
|
|
1332
|
|
1333
|
|
1334 /* ----- Retrieving the length ----- */
|
|
1335
|
|
/* Length of the data in EI, in bytes. */
#define eilen(ei) ((ei)->bytelen_)
/* Length of the data in EI, in characters. */
#define eicharlen(ei) ((ei)->charlen_)
|
|
1338
|
|
1339
|
|
1340 /* ----- Working with positions ----- */
|
|
1341
|
|
/* Convert a character offset into the data of EI to a byte offset. */
#define eicharpos_to_bytepos(ei, charpos) \
  charcount_to_bytecount ((ei)->data_, charpos)
/* Convert a byte offset into the data of EI to a character offset. */
#define eibytepos_to_charpos(ei, bytepos) \
  bytecount_to_charcount ((ei)->data_, bytepos)
|
|
1346
|
|
1347 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr,
|
|
1348 Bytecount bytepos,
|
|
1349 Charcount n))
|
|
1350 {
|
|
1351 Intbyte *pos = eistr->data_ + bytepos;
|
814
|
1352 Charcount i;
|
771
|
1353
|
800
|
1354 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
|
|
1355 text_checking_assert (n >= 0 && n <= eistr->charlen_);
|
771
|
1356 /* We could check N more correctly now, but that would require a
|
|
1357 call to bytecount_to_charcount(), which would be needlessly
|
|
1358 expensive (it would convert O(N) algorithms into O(N^2) algorithms
|
800
|
1359 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are
|
771
|
1360 guaranteed to catch it either inside INC_CHARPTR() or in the check
|
|
1361 below. */
|
|
1362 for (i = 0; i < n; i++)
|
|
1363 INC_CHARPTR (pos);
|
800
|
1364 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
|
771
|
1365 return pos - eistr->data_;
|
|
1366 }
|
|
1367
|
|
/* Increment the byte position BYTEPOS in EI by one character, or by N
   characters.  NOTE: there must be no space between the macro name and
   its parameter list, or the preprocessor defines an object-like macro
   whose replacement text merely begins with "(ei, bytepos)". */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
|
|
1370
|
|
1371 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr,
|
|
1372 Bytecount bytepos,
|
|
1373 Charcount n))
|
|
1374 {
|
|
1375 Intbyte *pos = eistr->data_ + bytepos;
|
|
1376 int i;
|
|
1377
|
800
|
1378 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
|
|
1379 text_checking_assert (n >= 0 && n <= eistr->charlen_);
|
771
|
1380 /* We could check N more correctly now, but ... see above. */
|
|
1381 for (i = 0; i < n; i++)
|
|
1382 DEC_CHARPTR (pos);
|
800
|
1383 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
|
771
|
1384 return pos - eistr->data_;
|
|
1385 }
|
|
1386
|
|
/* Decrement the byte position BYTEPOS in EI by one character, or by N
   characters.  NOTE: there must be no space between the macro name and
   its parameter list, or the preprocessor defines an object-like macro
   whose replacement text merely begins with "(ei, bytepos)". */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
|
|
1389
|
|
1390
|
|
1391 /* ----- Getting the character at a position ----- */
|
|
1392
|
|
/* The character at byte offset BYTEPOS in EI. */
#define eigetch(ei, bytepos) \
  charptr_emchar ((ei)->data_ + (bytepos))
/* The character at character offset CHARPOS in EI. */
#define eigetch_char(ei, charpos) charptr_emchar_n ((ei)->data_, charpos)
|
|
1396
|
|
1397
|
|
1398 /* ----- Setting the character at a position ----- */
|
|
1399
|
|
/* Replace the one character at byte offset BYTEPOS in EI with CHR.
   Implemented as a one-character eisub_ch(); the character offset and
   byte length are passed as -1, i.e. left for eisub_ch() to compute. */
#define eisetch(ei, bytepos, chr) \
  eisub_ch (ei, bytepos, -1, -1, 1, chr)
/* Replace the one character at character offset CHARPOS in EI with
   CHR; the byte offset and byte length are passed as -1. */
#define eisetch_char(ei, charpos, chr) \
  eisub_ch (ei, -1, charpos, -1, 1, chr)
|
|
1404
|
|
1405
|
|
1406 /* ----- Concatenation ----- */
|
|
1407
|
|
/* Worker for the eicat_*() macros: append the BYTELEN bytes (CHARLEN
   characters) at DATA to the end of EI.  EI is first grown with
   EI_ALLOC (which also writes the new terminator), then the new bytes
   are copied in at the old end of the data.  All arguments but EI are
   evaluated once. */
#define eicat_1(ei, data, bytelen, charlen) \
do { \
  int ei14oldeibytelen = (ei)->bytelen_; \
  int ei14bytelen = (bytelen); \
  EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen, \
            (ei)->charlen_ + (charlen), 1); \
  memcpy ((ei)->data_ + ei14oldeibytelen, (data), \
          ei14bytelen); \
} while (0)
|
|
1417
|
|
/* The eicat_*() family: append data from various sources to the end
   of EI.  Each is a thin wrapper that works out (data, byte length,
   character length) and hands them to eicat_1().  The source argument
   is evaluated once in every variant. */

/* ... from another Eistring. */
#define eicat_ei(ei, ei2) \
do { \
  const Eistring *ei9 = (ei2); \
  eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_); \
} while (0)

/* ... from a null-terminated ASCII string (checked with
   EI_ASSERT_ASCII). */
#define eicat_c(ei, c_string) \
do { \
  const Char_ASCII *ei15 = (c_string); \
  int ei15len = strlen (ei15); \
  \
  EI_ASSERT_ASCII (ei15, ei15len); \
  eicat_1 (ei, ei15, ei15len, \
           bytecount_to_charcount ((Intbyte *) ei15, ei15len)); \
} while (0)

/* ... from LEN bytes of raw internal-format data. */
#define eicat_raw(ei, data, len) \
do { \
  int ei16len = (len); \
  const Intbyte *ei16data = (data); \
  eicat_1 (ei, ei16data, ei16len, \
           bytecount_to_charcount (ei16data, ei16len)); \
} while (0)

/* ... from null-terminated raw internal-format data; the length is
   obtained with qxestrlen(). */
#define eicat_rawz(ei, ptr) \
do { \
  const Intbyte *ei16p5ptr = (ptr); \
  eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr)); \
} while (0)

/* ... from a Lisp string. */
#define eicat_lstr(ei, lisp_string) \
do { \
  Lisp_Object ei17 = (lisp_string); \
  eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \
           XSTRING_CHAR_LENGTH (ei17)); \
} while (0)

/* ... from a single character, encoded with set_charptr_emchar(). */
#define eicat_ch(ei, ch) \
do { \
  Intbyte ei22ch[MAX_EMCHAR_LEN]; \
  Bytecount ei22len = set_charptr_emchar (ei22ch, ch); \
  eicat_1 (ei, ei22ch, ei22len, 1); \
} while (0)
|
|
1461
|
|
1462
|
|
1463 /* ----- Replacement ----- */
|
|
1464
|
|
/* Worker for the eisub_*() macros and eidel(): replace the section of
   an Eistring at (OFF, LEN) with the data at SRC of length SRCLEN.
   All positions and lengths have corresponding character values
   (CHAROFF, CHARLEN, SRCCHARLEN), and either member of a pair can be
   -1 -- it will be computed from the other.  All arguments but EI are
   evaluated once. */

#define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
do { \
  int ei18off = (off); \
  int ei18charoff = (charoff); \
  int ei18len = (len); \
  int ei18charlen = (charlen); \
  Intbyte *ei18src = (Intbyte *) (src); \
  int ei18srclen = (srclen); \
  int ei18srccharlen = (srccharlen); \
  \
  int ei18oldeibytelen = (ei)->bytelen_; \
  \
  eifixup_bytechar ((ei)->data_, ei18off, ei18charoff); \
  eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen); \
  eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen); \
  \
  /* Resize without writing a terminator: the old terminator is \
     carried along by the memmove() below. */ \
  EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len, \
            (ei)->charlen_ + ei18srccharlen - ei18charlen, 0); \
  /* Shift the tail that follows the replaced section to where it \
     belongs after the substitution. */ \
  if (ei18len != ei18srclen) \
    memmove ((ei)->data_ + ei18off + ei18srclen, \
             (ei)->data_ + ei18off + ei18len, \
             /* include zero terminator. */ \
             ei18oldeibytelen - (ei18off + ei18len) + 1); \
  if (ei18srclen > 0) \
    memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen); \
} while (0)
|
|
1495
|
|
/* The eisub_*() family: replace the section of EI at (OFF, CHAROFF),
   (LEN, CHARLEN) with data from various sources.  Each is a thin
   wrapper around eisub_1(). */

/* ... with the contents of another Eistring (evaluated once). */
#define eisub_ei(ei, off, charoff, len, charlen, ei2) \
do { \
  const Eistring *ei19 = (ei2); \
  eisub_1 (ei, off, charoff, len, charlen, ei19->data_, ei19->bytelen_, \
           ei19->charlen_); \
} while (0)

/* ... with a null-terminated ASCII string (checked with
   EI_ASSERT_ASCII).  The character length is passed as -1 and so is
   computed by eisub_1(). */
#define eisub_c(ei, off, charoff, len, charlen, c_string) \
do { \
  const Char_ASCII *ei20 = (c_string); \
  int ei20len = strlen (ei20); \
  EI_ASSERT_ASCII (ei20, ei20len); \
  eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1); \
} while (0)

/* ... with a single character, encoded with set_charptr_emchar(). */
#define eisub_ch(ei, off, charoff, len, charlen, ch) \
do { \
  Intbyte ei21ch[MAX_EMCHAR_LEN]; \
  Bytecount ei21len = set_charptr_emchar (ei21ch, ch); \
  eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1); \
} while (0)

/* Delete the section: a substitution with empty source data. */
#define eidel(ei, off, charoff, len, charlen) \
  eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0)
|
|
1520
|
|
1521
|
|
1522 /* ----- Converting to an external format ----- */
|
|
1523
|
|
/* Convert the data of EI to external format using CODING_SYSTEM and
   store the result in EI's external-data fields (see eiextdata() and
   eiextlen()).  For a malloc-style Eistring any previous external data
   is xfree()d first and the new data is malloc()ed; otherwise the new
   data is alloca()ed.  CODING_SYSTEM is evaluated once. */
#define eito_external(ei, coding_system) \
do { \
  if ((ei)->mallocp_) \
    { \
      if ((ei)->extdata_) \
        { \
          xfree ((ei)->extdata_); \
          (ei)->extdata_ = 0; \
        } \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
                          MALLOC, ((ei)->extdata_, (ei)->extlen_), \
                          coding_system); \
    } \
  else \
    TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
                        ALLOCA, ((ei)->extdata_, (ei)->extlen_), \
                        coding_system); \
} while (0)
|
|
1542
|
|
/* Accessors for the extdata_ and extlen_ fields of EI, which are the
   fields eito_external stores its result in. */
1543 #define eiextdata(ei) ((ei)->extdata_)
|
|
1544 #define eiextlen(ei) ((ei)->extlen_)
|
|
1545
|
|
1546
|
|
1547 /* ----- Searching in the Eistring for a character ----- */
|
|
1548
|
|
/* Placeholders only: each macro below expands to the bare tokens
   NOT YET IMPLEMENTED, so any use is expected to fail to compile rather
   than silently do nothing. */
1549 #define eichr(eistr, chr) \
|
|
1550 NOT YET IMPLEMENTED
|
|
1551 #define eichr_char(eistr, chr) \
|
|
1552 NOT YET IMPLEMENTED
|
|
1553 #define eichr_off(eistr, chr, off, charoff) \
|
|
1554 NOT YET IMPLEMENTED
|
|
1555 #define eichr_off_char(eistr, chr, off, charoff) \
|
|
1556 NOT YET IMPLEMENTED
|
|
1557 #define eirchr(eistr, chr) \
|
|
1558 NOT YET IMPLEMENTED
|
|
1559 #define eirchr_char(eistr, chr) \
|
|
1560 NOT YET IMPLEMENTED
|
|
1561 #define eirchr_off(eistr, chr, off, charoff) \
|
|
1562 NOT YET IMPLEMENTED
|
|
1563 #define eirchr_off_char(eistr, chr, off, charoff) \
|
|
1564 NOT YET IMPLEMENTED
|
|
1565
|
|
1566
|
|
1567 /* ----- Searching in the Eistring for a string ----- */
|
|
1568
|
|
/* Placeholders only: each macro below expands to the bare tokens
   NOT YET IMPLEMENTED, so any use is expected to fail to compile.  The
   first group takes a second Eistring, the second group a C string. */
1569 #define eistr_ei(eistr, eistr2) \
|
|
1570 NOT YET IMPLEMENTED
|
|
1571 #define eistr_ei_char(eistr, eistr2) \
|
|
1572 NOT YET IMPLEMENTED
|
|
1573 #define eistr_ei_off(eistr, eistr2, off, charoff) \
|
|
1574 NOT YET IMPLEMENTED
|
|
1575 #define eistr_ei_off_char(eistr, eistr2, off, charoff) \
|
|
1576 NOT YET IMPLEMENTED
|
|
1577 #define eirstr_ei(eistr, eistr2) \
|
|
1578 NOT YET IMPLEMENTED
|
|
1579 #define eirstr_ei_char(eistr, eistr2) \
|
|
1580 NOT YET IMPLEMENTED
|
|
1581 #define eirstr_ei_off(eistr, eistr2, off, charoff) \
|
|
1582 NOT YET IMPLEMENTED
|
|
1583 #define eirstr_ei_off_char(eistr, eistr2, off, charoff) \
|
|
1584 NOT YET IMPLEMENTED
|
|
1585
|
|
1586 #define eistr_c(eistr, c_string) \
|
|
1587 NOT YET IMPLEMENTED
|
|
1588 #define eistr_c_char(eistr, c_string) \
|
|
1589 NOT YET IMPLEMENTED
|
|
1590 #define eistr_c_off(eistr, c_string, off, charoff) \
|
|
1591 NOT YET IMPLEMENTED
|
|
1592 #define eistr_c_off_char(eistr, c_string, off, charoff) \
|
|
1593 NOT YET IMPLEMENTED
|
|
1594 #define eirstr_c(eistr, c_string) \
|
|
1595 NOT YET IMPLEMENTED
|
|
1596 #define eirstr_c_char(eistr, c_string) \
|
|
1597 NOT YET IMPLEMENTED
|
|
1598 #define eirstr_c_off(eistr, c_string, off, charoff) \
|
|
1599 NOT YET IMPLEMENTED
|
|
1600 #define eirstr_c_off_char(eistr, c_string, off, charoff) \
|
|
1601 NOT YET IMPLEMENTED
|
|
1602
|
|
1603
|
|
1604 /* ----- Comparison ----- */
|
|
1605
|
|
/* Comparison front ends.  eicmp_1 is only declared here; every macro below
   expands to a single call to it:
   - the *_ei forms pass 0 for DATA, the second Eistring for EI2 and 0 for
     IS_C;
   - the *_c forms pass the C string for DATA, 0 for EI2 and 1 for IS_C;
   - forms without _off_ pass 0, -1, -1, -1 for OFF, CHAROFF, LEN, CHARLEN;
   - FOLD_CASE is 0 for eicmp_*, 1 for eicasecmp_* and 2 for
     eicasecmp_i18n_*. */
1606 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
|
|
1607 Bytecount len, Charcount charlen, const Intbyte *data,
|
|
1608 const Eistring *ei2, int is_c, int fold_case);
|
|
1609
|
|
1610 #define eicmp_ei(eistr, eistr2) \
|
|
1611 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
|
|
1612 #define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
|
|
1613 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)
|
|
1614 #define eicasecmp_ei(eistr, eistr2) \
|
|
1615 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
|
|
1616 #define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
|
|
1617 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)
|
|
1618 #define eicasecmp_i18n_ei(eistr, eistr2) \
|
|
1619 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)
|
|
1620 #define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
|
|
1621 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)
|
|
1622
|
|
1623 #define eicmp_c(eistr, c_string) \
|
|
1624 eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 0)
|
|
1625 #define eicmp_off_c(eistr, off, charoff, len, charlen, c_string) \
|
|
1626 eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 0)
|
|
1627 #define eicasecmp_c(eistr, c_string) \
|
|
1628 eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 1)
|
|
1629 #define eicasecmp_off_c(eistr, off, charoff, len, charlen, c_string) \
|
|
1630 eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 1)
|
|
1631 #define eicasecmp_i18n_c(eistr, c_string) \
|
|
1632 eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 2)
|
|
1633 #define eicasecmp_i18n_off_c(eistr, off, charoff, len, charlen, c_string) \
|
|
1634 eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 2)
|
|
1635
|
|
1636
|
|
1637 /* ----- Case-changing the Eistring ----- */
|
|
1638
|
|
/* Case conversion of an Eistring.  eistr_casefiddle_1 is only declared
   here.  EI_CASECHANGE allocates (with alloca_array) a scratch buffer of
   charlen_ * MAX_EMCHAR_LEN + 1 bytes and lets eistr_casefiddle_1 fill it
   from EI's data_ / bytelen_.  Only when that call returns nonzero is EI
   updated: data_ is pointed at the scratch buffer, max_size_allocated_ is
   set to the scratch size and bytelen_ to the returned value; charlen_ is
   left alone.  eilwr passes DOWNP as 1, eiupr as 0.
   NOTE(review): data_ is replaced by alloca'ed storage without consulting
   mallocp_ (which eito_external does check) -- confirm this is safe for
   malloc'ed Eistrings. */
1639 int eistr_casefiddle_1 (Intbyte *olddata, Bytecount len, Intbyte *newdata,
|
|
1640 int downp);
|
|
1641
|
|
1642 #define EI_CASECHANGE(ei, downp) \
|
|
1643 do { \
|
|
1644 int ei11new_allocmax = (ei)->charlen_ * MAX_EMCHAR_LEN + 1; \
|
|
1645 Intbyte *ei11storage = (Intbyte *) alloca_array (Intbyte, \
|
|
1646 ei11new_allocmax); \
|
|
1647 int ei11newlen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, \
|
|
1648 ei11storage, downp); \
|
|
1649 \
|
|
1650 if (ei11newlen) \
|
|
1651 { \
|
|
1652 (ei)->max_size_allocated_ = ei11new_allocmax; \
|
|
1653 (ei)->data_ = ei11storage; \
|
|
1654 (ei)->bytelen_ = ei11newlen; \
|
|
1655 /* charlen is the same. */ \
|
|
1656 } \
|
|
1657 } while (0)
|
|
1658
|
|
1659 #define eilwr(ei) EI_CASECHANGE (ei, 1)
|
|
1660 #define eiupr(ei) EI_CASECHANGE (ei, 0)
|
|
1661
|
|
1662
|
|
1663 /************************************************************************/
|
|
1664 /* */
|
|
1665 /* Converting between internal and external format */
|
|
1666 /* */
|
|
1667 /************************************************************************/
|
|
1668 /*
|
|
1669 All client code should use only the two macros
|
|
1670
|
|
1671 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
|
|
1672 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
|
|
1673
|
|
1674 Typical use is
|
|
1675
|
|
1676 TO_INTERNAL_FORMAT (DATA, (ptr, len),
|
|
1677 LISP_BUFFER, buffer,
|
|
1678 Qfile_name);
|
|
1679
|
|
1680 NOTE: GC is inhibited during the entire operation of these macros. This
|
|
1681 is because frequently the data to be converted comes from strings but
|
|
1682 gets passed in as just DATA, and GC may move around the string data. If
|
|
1683 we didn't inhibit GC, there'd have to be a lot of messy recoding,
|
|
1684 alloca-copying of strings and other annoying stuff.
|
|
1685
|
|
1686 The source or sink can be specified in one of these ways:
|
|
1687
|
|
1688 DATA, (ptr, len), // input data is a fixed buffer of size len
|
|
1689 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len
|
|
1690 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
|
|
1691 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
|
|
1692 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
|
|
1693 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr))
|
|
1694 // on input (the Unicode version is used when correct)
|
|
1695 LISP_STRING, string, // input or output is a Lisp_Object of type string
|
|
1696 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
|
|
1697 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream
|
|
1698 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque
|
|
1699
|
|
1700 When specifying the sink, use lvalues, since the macro will assign to them,
|
|
1701 except when the sink is an lstream or a lisp buffer.
|
|
1702
|
|
1703 The macros accept the kinds of sources and sinks appropriate for
|
|
1704 internal and external data representation. See the type_checking_assert
|
|
1705 macros below for the actual allowed types.
|
|
1706
|
|
1707 Since some sources and sinks use one argument (a Lisp_Object) to
|
|
1708 specify them, while others take a (pointer, length) pair, we use
|
|
1709 some C preprocessor trickery to allow pair arguments to be specified
|
|
1710 by parenthesizing them, as in the examples above.
|
|
1711
|
|
1712 Anything prefixed by dfc_ (`data format conversion') is private.
|
|
1713 They are only used to implement these macros.
|
|
1714
|
|
1715 [[Using C_STRING* is appropriate for using with external APIs that
|
|
1716 take null-terminated strings. For internal data, we should try to
|
|
1717 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'.
|
|
1718
|
|
1719 Sometime in the future we might allow output to C_STRING_ALLOCA or
|
|
1720 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not
|
|
1721 TO_INTERNAL_FORMAT().]]
|
|
1722
|
|
1723 The above comments are not true. Frequently (most of the time, in
|
|
1724 fact), external strings come as zero-terminated entities, where the
|
|
1725 zero-termination is the only way to find out the length. Even in
|
|
1726 cases where you can get the length, most of the time the system will
|
|
1727 still use the null to signal the end of the string, and there will
|
|
1728 still be no way to either send in or receive a string with embedded
|
|
1729 nulls. In such situations, it's pointless to track the length
|
|
1730 because null bytes can never be in the string. We have a lot of
|
|
1731 operations that make it easy to operate on zero-terminated strings,
|
|
1732 and forcing the user to deal with the length everywhere would only
|
|
1733 make the code uglier and more complicated, for no gain. --ben
|
|
1734
|
|
1735 There is no problem using the same lvalue for source and sink.
|
|
1736
|
|
1737 Also, when pointers are required, the code (currently at least) is
|
|
1738 lax and allows any pointer types, either in the source or the sink.
|
|
1739 This makes it possible, e.g., to deal with internal format data held
|
|
1740 in char *'s or external format data held in WCHAR * (i.e. Unicode).
|
|
1741
|
|
1742 Finally, whenever storage allocation is called for, extra space is
|
|
1743 allocated for a terminating zero, and such a zero is stored in the
|
|
1744 appropriate place, regardless of whether the source data was
|
|
1745 specified using a length or was specified as zero-terminated. This
|
|
1746 allows you to freely pass the resulting data, no matter how
|
|
1747 obtained, to a routine that expects zero termination (modulo, of
|
|
1748 course, that any embedded zeros in the resulting text will cause
|
|
1749 truncation). In fact, currently two terminating zeros are allocated
|
|
1750 and stored after the data result. This is to allow for the
|
|
1751 possibility of storing a Unicode value on output, which needs the
|
|
1752 two zeros. Currently, however, the two zeros are stored regardless
|
|
1753 of whether the conversion is internal or external and regardless of
|
|
1754 whether the external coding system is in fact Unicode. This
|
|
1755 behavior may change in the future, and you cannot rely on this --
|
|
1756 the most you can rely on is that sink data in Unicode format will
|
|
1757 have two terminating nulls, which combine to form one Unicode null
|
|
1758 character. */
|
|
1759
|
|
/* Convert SOURCE (internal format) to external format and deliver it to
   SINK.  The type_checking_assert below accepts DATA, C_STRING,
   LISP_STRING, LISP_OPAQUE and LISP_LSTREAM as SOURCE_TYPE, and ALLOCA,
   MALLOC, C_STRING_ALLOCA, C_STRING_MALLOC, LISP_LSTREAM and LISP_OPAQUE
   as SINK_TYPE.  CODESYS is evaluated once.  The type names are token-pasted
   to select DFC_EXT_SOURCE_<type>_TO_ARGS, DFC_SINK_<type>_TO_ARGS and
   DFC_<type>_USE_CONVERTED_DATA; the conversion itself is done by
   dfc_convert_to_external_format. */
1760 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
|
|
1761 do { \
|
|
1762 dfc_conversion_type dfc_simplified_source_type; \
|
|
1763 dfc_conversion_type dfc_simplified_sink_type; \
|
|
1764 dfc_conversion_data dfc_source; \
|
|
1765 dfc_conversion_data dfc_sink; \
|
|
1766 Lisp_Object dfc_codesys = (codesys); \
|
|
1767 \
|
|
1768 type_checking_assert \
|
|
1769 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
|
|
1770 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
|
|
1771 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \
|
|
1772 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
|
|
1773 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
|
|
1774 && \
|
|
1775 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
|
|
1776 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
|
|
1777 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
|
|
1778 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
|
|
1779 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
|
|
1780 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
|
|
1781 \
|
|
1782 DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
|
|
1783 DFC_SINK_##sink_type##_TO_ARGS (sink); \
|
|
1784 \
|
|
1785 dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \
|
|
1786 dfc_codesys, \
|
|
1787 dfc_simplified_sink_type, &dfc_sink); \
|
|
1788 \
|
|
1789 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
|
|
1790 } while (0)
|
|
1791
|
|
/* Convert SOURCE (external format) to internal format and deliver it to
   SINK.  The type_checking_assert below accepts DATA, C_STRING,
   LISP_OPAQUE and LISP_LSTREAM as SOURCE_TYPE, and ALLOCA, MALLOC,
   C_STRING_ALLOCA, C_STRING_MALLOC, LISP_STRING, LISP_LSTREAM and
   LISP_BUFFER as SINK_TYPE.  CODESYS is evaluated once.  The type names are
   token-pasted to select DFC_INT_SOURCE_<type>_TO_ARGS,
   DFC_SINK_<type>_TO_ARGS and DFC_<type>_USE_CONVERTED_DATA; the conversion
   itself is done by dfc_convert_to_internal_format. */
1792 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
|
|
1793 do { \
|
|
1794 dfc_conversion_type dfc_simplified_source_type; \
|
|
1795 dfc_conversion_type dfc_simplified_sink_type; \
|
|
1796 dfc_conversion_data dfc_source; \
|
|
1797 dfc_conversion_data dfc_sink; \
|
|
1798 Lisp_Object dfc_codesys = (codesys); \
|
|
1799 \
|
|
1800 type_checking_assert \
|
|
1801 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
|
|
1802 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
|
|
1803 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
|
|
1804 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
|
|
1805 && \
|
|
1806 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
|
|
1807 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
|
|
1808 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
|
|
1809 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
|
|
1810 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \
|
|
1811 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
|
|
1812 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
|
|
1813 \
|
|
1814 DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
|
|
1815 DFC_SINK_##sink_type##_TO_ARGS (sink); \
|
|
1816 \
|
|
1817 dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, \
|
|
1818 dfc_codesys, \
|
|
1819 dfc_simplified_sink_type, &dfc_sink); \
|
|
1820 \
|
|
1821 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
|
|
1822 } while (0)
|
|
1823
|
814
|
/* Descriptor for a conversion source or sink, passed by address to
   dfc_convert_to_external_format / dfc_convert_to_internal_format.  It is
   a union in C and a plain struct when compiled as C++ (reason below). */
1824 #ifdef __cplusplus
|
771
|
1825
|
814
|
1826 /* Error if you try to use a union here: "member `struct {anonymous
|
|
1827 union}::{anonymous} {anonymous union}::data' with constructor not allowed
|
|
1828 in union" (Bytecount is a class) */
|
|
1829
|
|
1830 typedef struct
|
|
1831 #else
|
771
|
1832 typedef union
|
814
|
1833 #endif
|
771
|
1834 {
|
|
/* (pointer, byte length) pair, used when the simplified type is
   DFC_TYPE_DATA */
1835 struct { const void *ptr; Bytecount len; } data;
|
|
/* Lisp object, used by the LISP_STRING and LISP_LSTREAM cases */
1836 Lisp_Object lisp_object;
|
|
1837 } dfc_conversion_data;
|
|
1838
|
|
/* Tags naming the kinds of conversion source and sink.  The names after
   DFC_TYPE_ are the ones written as SOURCE_TYPE / SINK_TYPE in
   TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT.  The *_TO_ARGS macros in this
   file only ever set the simplified type to DFC_TYPE_DATA,
   DFC_TYPE_LISP_STRING or DFC_TYPE_LISP_LSTREAM. */
1839 enum dfc_conversion_type
|
|
1840 {
|
|
1841 DFC_TYPE_DATA,
|
|
1842 DFC_TYPE_ALLOCA,
|
|
1843 DFC_TYPE_MALLOC,
|
|
1844 DFC_TYPE_C_STRING,
|
|
1845 DFC_TYPE_C_STRING_ALLOCA,
|
|
1846 DFC_TYPE_C_STRING_MALLOC,
|
|
1847 DFC_TYPE_LISP_STRING,
|
|
1848 DFC_TYPE_LISP_LSTREAM,
|
|
1849 DFC_TYPE_LISP_OPAQUE,
|
|
1850 DFC_TYPE_LISP_BUFFER
|
|
1851 };
|
|
1852 typedef enum dfc_conversion_type dfc_conversion_type;
|
|
1853
|
|
/* Conversion workers behind TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT.
   Each takes the simplified source type with a pointer to the source
   descriptor, the coding system, and the simplified sink type with a
   pointer to the sink descriptor; neither returns a value.  The macros
   read the result back from the sink descriptor (dfc_sink.data.ptr and
   dfc_sink.data.len) when the sink type is DFC_TYPE_DATA. */
1854 /* WARNING: These use a static buffer. This can lead to disaster if
|
|
1855 these functions are not used *very* carefully. Another reason to only use
|
|
1856 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
|
|
1857 void
|
|
1858 dfc_convert_to_external_format (dfc_conversion_type source_type,
|
|
1859 dfc_conversion_data *source,
|
|
1860 Lisp_Object coding_system,
|
|
1861 dfc_conversion_type sink_type,
|
|
1862 dfc_conversion_data *sink);
|
|
1863 void
|
|
1864 dfc_convert_to_internal_format (dfc_conversion_type source_type,
|
|
1865 dfc_conversion_data *source,
|
|
1866 Lisp_Object coding_system,
|
|
1867 dfc_conversion_type sink_type,
|
|
1868 dfc_conversion_data *sink);
|
|
1869 /* CPP Trickery */
|
|
/* These are written without an argument list of their own, as in
   `DFC_CPP_CAR val', where VAL is itself a parenthesized pair such as
   `(ptr, len)'; the pair then serves as the argument list, yielding
   `(ptr)' for CAR and `(len)' for CDR. */
1870 #define DFC_CPP_CAR(x,y) (x)
|
|
1871 #define DFC_CPP_CDR(x,y) (y)
|
|
1872
|
|
1873 /* Convert `source' to args for dfc_convert_to_external_format() */
|
|
/* Each macro fills in dfc_source and dfc_simplified_source_type, which
   are locals declared by the enclosing TO_*_FORMAT macro.  CODESYS is not
   used by any of them.
   DATA:         VAL is a (ptr, len) pair; simplified type DFC_TYPE_DATA.
   C_STRING:     VAL is stored as the pointer and its strlen as the length;
                 simplified type DFC_TYPE_DATA.
   LISP_STRING:  VAL is asserted to satisfy STRINGP and stored as the Lisp
                 object; simplified type DFC_TYPE_LISP_STRING.
   LISP_LSTREAM: VAL is asserted to satisfy LSTREAMP and stored as the Lisp
                 object; simplified type DFC_TYPE_LISP_LSTREAM.
   LISP_OPAQUE:  pointer and length come from OPAQUE_DATA and OPAQUE_SIZE
                 of XOPAQUE (VAL); simplified type DFC_TYPE_DATA. */
1874 #define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \
|
|
1875 dfc_source.data.ptr = DFC_CPP_CAR val; \
|
|
1876 dfc_source.data.len = DFC_CPP_CDR val; \
|
|
1877 dfc_simplified_source_type = DFC_TYPE_DATA; \
|
|
1878 } while (0)
|
|
1879 #define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
|
|
1880 dfc_source.data.len = \
|
|
1881 strlen ((char *) (dfc_source.data.ptr = (val))); \
|
|
1882 dfc_simplified_source_type = DFC_TYPE_DATA; \
|
|
1883 } while (0)
|
|
1884 #define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \
|
|
1885 Lisp_Object dfc_slsta = (val); \
|
|
1886 type_checking_assert (STRINGP (dfc_slsta)); \
|
|
1887 dfc_source.lisp_object = dfc_slsta; \
|
|
1888 dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \
|
|
1889 } while (0)
|
|
1890 #define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \
|
|
1891 Lisp_Object dfc_sllta = (val); \
|
|
1892 type_checking_assert (LSTREAMP (dfc_sllta)); \
|
|
1893 dfc_source.lisp_object = dfc_sllta; \
|
|
1894 dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \
|
|
1895 } while (0)
|
|
1896 #define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \
|
|
1897 Lisp_Opaque *dfc_slota = XOPAQUE (val); \
|
|
1898 dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \
|
|
1899 dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \
|
|
1900 dfc_simplified_source_type = DFC_TYPE_DATA; \
|
|
1901 } while (0)
|
|
1902
|
|
1903 /* Convert `source' to args for dfc_convert_to_internal_format() */
|
|
/* The DATA, LISP_STRING, LISP_LSTREAM and LISP_OPAQUE forms simply forward
   to their DFC_EXT_SOURCE_* counterparts.  Only C_STRING differs: the
   length is obtained from dfc_external_data_len (ptr, codesys) instead of
   strlen (presumably because the terminator of external data depends on
   the coding system -- confirm against dfc_external_data_len). */
1904 #define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \
|
|
1905 DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)
|
|
1906 #define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
|
|
1907 dfc_source.data.len = dfc_external_data_len (dfc_source.data.ptr = (val), \
|
|
1908 codesys); \
|
|
1909 dfc_simplified_source_type = DFC_TYPE_DATA; \
|
|
1910 } while (0)
|
|
1911 #define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \
|
|
1912 DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)
|
|
1913 #define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \
|
|
1914 DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)
|
|
1915 #define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \
|
|
1916 DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)
|
|
1917
|
|
1918 /* Convert `sink' to args for dfc_convert_to_*_format() */
|
|
/* The ALLOCA, C_STRING_ALLOCA, MALLOC, C_STRING_MALLOC, LISP_STRING and
   LISP_OPAQUE forms ignore VAL and only set the simplified sink type to
   DFC_TYPE_DATA; the user's sink is assigned afterwards by the matching
   DFC_*_USE_CONVERTED_DATA macro.  LISP_LSTREAM asserts LSTREAMP on VAL
   and stores it as the sink object.  LISP_BUFFER stores the object
   returned by make_lisp_buffer_output_stream for the buffer, called with
   the buffer's BUF_PT value; both use simplified type
   DFC_TYPE_LISP_LSTREAM. */
1919 #define DFC_SINK_ALLOCA_TO_ARGS(val) \
|
|
1920 dfc_simplified_sink_type = DFC_TYPE_DATA
|
|
1921 #define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \
|
|
1922 dfc_simplified_sink_type = DFC_TYPE_DATA
|
|
1923 #define DFC_SINK_MALLOC_TO_ARGS(val) \
|
|
1924 dfc_simplified_sink_type = DFC_TYPE_DATA
|
|
1925 #define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \
|
|
1926 dfc_simplified_sink_type = DFC_TYPE_DATA
|
|
1927 #define DFC_SINK_LISP_STRING_TO_ARGS(val) \
|
|
1928 dfc_simplified_sink_type = DFC_TYPE_DATA
|
|
1929 #define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \
|
|
1930 dfc_simplified_sink_type = DFC_TYPE_DATA
|
|
1931 #define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \
|
|
1932 Lisp_Object dfc_sllta = (val); \
|
|
1933 type_checking_assert (LSTREAMP (dfc_sllta)); \
|
|
1934 dfc_sink.lisp_object = dfc_sllta; \
|
|
1935 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
|
|
1936 } while (0)
|
|
1937 #define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \
|
|
1938 struct buffer *dfc_slbta = XBUFFER (val); \
|
|
1939 dfc_sink.lisp_object = \
|
|
1940 make_lisp_buffer_output_stream \
|
|
1941 (dfc_slbta, BUF_PT (dfc_slbta), 0); \
|
|
1942 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
|
|
1943 } while (0)
|
|
1944
|
|
1945 /* Assign to the `sink' lvalue(s) using the converted data. */
|
|
1946 /* + 2 because the converted data is followed by two terminating zero bytes (to allow for Unicode output), and they are copied along with it */
|
|
/* ALLOCA / MALLOC:     SINK is a (ptr, len) pair of lvalues; the data is
                        copied into a fresh alloca / xmalloc buffer, the
                        pointer lvalue is set through the
                        dfc_aliasing_voidpp union and the length lvalue to
                        dfc_sink.data.len.
   C_STRING_ALLOCA /
   C_STRING_MALLOC:     same, but SINK is a single pointer lvalue and no
                        length is stored.
   LISP_STRING:         SINK is assigned the result of make_string.
   LISP_OPAQUE:         SINK is assigned the result of make_opaque.
   LISP_LSTREAM:        expands to nothing.
   LISP_BUFFER:         calls Lstream_delete on the stream object held in
                        dfc_sink.lisp_object.
   None of these macros frees the xmalloc'ed buffers. */
1947 typedef union { char c; void *p; } *dfc_aliasing_voidpp;
|
|
1948 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \
|
|
1949 void * dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
|
|
1950 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
|
|
1951 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
|
|
1952 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
|
|
1953 } while (0)
|
|
1954 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \
|
|
1955 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
|
|
1956 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
|
|
1957 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
|
|
1958 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
|
|
1959 } while (0)
|
|
1960 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \
|
|
1961 void * dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
|
|
1962 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
|
|
1963 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
|
|
1964 } while (0)
|
|
1965 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \
|
|
1966 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
|
|
1967 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
|
|
1968 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
|
|
1969 } while (0)
|
|
1970 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
|
|
1971 sink = make_string ((Intbyte *) dfc_sink.data.ptr, dfc_sink.data.len)
|
|
1972 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
|
|
1973 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)
|
|
1974 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */
|
|
1975 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
|
|
1976 Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
|
|
1977
|
|
1978 /* Convenience macros for extremely common invocations */
|
|
/* IN is the source and OUT the pointer lvalue that receives the result.
   Names without a _MALLOC suffix use the C_STRING_ALLOCA sink, names with
   it use C_STRING_MALLOC.  *_TO_EXTERNAL expand to TO_EXTERNAL_FORMAT,
   EXTERNAL_TO_* expand to TO_INTERNAL_FORMAT. */
1979 #define C_STRING_TO_EXTERNAL(in, out, coding_system) \
|
|
1980 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
|
|
1981 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
|
|
1982 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
|
|
1983 #define EXTERNAL_TO_C_STRING(in, out, coding_system) \
|
|
1984 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
|
|
1985 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, coding_system) \
|
|
1986 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
|
|
1987 #define LISP_STRING_TO_EXTERNAL(in, out, coding_system) \
|
|
1988 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, coding_system)
|
|
1989 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
|
|
1990 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, coding_system)
|
|
1991
|
|
1992 /* Standins for various encodings, until we know them better */
|
|
/* Every name in this group and the groups below is a plain alias for one
   of Qnative, Qmswindows_multibyte or Qctext. */
1993 #define Qcommand_argument_encoding Qnative
|
|
1994 #define Qenvironment_variable_encoding Qnative
|
|
1995 #define Qunix_host_name_encoding Qnative
|
|
1996 #define Qunix_service_name_encoding Qnative
|
|
1997 #define Qmswindows_host_name_encoding Qmswindows_multibyte
|
|
1998 #define Qmswindows_service_name_encoding Qmswindows_multibyte
|
|
1999
|
|
2000 /* Standins for various X encodings, until we know them better */
|
|
2001
|
|
2002 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext?
|
|
2003 Almost certainly the former. Use a standin for now. */
|
|
2004 #define Qlwlib_encoding Qnative
|
|
2005
|
|
2006 #define Qx_atom_name_encoding Qctext
|
|
2007 /* font names are often stored in atoms, so it gets sticky if we set this
|
|
2008 to something different from atom-name encoding */
|
|
2009 #define Qx_font_name_encoding Qctext
|
|
2010
|
|
2011 #define Qx_color_name_encoding Qctext
|
|
2012
|
|
2013 /* the following probably must agree with Qcommand_argument_encoding and
|
|
2014 Qenvironment_variable_encoding */
|
|
2015 #define Qx_display_name_encoding Qnative
|
|
2016
|
|
2017 #define Qstrerror_encoding Qnative
|
|
2018
|
|
/* Store in VAR the message for error number NUM.  NUM is evaluated once.
   If strerror returns a null pointer, VAR is set to a 99-byte buffer from
   alloca_intbytes holding "Unknown error <num>"; otherwise the strerror
   text is converted with EXTERNAL_TO_C_STRING using Qstrerror_encoding,
   which also stores an alloca'ed result in VAR.  VAR must be an lvalue and
   is written in both branches. */
2019 #define GET_STRERROR(var, num) \
|
|
2020 do { \
|
|
2021 int __gsnum__ = (num); \
|
|
2022 Extbyte * __gserr__ = strerror (__gsnum__); \
|
|
2023 \
|
|
2024 if (!__gserr__) \
|
|
2025 { \
|
|
2026 var = alloca_intbytes (99); \
|
|
2027 qxesprintf (var, "Unknown error %d", __gsnum__); \
|
|
2028 } \
|
|
2029 else \
|
|
2030 EXTERNAL_TO_C_STRING (__gserr__, var, Qstrerror_encoding); \
|
|
2031 } while (0)
|
|
2032
|
|
2033 /************************************************************************/
|
|
2034 /* Lisp string representation convenience functions */
|
|
2035 /************************************************************************/
|
|
2036
|
|
2037 /* Because the representation of internally formatted data is subject
|
|
2038 to change, it's bad style to do something like
|
|
2039
|
|
2040 strcmp (XSTRING_DATA (s), "foo")
|
|
2041
|
|
2042 Instead, use the portable:
|
|
2043
|
|
2044 intbyte_strcmp (XSTRING_DATA (s), "foo") or
|
|
2045 intbyte_memcmp (XSTRING_DATA (s), "foo", 3)
|
|
2046
|
|
2047 */
|
|
2048
|
|
2049 /* Like strcmp, except first arg points at internally formatted data,
|
|
2050 while the second points at a string of only ASCII chars. */
|
|
/* Returns 0 when the strings are equal.  Under MULE the nonzero result is
   the difference between the first differing character of BP (read with
   charptr_emchar) and the corresponding byte of ASCII_STRING; otherwise it
   is whatever strcmp returns.  The casts keep the const qualifier of both
   arguments. */
2051 DECLARE_INLINE_HEADER (
|
|
2052 int
|
|
2053 intbyte_strcmp (const Intbyte *bp, const char *ascii_string)
|
|
2054 )
|
|
2055 {
|
|
2056 #ifdef MULE
|
|
2057 while (1)
|
|
2058 {
|
|
2059 int diff;
|
|
2060 type_checking_assert (BYTE_ASCII_P (*ascii_string));
|
|
2061 if ((diff = charptr_emchar (bp) - *(const Intbyte *) ascii_string) != 0)
|
|
2062 return diff;
|
|
2063 if (*ascii_string == '\0')
|
|
2064 return 0;
|
|
2065 ascii_string++;
|
|
2066 INC_CHARPTR (bp);
|
|
2067 }
|
|
2068 #else
|
|
2069 return strcmp ((const char *) bp, ascii_string);
|
|
2070 #endif
|
|
2071 }
|
|
2072
|
|
2073 /* Like memcmp, except first arg points at internally formatted data,
|
|
2074 while the second points at a string of only ASCII chars. */
|
|
2075
|
|
/* LEN is the number of bytes of ASCII_STRING to compare.  Returns 0 when
   all of them match.  Under MULE the nonzero result is the difference
   between the first differing character of BP (read with charptr_emchar)
   and the corresponding byte of ASCII_STRING; otherwise it is whatever
   memcmp returns.  The cast keeps the const qualifier of ASCII_STRING. */
2076 DECLARE_INLINE_HEADER (
|
|
2077 int
|
|
2078 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len)
|
|
2079 )
|
|
2080 {
|
|
2081 #ifdef MULE
|
|
2082 while (len--)
|
|
2083 {
|
|
2084 int diff = charptr_emchar (bp) - *(const Intbyte *) ascii_string;
|
|
2085 type_checking_assert (BYTE_ASCII_P (*ascii_string));
|
|
2086 if (diff != 0)
|
|
2087 return diff;
|
|
2088 ascii_string++;
|
|
2089 INC_CHARPTR (bp);
|
|
2090 }
|
|
2091 return 0;
|
|
2092 #else
|
|
2093 return memcmp (bp, ascii_string, len);
|
|
2094 #endif
|
|
2095 }
|
|
2096
|
|
2097 #endif /* INCLUDED_text_h_ */
|