comparison src/text.h @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children 026c5bf9c134
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 /* Header file for text manipulation primitives and macros.
2 Copyright (C) 1985-1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc.
4 Copyright (C) 2000, 2001 Ben Wing.
5
6 This file is part of XEmacs.
7
8 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 2, or (at your option) any
11 later version.
12
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with XEmacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
22
23 /* Synched up with: FSF 19.30. */
24
25 /* Authorship:
26
27 Mostly written by Ben Wing, starting around 1995.
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz,
29 designed by Ben Wing based on earlier macros by Ben Wing.
30 Separated out June 18, 2000 from buffer.h into text.h.
31 */
32
33 #ifndef INCLUDED_text_h_
34 #define INCLUDED_text_h_
35
36 #include <wchar.h>
37
38 /* ---------------------------------------------------------------------- */
39 /* Super-basic character properties */
40 /* ---------------------------------------------------------------------- */
41
42 /* These properties define the specifics of how our current encoding fits
43 in the basic model used for the encoding. Because this model is the same
44 as is used for UTF-8, all these properties could be defined for it, too.
45 This would instantly make the rest of this file work with UTF-8 (with
46 the exception of a few called functions that would need to be redefined).
47
48 (UTF-2000 implementers, take note!)
49 */
50
51 /* If you want more than this, you need to include charset.h */
52
53 #ifndef MULE
54
/* Non-Mule build: these all collapse to the constant 1.  Note that the
   macro arguments are not evaluated. */
#define MAX_EMCHAR_LEN 1
#define BYTE_ASCII_P(byte) 1
#define REP_BYTES_BY_FIRST_BYTE(fb) 1
58
59 #else /* MULE */
60
/* Byte classification.  These work whether BYTE has a signed or an
   unsigned character type, and evaluate BYTE exactly once. */
/* Note that SPC and DEL are considered ASCII, not control. */

#define BYTE_ASCII_P(byte) (((byte) & ~0x7f) == 0)
#define BYTE_C0_P(byte) (((byte) & ~0x1f) == 0)
/* Only the low eight bits are examined: a signed char holding 0x80..0x9f
   sign-extends when promoted to int, so comparing every bit against 0x80
   would never match.  BYTE must therefore really be a byte value. */
#define BYTE_C1_P(byte) (((byte) & 0xe0) == 0x80)

/* Does BYTE represent the first byte of a character?  This is a plain
   magnitude comparison, so a negative (sign-extended) BYTE always
   compares true; pass an unsigned value. */

#define INTBYTE_FIRST_BYTE_P(byte) ((byte) < 0xA0)

/* Does BYTE represent the first byte of a multi-byte character? */

#define INTBYTE_LEADING_BYTE_P(byte) BYTE_C1_P (byte)
75
76 /* Table of number of bytes in the string representation of a character
77 indexed by the first byte of that representation.
78
79 This value can be derived in other ways -- e.g. something like
80 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))
81 but it's faster this way. */
82 extern const Bytecount rep_bytes_by_first_byte[0xA0];
83
84 /* Number of bytes in the string representation of a character. */
85 INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE (int fb);
86 INLINE_HEADER int
87 REP_BYTES_BY_FIRST_BYTE (int fb)
88 {
89 type_checking_assert (fb < 0xA0);
90 return rep_bytes_by_first_byte[fb];
91 }
92
/* Character-level (not byte-level) tests: anything at or above 0x80
   takes more than one byte in a string; everything below is ASCII. */

#define CHAR_MULTIBYTE_P(c) ((c) >= 0x80)

#define CHAR_ASCII_P(c) ((c) < 0x80)

/* Upper bound on the length of one character's string representation. */
#define MAX_EMCHAR_LEN 4
100
101 #endif /* MULE */
102
103 int dfc_coding_system_is_unicode (Lisp_Object coding_system);
104
105 DECLARE_INLINE_HEADER (
106 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys)
107 )
108 {
109 if (dfc_coding_system_is_unicode (codesys))
110 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr);
111 else
112 return strlen ((char *) ptr);
113 }
114
115
116 /************************************************************************/
117 /* */
118 /* working with raw internal-format data */
119 /* */
120 /************************************************************************/
121
122 /* NOTE: In all the following macros, we follow these rules concerning
123 multiple evaluation of the arguments:
124
125 1) Anything that's an lvalue can be evaluated more than once.
126 2) Anything that's a Lisp Object can be evaluated more than once.
127 This should probably be changed, but this follows the way
128 that all the macros in lisp.h do things.
129 3) 'struct buffer *' arguments can be evaluated more than once.
130 4) Nothing else can be evaluated more than once. Use inline
131 functions, if necessary, to prevent multiple evaluation.
132 5) An exception to (4) is that there are some macros below that
133 may evaluate their arguments more than once. They are all
134 denoted with the word "unsafe" in their name and are generally
135 meant to be called only by other macros that have already
136 stored the calling values in temporary variables.
137
138
139 Use the following functions/macros on contiguous strings of data.
140 If the text you're operating on is known to come from a buffer, use
141 the buffer-level functions below -- they know about the gap and may
142 be more efficient.
143
144
145 ----------------------------------------------------------------------------
146 (A) For working with charptr's (pointers to internally-formatted text):
147 ----------------------------------------------------------------------------
148
149 VALID_CHARPTR_P (ptr):
150 Given a charptr, does it point to the beginning of a character?
151
152 ASSERT_VALID_CHARPTR (ptr):
153 If error-checking is enabled, assert that the given charptr
154 points to the beginning of a character. Otherwise, do nothing.
155
156 INC_CHARPTR (ptr):
157 Given a charptr (assumed to point at the beginning of a character),
158 modify that pointer so it points to the beginning of the next
159 character.
160
161 DEC_CHARPTR (ptr):
162 Given a charptr (assumed to point at the beginning of a
163 character or at the very end of the text), modify that pointer
164 so it points to the beginning of the previous character.
165
166 VALIDATE_CHARPTR_BACKWARD (ptr):
167 Make sure that PTR is pointing to the beginning of a character.
168 If not, back up until this is the case. Note that there are not
169 too many places where it is legitimate to do this sort of thing.
170 It's an error if you're passed an "invalid" char * pointer.
171 NOTE: PTR *must* be pointing to a valid part of the string (i.e.
172 not the very end, unless the string is zero-terminated or
173 something) in order for this function to not cause crashes.
174
175 VALIDATE_CHARPTR_FORWARD (ptr):
176 Make sure that PTR is pointing to the beginning of a character.
177 If not, move forward until this is the case. Note that there
178 are not too many places where it is legitimate to do this sort
179 of thing. It's an error if you're passed an "invalid" char *
180 pointer.
181
182 ---------------------------------------------------------------------
183 (B) For working with the length (in bytes and characters) of a
184 section of internally-formatted text:
185 ---------------------------------------------------------------------
186
187 bytecount_to_charcount (ptr, nbi):
188 Given a pointer to a text string and a length in bytes,
189 return the equivalent length in characters.
190
191 charcount_to_bytecount (ptr, nch):
192 Given a pointer to a text string and a length in characters,
193 return the equivalent length in bytes.
194
195 charptr_n_addr (ptr, n):
196 Return a pointer to the beginning of the character offset N
197 (in characters) from PTR.
198
199 -------------------------------------------------------------------------
200 (C) For retrieving or changing the character pointed to by a charptr:
201 -------------------------------------------------------------------------
202
203 charptr_emchar (ptr):
204 Retrieve the character pointed to by PTR as an Emchar.
205
206 charptr_emchar_n (ptr, n):
207 Retrieve the character at offset N (in characters) from PTR,
208 as an Emchar.
209
210 set_charptr_emchar (ptr, ch):
211 Store the character CH (an Emchar) as internally-formatted
212 text starting at PTR. Return the number of bytes stored.
213
214 charptr_copy_char (src, dst):
215 Retrieve the character pointed to by SRC and store it as
216 internally-formatted text in DST.
217
218 ----------------------------------
219 (D) For working with Emchars:
220 ----------------------------------
221
222 [Note that there are other functions/macros for working with Emchars
223 in charset.h, for retrieving the charset of an Emchar and such.]
224
225 valid_char_p (ch):
226 Return whether the given Emchar is valid.
227
228 CHARP (ch):
229 Return whether the given Lisp_Object is a character.
230
231 CHECK_CHAR_COERCE_INT (ch):
232 Signal an error if CH is not a valid character or integer Lisp_Object.
233 If CH is an integer Lisp_Object, convert it to a character Lisp_Object,
234 but merely by repackaging, without performing tests for char validity.
235
236 MAX_EMCHAR_LEN:
237 Maximum number of buffer bytes per Emacs character.
238 */
239
240 /* ---------------------------------------------------------------------- */
241 /* (A) For working with charptr's (pointers to internally-formatted text) */
242 /* ---------------------------------------------------------------------- */
243
/* Does PTR point at the first byte of a character?  PTR is fully
   parenthesized so that an expression argument such as `p + 1' is cast
   and dereferenced as a whole, rather than as `*p + 1'. */
#ifdef MULE
# define VALID_CHARPTR_P(ptr) \
  INTBYTE_FIRST_BYTE_P (* (unsigned char *) (ptr))
#else
# define VALID_CHARPTR_P(ptr) 1
#endif
249
/* With ERROR_CHECK_CHARBPOS, assert that PTR satisfies VALID_CHARPTR_P;
   otherwise expand to nothing at all (PTR is not evaluated). */
#ifndef ERROR_CHECK_CHARBPOS
# define ASSERT_VALID_CHARPTR(ptr)
#else
# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
#endif
255
/* INC_CHARPTR() and DEC_CHARPTR() have to be written in completely
   separate ways.  Moving forward cannot scan for the next valid first
   byte, because that scan might run off the end of the string; it uses
   the length implied by the current first byte instead.  Moving
   backward has no easy access to the first byte of the character being
   skipped, so it steps back one byte at a time until it lands on a
   valid first byte. */

/* Advance PTR over the character it points at. */
#define REAL_INC_CHARPTR(ptr)						\
  ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))))

/* Add to POS the byte length of the character at PTR; PTR itself is
   left alone. */
#define REAL_INC_CHARBYTEBPOS(ptr, pos)					\
  ((pos) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))

/* Step PTR back (at least one byte) to the previous first byte. */
#define REAL_DEC_CHARPTR(ptr) do {	\
  --(ptr);				\
} while (!VALID_CHARPTR_P (ptr))
272
/* Public character-stepping macros.  Without ERROR_CHECK_CHARBPOS they
   are straight aliases for the REAL_ versions; with it, each one also
   checks that the pointer sits on a character boundary. */
#ifndef ERROR_CHECK_CHARBPOS

#define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr)
#define INC_CHARBYTEBPOS(ptr, pos) REAL_INC_CHARBYTEBPOS (ptr, pos)
#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)

#else /* ERROR_CHECK_CHARBPOS */

#define INC_CHARPTR(ptr) do {		\
  ASSERT_VALID_CHARPTR (ptr);		\
  REAL_INC_CHARPTR (ptr);		\
} while (0)

#define INC_CHARBYTEBPOS(ptr, pos) do {		\
  ASSERT_VALID_CHARPTR (ptr);			\
  REAL_INC_CHARBYTEBPOS (ptr, pos);		\
} while (0)

/* Step back on a copy, then verify that the distance moved equals the
   length implied by the first byte that was reached. */
#define DEC_CHARPTR(ptr) do {				\
  const Intbyte *dc_from = (ptr);			\
  const Intbyte *dc_to = dc_from;			\
  REAL_DEC_CHARPTR (dc_to);				\
  assert (dc_from - dc_to ==				\
	  REP_BYTES_BY_FIRST_BYTE (*dc_to));		\
  (ptr) = (Intbyte *) dc_to;				\
} while (0)

#endif /* ERROR_CHECK_CHARBPOS */
298
299 #ifdef MULE
300
/* Back PTR up until it satisfies VALID_CHARPTR_P.  Note that this reads
   the byte at *PTR!  PTR must be a modifiable lvalue; it is
   parenthesized at the decrement so that an argument such as `*pp'
   moves the pointed-to pointer rather than `pp' itself. */

#define VALIDATE_CHARPTR_BACKWARD(ptr) do {	\
  while (!VALID_CHARPTR_P (ptr)) (ptr)--;	\
} while (0)
306
307 /* Given a Intbyte string at PTR of size N, possibly with a partial
308 character at the end, return the size of the longest substring of
309 complete characters. Does not assume that the byte at *(PTR + N) is
310 readable. */
311 DECLARE_INLINE_HEADER (
312 Bytecount
313 validate_intbyte_string_backward (Intbyte *ptr, Bytecount n)
314 )
315 {
316 Intbyte *ptr2;
317
318 if (n == 0)
319 return n;
320 ptr2 = ptr + n - 1;
321 VALIDATE_CHARPTR_BACKWARD (ptr2);
322 if (ptr2 + REP_BYTES_BY_FIRST_BYTE (*ptr2) != ptr + n)
323 return ptr2 - ptr;
324 return n;
325 }
326
/* Make PTR point at the beginning of a character, moving forward if it
   does not already.  Scanning forward for a first byte could run off
   the end of the string, so instead back a copy up to the start of the
   character PTR is inside and, if that moved, step PTR over that whole
   character. */

#define VALIDATE_CHARPTR_FORWARD(ptr) do {	\
  Intbyte *vcf_start = (ptr);			\
  VALIDATE_CHARPTR_BACKWARD (vcf_start);	\
  if (vcf_start != (ptr))			\
    {						\
      (ptr) = vcf_start;			\
      INC_CHARPTR (ptr);			\
    }						\
} while (0)
339
340 #else /* not MULE */
/* Non-Mule build: the two pointer validators expand to nothing, and the
   string validator just yields N. */
#define validate_intbyte_string_backward(ptr, n) (n)
#define VALIDATE_CHARPTR_FORWARD(ptr)
#define VALIDATE_CHARPTR_BACKWARD(ptr)
344 #endif /* not MULE */
345
346 /* -------------------------------------------------------------- */
347 /* (B) For working with the length (in bytes and characters) of a */
348 /* section of internally-formatted text */
349 /* -------------------------------------------------------------- */
350
351 INLINE_HEADER const Intbyte *
352 charptr_n_addr (const Intbyte *ptr, Charcount offset);
353 INLINE_HEADER const Intbyte *
354 charptr_n_addr (const Intbyte *ptr, Charcount offset)
355 {
356 return ptr + charcount_to_bytecount (ptr, offset);
357 }
358
359 /* -------------------------------------------------------------------- */
360 /* (C) For retrieving or changing the character pointed to by a charptr */
361 /* -------------------------------------------------------------------- */
362
/* One-byte versions of the character accessors.  The two storing forms
   evaluate to 1. */
#define simple_charptr_emchar(ptr) ((Emchar) *(ptr))
#define simple_set_charptr_emchar(ptr, x) (*(ptr) = (Intbyte) (x), 1)
#define simple_charptr_copy_char(src, dst) (*(dst) = (src)[0], 1)
366
367 #ifdef MULE
368
369 Emchar non_ascii_charptr_emchar (const Intbyte *ptr);
370 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c);
371 Bytecount non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst);
372
373 INLINE_HEADER Emchar charptr_emchar (const Intbyte *ptr);
374 INLINE_HEADER Emchar
375 charptr_emchar (const Intbyte *ptr)
376 {
377 return BYTE_ASCII_P (*ptr) ?
378 simple_charptr_emchar (ptr) :
379 non_ascii_charptr_emchar (ptr);
380 }
381
382 INLINE_HEADER Bytecount set_charptr_emchar (Intbyte *ptr, Emchar x);
383 INLINE_HEADER Bytecount
384 set_charptr_emchar (Intbyte *ptr, Emchar x)
385 {
386 return !CHAR_MULTIBYTE_P (x) ?
387 simple_set_charptr_emchar (ptr, x) :
388 non_ascii_set_charptr_emchar (ptr, x);
389 }
390
391 INLINE_HEADER Bytecount
392 charptr_copy_char (const Intbyte *src, Intbyte *dst);
393 INLINE_HEADER Bytecount
394 charptr_copy_char (const Intbyte *src, Intbyte *dst)
395 {
396 return BYTE_ASCII_P (*src) ?
397 simple_charptr_copy_char (src, dst) :
398 non_ascii_charptr_copy_char (src, dst);
399 }
400
401 #else /* not MULE */
402
/* Non-Mule build: the general accessors are just the one-byte ones. */
# define charptr_copy_char(src, dst) simple_charptr_copy_char (src, dst)
# define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x)
# define charptr_emchar(ptr) simple_charptr_emchar (ptr)
406
407 #endif /* not MULE */
408
/* Retrieve, as an Emchar, the character OFFSET characters past PTR. */
#define charptr_emchar_n(ptr, offset) \
  charptr_emchar (charptr_n_addr (ptr, offset))
411
412
413 /* ---------------------------- */
414 /* (D) For working with Emchars */
415 /* ---------------------------- */
416
417 #ifdef MULE
418
419 int non_ascii_valid_char_p (Emchar ch);
420
421 INLINE_HEADER int valid_char_p (Emchar ch);
422 INLINE_HEADER int
423 valid_char_p (Emchar ch)
424 {
425 return (! (ch & ~0xFF)) || non_ascii_valid_char_p (ch);
426 }
427
428 #else /* not MULE */
429
/* Non-Mule build: CH is valid iff it has no bits set above the low
   eight.  CH is parenthesized so that an expression argument such as
   `a | b' is masked as a whole. */
#define valid_char_p(ch) (! ((ch) & ~0xFF))
431
432 #endif /* not MULE */
433
/* Is X an integer Lisp_Object whose value passes valid_char_p()?
   X is evaluated more than once. */
#define CHAR_INTP(x) \
  (INTP (x) && valid_char_p (XINT (x)))

/* Is X either a character or an integer accepted by CHAR_INTP()? */
#define CHAR_OR_CHAR_INTP(x) \
  (CHARP (x) || CHAR_INTP (x))
437
438 INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj);
439 INLINE_HEADER Emchar
440 XCHAR_OR_CHAR_INT (Lisp_Object obj)
441 {
442 return CHARP (obj) ? XCHAR (obj) : XINT (obj);
443 }
444
/* X must be an lvalue; it is evaluated several times.  A character is
   left untouched.  An integer accepted by CHAR_INTP() is replaced by
   the character made from its value.  Anything else is replaced by the
   result of wrong_type_argument (Qcharacterp, X). */
#define CHECK_CHAR_COERCE_INT(x) do {			\
  if (!CHARP (x))					\
    {							\
      if (CHAR_INTP (x))				\
	(x) = make_char (XINT (x));			\
      else						\
	(x) = wrong_type_argument (Qcharacterp, x);	\
    }							\
} while (0)
453
454
455
456 /************************************************************************/
457 /* */
458 /* working with Eistrings */
459 /* */
460 /************************************************************************/
461
462 /*
463 #### NOTE: This is a work in progress. Neither the API nor especially
464 the implementation is finished.
465
466 NOTE: An Eistring is a structure that makes it easy to work with
467 internally-formatted strings of data. It provides operations similar
468 in feel to the standard strcpy(), strcat(), strlen(), etc., but
469
470 (a) it is Mule-correct
471 (b) it does dynamic allocation so you never have to worry about size
472 restrictions (and all allocation is stack-local using alloca(), so
473 there is no need to explicitly clean up)
474 (c) it knows its own length, so it does not suffer from standard null
475 byte brain-damage
476 (d) it provides a much more powerful set of operations and knows about
477 all the standard places where string data might reside: Lisp_Objects,
478 other Eistrings, Intbyte * data with or without an explicit length,
479 ASCII strings, Emchars, etc.
480 (e) it provides easy operations to convert to/from externally-formatted
481 data, and is much easier to use than the standard TO_INTERNAL_FORMAT
482 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal
483 and external version of its data, but the external version is only
484 initialized or changed when you call eito_external().)
485
486 The idea is to make it as easy to write Mule-correct string
487 manipulation code as it is to write normal string manipulation
488 code. We also make the API sufficiently general that it can handle
489 multiple internal data formats (e.g. some fixed-width optimizing
490 formats and a default variable width format) and allows for *ANY*
491 data format we might choose in the future for the default format,
492 including UCS2. (In other words, we can't assume that the internal
493 format is ASCII-compatible and we can't assume it doesn't have
494 embedded null bytes. We do assume, however, that any chosen format
495 will have the concept of null-termination.) All of this is hidden
496 from the user.
497
498 #### It is really too bad that we don't have a real object-oriented
499 language, or at least a language with polymorphism!
500
501
502 **********************************************
503 * Declaration *
504 **********************************************
505
506 To declare an Eistring, either put one of the following in the local
507 variable section:
508
509 DECLARE_EISTRING (name);
510 Declare a new Eistring. This is a standard local variable declaration
511 and can go anywhere in the variable declaration section. NAME itself
512 is declared as an Eistring *, and its storage declared on the stack.
513
514 DECLARE_EISTRING_MALLOC (name);
515 Declare a new Eistring, which uses malloc()ed instead of alloca()ed
516 data. This is a standard local variable declaration and can go
517 anywhere in the variable declaration section. Once you initialize
518 the Eistring, you will have to free it using eifree() to avoid
519 memory leaks.
520
521 or use
522
523 Eistring name;
524 void eiinit (Eistring name);
525 void eiinit_malloc (Eistring name);
526 If you need to put an Eistring elsewhere than in a local variable
527 declaration (e.g. in a structure), declare it as shown and then
528 call one of the init macros.
529
530 Also note:
531
532 void eifree (Eistring ei);
533 If you declared an Eistring to use malloc() to hold its data,
534 or converted it to the heap using eito_malloc(), then this
535 releases any data in it and afterwards resets the Eistring
536 using eiinit_malloc(). Otherwise, it just resets the Eistring
537 using eiinit().
538
539
540 **********************************************
541 * Conventions *
542 **********************************************
543
544 - The names of the functions have been chosen, where possible, to
545 match the names of str*() functions in the standard C API.
546 -
547
548
549 **********************************************
550 * Initialization *
551 **********************************************
552
553 void eireset (Eistring *eistr);
554 Initialize the Eistring to the empty string.
555
556 void eicpy_* (Eistring *eistr, ...);
557 Initialize the Eistring from somewhere:
558
559 void eicpy_ei (Eistring *eistr, Eistring *eistr2);
560 ... from another Eistring.
561 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string);
562 ... from a Lisp_Object string.
563 void eicpy_ch (Eistring *eistr, Emchar ch);
564 ... from an Emchar.
565
566 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string,
567 Bytecount off, Charcount charoff,
568 Bytecount len, Charcount charlen);
569 ... from a section of a Lisp_Object string.
570 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf,
571 Bytecount off, Charcount charoff,
572 Bytecount len, Charcount charlen);
573 ... from a section of a Lisp_Object buffer.
574 void eicpy_raw (Eistring *eistr, const Intbyte *data, Bytecount len);
575 ... from raw internal-format data in the default internal format.
576 void eicpy_rawz (Eistring *eistr, const Intbyte *data);
577 ... from raw internal-format data in the default internal format
578 that is "null-terminated" (the meaning of this depends on the nature
579 of the default internal format).
580 void eicpy_raw_fmt (Eistring *eistr, const Intbyte *data, Bytecount len,
581 Internal_Format intfmt);
582 ... from raw internal-format data in the specified format.
583 void eicpy_rawz_fmt (Eistring *eistr, const Intbyte *data,
584 Internal_Format intfmt);
585 ... from raw internal-format data in the specified format that is
586 "null-terminated" (the meaning of this depends on the nature of
587 the specific format).
588 void eicpy_c (Eistring *eistr, const Char_ASCII *c_string);
589 ... from an ASCII null-terminated string. Non-ASCII characters in
590 the string are *ILLEGAL* (read abort() with error-checking defined).
591 void eicpy_c_len (Eistring *eistr, const Char_ASCII *c_string, len);
592 ... from an ASCII string, with length specified. Non-ASCII characters
593 in the string are *ILLEGAL* (read abort() with error-checking defined).
594 void eicpy_ext (Eistring *eistr, const Extbyte *extdata,
595 Lisp_Object coding_system);
596 ... from external null-terminated data, with coding system specified.
597 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata,
598 Bytecount extlen, Lisp_Object coding_system);
599 ... from external data, with length and coding system specified.
600 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream);
601 ... from an lstream; reads data till eof. Data must be in default
602 internal format; otherwise, interpose a decoding lstream.
603
604
605 **********************************************
606 * Getting the data out of the Eistring *
607 **********************************************
608
609 Intbyte *eidata (Eistring *eistr);
610 Return a pointer to the raw data in an Eistring. This is NOT
611 a copy.
612
613 Lisp_Object eimake_string (Eistring *eistr);
614 Make a Lisp string out of the Eistring.
615
616 Lisp_Object eimake_string_off (Eistring *eistr,
617 Bytecount off, Charcount charoff,
618 Bytecount len, Charcount charlen);
619 Make a Lisp string out of a section of the Eistring.
620
621 void eicpyout_alloca (Eistring *eistr, LVALUE: Intbyte *ptr_out,
622 LVALUE: Bytecount len_out);
623 Make an alloca() copy of the data in the Eistring, using the
624 default internal format. Due to the nature of alloca(), this
625 must be a macro, with all lvalues passed in as parameters.
626 A pointer to the alloca()ed data is stored in PTR_OUT, and
627 the length of the data (not including the terminating zero)
628 is stored in LEN_OUT.
629
630 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Intbyte *ptr_out,
631 LVALUE: Bytecount len_out,
632 Internal_Format intfmt);
633 Like eicpyout_alloca(), but converts to the specified internal
634 format. (No formats other than FORMAT_DEFAULT are currently
635 implemented, and you get an assertion failure if you try.)
636
637 Intbyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out);
638 Make a malloc() copy of the data in the Eistring, using the
639 default internal format. This is a real function. No lvalues
640 passed in. Returns the new data, and stores the length (not
641 including the terminating zero) using INTLEN_OUT, unless it's
642 a NULL pointer.
643
644 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt,
645 Bytecount *intlen_out);
646 Like eicpyout_malloc(), but converts to the specified internal
647 format. (No formats other than FORMAT_DEFAULT are currently
648 implemented, and you get an assertion failure if you try.)
649
650
651 **********************************************
652 * Moving to the heap *
653 **********************************************
654
655 void eito_malloc (Eistring *eistr);
656 Move this Eistring to the heap. Its data will be stored in a
657 malloc()ed block rather than the stack. Subsequent changes to
658 this Eistring will realloc() the block as necessary. Use this
659 when you want the Eistring to remain in scope past the end of
660 this function call. You will have to manually free the data
661 in the Eistring using eifree().
662
663 void eito_alloca (Eistring *eistr);
664 Move this Eistring back to the stack, if it was moved to the
665 heap with eito_malloc(). This will automatically free any
666 heap-allocated data.
667
668
669
670 **********************************************
671 * Retrieving the length *
672 **********************************************
673
674 Bytecount eilen (Eistring *eistr);
675 Return the length of the internal data, in bytes. See also
676 eiextlen(), below.
677 Charcount eicharlen (Eistring *eistr);
678 Return the length of the internal data, in characters.
679
680
681 **********************************************
682 * Working with positions *
683 **********************************************
684
685 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos);
686 Convert a char offset to a byte offset.
687 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos);
688 Convert a byte offset to a char offset.
689 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos);
690 Increment the given position by one character.
691 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
692 Increment the given position by N characters.
693 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos);
694 Decrement the given position by one character.
695 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
696 Decrement the given position by N characters.
697
698
699 **********************************************
700 * Getting the character at a position *
701 **********************************************
702
703 Emchar eigetch (Eistring *eistr, Bytecount bytepos);
704 Return the character at a particular byte offset.
705 Emchar eigetch_char (Eistring *eistr, Charcount charpos);
706 Return the character at a particular character offset.
707
708
709 **********************************************
710 * Setting the character at a position *
711 **********************************************
712
713 Emchar eisetch (Eistring *eistr, Bytecount bytepos, Emchar chr);
714 Set the character at a particular byte offset.
715 Emchar eisetch_char (Eistring *eistr, Charcount charpos, Emchar chr);
716 Set the character at a particular character offset.
717
718
719 **********************************************
720 * Concatenation *
721 **********************************************
722
723 void eicat_* (Eistring *eistr, ...);
724 Concatenate onto the end of the Eistring, with data coming from the
725 same places as above:
726
727 void eicat_ei (Eistring *eistr, Eistring *eistr2);
728 ... from another Eistring.
729 void eicat_c (Eistring *eistr, Char_ASCII *c_string);
730 ... from an ASCII null-terminated string. Non-ASCII characters in
731 the string are *ILLEGAL* (read abort() with error-checking defined).
732 void eicat_raw (ei, const Intbyte *data, Bytecount len);
733 ... from raw internal-format data in the default internal format.
734 void eicat_rawz (ei, const Intbyte *data);
735 ... from raw internal-format data in the default internal format
736 that is "null-terminated" (the meaning of this depends on the nature
737 of the default internal format).
738 void eicat_lstr (ei, Lisp_Object lisp_string);
739 ... from a Lisp_Object string.
740 void eicat_ch (ei, Emchar ch);
741 ... from an Emchar.
742
743 (All except the first variety are convenience functions.
744 In the general case, create another Eistring from the source.)
745
746
747 **********************************************
748 * Replacement *
749 **********************************************
750
751 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff,
752 Bytecount len, Charcount charlen, ...);
753 Replace a section of the Eistring, specifically:
754
755 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff,
756 Bytecount len, Charcount charlen, Eistring *eistr2);
757 ... with another Eistring.
758 void eisub_c (Eistring *eistr, Bytecount off, Charcount charoff,
759 Bytecount len, Charcount charlen, Char_ASCII *c_string);
760 ... with an ASCII null-terminated string. Non-ASCII characters in
761 the string are *ILLEGAL* (read abort() with error-checking defined).
762 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff,
763 Bytecount len, Charcount charlen, Emchar ch);
764 ... with an Emchar.
765
766 void eidel (Eistring *eistr, Bytecount off, Charcount charoff,
767 Bytecount len, Charcount charlen);
768 Delete a section of the Eistring.
769
770
771 **********************************************
772 * Converting to an external format *
773 **********************************************
774
775 void eito_external (Eistring *eistr, Lisp_Object coding_system);
776 Convert the Eistring to an external format and store the result
777 in the string. NOTE: Further changes to the Eistring will *NOT*
778 change the external data stored in the string. You will have to
779 call eito_external() again in such a case if you want the external
780 data.
781
782 Extbyte *eiextdata (Eistring *eistr);
783 Return a pointer to the external data stored in the Eistring as
784 a result of a prior call to eito_external().
785
786 Bytecount eiextlen (Eistring *eistr);
787 Return the length in bytes of the external data stored in the
788 Eistring as a result of a prior call to eito_external().
789
790
791 **********************************************
792 * Searching in the Eistring for a character *
793 **********************************************
794
795 Bytecount eichr (Eistring *eistr, Emchar chr);
796 Charcount eichr_char (Eistring *eistr, Emchar chr);
797 Bytecount eichr_off (Eistring *eistr, Emchar chr, Bytecount off,
798 Charcount charoff);
799 Charcount eichr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
800 Charcount charoff);
801 Bytecount eirchr (Eistring *eistr, Emchar chr);
802 Charcount eirchr_char (Eistring *eistr, Emchar chr);
803 Bytecount eirchr_off (Eistring *eistr, Emchar chr, Bytecount off,
804 Charcount charoff);
805 Charcount eirchr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
806 Charcount charoff);
807
808
809 **********************************************
810 * Searching in the Eistring for a string *
811 **********************************************
812
813 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2);
814 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2);
815 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
816 Charcount charoff);
817 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2,
818 Bytecount off, Charcount charoff);
819 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2);
820 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2);
821 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
822 Charcount charoff);
823 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2,
824 Bytecount off, Charcount charoff);
825
826 Bytecount eistr_c (Eistring *eistr, Char_ASCII *c_string);
827 Charcount eistr_c_char (Eistring *eistr, Char_ASCII *c_string);
828 Bytecount eistr_c_off (Eistring *eistr, Char_ASCII *c_string, Bytecount off,
829 Charcount charoff);
830 Charcount eistr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
831 Bytecount off, Charcount charoff);
832 Bytecount eirstr_c (Eistring *eistr, Char_ASCII *c_string);
833 Charcount eirstr_c_char (Eistring *eistr, Char_ASCII *c_string);
834 Bytecount eirstr_c_off (Eistring *eistr, Char_ASCII *c_string,
835 Bytecount off, Charcount charoff);
836 Charcount eirstr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
837 Bytecount off, Charcount charoff);
838
839
840 **********************************************
841 * Comparison *
842 **********************************************
843
844 int eicmp_* (Eistring *eistr, ...);
845 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
846 Bytecount len, Charcount charlen, ...);
847 int eicasecmp_* (Eistring *eistr, ...);
848 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
849 Bytecount len, Charcount charlen, ...);
850 int eicasecmp_i18n_* (Eistring *eistr, ...);
851 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
852 Bytecount len, Charcount charlen, ...);
853
854 Compare the Eistring with the other data. Return value same as
855 from strcmp. The `*' is either `ei' for another Eistring (in
856 which case `...' is an Eistring), or `c' for a pure-ASCII string
857 (in which case `...' is a pointer to that string). For anything
858 more complex, first create an Eistring out of the source.
859 Comparison is either simple (`eicmp_...'), ASCII case-folding
860 (`eicasecmp_...'), or multilingual case-folding
861 (`eicasecmp_i18n_...').
862
863
864 More specifically, the prototypes are:
865
866 int eicmp_ei (Eistring *eistr, Eistring *eistr2);
867 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
868 Bytecount len, Charcount charlen, Eistring *eistr2);
869 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2);
870 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
871 Bytecount len, Charcount charlen, Eistring *eistr2);
872 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2);
873 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off,
874 Charcount charoff, Bytecount len,
875 Charcount charlen, Eistring *eistr2);
876
877 int eicmp_c (Eistring *eistr, Char_ASCII *c_string);
878 int eicmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
879 Bytecount len, Charcount charlen, Char_ASCII *c_string);
880 int eicasecmp_c (Eistring *eistr, Char_ASCII *c_string);
881 int eicasecmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
882 Bytecount len, Charcount charlen,
883 Char_ASCII *c_string);
884 int eicasecmp_i18n_c (Eistring *eistr, Char_ASCII *c_string);
885 int eicasecmp_i18n_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
886 Bytecount len, Charcount charlen,
887 Char_ASCII *c_string);
888
889
890 **********************************************
891 * Case-changing the Eistring *
892 **********************************************
893
894 void eilwr (Eistring *eistr);
895 Convert all characters in the Eistring to lowercase.
896 void eiupr (Eistring *eistr);
897 Convert all characters in the Eistring to uppercase.
898 */
899
900
901 /* Principles for writing Eistring functions:
902
903 (1) Unfortunately, we have to write most of the Eistring functions
904 as macros, because of the use of alloca(). The principle used
905 below to assure no conflict in local variables is to prefix all
906 local variables with "ei" plus a number, which should be unique
907 among macros. In practice, when finding a new number, find the
908 highest so far used, and add 1.
909
910 (2) We also suffix the Eistring fields with an _ to avoid problems
911 with macro parameters of the same name. (And as the standard
912 signal not to access these fields directly.)
913
914 (3) We maintain both the length in bytes and chars of the data in
915 the Eistring at all times, for convenient retrieval by outside
916 functions. That means when writing functions that manipulate
917 Eistrings, you too need to keep both lengths up to date for all
918 data that you work with.
919
920 (4) When writing a new type of operation (e.g. substitution), you
921 will often find yourself working with outside data, and thus
922 have a series of related API's, for different forms that the
923 outside data is in. Generally, you will want to choose a
924 subset of the forms supported by eicpy_*, which has to be
925 totally general because that's the fundamental way to get data
926 into an Eistring, and once the data is in the string, it
927 would be possible to create a whole series of Ei operations that
928 work on nothing but Eistrings. Although theoretically nice, in
929 practice it's a hassle, so we suggest that you provide
930 convenience functions. In particular, there are two paths you
931 can take. One is minimalist -- it only allows other Eistrings
932 and ASCII data, and Emchars if the particular operation makes
933 sense with a character. The other provides interfaces for the
934 most commonly-used forms -- Eistring, ASCII data, Lisp string,
935 raw internal-format string with length, raw internal-format
936 string without, and possibly Emchar. (In the function names,
937 these are designated `ei', `c', `lstr', `raw', `rawz', and
938 `ch', respectively.)
939
940 (5) When coding a new type of operation, such as was discussed in
941 the previous point, the correct approach is to declare a worker
942 function that does the work of everything, and is called by the
943 other "container" macros that handle the different outside data
944 forms. The data coming into the worker function, which
945 typically ends in `_1', is in the form of three parameters:
946 DATA, LEN, CHARLEN. (See point [3] about having two lengths and
947 keeping them in sync.)
948
949 (6) Handling argument evaluation in macros: We take great care
950 never to evaluate any argument more than once in any macro,
951 except the initial Eistring parameter. This can and will be
952 evaluated multiple times, but it should pretty much always just
953 be a simple variable. This means, for example, that if an
954 Eistring is the second (not first) argument of a macro, it
955 doesn't fall under the "initial Eistring" exemption, so it
956 needs protection against multi-evaluation. (Take the address of
957 the Eistring structure, store it in a temporary variable, and use
958 the temporary variable for all access to the Eistring.)
959 Essentially, we want it to appear as if these Eistring macros
960 are functions -- we would like to declare them as functions but
961 they use alloca(), so we can't (and we can't make them inline
962 functions either -- alloca() is explicitly disallowed in inline
963 functions.)
964
965 (7) Note that our rules regarding multiple evaluation are *more*
966 strict than the rules listed above under the heading "working
967 with raw internal-format data".
968 */
969
970
971 /* ----- Declaration ----- */
972
973 typedef struct
974 {
975 /* Data for the Eistring, stored in the default internal format.
976 Always includes terminating null. */
977 Intbyte *data_;
978 /* Total number of bytes allocated in DATA (including null). */
979 Bytecount max_size_allocated_;
980 Bytecount bytelen_;
981 Charcount charlen_;
982 int mallocp_;
983
984 Extbyte *extdata_;
985 Bytecount extlen_;
986 } Eistring;
987
/* Identifies a representation of internal text.  The *_fmt Eistring
   macros in this file assert that they are handed FORMAT_DEFAULT. */
typedef enum internal_format
{
  FORMAT_DEFAULT = 0,
  FORMAT_FIXED_8 = 1,
  FORMAT_FIXED_16 = 2,
  FORMAT_FIXED_32 = 3
} Internal_Format;
995
996 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init;
997
998 #define DECLARE_EISTRING(name) \
999 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \
1000 Eistring *name = & __ ## name ## __storage__
1001 #define DECLARE_EISTRING_MALLOC(name) \
1002 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \
1003 Eistring *name = & __ ## name ## __storage__
1004
/* Reset the Eistring that EI points to by copying
   the_eistring_zero_init over it.  EI is an `Eistring *' (as created by
   DECLARE_EISTRING and as passed by eifree()), so the assignment has to
   go through the pointer.  Nothing is freed here. */
#define eiinit(ei)                      \
do {                                    \
  *(ei) = the_eistring_zero_init;       \
} while (0)
1009
/* Reset the Eistring that EI points to by copying
   the_eistring_malloc_zero_init over it.  EI is an `Eistring *', so the
   assignment has to go through the pointer.  Nothing is freed here. */
#define eiinit_malloc(ei)                       \
do {                                            \
  *(ei) = the_eistring_malloc_zero_init;        \
} while (0)
1014
1015
1016 /* ----- Utility ----- */
1017
/* Fill in whichever of LEN and CHARLEN was passed as -1, computing it
   from the other one relative to the text at PTR.  LEN takes
   precedence: if it is -1, CHARLEN is not examined for -1.  LEN and
   CHARLEN must be lvalues.  PTR evaluated at most once, others
   multiply. */
#define eifixup_bytechar(ptr, len, charlen)                     \
do {                                                            \
  if ((len) != -1)                                              \
    {                                                           \
      if ((charlen) == -1)                                      \
        (charlen) = bytecount_to_charcount (ptr, len);          \
    }                                                           \
  else                                                          \
    (len) = charcount_to_bytecount (ptr, charlen);              \
} while (0)
1027
/* Make sure LEN is specified: if it was given as -1, compute it from
   CHARLEN relative to the text at PTR.  LEN must be an lvalue.  PTR
   evaluated at most once, others multiply. */
#define eifixup_byte(ptr, len, charlen)                         \
do {                                                            \
  if (-1 == (len))                                              \
    {                                                           \
      (len) = charcount_to_bytecount (ptr, charlen);            \
    }                                                           \
} while (0)
1035
/* Make sure CHARLEN is specified: if it was given as -1, compute it
   from LEN relative to the text at PTR.  CHARLEN must be an lvalue.
   PTR evaluated at most once, others multiply. */
#define eifixup_char(ptr, len, charlen)                         \
do {                                                            \
  if (-1 == (charlen))                                          \
    {                                                           \
      (charlen) = bytecount_to_charcount (ptr, len);            \
    }                                                           \
} while (0)
1043
1044
1045
/* Make sure we can hold NEWBYTELEN bytes (which is NEWCHARLEN chars)
   plus a zero terminator, and record both lengths in EI.  Preserve
   existing data as much as possible, including existing zero
   terminator.  Put a new zero terminator where it should go if NEWZ is
   non-zero.  All args but EI are evalled only once.

   The buffer grows by a factor of 1.5 (minimum 32 bytes) until it is
   large enough; it never shrinks.  A malloc()ed Eistring is grown with
   xrealloc(); otherwise a new alloca() buffer is taken and the old
   contents copied into it.

   An Eistring that has never been given a buffer (data_ is null) is
   treated as an empty string: it always receives a buffer here, even
   when the new length is 0, and that buffer starts out holding the
   empty string's terminator.  This way data_ is never left null after
   an EI_ALLOC, and callers that pass NEWZ == 0 and then move the "old"
   terminator find a real one to move. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz)                      \
do {                                                                    \
  int ei1oldeibytelen = (ei)->bytelen_;                                 \
  int ei1nodata = ((ei)->data_ == 0);                                   \
                                                                        \
  (ei)->charlen_ = (newcharlen);                                        \
  (ei)->bytelen_ = (newbytelen);                                        \
                                                                        \
  if (ei1nodata || ei1oldeibytelen != (ei)->bytelen_)                   \
    {                                                                   \
      int ei1newsize = (ei)->max_size_allocated_;                       \
      while (ei1newsize < (ei)->bytelen_ + 1)                           \
        {                                                               \
          ei1newsize = (int) (ei1newsize * 1.5);                        \
          if (ei1newsize < 32)                                          \
            ei1newsize = 32;                                            \
        }                                                               \
      if (ei1nodata || ei1newsize != (ei)->max_size_allocated_)         \
        {                                                               \
          if ((ei)->mallocp_)                                           \
            /* xrealloc always preserves existing data as much as       \
               possible */                                              \
            (ei)->data_ = (Intbyte *) xrealloc ((ei)->data_,            \
                                                ei1newsize);            \
          else                                                          \
            {                                                           \
              /* We don't have realloc, so alloca() more space and      \
                 copy the data into it. */                              \
              Intbyte *ei1oldeidata = (ei)->data_;                      \
              (ei)->data_ = (Intbyte *) alloca (ei1newsize);            \
              if (ei1oldeidata)                                         \
                memcpy ((ei)->data_, ei1oldeidata,                      \
                        ei1oldeibytelen + 1);                           \
            }                                                           \
          if (ei1nodata)                                                \
            /* No old text existed: materialize the terminator of       \
               the empty string it stood for. */                        \
            (ei)->data_[0] = '\0';                                      \
          (ei)->max_size_allocated_ = ei1newsize;                       \
        }                                                               \
      if (newz)                                                         \
        (ei)->data_[(ei)->bytelen_] = '\0';                             \
    }                                                                   \
} while (0)
1087
/* Resize EI to BYTELEN bytes / CHARLEN chars (zero-terminated) via
   EI_ALLOC, then overwrite its text with the first BYTELEN bytes at
   DATA.  DATA is read only after the resize. */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen)           \
do {                                                            \
  EI_ALLOC (ei, bytelen, charlen, 1);                           \
  memcpy ((ei)->data_, (data), (ei)->bytelen_);                 \
} while (0)
1093
/* EI_ASSERT_ASCII (ptr, len): assert that each of the LEN bytes at PTR
   is an ASCII character, i.e. lies in 0x00 - 0x7F inclusive (DEL,
   0x7F, is ASCII too).
   EI_ASSERT_ASCIIZ (ptr): the same for a null-terminated string.
   Both evaluate their arguments once, and both expand to nothing
   unless ERROR_CHECK_CHARBPOS is defined. */
#ifdef ERROR_CHECK_CHARBPOS
#define EI_ASSERT_ASCII(ptr, len)                                       \
do {                                                                    \
  int ei5;                                                              \
  const Char_ASCII *ei5ptr = (ptr);                                     \
  int ei5len = (len);                                                   \
                                                                        \
  for (ei5 = 0; ei5 < ei5len; ei5++)                                    \
    assert (ei5ptr[ei5] >= 0x00 && ei5ptr[ei5] <= 0x7F);                \
} while (0)
#define EI_ASSERT_ASCIIZ(ptr)                                           \
do {                                                                    \
  const Char_ASCII *ei5p1 = (ptr);                                      \
  EI_ASSERT_ASCII (ei5p1, strlen (ei5p1));                              \
} while (0)
#else
#define EI_ASSERT_ASCII(ptr, len)
#define EI_ASSERT_ASCIIZ(ptr)
#endif
1113
1114
1115 /* ----- Initialization ----- */
1116
/* Replace the contents of EI with a copy of the text of the Eistring
   EICPY.  EICPY is evaluated once. */
#define eicpy_ei(ei, eicpy)                                             \
do {                                                                    \
  const Eistring *ei2from = (eicpy);                                    \
  EI_ALLOC_AND_COPY (ei, ei2from->data_, ei2from->bytelen_,             \
                     ei2from->charlen_);                                \
} while (0)
1122
/* Replace the contents of EI with a copy of the data of the Lisp
   string LISP_STRING.  LISP_STRING is evaluated once. */
#define eicpy_lstr(ei, lisp_string)                                     \
do {                                                                    \
  Lisp_Object ei3lstr = (lisp_string);                                  \
  EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3lstr),                        \
                     XSTRING_LENGTH (ei3lstr),                          \
                     XSTRING_CHAR_LENGTH (ei3lstr));                    \
} while (0)
1129
/* Replace the contents of EI with a copy of a section of the Lisp
   string LISP_STRING.  The section starts at byte offset OFF (or, if
   OFF is -1, at character offset CHAROFF) and is LEN bytes / CHARLEN
   chars long; either of LEN and CHARLEN may be -1, in which case it is
   computed from the other.  All arguments but EI are evaluated once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen)     \
do {                                                                    \
  Lisp_Object ei23lstr = (lisp_string);                                 \
  int ei23off = (off);                                                  \
  int ei23charoff = (charoff);                                          \
  int ei23len = (len);                                                  \
  int ei23charlen = (charlen);                                          \
  const Intbyte *ei23data = XSTRING_DATA (ei23lstr);                    \
                                                                        \
  /* Only the byte offset is needed below. */                           \
  eifixup_byte (ei23data, ei23off, ei23charoff);                        \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen);          \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen);     \
} while (0)
1146
/* Replace the contents of EI with a copy of the LEN bytes of
   internal-format text at PTR.  FMT must be FORMAT_DEFAULT (asserted).
   The character length is computed from the data.  PTR, FMT and LEN
   are each evaluated once, in that order. */
#define eicpy_raw_fmt(ei, ptr, len, fmt)                                \
do {                                                                    \
  const Intbyte *ei12src = (ptr);                                       \
  Internal_Format ei12format = (fmt);                                   \
  int ei12srclen = (len);                                               \
  assert (ei12format == FORMAT_DEFAULT);                                \
  EI_ALLOC_AND_COPY (ei, ei12src, ei12srclen,                           \
                     bytecount_to_charcount (ei12src, ei12srclen));     \
} while (0)

/* eicpy_raw_fmt() with the default internal format. */
#define eicpy_raw(ei, ptr, len) eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT)
1158
/* Replace the contents of EI with a copy of the null-terminated
   internal-format text at PTR; its length is found with qxestrlen().
   FMT must be FORMAT_DEFAULT (asserted).  PTR and FMT are each
   evaluated exactly once: the saved copy of FMT, not FMT itself, is
   what gets handed on to eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt)                                    \
do {                                                                    \
  const Intbyte *ei12p1ptr = (ptr);                                     \
  Internal_Format ei12p1fmt = (fmt);                                    \
  assert (ei12p1fmt == FORMAT_DEFAULT);                                 \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt);      \
} while (0)

/* eicpy_rawz_fmt() with the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT)
1168
/* Replace the contents of EI with the single character CH.  CH is
   encoded into a local buffer with set_charptr_emchar(), which yields
   the encoded length in bytes. */
#define eicpy_ch(ei, ch)                                                \
do {                                                                    \
  Intbyte ei12p2buf[MAX_EMCHAR_LEN];                                    \
  Bytecount ei12p2buflen = set_charptr_emchar (ei12p2buf, ch);          \
  EI_ALLOC_AND_COPY (ei, ei12p2buf, ei12p2buflen, 1);                   \
} while (0)
1175
/* Replace the contents of EI with the null-terminated ASCII string
   C_STRING, by converting it as external data under Qbinary.  With
   error-checking enabled, non-ASCII bytes trip an assertion.  C_STRING
   is evaluated once. */
#define eicpy_c(ei, c_string)                                           \
do {                                                                    \
  const Char_ASCII *ei4str = (c_string);                                \
                                                                        \
  EI_ASSERT_ASCIIZ (ei4str);                                            \
  eicpy_ext (ei, ei4str, Qbinary);                                      \
} while (0)
1183
/* Replace the contents of EI with the first C_LEN bytes of the ASCII
   string C_STRING, by converting them as external data under Qbinary.
   With error-checking enabled, non-ASCII bytes trip an assertion.
   C_STRING and C_LEN are each evaluated once. */
#define eicpy_c_len(ei, c_string, c_len)                                \
do {                                                                    \
  const Char_ASCII *ei6str = (c_string);                                \
  int ei6strlen = (c_len);                                              \
                                                                        \
  EI_ASSERT_ASCII (ei6str, ei6strlen);                                  \
  eicpy_ext_len (ei, ei6str, ei6strlen, Qbinary);                       \
} while (0)
1192
/* Replace the contents of EI with the EXTLEN bytes of external data at
   EXTDATA, converted to internal format according to CODING_SYSTEM.
   The conversion stores the result pointer and byte length directly
   into EI; the allocated size and character length are then derived
   from them.
   NOTE(review): the ALLOCA sink is used whether or not EI is a
   malloc()ed Eistring, and any previous data_ is simply overwritten --
   confirm this is intended for malloc()ed Eistrings. */
#define eicpy_ext_len(ei, extdata, extlen, coding_system)               \
do {                                                                    \
  const Extbyte *ei7ext = (extdata);                                    \
  int ei7extlen = (extlen);                                             \
                                                                        \
  TO_INTERNAL_FORMAT (DATA, (ei7ext, ei7extlen),                        \
                      ALLOCA, ((ei)->data_, (ei)->bytelen_),            \
                      coding_system);                                   \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1;                       \
  (ei)->charlen_ =                                                      \
    bytecount_to_charcount ((ei)->data_, (ei)->bytelen_);               \
} while (0)
1204
/* Replace the contents of EI with the external data at EXTDATA,
   converted according to CODING_SYSTEM.  The length of the data is
   obtained from dfc_external_data_len().  EXTDATA and CODING_SYSTEM
   are each evaluated exactly once; the coding system is needed both
   for measuring and for converting, so it is saved in a temporary. */
#define eicpy_ext(ei, extdata, coding_system)                           \
do {                                                                    \
  const Extbyte *ei8 = (extdata);                                       \
  Lisp_Object ei8codesys = (coding_system);                             \
                                                                        \
  eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, ei8codesys),      \
                 ei8codesys);                                           \
} while (0)
1212
/* Placeholders: copying from a Lisp buffer or an lstream is not
   written yet.  Any use expands to the tokens below and so fails to
   compile. */
#define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) NOT YET IMPLEMENTED
#define eicpy_lstream(eistr, lstream) NOT YET IMPLEMENTED
1218
/* Make EISTR hold the empty string, by copying "" into it. */
#define eireset(eistr) eicpy_rawz (eistr, (Intbyte *) "")
1220
1221 /* ----- Getting the data out of the Eistring ----- */
1222
/* The internal-format text of EI (the data_ field itself). */
#define eidata(ei) ((ei)->data_)

/* A new Lisp string made from the whole text of EI. */
#define eimake_string(ei) make_string (eidata (ei), eilen (ei))
1226
/* Make a Lisp string from a section of EISTR.  The section starts at
   byte offset OFF (or, if OFF is -1, at character offset CHAROFF) and
   is LEN bytes long (or, if LEN is -1, CHARLEN characters long).

   WARNING: this is a statement, not an expression, and it ends in a
   `return': using it RETURNS THE NEW STRING FROM THE ENCLOSING
   FUNCTION.  It is therefore only usable as the final statement of a
   function returning Lisp_Object.  That behavior is kept as is for
   compatibility.  OFF, CHAROFF, LEN and CHARLEN are evaluated once. */
#define eimake_string_off(eistr, off, charoff, len, charlen)            \
do {                                                                    \
  int ei24off = (off);                                                  \
  int ei24charoff = (charoff);                                          \
  int ei24len = (len);                                                  \
  int ei24charlen = (charlen);                                          \
                                                                        \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff);                  \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen);        \
                                                                        \
  return make_string ((eistr)->data_ + ei24off, ei24len);               \
} while (0)
1240
1241 #define eicpyout_alloca(eistr, ptrout, lenout) \
1242 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT)
1243 #define eicpyout_malloc(eistr, lenout) \
1244 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT)
1245 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out,
1246 Internal_Format fmt);
/* Copy the text of EISTR, including its zero terminator, into freshly
   alloca()ed storage.  PTROUT (an `Intbyte *' lvalue) receives the
   address of the copy and LENOUT (a `Bytecount' lvalue) its length in
   bytes, not counting the terminator.  FMT must be FORMAT_DEFAULT
   (asserted).  PTROUT, LENOUT and FMT are each evaluated once; the
   outputs are written through pointers to the caller's lvalues. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt)                 \
do {                                                                    \
  Internal_Format ei23fmt = (fmt);                                      \
  Intbyte **ei23ptrout = &(ptrout);                                     \
  Bytecount *ei23lenout = &(lenout);                                    \
                                                                        \
  assert (ei23fmt == FORMAT_DEFAULT);                                   \
                                                                        \
  *ei23lenout = (eistr)->bytelen_;                                      \
  *ei23ptrout = alloca_array (Intbyte, (eistr)->bytelen_ + 1);          \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1);          \
} while (0)
1259
1260
1261 /* ----- Moving to the heap ----- */
1262
/* Release EI and return it to its initial state.  For a malloc()ed
   Eistring the internal and external buffers are xfree()d (when
   present) and EI is reset with eiinit_malloc(); otherwise there is
   nothing to free and EI is reset with eiinit(). */
#define eifree(ei)                                      \
do {                                                    \
  if (!(ei)->mallocp_)                                  \
    eiinit (ei);                                        \
  else                                                  \
    {                                                   \
      if ((ei)->data_)                                  \
        xfree ((ei)->data_);                            \
      if ((ei)->extdata_)                               \
        xfree ((ei)->extdata_);                         \
      eiinit_malloc (ei);                               \
    }                                                   \
} while (0)
1276
1277 int eifind_large_enough_buffer (int oldbufsize, int needed_size);
1278 void eito_malloc_1 (Eistring *ei);
1279
1280 #define eito_malloc(ei) eito_malloc_1 (ei)
1281
/* Convert a malloc()ed Eistring into an alloca()ed one: each heap
   buffer that exists is copied into alloca() storage and then
   xfree()d.  The copy of the external data is double null-terminated.
   Does nothing if EI is not malloc()ed.

   This is a plain statement: when there is nothing to do it simply
   falls through, rather than executing a `return' that would leave the
   function that uses the macro. */
#define eito_alloca(ei)                                                 \
do {                                                                    \
  if ((ei)->mallocp_)                                                   \
    {                                                                   \
      (ei)->mallocp_ = 0;                                               \
      if ((ei)->data_)                                                  \
        {                                                               \
          Intbyte *ei13newdata;                                         \
                                                                        \
          (ei)->max_size_allocated_ =                                   \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1);         \
          ei13newdata =                                                 \
            (Intbyte *) alloca ((ei)->max_size_allocated_);             \
          /* Text plus its zero terminator. */                          \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1);        \
          xfree ((ei)->data_);                                          \
          (ei)->data_ = ei13newdata;                                    \
        }                                                               \
                                                                        \
      if ((ei)->extdata_)                                               \
        {                                                               \
          Extbyte *ei13newext =                                         \
            (Extbyte *) alloca ((ei)->extlen_ + 2);                     \
                                                                        \
          memcpy (ei13newext, (ei)->extdata_, (ei)->extlen_);           \
          /* Double null-terminate in case of Unicode data */           \
          ei13newext[(ei)->extlen_] = '\0';                             \
          ei13newext[(ei)->extlen_ + 1] = '\0';                         \
          xfree ((ei)->extdata_);                                       \
          (ei)->extdata_ = ei13newext;                                  \
        }                                                               \
    }                                                                   \
} while (0)
1311
1312
1313 /* ----- Retrieving the length ----- */
1314
/* Length of the text of EI in bytes ... */
#define eilen(ei) ((ei)->bytelen_)
/* ... and in characters. */
#define eicharlen(ei) ((ei)->charlen_)
1317
1318
1319 /* ----- Working with positions ----- */
1320
/* Convert a character position within EI to a byte position ... */
#define eicharpos_to_bytepos(ei, charpos)               \
  charcount_to_bytecount ((ei)->data_, charpos)
/* ... and a byte position to a character position.  Both measure from
   the start of the text of EI. */
#define eibytepos_to_charpos(ei, bytepos)               \
  bytecount_to_charcount ((ei)->data_, bytepos)
1325
1326 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr,
1327 Bytecount bytepos,
1328 Charcount n))
1329 {
1330 Intbyte *pos = eistr->data_ + bytepos;
1331 int i;
1332
1333 charbpos_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
1334 charbpos_checking_assert (n >= 0 && n <= eistr->charlen_);
1335 /* We could check N more correctly now, but that would require a
1336 call to bytecount_to_charcount(), which would be needlessly
1337 expensive (it would convert O(N) algorithms into O(N^2) algorithms
1338 with ERROR_CHECK_CHARBPOS, which would be bad). If N is bad, we are
1339 guaranteed to catch it either inside INC_CHARPTR() or in the check
1340 below. */
1341 for (i = 0; i < n; i++)
1342 INC_CHARPTR (pos);
1343 charbpos_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
1344 return pos - eistr->data_;
1345 }
1346
/* Byte position one character (eiincpos) or N characters (eiincpos_n)
   after BYTEPOS in EI.  There must be no space between the macro name
   and its parameter list, or the macro becomes object-like. */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
1349
1350 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr,
1351 Bytecount bytepos,
1352 Charcount n))
1353 {
1354 Intbyte *pos = eistr->data_ + bytepos;
1355 int i;
1356
1357 charbpos_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
1358 charbpos_checking_assert (n >= 0 && n <= eistr->charlen_);
1359 /* We could check N more correctly now, but ... see above. */
1360 for (i = 0; i < n; i++)
1361 DEC_CHARPTR (pos);
1362 charbpos_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
1363 return pos - eistr->data_;
1364 }
1365
/* Byte position one character (eidecpos) or N characters (eidecpos_n)
   before BYTEPOS in EI.  There must be no space between the macro name
   and its parameter list, or the macro becomes object-like. */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
1368
1369
1370 /* ----- Getting the character at a position ----- */
1371
/* The character of EI that starts at byte position BYTEPOS ... */
#define eigetch(ei, bytepos) charptr_emchar ((ei)->data_ + (bytepos))
/* ... or that is at character position CHARPOS. */
#define eigetch_char(ei, charpos)                       \
  charptr_emchar_n ((ei)->data_, charpos)
1375
1376
1377 /* ----- Setting the character at a position ----- */
1378
/* Replace the one character of EI at byte position BYTEPOS
   (eisetch) or at character position CHARPOS (eisetch_char) with CHR.
   Both are a one-character eisub_ch(); the unknown offset and the byte
   length are passed as -1 so that they get computed. */
#define eisetch(ei, bytepos, chr)                       \
  eisub_ch (ei, bytepos, -1, -1, 1, chr)
#define eisetch_char(ei, charpos, chr)                  \
  eisub_ch (ei, -1, charpos, -1, 1, chr)
1383
1384
1385 /* ----- Concatenation ----- */
1386
/* Worker for the eicat_*() macros: append the BYTELEN bytes (CHARLEN
   characters) at DATA to EI.  EI is grown and zero-terminated by
   EI_ALLOC, and the new bytes are then copied in at the old end of the
   text.  BYTELEN, CHARLEN and DATA are each evaluated once. */
#define eicat_1(ei, data, bytelen, charlen)                             \
do {                                                                    \
  int ei14appendat = (ei)->bytelen_;                                    \
  int ei14addlen = (bytelen);                                           \
  EI_ALLOC (ei, (ei)->bytelen_ + ei14addlen,                            \
            (ei)->charlen_ + (charlen), 1);                             \
  memcpy ((ei)->data_ + ei14appendat, (data), ei14addlen);              \
} while (0)
1396
/* Append the text of the Eistring EI2 to EI.  EI2 is evaluated
   once. */
#define eicat_ei(ei, ei2)                                               \
do {                                                                    \
  const Eistring *ei9tail = (ei2);                                      \
  eicat_1 (ei, ei9tail->data_, ei9tail->bytelen_, ei9tail->charlen_);   \
} while (0)
1402
/* Append the null-terminated ASCII string C_STRING to EI.  With
   error-checking enabled, non-ASCII bytes trip an assertion.
   C_STRING is evaluated once. */
#define eicat_c(ei, c_string)                                           \
do {                                                                    \
  const Char_ASCII *ei15str = (c_string);                               \
  int ei15strlen = strlen (ei15str);                                    \
                                                                        \
  EI_ASSERT_ASCII (ei15str, ei15strlen);                                \
  eicat_1 (ei, ei15str, ei15strlen,                                     \
           bytecount_to_charcount ((Intbyte *) ei15str, ei15strlen));   \
} while (0)
1412
/* Append the LEN bytes of internal-format text at DATA to EI; the
   character count is computed from the data.  LEN is evaluated before
   DATA, each once. */
#define eicat_raw(ei, data, len)                                        \
do {                                                                    \
  int ei16nbytes = (len);                                               \
  const Intbyte *ei16src = (data);                                      \
  eicat_1 (ei, ei16src, ei16nbytes,                                     \
           bytecount_to_charcount (ei16src, ei16nbytes));               \
} while (0)
1420
/* Append the null-terminated internal-format text at PTR to EI; its
   length is found with qxestrlen().  PTR is evaluated once. */
#define eicat_rawz(ei, ptr)                                             \
do {                                                                    \
  const Intbyte *ei16p5src = (ptr);                                     \
  eicat_raw (ei, ei16p5src, qxestrlen (ei16p5src));                     \
} while (0)
1426
/* Append the data of the Lisp string LISP_STRING to EI.  LISP_STRING
   is evaluated once. */
#define eicat_lstr(ei, lisp_string)                                     \
do {                                                                    \
  Lisp_Object ei17lstr = (lisp_string);                                 \
  eicat_1 (ei, XSTRING_DATA (ei17lstr), XSTRING_LENGTH (ei17lstr),      \
           XSTRING_CHAR_LENGTH (ei17lstr));                             \
} while (0)
1433
/* Append the single character CH to EI.  CH is encoded into a local
   buffer with set_charptr_emchar(), which yields the encoded length in
   bytes. */
#define eicat_ch(ei, ch)                                                \
do {                                                                    \
  Intbyte ei22buf[MAX_EMCHAR_LEN];                                      \
  Bytecount ei22buflen = set_charptr_emchar (ei22buf, ch);              \
  eicat_1 (ei, ei22buf, ei22buflen, 1);                                 \
} while (0)
1440
1441
1442 /* ----- Replacement ----- */
1443
/* Worker for the eisub_*() macros and eidel(): replace the section of
   an Eistring at (OFF, LEN) with the data at SRC of length SRCLEN.
   All positions and lengths have corresponding character values
   (CHAROFF, CHARLEN, SRCCHARLEN), and within each pair either can be
   -1 -- it will be computed from the other.  All arguments but EI are
   evaluated once.

   EI is resized without writing a new terminator; instead the text
   that follows the replaced section is slid into place together with
   the existing terminator, and then the new data is copied in. */

#define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
do {                                                                    \
  int ei18off = (off);                                                  \
  int ei18charoff = (charoff);                                          \
  int ei18len = (len);                                                  \
  int ei18charlen = (charlen);                                          \
  Intbyte *ei18src = (Intbyte *) (src);                                 \
  int ei18srclen = (srclen);                                            \
  int ei18srccharlen = (srccharlen);                                    \
  int ei18oldeibytelen = (ei)->bytelen_;                                \
  int ei18taillen;                                                      \
                                                                        \
  eifixup_bytechar ((ei)->data_, ei18off, ei18charoff);                 \
  eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen);       \
  eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen);               \
                                                                        \
  /* Bytes after the replaced section, plus the zero terminator. */     \
  ei18taillen = ei18oldeibytelen - (ei18off + ei18len) + 1;             \
                                                                        \
  EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len,                  \
            (ei)->charlen_ + ei18srccharlen - ei18charlen, 0);          \
  if (ei18srclen != ei18len)                                            \
    {                                                                   \
      memmove ((ei)->data_ + ei18off + ei18srclen,                      \
               (ei)->data_ + ei18off + ei18len,                         \
               ei18taillen);                                            \
    }                                                                   \
  if (ei18srclen > 0)                                                   \
    {                                                                   \
      memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen);              \
    }                                                                   \
} while (0)
1474
/* Replace the section (OFF/CHAROFF, LEN/CHARLEN) of EI with the text
   of the Eistring EI2.  EI2 is evaluated once. */
#define eisub_ei(ei, off, charoff, len, charlen, ei2)                   \
do {                                                                    \
  const Eistring *ei19repl = (ei2);                                     \
  eisub_1 (ei, off, charoff, len, charlen, ei19repl->data_,             \
           ei19repl->bytelen_, ei19repl->charlen_);                     \
} while (0)
1481
/* Replace the section (OFF/CHAROFF, LEN/CHARLEN) of EI with the
   null-terminated ASCII string C_STRING.  With error-checking enabled,
   non-ASCII bytes trip an assertion.  The character length of the
   string is passed as -1 and so computed by eisub_1().  C_STRING is
   evaluated once. */
#define eisub_c(ei, off, charoff, len, charlen, c_string)               \
do {                                                                    \
  const Char_ASCII *ei20str = (c_string);                               \
  int ei20strlen = strlen (ei20str);                                    \
  EI_ASSERT_ASCII (ei20str, ei20strlen);                                \
  eisub_1 (ei, off, charoff, len, charlen, ei20str, ei20strlen, -1);    \
} while (0)
1489
/* Replace the section (OFF/CHAROFF, LEN/CHARLEN) of EI with the single
   character CH.  CH is encoded into a local buffer with
   set_charptr_emchar(), which yields the encoded length in bytes. */
#define eisub_ch(ei, off, charoff, len, charlen, ch)                    \
do {                                                                    \
  Intbyte ei21buf[MAX_EMCHAR_LEN];                                      \
  Bytecount ei21buflen = set_charptr_emchar (ei21buf, ch);              \
  eisub_1 (ei, off, charoff, len, charlen, ei21buf, ei21buflen, 1);     \
} while (0)
1496
/* Delete the section (OFF/CHAROFF, LEN/CHARLEN) of EI: a substitution
   with empty replacement data. */
#define eidel(ei, off, charoff, len, charlen)                           \
  eisub_1 (ei, off, charoff, len, charlen, NULL, 0, 0)
1499
1500
1501 /* ----- Converting to an external format ----- */
1502
/* Convert the text of EI to external format according to
   CODING_SYSTEM and store the result (pointer and byte length) in EI.
   An alloca()ed Eistring gets the result through the ALLOCA sink.  A
   malloc()ed one first has any previous external data xfree()d and
   then gets the result through the MALLOC sink.  CODING_SYSTEM appears
   in both branches but only one of them runs. */
#define eito_external(ei, coding_system)                                \
do {                                                                    \
  if (!(ei)->mallocp_)                                                  \
    {                                                                   \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),          \
                          ALLOCA, ((ei)->extdata_, (ei)->extlen_),      \
                          coding_system);                               \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      if ((ei)->extdata_)                                               \
        {                                                               \
          xfree ((ei)->extdata_);                                       \
          (ei)->extdata_ = 0;                                           \
        }                                                               \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),          \
                          MALLOC, ((ei)->extdata_, (ei)->extlen_),      \
                          coding_system);                               \
    }                                                                   \
} while (0)
1521
/* The external data stored in EI by eito_external() ... */
#define eiextdata(ei) ((ei)->extdata_)
/* ... and its length in bytes. */
#define eiextlen(ei) ((ei)->extlen_)
1524
1525
1526 /* ----- Searching in the Eistring for a character ----- */
1527
/* Placeholders: searching for a character is not written yet.  Any
   use expands to the tokens below and so fails to compile. */
#define eichr(eistr, chr) NOT YET IMPLEMENTED
#define eichr_char(eistr, chr) NOT YET IMPLEMENTED
#define eichr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eichr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_char(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
1544
1545
1546 /* ----- Searching in the Eistring for a string ----- */
1547
/* Placeholders: searching for another Eistring is not written yet.
   Any use expands to the tokens below and so fails to compile. */
#define eistr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eistr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
1564
/* Placeholders: searching for an ASCII string is not written yet.
   Any use expands to the tokens below and so fails to compile. */
#define eistr_c(eistr, c_string) NOT YET IMPLEMENTED
#define eistr_c_char(eistr, c_string) NOT YET IMPLEMENTED
#define eistr_c_off(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eistr_c_off_char(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eirstr_c(eistr, c_string) NOT YET IMPLEMENTED
#define eirstr_c_char(eistr, c_string) NOT YET IMPLEMENTED
#define eirstr_c_off(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eirstr_c_off_char(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
1581
1582
1583 /* ----- Comparison ----- */
1584
1585 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
1586 Bytecount len, Charcount charlen, const Intbyte *data,
1587 const Eistring *ei2, int is_c, int fold_case);
1588
/* Compare EISTR, or the section (OFF/CHAROFF, LEN/CHARLEN) of it in
   the `_off' forms, with the Eistring EISTR2.  The last argument
   handed to eicmp_1() selects the comparison: 0 for eicmp_*, 1 for
   eicasecmp_*, 2 for eicasecmp_i18n_*. */

/* Whole string. */
#define eicmp_ei(eistr, eistr2)                                         \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
#define eicasecmp_ei(eistr, eistr2)                                     \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
#define eicasecmp_i18n_ei(eistr, eistr2)                                \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)

/* Section of the string. */
#define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2)         \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)
#define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2)     \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)
#define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)
1601
/* Compare EISTR, or the section (OFF/CHAROFF, LEN/CHARLEN) of it in
   the `_off' forms, with the ASCII string C_STRING.  The last argument
   handed to eicmp_1() selects the comparison: 0 for eicmp_*, 1 for
   eicasecmp_*, 2 for eicasecmp_i18n_*.

   C_STRING is a `Char_ASCII *' while eicmp_1() takes its data as
   `const Intbyte *', so the pointer is converted explicitly. */
#define eicmp_c(eistr, c_string)                                        \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Intbyte *) (c_string), 0, 1, 0)
#define eicmp_off_c(eistr, off, charoff, len, charlen, c_string)        \
  eicmp_1 (eistr, off, charoff, len, charlen,                           \
           (const Intbyte *) (c_string), 0, 1, 0)
#define eicasecmp_c(eistr, c_string)                                    \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Intbyte *) (c_string), 0, 1, 1)
#define eicasecmp_off_c(eistr, off, charoff, len, charlen, c_string)    \
  eicmp_1 (eistr, off, charoff, len, charlen,                           \
           (const Intbyte *) (c_string), 0, 1, 1)
#define eicasecmp_i18n_c(eistr, c_string)                               \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Intbyte *) (c_string), 0, 1, 2)
#define eicasecmp_i18n_off_c(eistr, off, charoff, len, charlen, c_string) \
  eicmp_1 (eistr, off, charoff, len, charlen,                           \
           (const Intbyte *) (c_string), 0, 1, 2)
1614
1615
1616 /* ----- Case-changing the Eistring ----- */
1617
1618 int eistr_casefiddle_1 (Intbyte *olddata, Bytecount len, Intbyte *newdata,
1619 int downp);
1620
/* Convert the text of EI to lowercase (DOWNP non-zero) or uppercase.
   The converted text is produced by eistr_casefiddle_1() in an
   alloca()ed scratch buffer large enough for charlen_ characters of
   maximum size plus a terminator.  If it reports a non-zero length:

   -- an alloca()ed Eistring simply adopts the scratch buffer as its
      new data;
   -- a malloc()ed Eistring keeps its heap buffer (grown with
      xrealloc() if necessary) and has the converted text copied into
      it and zero-terminated.  Adopting the scratch buffer there would
      leak the heap buffer and leave data_ pointing at stack memory
      that is later passed to xrealloc()/xfree().

   The character length does not change. */
#define EI_CASECHANGE(ei, downp)                                        \
do {                                                                    \
  int ei11new_allocmax = (ei)->charlen_ * MAX_EMCHAR_LEN + 1;           \
  Intbyte *ei11storage = (Intbyte *) alloca_array (Intbyte,             \
                                                   ei11new_allocmax);   \
  int ei11newlen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_,     \
                                       ei11storage, downp);             \
                                                                        \
  if (ei11newlen)                                                       \
    {                                                                   \
      if ((ei)->mallocp_)                                               \
        {                                                               \
          if ((ei)->max_size_allocated_ < ei11newlen + 1)               \
            {                                                           \
              (ei)->data_ = (Intbyte *) xrealloc ((ei)->data_,          \
                                                  ei11newlen + 1);      \
              (ei)->max_size_allocated_ = ei11newlen + 1;               \
            }                                                           \
          memcpy ((ei)->data_, ei11storage, ei11newlen);                \
          (ei)->data_[ei11newlen] = '\0';                               \
        }                                                               \
      else                                                              \
        {                                                               \
          (ei)->max_size_allocated_ = ei11new_allocmax;                 \
          (ei)->data_ = ei11storage;                                    \
        }                                                               \
      (ei)->bytelen_ = ei11newlen;                                      \
      /* charlen is the same. */                                        \
    }                                                                   \
} while (0)
1637
/* Convert all characters in EI to lowercase ... */
#define eilwr(ei) EI_CASECHANGE (ei, 1)
/* ... or to uppercase. */
#define eiupr(ei) EI_CASECHANGE (ei, 0)
1640
1641
1642 /************************************************************************/
1643 /* */
1644 /* Converting between internal and external format */
1645 /* */
1646 /************************************************************************/
1647 /*
1648 All client code should use only the two macros
1649
1650 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
1651 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
1652
1653 Typical use is
1654
1655 TO_EXTERNAL_FORMAT (DATA, (ptr, len),
1656 LISP_BUFFER, buffer,
1657 Qfile_name);
1658
1659 NOTE: GC is inhibited during the entire operation of these macros. This
1660 is because frequently the data to be converted comes from strings but
1661 gets passed in as just DATA, and GC may move around the string data. If
1662 we didn't inhibit GC, there'd have to be a lot of messy recoding,
1663 alloca-copying of strings and other annoying stuff.
1664
1665 The source or sink can be specified in one of these ways:
1666
1667 DATA, (ptr, len), // input data is a fixed buffer of size len
1668 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len
1669 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
1670 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
1671 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
1672 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr))
1673 // on input (the Unicode version is used when correct)
1674 LISP_STRING, string, // input or output is a Lisp_Object of type string
1675 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
1676 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream
1677 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque
1678
1679 When specifying the sink, use lvalues, since the macro will assign to them,
1680 except when the sink is an lstream or a lisp buffer.
1681
1682 The macros accept the kinds of sources and sinks appropriate for
1683 internal and external data representation. See the type_checking_assert
1684 macros below for the actual allowed types.
1685
1686 Since some sources and sinks use one argument (a Lisp_Object) to
1687 specify them, while others take a (pointer, length) pair, we use
1688 some C preprocessor trickery to allow pair arguments to be specified
1689 by parenthesizing them, as in the examples above.
1690
1691 Anything prefixed by dfc_ (`data format conversion') is private.
1692 They are only used to implement these macros.
1693
1694 [[Using C_STRING* is appropriate for using with external APIs that
1695 take null-terminated strings. For internal data, we should try to
1696 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'.
1697
1698 Sometime in the future we might allow output to C_STRING_ALLOCA or
1699 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not
1700 TO_INTERNAL_FORMAT().]]
1701
1702 The above comments are not true. Frequently (most of the time, in
1703 fact), external strings come as zero-terminated entities, where the
1704 zero-termination is the only way to find out the length. Even in
1705 cases where you can get the length, most of the time the system will
1706 still use the null to signal the end of the string, and there will
1707 still be no way to either send in or receive a string with embedded
1708 nulls. In such situations, it's pointless to track the length
1709 because null bytes can never be in the string. We have a lot of
1710 operations that make it easy to operate on zero-terminated strings,
1711 and forcing the user to deal with the length everywhere would only
1712 make the code uglier and more complicated, for no gain. --ben
1713
1714 There is no problem using the same lvalue for source and sink.
1715
1716 Also, when pointers are required, the code (currently at least) is
1717 lax and allows any pointer types, either in the source or the sink.
1718 This makes it possible, e.g., to deal with internal format data held
1719 in char *'s or external format data held in WCHAR * (i.e. Unicode).
1720
1721 Finally, whenever storage allocation is called for, extra space is
1722 allocated for a terminating zero, and such a zero is stored in the
1723 appropriate place, regardless of whether the source data was
1724 specified using a length or was specified as zero-terminated. This
1725 allows you to freely pass the resulting data, no matter how
1726 obtained, to a routine that expects zero termination (modulo, of
1727 course, that any embedded zeros in the resulting text will cause
1728 truncation). In fact, currently two terminating zeros are allocated
1729 and stored after the data result. This is to allow for the
1730 possibility of storing a Unicode value on output, which needs the
1731 two zeros. Currently, however, the two zeros are stored regardless
1732 of whether the conversion is internal or external and regardless of
1733 whether the external coding system is in fact Unicode. This
1734 behavior may change in the future, and you cannot rely on this --
1735 the most you can rely on is that sink data in Unicode format will
1736 have two terminating nulls, which combine to form one Unicode null
1737 character. */
1738
1739 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
1740 do { \
1741 dfc_conversion_type dfc_simplified_source_type; \
1742 dfc_conversion_type dfc_simplified_sink_type; \
1743 dfc_conversion_data dfc_source; \
1744 dfc_conversion_data dfc_sink; \
1745 Lisp_Object dfc_codesys = (codesys); \
1746 \
1747 type_checking_assert \
1748 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
1749 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
1750 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \
1751 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
1752 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
1753 && \
1754 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
1755 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
1756 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
1757 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
1758 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
1759 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
1760 \
1761 DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
1762 DFC_SINK_##sink_type##_TO_ARGS (sink); \
1763 \
1764 dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \
1765 dfc_codesys, \
1766 dfc_simplified_sink_type, &dfc_sink); \
1767 \
1768 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
1769 } while (0)
1770
1771 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
1772 do { \
1773 dfc_conversion_type dfc_simplified_source_type; \
1774 dfc_conversion_type dfc_simplified_sink_type; \
1775 dfc_conversion_data dfc_source; \
1776 dfc_conversion_data dfc_sink; \
1777 Lisp_Object dfc_codesys = (codesys); \
1778 \
1779 type_checking_assert \
1780 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
1781 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
1782 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
1783 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
1784 && \
1785 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
1786 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
1787 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
1788 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
1789 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \
1790 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
1791 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
1792 \
1793 DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
1794 DFC_SINK_##sink_type##_TO_ARGS (sink); \
1795 \
1796 dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, \
1797 dfc_codesys, \
1798 dfc_simplified_sink_type, &dfc_sink); \
1799 \
1800 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
1801 } while (0)
1802
1803
1804 typedef union
1805 {
1806 struct { const void *ptr; Bytecount len; } data;
1807 Lisp_Object lisp_object;
1808 } dfc_conversion_data;
1809
1810 enum dfc_conversion_type
1811 {
1812 DFC_TYPE_DATA,
1813 DFC_TYPE_ALLOCA,
1814 DFC_TYPE_MALLOC,
1815 DFC_TYPE_C_STRING,
1816 DFC_TYPE_C_STRING_ALLOCA,
1817 DFC_TYPE_C_STRING_MALLOC,
1818 DFC_TYPE_LISP_STRING,
1819 DFC_TYPE_LISP_LSTREAM,
1820 DFC_TYPE_LISP_OPAQUE,
1821 DFC_TYPE_LISP_BUFFER
1822 };
1823 typedef enum dfc_conversion_type dfc_conversion_type;
1824
1825 /* WARNING: These use a static buffer. This can lead to disaster if
1826 these functions are not used *very* carefully. Another reason to only use
1827 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
1828 void
1829 dfc_convert_to_external_format (dfc_conversion_type source_type,
1830 dfc_conversion_data *source,
1831 Lisp_Object coding_system,
1832 dfc_conversion_type sink_type,
1833 dfc_conversion_data *sink);
1834 void
1835 dfc_convert_to_internal_format (dfc_conversion_type source_type,
1836 dfc_conversion_data *source,
1837 Lisp_Object coding_system,
1838 dfc_conversion_type sink_type,
1839 dfc_conversion_data *sink);
1840 /* CPP Trickery */
1841 #define DFC_CPP_CAR(x,y) (x)
1842 #define DFC_CPP_CDR(x,y) (y)
1843
1844 /* Convert `source' to args for dfc_convert_to_external_format() */
1845 #define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \
1846 dfc_source.data.ptr = DFC_CPP_CAR val; \
1847 dfc_source.data.len = DFC_CPP_CDR val; \
1848 dfc_simplified_source_type = DFC_TYPE_DATA; \
1849 } while (0)
1850 #define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
1851 dfc_source.data.len = \
1852 strlen ((char *) (dfc_source.data.ptr = (val))); \
1853 dfc_simplified_source_type = DFC_TYPE_DATA; \
1854 } while (0)
1855 #define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \
1856 Lisp_Object dfc_slsta = (val); \
1857 type_checking_assert (STRINGP (dfc_slsta)); \
1858 dfc_source.lisp_object = dfc_slsta; \
1859 dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \
1860 } while (0)
1861 #define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \
1862 Lisp_Object dfc_sllta = (val); \
1863 type_checking_assert (LSTREAMP (dfc_sllta)); \
1864 dfc_source.lisp_object = dfc_sllta; \
1865 dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \
1866 } while (0)
1867 #define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \
1868 Lisp_Opaque *dfc_slota = XOPAQUE (val); \
1869 dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \
1870 dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \
1871 dfc_simplified_source_type = DFC_TYPE_DATA; \
1872 } while (0)
1873
1874 /* Convert `source' to args for dfc_convert_to_internal_format() */
1875 #define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \
1876 DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)
1877 #define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
1878 dfc_source.data.len = dfc_external_data_len (dfc_source.data.ptr = (val), \
1879 codesys); \
1880 dfc_simplified_source_type = DFC_TYPE_DATA; \
1881 } while (0)
1882 #define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \
1883 DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)
1884 #define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \
1885 DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)
1886 #define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \
1887 DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)
1888
1889 /* Convert `sink' to args for dfc_convert_to_*_format() */
1890 #define DFC_SINK_ALLOCA_TO_ARGS(val) \
1891 dfc_simplified_sink_type = DFC_TYPE_DATA
1892 #define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \
1893 dfc_simplified_sink_type = DFC_TYPE_DATA
1894 #define DFC_SINK_MALLOC_TO_ARGS(val) \
1895 dfc_simplified_sink_type = DFC_TYPE_DATA
1896 #define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \
1897 dfc_simplified_sink_type = DFC_TYPE_DATA
1898 #define DFC_SINK_LISP_STRING_TO_ARGS(val) \
1899 dfc_simplified_sink_type = DFC_TYPE_DATA
1900 #define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \
1901 dfc_simplified_sink_type = DFC_TYPE_DATA
1902 #define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \
1903 Lisp_Object dfc_sllta = (val); \
1904 type_checking_assert (LSTREAMP (dfc_sllta)); \
1905 dfc_sink.lisp_object = dfc_sllta; \
1906 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
1907 } while (0)
1908 #define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \
1909 struct buffer *dfc_slbta = XBUFFER (val); \
1910 dfc_sink.lisp_object = \
1911 make_lisp_buffer_output_stream \
1912 (dfc_slbta, BUF_PT (dfc_slbta), 0); \
1913 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
1914 } while (0)
1915
1916 /* Assign to the `sink' lvalue(s) using the converted data. */
1917 /* + 2 because the data is terminated by two zero bytes to account for Unicode conversion */
1918 typedef union { char c; void *p; } *dfc_aliasing_voidpp;
1919 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \
1920 void * dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
1921 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
1922 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
1923 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
1924 } while (0)
1925 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \
1926 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
1927 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
1928 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
1929 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
1930 } while (0)
1931 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \
1932 void * dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
1933 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
1934 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
1935 } while (0)
1936 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \
1937 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
1938 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
1939 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
1940 } while (0)
1941 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
1942 sink = make_string ((Intbyte *) dfc_sink.data.ptr, dfc_sink.data.len)
1943 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
1944 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)
1945 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */
1946 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
1947 Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
1948
1949 /* Convenience macros for extremely common invocations */
1950 #define C_STRING_TO_EXTERNAL(in, out, coding_system) \
1951 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
1952 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
1953 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
1954 #define EXTERNAL_TO_C_STRING(in, out, coding_system) \
1955 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
1956 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, coding_system) \
1957 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
1958 #define LISP_STRING_TO_EXTERNAL(in, out, coding_system) \
1959 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, coding_system)
1960 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
1961 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, coding_system)
1962
1963 /* Standins for various encodings, until we know them better */
1964 #define Qcommand_argument_encoding Qnative
1965 #define Qenvironment_variable_encoding Qnative
1966 #define Qunix_host_name_encoding Qnative
1967 #define Qunix_service_name_encoding Qnative
1968 #define Qmswindows_host_name_encoding Qmswindows_multibyte
1969 #define Qmswindows_service_name_encoding Qmswindows_multibyte
1970
1971 /* Standins for various X encodings, until we know them better */
1972
1973 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext?
1974 Almost certainly the former. Use a standin for now. */
1975 #define Qlwlib_encoding Qnative
1976
1977 #define Qx_atom_name_encoding Qctext
1978 /* font names are often stored in atoms, so it gets sticky if we set this
1979 to something different from atom-name encoding */
1980 #define Qx_font_name_encoding Qctext
1981
1982 #define Qx_color_name_encoding Qctext
1983
1984 /* the following probably must agree with Qcommand_argument_encoding and
1985 Qenvironment_variable_encoding */
1986 #define Qx_display_name_encoding Qnative
1987
1988 #define Qstrerror_encoding Qnative
1989
1990 #define GET_STRERROR(var, num) \
1991 do { \
1992 int __gsnum__ = (num); \
1993 Extbyte * __gserr__ = strerror (__gsnum__); \
1994 \
1995 if (!__gserr__) \
1996 { \
1997 var = alloca_intbytes (99); \
1998 qxesprintf (var, "Unknown error %d", __gsnum__); \
1999 } \
2000 else \
2001 EXTERNAL_TO_C_STRING (__gserr__, var, Qstrerror_encoding); \
2002 } while (0)
2003
2004 /************************************************************************/
2005 /* Lisp string representation convenience functions */
2006 /************************************************************************/
2007
2008 /* Because the representation of internally formatted data is subject
2009 to change, it's bad style to do something like
2010
2011 strcmp (XSTRING_DATA (s), "foo")
2012
2013 Instead, use the portable:
2014
2015 intbyte_strcmp (XSTRING_DATA (s), "foo") or
2016 intbyte_memcmp (XSTRING_DATA (s), "foo", 3)
2017
2018 */
2019
2020 /* Like strcmp, except the first arg points at internally formatted data,
2021 while the second points at a zero-terminated string of only ASCII chars. */
2022 DECLARE_INLINE_HEADER (
2023 int
2024 intbyte_strcmp (const Intbyte *bp, const char *ascii_string)
2025 )
2026 {
2027 #ifdef MULE
2028 while (1)
2029 {
2030 int diff;
2031 type_checking_assert (BYTE_ASCII_P (*ascii_string)); /* second arg must be pure ASCII */
2032 if ((diff = charptr_emchar (bp) - *(Intbyte *) ascii_string) != 0)
2033 return diff; /* first mismatch decides the result */
2034 if (*ascii_string == '\0') /* values matched, so both strings end here */
2035 return 0;
2036 ascii_string++;
2037 INC_CHARPTR (bp); /* presumably steps BP over one whole character -- confirm */
2038 }
2039 #else
2040 return strcmp ((char *)bp, ascii_string);
2041 #endif
2042 }
2043
2044 /* Like memcmp, except first arg points at internally formatted data,
2045 while the second points at a string of only ASCII chars. Returns 0 when the first LEN chars of ASCII_STRING all match. */
2046
2047 DECLARE_INLINE_HEADER (
2048 int
2049 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len)
2050 )
2051 {
2052 #ifdef MULE
2053 while (len--) /* one iteration per char of ascii_string */
2054 {
2055 int diff = charptr_emchar (bp) - *(Intbyte *) ascii_string;
2056 type_checking_assert (BYTE_ASCII_P (*ascii_string)); /* second arg must be pure ASCII */
2057 if (diff != 0)
2058 return diff; /* first mismatch decides the result */
2059 ascii_string++;
2060 INC_CHARPTR (bp); /* presumably steps BP over one whole character -- confirm */
2061 }
2062 return 0;
2063 #else
2064 return memcmp (bp, ascii_string, len);
2065 #endif
2066 }
2067
2068 #endif /* INCLUDED_text_h_ */