Mercurial > hg > xemacs-beta
comparison src/text.h @ 771:943eaba38521
[xemacs-hg @ 2002-03-13 08:51:24 by ben]
The big ben-mule-21-5 check-in!
Various files were added and deleted. See CHANGES-ben-mule.
There are still some test suite failures. No crashes, though.
Many of the failures have to do with problems in the test suite itself
rather than in the actual code. I'll be addressing these in the next
day or so -- none of the test suite failures are at all critical.
Meanwhile I'll be trying to address the biggest issues -- i.e. build
or run failures, which will almost certainly happen on various platforms.
All comments should be sent to ben@xemacs.org -- use a Cc: if necessary
when sending to mailing lists. There will be pre- and post- tags,
something like
pre-ben-mule-21-5-merge-in, and
post-ben-mule-21-5-merge-in.
author | ben |
---|---|
date | Wed, 13 Mar 2002 08:54:06 +0000 |
parents | |
children | 026c5bf9c134 |
comparison
equal
deleted
inserted
replaced
770:336a418893b5 | 771:943eaba38521 |
---|---|
1 /* Header file for text manipulation primitives and macros. | |
2 Copyright (C) 1985-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4 Copyright (C) 2000, 2001 Ben Wing. | |
5 | |
6 This file is part of XEmacs. | |
7 | |
8 XEmacs is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with XEmacs; see the file COPYING. If not, write to | |
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 Boston, MA 02111-1307, USA. */ | |
22 | |
23 /* Synched up with: FSF 19.30. */ | |
24 | |
25 /* Authorship: | |
26 | |
27 Mostly written by Ben Wing, starting around 1995. | |
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz, | |
29 designed by Ben Wing based on earlier macros by Ben Wing. | |
30 Separated out June 18, 2000 from buffer.h into text.h. | |
31 */ | |
32 | |
33 #ifndef INCLUDED_text_h_ | |
34 #define INCLUDED_text_h_ | |
35 | |
36 #include <wchar.h> | |
37 | |
38 /* ---------------------------------------------------------------------- */ | |
39 /* Super-basic character properties */ | |
40 /* ---------------------------------------------------------------------- */ | |
41 | |
42 /* These properties define the specifics of how our current encoding fits | |
43 in the basic model used for the encoding. Because this model is the same | |
44 as is used for UTF-8, all these properties could be defined for it, too. | |
45 This would instantly make the rest of this file work with UTF-8 (with | |
46 the exception of a few called functions that would need to be redefined). | |
47 | |
48 (UTF-2000 implementers, take note!) | |
49 */ | |
50 | |
51 /* If you want more than this, you need to include charset.h */ | |
52 | |
53 #ifndef MULE | |
54 | |
55 #define REP_BYTES_BY_FIRST_BYTE(fb) 1 | |
56 #define BYTE_ASCII_P(byte) 1 | |
57 # define MAX_EMCHAR_LEN 1 | |
58 | |
59 #else /* MULE */ | |
60 | |
61 /* These are carefully designed to work if BYTE is signed or unsigned. */ | |
62 /* Note that SPC and DEL are considered ASCII, not control. */ | |
63 | |
64 #define BYTE_ASCII_P(byte) (((byte) & ~0x7f) == 0) | |
65 #define BYTE_C0_P(byte) (((byte) & ~0x1f) == 0) | |
66 #define BYTE_C1_P(byte) (((byte) & ~0x1f) == 0x80) | |
67 | |
68 /* Does BYTE represent the first byte of a character? */ | |
69 | |
70 #define INTBYTE_FIRST_BYTE_P(byte) ((byte) < 0xA0) | |
71 | |
72 /* Does BYTE represent the first byte of a multi-byte character? */ | |
73 | |
74 #define INTBYTE_LEADING_BYTE_P(byte) BYTE_C1_P (byte) | |
75 | |
76 /* Table of number of bytes in the string representation of a character | |
77 indexed by the first byte of that representation. | |
78 | |
79 This value can be derived in other ways -- e.g. something like | |
80 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte)) | |
81 but it's faster this way. */ | |
82 extern const Bytecount rep_bytes_by_first_byte[0xA0]; | |
83 | |
84 /* Number of bytes in the string representation of a character. */ | |
85 INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE (int fb); | |
86 INLINE_HEADER int | |
87 REP_BYTES_BY_FIRST_BYTE (int fb) | |
88 { | |
89 type_checking_assert (fb < 0xA0); | |
90 return rep_bytes_by_first_byte[fb]; | |
91 } | |
92 | |
93 /* Is this character represented by more than one byte in a string? */ | |
94 | |
95 #define CHAR_MULTIBYTE_P(c) ((c) >= 0x80) | |
96 | |
97 #define CHAR_ASCII_P(c) (!CHAR_MULTIBYTE_P (c)) | |
98 | |
99 #define MAX_EMCHAR_LEN 4 | |
100 | |
101 #endif /* MULE */ | |
102 | |
103 int dfc_coding_system_is_unicode (Lisp_Object coding_system); | |
104 | |
105 DECLARE_INLINE_HEADER ( | |
106 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys) | |
107 ) | |
108 { | |
109 if (dfc_coding_system_is_unicode (codesys)) | |
110 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr); | |
111 else | |
112 return strlen ((char *) ptr); | |
113 } | |
114 | |
115 | |
116 /************************************************************************/ | |
117 /* */ | |
118 /* working with raw internal-format data */ | |
119 /* */ | |
120 /************************************************************************/ | |
121 | |
122 /* NOTE: In all the following macros, we follow these rules concerning | |
123 multiple evaluation of the arguments: | |
124 | |
125 1) Anything that's an lvalue can be evaluated more than once. | |
126 2) Anything that's a Lisp Object can be evaluated more than once. | |
127 This should probably be changed, but this follows the way | |
128 that all the macros in lisp.h do things. | |
129 3) 'struct buffer *' arguments can be evaluated more than once. | |
130 4) Nothing else can be evaluated more than once. Use inline | |
131 functions, if necessary, to prevent multiple evaluation. | |
132 5) An exception to (4) is that there are some macros below that | |
133 may evaluate their arguments more than once. They are all | |
134 denoted with the word "unsafe" in their name and are generally | |
135 meant to be called only by other macros that have already | |
136 stored the calling values in temporary variables. | |
137 | |
138 | |
139 Use the following functions/macros on contiguous strings of data. | |
140 If the text you're operating on is known to come from a buffer, use | |
141 the buffer-level functions below -- they know about the gap and may | |
142 be more efficient. | |
143 | |
144 | |
145 ---------------------------------------------------------------------------- | |
146 (A) For working with charptr's (pointers to internally-formatted text): | |
147 ---------------------------------------------------------------------------- | |
148 | |
149 VALID_CHARPTR_P (ptr): | |
150 Given a charptr, does it point to the beginning of a character? | |
151 | |
152 ASSERT_VALID_CHARPTR (ptr): | |
153 If error-checking is enabled, assert that the given charptr | |
154 points to the beginning of a character. Otherwise, do nothing. | |
155 | |
156 INC_CHARPTR (ptr): | |
157 Given a charptr (assumed to point at the beginning of a character), | |
158 modify that pointer so it points to the beginning of the next | |
159 character. | |
160 | |
161 DEC_CHARPTR (ptr): | |
162 Given a charptr (assumed to point at the beginning of a | |
163 character or at the very end of the text), modify that pointer | |
164 so it points to the beginning of the previous character. | |
165 | |
166 VALIDATE_CHARPTR_BACKWARD (ptr): | |
167 Make sure that PTR is pointing to the beginning of a character. | |
168 If not, back up until this is the case. Note that there are not | |
169 too many places where it is legitimate to do this sort of thing. | |
170 It's an error if you're passed an "invalid" char * pointer. | |
171 NOTE: PTR *must* be pointing to a valid part of the string (i.e. | |
172 not the very end, unless the string is zero-terminated or | |
173 something) in order for this function to not cause crashes. | |
174 | |
175 VALIDATE_CHARPTR_FORWARD (ptr): | |
176 Make sure that PTR is pointing to the beginning of a character. | |
177 If not, move forward until this is the case. Note that there | |
178 are not too many places where it is legitimate to do this sort | |
179 of thing. It's an error if you're passed an "invalid" char * | |
180 pointer. | |
181 | |
182 --------------------------------------------------------------------- | |
183 (B) For working with the length (in bytes and characters) of a | |
184 section of internally-formatted text: | |
185 --------------------------------------------------------------------- | |
186 | |
187 bytecount_to_charcount (ptr, nbi): | |
188 Given a pointer to a text string and a length in bytes, | |
189 return the equivalent length in characters. | |
190 | |
191 charcount_to_bytecount (ptr, nch): | |
192 Given a pointer to a text string and a length in characters, | |
193 return the equivalent length in bytes. | |
194 | |
195 charptr_n_addr (ptr, n): | |
196 Return a pointer to the beginning of the character offset N | |
197 (in characters) from PTR. | |
198 | |
199 ------------------------------------------------------------------------- | |
200 (C) For retrieving or changing the character pointed to by a charptr: | |
201 ------------------------------------------------------------------------- | |
202 | |
203 charptr_emchar (ptr): | |
204 Retrieve the character pointed to by PTR as an Emchar. | |
205 | |
206 charptr_emchar_n (ptr, n): | |
207 Retrieve the character at offset N (in characters) from PTR, | |
208 as an Emchar. | |
209 | |
210 set_charptr_emchar (ptr, ch): | |
211 Store the character CH (an Emchar) as internally-formatted | |
212 text starting at PTR. Return the number of bytes stored. | |
213 | |
214 charptr_copy_char (src, dst): | |
215 Retrieve the character pointed to by SRC and store it as | |
216 internally-formatted text in DST. | |
217 | |
218 ---------------------------------- | |
219 (D) For working with Emchars: | |
220 ---------------------------------- | |
221 | |
222 [Note that there are other functions/macros for working with Emchars | |
223 in charset.h, for retrieving the charset of an Emchar and such.] | |
224 | |
225 valid_char_p (ch): | |
226 Return whether the given Emchar is valid. | |
227 | |
228 CHARP (ch): | |
229 Return whether the given Lisp_Object is a character. | |
230 | |
231 CHECK_CHAR_COERCE_INT (ch): | |
232 Signal an error if CH is not a valid character or integer Lisp_Object. | |
233 If CH is an integer Lisp_Object, convert it to a character Lisp_Object, | |
234 but merely by repackaging, without performing tests for char validity. | |
235 | |
236 MAX_EMCHAR_LEN: | |
237 Maximum number of buffer bytes per Emacs character. | |
238 */ | |
239 | |
240 /* ---------------------------------------------------------------------- */ | |
241 /* (A) For working with charptr's (pointers to internally-formatted text) */ | |
242 /* ---------------------------------------------------------------------- */ | |
243 | |
244 #ifdef MULE | |
245 # define VALID_CHARPTR_P(ptr) INTBYTE_FIRST_BYTE_P (* (unsigned char *) ptr) | |
246 #else | |
247 # define VALID_CHARPTR_P(ptr) 1 | |
248 #endif | |
249 | |
250 #ifdef ERROR_CHECK_CHARBPOS | |
251 # define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr)) | |
252 #else | |
253 # define ASSERT_VALID_CHARPTR(ptr) | |
254 #endif | |
255 | |
256 /* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in | |
257 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR() | |
258 trick of looking for a valid first byte because it might run off | |
259 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR() | |
260 method because it doesn't have easy access to the first byte of | |
261 the character it's moving over. */ | |
262 | |
263 #define REAL_INC_CHARPTR(ptr) \ | |
264 ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))) | |
265 | |
266 #define REAL_INC_CHARBYTEBPOS(ptr, pos) \ | |
267 (pos += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))) | |
268 | |
269 #define REAL_DEC_CHARPTR(ptr) do { \ | |
270 (ptr)--; \ | |
271 } while (!VALID_CHARPTR_P (ptr)) | |
272 | |
273 #ifdef ERROR_CHECK_CHARBPOS | |
274 #define INC_CHARPTR(ptr) do { \ | |
275 ASSERT_VALID_CHARPTR (ptr); \ | |
276 REAL_INC_CHARPTR (ptr); \ | |
277 } while (0) | |
278 | |
279 #define INC_CHARBYTEBPOS(ptr, pos) do { \ | |
280 ASSERT_VALID_CHARPTR (ptr); \ | |
281 REAL_INC_CHARBYTEBPOS (ptr, pos); \ | |
282 } while (0) | |
283 | |
284 #define DEC_CHARPTR(ptr) do { \ | |
285 const Intbyte *dc_ptr1 = (ptr); \ | |
286 const Intbyte *dc_ptr2 = dc_ptr1; \ | |
287 REAL_DEC_CHARPTR (dc_ptr2); \ | |
288 assert (dc_ptr1 - dc_ptr2 == \ | |
289 REP_BYTES_BY_FIRST_BYTE (*dc_ptr2)); \ | |
290 (ptr) = (Intbyte *) dc_ptr2; \ | |
291 } while (0) | |
292 | |
293 #else /* ! ERROR_CHECK_CHARBPOS */ | |
294 #define INC_CHARBYTEBPOS(ptr, pos) REAL_INC_CHARBYTEBPOS (ptr, pos) | |
295 #define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr) | |
296 #define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr) | |
297 #endif /* ! ERROR_CHECK_CHARBPOS */ | |
298 | |
299 #ifdef MULE | |
300 | |
301 /* Note that this reads the byte at *PTR! */ | |
302 | |
303 #define VALIDATE_CHARPTR_BACKWARD(ptr) do { \ | |
304 while (!VALID_CHARPTR_P (ptr)) ptr--; \ | |
305 } while (0) | |
306 | |
 307 /* Given an Intbyte string at PTR of size N, possibly with a partial | |
308 character at the end, return the size of the longest substring of | |
309 complete characters. Does not assume that the byte at *(PTR + N) is | |
310 readable. */ | |
311 DECLARE_INLINE_HEADER ( | |
312 Bytecount | |
313 validate_intbyte_string_backward (Intbyte *ptr, Bytecount n) | |
314 ) | |
315 { | |
316 Intbyte *ptr2; | |
317 | |
318 if (n == 0) | |
319 return n; | |
320 ptr2 = ptr + n - 1; | |
321 VALIDATE_CHARPTR_BACKWARD (ptr2); | |
322 if (ptr2 + REP_BYTES_BY_FIRST_BYTE (*ptr2) != ptr + n) | |
323 return ptr2 - ptr; | |
324 return n; | |
325 } | |
326 | |
327 /* This needs to be trickier than VALIDATE_CHARPTR_BACKWARD() to avoid the | |
328 possibility of running off the end of the string. */ | |
329 | |
330 #define VALIDATE_CHARPTR_FORWARD(ptr) do { \ | |
331 Intbyte *vcf_ptr = (ptr); \ | |
332 VALIDATE_CHARPTR_BACKWARD (vcf_ptr); \ | |
333 if (vcf_ptr != (ptr)) \ | |
334 { \ | |
335 (ptr) = vcf_ptr; \ | |
336 INC_CHARPTR (ptr); \ | |
337 } \ | |
338 } while (0) | |
339 | |
340 #else /* not MULE */ | |
341 #define VALIDATE_CHARPTR_BACKWARD(ptr) | |
342 #define VALIDATE_CHARPTR_FORWARD(ptr) | |
343 #define validate_intbyte_string_backward(ptr, n) (n) | |
344 #endif /* not MULE */ | |
345 | |
346 /* -------------------------------------------------------------- */ | |
347 /* (B) For working with the length (in bytes and characters) of a */ | |
348 /* section of internally-formatted text */ | |
349 /* -------------------------------------------------------------- */ | |
350 | |
351 INLINE_HEADER const Intbyte * | |
352 charptr_n_addr (const Intbyte *ptr, Charcount offset); | |
353 INLINE_HEADER const Intbyte * | |
354 charptr_n_addr (const Intbyte *ptr, Charcount offset) | |
355 { | |
356 return ptr + charcount_to_bytecount (ptr, offset); | |
357 } | |
358 | |
359 /* -------------------------------------------------------------------- */ | |
360 /* (C) For retrieving or changing the character pointed to by a charptr */ | |
361 /* -------------------------------------------------------------------- */ | |
362 | |
363 #define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0]) | |
364 #define simple_set_charptr_emchar(ptr, x) ((ptr)[0] = (Intbyte) (x), 1) | |
365 #define simple_charptr_copy_char(src, dst) ((dst)[0] = *(src), 1) | |
366 | |
367 #ifdef MULE | |
368 | |
369 Emchar non_ascii_charptr_emchar (const Intbyte *ptr); | |
370 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c); | |
371 Bytecount non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst); | |
372 | |
373 INLINE_HEADER Emchar charptr_emchar (const Intbyte *ptr); | |
374 INLINE_HEADER Emchar | |
375 charptr_emchar (const Intbyte *ptr) | |
376 { | |
377 return BYTE_ASCII_P (*ptr) ? | |
378 simple_charptr_emchar (ptr) : | |
379 non_ascii_charptr_emchar (ptr); | |
380 } | |
381 | |
382 INLINE_HEADER Bytecount set_charptr_emchar (Intbyte *ptr, Emchar x); | |
383 INLINE_HEADER Bytecount | |
384 set_charptr_emchar (Intbyte *ptr, Emchar x) | |
385 { | |
386 return !CHAR_MULTIBYTE_P (x) ? | |
387 simple_set_charptr_emchar (ptr, x) : | |
388 non_ascii_set_charptr_emchar (ptr, x); | |
389 } | |
390 | |
391 INLINE_HEADER Bytecount | |
392 charptr_copy_char (const Intbyte *src, Intbyte *dst); | |
393 INLINE_HEADER Bytecount | |
394 charptr_copy_char (const Intbyte *src, Intbyte *dst) | |
395 { | |
396 return BYTE_ASCII_P (*src) ? | |
397 simple_charptr_copy_char (src, dst) : | |
398 non_ascii_charptr_copy_char (src, dst); | |
399 } | |
400 | |
401 #else /* not MULE */ | |
402 | |
403 # define charptr_emchar(ptr) simple_charptr_emchar (ptr) | |
404 # define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x) | |
405 # define charptr_copy_char(src, dst) simple_charptr_copy_char (src, dst) | |
406 | |
407 #endif /* not MULE */ | |
408 | |
409 #define charptr_emchar_n(ptr, offset) \ | |
410 charptr_emchar (charptr_n_addr (ptr, offset)) | |
411 | |
412 | |
413 /* ---------------------------- */ | |
414 /* (D) For working with Emchars */ | |
415 /* ---------------------------- */ | |
416 | |
417 #ifdef MULE | |
418 | |
419 int non_ascii_valid_char_p (Emchar ch); | |
420 | |
421 INLINE_HEADER int valid_char_p (Emchar ch); | |
422 INLINE_HEADER int | |
423 valid_char_p (Emchar ch) | |
424 { | |
425 return (! (ch & ~0xFF)) || non_ascii_valid_char_p (ch); | |
426 } | |
427 | |
428 #else /* not MULE */ | |
429 | |
430 #define valid_char_p(ch) (! (ch & ~0xFF)) | |
431 | |
432 #endif /* not MULE */ | |
433 | |
434 #define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x))) | |
435 | |
436 #define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x)) | |
437 | |
438 INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj); | |
439 INLINE_HEADER Emchar | |
440 XCHAR_OR_CHAR_INT (Lisp_Object obj) | |
441 { | |
442 return CHARP (obj) ? XCHAR (obj) : XINT (obj); | |
443 } | |
444 | |
445 #define CHECK_CHAR_COERCE_INT(x) do { \ | |
446 if (CHARP (x)) \ | |
447 ; \ | |
448 else if (CHAR_INTP (x)) \ | |
449 x = make_char (XINT (x)); \ | |
450 else \ | |
451 x = wrong_type_argument (Qcharacterp, x); \ | |
452 } while (0) | |
453 | |
454 | |
455 | |
456 /************************************************************************/ | |
457 /* */ | |
458 /* working with Eistrings */ | |
459 /* */ | |
460 /************************************************************************/ | |
461 | |
462 /* | |
463 #### NOTE: This is a work in progress. Neither the API nor especially | |
464 the implementation is finished. | |
465 | |
466 NOTE: An Eistring is a structure that makes it easy to work with | |
467 internally-formatted strings of data. It provides operations similar | |
468 in feel to the standard strcpy(), strcat(), strlen(), etc., but | |
469 | |
470 (a) it is Mule-correct | |
471 (b) it does dynamic allocation so you never have to worry about size | |
472 restrictions (and all allocation is stack-local using alloca(), so | |
473 there is no need to explicitly clean up) | |
474 (c) it knows its own length, so it does not suffer from standard null | |
475 byte brain-damage | |
476 (d) it provides a much more powerful set of operations and knows about | |
477 all the standard places where string data might reside: Lisp_Objects, | |
478 other Eistrings, Intbyte * data with or without an explicit length, | |
479 ASCII strings, Emchars, etc. | |
480 (e) it provides easy operations to convert to/from externally-formatted | |
481 data, and is much easier to use than the standard TO_INTERNAL_FORMAT | |
482 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal | |
483 and external version of its data, but the external version is only | |
484 initialized or changed when you call eito_external().) | |
485 | |
486 The idea is to make it as easy to write Mule-correct string | |
487 manipulation code as it is to write normal string manipulation | |
488 code. We also make the API sufficiently general that it can handle | |
489 multiple internal data formats (e.g. some fixed-width optimizing | |
490 formats and a default variable width format) and allows for *ANY* | |
491 data format we might choose in the future for the default format, | |
492 including UCS2. (In other words, we can't assume that the internal | |
493 format is ASCII-compatible and we can't assume it doesn't have | |
494 embedded null bytes. We do assume, however, that any chosen format | |
495 will have the concept of null-termination.) All of this is hidden | |
496 from the user. | |
497 | |
498 #### It is really too bad that we don't have a real object-oriented | |
499 language, or at least a language with polymorphism! | |
500 | |
501 | |
502 ********************************************** | |
503 * Declaration * | |
504 ********************************************** | |
505 | |
506 To declare an Eistring, either put one of the following in the local | |
507 variable section: | |
508 | |
509 DECLARE_EISTRING (name); | |
510 Declare a new Eistring. This is a standard local variable declaration | |
511 and can go anywhere in the variable declaration section. NAME itself | |
512 is declared as an Eistring *, and its storage declared on the stack. | |
513 | |
514 DECLARE_EISTRING_MALLOC (name); | |
515 Declare a new Eistring, which uses malloc()ed instead of alloca()ed | |
516 data. This is a standard local variable declaration and can go | |
517 anywhere in the variable declaration section. Once you initialize | |
518 the Eistring, you will have to free it using eifree() to avoid | |
519 memory leaks. | |
520 | |
521 or use | |
522 | |
523 Eistring name; | |
524 void eiinit (Eistring name); | |
525 void eiinit_malloc (Eistring name); | |
526 If you need to put an Eistring elsewhere than in a local variable | |
527 declaration (e.g. in a structure), declare it as shown and then | |
528 call one of the init macros. | |
529 | |
530 Also note: | |
531 | |
532 void eifree (Eistring ei); | |
533 If you declared an Eistring to use malloc() to hold its data, | |
534 or converted it to the heap using eito_malloc(), then this | |
535 releases any data in it and afterwards resets the Eistring | |
536 using eiinit_malloc(). Otherwise, it just resets the Eistring | |
537 using eiinit(). | |
538 | |
539 | |
540 ********************************************** | |
541 * Conventions * | |
542 ********************************************** | |
543 | |
544 - The names of the functions have been chosen, where possible, to | |
545 match the names of str*() functions in the standard C API. | |
546 - | |
547 | |
548 | |
549 ********************************************** | |
550 * Initialization * | |
551 ********************************************** | |
552 | |
553 void eireset (Eistring *eistr); | |
554 Initialize the Eistring to the empty string. | |
555 | |
556 void eicpy_* (Eistring *eistr, ...); | |
557 Initialize the Eistring from somewhere: | |
558 | |
559 void eicpy_ei (Eistring *eistr, Eistring *eistr2); | |
560 ... from another Eistring. | |
561 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string); | |
562 ... from a Lisp_Object string. | |
563 void eicpy_ch (Eistring *eistr, Emchar ch); | |
564 ... from an Emchar. | |
565 | |
566 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string, | |
567 Bytecount off, Charcount charoff, | |
568 Bytecount len, Charcount charlen); | |
569 ... from a section of a Lisp_Object string. | |
570 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf, | |
571 Bytecount off, Charcount charoff, | |
572 Bytecount len, Charcount charlen); | |
573 ... from a section of a Lisp_Object buffer. | |
574 void eicpy_raw (Eistring *eistr, const Intbyte *data, Bytecount len); | |
575 ... from raw internal-format data in the default internal format. | |
576 void eicpy_rawz (Eistring *eistr, const Intbyte *data); | |
577 ... from raw internal-format data in the default internal format | |
578 that is "null-terminated" (the meaning of this depends on the nature | |
579 of the default internal format). | |
580 void eicpy_raw_fmt (Eistring *eistr, const Intbyte *data, Bytecount len, | |
581 Internal_Format intfmt); | |
582 ... from raw internal-format data in the specified format. | |
583 void eicpy_rawz_fmt (Eistring *eistr, const Intbyte *data, | |
584 Internal_Format intfmt); | |
585 ... from raw internal-format data in the specified format that is | |
586 "null-terminated" (the meaning of this depends on the nature of | |
587 the specific format). | |
588 void eicpy_c (Eistring *eistr, const Char_ASCII *c_string); | |
589 ... from an ASCII null-terminated string. Non-ASCII characters in | |
590 the string are *ILLEGAL* (read abort() with error-checking defined). | |
 591 void eicpy_c_len (Eistring *eistr, const Char_ASCII *c_string, Bytecount len); | |
592 ... from an ASCII string, with length specified. Non-ASCII characters | |
593 in the string are *ILLEGAL* (read abort() with error-checking defined). | |
594 void eicpy_ext (Eistring *eistr, const Extbyte *extdata, | |
595 Lisp_Object coding_system); | |
596 ... from external null-terminated data, with coding system specified. | |
597 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata, | |
598 Bytecount extlen, Lisp_Object coding_system); | |
599 ... from external data, with length and coding system specified. | |
600 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream); | |
601 ... from an lstream; reads data till eof. Data must be in default | |
602 internal format; otherwise, interpose a decoding lstream. | |
603 | |
604 | |
605 ********************************************** | |
606 * Getting the data out of the Eistring * | |
607 ********************************************** | |
608 | |
609 Intbyte *eidata (Eistring *eistr); | |
610 Return a pointer to the raw data in an Eistring. This is NOT | |
611 a copy. | |
612 | |
613 Lisp_Object eimake_string (Eistring *eistr); | |
614 Make a Lisp string out of the Eistring. | |
615 | |
616 Lisp_Object eimake_string_off (Eistring *eistr, | |
617 Bytecount off, Charcount charoff, | |
618 Bytecount len, Charcount charlen); | |
619 Make a Lisp string out of a section of the Eistring. | |
620 | |
621 void eicpyout_alloca (Eistring *eistr, LVALUE: Intbyte *ptr_out, | |
622 LVALUE: Bytecount len_out); | |
623 Make an alloca() copy of the data in the Eistring, using the | |
624 default internal format. Due to the nature of alloca(), this | |
625 must be a macro, with all lvalues passed in as parameters. | |
626 A pointer to the alloca()ed data is stored in PTR_OUT, and | |
627 the length of the data (not including the terminating zero) | |
628 is stored in LEN_OUT. | |
629 | |
630 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Intbyte *ptr_out, | |
631 LVALUE: Bytecount len_out, | |
632 Internal_Format intfmt); | |
633 Like eicpyout_alloca(), but converts to the specified internal | |
634 format. (No formats other than FORMAT_DEFAULT are currently | |
635 implemented, and you get an assertion failure if you try.) | |
636 | |
637 Intbyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out); | |
638 Make a malloc() copy of the data in the Eistring, using the | |
639 default internal format. This is a real function. No lvalues | |
640 passed in. Returns the new data, and stores the length (not | |
641 including the terminating zero) using INTLEN_OUT, unless it's | |
642 a NULL pointer. | |
643 | |
644 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt, | |
645 Bytecount *intlen_out); | |
646 Like eicpyout_malloc(), but converts to the specified internal | |
647 format. (No formats other than FORMAT_DEFAULT are currently | |
648 implemented, and you get an assertion failure if you try.) | |
649 | |
650 | |
651 ********************************************** | |
652 * Moving to the heap * | |
653 ********************************************** | |
654 | |
655 void eito_malloc (Eistring *eistr); | |
656 Move this Eistring to the heap. Its data will be stored in a | |
657 malloc()ed block rather than the stack. Subsequent changes to | |
658 this Eistring will realloc() the block as necessary. Use this | |
659 when you want the Eistring to remain in scope past the end of | |
660 this function call. You will have to manually free the data | |
661 in the Eistring using eifree(). | |
662 | |
663 void eito_alloca (Eistring *eistr); | |
664 Move this Eistring back to the stack, if it was moved to the | |
665 heap with eito_malloc(). This will automatically free any | |
666 heap-allocated data. | |
667 | |
668 | |
669 | |
670 ********************************************** | |
671 * Retrieving the length * | |
672 ********************************************** | |
673 | |
674 Bytecount eilen (Eistring *eistr); | |
675 Return the length of the internal data, in bytes. See also | |
676 eiextlen(), below. | |
677 Charcount eicharlen (Eistring *eistr); | |
678 Return the length of the internal data, in characters. | |
679 | |
680 | |
681 ********************************************** | |
682 * Working with positions * | |
683 ********************************************** | |
684 | |
685 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos); | |
686 Convert a char offset to a byte offset. | |
687 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos); | |
688 Convert a byte offset to a char offset. | |
689 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos); | |
690 Increment the given position by one character. | |
691 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
692 Increment the given position by N characters. | |
693 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos); | |
694 Decrement the given position by one character. | |
695 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
 696 Decrement the given position by N characters. | |
697 | |
698 | |
699 ********************************************** | |
700 * Getting the character at a position * | |
701 ********************************************** | |
702 | |
703 Emchar eigetch (Eistring *eistr, Bytecount bytepos); | |
704 Return the character at a particular byte offset. | |
705 Emchar eigetch_char (Eistring *eistr, Charcount charpos); | |
706 Return the character at a particular character offset. | |
707 | |
708 | |
709 ********************************************** | |
710 * Setting the character at a position * | |
711 ********************************************** | |
712 | |
713 Emchar eisetch (Eistring *eistr, Bytecount bytepos, Emchar chr); | |
714 Set the character at a particular byte offset. | |
715 Emchar eisetch_char (Eistring *eistr, Charcount charpos, Emchar chr); | |
716 Set the character at a particular character offset. | |
717 | |
718 | |
719 ********************************************** | |
720 * Concatenation * | |
721 ********************************************** | |
722 | |
723 void eicat_* (Eistring *eistr, ...); | |
724 Concatenate onto the end of the Eistring, with data coming from the | |
725 same places as above: | |
726 | |
727 void eicat_ei (Eistring *eistr, Eistring *eistr2); | |
728 ... from another Eistring. | |
729 void eicat_c (Eistring *eistr, Char_ASCII *c_string); | |
730 ... from an ASCII null-terminated string. Non-ASCII characters in | |
731 the string are *ILLEGAL* (read abort() with error-checking defined). | |
732 void eicat_raw (ei, const Intbyte *data, Bytecount len); | |
733 ... from raw internal-format data in the default internal format. | |
734 void eicat_rawz (ei, const Intbyte *data); | |
735 ... from raw internal-format data in the default internal format | |
736 that is "null-terminated" (the meaning of this depends on the nature | |
737 of the default internal format). | |
738 void eicat_lstr (ei, Lisp_Object lisp_string); | |
739 ... from a Lisp_Object string. | |
740 void eicat_ch (ei, Emchar ch); | |
741 ... from an Emchar. | |
742 | |
743 (All except the first variety are convenience functions. | |
744 In the general case, create another Eistring from the source.) | |
745 | |
746 | |
747 ********************************************** | |
748 * Replacement * | |
749 ********************************************** | |
750 | |
751 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
752 Bytecount len, Charcount charlen, ...); | |
753 Replace a section of the Eistring, specifically: | |
754 | |
755 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
756 Bytecount len, Charcount charlen, Eistring *eistr2); | |
757 ... with another Eistring. | |
758 void eisub_c (Eistring *eistr, Bytecount off, Charcount charoff, | |
759 Bytecount len, Charcount charlen, Char_ASCII *c_string); | |
760 ... with an ASCII null-terminated string. Non-ASCII characters in | |
761 the string are *ILLEGAL* (read abort() with error-checking defined). | |
762 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff, | |
763 Bytecount len, Charcount charlen, Emchar ch); | |
764 ... with an Emchar. | |
765 | |
766 void eidel (Eistring *eistr, Bytecount off, Charcount charoff, | |
767 Bytecount len, Charcount charlen); | |
768 Delete a section of the Eistring. | |
769 | |
770 | |
771 ********************************************** | |
772 * Converting to an external format * | |
773 ********************************************** | |
774 | |
775 void eito_external (Eistring *eistr, Lisp_Object coding_system); | |
776 Convert the Eistring to an external format and store the result | |
777 in the string. NOTE: Further changes to the Eistring will *NOT* | |
778 change the external data stored in the string. You will have to | |
779 call eito_external() again in such a case if you want the external | |
780 data. | |
781 | |
782 Extbyte *eiextdata (Eistring *eistr); | |
783 Return a pointer to the external data stored in the Eistring as | |
784 a result of a prior call to eito_external(). | |
785 | |
786 Bytecount eiextlen (Eistring *eistr); | |
787 Return the length in bytes of the external data stored in the | |
788 Eistring as a result of a prior call to eito_external(). | |
789 | |
790 | |
791 ********************************************** | |
792 * Searching in the Eistring for a character * | |
793 ********************************************** | |
794 | |
795 Bytecount eichr (Eistring *eistr, Emchar chr); | |
796 Charcount eichr_char (Eistring *eistr, Emchar chr); | |
797 Bytecount eichr_off (Eistring *eistr, Emchar chr, Bytecount off, | |
798 Charcount charoff); | |
799 Charcount eichr_off_char (Eistring *eistr, Emchar chr, Bytecount off, | |
800 Charcount charoff); | |
801 Bytecount eirchr (Eistring *eistr, Emchar chr); | |
802 Charcount eirchr_char (Eistring *eistr, Emchar chr); | |
803 Bytecount eirchr_off (Eistring *eistr, Emchar chr, Bytecount off, | |
804 Charcount charoff); | |
805 Charcount eirchr_off_char (Eistring *eistr, Emchar chr, Bytecount off, | |
806 Charcount charoff); | |
807 | |
808 | |
809 ********************************************** | |
810 * Searching in the Eistring for a string * | |
811 ********************************************** | |
812 | |
813 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2); | |
814 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2); | |
815 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
816 Charcount charoff); | |
817 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
818 Bytecount off, Charcount charoff); | |
819 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2); | |
820 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2); | |
821 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
822 Charcount charoff); | |
823 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
824 Bytecount off, Charcount charoff); | |
825 | |
826 Bytecount eistr_c (Eistring *eistr, Char_ASCII *c_string); | |
827 Charcount eistr_c_char (Eistring *eistr, Char_ASCII *c_string); | |
828 Bytecount eistr_c_off (Eistring *eistr, Char_ASCII *c_string, Bytecount off, | |
829 Charcount charoff); | |
830 Charcount eistr_c_off_char (Eistring *eistr, Char_ASCII *c_string, | |
831 Bytecount off, Charcount charoff); | |
832 Bytecount eirstr_c (Eistring *eistr, Char_ASCII *c_string); | |
833 Charcount eirstr_c_char (Eistring *eistr, Char_ASCII *c_string); | |
834 Bytecount eirstr_c_off (Eistring *eistr, Char_ASCII *c_string, | |
835 Bytecount off, Charcount charoff); | |
836 Charcount eirstr_c_off_char (Eistring *eistr, Char_ASCII *c_string, | |
837 Bytecount off, Charcount charoff); | |
838 | |
839 | |
840 ********************************************** | |
841 * Comparison * | |
842 ********************************************** | |
843 | |
844 int eicmp_* (Eistring *eistr, ...); | |
845 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
846 Bytecount len, Charcount charlen, ...); | |
847 int eicasecmp_* (Eistring *eistr, ...); | |
848 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
849 Bytecount len, Charcount charlen, ...); | |
850 int eicasecmp_i18n_* (Eistring *eistr, ...); | |
851 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
852 Bytecount len, Charcount charlen, ...); | |
853 | |
854 Compare the Eistring with the other data. Return value same as | |
855 from strcmp. The `*' is either `ei' for another Eistring (in | |
856 which case `...' is an Eistring), or `c' for a pure-ASCII string | |
857 (in which case `...' is a pointer to that string). For anything | |
858 more complex, first create an Eistring out of the source. | |
859 Comparison is either simple (`eicmp_...'), ASCII case-folding | |
860 (`eicasecmp_...'), or multilingual case-folding | |
861 (`eicasecmp_i18n_...'). | |
862 | |
863 | |
864 More specifically, the prototypes are: | |
865 | |
866 int eicmp_ei (Eistring *eistr, Eistring *eistr2); | |
867 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
868 Bytecount len, Charcount charlen, Eistring *eistr2); | |
869 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2); | |
870 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
871 Bytecount len, Charcount charlen, Eistring *eistr2); | |
872 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2); | |
873 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off, | |
874 Charcount charoff, Bytecount len, | |
875 Charcount charlen, Eistring *eistr2); | |
876 | |
877 int eicmp_c (Eistring *eistr, Char_ASCII *c_string); | |
878 int eicmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff, | |
879 Bytecount len, Charcount charlen, Char_ASCII *c_string); | |
880 int eicasecmp_c (Eistring *eistr, Char_ASCII *c_string); | |
881 int eicasecmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff, | |
882 Bytecount len, Charcount charlen, | |
883 Char_ASCII *c_string); | |
884 int eicasecmp_i18n_c (Eistring *eistr, Char_ASCII *c_string); | |
885 int eicasecmp_i18n_off_c (Eistring *eistr, Bytecount off, Charcount charoff, | |
886 Bytecount len, Charcount charlen, | |
887 Char_ASCII *c_string); | |
888 | |
889 | |
890 ********************************************** | |
891 * Case-changing the Eistring * | |
892 ********************************************** | |
893 | |
894 void eilwr (Eistring *eistr); | |
895 Convert all characters in the Eistring to lowercase. | |
896 void eiupr (Eistring *eistr); | |
897 Convert all characters in the Eistring to uppercase. | |
898 */ | |
899 | |
900 | |
901 /* Principles for writing Eistring functions: | |
902 | |
903 (1) Unfortunately, we have to write most of the Eistring functions | |
904 as macros, because of the use of alloca(). The principle used | |
905 below to assure no conflict in local variables is to prefix all | |
906 local variables with "ei" plus a number, which should be unique | |
907 among macros. In practice, when finding a new number, find the | |
908 highest so far used, and add 1. | |
909 | |
910 (2) We also suffix the Eistring fields with an _ to avoid problems | |
911 with macro parameters of the same name. (And as the standard | |
912 signal not to access these fields directly.) | |
913 | |
914 (3) We maintain both the length in bytes and chars of the data in | |
915 the Eistring at all times, for convenient retrieval by outside | |
916 functions. That means when writing functions that manipulate | |
917 Eistrings, you too need to keep both lengths up to date for all | |
918 data that you work with. | |
919 | |
920 (4) When writing a new type of operation (e.g. substitution), you | |
921 will often find yourself working with outside data, and thus | |
922 have a series of related API's, for different forms that the | |
923 outside data is in. Generally, you will want to choose a | |
924 subset of the forms supported by eicpy_*, which has to be | |
925 totally general because that's the fundamental way to get data | |
926 into an Eistring, and once the data is into the string, it | |
927 would be possible to create a whole series of Ei operations that work on | |
928 nothing but Eistrings. Although theoretically nice, in | |
929 practice it's a hassle, so we suggest that you provide | |
930 convenience functions. In particular, there are two paths you | |
931 can take. One is minimalist -- it only allows other Eistrings | |
932 and ASCII data, and Emchars if the particular operation makes | |
933 sense with a character. The other provides interfaces for the | |
934 most commonly-used forms -- Eistring, ASCII data, Lisp string, | |
935 raw internal-format string with length, raw internal-format | |
936 string without, and possibly Emchar. (In the function names, | |
937 these are designated `ei', `c', `lstr', `raw', `rawz', and | |
938 `ch', respectively.) | |
939 | |
940 (5) When coding a new type of operation, such as was discussed in | |
941 the previous section, the correct approach is to declare a worker | |
942 function that does the work of everything, and is called by the | |
943 other "container" macros that handle the different outside data | |
944 forms. The data coming into the worker function, which | |
945 typically ends in `_1', is in the form of three parameters: | |
946 DATA, LEN, CHARLEN. (See point [3] about having two lengths and | |
947 keeping them in sync.) | |
948 | |
949 (6) Handling argument evaluation in macros: We take great care | |
950 never to evaluate any argument more than once in any macro, | |
951 except the initial Eistring parameter. This can and will be | |
952 evaluated multiple times, but it should pretty much always just | |
953 be a simple variable. This means, for example, that if an | |
954 Eistring is the second (not first) argument of a macro, it | |
955 doesn't fall under the "initial Eistring" exemption, so it | |
956 needs protection against multi-evaluation. (Take the address of | |
957 the Eistring structure, store in a temporary variable, and use | |
958 temporary variable for all access to the Eistring.) | |
959 Essentially, we want it to appear as if these Eistring macros | |
960 are functions -- we would like to declare them as functions but | |
961 they use alloca(), so we can't (and we can't make them inline | |
962 functions either -- alloca() is explicitly disallowed in inline | |
963 functions.) | |
964 | |
965 (7) Note that our rules regarding multiple evaluation are *more* | |
966 strict than the rules listed above under the heading "working | |
967 with raw internal-format data". | |
968 */ | |
969 | |
970 | |
971 /* ----- Declaration ----- */ | |
972 | |
/* An Eistring: string data in the default internal format whose length is
   tracked in both bytes and characters, plus an optional copy of the data
   converted to an external format.  The fields carry a trailing underscore
   and are meant to be reached only through the ei* macros. */
973 typedef struct | |
974 { | |
975 /* Data for the Eistring, stored in the default internal format. | |
976 Always includes terminating null. */ | |
977 Intbyte *data_; | |
978 /* Total number of bytes allocated in DATA (including null). */ | |
979 Bytecount max_size_allocated_; | |
/* Length of the data in bytes, not counting the terminating null. */
980 Bytecount bytelen_; | |
/* Length of the data in characters; kept in sync with bytelen_. */
981 Charcount charlen_; | |
/* Non-zero if the buffers live on the heap (grown with xrealloc(),
   released with xfree()); zero if they come from alloca(). */
982 int mallocp_; | |
983 | |
/* External-format copy of the data and its length in bytes, as filled in
   by eito_external(). */
984 Extbyte *extdata_; | |
985 Bytecount extlen_; | |
986 } Eistring; | |
987 | |
/* Selector for the in-memory text representation taken by the *_fmt
   macros.  The *_fmt macros in this file assert that the format is
   FORMAT_DEFAULT; the fixed-width variants are declared but not accepted
   by them. */
988 typedef enum internal_format | |
989 { | |
990 FORMAT_DEFAULT, | |
991 FORMAT_FIXED_8, | |
992 FORMAT_FIXED_16, | |
993 FORMAT_FIXED_32 | |
994 } Internal_Format; | |
995 | |
/* Template values copied into freshly declared or reset Eistrings; they
   are defined elsewhere. */
996 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init; | |
997 | |
/* Declare a local Eistring: NAME becomes an `Eistring *' pointing at a
   hidden structure (__NAME__storage__) initialized from
   the_eistring_zero_init.  Expands to two declarations, so it must appear
   where declarations are allowed. */
998 #define DECLARE_EISTRING(name) \ | |
999 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \ | |
1000 Eistring *name = & __ ## name ## __storage__ | |
/* Same as DECLARE_EISTRING, but initialized from
   the_eistring_malloc_zero_init. */
1001 #define DECLARE_EISTRING_MALLOC(name) \ | |
1002 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \ | |
1003 Eistring *name = & __ ## name ## __storage__ | |
1004 | |
/* Reset the Eistring pointed to by EI by copying the
   the_eistring_zero_init template over it.  EI is an `Eistring *', as in
   every other ei* macro (eifree() passes its pointer straight through), so
   it has to be dereferenced for the structure assignment.  Nothing is
   freed; use eifree() to release storage first. */
#define eiinit(ei)				\
do {						\
  *(ei) = the_eistring_zero_init;		\
} while (0)

/* Like eiinit(), but copies the the_eistring_malloc_zero_init template. */
#define eiinit_malloc(ei)			\
do {						\
  *(ei) = the_eistring_malloc_zero_init;	\
} while (0)
1014 | |
1015 | |
1016 /* ----- Utility ----- */ | |
1017 | |
/* LEN and CHARLEN must be assignable lvalues.  If LEN is -1 it is
   overwritten with charcount_to_bytecount (PTR, CHARLEN); otherwise, if
   CHARLEN is -1, it is overwritten with bytecount_to_charcount (PTR, LEN).
   Only one of the two is ever fixed up per invocation. */
1018 /* Make sure both LEN and CHARLEN are specified, in case one is given | |
1019 as -1. PTR evaluated at most once, others multiply. */ | |
1020 #define eifixup_bytechar(ptr, len, charlen) \ | |
1021 do { \ | |
1022 if ((len) == -1) \ | |
1023 (len) = charcount_to_bytecount (ptr, charlen); \ | |
1024 else if ((charlen) == -1) \ | |
1025 (charlen) = bytecount_to_charcount (ptr, len); \ | |
1026 } while (0) | |
1027 | |
1028 /* Make sure LEN is specified, in case it is given as -1. PTR | |
1029 evaluated at most once, others multiply. LEN must be an lvalue; | |
CHARLEN is only read. */
1030 #define eifixup_byte(ptr, len, charlen) \ | |
1031 do { \ | |
1032 if ((len) == -1) \ | |
1033 (len) = charcount_to_bytecount (ptr, charlen); \ | |
1034 } while (0) | |
1035 | |
1036 /* Make sure CHARLEN is specified, in case it is given as -1. PTR | |
1037 evaluated at most once, others multiply. CHARLEN must be an lvalue; | |
LEN is only read. */
1038 #define eifixup_char(ptr, len, charlen) \ | |
1039 do { \ | |
1040 if ((charlen) == -1) \ | |
1041 (charlen) = bytecount_to_charcount (ptr, len); \ | |
1042 } while (0) | |
1043 | |
1044 | |
1045 | |
1046 /* Make sure we can hold NEWBYTELEN bytes (which is NEWCHARLEN chars) | |
1047 plus a zero terminator. Preserve existing data as much as possible, | |
1048 including existing zero terminator. Put a new zero terminator where it | |
1049 should go if NEWZ is non-zero. All args but EI are evalled only once. */ | |
/* The new lengths are stored into EI first.  The buffer size then grows
   by repeated factors of 1.5 (with a floor of 32 bytes) until it can hold
   the data plus terminator; it is never shrunk.  Heap-backed strings are
   grown with xrealloc(); others get a fresh alloca() block into which the
   old data and its terminator are copied.
   NOTE(review): everything after the length update -- including writing
   the new terminator -- is skipped when the byte length is unchanged, so
   an Eistring whose data_ is still null and that is asked to hold zero
   bytes keeps a null data_.  Confirm that callers tolerate this. */
1050 | |
1051 #define EI_ALLOC(ei, newbytelen, newcharlen, newz) \ | |
1052 do { \ | |
1053 int ei1oldeibytelen = (ei)->bytelen_; \ | |
1054 \ | |
1055 (ei)->charlen_ = (newcharlen); \ | |
1056 (ei)->bytelen_ = (newbytelen); \ | |
1057 \ | |
1058 if (ei1oldeibytelen != (ei)->bytelen_) \ | |
1059 { \ | |
1060 int ei1newsize = (ei)->max_size_allocated_; \ | |
1061 while (ei1newsize < (ei)->bytelen_ + 1) \ | |
1062 { \ | |
1063 ei1newsize = (int) (ei1newsize * 1.5); \ | |
1064 if (ei1newsize < 32) \ | |
1065 ei1newsize = 32; \ | |
1066 } \ | |
1067 if (ei1newsize != (ei)->max_size_allocated_) \ | |
1068 { \ | |
1069 if ((ei)->mallocp_) \ | |
1070 /* xrealloc always preserves existing data as much as possible */ \ | |
1071 (ei)->data_ = (Intbyte *) xrealloc ((ei)->data_, ei1newsize); \ | |
1072 else \ | |
1073 { \ | |
1074 /* We don't have realloc, so alloca() more space and copy the \ | |
1075 data into it. */ \ | |
1076 Intbyte *ei1oldeidata = (ei)->data_; \ | |
1077 (ei)->data_ = (Intbyte *) alloca (ei1newsize); \ | |
1078 if (ei1oldeidata) \ | |
1079 memcpy ((ei)->data_, ei1oldeidata, ei1oldeibytelen + 1); \ | |
1080 } \ | |
1081 (ei)->max_size_allocated_ = ei1newsize; \ | |
1082 } \ | |
1083 if (newz) \ | |
1084 (ei)->data_[(ei)->bytelen_] = '\0'; \ | |
1085 } \ | |
1086 } while (0) | |
1087 | |
/* Replace the whole contents of EI: size it for BYTELEN bytes / CHARLEN
   characters via EI_ALLOC (asking for a terminator), then copy BYTELEN
   bytes from DATA.  DATA, BYTELEN and CHARLEN are each expanded once. */
1088 #define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen) \ | |
1089 do { \ | |
1090 EI_ALLOC (ei, bytelen, charlen, 1); \ | |
1091 memcpy ((ei)->data_, data, (ei)->bytelen_); \ | |
1092 } while (0) | |
1093 | |
#ifdef ERROR_CHECK_CHARBPOS
/* Assert that each of the LEN bytes starting at PTR is an ASCII
   character, i.e. lies in the range 0x00 - 0x7F inclusive (DEL, 0x7F, is
   an ASCII character too).  PTR and LEN are each evaluated once. */
#define EI_ASSERT_ASCII(ptr, len)				\
do {								\
  int ei5;							\
  const Char_ASCII *ei5ptr = (ptr);				\
  int ei5len = (len);						\
								\
  for (ei5 = 0; ei5 < ei5len; ei5++)				\
    assert (ei5ptr[ei5] >= 0x00 && ei5ptr[ei5] <= 0x7F);	\
} while (0)
/* Same check for a null-terminated string.  PTR is evaluated once. */
#define EI_ASSERT_ASCIIZ(ptr)			\
do {						\
  const Char_ASCII *ei5p1 = (ptr);		\
  EI_ASSERT_ASCII (ei5p1, strlen (ei5p1));	\
} while (0)
#else
/* Checking disabled: both macros expand to nothing. */
#define EI_ASSERT_ASCII(ptr, len)
#define EI_ASSERT_ASCIIZ(ptr)
#endif
1113 | |
1114 | |
1115 /* ----- Initialization ----- */ | |
1116 | |
/* Set EI to a copy of the Eistring EICPY (data, byte length and character
   length).  EICPY is evaluated once. */
1117 #define eicpy_ei(ei, eicpy) \ | |
1118 do { \ | |
1119 const Eistring *ei2 = (eicpy); \ | |
1120 EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_); \ | |
1121 } while (0) | |
1122 | |
/* Set EI to a copy of the contents of the Lisp string LISP_STRING, which
   is evaluated once. */
1123 #define eicpy_lstr(ei, lisp_string) \ | |
1124 do { \ | |
1125 Lisp_Object ei3 = (lisp_string); \ | |
1126 EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3), \ | |
1127 XSTRING_CHAR_LENGTH (ei3)); \ | |
1128 } while (0) | |
1129 | |
/* Set EI to a copy of a section of the Lisp string LISP_STRING.  The
   section starts at byte offset OFF (computed from the character offset
   CHAROFF when OFF is -1) and has byte length LEN / character length
   CHARLEN; one of LEN and CHARLEN may be -1, in which case it is computed
   from the other.  Every argument but EI is evaluated once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen)	\
do {									\
  Lisp_Object ei23lstr = (lisp_string);					\
  int ei23off = (off);							\
  int ei23charoff = (charoff);						\
  int ei23len = (len);							\
  int ei23charlen = (charlen);						\
  const Intbyte *ei23data = XSTRING_DATA (ei23lstr);			\
									\
  eifixup_byte (ei23data, ei23off, ei23charoff);			\
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen);		\
									\
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen);	\
} while (0)
1146 | |
/* Set EI to a copy of LEN bytes of raw internal-format data at PTR.  FMT
   must be FORMAT_DEFAULT (asserted).  The character length is computed
   with bytecount_to_charcount().  PTR, LEN and FMT are evaluated once. */
1147 #define eicpy_raw_fmt(ei, ptr, len, fmt) \ | |
1148 do { \ | |
1149 const Intbyte *ei12ptr = (ptr); \ | |
1150 Internal_Format ei12fmt = (fmt); \ | |
1151 int ei12len = (len); \ | |
1152 assert (ei12fmt == FORMAT_DEFAULT); \ | |
1153 EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len, \ | |
1154 bytecount_to_charcount (ei12ptr, ei12len)); \ | |
1155 } while (0) | |
1156 | |
/* eicpy_raw_fmt() with the default internal format. */
1157 #define eicpy_raw(ei, ptr, len) eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT) | |
1158 | |
/* Set EI to a copy of the null-terminated raw internal-format data at
   PTR; the length is taken with qxestrlen().  FMT must be FORMAT_DEFAULT
   (asserted).  PTR and FMT are each evaluated exactly once: the saved copy
   of FMT, not the macro argument, is what gets handed on to
   eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt)					\
do {									\
  const Intbyte *ei12p1ptr = (ptr);					\
  Internal_Format ei12p1fmt = (fmt);					\
  assert (ei12p1fmt == FORMAT_DEFAULT);					\
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt);	\
} while (0)

/* eicpy_rawz_fmt() with the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT)
1168 | |
/* Set EI to the single character CH: CH is encoded into a local buffer
   with set_charptr_emchar() and the resulting bytes are copied in with a
   character length of 1.  CH is evaluated once. */
1169 #define eicpy_ch(ei, ch) \ | |
1170 do { \ | |
1171 Intbyte ei12p2[MAX_EMCHAR_LEN]; \ | |
1172 Bytecount ei12p2len = set_charptr_emchar (ei12p2, ch); \ | |
1173 EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1); \ | |
1174 } while (0) | |
1175 | |
/* Set EI from the null-terminated ASCII string C_STRING.  The string is
   checked with EI_ASSERT_ASCIIZ (a no-op unless ERROR_CHECK_CHARBPOS is
   defined) and then converted via eicpy_ext() using Qbinary.  C_STRING is
   evaluated once. */
1176 #define eicpy_c(ei, c_string) \ | |
1177 do { \ | |
1178 const Char_ASCII *ei4 = (c_string); \ | |
1179 \ | |
1180 EI_ASSERT_ASCIIZ (ei4); \ | |
1181 eicpy_ext (ei, ei4, Qbinary); \ | |
1182 } while (0) | |
1183 | |
/* Set EI from C_LEN bytes of ASCII data at C_STRING.  The data is checked
   with EI_ASSERT_ASCII (a no-op unless ERROR_CHECK_CHARBPOS is defined)
   and converted via eicpy_ext_len() using Qbinary.  C_STRING and C_LEN are
   evaluated once. */
1184 #define eicpy_c_len(ei, c_string, c_len) \ | |
1185 do { \ | |
1186 const Char_ASCII *ei6 = (c_string); \ | |
1187 int ei6len = (c_len); \ | |
1188 \ | |
1189 EI_ASSERT_ASCII (ei6, ei6len); \ | |
1190 eicpy_ext_len (ei, ei6, ei6len, Qbinary); \ | |
1191 } while (0) | |
1192 | |
/* Set EI from EXTLEN bytes of external-format data at EXTDATA, converted
   to internal format according to CODING_SYSTEM.  The conversion writes
   data_ and bytelen_ directly; max_size_allocated_ is then set to
   bytelen_ + 1 and charlen_ is recomputed from the converted data.
   NOTE(review): the ALLOCA sink is used regardless of mallocp_, and any
   previous data_ is simply overwritten -- for a heap-backed Eistring this
   looks like it would leak the old buffer and later hand an alloca()
   pointer to xrealloc()/xfree().  Confirm before using this on Eistrings
   declared with DECLARE_EISTRING_MALLOC. */
1193 #define eicpy_ext_len(ei, extdata, extlen, coding_system) \ | |
1194 do { \ | |
1195 const Extbyte *ei7 = (extdata); \ | |
1196 int ei7len = (extlen); \ | |
1197 \ | |
1198 TO_INTERNAL_FORMAT (DATA, (ei7, ei7len), \ | |
1199 ALLOCA, ((ei)->data_, (ei)->bytelen_), \ | |
1200 coding_system); \ | |
1201 (ei)->max_size_allocated_ = (ei)->bytelen_ + 1; \ | |
1202 (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \ | |
1203 } while (0) | |
1204 | |
/* Set EI from the external-format data at EXTDATA, converted according to
   CODING_SYSTEM; the length of the data is obtained from
   dfc_external_data_len().  EXTDATA and CODING_SYSTEM are each evaluated
   exactly once: the coding system is needed both for the length
   computation and for the conversion, so it is saved in a temporary
   first. */
#define eicpy_ext(ei, extdata, coding_system)				\
do {									\
  const Extbyte *ei8 = (extdata);					\
  Lisp_Object ei8codesys = (coding_system);				\
									\
  eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, ei8codesys),	\
		 ei8codesys);						\
} while (0)
1212 | |
/* Placeholders: these two expand to the tokens NOT YET IMPLEMENTED, so
   any use of them fails to compile. */
1213 #define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) \ | |
1214 NOT YET IMPLEMENTED | |
1215 | |
1216 #define eicpy_lstream(eistr, lstream) \ | |
1217 NOT YET IMPLEMENTED | |
1218 | |
/* Set EISTR to the empty string. */
1219 #define eireset(eistr) eicpy_rawz (eistr, (Intbyte *) "") | |
1220 | |
1221 /* ----- Getting the data out of the Eistring ----- */ | |
1222 | |
/* Pointer to the internal-format data of EI (not a copy). */
1223 #define eidata(ei) ((ei)->data_) | |
1224 | |
/* Make a Lisp string from the whole contents of EI. */
1225 #define eimake_string(ei) make_string (eidata (ei), eilen (ei)) | |
1226 | |
/* Make a Lisp string from a section of EISTR.  The section starts at byte
   offset OFF (computed from CHAROFF when OFF is -1) and is LEN bytes long
   (computed from CHARLEN when LEN is -1).
   NOTE(review): as written this is a statement macro containing a
   `return', so it returns the new string from the *calling* function
   rather than yielding a value, and the local ei24lstr is never used.
   That does not match the function-like behavior intended for the ei*
   macros; it probably wants to become an inline function -- confirm
   against callers before relying on it. */
1227 #define eimake_string_off(eistr, off, charoff, len, charlen) \ | |
1228 do { \ | |
1229 Lisp_Object ei24lstr; \ | |
1230 int ei24off = (off); \ | |
1231 int ei24charoff = (charoff); \ | |
1232 int ei24len = (len); \ | |
1233 int ei24charlen = (charlen); \ | |
1234 \ | |
1235 eifixup_byte ((eistr)->data_, ei24off, ei24charoff); \ | |
1236 eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen); \ | |
1237 \ | |
1238 return make_string ((eistr)->data_ + ei24off, ei24len); \ | |
1239 } while (0) | |
1240 | |
/* Copy the data out of EISTR in the default internal format: the alloca
   variant stores the new buffer and its length into PTROUT and LENOUT. */
1241 #define eicpyout_alloca(eistr, ptrout, lenout) \ | |
1242 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT) | |
/* The malloc variant forwards to eicpyout_malloc_fmt(), a real function
   declared just below and defined elsewhere, with FORMAT_DEFAULT. */
1243 #define eicpyout_malloc(eistr, lenout) \ | |
1244 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT) | |
1245 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, | |
1246 Internal_Format fmt); | |
/* Copy the data of EISTR, including its terminating null, into a fresh
   alloca() buffer.  PTROUT (an `Intbyte *' lvalue) receives the buffer and
   LENOUT (a `Bytecount' lvalue) the length in bytes, not counting the
   terminator.  FMT must be FORMAT_DEFAULT (asserted).  The addresses of
   PTROUT and LENOUT are taken once, so each is evaluated a single time;
   note that the temporary for PTROUT is therefore a pointer to pointer. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt)			\
do {									\
  Internal_Format ei23fmt = (fmt);					\
  Intbyte **ei23ptrout = &(ptrout);					\
  Bytecount *ei23lenout = &(lenout);					\
									\
  assert (ei23fmt == FORMAT_DEFAULT);					\
									\
  *ei23lenout = (eistr)->bytelen_;					\
  *ei23ptrout = alloca_array (Intbyte, (eistr)->bytelen_ + 1);		\
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1);		\
} while (0)
1259 | |
1260 | |
1261 /* ----- Moving to the heap ----- */ | |
1262 | |
/* Release EI.  For a heap-backed Eistring the internal and external data
   buffers are xfree()d (when non-null) and EI is reset with
   eiinit_malloc(); otherwise nothing is freed and EI is reset with
   eiinit().  EI is passed to the reset macros as the same pointer. */
1263 #define eifree(ei) \ | |
1264 do { \ | |
1265 if ((ei)->mallocp_) \ | |
1266 { \ | |
1267 if ((ei)->data_) \ | |
1268 xfree ((ei)->data_); \ | |
1269 if ((ei)->extdata_) \ | |
1270 xfree ((ei)->extdata_); \ | |
1271 eiinit_malloc (ei); \ | |
1272 } \ | |
1273 else \ | |
1274 eiinit (ei); \ | |
1275 } while (0) | |
1276 | |
/* Helpers defined elsewhere.  eifind_large_enough_buffer() is used by
   eito_alloca() to pick a buffer size for a required number of bytes. */
1277 int eifind_large_enough_buffer (int oldbufsize, int needed_size); | |
1278 void eito_malloc_1 (Eistring *ei); | |
1279 | |
/* Move EI to the heap; all the work is done by eito_malloc_1(). */
1280 #define eito_malloc(ei) eito_malloc_1 (ei) | |
1281 | |
/* Convert a heap-backed Eistring into an alloca()-backed one.  Does
   nothing if EI is not heap-backed.  Otherwise mallocp_ is cleared; the
   internal data (with its terminator) is copied into an alloca() buffer
   whose size comes from eifind_large_enough_buffer(), and any external
   data is copied into an alloca() buffer with two trailing null bytes.
   The old heap buffers are xfree()d.

   The "not heap-backed" case is handled by guarding the body rather than
   with a `return' statement: this is a macro, so a `return' would leave
   the calling function. */
#define eito_alloca(ei)							\
do {									\
  if ((ei)->mallocp_)							\
    {									\
      (ei)->mallocp_ = 0;						\
      if ((ei)->data_)							\
	{								\
	  Intbyte *ei13newdata;						\
									\
	  (ei)->max_size_allocated_ =					\
	    eifind_large_enough_buffer (0, (ei)->bytelen_ + 1);		\
	  ei13newdata = (Intbyte *) alloca ((ei)->max_size_allocated_);	\
	  memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1);	\
	  xfree ((ei)->data_);						\
	  (ei)->data_ = ei13newdata;					\
	}								\
									\
      if ((ei)->extdata_)						\
	{								\
	  Extbyte *ei13newdata =					\
	    (Extbyte *) alloca ((ei)->extlen_ + 2);			\
									\
	  memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_);		\
	  /* Double null-terminate in case of Unicode data */		\
	  ei13newdata[(ei)->extlen_] = '\0';				\
	  ei13newdata[(ei)->extlen_ + 1] = '\0';			\
	  xfree ((ei)->extdata_);					\
	  (ei)->extdata_ = ei13newdata;					\
	}								\
    }									\
} while (0)
1311 | |
1312 | |
1313 /* ----- Retrieving the length ----- */ | |
1314 | |
/* Length of EI in bytes and in characters, respectively. */
1315 #define eilen(ei) ((ei)->bytelen_) | |
1316 #define eicharlen(ei) ((ei)->charlen_) | |
1317 | |
1318 | |
1319 /* ----- Working with positions ----- */ | |
1320 | |
/* Convert between character offsets and byte offsets, both measured from
   the start of EI's data. */
1321 #define eicharpos_to_bytepos(ei, charpos) \ | |
1322 charcount_to_bytecount ((ei)->data_, charpos) | |
1323 #define eibytepos_to_charpos(ei, bytepos) \ | |
1324 bytecount_to_charcount ((ei)->data_, bytepos) | |
1325 | |
1326 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr, | |
1327 Bytecount bytepos, | |
1328 Charcount n)) | |
1329 { | |
1330 Intbyte *pos = eistr->data_ + bytepos; | |
1331 int i; | |
1332 | |
1333 charbpos_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); | |
1334 charbpos_checking_assert (n >= 0 && n <= eistr->charlen_); | |
1335 /* We could check N more correctly now, but that would require a | |
1336 call to bytecount_to_charcount(), which would be needlessly | |
1337 expensive (it would convert O(N) algorithms into O(N^2) algorithms | |
1338 with ERROR_CHECK_CHARBPOS, which would be bad). If N is bad, we are | |
1339 guaranteed to catch it either inside INC_CHARPTR() or in the check | |
1340 below. */ | |
1341 for (i = 0; i < n; i++) | |
1342 INC_CHARPTR (pos); | |
1343 charbpos_checking_assert (pos - eistr->data_ <= eistr->bytelen_); | |
1344 return pos - eistr->data_; | |
1345 } | |
1346 | |
/* Advance BYTEPOS in EI by one character, or by N characters, and yield
   the new byte offset.  There must be no whitespace between the macro
   name and its parameter list: with a space the preprocessor defines an
   object-like macro whose replacement text begins with "(ei, bytepos)". */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
1349 | |
1350 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr, | |
1351 Bytecount bytepos, | |
1352 Charcount n)) | |
1353 { | |
1354 Intbyte *pos = eistr->data_ + bytepos; | |
1355 int i; | |
1356 | |
1357 charbpos_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); | |
1358 charbpos_checking_assert (n >= 0 && n <= eistr->charlen_); | |
1359 /* We could check N more correctly now, but ... see above. */ | |
1360 for (i = 0; i < n; i++) | |
1361 DEC_CHARPTR (pos); | |
1362 charbpos_checking_assert (pos - eistr->data_ <= eistr->bytelen_); | |
1363 return pos - eistr->data_; | |
1364 } | |
1365 | |
/* Move BYTEPOS in EI back by one character, or by N characters, and yield
   the new byte offset.  There must be no whitespace between the macro
   name and its parameter list: with a space the preprocessor defines an
   object-like macro whose replacement text begins with "(ei, bytepos)". */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
1368 | |
1369 | |
1370 /* ----- Getting the character at a position ----- */ | |
1371 | |
/* Fetch the character of EI found at byte offset BYTEPOS, or at
   character offset CHARPOS. */
1372 #define eigetch(ei, bytepos) \ | |
1373 charptr_emchar ((ei)->data_ + (bytepos)) | |
1374 #define eigetch_char(ei, charpos) charptr_emchar_n ((ei)->data_, charpos) | |
1375 | |
1376 | |
1377 /* ----- Setting the character at a position ----- */ | |
1378 | |
/* Replace the single character at byte offset BYTEPOS (or character
   offset CHARPOS) with CHR.  Both forward to eisub_ch() with a character
   length of 1 and -1 for the values to be computed.  Since eisub_ch() is a
   statement macro, these expand to statements and yield no value. */
1379 #define eisetch(ei, bytepos, chr) \ | |
1380 eisub_ch (ei, bytepos, -1, -1, 1, chr) | |
1381 #define eisetch_char(ei, charpos, chr) \ | |
1382 eisub_ch (ei, -1, charpos, -1, 1, chr) | |
1383 | |
1384 | |
1385 /* ----- Concatenation ----- */ | |
1386 | |
/* Worker for the eicat_* macros: append BYTELEN bytes (CHARLEN
   characters) from DATA to the end of EI.  The old byte length is saved
   before EI_ALLOC updates the lengths, and is used as the copy
   destination offset.  DATA, BYTELEN and CHARLEN are each expanded once. */
1387 #define eicat_1(ei, data, bytelen, charlen) \ | |
1388 do { \ | |
1389 int ei14oldeibytelen = (ei)->bytelen_; \ | |
1390 int ei14bytelen = (bytelen); \ | |
1391 EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen, \ | |
1392 (ei)->charlen_ + (charlen), 1); \ | |
1393 memcpy ((ei)->data_ + ei14oldeibytelen, (data), \ | |
1394 ei14bytelen); \ | |
1395 } while (0) | |
1396 | |
/* Append the contents of the Eistring EI2 to EI.  EI2 is evaluated once. */
1397 #define eicat_ei(ei, ei2) \ | |
1398 do { \ | |
1399 const Eistring *ei9 = (ei2); \ | |
1400 eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_); \ | |
1401 } while (0) | |
1402 | |
/* Append the null-terminated ASCII string C_STRING to EI.  The string is
   checked with EI_ASSERT_ASCII (a no-op unless ERROR_CHECK_CHARBPOS is
   defined); its bytes are then appended unchanged.  C_STRING is evaluated
   once. */
1403 #define eicat_c(ei, c_string) \ | |
1404 do { \ | |
1405 const Char_ASCII *ei15 = (c_string); \ | |
1406 int ei15len = strlen (ei15); \ | |
1407 \ | |
1408 EI_ASSERT_ASCII (ei15, ei15len); \ | |
1409 eicat_1 (ei, ei15, ei15len, \ | |
1410 bytecount_to_charcount ((Intbyte *) ei15, ei15len)); \ | |
1411 } while (0) | |
1412 | |
/* Append LEN bytes of raw internal-format data at DATA to EI; the
   character count is computed with bytecount_to_charcount().  DATA and LEN
   are evaluated once. */
1413 #define eicat_raw(ei, data, len) \ | |
1414 do { \ | |
1415 int ei16len = (len); \ | |
1416 const Intbyte *ei16data = (data); \ | |
1417 eicat_1 (ei, ei16data, ei16len, \ | |
1418 bytecount_to_charcount (ei16data, ei16len)); \ | |
1419 } while (0) | |
1420 | |
/* Append null-terminated raw internal-format data at PTR to EI; the
   length is taken with qxestrlen().  PTR is evaluated once. */
1421 #define eicat_rawz(ei, ptr) \ | |
1422 do { \ | |
1423 const Intbyte *ei16p5ptr = (ptr); \ | |
1424 eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr)); \ | |
1425 } while (0) | |
1426 | |
/* Append the contents of the Lisp string LISP_STRING to EI.  LISP_STRING
   is evaluated once. */
1427 #define eicat_lstr(ei, lisp_string) \ | |
1428 do { \ | |
1429 Lisp_Object ei17 = (lisp_string); \ | |
1430 eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \ | |
1431 XSTRING_CHAR_LENGTH (ei17)); \ | |
1432 } while (0) | |
1433 | |
/* Append the single character CH to EI: CH is encoded into a local
   buffer with set_charptr_emchar() and appended with a character count of
   1.  CH is evaluated once. */
1434 #define eicat_ch(ei, ch) \ | |
1435 do { \ | |
1436 Intbyte ei22ch[MAX_EMCHAR_LEN]; \ | |
1437 Bytecount ei22len = set_charptr_emchar (ei22ch, ch); \ | |
1438 eicat_1 (ei, ei22ch, ei22len, 1); \ | |
1439 } while (0) | |
1440 | |
1441 | |
1442 /* ----- Replacement ----- */ | |
1443 | |
1444 /* Replace the section of an Eistring at (OFF, LEN) with the data at | |
1445 SRC of length SRCLEN. All positions have corresponding character values, | |
1446 and either can be -1 -- it will be computed from the other. */ | |
/* Worker for the eisub_* macros and eidel().  All arguments but EI are
   captured in temporaries, so each is evaluated once.  After the -1
   values are filled in, EI_ALLOC resizes the string without writing a new
   terminator; when the replaced and replacing byte lengths differ, the
   tail of the old data (including its terminator) is moved into place
   with memmove(), and then the new data is copied in. */
1447 | |
1448 #define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \ | |
1449 do { \ | |
1450 int ei18off = (off); \ | |
1451 int ei18charoff = (charoff); \ | |
1452 int ei18len = (len); \ | |
1453 int ei18charlen = (charlen); \ | |
1454 Intbyte *ei18src = (Intbyte *) (src); \ | |
1455 int ei18srclen = (srclen); \ | |
1456 int ei18srccharlen = (srccharlen); \ | |
1457 \ | |
1458 int ei18oldeibytelen = (ei)->bytelen_; \ | |
1459 \ | |
1460 eifixup_bytechar ((ei)->data_, ei18off, ei18charoff); \ | |
1461 eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen); \ | |
1462 eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen); \ | |
1463 \ | |
1464 EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len, \ | |
1465 (ei)->charlen_ + ei18srccharlen - ei18charlen, 0); \ | |
1466 if (ei18len != ei18srclen) \ | |
1467 memmove ((ei)->data_ + ei18off + ei18srclen, \ | |
1468 (ei)->data_ + ei18off + ei18len, \ | |
1469 /* include zero terminator. */ \ | |
1470 ei18oldeibytelen - (ei18off + ei18len) + 1); \ | |
1471 if (ei18srclen > 0) \ | |
1472 memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen); \ | |
1473 } while (0) | |
1474 | |
/* Replace the section (OFF/CHAROFF, LEN/CHARLEN) of EI with the contents
   of the Eistring EI2.  EI2 is evaluated once. */
1475 #define eisub_ei(ei, off, charoff, len, charlen, ei2) \ | |
1476 do { \ | |
1477 const Eistring *ei19 = (ei2); \ | |
1478 eisub_1 (ei, off, charoff, len, charlen, ei19->data_, ei19->bytelen_, \ | |
1479 ei19->charlen_); \ | |
1480 } while (0) | |
1481 | |
/* Replace the section (OFF/CHAROFF, LEN/CHARLEN) of EI with the
   null-terminated ASCII string C_STRING.  The string is checked with
   EI_ASSERT_ASCII (a no-op unless ERROR_CHECK_CHARBPOS is defined); its
   character length is passed as -1 and computed inside eisub_1().
   C_STRING is evaluated once. */
1482 #define eisub_c(ei, off, charoff, len, charlen, c_string) \ | |
1483 do { \ | |
1484 const Char_ASCII *ei20 = (c_string); \ | |
1485 int ei20len = strlen (ei20); \ | |
1486 EI_ASSERT_ASCII (ei20, ei20len); \ | |
1487 eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1); \ | |
1488 } while (0) | |
1489 | |
/* Replace the section (OFF/CHAROFF, LEN/CHARLEN) of EI with the single
   character CH, encoded into a local buffer with set_charptr_emchar().
   CH is evaluated once. */
1490 #define eisub_ch(ei, off, charoff, len, charlen, ch) \ | |
1491 do { \ | |
1492 Intbyte ei21ch[MAX_EMCHAR_LEN]; \ | |
1493 Bytecount ei21len = set_charptr_emchar (ei21ch, ch); \ | |
1494 eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1); \ | |
1495 } while (0) | |
1496 | |
/* Delete the section (OFF/CHAROFF, LEN/CHARLEN) of EI, by replacing it
   with zero bytes of data. */
1497 #define eidel(ei, off, charoff, len, charlen) \ | |
1498 eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0) | |
1499 | |
1500 | |
1501 /* ----- Converting to an external format ----- */ | |
1502 | |
/* Convert the data of EI to the external format given by CODING_SYSTEM
   and store the result in extdata_ / extlen_.  For a heap-backed Eistring
   any previous external data is xfree()d first and the new copy is
   obtained with the MALLOC sink; otherwise the ALLOCA sink is used. */
1503 #define eito_external(ei, coding_system) \ | |
1504 do { \ | |
1505 if ((ei)->mallocp_) \ | |
1506 { \ | |
1507 if ((ei)->extdata_) \ | |
1508 { \ | |
1509 xfree ((ei)->extdata_); \ | |
1510 (ei)->extdata_ = 0; \ | |
1511 } \ | |
1512 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \ | |
1513 MALLOC, ((ei)->extdata_, (ei)->extlen_), \ | |
1514 coding_system); \ | |
1515 } \ | |
1516 else \ | |
1517 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \ | |
1518 ALLOCA, ((ei)->extdata_, (ei)->extlen_), \ | |
1519 coding_system); \ | |
1520 } while (0) | |
1521 | |
/* Accessors for the external-format data pointer and length that
   eito_external() stores in the Eistring. */
#define eiextdata(ei) ((ei)->extdata_)
#define eiextlen(ei)  ((ei)->extlen_)
1524 | |
1525 | |
1526 /* ----- Searching in the Eistring for a character ----- */ | |
1527 | |
/* Placeholders: each of these expands to the bare tokens
   NOT YET IMPLEMENTED, so any use fails to compile. */
#define eichr(eistr, chr) NOT YET IMPLEMENTED
#define eichr_char(eistr, chr) NOT YET IMPLEMENTED
#define eichr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eichr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_char(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
1544 | |
1545 | |
1546 /* ----- Searching in the Eistring for a string ----- */ | |
1547 | |
/* Placeholders: each of these expands to the bare tokens
   NOT YET IMPLEMENTED, so any use fails to compile. */

/* Searching for another Eistring. */
#define eistr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eistr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED

/* Searching for a C string. */
#define eistr_c(eistr, c_string) NOT YET IMPLEMENTED
#define eistr_c_char(eistr, c_string) NOT YET IMPLEMENTED
#define eistr_c_off(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eistr_c_off_char(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eirstr_c(eistr, c_string) NOT YET IMPLEMENTED
#define eirstr_c_char(eistr, c_string) NOT YET IMPLEMENTED
#define eirstr_c_off(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
#define eirstr_c_off_char(eistr, c_string, off, charoff) NOT YET IMPLEMENTED
1581 | |
1582 | |
1583 /* ----- Comparison ----- */ | |
1584 | |
1585 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, | |
1586 Bytecount len, Charcount charlen, const Intbyte *data, | |
1587 const Eistring *ei2, int is_c, int fold_case); | |
1588 | |
/* Comparison against another Eistring.  All variants call eicmp_1()
   with data = 0 and is_c = 0.  The variants without `_off' pass
   (0, -1, -1, -1) as the region of EISTR; the last argument is 0 for
   eicmp, 1 for eicasecmp and 2 for eicasecmp_i18n. */
#define eicmp_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
#define eicasecmp_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
#define eicasecmp_i18n_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)

#define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)
#define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)
#define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)
1601 | |
/* Comparison against a C string.  All variants call eicmp_1() with
   C_STRING as the data argument, ei2 = 0 and is_c = 1.  The variants
   without `_off' pass (0, -1, -1, -1) as the region of EISTR; the last
   argument is 0 for eicmp, 1 for eicasecmp and 2 for
   eicasecmp_i18n. */
#define eicmp_c(eistr, c_string) \
  eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 0)
#define eicasecmp_c(eistr, c_string) \
  eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 1)
#define eicasecmp_i18n_c(eistr, c_string) \
  eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 2)

#define eicmp_off_c(eistr, off, charoff, len, charlen, c_string) \
  eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 0)
#define eicasecmp_off_c(eistr, off, charoff, len, charlen, c_string) \
  eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 1)
#define eicasecmp_i18n_off_c(eistr, off, charoff, len, charlen, c_string) \
  eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 2)
1614 | |
1615 | |
1616 /* ----- Case-changing the Eistring ----- */ | |
1617 | |
1618 int eistr_casefiddle_1 (Intbyte *olddata, Bytecount len, Intbyte *newdata, | |
1619 int downp); | |
1620 | |
/* Change the case of EI in place (DOWNP is passed through to
   eistr_casefiddle_1()).

   A scratch buffer big enough for charlen_ characters of
   MAX_EMCHAR_LEN bytes each, plus one extra byte, is alloca()ed and
   filled by eistr_casefiddle_1().  If that returns nonzero, EI is
   repointed at the scratch buffer and its byte length and allocated
   size are updated; the character count is unchanged.  If it returns
   zero, EI is not touched.

   NOTE(review): data_ is replaced by alloca()ed storage without
   looking at mallocp_ -- confirm this is safe for malloc()ed
   Eistrings. */
#define EI_CASECHANGE(ei, downp) \
do { \
  int ei11scratch_size = (ei)->charlen_ * MAX_EMCHAR_LEN + 1; \
  Intbyte *ei11scratch = \
    (Intbyte *) alloca_array (Intbyte, ei11scratch_size); \
  int ei11result_len = \
    eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, ei11scratch, downp); \
 \
  if (ei11result_len) \
    { \
      (ei)->data_ = ei11scratch; \
      (ei)->bytelen_ = ei11result_len; \
      (ei)->max_size_allocated_ = ei11scratch_size; \
      /* charlen_ stays as it was. */ \
    } \
} while (0)
1637 | |
/* Case-change entry points: eilwr() invokes EI_CASECHANGE with
   downp = 1, eiupr() with downp = 0. */
#define eilwr(ei) EI_CASECHANGE (ei, 1)
#define eiupr(ei) EI_CASECHANGE (ei, 0)
1640 | |
1641 | |
1642 /************************************************************************/ | |
1643 /* */ | |
1644 /* Converting between internal and external format */ | |
1645 /* */ | |
1646 /************************************************************************/ | |
1647 /* | |
1648 All client code should use only the two macros | |
1649 | |
1650 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system) | |
1651 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system) | |
1652 | |
1653 Typical use is | |
1654 | |
1655 TO_INTERNAL_FORMAT (DATA, (ptr, len), | |
1656 LISP_BUFFER, buffer, | |
1657 Qfile_name); | |
1658 | |
1659 NOTE: GC is inhibited during the entire operation of these macros. This | |
1660 is because frequently the data to be converted comes from strings but | |
1661 gets passed in as just DATA, and GC may move around the string data. If | |
1662 we didn't inhibit GC, there'd have to be a lot of messy recoding, | |
1663 alloca-copying of strings and other annoying stuff. | |
1664 | |
1665 The source or sink can be specified in one of these ways: | |
1666 | |
1667 DATA, (ptr, len), // input data is a fixed buffer of size len | |
1668 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len | |
1669 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len | |
1670 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output | |
1671 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output | |
1672 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr)) | |
1673 // on input (the Unicode version is used when correct) | |
1674 LISP_STRING, string, // input or output is a Lisp_Object of type string | |
1675 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer | |
1676 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream | |
1677 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque | |
1678 | |
1679 When specifying the sink, use lvalues, since the macro will assign to them, | |
1680 except when the sink is an lstream or a lisp buffer. | |
1681 | |
1682 The macros accept the kinds of sources and sinks appropriate for | |
1683 internal and external data representation. See the type_checking_assert | |
1684 macros below for the actual allowed types. | |
1685 | |
1686 Since some sources and sinks use one argument (a Lisp_Object) to | |
1687 specify them, while others take a (pointer, length) pair, we use | |
1688 some C preprocessor trickery to allow pair arguments to be specified | |
1689 by parenthesizing them, as in the examples above. | |
1690 | |
1691 Anything prefixed by dfc_ (`data format conversion') is private. | |
1692 They are only used to implement these macros. | |
1693 | |
1694 [[Using C_STRING* is appropriate for using with external APIs that | |
1695 take null-terminated strings. For internal data, we should try to | |
1696 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'. | |
1697 | |
1698 Sometime in the future we might allow output to C_STRING_ALLOCA or | |
1699 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not | |
1700 TO_INTERNAL_FORMAT().]] | |
1701 | |
1702 The above comments are not true. Frequently (most of the time, in | |
1703 fact), external strings come as zero-terminated entities, where the | |
1704 zero-termination is the only way to find out the length. Even in | |
1705 cases where you can get the length, most of the time the system will | |
1706 still use the null to signal the end of the string, and there will | |
1707 still be no way to either send in or receive a string with embedded | |
1708 nulls. In such situations, it's pointless to track the length | |
1709 because null bytes can never be in the string. We have a lot of | |
1710 operations that make it easy to operate on zero-terminated strings, | |
1711 and forcing the user to deal with the length everywhere would only | |
1712 make the code uglier and more complicated, for no gain. --ben | |
1713 | |
1714 There is no problem using the same lvalue for source and sink. | |
1715 | |
1716 Also, when pointers are required, the code (currently at least) is | |
1717 lax and allows any pointer types, either in the source or the sink. | |
1718 This makes it possible, e.g., to deal with internal format data held | |
1719 in char *'s or external format data held in WCHAR * (i.e. Unicode). | |
1720 | |
1721 Finally, whenever storage allocation is called for, extra space is | |
1722 allocated for a terminating zero, and such a zero is stored in the | |
1723 appropriate place, regardless of whether the source data was | |
1724 specified using a length or was specified as zero-terminated. This | |
1725 allows you to freely pass the resulting data, no matter how | |
1726 obtained, to a routine that expects zero termination (modulo, of | |
1727 course, that any embedded zeros in the resulting text will cause | |
1728 truncation). In fact, currently two terminating zeros are allocated | |
1729 and stored after the data result. This is to allow for the | |
1730 possibility of storing a Unicode value on output, which needs the | |
1731 two zeros. Currently, however, the two zeros are stored regardless | |
1732 of whether the conversion is internal or external and regardless of | |
1733 whether the external coding system is in fact Unicode. This | |
1734 behavior may change in the future, and you cannot rely on this -- | |
1735 the most you can rely on is that sink data in Unicode format will | |
1736 have two terminating nulls, which combine to form one Unicode null | |
1737 character. */ | |
1738 | |
/* Convert SOURCE (described by SOURCE_TYPE) to external format using
   CODESYS and deliver the result to SINK (described by SINK_TYPE).

   Permitted source types: DATA, C_STRING, LISP_STRING, LISP_OPAQUE,
   LISP_LSTREAM.  Permitted sink types: ALLOCA, MALLOC,
   C_STRING_ALLOCA, C_STRING_MALLOC, LISP_LSTREAM, LISP_OPAQUE.
   Anything else trips the type_checking_assert.

   CODESYS is evaluated once.  The source and sink are unpacked into
   dfc_source / dfc_sink by the per-type *_TO_ARGS macros,
   dfc_convert_to_external_format() does the conversion, and the
   per-type *_USE_CONVERTED_DATA macro assigns the result to SINK. */
#define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  Lisp_Object dfc_codesys = (codesys); \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
 \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA \
      || DFC_TYPE_##source_type == DFC_TYPE_C_STRING \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
 \
  DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
 \
  dfc_convert_to_external_format (dfc_simplified_source_type, \
                                  &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, \
                                  &dfc_sink); \
 \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
1770 | |
/* Convert SOURCE (described by SOURCE_TYPE) from external to internal
   format using CODESYS and deliver the result to SINK (described by
   SINK_TYPE).

   Permitted source types: DATA, C_STRING, LISP_OPAQUE, LISP_LSTREAM.
   Permitted sink types: ALLOCA, MALLOC, C_STRING_ALLOCA,
   C_STRING_MALLOC, LISP_STRING, LISP_LSTREAM, LISP_BUFFER.  Anything
   else trips the type_checking_assert.

   CODESYS is evaluated once.  The source and sink are unpacked into
   dfc_source / dfc_sink by the per-type *_TO_ARGS macros,
   dfc_convert_to_internal_format() does the conversion, and the
   per-type *_USE_CONVERTED_DATA macro assigns the result to SINK. */
#define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  Lisp_Object dfc_codesys = (codesys); \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
 \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA \
      || DFC_TYPE_##source_type == DFC_TYPE_C_STRING \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE \
      || DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA \
      || DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM \
      || DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
 \
  DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
 \
  dfc_convert_to_internal_format (dfc_simplified_source_type, \
                                  &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, \
                                  &dfc_sink); \
 \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
1802 | |
1803 | |
1804 typedef union | |
1805 { | |
1806 struct { const void *ptr; Bytecount len; } data; | |
1807 Lisp_Object lisp_object; | |
1808 } dfc_conversion_data; | |
1809 | |
/* The source/sink kinds understood by TO_EXTERNAL_FORMAT() and
   TO_INTERNAL_FORMAT().  The order of the enumerators is kept exactly
   as it was, so their numeric values do not change. */
typedef enum dfc_conversion_type
{
  DFC_TYPE_DATA,
  DFC_TYPE_ALLOCA,
  DFC_TYPE_MALLOC,
  DFC_TYPE_C_STRING,
  DFC_TYPE_C_STRING_ALLOCA,
  DFC_TYPE_C_STRING_MALLOC,
  DFC_TYPE_LISP_STRING,
  DFC_TYPE_LISP_LSTREAM,
  DFC_TYPE_LISP_OPAQUE,
  DFC_TYPE_LISP_BUFFER
} dfc_conversion_type;
1824 | |
1825 /* WARNING: These use a static buffer. This can lead to disaster if | |
1826 these functions are not used *very* carefully. Another reason to only use | |
1827 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */ | |
1828 void | |
1829 dfc_convert_to_external_format (dfc_conversion_type source_type, | |
1830 dfc_conversion_data *source, | |
1831 Lisp_Object coding_system, | |
1832 dfc_conversion_type sink_type, | |
1833 dfc_conversion_data *sink); | |
1834 void | |
1835 dfc_convert_to_internal_format (dfc_conversion_type source_type, | |
1836 dfc_conversion_data *source, | |
1837 Lisp_Object coding_system, | |
1838 dfc_conversion_type sink_type, | |
1839 dfc_conversion_data *sink); | |
1840 /* CPP Trickery */ | |
/* Pick apart a parenthesized pair.  These are written *without* an
   argument list in front of a macro argument that is itself a
   parenthesized pair, e.g. `DFC_CPP_CAR val' with val = (ptr, len),
   which then expands to (ptr); DFC_CPP_CDR gives (len). */
#define DFC_CPP_CAR(x, y) (x)
#define DFC_CPP_CDR(x, y) (y)
1843 | |
1844 /* Convert `source' to args for dfc_convert_to_external_format() */ | |
/* DATA source: VAL is a parenthesized (ptr, len) pair, copied straight
   into dfc_source.  CODESYS is unused. */
#define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) \
do { \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
  dfc_source.data.ptr = DFC_CPP_CAR val; \
  dfc_source.data.len = DFC_CPP_CDR val; \
} while (0)
/* C_STRING source for external conversion: VAL (evaluated once) is a
   zero-terminated string; its length is taken with strlen() and the
   source is then treated as plain DATA.  CODESYS is unused. */
#define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) \
do { \
  dfc_source.data.ptr = (val); \
  dfc_source.data.len = strlen ((char *) dfc_source.data.ptr); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)
/* LISP_STRING source: VAL (evaluated once) must satisfy STRINGP; the
   object itself is passed on as the source.  CODESYS is unused. */
#define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \
do { \
  Lisp_Object dfc_src_string = (val); \
  type_checking_assert (STRINGP (dfc_src_string)); \
  dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \
  dfc_source.lisp_object = dfc_src_string; \
} while (0)
/* LISP_LSTREAM source: VAL (evaluated once) must satisfy LSTREAMP; the
   object itself is passed on as the source.  CODESYS is unused. */
#define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \
do { \
  Lisp_Object dfc_src_lstream = (val); \
  type_checking_assert (LSTREAMP (dfc_src_lstream)); \
  dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \
  dfc_source.lisp_object = dfc_src_lstream; \
} while (0)
/* LISP_OPAQUE source: the opaque object's data pointer and size are
   extracted with OPAQUE_DATA / OPAQUE_SIZE and the source is then
   treated as plain DATA.  CODESYS is unused. */
#define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \
do { \
  Lisp_Opaque *dfc_src_opaque = XOPAQUE (val); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
  dfc_source.data.ptr = OPAQUE_DATA (dfc_src_opaque); \
  dfc_source.data.len = OPAQUE_SIZE (dfc_src_opaque); \
} while (0)
1873 | |
1874 /* Convert `source' to args for dfc_convert_to_internal_format() */ | |
/* DATA source for internal conversion: identical to the external
   case. */
#define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)
/* C_STRING source for internal conversion: VAL (evaluated once) is
   external data whose length is obtained from
   dfc_external_data_len (ptr, CODESYS) rather than strlen(); the
   source is then treated as plain DATA. */
#define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) \
do { \
  dfc_source.data.ptr = (val); \
  dfc_source.data.len = \
    dfc_external_data_len (dfc_source.data.ptr, codesys); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)
/* Lisp-object sources for internal conversion: each one simply reuses
   the corresponding external-conversion macro. */
#define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)
1888 | |
1889 /* Convert `sink' to args for dfc_convert_to_*_format() */ | |
/* Sinks that receive their result as a (pointer, length) pair in
   dfc_sink.data: all of them just select the simplified sink type
   DFC_TYPE_DATA.  VAL is not used here; the matching
   *_USE_CONVERTED_DATA macro assigns to it afterwards. */
#define DFC_SINK_ALLOCA_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_MALLOC_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_STRING_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
/* LISP_LSTREAM sink: VAL (evaluated once) must satisfy LSTREAMP; the
   object itself is passed on as the sink. */
#define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) \
do { \
  Lisp_Object dfc_sink_lstream = (val); \
  type_checking_assert (LSTREAMP (dfc_sink_lstream)); \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
  dfc_sink.lisp_object = dfc_sink_lstream; \
} while (0)
/* LISP_BUFFER sink: an output stream is created on the buffer at its
   current point (BUF_PT) with make_lisp_buffer_output_stream(), and
   the conversion then writes to that stream as a LISP_LSTREAM sink.
   DFC_LISP_BUFFER_USE_CONVERTED_DATA deletes the stream afterwards. */
#define DFC_SINK_LISP_BUFFER_TO_ARGS(val) \
do { \
  struct buffer *dfc_sink_buf = XBUFFER (val); \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
  dfc_sink.lisp_object = \
    make_lisp_buffer_output_stream (dfc_sink_buf, \
                                    BUF_PT (dfc_sink_buf), 0); \
} while (0)
1915 | |
/* Assign to the `sink' lvalue(s) using the converted data. */
/* The copy macros below use `+ 2' because the converted data is
   followed by two zero bytes, to account for Unicode conversion. */
/* Pointer-to-union type through which those macros store a void *
   into a sink pointer lvalue of whatever pointer type. */
typedef union
{
  char c;
  void *p;
} *dfc_aliasing_voidpp;
/* ALLOCA sink: SINK is a parenthesized (ptr, len) pair of lvalues.
   The converted data plus its two trailing bytes is copied into
   alloca()ed storage; the pointer lvalue receives that storage and the
   length lvalue receives the data length (without the two extra
   bytes). */
#define DFC_ALLOCA_USE_CONVERTED_DATA(sink) \
do { \
  void *dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)
/* MALLOC sink: SINK is a parenthesized (ptr, len) pair of lvalues.
   The converted data plus its two trailing bytes is copied into
   xmalloc()ed storage; the pointer lvalue receives that storage and
   the length lvalue receives the data length (without the two extra
   bytes). */
#define DFC_MALLOC_USE_CONVERTED_DATA(sink) \
do { \
  void *dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)
/* C_STRING_ALLOCA sink: SINK is a single pointer lvalue.  The
   converted data plus its two trailing bytes is copied into alloca()ed
   storage, which is stored into SINK; no length is reported. */
#define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) \
do { \
  void *dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
} while (0)
/* C_STRING_MALLOC sink: SINK is a single pointer lvalue.  The
   converted data plus its two trailing bytes is copied into
   xmalloc()ed storage, which is stored into SINK; no length is
   reported. */
#define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) \
do { \
  void *dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
} while (0)
/* Lisp-object sinks. */

/* LISP_STRING: SINK is assigned a new string made from the converted
   data. */
#define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
  sink = make_string ((Intbyte *) dfc_sink.data.ptr, dfc_sink.data.len)

/* LISP_OPAQUE: SINK is assigned a new opaque made from the converted
   data. */
#define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
  sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)

/* LISP_LSTREAM: nothing to do, the data has already been used. */
#define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */

/* LISP_BUFFER: delete the stream held in dfc_sink.lisp_object (set up
   by DFC_SINK_LISP_BUFFER_TO_ARGS). */
#define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
  Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
1948 | |
1949 /* Convenience macros for extremely common invocations */ | |
/* Zero-terminated internal string -> external, alloca()ed or
   malloc()ed result. */
#define C_STRING_TO_EXTERNAL(in, out, coding_system) \
  TO_EXTERNAL_FORMAT (C_STRING, in, \
                      C_STRING_ALLOCA, out, coding_system)
#define C_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
  TO_EXTERNAL_FORMAT (C_STRING, in, \
                      C_STRING_MALLOC, out, coding_system)

/* Zero-terminated external string -> internal, alloca()ed or
   malloc()ed result. */
#define EXTERNAL_TO_C_STRING(in, out, coding_system) \
  TO_INTERNAL_FORMAT (C_STRING, in, \
                      C_STRING_ALLOCA, out, coding_system)
#define EXTERNAL_TO_C_STRING_MALLOC(in, out, coding_system) \
  TO_INTERNAL_FORMAT (C_STRING, in, \
                      C_STRING_MALLOC, out, coding_system)

/* Lisp string -> external, alloca()ed or malloc()ed result. */
#define LISP_STRING_TO_EXTERNAL(in, out, coding_system) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, \
                      C_STRING_ALLOCA, out, coding_system)
#define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, \
                      C_STRING_MALLOC, out, coding_system)
1962 | |
1963 /* Standins for various encodings, until we know them better */ | |
/* Each name below is a plain alias for an existing coding-system
   symbol. */
#define Qcommand_argument_encoding       Qnative
#define Qenvironment_variable_encoding   Qnative
#define Qunix_host_name_encoding         Qnative
#define Qunix_service_name_encoding      Qnative
#define Qmswindows_host_name_encoding    Qmswindows_multibyte
#define Qmswindows_service_name_encoding Qmswindows_multibyte
1970 | |
1971 /* Standins for various X encodings, until we know them better */ | |
1972 | |
1973 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext? | |
1974 Almost certainly the former. Use a standin for now. */ | |
#define Qlwlib_encoding          Qnative

#define Qx_atom_name_encoding    Qctext

/* Font names are often stored in atoms, so things get sticky if this
   is set to something different from the atom-name encoding. */
#define Qx_font_name_encoding    Qctext

#define Qx_color_name_encoding   Qctext

/* The following probably must agree with Qcommand_argument_encoding
   and Qenvironment_variable_encoding. */
#define Qx_display_name_encoding Qnative

#define Qstrerror_encoding       Qnative
1989 | |
/* Store in VAR (a pointer lvalue) an internal-format description of
   the error number NUM.

   NUM is evaluated once.  If strerror() returns a string, it is
   converted from Qstrerror_encoding with EXTERNAL_TO_C_STRING (so the
   result is alloca()ed).  If strerror() returns NULL, VAR is pointed
   at a 99-byte alloca()ed buffer holding "Unknown error <NUM>".

   The macro-local names no longer begin with a double underscore;
   identifiers of that form are reserved for the C implementation. */
#define GET_STRERROR(var, num) \
do { \
  int gs_errnum_ = (num); \
  Extbyte *gs_errstr_ = strerror (gs_errnum_); \
 \
  if (!gs_errstr_) \
    { \
      var = alloca_intbytes (99); \
      qxesprintf (var, "Unknown error %d", gs_errnum_); \
    } \
  else \
    EXTERNAL_TO_C_STRING (gs_errstr_, var, Qstrerror_encoding); \
} while (0)
2003 | |
2004 /************************************************************************/ | |
2005 /* Lisp string representation convenience functions */ | |
2006 /************************************************************************/ | |
2007 | |
2008 /* Because the representation of internally formatted data is subject | |
2009 to change, it's bad style to do something like | |
2010 | |
2011 strcmp (XSTRING_DATA (s), "foo") | |
2012 | |
2013 Instead, use the portable: | |
2014 | |
2015 intbyte_strcmp (XSTRING_DATA (s), "foo") or | |
2016 intbyte_memcmp (XSTRING_DATA (s), "foo", 3) | |
2017 | |
2018 */ | |
2019 | |
2020 /* Like strcmp, except first arg points at internally formatted data, | |
2021 while the second points at a string of only ASCII chars. */ | |
2022 DECLARE_INLINE_HEADER ( | |
2023 int | |
2024 intbyte_strcmp (const Intbyte *bp, const char *ascii_string) | |
2025 ) | |
2026 { | |
2027 #ifdef MULE | |
2028 while (1) | |
2029 { | |
2030 int diff; | |
2031 type_checking_assert (BYTE_ASCII_P (*ascii_string)); | |
2032 if ((diff = charptr_emchar (bp) - *(Intbyte *) ascii_string) != 0) | |
2033 return diff; | |
2034 if (*ascii_string == '\0') | |
2035 return 0; | |
2036 ascii_string++; | |
2037 INC_CHARPTR (bp); | |
2038 } | |
2039 #else | |
2040 return strcmp ((char *)bp, ascii_string); | |
2041 #endif | |
2042 } | |
2043 | |
2044 /* Like memcmp, except first arg points at internally formatted data, | |
2045 while the second points at a string of only ASCII chars. */ | |
2046 | |
2047 DECLARE_INLINE_HEADER ( | |
2048 int | |
2049 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len) | |
2050 ) | |
2051 { | |
2052 #ifdef MULE | |
2053 while (len--) | |
2054 { | |
2055 int diff = charptr_emchar (bp) - *(Intbyte *) ascii_string; | |
2056 type_checking_assert (BYTE_ASCII_P (*ascii_string)); | |
2057 if (diff != 0) | |
2058 return diff; | |
2059 ascii_string++; | |
2060 INC_CHARPTR (bp); | |
2061 } | |
2062 return 0; | |
2063 #else | |
2064 return memcmp (bp, ascii_string, len); | |
2065 #endif | |
2066 } | |
2067 | |
2068 #endif /* INCLUDED_text_h_ */ |