771
+ − 1 /* Header file for text manipulation primitives and macros.
+ − 2 Copyright (C) 1985-1995 Free Software Foundation, Inc.
+ − 3 Copyright (C) 1995 Sun Microsystems, Inc.
788
+ − 4 Copyright (C) 2000, 2001, 2002 Ben Wing.
771
+ − 5
+ − 6 This file is part of XEmacs.
+ − 7
+ − 8 XEmacs is free software; you can redistribute it and/or modify it
+ − 9 under the terms of the GNU General Public License as published by the
+ − 10 Free Software Foundation; either version 2, or (at your option) any
+ − 11 later version.
+ − 12
+ − 13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
+ − 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ − 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ − 16 for more details.
+ − 17
+ − 18 You should have received a copy of the GNU General Public License
+ − 19 along with XEmacs; see the file COPYING. If not, write to
+ − 20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ − 21 Boston, MA 02111-1307, USA. */
+ − 22
+ − 23 /* Synched up with: FSF 19.30. */
+ − 24
+ − 25 /* Authorship:
+ − 26
+ − 27 Mostly written by Ben Wing, starting around 1995.
+ − 28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz,
+ − 29 designed by Ben Wing based on earlier macros by Ben Wing.
+ − 30 Separated out June 18, 2000 from buffer.h into text.h.
+ − 31 */
+ − 32
+ − 33 #ifndef INCLUDED_text_h_
+ − 34 #define INCLUDED_text_h_
+ − 35
+ − 36 #include <wchar.h>
+ − 37
+ − 38 /* ---------------------------------------------------------------------- */
+ − 39 /* Super-basic character properties */
+ − 40 /* ---------------------------------------------------------------------- */
+ − 41
+ − 42 /* These properties define the specifics of how our current encoding fits
+ − 43 in the basic model used for the encoding. Because this model is the same
+ − 44 as is used for UTF-8, all these properties could be defined for it, too.
+ − 45 This would instantly make the rest of this file work with UTF-8 (with
+ − 46 the exception of a few called functions that would need to be redefined).
+ − 47
+ − 48 (UTF-2000 implementers, take note!)
+ − 49 */
+ − 50
+ − 51 /* If you want more than this, you need to include charset.h */
+ − 52
+ − 53 #ifndef MULE
+ − 54
/* Without MULE these are constants: the representation length is always
   one byte and every byte counts as ASCII.  The argument is never
   evaluated. */
#define BYTE_ASCII_P(byte) (1)
#define REP_BYTES_BY_FIRST_BYTE(fb) (1)
#define MAX_EMCHAR_LEN 1
+ − 58
+ − 59 #else /* MULE */
+ − 60
/* Byte classification.  Each macro gives the same answer whether BYTE
   has a signed or an unsigned type, and evaluates BYTE exactly once.
   Note that SPC and DEL are considered ASCII, not control. */

/* Nonzero if BYTE is in the range 0x00-0x7f. */
#define BYTE_ASCII_P(byte) (((byte) & ~0x7f) == 0)

/* Nonzero if BYTE is in the range 0x00-0x1f. */
#define BYTE_C0_P(byte) (((byte) & ~0x1f) == 0)

/* Nonzero if BYTE is in the range 0x80-0x9f.  Only bits 5-7 are
   examined: a signed byte in that range is negative, so after promotion
   to int the sign-extension bits would survive a mask of ~0x1f and the
   result could never compare equal to 0x80. */
#define BYTE_C1_P(byte) (((byte) & 0xe0) == 0x80)
+ − 67
/* Does BYTE represent the first byte of a character?

   BYTE is narrowed to unsigned char before the comparison, so a byte
   fetched through a plain or signed char pointer (where 0x80-0xFF
   arrive as negative values) is classified by its bit pattern instead
   of always comparing less than 0xA0.  BYTE is evaluated once. */

#define INTBYTE_FIRST_BYTE_P(byte) ((unsigned char) (byte) < 0xA0)

/* Does BYTE represent the first byte of a multi-byte character? */

#define INTBYTE_LEADING_BYTE_P(byte) BYTE_C1_P (byte)
+ − 75
+ − 76 /* Table of number of bytes in the string representation of a character
+ − 77 indexed by the first byte of that representation.
+ − 78
+ − 79 This value can be derived in other ways -- e.g. something like
+ − 80 XCHARSET_REP_BYTES (CHARSET_BY_LEADING_BYTE (first_byte))
+ − 81 but it's faster this way. */
+ − 82 extern const Bytecount rep_bytes_by_first_byte[0xA0];
+ − 83
+ − 84 /* Number of bytes in the string representation of a character. */
788
+ − 85
800
#ifdef ERROR_CHECK_TEXT

/* Checked lookup used by REP_BYTES_BY_FIRST_BYTE when text error
   checking is compiled in.

   FB is the first byte of a character's string representation; FILE
   and LINE identify the call site and are handed to assert_at_line().
   Returns the rep_bytes_by_first_byte[] entry for FB.  */
INLINE_HEADER int REP_BYTES_BY_FIRST_BYTE_1 (int fb, const char *file,
					     int line);
INLINE_HEADER int
REP_BYTES_BY_FIRST_BYTE_1 (int fb, const char *file, int line)
{
  /* FB is a plain int, so check both ends of the table: a byte that was
     read through a signed char arrives here negative and would index
     in front of the array just as surely as 0xA0 indexes past it. */
  assert_at_line (fb >= 0 && fb < 0xA0, file, line);
  return rep_bytes_by_first_byte[fb];
}

#define REP_BYTES_BY_FIRST_BYTE(fb) \
  REP_BYTES_BY_FIRST_BYTE_1 (fb, __FILE__, __LINE__)

#else /* ERROR_CHECK_TEXT */

/* Unchecked table lookup; FB must be a valid index into
   rep_bytes_by_first_byte[]. */
#define REP_BYTES_BY_FIRST_BYTE(fb) (rep_bytes_by_first_byte[fb])

#endif /* ERROR_CHECK_TEXT */
788
+ − 105
771
/* Nonzero if character C takes more than one byte in a string, i.e. if
   its code is 0x80 or above. */

#define CHAR_MULTIBYTE_P(c) (0x80 <= (c))

/* Nonzero if character C is not multibyte. */

#define CHAR_ASCII_P(c) ((c) < 0x80)

/* Upper bound used for the byte length of one character under MULE. */

#define MAX_EMCHAR_LEN 4
+ − 113
+ − 114 #endif /* MULE */
+ − 115
+ − 116 int dfc_coding_system_is_unicode (Lisp_Object coding_system);
+ − 117
+ − 118 DECLARE_INLINE_HEADER (
+ − 119 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys)
+ − 120 )
+ − 121 {
+ − 122 if (dfc_coding_system_is_unicode (codesys))
+ − 123 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr);
+ − 124 else
+ − 125 return strlen ((char *) ptr);
+ − 126 }
+ − 127
+ − 128
+ − 129 /************************************************************************/
+ − 130 /* */
+ − 131 /* working with raw internal-format data */
+ − 132 /* */
+ − 133 /************************************************************************/
+ − 134
+ − 135 /* NOTE: In all the following macros, we follow these rules concerning
+ − 136 multiple evaluation of the arguments:
+ − 137
+ − 138 1) Anything that's an lvalue can be evaluated more than once.
+ − 139 2) Anything that's a Lisp Object can be evaluated more than once.
+ − 140 This should probably be changed, but this follows the way
+ − 141 that all the macros in lisp.h do things.
+ − 142 3) 'struct buffer *' arguments can be evaluated more than once.
+ − 143 4) Nothing else can be evaluated more than once. Use inline
+ − 144 functions, if necessary, to prevent multiple evaluation.
+ − 145 5) An exception to (4) is that there are some macros below that
+ − 146 may evaluate their arguments more than once. They are all
+ − 147 denoted with the word "unsafe" in their name and are generally
+ − 148 meant to be called only by other macros that have already
+ − 149 stored the calling values in temporary variables.
+ − 150
+ − 151
+ − 152 Use the following functions/macros on contiguous strings of data.
+ − 153 If the text you're operating on is known to come from a buffer, use
+ − 154 the buffer-level functions below -- they know about the gap and may
+ − 155 be more efficient.
+ − 156
+ − 157
+ − 158 ----------------------------------------------------------------------------
+ − 159 (A) For working with charptr's (pointers to internally-formatted text):
+ − 160 ----------------------------------------------------------------------------
+ − 161
+ − 162 VALID_CHARPTR_P (ptr):
+ − 163 Given a charptr, does it point to the beginning of a character?
+ − 164
+ − 165 ASSERT_VALID_CHARPTR (ptr):
+ − 166 If error-checking is enabled, assert that the given charptr
+ − 167 points to the beginning of a character. Otherwise, do nothing.
+ − 168
+ − 169 INC_CHARPTR (ptr):
+ − 170 Given a charptr (assumed to point at the beginning of a character),
+ − 171 modify that pointer so it points to the beginning of the next
+ − 172 character.
+ − 173
+ − 174 DEC_CHARPTR (ptr):
+ − 175 Given a charptr (assumed to point at the beginning of a
+ − 176 character or at the very end of the text), modify that pointer
+ − 177 so it points to the beginning of the previous character.
+ − 178
+ − 179 VALIDATE_CHARPTR_BACKWARD (ptr):
+ − 180 Make sure that PTR is pointing to the beginning of a character.
+ − 181 If not, back up until this is the case. Note that there are not
+ − 182 too many places where it is legitimate to do this sort of thing.
+ − 183 It's an error if you're passed an "invalid" char * pointer.
+ − 184 NOTE: PTR *must* be pointing to a valid part of the string (i.e.
+ − 185 not the very end, unless the string is zero-terminated or
+ − 186 something) in order for this function to not cause crashes.
+ − 187
+ − 188 VALIDATE_CHARPTR_FORWARD (ptr):
+ − 189 Make sure that PTR is pointing to the beginning of a character.
+ − 190 If not, move forward until this is the case. Note that there
+ − 191 are not too many places where it is legitimate to do this sort
+ − 192 of thing. It's an error if you're passed an "invalid" char *
+ − 193 pointer.
+ − 194
+ − 195 ---------------------------------------------------------------------
+ − 196 (B) For working with the length (in bytes and characters) of a
+ − 197 section of internally-formatted text:
+ − 198 ---------------------------------------------------------------------
+ − 199
+ − 200 bytecount_to_charcount (ptr, nbi):
+ − 201 Given a pointer to a text string and a length in bytes,
+ − 202 return the equivalent length in characters.
+ − 203
+ − 204 charcount_to_bytecount (ptr, nch):
+ − 205 Given a pointer to a text string and a length in characters,
+ − 206 return the equivalent length in bytes.
+ − 207
+ − 208 charptr_n_addr (ptr, n):
+ − 209 Return a pointer to the beginning of the character offset N
+ − 210 (in characters) from PTR.
+ − 211
+ − 212 -------------------------------------------------------------------------
+ − 213 (C) For retrieving or changing the character pointed to by a charptr:
+ − 214 -------------------------------------------------------------------------
+ − 215
+ − 216 charptr_emchar (ptr):
+ − 217 Retrieve the character pointed to by PTR as an Emchar.
+ − 218
+ − 219 charptr_emchar_n (ptr, n):
+ − 220 Retrieve the character at offset N (in characters) from PTR,
+ − 221 as an Emchar.
+ − 222
+ − 223 set_charptr_emchar (ptr, ch):
+ − 224 Store the character CH (an Emchar) as internally-formatted
+ − 225 text starting at PTR. Return the number of bytes stored.
+ − 226
+ − 227 charptr_copy_char (src, dst):
+ − 228 Retrieve the character pointed to by SRC and store it as
+ − 229 internally-formatted text in DST.
+ − 230
+ − 231 ----------------------------------
+ − 232 (D) For working with Emchars:
+ − 233 ----------------------------------
+ − 234
+ − 235 [Note that there are other functions/macros for working with Emchars
+ − 236 in charset.h, for retrieving the charset of an Emchar and such.]
+ − 237
+ − 238 valid_char_p (ch):
+ − 239 Return whether the given Emchar is valid.
+ − 240
+ − 241 CHARP (ch):
+ − 242 Return whether the given Lisp_Object is a character.
+ − 243
+ − 244 CHECK_CHAR_COERCE_INT (ch):
+ − 245 Signal an error if CH is neither a character Lisp_Object nor an
+ − 246 integer Lisp_Object that denotes a valid character. If CH is such an
+ − 247 integer, convert it to a character Lisp_Object by repackaging.
+ − 248
+ − 249 MAX_EMCHAR_LEN:
+ − 250 Maximum number of buffer bytes per Emacs character.
+ − 251 */
+ − 252
+ − 253 /* ---------------------------------------------------------------------- */
+ − 254 /* (A) For working with charptr's (pointers to internally-formatted text) */
+ − 255 /* ---------------------------------------------------------------------- */
+ − 256
+ − 257 #ifdef MULE
/* Nonzero if the byte PTR points at passes INTBYTE_FIRST_BYTE_P.  PTR
   is parenthesized so that an expression argument such as `p + 1' is
   dereferenced as a whole rather than as `*p + 1'. */
# define VALID_CHARPTR_P(ptr) \
  INTBYTE_FIRST_BYTE_P (* (const unsigned char *) (ptr))
+ − 259 #else
/* Without MULE this is the constant 1; PTR is not evaluated. */
# define VALID_CHARPTR_P(ptr) (1)
+ − 261 #endif
+ − 262
800
/* ASSERT_VALID_CHARPTR (ptr): with ERROR_CHECK_TEXT defined, assert
   that VALID_CHARPTR_P (ptr) holds; otherwise expand to nothing. */
#if defined (ERROR_CHECK_TEXT)
# define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
#else /* ! ERROR_CHECK_TEXT */
# define ASSERT_VALID_CHARPTR(ptr)
#endif /* ! ERROR_CHECK_TEXT */
+ − 268
/* INC_CHARPTR() and DEC_CHARPTR() cannot share an implementation.
   Moving forward takes the length of the current character from its
   first byte; scanning ahead for the next valid first byte instead
   might run off the end of the string.  Moving backward cannot use the
   length lookup, because the first byte of the character being stepped
   over is not easily available, so it backs up a byte at a time until
   VALID_CHARPTR_P() accepts the position. */

/* Advance PTR by the representation length given by its first byte. */
#define REAL_INC_CHARPTR(ptr) \
  ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))))

/* Add the representation length of the character at PTR to POS; PTR
   itself is not modified. */
#define REAL_INC_CHARBYTEBPOS(ptr, pos) \
  ((pos) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))

/* Step PTR back at least one byte, stopping at the first position that
   VALID_CHARPTR_P() accepts.  Expands to a bare do-while statement
   without the trailing semicolon. */
#define REAL_DEC_CHARPTR(ptr) \
  do { --(ptr); } while (!VALID_CHARPTR_P (ptr))
+ − 285
800
/* Public character-stepping macros.  Without ERROR_CHECK_TEXT they are
   plain aliases for the REAL_ versions.  With it, the forward macros
   first assert that PTR is at the start of a character, and
   DEC_CHARPTR asserts that the distance it moved equals the
   representation length of the character it landed on. */
#ifndef ERROR_CHECK_TEXT

#define INC_CHARBYTEBPOS(ptr, pos) REAL_INC_CHARBYTEBPOS (ptr, pos)
#define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr)
#define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)

#else /* ERROR_CHECK_TEXT */

#define INC_CHARPTR(ptr) do { \
  ASSERT_VALID_CHARPTR (ptr); \
  REAL_INC_CHARPTR (ptr); \
} while (0)

#define INC_CHARBYTEBPOS(ptr, pos) do { \
  ASSERT_VALID_CHARPTR (ptr); \
  REAL_INC_CHARBYTEBPOS (ptr, pos); \
} while (0)

/* The move is done on a copy so that PTR is only assigned once the
   result has been checked. */
#define DEC_CHARPTR(ptr) do { \
  const Intbyte *dc_start = (ptr); \
  const Intbyte *dc_prev = dc_start; \
  REAL_DEC_CHARPTR (dc_prev); \
  assert (dc_start - dc_prev == \
          REP_BYTES_BY_FIRST_BYTE (*dc_prev)); \
  (ptr) = (Intbyte *) dc_prev; \
} while (0)

#endif /* ERROR_CHECK_TEXT */
771
+ − 311
+ − 312 #ifdef MULE
+ − 313
/* Move PTR backward until VALID_CHARPTR_P() accepts it; PTR is left
   alone if it is already accepted.  Note that this reads the byte at
   *PTR!  PTR must be a modifiable lvalue; it is parenthesized in the
   decrement so that an argument such as `*pp' steps the pointed-to
   pointer rather than `pp' itself. */

#define VALIDATE_CHARPTR_BACKWARD(ptr) do { \
  while (!VALID_CHARPTR_P (ptr)) (ptr)--; \
} while (0)
+ − 319
+ − 320 /* Given a Intbyte string at PTR of size N, possibly with a partial
+ − 321 character at the end, return the size of the longest substring of
+ − 322 complete characters. Does not assume that the byte at *(PTR + N) is
+ − 323 readable. */
+ − 324 DECLARE_INLINE_HEADER (
+ − 325 Bytecount
+ − 326 validate_intbyte_string_backward (Intbyte *ptr, Bytecount n)
+ − 327 )
+ − 328 {
+ − 329 Intbyte *ptr2;
+ − 330
+ − 331 if (n == 0)
+ − 332 return n;
+ − 333 ptr2 = ptr + n - 1;
+ − 334 VALIDATE_CHARPTR_BACKWARD (ptr2);
+ − 335 if (ptr2 + REP_BYTES_BY_FIRST_BYTE (*ptr2) != ptr + n)
+ − 336 return ptr2 - ptr;
+ − 337 return n;
+ − 338 }
+ − 339
/* If PTR is not at the start of a character, move it to the start of
   the following character.  Rather than scanning forward byte by byte,
   which could run off the end of the string, this backs up to the
   start of the character PTR is inside and then steps over that
   character with INC_CHARPTR(). */

#define VALIDATE_CHARPTR_FORWARD(ptr) do { \
  Intbyte *vcf_char_start = (ptr); \
  VALIDATE_CHARPTR_BACKWARD (vcf_char_start); \
  if ((ptr) != vcf_char_start) \
    { \
      (ptr) = vcf_char_start; \
      INC_CHARPTR (ptr); \
    } \
} while (0)
+ − 352
+ − 353 #else /* not MULE */
/* Non-MULE versions: the two pointer validators expand to nothing, and
   the string validator yields its length argument unchanged. */
#define VALIDATE_CHARPTR_FORWARD(ptr)
#define VALIDATE_CHARPTR_BACKWARD(ptr)
#define validate_intbyte_string_backward(ptr, n) (n)
+ − 357 #endif /* not MULE */
+ − 358
+ − 359 /* -------------------------------------------------------------- */
+ − 360 /* (B) For working with the length (in bytes and characters) of a */
+ − 361 /* section of internally-formatted text */
+ − 362 /* -------------------------------------------------------------- */
+ − 363
+ − 364 INLINE_HEADER const Intbyte *
+ − 365 charptr_n_addr (const Intbyte *ptr, Charcount offset);
+ − 366 INLINE_HEADER const Intbyte *
+ − 367 charptr_n_addr (const Intbyte *ptr, Charcount offset)
+ − 368 {
+ − 369 return ptr + charcount_to_bytecount (ptr, offset);
+ − 370 }
+ − 371
+ − 372 /* -------------------------------------------------------------------- */
+ − 373 /* (C) For retrieving or changing the character pointed to by a charptr */
+ − 374 /* -------------------------------------------------------------------- */
+ − 375
/* Single-byte versions of the character accessors.  Each one touches
   only the byte PTR (or SRC/DST) points at; the two that store a byte
   evaluate to 1. */
#define simple_charptr_emchar(ptr) ((Emchar) *(ptr))
#define simple_set_charptr_emchar(ptr, x) (*(ptr) = (Intbyte) (x), 1)
#define simple_charptr_copy_char(src, dst) (*(dst) = (src)[0], 1)
+ − 379
+ − 380 #ifdef MULE
+ − 381
+ − 382 Emchar non_ascii_charptr_emchar (const Intbyte *ptr);
+ − 383 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c);
+ − 384 Bytecount non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst);
+ − 385
+ − 386 INLINE_HEADER Emchar charptr_emchar (const Intbyte *ptr);
+ − 387 INLINE_HEADER Emchar
+ − 388 charptr_emchar (const Intbyte *ptr)
+ − 389 {
+ − 390 return BYTE_ASCII_P (*ptr) ?
+ − 391 simple_charptr_emchar (ptr) :
+ − 392 non_ascii_charptr_emchar (ptr);
+ − 393 }
+ − 394
+ − 395 INLINE_HEADER Bytecount set_charptr_emchar (Intbyte *ptr, Emchar x);
+ − 396 INLINE_HEADER Bytecount
+ − 397 set_charptr_emchar (Intbyte *ptr, Emchar x)
+ − 398 {
+ − 399 return !CHAR_MULTIBYTE_P (x) ?
+ − 400 simple_set_charptr_emchar (ptr, x) :
+ − 401 non_ascii_set_charptr_emchar (ptr, x);
+ − 402 }
+ − 403
+ − 404 INLINE_HEADER Bytecount
+ − 405 charptr_copy_char (const Intbyte *src, Intbyte *dst);
+ − 406 INLINE_HEADER Bytecount
+ − 407 charptr_copy_char (const Intbyte *src, Intbyte *dst)
+ − 408 {
+ − 409 return BYTE_ASCII_P (*src) ?
+ − 410 simple_charptr_copy_char (src, dst) :
+ − 411 non_ascii_charptr_copy_char (src, dst);
+ − 412 }
+ − 413
+ − 414 #else /* not MULE */
+ − 415
/* Without MULE the character accessors are the single-byte versions. */
# define charptr_emchar(ptr) \
   simple_charptr_emchar (ptr)
# define set_charptr_emchar(ptr, x) \
   simple_set_charptr_emchar (ptr, x)
# define charptr_copy_char(src, dst) \
   simple_charptr_copy_char (src, dst)
+ − 419
+ − 420 #endif /* not MULE */
+ − 421
/* Return the character OFFSET characters past PTR: locate it with
   charptr_n_addr(), then read it with charptr_emchar(). */
#define charptr_emchar_n(ptr, offset) \
  charptr_emchar (charptr_n_addr (ptr, offset))
+ − 424
+ − 425
+ − 426 /* ---------------------------- */
+ − 427 /* (D) For working with Emchars */
+ − 428 /* ---------------------------- */
+ − 429
+ − 430 #ifdef MULE
+ − 431
+ − 432 int non_ascii_valid_char_p (Emchar ch);
+ − 433
+ − 434 INLINE_HEADER int valid_char_p (Emchar ch);
+ − 435 INLINE_HEADER int
+ − 436 valid_char_p (Emchar ch)
+ − 437 {
+ − 438 return (! (ch & ~0xFF)) || non_ascii_valid_char_p (ch);
+ − 439 }
+ − 440
+ − 441 #else /* not MULE */
+ − 442
/* Nonzero if CH has no bits set above the low eight.  CH is
   parenthesized so that an expression argument is masked as a whole:
   without the parentheses `valid_char_p (a | b)' would parse as
   `a | (b & ~0xFF)'. */
#define valid_char_p(ch) (! ((ch) & ~0xFF))
+ − 444
+ − 445 #endif /* not MULE */
+ − 446
/* Nonzero if X passes INTP and its XINT value passes valid_char_p. */
#define CHAR_INTP(x) \
  (INTP (x) && valid_char_p (XINT (x)))

/* Nonzero if X passes CHARP or, failing that, CHAR_INTP. */
#define CHAR_OR_CHAR_INTP(x) \
  (CHARP (x) || CHAR_INTP (x))
+ − 450
+ − 451 INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj);
+ − 452 INLINE_HEADER Emchar
+ − 453 XCHAR_OR_CHAR_INT (Lisp_Object obj)
+ − 454 {
+ − 455 return CHARP (obj) ? XCHAR (obj) : XINT (obj);
+ − 456 }
+ − 457
/* Leave X alone if it passes CHARP.  Otherwise, if it passes CHAR_INTP,
   replace it with make_char (XINT (x)); if it passes neither, assign it
   the result of wrong_type_argument (Qcharacterp, x).  X is assigned
   to, so it must be an lvalue. */
#define CHECK_CHAR_COERCE_INT(x) do { \
  if (!CHARP (x)) \
    { \
      if (CHAR_INTP (x)) \
        x = make_char (XINT (x)); \
      else \
        x = wrong_type_argument (Qcharacterp, x); \
    } \
} while (0)
+ − 466
+ − 467
+ − 468
+ − 469 /************************************************************************/
+ − 470 /* */
+ − 471 /* working with Eistrings */
+ − 472 /* */
+ − 473 /************************************************************************/
+ − 474
+ − 475 /*
+ − 476 #### NOTE: This is a work in progress. Neither the API nor especially
+ − 477 the implementation is finished.
+ − 478
+ − 479 NOTE: An Eistring is a structure that makes it easy to work with
+ − 480 internally-formatted strings of data. It provides operations similar
+ − 481 in feel to the standard strcpy(), strcat(), strlen(), etc., but
+ − 482
+ − 483 (a) it is Mule-correct
+ − 484 (b) it does dynamic allocation so you never have to worry about size
793
+ − 485 restrictions
+ − 486 (c) it comes in an alloca() variety (all allocation is stack-local,
+ − 487 so there is no need to explicitly clean up) as well as a malloc()
+ − 488 variety
+ − 489 (d) it knows its own length, so it does not suffer from standard null
+ − 490 byte brain-damage -- but it null-terminates the data anyway, so
+ − 491 it can be passed to standard routines
+ − 492 (e) it provides a much more powerful set of operations and knows about
771
+ − 493 all the standard places where string data might reside: Lisp_Objects,
+ − 494 other Eistrings, Intbyte * data with or without an explicit length,
+ − 495 ASCII strings, Emchars, etc.
793
+ − 496 (f) it provides easy operations to convert to/from externally-formatted
+ − 497 data, and is easier to use than the standard TO_INTERNAL_FORMAT
771
+ − 498 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal
+ − 499 and external version of its data, but the external version is only
+ − 500 initialized or changed when you call eito_external().)
+ − 501
793
+ − 502 The idea is to make it as easy to write Mule-correct string manipulation
+ − 503 code as it is to write normal string manipulation code. We also make
+ − 504 the API sufficiently general that it can handle multiple internal data
+ − 505 formats (e.g. some fixed-width optimizing formats and a default variable
+ − 506 width format) and allows for *ANY* data format we might choose in the
+ − 507 future for the default format, including UCS2. (In other words, we can't
+ − 508 assume that the internal format is ASCII-compatible and we can't assume
+ − 509 it doesn't have embedded null bytes. We do assume, however, that any
+ − 510 chosen format will have the concept of null-termination.) All of this is
+ − 511 hidden from the user.
771
+ − 512
+ − 513 #### It is really too bad that we don't have a real object-oriented
+ − 514 language, or at least a language with polymorphism!
+ − 515
+ − 516
+ − 517 **********************************************
+ − 518 * Declaration *
+ − 519 **********************************************
+ − 520
+ − 521 To declare an Eistring, either put one of the following in the local
+ − 522 variable section:
+ − 523
+ − 524 DECLARE_EISTRING (name);
+ − 525 Declare a new Eistring. This is a standard local variable declaration
+ − 526 and can go anywhere in the variable declaration section. NAME itself
+ − 527 is declared as an Eistring *, and its storage declared on the stack.
+ − 528
+ − 529 DECLARE_EISTRING_MALLOC (name);
+ − 530 Declare a new Eistring, which uses malloc()ed instead of alloca()ed
+ − 531 data. This is a standard local variable declaration and can go
+ − 532 anywhere in the variable declaration section. Once you initialize
+ − 533 the Eistring, you will have to free it using eifree() to avoid
793
+ − 534 memory leaks. You will need to use this form if you are passing
+ − 535 an Eistring to any function that modifies it (otherwise, the
+ − 536 modified data may be in stack space and get overwritten when the
+ − 537 function returns).
771
+ − 538
+ − 539 or use
+ − 540
793
+ − 541 Eistring ei;
+ − 542 void eiinit (Eistring *ei);
+ − 543 void eiinit_malloc (Eistring *einame);
771
+ − 544 If you need to put an Eistring elsewhere than in a local variable
+ − 545 declaration (e.g. in a structure), declare it as shown and then
+ − 546 call one of the init macros.
+ − 547
+ − 548 Also note:
+ − 549
793
+ − 550 void eifree (Eistring *ei);
771
+ − 551 If you declared an Eistring to use malloc() to hold its data,
+ − 552 or converted it to the heap using eito_malloc(), then this
+ − 553 releases any data in it and afterwards resets the Eistring
+ − 554 using eiinit_malloc(). Otherwise, it just resets the Eistring
+ − 555 using eiinit().
+ − 556
+ − 557
+ − 558 **********************************************
+ − 559 * Conventions *
+ − 560 **********************************************
+ − 561
+ − 562 - The names of the functions have been chosen, where possible, to
+ − 563 match the names of str*() functions in the standard C API.
+ − 564 -
+ − 565
+ − 566
+ − 567 **********************************************
+ − 568 * Initialization *
+ − 569 **********************************************
+ − 570
+ − 571 void eireset (Eistring *eistr);
+ − 572 Initialize the Eistring to the empty string.
+ − 573
+ − 574 void eicpy_* (Eistring *eistr, ...);
+ − 575 Initialize the Eistring from somewhere:
+ − 576
+ − 577 void eicpy_ei (Eistring *eistr, Eistring *eistr2);
+ − 578 ... from another Eistring.
+ − 579 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string);
+ − 580 ... from a Lisp_Object string.
+ − 581 void eicpy_ch (Eistring *eistr, Emchar ch);
793
+ − 582 ... from an Emchar (this can be a conventional C character).
771
+ − 583
+ − 584 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string,
+ − 585 Bytecount off, Charcount charoff,
+ − 586 Bytecount len, Charcount charlen);
+ − 587 ... from a section of a Lisp_Object string.
+ − 588 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf,
+ − 589 Bytecount off, Charcount charoff,
+ − 590 Bytecount len, Charcount charlen);
+ − 591 ... from a section of a Lisp_Object buffer.
+ − 592 void eicpy_raw (Eistring *eistr, const Intbyte *data, Bytecount len);
+ − 593 ... from raw internal-format data in the default internal format.
+ − 594 void eicpy_rawz (Eistring *eistr, const Intbyte *data);
+ − 595 ... from raw internal-format data in the default internal format
+ − 596 that is "null-terminated" (the meaning of this depends on the nature
+ − 597 of the default internal format).
+ − 598 void eicpy_raw_fmt (Eistring *eistr, const Intbyte *data, Bytecount len,
+ − 599 Internal_Format intfmt);
+ − 600 ... from raw internal-format data in the specified format.
+ − 601 void eicpy_rawz_fmt (Eistring *eistr, const Intbyte *data,
+ − 602 Internal_Format intfmt);
+ − 603 ... from raw internal-format data in the specified format that is
+ − 604 "null-terminated" (the meaning of this depends on the nature of
+ − 605 the specific format).
+ − 606 void eicpy_c (Eistring *eistr, const Char_ASCII *c_string);
+ − 607 ... from an ASCII null-terminated string. Non-ASCII characters in
+ − 608 the string are *ILLEGAL* (read abort() with error-checking defined).
+ − 609 void eicpy_c_len (Eistring *eistr, const Char_ASCII *c_string, len);
+ − 610 ... from an ASCII string, with length specified. Non-ASCII characters
+ − 611 in the string are *ILLEGAL* (read abort() with error-checking defined).
+ − 612 void eicpy_ext (Eistring *eistr, const Extbyte *extdata,
+ − 613 Lisp_Object coding_system);
+ − 614 ... from external null-terminated data, with coding system specified.
+ − 615 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata,
+ − 616 Bytecount extlen, Lisp_Object coding_system);
+ − 617 ... from external data, with length and coding system specified.
+ − 618 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream);
+ − 619 ... from an lstream; reads data till eof. Data must be in default
+ − 620 internal format; otherwise, interpose a decoding lstream.
+ − 621
+ − 622
+ − 623 **********************************************
+ − 624 * Getting the data out of the Eistring *
+ − 625 **********************************************
+ − 626
+ − 627 Intbyte *eidata (Eistring *eistr);
+ − 628 Return a pointer to the raw data in an Eistring. This is NOT
+ − 629 a copy.
+ − 630
+ − 631 Lisp_Object eimake_string (Eistring *eistr);
+ − 632 Make a Lisp string out of the Eistring.
+ − 633
+ − 634 Lisp_Object eimake_string_off (Eistring *eistr,
+ − 635 Bytecount off, Charcount charoff,
+ − 636 Bytecount len, Charcount charlen);
+ − 637 Make a Lisp string out of a section of the Eistring.
+ − 638
+ − 639 void eicpyout_alloca (Eistring *eistr, LVALUE: Intbyte *ptr_out,
+ − 640 LVALUE: Bytecount len_out);
+ − 641 Make an alloca() copy of the data in the Eistring, using the
+ − 642 default internal format. Due to the nature of alloca(), this
+ − 643 must be a macro, with all lvalues passed in as parameters.
793
+ − 644 (More specifically, not all compilers correctly handle using
+ − 645 alloca() as the argument to a function call -- GCC on x86
+ − 646 didn't use to, for example.) A pointer to the alloca()ed data
+ − 647 is stored in PTR_OUT, and the length of the data (not including
+ − 648 the terminating zero) is stored in LEN_OUT.
771
+ − 649
+ − 650 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Intbyte *ptr_out,
+ − 651 LVALUE: Bytecount len_out,
+ − 652 Internal_Format intfmt);
+ − 653 Like eicpyout_alloca(), but converts to the specified internal
+ − 654 format. (No formats other than FORMAT_DEFAULT are currently
+ − 655 implemented, and you get an assertion failure if you try.)
+ − 656
+ − 657 Intbyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out);
+ − 658 Make a malloc() copy of the data in the Eistring, using the
+ − 659 default internal format. This is a real function. No lvalues
+ − 660 passed in. Returns the new data, and stores the length (not
+ − 661 including the terminating zero) using INTLEN_OUT, unless it's
+ − 662 a NULL pointer.
+ − 663
+ − 664 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt,
+ − 665 Bytecount *intlen_out);
+ − 666 Like eicpyout_malloc(), but converts to the specified internal
+ − 667 format. (No formats other than FORMAT_DEFAULT are currently
+ − 668 implemented, and you get an assertion failure if you try.)
+ − 669
+ − 670
+ − 671 **********************************************
+ − 672 * Moving to the heap *
+ − 673 **********************************************
+ − 674
+ − 675 void eito_malloc (Eistring *eistr);
+ − 676 Move this Eistring to the heap. Its data will be stored in a
+ − 677 malloc()ed block rather than the stack. Subsequent changes to
+ − 678 this Eistring will realloc() the block as necessary. Use this
+ − 679 when you want the Eistring to remain in scope past the end of
+ − 680 this function call. You will have to manually free the data
+ − 681 in the Eistring using eifree().
+ − 682
+ − 683 void eito_alloca (Eistring *eistr);
+ − 684 Move this Eistring back to the stack, if it was moved to the
+ − 685 heap with eito_malloc(). This will automatically free any
+ − 686 heap-allocated data.
+ − 687
+ − 688
+ − 689
+ − 690 **********************************************
+ − 691 * Retrieving the length *
+ − 692 **********************************************
+ − 693
+ − 694 Bytecount eilen (Eistring *eistr);
+ − 695 Return the length of the internal data, in bytes. See also
+ − 696 eiextlen(), below.
+ − 697 Charcount eicharlen (Eistring *eistr);
+ − 698 Return the length of the internal data, in characters.
+ − 699
+ − 700
+ − 701 **********************************************
+ − 702 * Working with positions *
+ − 703 **********************************************
+ − 704
+ − 705 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos);
+ − 706 Convert a char offset to a byte offset.
+ − 707 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos);
+ − 708 Convert a byte offset to a char offset.
+ − 709 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos);
+ − 710 Increment the given position by one character.
+ − 711 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
+ − 712 Increment the given position by N characters.
+ − 713 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos);
+ − 714 Decrement the given position by one character.
+ − 715 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
+ − 716 Decrement the given position by N characters.
+ − 717
+ − 718
+ − 719 **********************************************
+ − 720 * Getting the character at a position *
+ − 721 **********************************************
+ − 722
+ − 723 Emchar eigetch (Eistring *eistr, Bytecount bytepos);
+ − 724 Return the character at a particular byte offset.
+ − 725 Emchar eigetch_char (Eistring *eistr, Charcount charpos);
+ − 726 Return the character at a particular character offset.
+ − 727
+ − 728
+ − 729 **********************************************
+ − 730 * Setting the character at a position *
+ − 731 **********************************************
+ − 732
+ − 733 Emchar eisetch (Eistring *eistr, Bytecount bytepos, Emchar chr);
+ − 734 Set the character at a particular byte offset.
+ − 735 Emchar eisetch_char (Eistring *eistr, Charcount charpos, Emchar chr);
+ − 736 Set the character at a particular character offset.
+ − 737
+ − 738
+ − 739 **********************************************
+ − 740 * Concatenation *
+ − 741 **********************************************
+ − 742
+ − 743 void eicat_* (Eistring *eistr, ...);
+ − 744 Concatenate onto the end of the Eistring, with data coming from the
+ − 745 same places as above:
+ − 746
+ − 747 void eicat_ei (Eistring *eistr, Eistring *eistr2);
+ − 748 ... from another Eistring.
+ − 749 void eicat_c (Eistring *eistr, Char_ASCII *c_string);
+ − 750 ... from an ASCII null-terminated string. Non-ASCII characters in
+ − 751 the string are *ILLEGAL* (read abort() with error-checking defined).
+ − 752 void eicat_raw (Eistring *eistr, const Intbyte *data, Bytecount len);
+ − 753 ... from raw internal-format data in the default internal format.
+ − 754 void eicat_rawz (Eistring *eistr, const Intbyte *data);
+ − 755 ... from raw internal-format data in the default internal format
+ − 756 that is "null-terminated" (the meaning of this depends on the nature
+ − 757 of the default internal format).
+ − 758 void eicat_lstr (Eistring *eistr, Lisp_Object lisp_string);
+ − 759 ... from a Lisp_Object string.
+ − 760 void eicat_ch (Eistring *eistr, Emchar ch);
+ − 761 ... from an Emchar.
+ − 762
+ − 763 (All except the first variety are convenience functions.
+ − 764 In the general case, create another Eistring from the source.)
+ − 765
+ − 766
+ − 767 **********************************************
+ − 768 * Replacement *
+ − 769 **********************************************
+ − 770
+ − 771 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 772 Bytecount len, Charcount charlen, ...);
+ − 773 Replace a section of the Eistring, specifically:
+ − 774
+ − 775 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 776 Bytecount len, Charcount charlen, Eistring *eistr2);
+ − 777 ... with another Eistring.
+ − 778 void eisub_c (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 779 Bytecount len, Charcount charlen, Char_ASCII *c_string);
+ − 780 ... with an ASCII null-terminated string. Non-ASCII characters in
+ − 781 the string are *ILLEGAL* (read abort() with error-checking defined).
+ − 782 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 783 Bytecount len, Charcount charlen, Emchar ch);
+ − 784 ... with an Emchar.
+ − 785
+ − 786 void eidel (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 787 Bytecount len, Charcount charlen);
+ − 788 Delete a section of the Eistring.
+ − 789
+ − 790
+ − 791 **********************************************
+ − 792 * Converting to an external format *
+ − 793 **********************************************
+ − 794
+ − 795 void eito_external (Eistring *eistr, Lisp_Object coding_system);
+ − 796 Convert the Eistring to an external format and store the result
+ − 797 in the string. NOTE: Further changes to the Eistring will *NOT*
+ − 798 change the external data stored in the string. You will have to
+ − 799 call eito_external() again in such a case if you want the external
+ − 800 data.
+ − 801
+ − 802 Extbyte *eiextdata (Eistring *eistr);
+ − 803 Return a pointer to the external data stored in the Eistring as
+ − 804 a result of a prior call to eito_external().
+ − 805
+ − 806 Bytecount eiextlen (Eistring *eistr);
+ − 807 Return the length in bytes of the external data stored in the
+ − 808 Eistring as a result of a prior call to eito_external().
+ − 809
+ − 810
+ − 811 **********************************************
+ − 812 * Searching in the Eistring for a character *
+ − 813 **********************************************
+ − 814
+ − 815 Bytecount eichr (Eistring *eistr, Emchar chr);
+ − 816 Charcount eichr_char (Eistring *eistr, Emchar chr);
+ − 817 Bytecount eichr_off (Eistring *eistr, Emchar chr, Bytecount off,
+ − 818 Charcount charoff);
+ − 819 Charcount eichr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
+ − 820 Charcount charoff);
+ − 821 Bytecount eirchr (Eistring *eistr, Emchar chr);
+ − 822 Charcount eirchr_char (Eistring *eistr, Emchar chr);
+ − 823 Bytecount eirchr_off (Eistring *eistr, Emchar chr, Bytecount off,
+ − 824 Charcount charoff);
+ − 825 Charcount eirchr_off_char (Eistring *eistr, Emchar chr, Bytecount off,
+ − 826 Charcount charoff);
+ − 827
+ − 828
+ − 829 **********************************************
+ − 830 * Searching in the Eistring for a string *
+ − 831 **********************************************
+ − 832
+ − 833 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2);
+ − 834 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2);
+ − 835 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
+ − 836 Charcount charoff);
+ − 837 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2,
+ − 838 Bytecount off, Charcount charoff);
+ − 839 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2);
+ − 840 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2);
+ − 841 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
+ − 842 Charcount charoff);
+ − 843 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2,
+ − 844 Bytecount off, Charcount charoff);
+ − 845
+ − 846 Bytecount eistr_c (Eistring *eistr, Char_ASCII *c_string);
+ − 847 Charcount eistr_c_char (Eistring *eistr, Char_ASCII *c_string);
+ − 848 Bytecount eistr_c_off (Eistring *eistr, Char_ASCII *c_string, Bytecount off,
+ − 849 Charcount charoff);
+ − 850 Charcount eistr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
+ − 851 Bytecount off, Charcount charoff);
+ − 852 Bytecount eirstr_c (Eistring *eistr, Char_ASCII *c_string);
+ − 853 Charcount eirstr_c_char (Eistring *eistr, Char_ASCII *c_string);
+ − 854 Bytecount eirstr_c_off (Eistring *eistr, Char_ASCII *c_string,
+ − 855 Bytecount off, Charcount charoff);
+ − 856 Charcount eirstr_c_off_char (Eistring *eistr, Char_ASCII *c_string,
+ − 857 Bytecount off, Charcount charoff);
+ − 858
+ − 859
+ − 860 **********************************************
+ − 861 * Comparison *
+ − 862 **********************************************
+ − 863
+ − 864 int eicmp_* (Eistring *eistr, ...);
+ − 865 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 866 Bytecount len, Charcount charlen, ...);
+ − 867 int eicasecmp_* (Eistring *eistr, ...);
+ − 868 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 869 Bytecount len, Charcount charlen, ...);
+ − 870 int eicasecmp_i18n_* (Eistring *eistr, ...);
+ − 871 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 872 Bytecount len, Charcount charlen, ...);
+ − 873
+ − 874 Compare the Eistring with the other data. Return value same as
+ − 875 from strcmp. The `*' is either `ei' for another Eistring (in
+ − 876 which case `...' is an Eistring), or `c' for a pure-ASCII string
+ − 877 (in which case `...' is a pointer to that string). For anything
+ − 878 more complex, first create an Eistring out of the source.
+ − 879 Comparison is either simple (`eicmp_...'), ASCII case-folding
+ − 880 (`eicasecmp_...'), or multilingual case-folding
+ − 881 (`eicasecmp_i18n_...').
+ − 882
+ − 883
+ − 884 More specifically, the prototypes are:
+ − 885
+ − 886 int eicmp_ei (Eistring *eistr, Eistring *eistr2);
+ − 887 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 888 Bytecount len, Charcount charlen, Eistring *eistr2);
+ − 889 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2);
+ − 890 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 891 Bytecount len, Charcount charlen, Eistring *eistr2);
+ − 892 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2);
+ − 893 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off,
+ − 894 Charcount charoff, Bytecount len,
+ − 895 Charcount charlen, Eistring *eistr2);
+ − 896
+ − 897 int eicmp_c (Eistring *eistr, Char_ASCII *c_string);
+ − 898 int eicmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 899 Bytecount len, Charcount charlen, Char_ASCII *c_string);
+ − 900 int eicasecmp_c (Eistring *eistr, Char_ASCII *c_string);
+ − 901 int eicasecmp_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 902 Bytecount len, Charcount charlen,
+ − 903 Char_ASCII *c_string);
+ − 904 int eicasecmp_i18n_c (Eistring *eistr, Char_ASCII *c_string);
+ − 905 int eicasecmp_i18n_off_c (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 906 Bytecount len, Charcount charlen,
+ − 907 Char_ASCII *c_string);
+ − 908
+ − 909
+ − 910 **********************************************
+ − 911 * Case-changing the Eistring *
+ − 912 **********************************************
+ − 913
+ − 914 void eilwr (Eistring *eistr);
+ − 915 Convert all characters in the Eistring to lowercase.
+ − 916 void eiupr (Eistring *eistr);
+ − 917 Convert all characters in the Eistring to uppercase.
+ − 918 */
+ − 919
+ − 920
+ − 921 /* Principles for writing Eistring functions:
+ − 922
+ − 923 (1) Unfortunately, we have to write most of the Eistring functions
+ − 924 as macros, because of the use of alloca(). The principle used
+ − 925 below to assure no conflict in local variables is to prefix all
+ − 926 local variables with "ei" plus a number, which should be unique
+ − 927 among macros. In practice, when finding a new number, find the
+ − 928 highest so far used, and add 1.
+ − 929
+ − 930 (2) We also suffix the Eistring fields with an _ to avoid problems
+ − 931 with macro parameters of the same name. (And as the standard
+ − 932 signal not to access these fields directly.)
+ − 933
+ − 934 (3) We maintain both the length in bytes and chars of the data in
+ − 935 the Eistring at all times, for convenient retrieval by outside
+ − 936 functions. That means when writing functions that manipulate
+ − 937 Eistrings, you too need to keep both lengths up to date for all
+ − 938 data that you work with.
+ − 939
+ − 940 (4) When writing a new type of operation (e.g. substitution), you
+ − 941 will often find yourself working with outside data, and thus
+ − 942 have a series of related API's, for different forms that the
+ − 943 outside data is in. Generally, you will want to choose a
+ − 944 subset of the forms supported by eicpy_*, which has to be
+ − 945 totally general because that's the fundamental way to get data
+ − 946 into an Eistring. Once the data is in the string, it
+ − 947 would be possible to create a whole series of Ei operations that work on
+ − 948 nothing but Eistrings. Although theoretically nice, in
+ − 949 practice it's a hassle, so we suggest that you provide
+ − 950 convenience functions. In particular, there are two paths you
+ − 951 can take. One is minimalist -- it only allows other Eistrings
+ − 952 and ASCII data, and Emchars if the particular operation makes
+ − 953 sense with a character. The other provides interfaces for the
+ − 954 most commonly-used forms -- Eistring, ASCII data, Lisp string,
+ − 955 raw internal-format string with length, raw internal-format
+ − 956 string without, and possibly Emchar. (In the function names,
+ − 957 these are designated `ei', `c', `lstr', `raw', `rawz', and
+ − 958 `ch', respectively.)
+ − 959
+ − 960 (5) When coding a new type of operation, such as was discussed in
+ − 961 the previous point, the correct approach is to declare a worker
+ − 962 function that does the work of everything, and is called by the
+ − 963 other "container" macros that handle the different outside data
+ − 964 forms. The data coming into the worker function, which
+ − 965 typically ends in `_1', is in the form of three parameters:
+ − 966 DATA, LEN, CHARLEN. (See point [3] about having two lengths and
+ − 967 keeping them in sync.)
+ − 968
+ − 969 (6) Handling argument evaluation in macros: We take great care
+ − 970 never to evaluate any argument more than once in any macro,
+ − 971 except the initial Eistring parameter. This can and will be
+ − 972 evaluated multiple times, but it should pretty much always just
+ − 973 be a simple variable. This means, for example, that if an
+ − 974 Eistring is the second (not first) argument of a macro, it
+ − 975 doesn't fall under the "initial Eistring" exemption, so it
+ − 976 needs protection against multi-evaluation. (Take the address of
+ − 977 the Eistring structure, store it in a temporary variable, and use
+ − 978 that temporary variable for all access to the Eistring.)
+ − 979 Essentially, we want it to appear as if these Eistring macros
+ − 980 are functions -- we would like to declare them as functions but
+ − 981 they use alloca(), so we can't (and we can't make them inline
+ − 982 functions either -- alloca() is explicitly disallowed in inline
+ − 983 functions.)
+ − 984
+ − 985 (7) Note that our rules regarding multiple evaluation are *more*
+ − 986 strict than the rules listed above under the heading "working
+ − 987 with raw internal-format data".
+ − 988 */
+ − 989
+ − 990
+ − 991 /* ----- Declaration ----- */
+ − 992
+ − 993 typedef struct
+ − 994 {
+ − 995 /* Data for the Eistring, stored in the default internal format.
+ − 996 Always includes terminating null. */
+ − 997 Intbyte *data_;
+ − 998 /* Total number of bytes allocated in DATA (including null). */
+ − 999 Bytecount max_size_allocated_;
+ − 1000 Bytecount bytelen_;
+ − 1001 Charcount charlen_;
+ − 1002 int mallocp_;
+ − 1003
+ − 1004 Extbyte *extdata_;
+ − 1005 Bytecount extlen_;
+ − 1006 } Eistring;
+ − 1007
/* Selector passed to the *_fmt entry points to name the internal text
   representation of raw data.  The macros in this file assert that it
   is FORMAT_DEFAULT.
   NOTE(review): the FIXED_8/16/32 values presumably denote fixed-width
   representations -- confirm against their users. */
typedef enum internal_format
{
  FORMAT_DEFAULT,
  FORMAT_FIXED_8,
  FORMAT_FIXED_16,
  FORMAT_FIXED_32
} Internal_Format;
+ − 1015
+ − 1016 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init;
+ − 1017
+ − 1018 #define DECLARE_EISTRING(name) \
+ − 1019 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \
+ − 1020 Eistring *name = & __ ## name ## __storage__
+ − 1021 #define DECLARE_EISTRING_MALLOC(name) \
+ − 1022 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \
+ − 1023 Eistring *name = & __ ## name ## __storage__
+ − 1024
/* Overwrite *EI with the_eistring_zero_init.  Nothing is freed. */
#define eiinit(ei) \
do { \
  *(ei) = the_eistring_zero_init; \
} while (0)

/* Overwrite *EI with the_eistring_malloc_zero_init.  Nothing is freed. */
#define eiinit_malloc(ei) \
do { \
  *(ei) = the_eistring_malloc_zero_init; \
} while (0)
+ − 1034
+ − 1035
+ − 1036 /* ----- Utility ----- */
+ − 1037
/* Fill in whichever of LEN (a byte count) and CHARLEN (a character
   count) was given as -1, computing it from the other one over the text
   at PTR.  LEN is checked first; CHARLEN is only looked at when LEN was
   already specified.  LEN and CHARLEN must be lvalues.  PTR is
   evaluated at most once, the others multiple times. */
#define eifixup_bytechar(ptr, len, charlen) \
do { \
  if ((len) == -1) \
    (len) = charcount_to_bytecount (ptr, charlen); \
  else if ((charlen) == -1) \
    (charlen) = bytecount_to_charcount (ptr, len); \
} while (0)

/* Fill in LEN from CHARLEN if LEN was given as -1; CHARLEN is never
   modified.  PTR is evaluated at most once, the others multiple
   times. */
#define eifixup_byte(ptr, len, charlen) \
do { \
  if ((len) == -1) \
    (len) = charcount_to_bytecount (ptr, charlen); \
} while (0)

/* Fill in CHARLEN from LEN if CHARLEN was given as -1; LEN is never
   modified.  PTR is evaluated at most once, the others multiple
   times. */
#define eifixup_char(ptr, len, charlen) \
do { \
  if ((charlen) == -1) \
    (charlen) = bytecount_to_charcount (ptr, len); \
} while (0)
+ − 1063
+ − 1064
+ − 1065
/* Make sure EI can hold NEWBYTELEN bytes (which is NEWCHARLEN chars)
   plus a zero terminator, and record both new lengths in EI.  Existing
   data is preserved as much as possible, including the existing zero
   terminator.  A new zero terminator is put where it should go if NEWZ
   is non-zero.  When the byte length does not change, nothing is
   allocated and no terminator is written.  All args but EI are
   evaluated only once.

   The buffer only ever grows: by a factor of 1.5 per step, with a
   minimum size of 32 bytes. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz) \
do { \
  int ei1oldeibytelen = (ei)->bytelen_; \
  \
  (ei)->charlen_ = (newcharlen); \
  (ei)->bytelen_ = (newbytelen); \
  \
  if (ei1oldeibytelen != (ei)->bytelen_) \
    { \
      int ei1newsize = (ei)->max_size_allocated_; \
      /* Find a size that fits the text plus its terminator. */ \
      while (ei1newsize < (ei)->bytelen_ + 1) \
        { \
          ei1newsize = (int) (ei1newsize * 1.5); \
          if (ei1newsize < 32) \
            ei1newsize = 32; \
        } \
      if (ei1newsize != (ei)->max_size_allocated_) \
        { \
          if ((ei)->mallocp_) \
            /* xrealloc always preserves existing data as much as \
               possible */ \
            (ei)->data_ = (Intbyte *) xrealloc ((ei)->data_, ei1newsize); \
          else \
            { \
              /* We don't have realloc, so alloca() more space and copy \
                 the data (and its terminator) into it. */ \
              Intbyte *ei1oldeidata = (ei)->data_; \
              (ei)->data_ = (Intbyte *) alloca (ei1newsize); \
              if (ei1oldeidata) \
                memcpy ((ei)->data_, ei1oldeidata, ei1oldeibytelen + 1); \
            } \
          (ei)->max_size_allocated_ = ei1newsize; \
        } \
      if (newz) \
        (ei)->data_[(ei)->bytelen_] = '\0'; \
    } \
} while (0)
+ − 1107
/* Resize EI to BYTELEN bytes / CHARLEN chars with EI_ALLOC (asking for
   a new zero terminator), then copy that many bytes from DATA into
   EI's buffer, replacing its previous contents. */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen) \
do { \
  EI_ALLOC (ei, bytelen, charlen, 1); \
  memcpy ((ei)->data_, data, (ei)->bytelen_); \
} while (0)
+ − 1113
800
#ifdef ERROR_CHECK_TEXT
/* Assert that each of the LEN bytes starting at PTR is an ASCII
   character, i.e. lies in the range 0x00 - 0x7F inclusive (DEL, 0x7F,
   is ASCII too).  Both bounds are tested so that the check works
   whether Char_ASCII is a signed or an unsigned type.  PTR and LEN are
   evaluated once.  Expands to nothing without ERROR_CHECK_TEXT. */
#define EI_ASSERT_ASCII(ptr, len) \
do { \
  int ei5; \
  const Char_ASCII *ei5ptr = (ptr); \
  int ei5len = (len); \
  \
  for (ei5 = 0; ei5 < ei5len; ei5++) \
    assert (ei5ptr[ei5] >= 0x00 && ei5ptr[ei5] <= 0x7F); \
} while (0)
/* Same check for a null-terminated string; the length is obtained with
   strlen().  PTR is evaluated once. */
#define EI_ASSERT_ASCIIZ(ptr) \
do { \
  const Char_ASCII *ei5p1 = (ptr); \
  EI_ASSERT_ASCII (ei5p1, strlen (ei5p1)); \
} while (0)
#else
#define EI_ASSERT_ASCII(ptr, len)
#define EI_ASSERT_ASCIIZ(ptr)
#endif
+ − 1133
+ − 1134
+ − 1135 /* ----- Initialization ----- */
+ − 1136
/* Make EI hold a copy of the text of the Eistring EICPY (data and both
   lengths).  EICPY is evaluated once. */
#define eicpy_ei(ei, eicpy) \
do { \
  const Eistring *ei2 = (eicpy); \
  EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_); \
} while (0)

/* Make EI hold a copy of the text of the Lisp string LISP_STRING, which
   is evaluated once. */
#define eicpy_lstr(ei, lisp_string) \
do { \
  Lisp_Object ei3 = (lisp_string); \
  EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3), \
		     XSTRING_CHAR_LENGTH (ei3)); \
} while (0)
+ − 1149
/* Make EI hold a copy of a section of the Lisp string LISP_STRING.  The
   section starts at byte offset OFF / char offset CHAROFF and spans LEN
   bytes / CHARLEN chars.  If OFF is -1 it is computed from CHAROFF; of
   LEN and CHARLEN, whichever is -1 is computed from the other.  Every
   argument but EI is evaluated once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen) \
do { \
  Lisp_Object ei23lstr = (lisp_string); \
  int ei23off = (off); \
  int ei23charoff = (charoff); \
  int ei23len = (len); \
  int ei23charlen = (charlen); \
  const Intbyte *ei23data = XSTRING_DATA (ei23lstr); \
  \
  /* Only the byte offset is needed to locate the section. */ \
  eifixup_byte (ei23data, ei23off, ei23charoff); \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen); \
  \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen); \
} while (0)
+ − 1166
/* Make EI hold a copy of the LEN bytes of raw internal-format text at
   PTR.  FMT must be FORMAT_DEFAULT (asserted).  The character length is
   computed with bytecount_to_charcount().  PTR, LEN and FMT are
   evaluated once. */
#define eicpy_raw_fmt(ei, ptr, len, fmt) \
do { \
  const Intbyte *ei12ptr = (ptr); \
  Internal_Format ei12fmt = (fmt); \
  int ei12len = (len); \
  assert (ei12fmt == FORMAT_DEFAULT); \
  EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len, \
		     bytecount_to_charcount (ei12ptr, ei12len)); \
} while (0)

/* eicpy_raw_fmt() for text in the default internal format. */
#define eicpy_raw(ei, ptr, len) eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT)
+ − 1178
/* Make EI hold a copy of the "null-terminated" raw internal-format text
   at PTR; its length is taken with qxestrlen().  FMT must be
   FORMAT_DEFAULT (asserted).  PTR and FMT are evaluated once: the
   saved copy of FMT, not the macro argument, is handed on to
   eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt) \
do { \
  const Intbyte *ei12p1ptr = (ptr); \
  Internal_Format ei12p1fmt = (fmt); \
  assert (ei12p1fmt == FORMAT_DEFAULT); \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt); \
} while (0)

/* eicpy_rawz_fmt() for text in the default internal format. */
#define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT)
+ − 1188
/* Make EI hold the single character CH.  The character is first
   encoded into a local buffer with set_charptr_emchar(), which yields
   its length in bytes. */
#define eicpy_ch(ei, ch) \
do { \
  Intbyte ei12p2[MAX_EMCHAR_LEN]; \
  Bytecount ei12p2len = set_charptr_emchar (ei12p2, ch); \
  EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1); \
} while (0)
+ − 1195
/* Make EI hold a copy of the null-terminated ASCII string C_STRING.
   With error checking enabled, non-ASCII bytes trip an assertion.  The
   data is brought in through eicpy_ext() using Qbinary.  C_STRING is
   evaluated once. */
#define eicpy_c(ei, c_string) \
do { \
  const Char_ASCII *ei4 = (c_string); \
  \
  EI_ASSERT_ASCIIZ (ei4); \
  eicpy_ext (ei, ei4, Qbinary); \
} while (0)

/* Like eicpy_c(), but for C_LEN bytes of ASCII data at C_STRING that
   need not be null-terminated.  C_STRING and C_LEN are evaluated
   once. */
#define eicpy_c_len(ei, c_string, c_len) \
do { \
  const Char_ASCII *ei6 = (c_string); \
  int ei6len = (c_len); \
  \
  EI_ASSERT_ASCII (ei6, ei6len); \
  eicpy_ext_len (ei, ei6, ei6len, Qbinary); \
} while (0)
+ − 1212
/* Set EI from EXTLEN bytes of external data at EXTDATA, converted from
   CODING_SYSTEM with TO_INTERNAL_FORMAT.  The conversion stores its
   ALLOCA result straight into EI's data_ and bytelen_ fields;
   afterwards the allocated size is recorded as bytelen_ + 1 and the
   character length is recomputed.  EXTDATA and EXTLEN are evaluated
   once.
   NOTE(review): the ALLOCA sink is used regardless of mallocp_ --
   confirm this is intended for malloc-flavored Eistrings. */
#define eicpy_ext_len(ei, extdata, extlen, coding_system) \
do { \
  const Extbyte *ei7 = (extdata); \
  int ei7len = (extlen); \
  \
  TO_INTERNAL_FORMAT (DATA, (ei7, ei7len), \
		      ALLOCA, ((ei)->data_, (ei)->bytelen_), \
		      coding_system); \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1; \
  (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \
} while (0)
+ − 1224
/* Set EI from the external data at EXTDATA, converted from
   CODING_SYSTEM.  The length of the data is obtained with
   dfc_external_data_len().  EXTDATA and CODING_SYSTEM are each
   evaluated once: the coding system is needed both for the length
   computation and for the conversion, so it is saved in a temporary
   first. */
#define eicpy_ext(ei, extdata, coding_system) \
do { \
  const Extbyte *ei8 = (extdata); \
  Lisp_Object ei8cs = (coding_system); \
  \
  eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, ei8cs), \
		 ei8cs); \
} while (0)
+ − 1232
/* Placeholders: copying from a Lisp buffer or an lstream is not
   implemented.  Any use expands to the tokens below and therefore
   fails to compile. */
#define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) \
  NOT YET IMPLEMENTED

#define eicpy_lstream(eistr, lstream) \
  NOT YET IMPLEMENTED

/* Make EISTR hold the empty string. */
#define eireset(eistr) eicpy_rawz (eistr, (Intbyte *) "")
+ − 1240
+ − 1241 /* ----- Getting the data out of the Eistring ----- */
+ − 1242
/* Pointer to the internal-format text stored in EI. */
#define eidata(ei) ((ei)->data_)

/* Build a Lisp string from the whole text of EI with make_string().
   EI is evaluated twice. */
#define eimake_string(ei) make_string (eidata (ei), eilen (ei))
+ − 1246
/* Build a Lisp string from the section of EISTR that starts at byte
   offset OFF / char offset CHAROFF and spans LEN bytes / CHARLEN chars.
   If OFF or LEN is -1, it is computed from the corresponding character
   value.  OFF, CHAROFF, LEN and CHARLEN are evaluated once.

   NOTE(review): this is a statement macro that ends in `return', so the
   new string is returned from the *enclosing function* instead of being
   the value of the macro.  It can therefore only be used as the final
   action of a function returning Lisp_Object.  Turning it into a real
   function would change how callers must use it, so that is left for a
   separate change. */
#define eimake_string_off(eistr, off, charoff, len, charlen) \
do { \
  int ei24off = (off); \
  int ei24charoff = (charoff); \
  int ei24len = (len); \
  int ei24charlen = (charlen); \
  \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff); \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen); \
  \
  return make_string ((eistr)->data_ + ei24off, ei24len); \
} while (0)
+ − 1260
+ − 1261 #define eicpyout_alloca(eistr, ptrout, lenout) \
+ − 1262 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT)
+ − 1263 #define eicpyout_malloc(eistr, lenout) \
+ − 1264 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT)
+ − 1265 Intbyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out,
+ − 1266 Internal_Format fmt);
/* Copy the text of EISTR, including its terminating null, into a fresh
   alloca_array() buffer.  The buffer pointer is stored in PTROUT (an
   lvalue of type Intbyte *) and the length in bytes, not counting the
   null, in LENOUT (an lvalue of type Bytecount).  FMT must be
   FORMAT_DEFAULT (asserted).  PTROUT, LENOUT and FMT are evaluated
   once; the outputs are reached through their addresses, hence the
   pointer-to-pointer temporary. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt) \
do { \
  Internal_Format ei23fmt = (fmt); \
  Intbyte **ei23ptrout = &(ptrout); \
  Bytecount *ei23lenout = &(lenout); \
  \
  assert (ei23fmt == FORMAT_DEFAULT); \
  \
  *ei23lenout = (eistr)->bytelen_; \
  *ei23ptrout = alloca_array (Intbyte, (eistr)->bytelen_ + 1); \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1); \
} while (0)
+ − 1279
+ − 1280 /* ----- Moving to the heap ----- */
+ − 1281
/* Release whatever EI holds and put it back into its initial state.
   If EI is malloc-flavored, its internal and external buffers (when
   present) are xfree()d and EI is re-initialized with eiinit_malloc();
   otherwise nothing is freed and EI is re-initialized with eiinit(). */
#define eifree(ei) \
do { \
  if ((ei)->mallocp_) \
    { \
      if ((ei)->data_) \
	xfree ((ei)->data_); \
      if ((ei)->extdata_) \
	xfree ((ei)->extdata_); \
      eiinit_malloc (ei); \
    } \
  else \
    eiinit (ei); \
} while (0)
+ − 1295
+ − 1296 int eifind_large_enough_buffer (int oldbufsize, int needed_size);
+ − 1297 void eito_malloc_1 (Eistring *ei);
+ − 1298
+ − 1299 #define eito_malloc(ei) eito_malloc_1 (ei)
+ − 1300
/* Convert a malloc-flavored Eistring into an alloca-flavored one: the
   internal and external buffers are copied into alloca() storage and
   the heap copies are xfree()d.  Does nothing if EI is not
   malloc-flavored.  (This is a macro, so the no-op case must fall
   through rather than `return', which would leave the caller's
   function.) */
#define eito_alloca(ei) \
do { \
  if ((ei)->mallocp_) \
    { \
      (ei)->mallocp_ = 0; \
      if ((ei)->data_) \
	{ \
	  Intbyte *ei13newdata; \
	  \
	  (ei)->max_size_allocated_ = \
	    eifind_large_enough_buffer (0, (ei)->bytelen_ + 1); \
	  ei13newdata = (Intbyte *) alloca ((ei)->max_size_allocated_); \
	  /* Copy the text together with its terminating null. */ \
	  memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1); \
	  xfree ((ei)->data_); \
	  (ei)->data_ = ei13newdata; \
	} \
      \
      if ((ei)->extdata_) \
	{ \
	  Extbyte *ei13newdata = (Extbyte *) alloca ((ei)->extlen_ + 2); \
	  \
	  memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_); \
	  /* Double null-terminate in case of Unicode data */ \
	  ei13newdata[(ei)->extlen_] = '\0'; \
	  ei13newdata[(ei)->extlen_ + 1] = '\0'; \
	  xfree ((ei)->extdata_); \
	  (ei)->extdata_ = ei13newdata; \
	} \
    } \
} while (0)
+ − 1330
+ − 1331
+ − 1332 /* ----- Retrieving the length ----- */
+ − 1333
/* Length of the text in EI, in bytes. */
#define eilen(ei) ((ei)->bytelen_)
/* Length of the text in EI, in characters. */
#define eicharlen(ei) ((ei)->charlen_)
+ − 1336
+ − 1337
+ − 1338 /* ----- Working with positions ----- */
+ − 1339
/* Convert between character offsets and byte offsets within the text
   of EI, measuring from the start of its data. */
#define eicharpos_to_bytepos(ei, charpos) \
  charcount_to_bytecount ((ei)->data_, charpos)
#define eibytepos_to_charpos(ei, bytepos) \
  bytecount_to_charcount ((ei)->data_, bytepos)
+ − 1344
+ − 1345 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr,
+ − 1346 Bytecount bytepos,
+ − 1347 Charcount n))
+ − 1348 {
+ − 1349 Intbyte *pos = eistr->data_ + bytepos;
+ − 1350 int i;
+ − 1351
800
+ − 1352 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
+ − 1353 text_checking_assert (n >= 0 && n <= eistr->charlen_);
771
+ − 1354 /* We could check N more correctly now, but that would require a
+ − 1355 call to bytecount_to_charcount(), which would be needlessly
+ − 1356 expensive (it would convert O(N) algorithms into O(N^2) algorithms
800
+ − 1357 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are
771
+ − 1358 guaranteed to catch it either inside INC_CHARPTR() or in the check
+ − 1359 below. */
+ − 1360 for (i = 0; i < n; i++)
+ − 1361 INC_CHARPTR (pos);
800
+ − 1362 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
771
+ − 1363 return pos - eistr->data_;
+ − 1364 }
+ − 1365
/* Move the byte offset BYTEPOS in EI forward by one character, or by N
   characters, and yield the new byte offset.  (No space may separate a
   function-like macro's name from its parameter list; with one, the
   name becomes an object-like macro and every call is a syntax
   error.) */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
+ − 1368
+ − 1369 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr,
+ − 1370 Bytecount bytepos,
+ − 1371 Charcount n))
+ − 1372 {
+ − 1373 Intbyte *pos = eistr->data_ + bytepos;
+ − 1374 int i;
+ − 1375
800
+ − 1376 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
+ − 1377 text_checking_assert (n >= 0 && n <= eistr->charlen_);
771
+ − 1378 /* We could check N more correctly now, but ... see above. */
+ − 1379 for (i = 0; i < n; i++)
+ − 1380 DEC_CHARPTR (pos);
800
+ − 1381 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
771
+ − 1382 return pos - eistr->data_;
+ − 1383 }
+ − 1384
/* Move the byte offset BYTEPOS in EI backward by one character, or by N
   characters, and yield the new byte offset.  (No space may separate a
   function-like macro's name from its parameter list; with one, the
   name becomes an object-like macro and every call is a syntax
   error.) */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
+ − 1387
+ − 1388
+ − 1389 /* ----- Getting the character at a position ----- */
+ − 1390
/* The character of EI found at byte offset BYTEPOS. */
#define eigetch(ei, bytepos) \
  charptr_emchar ((ei)->data_ + (bytepos))
/* The character of EI found at character offset CHARPOS. */
#define eigetch_char(ei, charpos) charptr_emchar_n ((ei)->data_, charpos)
+ − 1394
+ − 1395
+ − 1396 /* ----- Setting the character at a position ----- */
+ − 1397
/* Replace one character of EI with CHR, by way of eisub_ch().  The
   position is given as a byte offset (eisetch) or as a character offset
   (eisetch_char); the other offset and the byte length of the replaced
   character are passed as -1 so that they get computed. */
#define eisetch(ei, bytepos, chr) \
  eisub_ch (ei, bytepos, -1, -1, 1, chr)
#define eisetch_char(ei, charpos, chr) \
  eisub_ch (ei, -1, charpos, -1, 1, chr)
+ − 1402
+ − 1403
+ − 1404 /* ----- Concatenation ----- */
+ − 1405
/* Worker for the eicat_* macros: append the BYTELEN bytes (CHARLEN
   chars) at DATA to the end of EI.  The old byte length is saved before
   EI_ALLOC updates it, since it is where the new data goes.  DATA,
   BYTELEN and CHARLEN are evaluated once. */
#define eicat_1(ei, data, bytelen, charlen) \
do { \
  int ei14oldeibytelen = (ei)->bytelen_; \
  int ei14bytelen = (bytelen); \
  EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen, \
	    (ei)->charlen_ + (charlen), 1); \
  memcpy ((ei)->data_ + ei14oldeibytelen, (data), \
	  ei14bytelen); \
} while (0)
+ − 1415
/* Append the text of the Eistring EI2 to EI.  EI2 is evaluated once. */
#define eicat_ei(ei, ei2) \
do { \
  const Eistring *ei9 = (ei2); \
  eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_); \
} while (0)

/* Append the null-terminated ASCII string C_STRING to EI.  With error
   checking enabled, non-ASCII bytes trip an assertion.  C_STRING is
   evaluated once. */
#define eicat_c(ei, c_string) \
do { \
  const Char_ASCII *ei15 = (c_string); \
  int ei15len = strlen (ei15); \
  \
  EI_ASSERT_ASCII (ei15, ei15len); \
  eicat_1 (ei, ei15, ei15len, \
	   bytecount_to_charcount ((Intbyte *) ei15, ei15len)); \
} while (0)
+ − 1431
/* Append LEN bytes of raw internal-format text at DATA to EI.  The
   character count is computed with bytecount_to_charcount().  DATA and
   LEN are evaluated once. */
#define eicat_raw(ei, data, len) \
do { \
  int ei16len = (len); \
  const Intbyte *ei16data = (data); \
  eicat_1 (ei, ei16data, ei16len, \
	   bytecount_to_charcount (ei16data, ei16len)); \
} while (0)

/* Append the "null-terminated" raw internal-format text at PTR to EI;
   its length is taken with qxestrlen().  PTR is evaluated once. */
#define eicat_rawz(ei, ptr) \
do { \
  const Intbyte *ei16p5ptr = (ptr); \
  eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr)); \
} while (0)
+ − 1445
/* Append the text of the Lisp string LISP_STRING to EI.  LISP_STRING is
   evaluated once. */
#define eicat_lstr(ei, lisp_string) \
do { \
  Lisp_Object ei17 = (lisp_string); \
  eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \
	   XSTRING_CHAR_LENGTH (ei17)); \
} while (0)

/* Append the single character CH to EI, after encoding it into a local
   buffer with set_charptr_emchar(). */
#define eicat_ch(ei, ch) \
do { \
  Intbyte ei22ch[MAX_EMCHAR_LEN]; \
  Bytecount ei22len = set_charptr_emchar (ei22ch, ch); \
  eicat_1 (ei, ei22ch, ei22len, 1); \
} while (0)
+ − 1459
+ − 1460
+ − 1461 /* ----- Replacement ----- */
+ − 1462
/* Worker for the eisub_* macros and eidel(): replace the section of EI
   at (OFF, LEN) with the data at SRC of length SRCLEN.  Every byte
   position/length has a corresponding character value (CHAROFF,
   CHARLEN, SRCCHARLEN), and in each pair either member can be -1 -- it
   will be computed from the other.  All arguments but EI are evaluated
   once.

   EI is resized first (without writing a new terminator), then the tail
   following the replaced section, together with the zero terminator, is
   shifted into place, and finally the new data is copied in. */

#define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
do { \
  int ei18off = (off); \
  int ei18charoff = (charoff); \
  int ei18len = (len); \
  int ei18charlen = (charlen); \
  Intbyte *ei18src = (Intbyte *) (src); \
  int ei18srclen = (srclen); \
  int ei18srccharlen = (srccharlen); \
  \
  int ei18oldeibytelen = (ei)->bytelen_; \
  \
  eifixup_bytechar ((ei)->data_, ei18off, ei18charoff); \
  eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen); \
  eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen); \
  \
  EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len, \
	    (ei)->charlen_ + ei18srccharlen - ei18charlen, 0); \
  if (ei18len != ei18srclen) \
    memmove ((ei)->data_ + ei18off + ei18srclen, \
	     (ei)->data_ + ei18off + ei18len, \
	     /* include zero terminator. */ \
	     ei18oldeibytelen - (ei18off + ei18len) + 1); \
  if (ei18srclen > 0) \
    memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen); \
} while (0)
+ − 1493
/* Replace the section of EI at (OFF/CHAROFF, LEN/CHARLEN) with the text
   of the Eistring EI2.  EI2 is evaluated once. */
#define eisub_ei(ei, off, charoff, len, charlen, ei2) \
do { \
  const Eistring *ei19 = (ei2); \
  eisub_1 (ei, off, charoff, len, charlen, ei19->data_, ei19->bytelen_, \
	   ei19->charlen_); \
} while (0)

/* Replace the section of EI at (OFF/CHAROFF, LEN/CHARLEN) with the
   null-terminated ASCII string C_STRING.  With error checking enabled,
   non-ASCII bytes trip an assertion.  The source character count is
   passed as -1 and computed by eisub_1().  C_STRING is evaluated
   once. */
#define eisub_c(ei, off, charoff, len, charlen, c_string) \
do { \
  const Char_ASCII *ei20 = (c_string); \
  int ei20len = strlen (ei20); \
  EI_ASSERT_ASCII (ei20, ei20len); \
  eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1); \
} while (0)
+ − 1508
/* Replace the section of EI at (OFF/CHAROFF, LEN/CHARLEN) with the
   single character CH, after encoding it into a local buffer with
   set_charptr_emchar(). */
#define eisub_ch(ei, off, charoff, len, charlen, ch) \
do { \
  Intbyte ei21ch[MAX_EMCHAR_LEN]; \
  Bytecount ei21len = set_charptr_emchar (ei21ch, ch); \
  eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1); \
} while (0)

/* Delete the section of EI at (OFF/CHAROFF, LEN/CHARLEN), i.e. replace
   it with nothing. */
#define eidel(ei, off, charoff, len, charlen) \
  eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0)
+ − 1518
+ − 1519
+ − 1520 /* ----- Converting to an external format ----- */
+ − 1521
/* Convert the text of EI to the external format given by CODING_SYSTEM
   and store the result in EI's extdata_/extlen_ fields.  For a
   malloc-flavored Eistring any previous external data is xfree()d first
   and the new data is MALLOCed; otherwise the new data is ALLOCAed.
   Only one of the two conversions runs, so CODING_SYSTEM is evaluated
   once. */
#define eito_external(ei, coding_system) \
do { \
  if ((ei)->mallocp_) \
    { \
      if ((ei)->extdata_) \
	{ \
	  xfree ((ei)->extdata_); \
	  (ei)->extdata_ = 0; \
	} \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
			  MALLOC, ((ei)->extdata_, (ei)->extlen_), \
			  coding_system); \
    } \
  else \
    TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
			ALLOCA, ((ei)->extdata_, (ei)->extlen_), \
			coding_system); \
} while (0)
+ − 1540
/* Accessors for the external-format fields of EI (the fields that
   eito_external() assigns): the data pointer and its length. */
+ − 1541 #define eiextdata(ei) ((ei)->extdata_)
+ − 1542 #define eiextlen(ei) ((ei)->extlen_)
+ − 1543
+ − 1544
+ − 1545 /* ----- Searching in the Eistring for a character ----- */
+ − 1546
/* Placeholders for the character-search API.  Each macro expands to the
   bare tokens NOT YET IMPLEMENTED, so any attempt to use one presumably
   fails at compile time instead of silently doing nothing. */
+ − 1547 #define eichr(eistr, chr) \
+ − 1548 NOT YET IMPLEMENTED
+ − 1549 #define eichr_char(eistr, chr) \
+ − 1550 NOT YET IMPLEMENTED
+ − 1551 #define eichr_off(eistr, chr, off, charoff) \
+ − 1552 NOT YET IMPLEMENTED
+ − 1553 #define eichr_off_char(eistr, chr, off, charoff) \
+ − 1554 NOT YET IMPLEMENTED
+ − 1555 #define eirchr(eistr, chr) \
+ − 1556 NOT YET IMPLEMENTED
+ − 1557 #define eirchr_char(eistr, chr) \
+ − 1558 NOT YET IMPLEMENTED
+ − 1559 #define eirchr_off(eistr, chr, off, charoff) \
+ − 1560 NOT YET IMPLEMENTED
+ − 1561 #define eirchr_off_char(eistr, chr, off, charoff) \
+ − 1562 NOT YET IMPLEMENTED
+ − 1563
+ − 1564
+ − 1565 /* ----- Searching in the Eistring for a string ----- */
+ − 1566
/* Placeholders for the substring-search API, in two families: *_ei takes
   a second Eistring, *_c takes a C string.  Each macro expands to the bare
   tokens NOT YET IMPLEMENTED, so any attempt to use one presumably fails
   at compile time. */
+ − 1567 #define eistr_ei(eistr, eistr2) \
+ − 1568 NOT YET IMPLEMENTED
+ − 1569 #define eistr_ei_char(eistr, eistr2) \
+ − 1570 NOT YET IMPLEMENTED
+ − 1571 #define eistr_ei_off(eistr, eistr2, off, charoff) \
+ − 1572 NOT YET IMPLEMENTED
+ − 1573 #define eistr_ei_off_char(eistr, eistr2, off, charoff) \
+ − 1574 NOT YET IMPLEMENTED
+ − 1575 #define eirstr_ei(eistr, eistr2) \
+ − 1576 NOT YET IMPLEMENTED
+ − 1577 #define eirstr_ei_char(eistr, eistr2) \
+ − 1578 NOT YET IMPLEMENTED
+ − 1579 #define eirstr_ei_off(eistr, eistr2, off, charoff) \
+ − 1580 NOT YET IMPLEMENTED
+ − 1581 #define eirstr_ei_off_char(eistr, eistr2, off, charoff) \
+ − 1582 NOT YET IMPLEMENTED
+ − 1583
+ − 1584 #define eistr_c(eistr, c_string) \
+ − 1585 NOT YET IMPLEMENTED
+ − 1586 #define eistr_c_char(eistr, c_string) \
+ − 1587 NOT YET IMPLEMENTED
+ − 1588 #define eistr_c_off(eistr, c_string, off, charoff) \
+ − 1589 NOT YET IMPLEMENTED
+ − 1590 #define eistr_c_off_char(eistr, c_string, off, charoff) \
+ − 1591 NOT YET IMPLEMENTED
+ − 1592 #define eirstr_c(eistr, c_string) \
+ − 1593 NOT YET IMPLEMENTED
+ − 1594 #define eirstr_c_char(eistr, c_string) \
+ − 1595 NOT YET IMPLEMENTED
+ − 1596 #define eirstr_c_off(eistr, c_string, off, charoff) \
+ − 1597 NOT YET IMPLEMENTED
+ − 1598 #define eirstr_c_off_char(eistr, c_string, off, charoff) \
+ − 1599 NOT YET IMPLEMENTED
+ − 1600
+ − 1601
+ − 1602 /* ----- Comparison ----- */
+ − 1603
/* Worker behind all the eicmp_* / eicasecmp_* macros.  A span of EI
   (OFF/CHAROFF, LEN/CHARLEN) is compared against exactly one other operand:
   the macros pass either DATA with IS_C == 1 and EI2 == 0 (C-string
   variants), or EI2 with IS_C == 0 and DATA == 0 (Eistring variants).
   FOLD_CASE is 0 for the eicmp_* macros, 1 for eicasecmp_* and 2 for
   eicasecmp_i18n_*.  The whole-string macros pass OFF == 0 and -1 for the
   remaining span arguments.
   NOTE(review): the return value is presumably strcmp-like (negative, zero
   or positive); confirm against the definition. */
+ − 1604 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
+ − 1605 Bytecount len, Charcount charlen, const Intbyte *data,
+ − 1606 const Eistring *ei2, int is_c, int fold_case);
+ − 1607
/* Eistring-vs-Eistring comparisons.  All forward to eicmp_1 with DATA == 0
   and IS_C == 0.  The *_off_* forms pass an explicit span of EISTR; the
   others pass 0, -1, -1, -1 for the span.  The last argument is 0 (eicmp),
   1 (eicasecmp) or 2 (eicasecmp_i18n). */
+ − 1608 #define eicmp_ei(eistr, eistr2) \
+ − 1609 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
+ − 1610 #define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
+ − 1611 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)
+ − 1612 #define eicasecmp_ei(eistr, eistr2) \
+ − 1613 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
+ − 1614 #define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
+ − 1615 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)
+ − 1616 #define eicasecmp_i18n_ei(eistr, eistr2) \
+ − 1617 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)
+ − 1618 #define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
+ − 1619 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)
+ − 1620
/* Eistring-vs-C-string comparisons.  All forward to eicmp_1 with C_STRING
   as DATA, EI2 == 0 and IS_C == 1.  The *_off_* forms pass an explicit span
   of EISTR; the others pass 0, -1, -1, -1 for the span.  The last argument
   is 0 (eicmp), 1 (eicasecmp) or 2 (eicasecmp_i18n). */
+ − 1621 #define eicmp_c(eistr, c_string) \
+ − 1622 eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 0)
+ − 1623 #define eicmp_off_c(eistr, off, charoff, len, charlen, c_string) \
+ − 1624 eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 0)
+ − 1625 #define eicasecmp_c(eistr, c_string) \
+ − 1626 eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 1)
+ − 1627 #define eicasecmp_off_c(eistr, off, charoff, len, charlen, c_string) \
+ − 1628 eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 1)
+ − 1629 #define eicasecmp_i18n_c(eistr, c_string) \
+ − 1630 eicmp_1 (eistr, 0, -1, -1, -1, c_string, 0, 1, 2)
+ − 1631 #define eicasecmp_i18n_off_c(eistr, off, charoff, len, charlen, c_string) \
+ − 1632 eicmp_1 (eistr, off, charoff, len, charlen, c_string, 0, 1, 2)
+ − 1633
+ − 1634
+ − 1635 /* ----- Case-changing the Eistring ----- */
+ − 1636
/* Case-conversion worker used by EI_CASECHANGE.  It reads LEN bytes at
   OLDDATA and writes into NEWDATA; EI_CASECHANGE passes DOWNP == 1 for
   lowercasing (eilwr) and 0 for uppercasing (eiupr).  The caller treats a
   nonzero return as the new byte length and a zero return as "leave the
   string as it is".
   NOTE(review): the caller sizes NEWDATA as charlen * MAX_EMCHAR_LEN + 1
   bytes; confirm that this is the required minimum. */
+ − 1637 int eistr_casefiddle_1 (Intbyte *olddata, Bytecount len, Intbyte *newdata,
+ − 1638 int downp);
+ − 1639
/* Change the case of every character in EI: DOWNP nonzero lowercases,
   zero uppercases.

   A scratch buffer big enough for the worst case (charlen_ characters of
   MAX_EMCHAR_LEN bytes each, plus one byte) receives the converted text
   from eistr_casefiddle_1().  A nonzero result is the new byte length and
   the scratch buffer becomes EI's data; a zero result leaves EI untouched.

   The scratch buffer must have the same storage class as the string.  For
   a string with mallocp_ set it is xmalloc()ed, and the old data_ is
   xfree()d when it is replaced -- the same stack/heap split
   eito_external() makes for extdata_.  Installing an alloca()ed buffer in
   a heap-owned string would leak the old buffer and leave data_ pointing
   at stack memory.  Other strings keep using alloca storage.

   EI is evaluated several times. */
#define EI_CASECHANGE(ei, downp)					\
do {									\
  int ei11new_allocmax = (ei)->charlen_ * MAX_EMCHAR_LEN + 1;		\
  int ei11heap = (ei)->mallocp_ ? 1 : 0;				\
  Intbyte *ei11storage;							\
  int ei11newlen;							\
									\
  if (ei11heap)								\
    ei11storage = (Intbyte *) xmalloc (ei11new_allocmax);		\
  else									\
    ei11storage = (Intbyte *) alloca_array (Intbyte,			\
					    ei11new_allocmax);		\
									\
  ei11newlen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_,		\
				   ei11storage, downp);			\
									\
  if (ei11newlen)							\
    {									\
      /* Release the buffer being replaced if the string owns it. */	\
      if (ei11heap && (ei)->data_)					\
	xfree ((ei)->data_);						\
      (ei)->max_size_allocated_ = ei11new_allocmax;			\
      (ei)->data_ = ei11storage;					\
      (ei)->bytelen_ = ei11newlen;					\
      /* charlen is the same. */					\
    }									\
  else if (ei11heap)							\
    /* Nothing installed: do not leak the heap scratch buffer. */	\
    xfree (ei11storage);						\
} while (0)
+ − 1656
/* Lowercase (eilwr) or uppercase (eiupr) the Eistring EI in place, via
   EI_CASECHANGE with DOWNP set to 1 or 0 respectively. */
+ − 1657 #define eilwr(ei) EI_CASECHANGE (ei, 1)
+ − 1658 #define eiupr(ei) EI_CASECHANGE (ei, 0)
+ − 1659
+ − 1660
+ − 1661 /************************************************************************/
+ − 1662 /* */
+ − 1663 /* Converting between internal and external format */
+ − 1664 /* */
+ − 1665 /************************************************************************/
+ − 1666 /*
+ − 1667 All client code should use only the two macros
+ − 1668
+ − 1669 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
+ − 1670 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
+ − 1671
+ − 1672 Typical use is
+ − 1673
+ − 1674 TO_INTERNAL_FORMAT (DATA, (ptr, len),
+ − 1675 LISP_BUFFER, buffer,
+ − 1676 Qfile_name);
+ − 1677
+ − 1678 NOTE: GC is inhibited during the entire operation of these macros. This
+ − 1679 is because frequently the data to be converted comes from strings but
+ − 1680 gets passed in as just DATA, and GC may move around the string data. If
+ − 1681 we didn't inhibit GC, there'd have to be a lot of messy recoding,
+ − 1682 alloca-copying of strings and other annoying stuff.
+ − 1683
+ − 1684 The source or sink can be specified in one of these ways:
+ − 1685
+ − 1686 DATA, (ptr, len), // input data is a fixed buffer of size len
+ − 1687 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len
+ − 1688 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
+ − 1689 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
+ − 1690 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
+ − 1691 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr))
+ − 1692 // on input (the Unicode version is used when correct)
+ − 1693 LISP_STRING, string, // input or output is a Lisp_Object of type string
+ − 1694 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
+ − 1695 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream
+ − 1696 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque
+ − 1697
+ − 1698 When specifying the sink, use lvalues, since the macro will assign to them,
+ − 1699 except when the sink is an lstream or a lisp buffer.
+ − 1700
+ − 1701 The macros accept the kinds of sources and sinks appropriate for
+ − 1702 internal and external data representation. See the type_checking_assert
+ − 1703 macros below for the actual allowed types.
+ − 1704
+ − 1705 Since some sources and sinks use one argument (a Lisp_Object) to
+ − 1706 specify them, while others take a (pointer, length) pair, we use
+ − 1707 some C preprocessor trickery to allow pair arguments to be specified
+ − 1708 by parenthesizing them, as in the examples above.
+ − 1709
+ − 1710 Anything prefixed by dfc_ (`data format conversion') is private.
+ − 1711 They are only used to implement these macros.
+ − 1712
+ − 1713 [[Using C_STRING* is appropriate for using with external APIs that
+ − 1714 take null-terminated strings. For internal data, we should try to
+ − 1715 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'.
+ − 1716
+ − 1717 Sometime in the future we might allow output to C_STRING_ALLOCA or
+ − 1718 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not
+ − 1719 TO_INTERNAL_FORMAT().]]
+ − 1720
+ − 1721 The above comments are not true. Frequently (most of the time, in
+ − 1722 fact), external strings come as zero-terminated entities, where the
+ − 1723 zero-termination is the only way to find out the length. Even in
+ − 1724 cases where you can get the length, most of the time the system will
+ − 1725 still use the null to signal the end of the string, and there will
+ − 1726 still be no way to either send in or receive a string with embedded
+ − 1727 nulls. In such situations, it's pointless to track the length
+ − 1728 because null bytes can never be in the string. We have a lot of
+ − 1729 operations that make it easy to operate on zero-terminated strings,
+ − 1730 and forcing the user to deal with the length everywhere would only
+ − 1731 make the code uglier and more complicated, for no gain. --ben
+ − 1732
+ − 1733 There is no problem using the same lvalue for source and sink.
+ − 1734
+ − 1735 Also, when pointers are required, the code (currently at least) is
+ − 1736 lax and allows any pointer types, either in the source or the sink.
+ − 1737 This makes it possible, e.g., to deal with internal format data held
+ − 1738 in char *'s or external format data held in WCHAR * (i.e. Unicode).
+ − 1739
+ − 1740 Finally, whenever storage allocation is called for, extra space is
+ − 1741 allocated for a terminating zero, and such a zero is stored in the
+ − 1742 appropriate place, regardless of whether the source data was
+ − 1743 specified using a length or was specified as zero-terminated. This
+ − 1744 allows you to freely pass the resulting data, no matter how
+ − 1745 obtained, to a routine that expects zero termination (modulo, of
+ − 1746 course, that any embedded zeros in the resulting text will cause
+ − 1747 truncation). In fact, currently two terminating zeros are allocated
+ − 1748 and stored after the data result. This is to allow for the
+ − 1749 possibility of storing a Unicode value on output, which needs the
+ − 1750 two zeros. Currently, however, the two zeros are stored regardless
+ − 1751 of whether the conversion is internal or external and regardless of
+ − 1752 whether the external coding system is in fact Unicode. This
+ − 1753 behavior may change in the future, and you cannot rely on this --
+ − 1754 the most you can rely on is that sink data in Unicode format will
+ − 1755 have two terminating nulls, which combine to form one Unicode null
+ − 1756 character. */
+ − 1757
/* Convert internal-format data to external format.

   SOURCE_TYPE and SINK_TYPE are bare keywords that get token-pasted onto
   DFC_TYPE_, DFC_EXT_SOURCE_..._TO_ARGS, DFC_SINK_..._TO_ARGS and
   DFC_..._USE_CONVERTED_DATA to select the per-type helper macros.  The
   expansion:
     1. declares the private dfc_* temporaries and evaluates CODESYS once;
     2. asserts (type_checking_assert) that the source is one of DATA,
	C_STRING, LISP_STRING, LISP_OPAQUE, LISP_LSTREAM and the sink one of
	ALLOCA, MALLOC, C_STRING_ALLOCA, C_STRING_MALLOC, LISP_LSTREAM,
	LISP_OPAQUE;
     3. has the helper macros fill in dfc_source / dfc_sink and the
	simplified type tags;
     4. calls dfc_convert_to_external_format();
     5. lets the sink helper store the result into SINK.
   The helper macros refer to the temporaries by name, so the declarations
   and the order of the steps must not be changed independently of them. */
+ − 1758 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
+ − 1759 do { \
+ − 1760 dfc_conversion_type dfc_simplified_source_type; \
+ − 1761 dfc_conversion_type dfc_simplified_sink_type; \
+ − 1762 dfc_conversion_data dfc_source; \
+ − 1763 dfc_conversion_data dfc_sink; \
+ − 1764 Lisp_Object dfc_codesys = (codesys); \
+ − 1765 \
+ − 1766 type_checking_assert \
+ − 1767 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
+ − 1768 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
+ − 1769 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \
+ − 1770 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
+ − 1771 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
+ − 1772 && \
+ − 1773 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
+ − 1774 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
+ − 1775 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
+ − 1776 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
+ − 1777 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
+ − 1778 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
+ − 1779 \
+ − 1780 DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
+ − 1781 DFC_SINK_##sink_type##_TO_ARGS (sink); \
+ − 1782 \
+ − 1783 dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \
+ − 1784 dfc_codesys, \
+ − 1785 dfc_simplified_sink_type, &dfc_sink); \
+ − 1786 \
+ − 1787 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
+ − 1788 } while (0)
+ − 1789
/* Convert external-format data to internal format.

   Same structure as TO_EXTERNAL_FORMAT, with three differences: the source
   helpers are the DFC_INT_SOURCE_* ones, the conversion is done by
   dfc_convert_to_internal_format(), and the accepted types differ.  The
   assert allows DATA, C_STRING, LISP_OPAQUE and LISP_LSTREAM as source
   (not LISP_STRING), and ALLOCA, MALLOC, C_STRING_ALLOCA, C_STRING_MALLOC,
   LISP_STRING, LISP_LSTREAM and LISP_BUFFER as sink (not LISP_OPAQUE).
   CODESYS is evaluated once. */
+ − 1790 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
+ − 1791 do { \
+ − 1792 dfc_conversion_type dfc_simplified_source_type; \
+ − 1793 dfc_conversion_type dfc_simplified_sink_type; \
+ − 1794 dfc_conversion_data dfc_source; \
+ − 1795 dfc_conversion_data dfc_sink; \
+ − 1796 Lisp_Object dfc_codesys = (codesys); \
+ − 1797 \
+ − 1798 type_checking_assert \
+ − 1799 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
+ − 1800 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
+ − 1801 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
+ − 1802 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
+ − 1803 && \
+ − 1804 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
+ − 1805 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
+ − 1806 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
+ − 1807 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
+ − 1808 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \
+ − 1809 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
+ − 1810 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
+ − 1811 \
+ − 1812 DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
+ − 1813 DFC_SINK_##sink_type##_TO_ARGS (sink); \
+ − 1814 \
+ − 1815 dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, \
+ − 1816 dfc_codesys, \
+ − 1817 dfc_simplified_sink_type, &dfc_sink); \
+ − 1818 \
+ − 1819 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
+ − 1820 } while (0)
+ − 1821
+ − 1822
/* One end (source or sink) of a conversion: either a raw (pointer, byte
   length) pair or a Lisp object.  The accompanying dfc_conversion_type
   value tells which member is in use. */
+ − 1823 typedef union
+ − 1824 {
+ − 1825 struct { const void *ptr; Bytecount len; } data;
+ − 1826 Lisp_Object lisp_object;
+ − 1827 } dfc_conversion_data;
+ − 1828
/* Kinds of source/sink.  The enumerator names, minus the DFC_TYPE_ prefix,
   are the keywords accepted by TO_EXTERNAL_FORMAT / TO_INTERNAL_FORMAT.
   The helper macros in this file reduce them to DFC_TYPE_DATA,
   DFC_TYPE_LISP_STRING or DFC_TYPE_LISP_LSTREAM before the conversion
   functions are called. */
+ − 1829 enum dfc_conversion_type
+ − 1830 {
+ − 1831 DFC_TYPE_DATA,
+ − 1832 DFC_TYPE_ALLOCA,
+ − 1833 DFC_TYPE_MALLOC,
+ − 1834 DFC_TYPE_C_STRING,
+ − 1835 DFC_TYPE_C_STRING_ALLOCA,
+ − 1836 DFC_TYPE_C_STRING_MALLOC,
+ − 1837 DFC_TYPE_LISP_STRING,
+ − 1838 DFC_TYPE_LISP_LSTREAM,
+ − 1839 DFC_TYPE_LISP_OPAQUE,
+ − 1840 DFC_TYPE_LISP_BUFFER
+ − 1841 };
+ − 1842 typedef enum dfc_conversion_type dfc_conversion_type;
+ − 1843
+ − 1844 /* WARNING: These use a static buffer. This can lead to disaster if
+ − 1845 these functions are not used *very* carefully. Another reason to only use
+ − 1846 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
/* Conversion back ends called by TO_EXTERNAL_FORMAT / TO_INTERNAL_FORMAT.
   SOURCE and SINK are interpreted according to SOURCE_TYPE and SINK_TYPE,
   which the macros have already reduced to simplified tags.  For a
   DFC_TYPE_DATA sink, the sink-side macros read the result back from
   sink->data.ptr and sink->data.len after the call. */
+ − 1847 void
+ − 1848 dfc_convert_to_external_format (dfc_conversion_type source_type,
+ − 1849 dfc_conversion_data *source,
+ − 1850 Lisp_Object coding_system,
+ − 1851 dfc_conversion_type sink_type,
+ − 1852 dfc_conversion_data *sink);
+ − 1853 void
+ − 1854 dfc_convert_to_internal_format (dfc_conversion_type source_type,
+ − 1855 dfc_conversion_data *source,
+ − 1856 Lisp_Object coding_system,
+ − 1857 dfc_conversion_type sink_type,
+ − 1858 dfc_conversion_data *sink);
+ − 1859 /* CPP Trickery */
/* Pick the first or second element of a parenthesized pair.  They are
   used without an argument list of their own, as in `DFC_CPP_CAR val':
   VAL expands to `(ptr, len)', which then serves as the macro's argument
   list. */
+ − 1860 #define DFC_CPP_CAR(x,y) (x)
+ − 1861 #define DFC_CPP_CDR(x,y) (y)
+ − 1862
+ − 1863 /* Convert `source' to args for dfc_convert_to_external_format() */
/* Source helpers for TO_EXTERNAL_FORMAT.  Each one fills in dfc_source and
   dfc_simplified_source_type, both of which are locals of the enclosing
   TO_EXTERNAL_FORMAT expansion.  CODESYS is accepted for uniformity but is
   not used by any of them. */

/* (ptr, len) pair: taken as is. */
#define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do {			\
  dfc_source.data.ptr = DFC_CPP_CAR val;				\
  dfc_source.data.len = DFC_CPP_CDR val;				\
  dfc_simplified_source_type = DFC_TYPE_DATA;				\
} while (0)

/* Zero-terminated string: becomes plain data, length from strlen(). */
#define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do {		\
  dfc_source.data.ptr = (val);						\
  dfc_source.data.len = strlen ((char *) dfc_source.data.ptr);		\
  dfc_simplified_source_type = DFC_TYPE_DATA;				\
} while (0)

/* Lisp string: passed through as a Lisp object after a type assert. */
#define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do {		\
  Lisp_Object dfc_src_string = (val);					\
  type_checking_assert (STRINGP (dfc_src_string));			\
  dfc_simplified_source_type = DFC_TYPE_LISP_STRING;			\
  dfc_source.lisp_object = dfc_src_string;				\
} while (0)

/* Lstream: passed through as a Lisp object after a type assert. */
#define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do {		\
  Lisp_Object dfc_src_lstream = (val);					\
  type_checking_assert (LSTREAMP (dfc_src_lstream));			\
  dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM;			\
  dfc_source.lisp_object = dfc_src_lstream;				\
} while (0)

/* Opaque object: becomes plain data covering the opaque's bytes. */
#define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do {		\
  Lisp_Opaque *dfc_src_opaque = XOPAQUE (val);				\
  dfc_source.data.ptr = OPAQUE_DATA (dfc_src_opaque);			\
  dfc_source.data.len = OPAQUE_SIZE (dfc_src_opaque);			\
  dfc_simplified_source_type = DFC_TYPE_DATA;				\
} while (0)
+ − 1892
+ − 1893 /* Convert `source' to args for dfc_convert_to_internal_format() */
/* Source helpers for TO_INTERNAL_FORMAT.  Only the C_STRING case differs
   from the external-direction helpers: the length of a zero-terminated
   external string is obtained from dfc_external_data_len(), which is
   given the coding system.  Every other type reuses its
   DFC_EXT_SOURCE_* counterpart. */
#define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do {		\
  dfc_source.data.ptr = (val);						\
  dfc_source.data.len =							\
    dfc_external_data_len (dfc_source.data.ptr, codesys);		\
  dfc_simplified_source_type = DFC_TYPE_DATA;				\
} while (0)

#define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys)			\
  DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys)		\
  DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys)		\
  DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys)		\
  DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)
+ − 1907
+ − 1908 /* Convert `sink' to args for dfc_convert_to_*_format() */
/* Sink helpers shared by both conversion directions.  Each one sets
   dfc_simplified_sink_type (and, for stream-like sinks, dfc_sink), which
   are locals of the enclosing TO_*_FORMAT expansion. */

/* Sinks that are filled in only after the conversion need nothing but the
   plain-data tag here; VAL is not looked at until the matching
   DFC_*_USE_CONVERTED_DATA step. */
#define DFC_SINK_ALLOCA_TO_ARGS(val)					\
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val)				\
  DFC_SINK_ALLOCA_TO_ARGS (val)
#define DFC_SINK_MALLOC_TO_ARGS(val)					\
  DFC_SINK_ALLOCA_TO_ARGS (val)
#define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val)				\
  DFC_SINK_ALLOCA_TO_ARGS (val)
#define DFC_SINK_LISP_STRING_TO_ARGS(val)				\
  DFC_SINK_ALLOCA_TO_ARGS (val)
#define DFC_SINK_LISP_OPAQUE_TO_ARGS(val)				\
  DFC_SINK_ALLOCA_TO_ARGS (val)

/* Lstream sink: the stream object itself is the sink. */
#define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do {				\
  Lisp_Object dfc_sink_lstream = (val);					\
  type_checking_assert (LSTREAMP (dfc_sink_lstream));			\
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM;			\
  dfc_sink.lisp_object = dfc_sink_lstream;				\
} while (0)

/* Buffer sink: wrapped in a buffer output stream created at the buffer's
   BUF_PT position, then handled like an lstream sink. */
#define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do {				\
  struct buffer *dfc_sink_buf = XBUFFER (val);				\
  dfc_sink.lisp_object =						\
    make_lisp_buffer_output_stream (dfc_sink_buf,			\
				    BUF_PT (dfc_sink_buf), 0);		\
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM;			\
} while (0)
+ − 1934
+ − 1935 /* Assign to the `sink' lvalue(s) using the converted data. */
+ − 1936 /* + 2 because we double zero-extended to account for Unicode conversion */
/* Post-conversion step: hand the converted bytes (dfc_sink.data.ptr,
   dfc_sink.data.len) over to the caller's SINK.

   The ALLOCA/MALLOC variants copy len + 2 bytes (the data plus the two
   trailing zero bytes) into fresh alloca() or xmalloc() storage and store
   that pointer, and for the pair forms also the length, into the caller's
   lvalues.  The pointer is stored through the dfc_aliasing_voidpp union
   type so that SINK may be a pointer variable of any type.  alloca() is
   called directly in the expansion, so the storage belongs to the function
   that used the macro.

   LISP_STRING and LISP_OPAQUE build a new Lisp object from the data.
   LISP_LSTREAM has nothing left to do, and LISP_BUFFER deletes the stream
   held in dfc_sink.lisp_object. */
+ − 1937 typedef union { char c; void *p; } *dfc_aliasing_voidpp;
+ − 1938 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \
+ − 1939 void * dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
+ − 1940 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
+ − 1941 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
+ − 1942 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
+ − 1943 } while (0)
+ − 1944 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \
+ − 1945 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
+ − 1946 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
+ − 1947 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
+ − 1948 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
+ − 1949 } while (0)
+ − 1950 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \
+ − 1951 void * dfc_sink_ret = alloca (dfc_sink.data.len + 2); \
+ − 1952 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
+ − 1953 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
+ − 1954 } while (0)
+ − 1955 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \
+ − 1956 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \
+ − 1957 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
+ − 1958 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
+ − 1959 } while (0)
+ − 1960 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
+ − 1961 sink = make_string ((Intbyte *) dfc_sink.data.ptr, dfc_sink.data.len)
+ − 1962 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
+ − 1963 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)
+ − 1964 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */
+ − 1965 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
+ − 1966 Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
+ − 1967
+ − 1968 /* Convenience macros for extremely common invocations */
/* Naming: X_TO_EXTERNAL converts internal data IN (a zero-terminated C
   string or a Lisp string) to an external zero-terminated string stored in
   the lvalue OUT.  EXTERNAL_TO_C_STRING goes the other way.  The plain
   forms use the C_STRING_ALLOCA sink; the _MALLOC forms use the
   C_STRING_MALLOC sink, which fills OUT from xmalloc(). */
+ − 1969 #define C_STRING_TO_EXTERNAL(in, out, coding_system) \
+ − 1970 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
+ − 1971 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
+ − 1972 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
+ − 1973 #define EXTERNAL_TO_C_STRING(in, out, coding_system) \
+ − 1974 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
+ − 1975 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, coding_system) \
+ − 1976 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
+ − 1977 #define LISP_STRING_TO_EXTERNAL(in, out, coding_system) \
+ − 1978 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, coding_system)
+ − 1979 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
+ − 1980 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, coding_system)
+ − 1981
+ − 1982 /* Standins for various encodings, until we know them better */
/* Each name is an alias for an existing coding-system symbol: Qnative for
   the command-line, environment and Unix name encodings,
   Qmswindows_multibyte for the MS Windows ones.  Callers name the purpose
   and the mapping lives in one place. */
+ − 1983 #define Qcommand_argument_encoding Qnative
+ − 1984 #define Qenvironment_variable_encoding Qnative
+ − 1985 #define Qunix_host_name_encoding Qnative
+ − 1986 #define Qunix_service_name_encoding Qnative
+ − 1987 #define Qmswindows_host_name_encoding Qmswindows_multibyte
+ − 1988 #define Qmswindows_service_name_encoding Qmswindows_multibyte
+ − 1989
+ − 1990 /* Standins for various X encodings, until we know them better */
+ − 1991
+ − 1992 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext?
+ − 1993 Almost certainly the former. Use a standin for now. */
/* X-related aliases: atom, font and color names map to Qctext; lwlib,
   display names and strerror() text map to Qnative. */
+ − 1994 #define Qlwlib_encoding Qnative
+ − 1995
+ − 1996 #define Qx_atom_name_encoding Qctext
+ − 1997 /* font names are often stored in atoms, so it gets sticky if we set this
+ − 1998 to something different from atom-name encoding */
+ − 1999 #define Qx_font_name_encoding Qctext
+ − 2000
+ − 2001 #define Qx_color_name_encoding Qctext
+ − 2002
+ − 2003 /* the following probably must agree with Qcommand_argument_encoding and
+ − 2004 Qenvironment_variable_encoding */
+ − 2005 #define Qx_display_name_encoding Qnative
+ − 2006
+ − 2007 #define Qstrerror_encoding Qnative
+ − 2008
/* Store in the lvalue VAR the internal-format text for error number NUM.

   NUM is evaluated once.  When strerror() supplies a message it is
   converted from Qstrerror_encoding with EXTERNAL_TO_C_STRING.  Otherwise
   "Unknown error <NUM>" is formatted into a 99-byte alloca_intbytes()
   buffer.  Either way VAR points at alloca-style storage.

   The macro-local names avoid the double-underscore spellings, which
   are reserved for the implementation. */
#define GET_STRERROR(var, num)						\
do {									\
  int gserr_num_ = (num);						\
  Extbyte *gserr_text_ = strerror (gserr_num_);				\
									\
  if (!gserr_text_)							\
    {									\
      var = alloca_intbytes (99);					\
      qxesprintf (var, "Unknown error %d", gserr_num_);			\
    }									\
  else									\
    EXTERNAL_TO_C_STRING (gserr_text_, var, Qstrerror_encoding);	\
} while (0)
+ − 2022
+ − 2023 /************************************************************************/
+ − 2024 /* Lisp string representation convenience functions */
+ − 2025 /************************************************************************/
+ − 2026
+ − 2027 /* Because the representation of internally formatted data is subject
+ − 2028 to change, it's bad style to do something like
+ − 2029
+ − 2030 strcmp (XSTRING_DATA (s), "foo")
+ − 2031
+ − 2032 Instead, use the portable:
+ − 2033
+ − 2034 intbyte_strcmp (XSTRING_DATA (s), "foo") or
+ − 2035 intbyte_memcmp (XSTRING_DATA (s), "foo", 3)
+ − 2036
+ − 2037 */
+ − 2038
+ − 2039 /* Like strcmp, except first arg points at internally formatted data,
+ − 2040 while the second points at a string of only ASCII chars. */
+ − 2041 DECLARE_INLINE_HEADER (
+ − 2042 int
+ − 2043 intbyte_strcmp (const Intbyte *bp, const char *ascii_string)
+ − 2044 )
+ − 2045 {
+ − 2046 #ifdef MULE
+ − 2047 while (1)
+ − 2048 {
+ − 2049 int diff;
+ − 2050 type_checking_assert (BYTE_ASCII_P (*ascii_string));
+ − 2051 if ((diff = charptr_emchar (bp) - *(Intbyte *) ascii_string) != 0)
+ − 2052 return diff;
+ − 2053 if (*ascii_string == '\0')
+ − 2054 return 0;
+ − 2055 ascii_string++;
+ − 2056 INC_CHARPTR (bp);
+ − 2057 }
+ − 2058 #else
+ − 2059 return strcmp ((char *)bp, ascii_string);
+ − 2060 #endif
+ − 2061 }
+ − 2062
+ − 2063 /* Like memcmp, except first arg points at internally formatted data,
+ − 2064 while the second points at a string of only ASCII chars. */
+ − 2065
+ − 2066 DECLARE_INLINE_HEADER (
+ − 2067 int
+ − 2068 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len)
+ − 2069 )
+ − 2070 {
+ − 2071 #ifdef MULE
+ − 2072 while (len--)
+ − 2073 {
+ − 2074 int diff = charptr_emchar (bp) - *(Intbyte *) ascii_string;
+ − 2075 type_checking_assert (BYTE_ASCII_P (*ascii_string));
+ − 2076 if (diff != 0)
+ − 2077 return diff;
+ − 2078 ascii_string++;
+ − 2079 INC_CHARPTR (bp);
+ − 2080 }
+ − 2081 return 0;
+ − 2082 #else
+ − 2083 return memcmp (bp, ascii_string, len);
+ − 2084 #endif
+ − 2085 }
+ − 2086
+ − 2087 #endif /* INCLUDED_text_h_ */