771
+ − 1 /* Header file for text manipulation primitives and macros.
+ − 2 Copyright (C) 1985-1995 Free Software Foundation, Inc.
+ − 3 Copyright (C) 1995 Sun Microsystems, Inc.
2367
+ − 4 Copyright (C) 2000, 2001, 2002, 2003, 2004 Ben Wing.
771
+ − 5
+ − 6 This file is part of XEmacs.
+ − 7
+ − 8 XEmacs is free software; you can redistribute it and/or modify it
+ − 9 under the terms of the GNU General Public License as published by the
+ − 10 Free Software Foundation; either version 2, or (at your option) any
+ − 11 later version.
+ − 12
+ − 13 XEmacs is distributed in the hope that it will be useful, but WITHOUT
+ − 14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ − 15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ − 16 for more details.
+ − 17
+ − 18 You should have received a copy of the GNU General Public License
+ − 19 along with XEmacs; see the file COPYING. If not, write to
+ − 20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ − 21 Boston, MA 02111-1307, USA. */
+ − 22
+ − 23 /* Synched up with: FSF 19.30. */
+ − 24
+ − 25 /* Authorship:
+ − 26
+ − 27 Mostly written by Ben Wing, starting around 1995.
+ − 28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz,
+ − 29 designed by Ben Wing based on earlier macros by Ben Wing.
+ − 30 Separated out June 18, 2000 from buffer.h into text.h.
+ − 31 */
+ − 32
+ − 33 #ifndef INCLUDED_text_h_
+ − 34 #define INCLUDED_text_h_
+ − 35
912
+ − 36 #ifdef HAVE_WCHAR_H
771
+ − 37 #include <wchar.h>
912
+ − 38 #else
1257
+ − 39 size_t wcslen (const wchar_t *);
912
+ − 40 #endif
1204
+ − 41 #ifndef HAVE_STRLWR
1257
+ − 42 char *strlwr (char *);
1204
+ − 43 #endif
+ − 44 #ifndef HAVE_STRUPR
1257
+ − 45 char *strupr (char *);
1204
+ − 46 #endif
771
+ − 47
1743
+ − 48 BEGIN_C_DECLS
1650
+ − 49
771
+ − 50 /* ---------------------------------------------------------------------- */
+ − 51 /* Super-basic character properties */
+ − 52 /* ---------------------------------------------------------------------- */
+ − 53
+ − 54 /* These properties define the specifics of how our current encoding fits
+ − 55 in the basic model used for the encoding. Because this model is the same
+ − 56 as is used for UTF-8, all these properties could be defined for it, too.
+ − 57 This would instantly make the rest of this file work with UTF-8 (with
+ − 58 the exception of a few called functions that would need to be redefined).
+ − 59
+ − 60 (UTF-2000 implementers, take note!)
+ − 61 */
+ − 62
+ − 63 /* If you want more than this, you need to include charset.h */
+ − 64
+ − 65 #ifndef MULE
+ − 66
826
+ − 67 #define rep_bytes_by_first_byte(fb) 1
+ − 68 #define byte_ascii_p(byte) 1
867
+ − 69 #define MAX_ICHAR_LEN 1
771
+ − 70
+ − 71 #else /* MULE */
+ − 72
+ − 73 /* These are carefully designed to work if BYTE is signed or unsigned. */
+ − 74 /* Note that SPC and DEL are considered ASCII, not control. */
+ − 75
826
/* Byte classification by bit mask: ASCII bytes have no bits set above
   0x7f, C0 bytes have no bits set above 0x1f, and C1 bytes have exactly
   the 0x80 bit set above 0x1f. */
#define byte_ascii_p(byte) (!((byte) & ~0x7f))
#define byte_c0_p(byte) (!((byte) & ~0x1f))
#define byte_c1_p(byte) (0x80 == ((byte) & ~0x1f))
771
+ − 79
+ − 80 /* Does BYTE represent the first byte of a character? */
+ − 81
826
#ifndef ERROR_CHECK_TEXT

/* Unchecked form: nonzero when BYTE is below 0xA0. */
#define ibyte_first_byte_p(byte) ((byte) < 0xA0)

#else /* ERROR_CHECK_TEXT */

/* Checked form: first assert (via assert_at_line, which receives the
   caller's FILE and LINE) that BYTE lies in the range 0 .. 255, then
   perform the same comparison as the unchecked form. */
DECLARE_INLINE_HEADER (
int
ibyte_first_byte_p_1 (int byte, const char *file, int line)
)
{
  assert_at_line (byte >= 0 && byte < 256, file, line);
  return byte < 0xA0;
}

#define ibyte_first_byte_p(byte) \
  ibyte_first_byte_p_1 (byte, __FILE__, __LINE__)

#endif /* ERROR_CHECK_TEXT */
+ − 101
#ifndef ERROR_CHECK_TEXT

/* Does BYTE represent the first byte of a multi-byte character?
   Unchecked form: simply byte_c1_p (BYTE). */
#define ibyte_leading_byte_p(byte) byte_c1_p (byte)

#else /* ERROR_CHECK_TEXT */

/* Does BYTE represent the first byte of a multi-byte character?
   Checked form: assert (via assert_at_line, which receives the caller's
   FILE and LINE) that BYTE lies in the range 0 .. 255 before applying
   byte_c1_p. */
DECLARE_INLINE_HEADER (
int
ibyte_leading_byte_p_1 (int byte, const char *file, int line)
)
{
  assert_at_line (byte >= 0 && byte < 256, file, line);
  return byte_c1_p (byte);
}

#define ibyte_leading_byte_p(byte) \
  ibyte_leading_byte_p_1 (byte, __FILE__, __LINE__)

#endif /* ERROR_CHECK_TEXT */
771
+ − 123
+ − 124 /* Table of number of bytes in the string representation of a character
+ − 125 indexed by the first byte of that representation.
+ − 126
+ − 127 This value can be derived in other ways -- e.g. something like
826
+ − 128 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte))
771
+ − 129 but it's faster this way. */
1632
+ − 130 extern MODULE_API const Bytecount rep_bytes_by_first_byte[0xA0];
771
+ − 131
+ − 132 /* Number of bytes in the string representation of a character. */
788
+ − 133
800
#ifndef ERROR_CHECK_TEXT

/* Unchecked form: index the rep_bytes_by_first_byte table directly. */
#define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb])

#else /* ERROR_CHECK_TEXT */

/* Checked form: assert (via assert_at_line, which receives the caller's
   FILE and LINE) that FB is a valid index into the 0xA0-entry table
   before looking it up. */
DECLARE_INLINE_HEADER (
Bytecount
rep_bytes_by_first_byte_1 (int fb, const char *file, int line)
)
{
  assert_at_line (fb >= 0 && fb < 0xA0, file, line);
  return rep_bytes_by_first_byte[fb];
}

#define rep_bytes_by_first_byte(fb) \
  rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__)

#endif /* ERROR_CHECK_TEXT */
788
+ − 153
826
+ − 154 /* Is this character represented by more than one byte in a string in the
+ − 155 default format? */
+ − 156
867
+ − 157 #define ichar_multibyte_p(c) ((c) >= 0x80)
+ − 158
+ − 159 #define ichar_ascii_p(c) (!ichar_multibyte_p (c))
826
+ − 160
+ − 161 /* Maximum number of bytes per Emacs character when represented as text, in
+ − 162 any format.
+ − 163 */
771
+ − 164
867
+ − 165 #define MAX_ICHAR_LEN 4
771
+ − 166
826
+ − 167 #endif /* not MULE */
+ − 168
2367
+ − 169 /* For more discussion, see text.c, "handling non-default formats" */
+ − 170
826
+ − 171 typedef enum internal_format
+ − 172 {
+ − 173 FORMAT_DEFAULT,
+ − 174 FORMAT_8_BIT_FIXED,
+ − 175 FORMAT_16_BIT_FIXED, /* not implemented */
+ − 176 FORMAT_32_BIT_FIXED /* not implemented */
+ − 177 } Internal_Format;
+ − 178
+ − 179 #ifdef MULE
+ − 180 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to
+ − 181 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed
+ − 182 values may depend on the buffer, e.g. depending on what language the
+ − 183 text in the buffer is in. */
+ − 184
867
+ − 185 /* True if Ichar CH can be represented in 8-bit-fixed format. */
+ − 186 #define ichar_8_bit_fixed_p(ch, object) (((ch) & ~0xff) == 0)
+ − 187 /* Convert Ichar CH to an 8-bit int, as will be stored in the buffer. */
+ − 188 #define ichar_to_raw_8_bit_fixed(ch, object) ((Ibyte) (ch))
826
+ − 189 /* Convert the other way. */
867
+ − 190 #define raw_8_bit_fixed_to_ichar(ch, object) ((Ichar) (ch))
+ − 191
+ − 192 #define ichar_16_bit_fixed_p(ch, object) (((ch) & ~0xffff) == 0)
+ − 193 /* Convert Ichar CH to a 16-bit int, as will be stored in the buffer. */
+ − 194 #define ichar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch))
826
+ − 195 /* Convert the other way. */
867
+ − 196 #define raw_16_bit_fixed_to_ichar(ch, object) ((Ichar) (ch))
+ − 197
+ − 198 /* Convert Ichar CH to a 32-bit int, as will be stored in the buffer. */
+ − 199 #define ichar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch))
826
+ − 200 /* Convert the other way. */
867
+ − 201 #define raw_32_bit_fixed_to_ichar(ch, object) ((Ichar) (ch))
826
+ − 202
+ − 203 /* Return the "raw value" of a character as stored in the buffer. In the
+ − 204 default format, this is just the same as the character. In fixed-width
+ − 205 formats, this is the actual value in the buffer, which will be limited
+ − 206 to the range as established by the format. This is used when searching
+ − 207 for a character in a buffer -- it's faster to convert the character to
+ − 208 the raw value and look for that, than repeatedly convert each raw value
+ − 209 in the buffer into a character. */
+ − 210
+ − 211 DECLARE_INLINE_HEADER (
867
+ − 212 Raw_Ichar
2286
+ − 213 ichar_to_raw (Ichar ch, Internal_Format fmt,
+ − 214 Lisp_Object UNUSED (object))
826
+ − 215 )
+ − 216 {
+ − 217 switch (fmt)
+ − 218 {
+ − 219 case FORMAT_DEFAULT:
867
+ − 220 return (Raw_Ichar) ch;
826
+ − 221 case FORMAT_16_BIT_FIXED:
867
+ − 222 text_checking_assert (ichar_16_bit_fixed_p (ch, object));
+ − 223 return (Raw_Ichar) ichar_to_raw_16_bit_fixed (ch, object);
826
+ − 224 case FORMAT_32_BIT_FIXED:
867
+ − 225 return (Raw_Ichar) ichar_to_raw_32_bit_fixed (ch, object);
826
+ − 226 default:
+ − 227 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
867
+ − 228 text_checking_assert (ichar_8_bit_fixed_p (ch, object));
+ − 229 return (Raw_Ichar) ichar_to_raw_8_bit_fixed (ch, object);
826
+ − 230 }
+ − 231 }
+ − 232
+ − 233 /* Return whether CH is representable in the given format in the given
+ − 234 object. */
+ − 235
+ − 236 DECLARE_INLINE_HEADER (
+ − 237 int
2286
+ − 238 ichar_fits_in_format (Ichar ch, Internal_Format fmt,
+ − 239 Lisp_Object UNUSED (object))
826
+ − 240 )
+ − 241 {
+ − 242 switch (fmt)
+ − 243 {
+ − 244 case FORMAT_DEFAULT:
+ − 245 return 1;
+ − 246 case FORMAT_16_BIT_FIXED:
867
+ − 247 return ichar_16_bit_fixed_p (ch, object);
826
+ − 248 case FORMAT_32_BIT_FIXED:
+ − 249 return 1;
+ − 250 default:
+ − 251 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
867
+ − 252 return ichar_8_bit_fixed_p (ch, object);
826
+ − 253 }
+ − 254 }
+ − 255
+ − 256 /* Assuming the formats are the same, return whether the two objects
+ − 257 represent text in exactly the same way. */
+ − 258
+ − 259 DECLARE_INLINE_HEADER (
+ − 260 int
2286
+ − 261 objects_have_same_internal_representation (Lisp_Object UNUSED (srcobj),
+ − 262 Lisp_Object UNUSED (dstobj))
826
+ − 263 )
+ − 264 {
+ − 265 /* &&#### implement this properly when we allow per-object format
+ − 266 differences */
+ − 267 return 1;
+ − 268 }
+ − 269
+ − 270 #else
+ − 271
867
+ − 272 #define ichar_to_raw(ch, fmt, object) ((Raw_Ichar) (ch))
+ − 273 #define ichar_fits_in_format(ch, fmt, object) 1
826
+ − 274 #define objects_have_same_internal_representation(srcobj, dstobj) 1
+ − 275
771
+ − 276 #endif /* MULE */
+ − 277
1632
+ − 278 MODULE_API int dfc_coding_system_is_unicode (Lisp_Object codesys);
771
+ − 279
+ − 280 DECLARE_INLINE_HEADER (
+ − 281 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys)
+ − 282 )
+ − 283 {
+ − 284 if (dfc_coding_system_is_unicode (codesys))
+ − 285 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr);
+ − 286 else
+ − 287 return strlen ((char *) ptr);
+ − 288 }
+ − 289
+ − 290
+ − 291 /************************************************************************/
+ − 292 /* */
+ − 293 /* working with raw internal-format data */
+ − 294 /* */
+ − 295 /************************************************************************/
+ − 296
826
+ − 297 /*
+ − 298 Use the following functions/macros on contiguous text in any of the
+ − 299 internal formats. Those that take a format arg work on all internal
+ − 300 formats; the others work only on the default (variable-width under Mule)
+ − 301 format. If the text you're operating on is known to come from a buffer,
+ − 302 use the buffer-level functions in buffer.h, which automatically know the
+ − 303 correct format and handle the gap.
+ − 304
+ − 305 Some terminology:
+ − 306
867
+ − 307 "itext" appearing in the macros means "internal-format text" -- type
+ − 308 `Ibyte *'. Operations on such pointers themselves, rather than on the
+ − 309 text being pointed to, have "ibyteptr" instead of "itext" in the macro
+ − 310 name. "ichar" in the macro names means an Ichar -- the representation
826
+ − 311 of a character as a single integer rather than a series of bytes, as part
867
+ − 312 of "itext". Many of the macros below are for converting between the
826
+ − 313 two representations of characters.
+ − 314
867
+ − 315 Note also that we try to consistently distinguish between an "Ichar" and
826
+ − 316 a Lisp character. Stuff working with Lisp characters often just says
867
+ − 317 "char", so we consistently use "Ichar" when that's what we're working
826
+ − 318 with. */
+ − 319
+ − 320 /* The three golden rules of macros:
771
+ − 321
+ − 322 1) Anything that's an lvalue can be evaluated more than once.
826
+ − 323
+ − 324 2) Macros where anything else can be evaluated more than once should
+ − 325 have the word "unsafe" in their name (exceptions may be made for
+ − 326 large sets of macros that evaluate arguments of certain types more
+ − 327 than once, e.g. struct buffer * arguments, when clearly indicated in
+ − 328 the macro documentation). These macros are generally meant to be
+ − 329 called only by other macros that have already stored the calling
+ − 330 values in temporary variables.
+ − 331
+ − 332 3) Nothing else can be evaluated more than once. Use inline
771
+ − 333 functions, if necessary, to prevent multiple evaluation.
826
+ − 334
+ − 335 NOTE: The functions and macros below are given full prototypes in their
+ − 336 docs, even when the implementation is a macro. In such cases, passing
+ − 337 an argument of a type other than expected will produce undefined
+ − 338 results. Also, given that macros can do things functions can't (in
+ − 339 particular, directly modify arguments as if they were passed by
+ − 340 reference), the declaration syntax has been extended to include the
+ − 341 call-by-reference syntax from C++, where an & after a type indicates
+ − 342 that the argument is an lvalue and is passed by reference, i.e. the
+ − 343 function can modify its value. (This is equivalent in C to passing a
+ − 344 pointer to the argument, but without the need to explicitly worry about
+ − 345 pointers.)
+ − 346
+ − 347 When to capitalize macros:
+ − 348
+ − 349 -- Capitalize macros doing stuff obviously impossible with (C)
+ − 350 functions, e.g. directly modifying arguments as if they were passed by
+ − 351 reference.
+ − 352
+ − 353 -- Capitalize macros that evaluate *any* argument more than once regardless
+ − 354 of whether that's "allowed" (e.g. buffer arguments).
+ − 355
+ − 356 -- Capitalize macros that directly access a field in a Lisp_Object or
+ − 357 its equivalent underlying structure. In such cases, access through the
+ − 358 Lisp_Object precedes the macro with an X, and access through the underlying
+ − 359 structure doesn't.
+ − 360
+ − 361 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g.
+ − 362 FRAMEP, CHECK_FRAME, etc.
+ − 363
+ − 364 -- Try to avoid capitalizing any other macros.
771
+ − 365 */
+ − 366
+ − 367 /* ---------------------------------------------------------------------- */
867
+ − 368 /* Working with itext's (pointers to internally-formatted text) */
771
+ − 369 /* ---------------------------------------------------------------------- */
+ − 370
867
+ − 371 /* Given an itext, does it point to the beginning of a character?
826
+ − 372 */
+ − 373
771
+ − 374 #ifdef MULE
867
+ − 375 # define valid_ibyteptr_p(ptr) ibyte_first_byte_p (* (ptr))
771
+ − 376 #else
867
+ − 377 # define valid_ibyteptr_p(ptr) 1
771
+ − 378 #endif
+ − 379
867
+ − 380 /* If error-checking is enabled, assert that the given itext points to
826
+ − 381 the beginning of a character. Otherwise, do nothing.
+ − 382 */
+ − 383
867
+ − 384 #define assert_valid_ibyteptr(ptr) text_checking_assert (valid_ibyteptr_p (ptr))
+ − 385
+ − 386 /* Given an itext (assumed to point at the beginning of a character),
826
+ − 387 modify that pointer so it points to the beginning of the next character.
+ − 388
867
+ − 389 Note that INC_IBYTEPTR() and DEC_IBYTEPTR() have to be written in
+ − 390 completely separate ways. INC_IBYTEPTR() cannot use the DEC_IBYTEPTR()
771
+ − 391 trick of looking for a valid first byte because it might run off
867
+ − 392 the end of the string. DEC_IBYTEPTR() can't use the INC_IBYTEPTR()
771
+ − 393 method because it doesn't have easy access to the first byte of
+ − 394 the character it's moving over. */
+ − 395
867
+ − 396 #define INC_IBYTEPTR(ptr) do { \
+ − 397 assert_valid_ibyteptr (ptr); \
826
+ − 398 (ptr) += rep_bytes_by_first_byte (* (ptr)); \
+ − 399 } while (0)
+ − 400
1204
/* Advance PTR (an lvalue pointing at the start of a character in text of
   internal format FMT) to the start of the next character.  The default
   format uses INC_IBYTEPTR; the fixed-width formats step by 2, 4 or 1
   bytes.  FMT is evaluated exactly once, into __icf_fmt, and every later
   test uses that copy.  In text-checking builds the 16- and 32-bit cases
   assert that PTR equals ALIGN_PTR of itself for the unit type, and the
   default case asserts that the format is FORMAT_8_BIT_FIXED. */
#define INC_IBYTEPTR_FMT(ptr, fmt) \
do { \
  Internal_Format __icf_fmt = (fmt); \
  switch (__icf_fmt) \
    { \
    case FORMAT_DEFAULT: \
      INC_IBYTEPTR (ptr); \
      break; \
    case FORMAT_16_BIT_FIXED: \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) += 2; \
      break; \
    case FORMAT_32_BIT_FIXED: \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) += 4; \
      break; \
    default: \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED); \
      (ptr)++; \
      break; \
    } \
} while (0)
+ − 423
867
+ − 424 /* Given a itext (assumed to point at the beginning of a character or at
826
+ − 425 the very end of the text), modify that pointer so it points to the
+ − 426 beginning of the previous character.
+ − 427 */
771
+ − 428
800
#ifndef ERROR_CHECK_TEXT
/* Step PTR backward one byte at a time until valid_ibyteptr_p (PTR)
   holds; at least one step is always taken. */
#define DEC_IBYTEPTR(ptr) do { \
  do { \
    (ptr)--; \
  } while (!valid_ibyteptr_p (ptr)); \
} while (0)
#else /* ERROR_CHECK_TEXT */
/* Same backward scan, but the starting position is remembered in dc_ptr1
   so that the distance moved can be asserted to equal the byte length
   reported for the character now at PTR.  This is a separate definition
   so that dc_ptr1 is only declared where it is used, avoiding
   unused-variable warnings. */
#define DEC_IBYTEPTR(ptr) do { \
  const Ibyte *dc_ptr1 = (ptr); \
  do { \
    (ptr)--; \
  } while (!valid_ibyteptr_p (ptr)); \
  text_checking_assert (dc_ptr1 - (ptr) == rep_bytes_by_first_byte (*(ptr))); \
} while (0)
#endif /* ERROR_CHECK_TEXT */
+ − 445
1204
/* Move PTR (an lvalue pointing at the start of a character, or at the
   very end, of text in internal format FMT) back to the start of the
   previous character.  The default format uses DEC_IBYTEPTR; the
   fixed-width formats step by 2, 4 or 1 bytes.  FMT is evaluated exactly
   once, into __icf_fmt, and every later test uses that copy.  In
   text-checking builds the 16- and 32-bit cases assert that PTR equals
   ALIGN_PTR of itself for the unit type, and the default case asserts
   that the format is FORMAT_8_BIT_FIXED. */
#define DEC_IBYTEPTR_FMT(ptr, fmt) \
do { \
  Internal_Format __icf_fmt = (fmt); \
  switch (__icf_fmt) \
    { \
    case FORMAT_DEFAULT: \
      DEC_IBYTEPTR (ptr); \
      break; \
    case FORMAT_16_BIT_FIXED: \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) -= 2; \
      break; \
    case FORMAT_32_BIT_FIXED: \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) -= 4; \
      break; \
    default: \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED); \
      (ptr)--; \
      break; \
    } \
} while (0)
+ − 468
+ − 469 #ifdef MULE
+ − 470
826
+ − 471 /* Make sure that PTR is pointing to the beginning of a character. If not,
+ − 472 back up until this is the case. Note that there are not too many places
+ − 473 where it is legitimate to do this sort of thing. It's an error if
+ − 474 you're passed an "invalid" char * pointer. NOTE: PTR *must* be pointing
+ − 475 to a valid part of the string (i.e. not the very end, unless the string
+ − 476 is zero-terminated or something) in order for this function to not cause
+ − 477 crashes.
+ − 478 */
+ − 479
771
+ − 480 /* Note that this reads the byte at *PTR! */
+ − 481
867
/* Back PTR (an lvalue) up one byte at a time while valid_ibyteptr_p (PTR)
   is false.  PTR is parenthesized at every use so that the decrement
   always applies to the whole argument. */
#define VALIDATE_IBYTEPTR_BACKWARD(ptr) do { \
  while (!valid_ibyteptr_p (ptr)) (ptr)--; \
} while (0)
+ − 485
826
+ − 486 /* Make sure that PTR is pointing to the beginning of a character. If not,
+ − 487 move forward until this is the case. Note that there are not too many
+ − 488 places where it is legitimate to do this sort of thing. It's an error
+ − 489 if you're passed an "invalid" char * pointer.
+ − 490 */
771
+ − 491
867
+ − 492 /* This needs to be trickier than VALIDATE_IBYTEPTR_BACKWARD() to avoid the
771
+ − 493 possibility of running off the end of the string. */
+ − 494
867
+ − 495 #define VALIDATE_IBYTEPTR_FORWARD(ptr) do { \
+ − 496 Ibyte *vcf_ptr = (ptr); \
+ − 497 VALIDATE_IBYTEPTR_BACKWARD (vcf_ptr); \
771
+ − 498 if (vcf_ptr != (ptr)) \
+ − 499 { \
+ − 500 (ptr) = vcf_ptr; \
867
+ − 501 INC_IBYTEPTR (ptr); \
771
+ − 502 } \
+ − 503 } while (0)
+ − 504
+ − 505 #else /* not MULE */
867
+ − 506 #define VALIDATE_IBYTEPTR_BACKWARD(ptr)
+ − 507 #define VALIDATE_IBYTEPTR_FORWARD(ptr)
826
+ − 508 #endif /* not MULE */
+ − 509
+ − 510 #ifdef MULE
+ − 511
867
+ − 512 /* Given a Ibyte string at PTR of size N, possibly with a partial
826
+ − 513 character at the end, return the size of the longest substring of
+ − 514 complete characters. Does not assume that the byte at *(PTR + N) is
+ − 515 readable. Note that there are not too many places where it is
+ − 516 legitimate to do this sort of thing. It's an error if you're passed an
+ − 517 "invalid" offset. */
+ − 518
+ − 519 DECLARE_INLINE_HEADER (
+ − 520 Bytecount
867
+ − 521 validate_ibyte_string_backward (const Ibyte *ptr, Bytecount n)
826
+ − 522 )
+ − 523 {
867
+ − 524 const Ibyte *ptr2;
826
+ − 525
+ − 526 if (n == 0)
+ − 527 return n;
+ − 528 ptr2 = ptr + n - 1;
867
+ − 529 VALIDATE_IBYTEPTR_BACKWARD (ptr2);
826
+ − 530 if (ptr2 + rep_bytes_by_first_byte (*ptr2) != ptr + n)
+ − 531 return ptr2 - ptr;
+ − 532 return n;
+ − 533 }
+ − 534
+ − 535 #else
+ − 536
867
+ − 537 #define validate_ibyte_string_backward(ptr, n) (n)
826
+ − 538
+ − 539 #endif /* MULE */
771
+ − 540
2367
#ifdef ERROR_CHECK_TEXT
/* Assert that each of the first LEN bytes at PTR is ASCII, i.e. has no
   bits set above 0x7F.  DEL (0x7F) is ASCII and is accepted.  The mask
   test gives the same answer whether Ascbyte is a signed or an unsigned
   type. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len) \
  do { \
    int aia2; \
    const Ascbyte *aia2ptr = (ptr); \
    int aia2len = (len); \
 \
    for (aia2 = 0; aia2 < aia2len; aia2++) \
      assert ((aia2ptr[aia2] & ~0x7F) == 0); \
  } while (0)
/* Same check applied to the whole zero-terminated string at PTR. */
#define ASSERT_ASCTEXT_ASCII(ptr) \
  do { \
    const Ascbyte *aiaz2 = (ptr); \
    ASSERT_ASCTEXT_ASCII_LEN (aiaz2, strlen (aiaz2)); \
  } while (0)
#else
/* Without ERROR_CHECK_TEXT both checks expand to nothing. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)
#define ASSERT_ASCTEXT_ASCII(ptr)
#endif
+ − 560
771
+ − 561 /* -------------------------------------------------------------- */
826
+ − 562 /* Working with the length (in bytes and characters) of a */
+ − 563 /* section of internally-formatted text */
771
+ − 564 /* -------------------------------------------------------------- */
+ − 565
826
+ − 566 #ifdef MULE
+ − 567
1632
+ − 568 MODULE_API Charcount
+ − 569 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len);
+ − 570 MODULE_API Bytecount
+ − 571 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len);
826
+ − 572
+ − 573 /* Given a pointer to a text string and a length in bytes, return
+ − 574 the equivalent length in characters. */
+ − 575
+ − 576 DECLARE_INLINE_HEADER (
+ − 577 Charcount
867
+ − 578 bytecount_to_charcount (const Ibyte *ptr, Bytecount len)
826
+ − 579 )
+ − 580 {
+ − 581 if (len < 20) /* Just a random guess, but it should be more or less correct.
+ − 582 If number of bytes is small, just do a simple loop,
+ − 583 which should be more efficient. */
+ − 584 {
+ − 585 Charcount count = 0;
867
+ − 586 const Ibyte *end = ptr + len;
826
+ − 587 while (ptr < end)
+ − 588 {
867
+ − 589 INC_IBYTEPTR (ptr);
826
+ − 590 count++;
+ − 591 }
+ − 592 /* Bomb out if the specified substring ends in the middle
+ − 593 of a character. Note that we might have already gotten
+ − 594 a core dump above from an invalid reference, but at least
+ − 595 we will get no farther than here.
+ − 596
+ − 597 This also catches len < 0. */
+ − 598 text_checking_assert (ptr == end);
+ − 599
+ − 600 return count;
+ − 601 }
+ − 602 else
+ − 603 return bytecount_to_charcount_fun (ptr, len);
+ − 604 }
+ − 605
+ − 606 /* Given a pointer to a text string and a length in characters, return the
+ − 607 equivalent length in bytes.
+ − 608 */
+ − 609
+ − 610 DECLARE_INLINE_HEADER (
+ − 611 Bytecount
867
+ − 612 charcount_to_bytecount (const Ibyte *ptr, Charcount len)
826
+ − 613 )
+ − 614 {
+ − 615 text_checking_assert (len >= 0);
+ − 616 if (len < 20) /* See above */
+ − 617 {
867
+ − 618 const Ibyte *newptr = ptr;
826
+ − 619 while (len > 0)
+ − 620 {
867
+ − 621 INC_IBYTEPTR (newptr);
826
+ − 622 len--;
+ − 623 }
+ − 624 return newptr - ptr;
+ − 625 }
+ − 626 else
+ − 627 return charcount_to_bytecount_fun (ptr, len);
+ − 628 }
+ − 629
2367
+ − 630 MODULE_API Bytecount
+ − 631 charcount_to_bytecount_down_fun (const Ibyte *ptr, Charcount len);
+ − 632
+ − 633 /* Given a pointer to a text string and a length in bytes, return
+ − 634 the equivalent length in characters of the stretch [PTR - LEN, PTR). */
+ − 635
+ − 636 DECLARE_INLINE_HEADER (
+ − 637 Charcount
+ − 638 bytecount_to_charcount_down (const Ibyte *ptr, Bytecount len)
+ − 639 )
+ − 640 {
+ − 641 /* No need to be clever here */
+ − 642 return bytecount_to_charcount (ptr - len, len);
+ − 643 }
+ − 644
+ − 645 /* Given a pointer to a text string and a length in characters, return the
+ − 646 equivalent length in bytes of the stretch of characters of that length
+ − 647 BEFORE the pointer.
+ − 648 */
+ − 649
+ − 650 DECLARE_INLINE_HEADER (
+ − 651 Bytecount
+ − 652 charcount_to_bytecount_down (const Ibyte *ptr, Charcount len)
+ − 653 )
+ − 654 {
+ − 655 #define SLEDGEHAMMER_CHECK_TEXT
+ − 656 #ifdef SLEDGEHAMMER_CHECK_TEXT
+ − 657 Charcount len1 = len;
+ − 658 Bytecount ret1, ret2;
+ − 659
+ − 660 /* To test the correctness of the function version, always do the
+ − 661 calculation both ways and check that the values are the same. */
+ − 662 text_checking_assert (len >= 0);
+ − 663 {
+ − 664 const Ibyte *newptr = ptr;
+ − 665 while (len1 > 0)
+ − 666 {
+ − 667 DEC_IBYTEPTR (newptr);
+ − 668 len1--;
+ − 669 }
+ − 670 ret1 = ptr - newptr;
+ − 671 }
+ − 672 ret2 = charcount_to_bytecount_down_fun (ptr, len);
+ − 673 text_checking_assert (ret1 == ret2);
+ − 674 return ret1;
+ − 675 #else
+ − 676 text_checking_assert (len >= 0);
+ − 677 if (len < 20) /* See above */
+ − 678 {
+ − 679 const Ibyte *newptr = ptr;
+ − 680 while (len > 0)
+ − 681 {
+ − 682 DEC_IBYTEPTR (newptr);
+ − 683 len--;
+ − 684 }
+ − 685 return ptr - newptr;
+ − 686 }
+ − 687 else
+ − 688 return charcount_to_bytecount_down_fun (ptr, len);
+ − 689 #endif /* SLEDGEHAMMER_CHECK_TEXT */
+ − 690 }
+ − 691
826
+ − 692 /* Given a pointer to a text string in the specified format and a length in
+ − 693 bytes, return the equivalent length in characters.
+ − 694 */
+ − 695
+ − 696 DECLARE_INLINE_HEADER (
+ − 697 Charcount
867
+ − 698 bytecount_to_charcount_fmt (const Ibyte *ptr, Bytecount len,
826
+ − 699 Internal_Format fmt)
+ − 700 )
+ − 701 {
+ − 702 switch (fmt)
+ − 703 {
+ − 704 case FORMAT_DEFAULT:
+ − 705 return bytecount_to_charcount (ptr, len);
+ − 706 case FORMAT_16_BIT_FIXED:
1204
+ − 707 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
826
+ − 708 return (Charcount) (len << 1);
+ − 709 case FORMAT_32_BIT_FIXED:
1204
+ − 710 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
826
+ − 711 return (Charcount) (len << 2);
+ − 712 default:
+ − 713 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
+ − 714 return (Charcount) len;
+ − 715 }
+ − 716 }
+ − 717
+ − 718 /* Given a pointer to a text string in the specified format and a length in
+ − 719 characters, return the equivalent length in bytes.
+ − 720 */
+ − 721
+ − 722 DECLARE_INLINE_HEADER (
+ − 723 Bytecount
867
+ − 724 charcount_to_bytecount_fmt (const Ibyte *ptr, Charcount len,
826
+ − 725 Internal_Format fmt)
+ − 726 )
+ − 727 {
+ − 728 switch (fmt)
+ − 729 {
+ − 730 case FORMAT_DEFAULT:
+ − 731 return charcount_to_bytecount (ptr, len);
+ − 732 case FORMAT_16_BIT_FIXED:
1204
+ − 733 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
826
+ − 734 text_checking_assert (!(len & 1));
+ − 735 return (Bytecount) (len >> 1);
+ − 736 case FORMAT_32_BIT_FIXED:
+ − 737 text_checking_assert (!(len & 3));
1204
+ − 738 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
826
+ − 739 return (Bytecount) (len >> 2);
+ − 740 default:
+ − 741 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
+ − 742 return (Bytecount) len;
+ − 743 }
+ − 744 }
+ − 745
+ − 746 #else
+ − 747
+ − 748 #define bytecount_to_charcount(ptr, len) ((Charcount) (len))
+ − 749 #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len))
+ − 750 #define charcount_to_bytecount(ptr, len) ((Bytecount) (len))
+ − 751 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len))
+ − 752
+ − 753 #endif /* MULE */
+ − 754
+ − 755 /* Return the length of the first character at PTR. Equivalent to
+ − 756 charcount_to_bytecount (ptr, 1).
+ − 757
+ − 758 [Since charcount_to_bytecount() is written as inline, a smart compiler
+ − 759 should really optimize charcount_to_bytecount (ptr, 1) to the same as
+ − 760 the following, with no error checking. But since this idiom occurs so
+ − 761 often, we'll be helpful and define a special macro for it.]
+ − 762 */
+ − 763
867
+ − 764 #define itext_ichar_len(ptr) rep_bytes_by_first_byte (*(ptr))
826
+ − 765
+ − 766 /* Return the length of the first character at PTR, which is in the
+ − 767 specified internal format. Equivalent to charcount_to_bytecount_fmt
+ − 768 (ptr, 1, fmt).
+ − 769 */
+ − 770
+ − 771 DECLARE_INLINE_HEADER (
+ − 772 Bytecount
2333
+ − 773 itext_ichar_len_fmt (const Ibyte *USED_IF_MULE_OR_CHECK_TEXT (ptr),
+ − 774 Internal_Format fmt)
826
+ − 775 )
+ − 776 {
+ − 777 switch (fmt)
+ − 778 {
+ − 779 case FORMAT_DEFAULT:
867
+ − 780 return itext_ichar_len (ptr);
826
+ − 781 case FORMAT_16_BIT_FIXED:
1204
+ − 782 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
826
+ − 783 return 2;
+ − 784 case FORMAT_32_BIT_FIXED:
1204
+ − 785 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
826
+ − 786 return 4;
+ − 787 default:
+ − 788 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
+ − 789 return 1;
+ − 790 }
+ − 791 }
+ − 792
+ − 793 /* Return a pointer to the beginning of the character offset N (in
+ − 794 characters) from PTR.
+ − 795 */
+ − 796
+ − 797 DECLARE_INLINE_HEADER (
867
+ − 798 const Ibyte *
+ − 799 itext_n_addr (const Ibyte *ptr, Charcount offset)
826
+ − 800 )
771
+ − 801 {
+ − 802 return ptr + charcount_to_bytecount (ptr, offset);
+ − 803 }
+ − 804
867
+ − 805 /* Given a itext and an offset into the text pointed to by the itext,
826
+ − 806 modify the offset so it points to the beginning of the next character.
+ − 807 */
+ − 808
/* Advance the byte offset POS (an lvalue) past the character that starts
   at PTR + POS, by adding that character's length as reported by
   rep_bytes_by_first_byte.  In text-checking builds both PTR itself and
   the position being stepped over, PTR + POS, are asserted to point at
   the start of a character. */
#define INC_BYTECOUNT(ptr, pos) do { \
  assert_valid_ibyteptr (ptr); \
  assert_valid_ibyteptr ((ptr) + (pos)); \
  (pos) += rep_bytes_by_first_byte (* ((ptr) + (pos))); \
} while (0)
+ − 813
771
+ − 814 /* -------------------------------------------------------------------- */
867
+ − 815 /* Retrieving or changing the character pointed to by a itext */
771
+ − 816 /* -------------------------------------------------------------------- */
+ − 817
867
+ − 818 #define simple_itext_ichar(ptr) ((Ichar) (ptr)[0])
+ − 819 #define simple_set_itext_ichar(ptr, x) \
+ − 820 ((ptr)[0] = (Ibyte) (x), (Bytecount) 1)
+ − 821 #define simple_itext_copy_ichar(src, dst) \
814
+ − 822 ((dst)[0] = *(src), (Bytecount) 1)
771
+ − 823
+ − 824 #ifdef MULE
+ − 825
1632
+ − 826 MODULE_API Ichar non_ascii_itext_ichar (const Ibyte *ptr);
+ − 827 MODULE_API Bytecount non_ascii_set_itext_ichar (Ibyte *ptr, Ichar c);
+ − 828 MODULE_API Bytecount non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst);
867
+ − 829
+ − 830 /* Retrieve the character pointed to by PTR as an Ichar. */
826
+ − 831
+ − 832 DECLARE_INLINE_HEADER (
867
+ − 833 Ichar
+ − 834 itext_ichar (const Ibyte *ptr)
826
+ − 835 )
771
+ − 836 {
826
+ − 837 return byte_ascii_p (*ptr) ?
867
+ − 838 simple_itext_ichar (ptr) :
+ − 839 non_ascii_itext_ichar (ptr);
771
+ − 840 }
+ − 841
826
+ − 842 /* Retrieve the character pointed to by PTR (a pointer to text in the
+ − 843 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an
867
+ − 844 Ichar.
826
+ − 845
+ − 846 Note: For these and other *_fmt() functions, if you pass in a constant
+ − 847 FMT, the switch will be optimized out of existence. Therefore, there is
+ − 848 no need to create separate versions for the various formats for
867
+ − 849 "efficiency reasons". In fact, we don't really need itext_ichar()
826
+ − 850 and such written separately, but they are used often so it's simpler
+ − 851 that way. */
+ − 852
+ − 853 DECLARE_INLINE_HEADER (
867
+ − 854 Ichar
+ − 855 itext_ichar_fmt (const Ibyte *ptr, Internal_Format fmt,
2286
+ − 856 Lisp_Object UNUSED (object))
826
+ − 857 )
+ − 858 {
+ − 859 switch (fmt)
+ − 860 {
+ − 861 case FORMAT_DEFAULT:
867
+ − 862 return itext_ichar (ptr);
826
+ − 863 case FORMAT_16_BIT_FIXED:
1204
+ − 864 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
867
+ − 865 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object);
826
+ − 866 case FORMAT_32_BIT_FIXED:
1204
+ − 867 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
867
+ − 868 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object);
826
+ − 869 default:
+ − 870 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
867
+ − 871 return raw_8_bit_fixed_to_ichar (*ptr, object);
826
+ − 872 }
+ − 873 }
+ − 874
+ − 875 /* Return the character at PTR (which is in format FMT), suitable for
+ − 876 comparison with an ASCII character. This guarantees that if the
+ − 877 character at PTR is ASCII (range 0 - 127), that character will be
+ − 878 returned; otherwise, some character outside of the ASCII range will be
+ − 879 returned, but not necessarily the character actually at PTR. This will
867
+ − 880 be faster than itext_ichar_fmt() for some formats -- in particular,
826
+ − 881 FORMAT_DEFAULT. */
+ − 882
+ − 883 DECLARE_INLINE_HEADER (
867
+ − 884 Ichar
+ − 885 itext_ichar_ascii_fmt (const Ibyte *ptr, Internal_Format fmt,
2286
+ − 886 Lisp_Object UNUSED (object))
826
+ − 887 )
+ − 888 {
+ − 889 switch (fmt)
+ − 890 {
+ − 891 case FORMAT_DEFAULT:
867
+ − 892 return (Ichar) *ptr;
826
+ − 893 case FORMAT_16_BIT_FIXED:
1204
+ − 894 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
867
+ − 895 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object);
826
+ − 896 case FORMAT_32_BIT_FIXED:
1204
+ − 897 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
867
+ − 898 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object);
826
+ − 899 default:
+ − 900 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
867
+ − 901 return raw_8_bit_fixed_to_ichar (*ptr, object);
826
+ − 902 }
+ − 903 }
+ − 904
+ − 905 /* Return the "raw value" of the character at PTR, in format FMT. This is
+ − 906 useful when searching for a character; convert the character using
867
+ − 907 ichar_to_raw(). */
826
+ − 908
+ − 909 DECLARE_INLINE_HEADER (
867
+ − 910 Raw_Ichar
+ − 911 itext_ichar_raw_fmt (const Ibyte *ptr, Internal_Format fmt)
826
+ − 912 )
+ − 913 {
+ − 914 switch (fmt)
+ − 915 {
+ − 916 case FORMAT_DEFAULT:
867
+ − 917 return (Raw_Ichar) itext_ichar (ptr);
826
+ − 918 case FORMAT_16_BIT_FIXED:
1204
+ − 919 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
867
+ − 920 return (Raw_Ichar) (* (UINT_16_BIT *) ptr);
826
+ − 921 case FORMAT_32_BIT_FIXED:
1204
+ − 922 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
867
+ − 923 return (Raw_Ichar) (* (UINT_32_BIT *) ptr);
826
+ − 924 default:
+ − 925 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
867
+ − 926 return (Raw_Ichar) (*ptr);
826
+ − 927 }
+ − 928 }
+ − 929
867
+ − 930 /* Store the character CH (an Ichar) as internally-formatted text starting
826
+ − 931 at PTR. Return the number of bytes stored.
+ − 932 */
+ − 933
+ − 934 DECLARE_INLINE_HEADER (
+ − 935 Bytecount
867
+ − 936 set_itext_ichar (Ibyte *ptr, Ichar x)
826
+ − 937 )
771
+ − 938 {
867
+ − 939 return !ichar_multibyte_p (x) ?
+ − 940 simple_set_itext_ichar (ptr, x) :
+ − 941 non_ascii_set_itext_ichar (ptr, x);
771
+ − 942 }
+ − 943
867
+ − 944 /* Store the character CH (an Ichar) as internally-formatted text of
826
+ − 945 format FMT starting at PTR, which comes from OBJECT. Return the number
+ − 946 of bytes stored.
+ − 947 */
+ − 948
+ − 949 DECLARE_INLINE_HEADER (
+ − 950 Bytecount
867
+ − 951 set_itext_ichar_fmt (Ibyte *ptr, Ichar x, Internal_Format fmt,
2286
+ − 952 Lisp_Object UNUSED (object))
826
+ − 953 )
771
+ − 954 {
826
+ − 955 switch (fmt)
+ − 956 {
+ − 957 case FORMAT_DEFAULT:
867
+ − 958 return set_itext_ichar (ptr, x);
826
+ − 959 case FORMAT_16_BIT_FIXED:
867
+ − 960 text_checking_assert (ichar_16_bit_fixed_p (x, object));
1204
+ − 961 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT));
867
+ − 962 * (UINT_16_BIT *) ptr = ichar_to_raw_16_bit_fixed (x, object);
826
+ − 963 return 2;
+ − 964 case FORMAT_32_BIT_FIXED:
1204
+ − 965 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT));
867
+ − 966 * (UINT_32_BIT *) ptr = ichar_to_raw_32_bit_fixed (x, object);
826
+ − 967 return 4;
+ − 968 default:
+ − 969 text_checking_assert (fmt == FORMAT_8_BIT_FIXED);
867
+ − 970 text_checking_assert (ichar_8_bit_fixed_p (x, object));
+ − 971 *ptr = ichar_to_raw_8_bit_fixed (x, object);
826
+ − 972 return 1;
+ − 973 }
+ − 974 }
+ − 975
+ − 976 /* Retrieve the character pointed to by SRC and store it as
+ − 977 internally-formatted text in DST.
+ − 978 */
+ − 979
+ − 980 DECLARE_INLINE_HEADER (
+ − 981 Bytecount
867
+ − 982 itext_copy_ichar (const Ibyte *src, Ibyte *dst)
826
+ − 983 )
+ − 984 {
+ − 985 return byte_ascii_p (*src) ?
867
+ − 986 simple_itext_copy_ichar (src, dst) :
+ − 987 non_ascii_itext_copy_ichar (src, dst);
771
+ − 988 }
+ − 989
+ − 990 #else /* not MULE */
+ − 991
867
/* Without MULE every accessor reduces to the single-byte simple_* form;
   the FMT and OBJECT arguments are accepted but ignored. */
# define itext_ichar(ptr) simple_itext_ichar (ptr)
# define itext_ichar_fmt(ptr, fmt, object) simple_itext_ichar (ptr)
# define itext_ichar_ascii_fmt(ptr, fmt, object) simple_itext_ichar (ptr)
# define itext_ichar_raw_fmt(ptr, fmt) simple_itext_ichar (ptr)
# define set_itext_ichar(ptr, x) simple_set_itext_ichar (ptr, x)
# define set_itext_ichar_fmt(ptr, x, fmt, obj) simple_set_itext_ichar (ptr, x)
# define itext_copy_ichar(src, dst) simple_itext_copy_ichar (src, dst)
771
+ − 999
+ − 1000 #endif /* not MULE */
+ − 1001
826
/* Return, as an Ichar, the character located OFFSET characters (not
   bytes) past PTR. */

#define itext_ichar_n(ptr, offset) itext_ichar (itext_n_addr (ptr, offset))
771
+ − 1008
+ − 1009
+ − 1010 /* ---------------------------- */
2367
+ − 1011 /* Working with Ichars */
771
+ − 1012 /* ---------------------------- */
+ − 1013
867
+ − 1014 /* NOTE: There are other functions/macros for working with Ichars in
+ − 1015 charset.h, for retrieving the charset of an Ichar, the length of an
+ − 1016 Ichar when converted to text, etc.
826
+ − 1017 */
+ − 1018
771
+ − 1019 #ifdef MULE
+ − 1020
1632
+ − 1021 MODULE_API int non_ascii_valid_ichar_p (Ichar ch);
867
+ − 1022
+ − 1023 /* Return whether the given Ichar is valid.
826
+ − 1024 */
+ − 1025
+ − 1026 DECLARE_INLINE_HEADER (
+ − 1027 int
867
+ − 1028 valid_ichar_p (Ichar ch)
826
+ − 1029 )
771
+ − 1030 {
867
+ − 1031 return (! (ch & ~0xFF)) || non_ascii_valid_ichar_p (ch);
771
+ − 1032 }
+ − 1033
+ − 1034 #else /* not MULE */
+ − 1035
867
/* Without MULE an Ichar is valid iff it has no bits set above the low
   eight.  CH is parenthesized so that an argument containing an operator
   of lower precedence than `&' (e.g. `a | b' or `c ? x : y') is tested as
   a whole rather than having the mask bind to its last operand. */
#define valid_ichar_p(ch) (! ((ch) & ~0xFF))
771
+ − 1037
+ − 1038 #endif /* not MULE */
+ − 1039
831
+ − 1040 DECLARE_INLINE_HEADER (
+ − 1041 Lisp_Object
867
+ − 1042 make_char (Ichar val)
831
+ − 1043 )
+ − 1044 {
867
+ − 1045 type_checking_assert (valid_ichar_p (val));
831
+ − 1046 return make_char_1 (val);
+ − 1047 }
+ − 1048
867
/* 1 if X is an integer Lisp_Object whose value passes valid_ichar_p(),
   else 0.  X is evaluated more than once. */
#define CHAR_INTP(x) (INTP (x) ? valid_ichar_p (XINT (x)) != 0 : 0)

/* 1 if X is a character, or an integer accepted by CHAR_INTP(); else 0. */
#define CHAR_OR_CHAR_INTP(x) (CHARP (x) ? 1 : CHAR_INTP (x))
+ − 1052
826
+ − 1053 DECLARE_INLINE_HEADER (
867
+ − 1054 Ichar
771
+ − 1055 XCHAR_OR_CHAR_INT (Lisp_Object obj)
826
+ − 1056 )
771
+ − 1057 {
+ − 1058 return CHARP (obj) ? XCHAR (obj) : XINT (obj);
+ − 1059 }
+ − 1060
826
/* Ensure that X holds a character Lisp_Object.  A character is left
   alone.  An integer accepted by CHAR_INTP() is replaced by the
   corresponding character, merely by repackaging it with make_char().
   Anything else is handed to wrong_type_argument (Qcharacterp, X) and X
   receives whatever that returns.  X must be a modifiable lvalue; it is
   evaluated more than once. */

#define CHECK_CHAR_COERCE_INT(x) do {                 \
  if (!CHARP (x))                                     \
    {                                                 \
      if (CHAR_INTP (x))                              \
        x = make_char (XINT (x));                     \
      else                                            \
        x = wrong_type_argument (Qcharacterp, x);     \
    }                                                 \
} while (0)
+ − 1074
+ − 1075
+ − 1076
+ − 1077 /************************************************************************/
+ − 1078 /* */
826
+ − 1079 /* working with Lisp strings */
+ − 1080 /* */
+ − 1081 /************************************************************************/
+ − 1082
/* Length of the Lisp string S in characters: its byte length converted
   with string_index_byte_to_char(). */
#define string_char_length(s) string_index_byte_to_char (s, XSTRING_LENGTH (s))
/* Value of the byte at byte index I of S.  The addition of 0 presumably
   keeps the result from being usable as an lvalue. */
#define string_byte(s, i) (0 + XSTRING_DATA (s)[i])
/* In case we ever allow strings to be in a different format ... */
#define set_string_byte(s, i, c) ((XSTRING_DATA (s))[i] = (c))
+ − 1088
/* Check, via text_checking_assert(), that X is a valid character index
   into the Lisp string S, i.e. 0 <= X <= string_char_length (S).  X is
   evaluated twice.  Both uses of X are parenthesized so that an argument
   such as `c ? a : b' is bounds-checked as a whole. */
#define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= string_char_length (s));     \
} while (0)
+ − 1092
/* Check, via text_checking_assert(), that X is a valid byte index into
   the Lisp string S: 0 <= X <= XSTRING_LENGTH (S), and the address of
   that byte satisfies valid_ibyteptr_p().  X is evaluated several times.
   Every use of X in the range test is parenthesized so that an argument
   such as `c ? a : b' is bounds-checked as a whole. */
#define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do {                  \
  text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s));           \
  text_checking_assert (valid_ibyteptr_p (string_byte_addr (s, x)));      \
} while (0)
+ − 1097
/* Address of the text at byte offset I within the Lisp string S. */
#define string_byte_addr(s, i) (XSTRING_DATA (s) + (i))
/* Byte offset within the Lisp string S of the text that PTR points to. */
#define string_addr_to_byte(s, ptr) ((Bytecount) ((ptr) - XSTRING_DATA (s)))
/* The Ichar at *CHARACTER* (not byte) offset I of S. */
#define string_ichar(s, i) itext_ichar (string_char_addr (s, i))
826
+ − 1104
/* ERROR_CHECK_TEXT switches on SLEDGEHAMMER_CHECK_ASCII_BEGIN. */
#if defined (ERROR_CHECK_TEXT)
# define SLEDGEHAMMER_CHECK_ASCII_BEGIN
#endif

#ifndef SLEDGEHAMMER_CHECK_ASCII_BEGIN
/* Checking is off: calls expand to nothing, and the argument is not
   evaluated. */
# define sledgehammer_check_ascii_begin(str)
#else
void sledgehammer_check_ascii_begin (Lisp_Object str);
#endif
+ − 1114
/* Make an alloca'd copy of a Lisp string.  XSTRING_LENGTH (S) + 1 bytes
   (the data plus, presumably, the terminating zero byte) are copied from
   S into storage obtained with alloca_ibytes(), and the address of the
   copy is stored into LVAL through an (Ibyte **) cast.  S is evaluated
   once. */
#define LISP_STRING_TO_ALLOCA(s, lval)                          \
do {                                                            \
  Ibyte **_lta_ = (Ibyte **) &(lval);                           \
  Lisp_Object _lta_2 = (s);                                     \
  Bytecount _lta_3 = 1 + XSTRING_LENGTH (_lta_2);               \
  *_lta_ = alloca_ibytes (_lta_3);                              \
  memcpy (*_lta_, XSTRING_DATA (_lta_2), _lta_3);               \
} while (0)
+ − 1123
+ − 1124 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta);
+ − 1125
+ − 1126 /* Convert a byte index into a string into a char index. */
+ − 1127 DECLARE_INLINE_HEADER (
+ − 1128 Charcount
2333
+ − 1129 string_index_byte_to_char (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
+ − 1130 Bytecount idx)
826
+ − 1131 )
+ − 1132 {
+ − 1133 Charcount retval;
+ − 1134 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx);
+ − 1135 #ifdef MULE
+ − 1136 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s))
+ − 1137 retval = (Charcount) idx;
+ − 1138 else
+ − 1139 retval = (XSTRING_ASCII_BEGIN (s) +
+ − 1140 bytecount_to_charcount (XSTRING_DATA (s) +
+ − 1141 XSTRING_ASCII_BEGIN (s),
+ − 1142 idx - XSTRING_ASCII_BEGIN (s)));
+ − 1143 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
+ − 1144 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx));
+ − 1145 # endif
+ − 1146 #else
+ − 1147 retval = (Charcount) idx;
+ − 1148 #endif
+ − 1149 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will
+ − 1150 call string_index_byte_to_char(). */
+ − 1151 return retval;
+ − 1152 }
+ − 1153
+ − 1154 /* Convert a char index into a string into a byte index. */
+ − 1155 DECLARE_INLINE_HEADER (
+ − 1156 Bytecount
2333
+ − 1157 string_index_char_to_byte (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
+ − 1158 Charcount idx)
826
+ − 1159 )
+ − 1160 {
+ − 1161 Bytecount retval;
+ − 1162 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx);
+ − 1163 #ifdef MULE
+ − 1164 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s))
+ − 1165 retval = (Bytecount) idx;
+ − 1166 else
+ − 1167 retval = (XSTRING_ASCII_BEGIN (s) +
+ − 1168 charcount_to_bytecount (XSTRING_DATA (s) +
+ − 1169 XSTRING_ASCII_BEGIN (s),
+ − 1170 idx - XSTRING_ASCII_BEGIN (s)));
+ − 1171 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
+ − 1172 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx));
+ − 1173 # endif
+ − 1174 #else
+ − 1175 retval = (Bytecount) idx;
+ − 1176 #endif
+ − 1177 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval);
+ − 1178 return retval;
+ − 1179 }
+ − 1180
+ − 1181 /* Convert a substring length (starting at byte offset OFF) from bytes to
+ − 1182 chars. */
+ − 1183 DECLARE_INLINE_HEADER (
+ − 1184 Charcount
2333
+ − 1185 string_offset_byte_to_char_len (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
+ − 1186 Bytecount USED_IF_MULE_OR_CHECK_TEXT (off),
+ − 1187 Bytecount len)
826
+ − 1188 )
+ − 1189 {
+ − 1190 Charcount retval;
+ − 1191 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
+ − 1192 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len);
+ − 1193 #ifdef MULE
+ − 1194 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
+ − 1195 retval = (Charcount) len;
+ − 1196 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
+ − 1197 retval =
+ − 1198 XSTRING_ASCII_BEGIN (s) - (Charcount) off +
+ − 1199 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
+ − 1200 len - (XSTRING_ASCII_BEGIN (s) - off));
+ − 1201 else
+ − 1202 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len);
+ − 1203 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
+ − 1204 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len));
+ − 1205 # endif
+ − 1206 #else
+ − 1207 retval = (Charcount) len;
+ − 1208 #endif
+ − 1209 return retval;
+ − 1210 }
+ − 1211
+ − 1212 /* Convert a substring length (starting at byte offset OFF) from chars to
+ − 1213 bytes. */
+ − 1214 DECLARE_INLINE_HEADER (
+ − 1215 Bytecount
2333
+ − 1216 string_offset_char_to_byte_len (Lisp_Object USED_IF_MULE_OR_CHECK_TEXT (s),
+ − 1217 Bytecount USED_IF_MULE_OR_CHECK_TEXT (off),
+ − 1218 Charcount len)
826
+ − 1219 )
+ − 1220 {
+ − 1221 Bytecount retval;
+ − 1222 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off);
+ − 1223 #ifdef MULE
+ − 1224 /* casts to avoid errors from combining Bytecount/Charcount and warnings
+ − 1225 from signed/unsigned comparisons */
+ − 1226 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s))
+ − 1227 retval = (Bytecount) len;
+ − 1228 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s))
+ − 1229 retval =
+ − 1230 XSTRING_ASCII_BEGIN (s) - off +
+ − 1231 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s),
+ − 1232 len - (XSTRING_ASCII_BEGIN (s) -
+ − 1233 (Charcount) off));
+ − 1234 else
+ − 1235 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len);
+ − 1236 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN
+ − 1237 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len));
+ − 1238 # endif
+ − 1239 #else
+ − 1240 retval = (Bytecount) len;
+ − 1241 #endif
+ − 1242 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval);
+ − 1243 return retval;
+ − 1244 }
+ − 1245
+ − 1246 DECLARE_INLINE_HEADER (
867
+ − 1247 const Ibyte *
826
+ − 1248 string_char_addr (Lisp_Object s, Charcount idx)
+ − 1249 )
+ − 1250 {
+ − 1251 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx);
+ − 1252 }
+ − 1253
/* WARNING: If you modify an existing string, you must call
   bump_string_modiff() afterwards.

   Without MULE, storing a character is the same as storing a byte. */
#ifndef MULE
#define set_string_char(s, i, c) set_string_byte (s, i, c)
#else /* MULE */
void set_string_char (Lisp_Object s, Charcount i, Ichar c);
#endif /* MULE */
+ − 1261
+ − 1262 /* Return index to character before the one at IDX. */
+ − 1263 DECLARE_INLINE_HEADER (
+ − 1264 Bytecount
+ − 1265 prev_string_index (Lisp_Object s, Bytecount idx)
+ − 1266 )
+ − 1267 {
867
+ − 1268 const Ibyte *ptr = string_byte_addr (s, idx);
+ − 1269 DEC_IBYTEPTR (ptr);
826
+ − 1270 return string_addr_to_byte (s, ptr);
+ − 1271 }
+ − 1272
+ − 1273 /* Return index to character after the one at IDX. */
+ − 1274 DECLARE_INLINE_HEADER (
+ − 1275 Bytecount
+ − 1276 next_string_index (Lisp_Object s, Bytecount idx)
+ − 1277 )
+ − 1278 {
867
+ − 1279 const Ibyte *ptr = string_byte_addr (s, idx);
+ − 1280 INC_IBYTEPTR (ptr);
826
+ − 1281 return string_addr_to_byte (s, ptr);
+ − 1282 }
+ − 1283
+ − 1284
+ − 1285 /************************************************************************/
+ − 1286 /* */
771
+ − 1287 /* working with Eistrings */
+ − 1288 /* */
+ − 1289 /************************************************************************/
+ − 1290
+ − 1291 /*
+ − 1292 #### NOTE: This is a work in progress. Neither the API nor especially
+ − 1293 the implementation is finished.
+ − 1294
+ − 1295 NOTE: An Eistring is a structure that makes it easy to work with
+ − 1296 internally-formatted strings of data. It provides operations similar
+ − 1297 in feel to the standard strcpy(), strcat(), strlen(), etc., but
+ − 1298
+ − 1299 (a) it is Mule-correct
+ − 1300 (b) it does dynamic allocation so you never have to worry about size
793
+ − 1301 restrictions
851
+ − 1302 (c) it comes in an ALLOCA() variety (all allocation is stack-local,
793
+ − 1303 so there is no need to explicitly clean up) as well as a malloc()
+ − 1304 variety
+ − 1305 (d) it knows its own length, so it does not suffer from standard null
+ − 1306 byte brain-damage -- but it null-terminates the data anyway, so
+ − 1307 it can be passed to standard routines
+ − 1308 (e) it provides a much more powerful set of operations and knows about
771
+ − 1309 all the standard places where string data might reside: Lisp_Objects,
867
+ − 1310 other Eistrings, Ibyte * data with or without an explicit length,
+ − 1311 ASCII strings, Ichars, etc.
793
+ − 1312 (f) it provides easy operations to convert to/from externally-formatted
+ − 1313 data, and is easier to use than the standard TO_INTERNAL_FORMAT
771
+ − 1314 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal
+ − 1315 and external version of its data, but the external version is only
+ − 1316 initialized or changed when you call eito_external().)
+ − 1317
793
+ − 1318 The idea is to make it as easy to write Mule-correct string manipulation
+ − 1319 code as it is to write normal string manipulation code. We also make
+ − 1320 the API sufficiently general that it can handle multiple internal data
+ − 1321 formats (e.g. some fixed-width optimizing formats and a default variable
+ − 1322 width format) and allows for *ANY* data format we might choose in the
+ − 1323 future for the default format, including UCS2. (In other words, we can't
+ − 1324 assume that the internal format is ASCII-compatible and we can't assume
+ − 1325 it doesn't have embedded null bytes. We do assume, however, that any
+ − 1326 chosen format will have the concept of null-termination.) All of this is
+ − 1327 hidden from the user.
771
+ − 1328
+ − 1329 #### It is really too bad that we don't have a real object-oriented
+ − 1330 language, or at least a language with polymorphism!
+ − 1331
+ − 1332
+ − 1333 **********************************************
+ − 1334 * Declaration *
+ − 1335 **********************************************
+ − 1336
+ − 1337 To declare an Eistring, either put one of the following in the local
+ − 1338 variable section:
+ − 1339
+ − 1340 DECLARE_EISTRING (name);
2367
+ − 1341 Declare a new Eistring and initialize it to the empty string. This
+ − 1342 is a standard local variable declaration and can go anywhere in the
+ − 1343 variable declaration section. NAME itself is declared as an
+ − 1344 Eistring *, and its storage declared on the stack.
771
+ − 1345
+ − 1346 DECLARE_EISTRING_MALLOC (name);
2367
+ − 1347 Declare and initialize a new Eistring, which uses malloc()ed
+ − 1348 instead of ALLOCA()ed data. This is a standard local variable
+ − 1349 declaration and can go anywhere in the variable declaration
+ − 1350 section. Once you initialize the Eistring, you will have to free
+ − 1351 it using eifree() to avoid memory leaks. You will need to use this
+ − 1352 form if you are passing an Eistring to any function that modifies
+ − 1353 it (otherwise, the modified data may be in stack space and get
+ − 1354 overwritten when the function returns).
771
+ − 1355
+ − 1356 or use
+ − 1357
793
+ − 1358 Eistring ei;
+ − 1359 void eiinit (Eistring *ei);
+ − 1360 void eiinit_malloc (Eistring *einame);
771
+ − 1361 If you need to put an Eistring elsewhere than in a local variable
+ − 1362 declaration (e.g. in a structure), declare it as shown and then
+ − 1363 call one of the init macros.
+ − 1364
+ − 1365 Also note:
+ − 1366
793
+ − 1367 void eifree (Eistring *ei);
771
+ − 1368 If you declared an Eistring to use malloc() to hold its data,
+ − 1369 or converted it to the heap using eito_malloc(), then this
+ − 1370 releases any data in it and afterwards resets the Eistring
+ − 1371 using eiinit_malloc(). Otherwise, it just resets the Eistring
+ − 1372 using eiinit().
+ − 1373
+ − 1374
+ − 1375 **********************************************
+ − 1376 * Conventions *
+ − 1377 **********************************************
+ − 1378
+ − 1379 - The names of the functions have been chosen, where possible, to
+ − 1380 match the names of str*() functions in the standard C API.
+ − 1381 -
+ − 1382
+ − 1383
+ − 1384 **********************************************
+ − 1385 * Initialization *
+ − 1386 **********************************************
+ − 1387
+ − 1388 void eireset (Eistring *eistr);
+ − 1389 Initialize the Eistring to the empty string.
+ − 1390
+ − 1391 void eicpy_* (Eistring *eistr, ...);
+ − 1392 Initialize the Eistring from somewhere:
+ − 1393
+ − 1394 void eicpy_ei (Eistring *eistr, Eistring *eistr2);
+ − 1395 ... from another Eistring.
+ − 1396 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string);
+ − 1397 ... from a Lisp_Object string.
867
+ − 1398 void eicpy_ch (Eistring *eistr, Ichar ch);
+ − 1399 ... from an Ichar (this can be a conventional C character).
771
+ − 1400
+ − 1401 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string,
+ − 1402 Bytecount off, Charcount charoff,
+ − 1403 Bytecount len, Charcount charlen);
+ − 1404 ... from a section of a Lisp_Object string.
+ − 1405 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf,
+ − 1406 Bytecount off, Charcount charoff,
+ − 1407 Bytecount len, Charcount charlen);
+ − 1408 ... from a section of a Lisp_Object buffer.
867
+ − 1409 void eicpy_raw (Eistring *eistr, const Ibyte *data, Bytecount len);
771
+ − 1410 ... from raw internal-format data in the default internal format.
867
+ − 1411 void eicpy_rawz (Eistring *eistr, const Ibyte *data);
771
+ − 1412 ... from raw internal-format data in the default internal format
+ − 1413 that is "null-terminated" (the meaning of this depends on the nature
+ − 1414 of the default internal format).
867
+ − 1415 void eicpy_raw_fmt (Eistring *eistr, const Ibyte *data, Bytecount len,
826
+ − 1416 Internal_Format intfmt, Lisp_Object object);
771
+ − 1417 ... from raw internal-format data in the specified format.
867
+ − 1418 void eicpy_rawz_fmt (Eistring *eistr, const Ibyte *data,
826
+ − 1419 Internal_Format intfmt, Lisp_Object object);
771
+ − 1420 ... from raw internal-format data in the specified format that is
+ − 1421 "null-terminated" (the meaning of this depends on the nature of
+ − 1422 the specific format).
2421
+ − 1423 void eicpy_ascii (Eistring *eistr, const Ascbyte *ascstr);
771
+ − 1424 ... from an ASCII null-terminated string. Non-ASCII characters in
+ − 1425 the string are *ILLEGAL* (read abort() with error-checking defined).
2421
+ − 1426 void eicpy_ascii_len (Eistring *eistr, const Ascbyte *ascstr, len);
771
+ − 1427 ... from an ASCII string, with length specified. Non-ASCII characters
+ − 1428 in the string are *ILLEGAL* (read abort() with error-checking defined).
+ − 1429 void eicpy_ext (Eistring *eistr, const Extbyte *extdata,
1318
+ − 1430 Lisp_Object codesys);
771
+ − 1431 ... from external null-terminated data, with coding system specified.
+ − 1432 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata,
1318
+ − 1433 Bytecount extlen, Lisp_Object codesys);
771
+ − 1434 ... from external data, with length and coding system specified.
+ − 1435 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream);
+ − 1436 ... from an lstream; reads data till eof. Data must be in default
+ − 1437 internal format; otherwise, interpose a decoding lstream.
+ − 1438
+ − 1439
+ − 1440 **********************************************
+ − 1441 * Getting the data out of the Eistring *
+ − 1442 **********************************************
+ − 1443
867
+ − 1444 Ibyte *eidata (Eistring *eistr);
771
+ − 1445 Return a pointer to the raw data in an Eistring. This is NOT
+ − 1446 a copy.
+ − 1447
+ − 1448 Lisp_Object eimake_string (Eistring *eistr);
+ − 1449 Make a Lisp string out of the Eistring.
+ − 1450
+ − 1451 Lisp_Object eimake_string_off (Eistring *eistr,
+ − 1452 Bytecount off, Charcount charoff,
+ − 1453 Bytecount len, Charcount charlen);
+ − 1454 Make a Lisp string out of a section of the Eistring.
+ − 1455
867
+ − 1456 void eicpyout_alloca (Eistring *eistr, LVALUE: Ibyte *ptr_out,
771
+ − 1457 LVALUE: Bytecount len_out);
851
+ − 1458 Make an ALLOCA() copy of the data in the Eistring, using the
+ − 1459 default internal format. Due to the nature of ALLOCA(), this
771
+ − 1460 must be a macro, with all lvalues passed in as parameters.
793
+ − 1461 (More specifically, not all compilers correctly handle using
851
+ − 1462 ALLOCA() as the argument to a function call -- GCC on x86
+ − 1463 didn't use to, for example.) A pointer to the ALLOCA()ed data
793
+ − 1464 is stored in PTR_OUT, and the length of the data (not including
+ − 1465 the terminating zero) is stored in LEN_OUT.
771
+ − 1466
867
+ − 1467 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Ibyte *ptr_out,
771
+ − 1468 LVALUE: Bytecount len_out,
826
+ − 1469 Internal_Format intfmt, Lisp_Object object);
771
+ − 1470 Like eicpyout_alloca(), but converts to the specified internal
+ − 1471 format. (No formats other than FORMAT_DEFAULT are currently
+ − 1472 implemented, and you get an assertion failure if you try.)
+ − 1473
867
+ − 1474 Ibyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out);
771
+ − 1475 Make a malloc() copy of the data in the Eistring, using the
+ − 1476 default internal format. This is a real function. No lvalues
+ − 1477 passed in. Returns the new data, and stores the length (not
+ − 1478 including the terminating zero) using INTLEN_OUT, unless it's
+ − 1479 a NULL pointer.
+ − 1480
867
+ − 1481 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt,
826
+ − 1482 Bytecount *intlen_out, Lisp_Object object);
771
+ − 1483 Like eicpyout_malloc(), but converts to the specified internal
+ − 1484 format. (No formats other than FORMAT_DEFAULT are currently
+ − 1485 implemented, and you get an assertion failure if you try.)
+ − 1486
+ − 1487
+ − 1488 **********************************************
+ − 1489 * Moving to the heap *
+ − 1490 **********************************************
+ − 1491
+ − 1492 void eito_malloc (Eistring *eistr);
+ − 1493 Move this Eistring to the heap. Its data will be stored in a
+ − 1494 malloc()ed block rather than the stack. Subsequent changes to
+ − 1495 this Eistring will realloc() the block as necessary. Use this
+ − 1496 when you want the Eistring to remain in scope past the end of
+ − 1497 this function call. You will have to manually free the data
+ − 1498 in the Eistring using eifree().
+ − 1499
+ − 1500 void eito_alloca (Eistring *eistr);
+ − 1501 Move this Eistring back to the stack, if it was moved to the
+ − 1502 heap with eito_malloc(). This will automatically free any
+ − 1503 heap-allocated data.
+ − 1504
+ − 1505
+ − 1506
+ − 1507 **********************************************
+ − 1508 * Retrieving the length *
+ − 1509 **********************************************
+ − 1510
+ − 1511 Bytecount eilen (Eistring *eistr);
+ − 1512 Return the length of the internal data, in bytes. See also
+ − 1513 eiextlen(), below.
+ − 1514 Charcount eicharlen (Eistring *eistr);
+ − 1515 Return the length of the internal data, in characters.
+ − 1516
+ − 1517
+ − 1518 **********************************************
+ − 1519 * Working with positions *
+ − 1520 **********************************************
+ − 1521
+ − 1522 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos);
+ − 1523 Convert a char offset to a byte offset.
+ − 1524 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos);
+ − 1525 Convert a byte offset to a char offset.
+ − 1526 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos);
+ − 1527 Increment the given position by one character.
+ − 1528 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
+ − 1529 Increment the given position by N characters.
+ − 1530 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos);
+ − 1531 Decrement the given position by one character.
+ − 1532 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n);
+ − 1533 Decrement the given position by N characters.
+ − 1534
+ − 1535
+ − 1536 **********************************************
+ − 1537 * Getting the character at a position *
+ − 1538 **********************************************
+ − 1539
867
+ − 1540 Ichar eigetch (Eistring *eistr, Bytecount bytepos);
771
+ − 1541 Return the character at a particular byte offset.
867
+ − 1542 Ichar eigetch_char (Eistring *eistr, Charcount charpos);
771
+ − 1543 Return the character at a particular character offset.
+ − 1544
+ − 1545
+ − 1546 **********************************************
+ − 1547 * Setting the character at a position *
+ − 1548 **********************************************
+ − 1549
867
+ − 1550 Ichar eisetch (Eistring *eistr, Bytecount bytepos, Ichar chr);
771
+ − 1551 Set the character at a particular byte offset.
867
+ − 1552 Ichar eisetch_char (Eistring *eistr, Charcount charpos, Ichar chr);
771
+ − 1553 Set the character at a particular character offset.
+ − 1554
+ − 1555
+ − 1556 **********************************************
+ − 1557 * Concatenation *
+ − 1558 **********************************************
+ − 1559
+ − 1560 void eicat_* (Eistring *eistr, ...);
+ − 1561 Concatenate onto the end of the Eistring, with data coming from the
+ − 1562 same places as above:
+ − 1563
+ − 1564 void eicat_ei (Eistring *eistr, Eistring *eistr2);
+ − 1565 ... from another Eistring.
2421
+ − 1566 void eicat_ascii (Eistring *eistr, Ascbyte *ascstr);
771
+ − 1567 ... from an ASCII null-terminated string. Non-ASCII characters in
+ − 1568 the string are *ILLEGAL* (read abort() with error-checking defined).
867
+ − 1569 void eicat_raw (ei, const Ibyte *data, Bytecount len);
771
+ − 1570 ... from raw internal-format data in the default internal format.
867
+ − 1571 void eicat_rawz (ei, const Ibyte *data);
771
+ − 1572 ... from raw internal-format data in the default internal format
+ − 1573 that is "null-terminated" (the meaning of this depends on the nature
+ − 1574 of the default internal format).
+ − 1575 void eicat_lstr (ei, Lisp_Object lisp_string);
+ − 1576 ... from a Lisp_Object string.
867
+ − 1577 void eicat_ch (ei, Ichar ch);
+ − 1578 ... from an Ichar.
771
+ − 1579
+ − 1580 (All except the first variety are convenience functions.
+ − 1581 In the general case, create another Eistring from the source.)
+ − 1582
+ − 1583
+ − 1584 **********************************************
+ − 1585 * Replacement *
+ − 1586 **********************************************
+ − 1587
+ − 1588 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1589 Bytecount len, Charcount charlen, ...);
+ − 1590 Replace a section of the Eistring, specifically:
+ − 1591
+ − 1592 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1593 Bytecount len, Charcount charlen, Eistring *eistr2);
+ − 1594 ... with another Eistring.
2421
+ − 1595 void eisub_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1596 Bytecount len, Charcount charlen, Ascbyte *ascstr);
771
+ − 1597 ... with an ASCII null-terminated string. Non-ASCII characters in
+ − 1598 the string are *ILLEGAL* (read abort() with error-checking defined).
+ − 1599 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff,
867
+ − 1600 Bytecount len, Charcount charlen, Ichar ch);
+ − 1601 ... with an Ichar.
771
+ − 1602
+ − 1603 void eidel (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1604 Bytecount len, Charcount charlen);
+ − 1605 Delete a section of the Eistring.
+ − 1606
+ − 1607
+ − 1608 **********************************************
+ − 1609 * Converting to an external format *
+ − 1610 **********************************************
+ − 1611
1318
+ − 1612 void eito_external (Eistring *eistr, Lisp_Object codesys);
771
+ − 1613 Convert the Eistring to an external format and store the result
+ − 1614 in the string. NOTE: Further changes to the Eistring will *NOT*
+ − 1615 change the external data stored in the string. You will have to
+ − 1616 call eito_external() again in such a case if you want the external
+ − 1617 data.
+ − 1618
+ − 1619 Extbyte *eiextdata (Eistring *eistr);
+ − 1620 Return a pointer to the external data stored in the Eistring as
+ − 1621 a result of a prior call to eito_external().
+ − 1622
+ − 1623 Bytecount eiextlen (Eistring *eistr);
+ − 1624 Return the length in bytes of the external data stored in the
+ − 1625 Eistring as a result of a prior call to eito_external().
+ − 1626
+ − 1627
+ − 1628 **********************************************
+ − 1629 * Searching in the Eistring for a character *
+ − 1630 **********************************************
+ − 1631
867
+ − 1632 Bytecount eichr (Eistring *eistr, Ichar chr);
+ − 1633 Charcount eichr_char (Eistring *eistr, Ichar chr);
+ − 1634 Bytecount eichr_off (Eistring *eistr, Ichar chr, Bytecount off,
771
+ − 1635 Charcount charoff);
867
+ − 1636 Charcount eichr_off_char (Eistring *eistr, Ichar chr, Bytecount off,
771
+ − 1637 Charcount charoff);
867
+ − 1638 Bytecount eirchr (Eistring *eistr, Ichar chr);
+ − 1639 Charcount eirchr_char (Eistring *eistr, Ichar chr);
+ − 1640 Bytecount eirchr_off (Eistring *eistr, Ichar chr, Bytecount off,
771
+ − 1641 Charcount charoff);
867
+ − 1642 Charcount eirchr_off_char (Eistring *eistr, Ichar chr, Bytecount off,
771
+ − 1643 Charcount charoff);
+ − 1644
+ − 1645
+ − 1646 **********************************************
+ − 1647 * Searching in the Eistring for a string *
+ − 1648 **********************************************
+ − 1649
+ − 1650 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2);
+ − 1651 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2);
+ − 1652 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
+ − 1653 Charcount charoff);
+ − 1654 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2,
+ − 1655 Bytecount off, Charcount charoff);
+ − 1656 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2);
+ − 1657 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2);
+ − 1658 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off,
+ − 1659 Charcount charoff);
+ − 1660 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2,
+ − 1661 Bytecount off, Charcount charoff);
+ − 1662
2421
+ − 1663 Bytecount eistr_ascii (Eistring *eistr, Ascbyte *ascstr);
+ − 1664 Charcount eistr_ascii_char (Eistring *eistr, Ascbyte *ascstr);
+ − 1665 Bytecount eistr_ascii_off (Eistring *eistr, Ascbyte *ascstr, Bytecount off,
771
+ − 1666 Charcount charoff);
2421
+ − 1667 Charcount eistr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr,
771
+ − 1668 Bytecount off, Charcount charoff);
2421
+ − 1669 Bytecount eirstr_ascii (Eistring *eistr, Ascbyte *ascstr);
+ − 1670 Charcount eirstr_ascii_char (Eistring *eistr, Ascbyte *ascstr);
+ − 1671 Bytecount eirstr_ascii_off (Eistring *eistr, Ascbyte *ascstr,
771
+ − 1672 Bytecount off, Charcount charoff);
2421
+ − 1673 Charcount eirstr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr,
771
+ − 1674 Bytecount off, Charcount charoff);
+ − 1675
+ − 1676
+ − 1677 **********************************************
+ − 1678 * Comparison *
+ − 1679 **********************************************
+ − 1680
+ − 1681 int eicmp_* (Eistring *eistr, ...);
+ − 1682 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1683 Bytecount len, Charcount charlen, ...);
+ − 1684 int eicasecmp_* (Eistring *eistr, ...);
+ − 1685 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1686 Bytecount len, Charcount charlen, ...);
+ − 1687 int eicasecmp_i18n_* (Eistring *eistr, ...);
+ − 1688 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1689 Bytecount len, Charcount charlen, ...);
+ − 1690
+ − 1691 Compare the Eistring with the other data. Return value same as
+ − 1692 from strcmp. The `*' is either `ei' for another Eistring (in
+ − 1693 which case `...' is an Eistring), or `ascii' for a pure-ASCII string
+ − 1694 (in which case `...' is a pointer to that string). For anything
+ − 1695 more complex, first create an Eistring out of the source.
+ − 1696 Comparison is either simple (`eicmp_...'), ASCII case-folding
+ − 1697 (`eicasecmp_...'), or multilingual case-folding
+ − 1698 (`eicasecmp_i18n_...').
+ − 1699
+ − 1700
+ − 1701 More specifically, the prototypes are:
+ − 1702
+ − 1703 int eicmp_ei (Eistring *eistr, Eistring *eistr2);
+ − 1704 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1705 Bytecount len, Charcount charlen, Eistring *eistr2);
+ − 1706 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2);
+ − 1707 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1708 Bytecount len, Charcount charlen, Eistring *eistr2);
+ − 1709 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2);
+ − 1710 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off,
+ − 1711 Charcount charoff, Bytecount len,
+ − 1712 Charcount charlen, Eistring *eistr2);
+ − 1713
2421
+ − 1714 int eicmp_ascii (Eistring *eistr, Ascbyte *ascstr);
+ − 1715 int eicmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
+ − 1716 Bytecount len, Charcount charlen, Ascbyte *ascstr);
+ − 1717 int eicasecmp_ascii (Eistring *eistr, Ascbyte *ascstr);
+ − 1718 int eicasecmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
771
+ − 1719 Bytecount len, Charcount charlen,
2421
+ − 1720 Ascbyte *ascstr);
+ − 1721 int eicasecmp_i18n_ascii (Eistring *eistr, Ascbyte *ascstr);
+ − 1722 int eicasecmp_i18n_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff,
771
+ − 1723 Bytecount len, Charcount charlen,
2421
+ − 1724 Ascbyte *ascstr);
771
+ − 1725
+ − 1726
+ − 1727 **********************************************
+ − 1728 * Case-changing the Eistring *
+ − 1729 **********************************************
+ − 1730
+ − 1731 void eilwr (Eistring *eistr);
+ − 1732 Convert all characters in the Eistring to lowercase.
+ − 1733 void eiupr (Eistring *eistr);
+ − 1734 Convert all characters in the Eistring to uppercase.
+ − 1735 */
+ − 1736
+ − 1737
+ − 1738 /* Principles for writing Eistring functions:
+ − 1739
+ − 1740 (1) Unfortunately, we have to write most of the Eistring functions
851
+ − 1741 as macros, because of the use of ALLOCA(). The principle used
771
+ − 1742 below to assure no conflict in local variables is to prefix all
+ − 1743 local variables with "ei" plus a number, which should be unique
+ − 1744 among macros. In practice, when finding a new number, find the
+ − 1745 highest so far used, and add 1.
+ − 1746
+ − 1747 (2) We also suffix the Eistring fields with an _ to avoid problems
+ − 1748 with macro parameters of the same name. (And as the standard
+ − 1749 signal not to access these fields directly.)
+ − 1750
+ − 1751 (3) We maintain both the length in bytes and chars of the data in
+ − 1752 the Eistring at all times, for convenient retrieval by outside
+ − 1753 functions. That means when writing functions that manipulate
+ − 1754 Eistrings, you too need to keep both lengths up to date for all
+ − 1755 data that you work with.
+ − 1756
+ − 1757 (4) When writing a new type of operation (e.g. substitution), you
+ − 1758 will often find yourself working with outside data, and thus
+ − 1759 have a series of related API's, for different forms that the
+ − 1760 outside data is in. Generally, you will want to choose a
+ − 1761 subset of the forms supported by eicpy_*, which has to be
+ − 1762 totally general because that's the fundamental way to get data
+ − 1763 into an Eistring, and once the data is in the string, it
+ − 1764 would be possible to create a whole series of Ei operations that work on
+ − 1765 nothing but Eistrings. Although theoretically nice, in
+ − 1766 practice it's a hassle, so we suggest that you provide
+ − 1767 convenience functions. In particular, there are two paths you
+ − 1768 can take. One is minimalist -- it only allows other Eistrings
867
+ − 1769 and ASCII data, and Ichars if the particular operation makes
771
+ − 1770 sense with a character. The other provides interfaces for the
+ − 1771 most commonly-used forms -- Eistring, ASCII data, Lisp string,
+ − 1772 raw internal-format string with length, raw internal-format
867
+ − 1773 string without, and possibly Ichar. (In the function names,
771
+ − 1774 these are designated `ei', `ascii', `lstr', `raw', `rawz', and
+ − 1775 `ch', respectively.)
+ − 1776
+ − 1777 (5) When coding a new type of operation, such as was discussed in
+ − 1778 the previous section, the correct approach is to declare a worker
+ − 1779 function that does the work of everything, and is called by the
+ − 1780 other "container" macros that handle the different outside data
+ − 1781 forms. The data coming into the worker function, which
+ − 1782 typically ends in `_1', is in the form of three parameters:
+ − 1783 DATA, LEN, CHARLEN. (See point [3] about having two lengths and
+ − 1784 keeping them in sync.)
+ − 1785
+ − 1786 (6) Handling argument evaluation in macros: We take great care
+ − 1787 never to evaluate any argument more than once in any macro,
+ − 1788 except the initial Eistring parameter. This can and will be
+ − 1789 evaluated multiple times, but it should pretty much always just
+ − 1790 be a simple variable. This means, for example, that if an
+ − 1791 Eistring is the second (not first) argument of a macro, it
+ − 1792 doesn't fall under the "initial Eistring" exemption, so it
+ − 1793 needs protection against multi-evaluation. (Take the address of
+ − 1794 the Eistring structure, store in a temporary variable, and use
+ − 1795 temporary variable for all access to the Eistring.)
+ − 1796 Essentially, we want it to appear as if these Eistring macros
+ − 1797 are functions -- we would like to declare them as functions but
851
+ − 1798 they use ALLOCA(), so we can't (and we can't make them inline
+ − 1799 functions either -- ALLOCA() is explicitly disallowed in inline
771
+ − 1800 functions.)
+ − 1801
+ − 1802 (7) Note that our rules regarding multiple evaluation are *more*
+ − 1803 strict than the rules listed above under the heading "working
+ − 1804 with raw internal-format data".
+ − 1805 */
+ − 1806
+ − 1807
+ − 1808 /* ----- Declaration ----- */
+ − 1809
/* The Eistring structure.  Field names end in `_' so they cannot clash with
   macro parameters; use the ei*() macros rather than touching them. */
+ − 1810 typedef struct
+ − 1811 {
+ − 1812 /* Data for the Eistring, stored in the default internal format.
+ − 1813 Always includes terminating null. */
867
+ − 1814 Ibyte *data_;
771
+ − 1815 /* Total number of bytes allocated in DATA (including null). */
+ − 1816 Bytecount max_size_allocated_;
/* Length of the data in bytes, not counting the terminating null. */
+ − 1817 Bytecount bytelen_;
/* Length of the same data in characters; kept in sync with bytelen_. */
+ − 1818 Charcount charlen_;
/* Non-zero if data_ (and extdata_) are managed with xrealloc()/xfree();
   zero if they come from the ALLOCA() family. */
+ − 1819 int mallocp_;
+ − 1820
/* External-format copy of the data and its length in bytes, as filled in
   by eito_external() and read back with eiextdata()/eiextlen(). */
+ − 1821 Extbyte *extdata_;
+ − 1822 Bytecount extlen_;
+ − 1823 } Eistring;
+ − 1824
+ − 1825 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init;
+ − 1826
+ − 1827 #define DECLARE_EISTRING(name) \
+ − 1828 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \
+ − 1829 Eistring *name = & __ ## name ## __storage__
+ − 1830 #define DECLARE_EISTRING_MALLOC(name) \
+ − 1831 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \
+ − 1832 Eistring *name = & __ ## name ## __storage__
+ − 1833
+ − 1834 #define eiinit(ei) \
+ − 1835 do { \
793
+ − 1836 *(ei) = the_eistring_zero_init; \
771
+ − 1837 } while (0)
+ − 1838
+ − 1839 #define eiinit_malloc(ei) \
+ − 1840 do { \
793
+ − 1841 *(ei) = the_eistring_malloc_zero_init; \
771
+ − 1842 } while (0)
+ − 1843
+ − 1844
+ − 1845 /* ----- Utility ----- */
+ − 1846
+ − 1847 /* Make sure both LEN and CHARLEN are specified, in case one is given
+ − 1848 as -1. PTR evaluated at most once, others multiply. */
+ − 1849 #define eifixup_bytechar(ptr, len, charlen) \
+ − 1850 do { \
+ − 1851 if ((len) == -1) \
+ − 1852 (len) = charcount_to_bytecount (ptr, charlen); \
+ − 1853 else if ((charlen) == -1) \
+ − 1854 (charlen) = bytecount_to_charcount (ptr, len); \
+ − 1855 } while (0)
+ − 1856
+ − 1857 /* Make sure LEN is specified, in case it is given as -1. PTR
+ − 1858 evaluated at most once, others multiply. */
+ − 1859 #define eifixup_byte(ptr, len, charlen) \
+ − 1860 do { \
+ − 1861 if ((len) == -1) \
+ − 1862 (len) = charcount_to_bytecount (ptr, charlen); \
+ − 1863 } while (0)
+ − 1864
+ − 1865 /* Make sure CHARLEN is specified, in case it is given as -1. PTR
+ − 1866 evaluated at most once, others multiply. */
+ − 1867 #define eifixup_char(ptr, len, charlen) \
+ − 1868 do { \
+ − 1869 if ((charlen) == -1) \
+ − 1870 (charlen) = bytecount_to_charcount (ptr, len); \
+ − 1871 } while (0)
+ − 1872
+ − 1873
+ − 1874
+ − 1875 /* Make sure we can hold NEWBYTELEN bytes (which is NEWCHARLEN chars)
+ − 1876 plus a zero terminator. Preserve existing data as much as possible,
+ − 1877 including existing zero terminator. Put a new zero terminator where it
+ − 1878 should go if NEWZ is non-zero. All args but EI are evaluated only once. */
+ − 1879
/* Implementation notes for EI_ALLOC: the new lengths are stored first; the
   buffer is then examined only if the byte length actually changed.
   The allocated size is multiplied by 1.5 (with a floor of 32) until it
   can hold bytelen_ + 1 bytes.  With mallocp_ set the buffer is
   xrealloc()ed; otherwise a fresh block comes from alloca_ibytes() and the
   old contents, including the old terminator, are copied across.
   NOTE(review): when NEWBYTELEN equals the current bytelen_ nothing else
   happens -- no allocation, and no terminator is written even if NEWZ is
   non-zero.  Confirm that no caller relies on this for an Eistring whose
   data_ has never been allocated. */
+ − 1880 #define EI_ALLOC(ei, newbytelen, newcharlen, newz) \
+ − 1881 do { \
+ − 1882 int ei1oldeibytelen = (ei)->bytelen_; \
+ − 1883 \
+ − 1884 (ei)->charlen_ = (newcharlen); \
+ − 1885 (ei)->bytelen_ = (newbytelen); \
+ − 1886 \
+ − 1887 if (ei1oldeibytelen != (ei)->bytelen_) \
+ − 1888 { \
+ − 1889 int ei1newsize = (ei)->max_size_allocated_; \
+ − 1890 while (ei1newsize < (ei)->bytelen_ + 1) \
+ − 1891 { \
+ − 1892 ei1newsize = (int) (ei1newsize * 1.5); \
+ − 1893 if (ei1newsize < 32) \
+ − 1894 ei1newsize = 32; \
+ − 1895 } \
+ − 1896 if (ei1newsize != (ei)->max_size_allocated_) \
+ − 1897 { \
+ − 1898 if ((ei)->mallocp_) \
+ − 1899 /* xrealloc always preserves existing data as much as possible */ \
1333
+ − 1900 (ei)->data_ = (Ibyte *) xrealloc ((ei)->data_, ei1newsize); \
771
+ − 1901 else \
+ − 1902 { \
851
+ − 1903 /* We don't have realloc, so ALLOCA() more space and copy the \
771
+ − 1904 data into it. */ \
867
+ − 1905 Ibyte *ei1oldeidata = (ei)->data_; \
2367
+ − 1906 (ei)->data_ = alloca_ibytes (ei1newsize); \
771
+ − 1907 if (ei1oldeidata) \
+ − 1908 memcpy ((ei)->data_, ei1oldeidata, ei1oldeibytelen + 1); \
+ − 1909 } \
+ − 1910 (ei)->max_size_allocated_ = ei1newsize; \
+ − 1911 } \
+ − 1912 if (newz) \
+ − 1913 (ei)->data_[(ei)->bytelen_] = '\0'; \
+ − 1914 } \
+ − 1915 } while (0)
+ − 1916
+ − 1917 #define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen) \
+ − 1918 do { \
+ − 1919 EI_ALLOC (ei, bytelen, charlen, 1); \
+ − 1920 memcpy ((ei)->data_, data, (ei)->bytelen_); \
+ − 1921 } while (0)
+ − 1922
+ − 1923 /* ----- Initialization ----- */
+ − 1924
+ − 1925 #define eicpy_ei(ei, eicpy) \
+ − 1926 do { \
+ − 1927 const Eistring *ei2 = (eicpy); \
+ − 1928 EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_); \
+ − 1929 } while (0)
+ − 1930
+ − 1931 #define eicpy_lstr(ei, lisp_string) \
+ − 1932 do { \
+ − 1933 Lisp_Object ei3 = (lisp_string); \
+ − 1934 EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3), \
1333
+ − 1935 string_char_length (ei3)); \
771
+ − 1936 } while (0)
+ − 1937
/* Set EI to a section of LISP_STRING, replacing whatever EI held before.
   The section starts at byte offset OFF (computed from CHAROFF when OFF is
   -1) and covers LEN bytes / CHARLEN chars; either of LEN and CHARLEN may
   be -1, in which case it is computed from the other.  Every argument
   except EI is evaluated exactly once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen) \
do { \
  Lisp_Object ei23lstr = (lisp_string); \
  int ei23off = (off); \
  int ei23charoff = (charoff); \
  int ei23len = (len); \
  int ei23charlen = (charlen); \
  const Ibyte *ei23data = XSTRING_DATA (ei23lstr); \
  \
  /* Resolve the start offset first; the length fixup needs it. */ \
  eifixup_byte (ei23data, ei23off, ei23charoff); \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen); \
  \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen); \
} while (0)
+ − 1954
826
+ − 1955 #define eicpy_raw_fmt(ei, ptr, len, fmt, object) \
771
+ − 1956 do { \
1333
+ − 1957 const Ibyte *ei12ptr = (ptr); \
771
+ − 1958 Internal_Format ei12fmt = (fmt); \
+ − 1959 int ei12len = (len); \
+ − 1960 assert (ei12fmt == FORMAT_DEFAULT); \
+ − 1961 EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len, \
+ − 1962 bytecount_to_charcount (ei12ptr, ei12len)); \
+ − 1963 } while (0)
+ − 1964
826
+ − 1965 #define eicpy_raw(ei, ptr, len) \
+ − 1966 eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil)
+ − 1967
/* Set EI from the null-terminated internal-format string PTR; the length
   is taken with qxestrlen().  FMT must be FORMAT_DEFAULT (asserted).
   OBJECT is handed through to eicpy_raw_fmt() unchanged.  PTR and FMT are
   captured in locals so each is evaluated exactly once. */
#define eicpy_rawz_fmt(ei, ptr, fmt, object) \
do { \
  const Ibyte *ei12p1ptr = (ptr); \
  Internal_Format ei12p1fmt = (fmt); \
  assert (ei12p1fmt == FORMAT_DEFAULT); \
  /* Forward the captured format, not FMT, to avoid re-evaluating it. */ \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt, object); \
} while (0)
+ − 1975
826
+ − 1976 #define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil)
771
+ − 1977
1333
+ − 1978 #define eicpy_ch(ei, ch) \
+ − 1979 do { \
867
+ − 1980 Ibyte ei12p2[MAX_ICHAR_LEN]; \
+ − 1981 Bytecount ei12p2len = set_itext_ichar (ei12p2, ch); \
1333
+ − 1982 EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1); \
771
+ − 1983 } while (0)
+ − 1984
2421
+ − 1985 #define eicpy_ascii(ei, ascstr) \
771
+ − 1986 do { \
2421
+ − 1987 const Ascbyte *ei4 = (ascstr); \
771
+ − 1988 \
2367
+ − 1989 ASSERT_ASCTEXT_ASCII (ei4); \
771
+ − 1990 eicpy_ext (ei, ei4, Qbinary); \
+ − 1991 } while (0)
+ − 1992
2421
+ − 1993 #define eicpy_ascii_len(ei, ascstr, c_len) \
771
+ − 1994 do { \
2421
+ − 1995 const Ascbyte *ei6 = (ascstr); \
771
+ − 1996 int ei6len = (c_len); \
+ − 1997 \
2367
+ − 1998 ASSERT_ASCTEXT_ASCII_LEN (ei6, ei6len); \
771
+ − 1999 eicpy_ext_len (ei, ei6, ei6len, Qbinary); \
+ − 2000 } while (0)
+ − 2001
1318
+ − 2002 #define eicpy_ext_len(ei, extdata, extlen, codesys) \
771
+ − 2003 do { \
+ − 2004 const Extbyte *ei7 = (extdata); \
+ − 2005 int ei7len = (extlen); \
+ − 2006 \
1318
+ − 2007 SIZED_EXTERNAL_TO_SIZED_C_STRING (ei7, ei7len, (ei)->data_, \
+ − 2008 (ei)->bytelen_, codesys); \
771
+ − 2009 (ei)->max_size_allocated_ = (ei)->bytelen_ + 1; \
+ − 2010 (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \
+ − 2011 } while (0)
+ − 2012
1318
+ − 2013 #define eicpy_ext(ei, extdata, codesys) \
+ − 2014 do { \
+ − 2015 const Extbyte *ei8 = (extdata); \
+ − 2016 \
+ − 2017 eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, codesys), \
+ − 2018 codesys); \
771
+ − 2019 } while (0)
+ − 2020
+ − 2021 #define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) \
+ − 2022 NOT YET IMPLEMENTED
+ − 2023
+ − 2024 #define eicpy_lstream(eistr, lstream) \
+ − 2025 NOT YET IMPLEMENTED
+ − 2026
867
+ − 2027 #define eireset(eistr) eicpy_rawz (eistr, (Ibyte *) "")
771
+ − 2028
+ − 2029 /* ----- Getting the data out of the Eistring ----- */
+ − 2030
+ − 2031 #define eidata(ei) ((ei)->data_)
+ − 2032
+ − 2033 #define eimake_string(ei) make_string (eidata (ei), eilen (ei))
+ − 2034
/* Build a Lisp string with make_string() from the section of EISTR that
   starts at byte offset OFF (computed from CHAROFF when OFF is -1) and is
   LEN bytes long (computed from CHARLEN when LEN is -1).

   NOTE: this expands to a statement that executes `return' in the
   enclosing function with the new string as its value.  It therefore
   cannot be used as an expression, and control never continues past it. */
#define eimake_string_off(eistr, off, charoff, len, charlen) \
do { \
  int ei24off = (off); \
  int ei24charoff = (charoff); \
  int ei24len = (len); \
  int ei24charlen = (charlen); \
  \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff); \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen); \
  \
  return make_string ((eistr)->data_ + ei24off, ei24len); \
} while (0)
+ − 2048
+ − 2049 #define eicpyout_alloca(eistr, ptrout, lenout) \
826
+ − 2050 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil)
771
+ − 2051 #define eicpyout_malloc(eistr, lenout) \
826
+ − 2052 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil)
867
+ − 2053 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out,
826
+ − 2054 Internal_Format fmt, Lisp_Object object);
/* Copy the data of EISTR, terminating null included, into a new block
   obtained from alloca_ibytes().  PTROUT and LENOUT must be lvalues: the
   block's address is stored in PTROUT and the byte length (without the
   null) in LENOUT.  FMT must be FORMAT_DEFAULT (asserted); OBJECT is not
   used.  PTROUT, LENOUT and FMT are evaluated once. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object) \
do { \
  Internal_Format ei23fmt = (fmt); \
  /* Address of the caller's pointer variable, hence Ibyte **. */ \
  Ibyte **ei23ptrout = &(ptrout); \
  Bytecount *ei23lenout = &(lenout); \
  \
  assert (ei23fmt == FORMAT_DEFAULT); \
  \
  *ei23lenout = (eistr)->bytelen_; \
  *ei23ptrout = alloca_ibytes ((eistr)->bytelen_ + 1); \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1); \
} while (0)
+ − 2067
+ − 2068 /* ----- Moving to the heap ----- */
+ − 2069
/* Release EI and return it to its initial state.  A non-malloc Eistring is
   simply re-initialized with eiinit().  A malloc Eistring has its internal
   and external buffers passed to xfree() (when present) and is then
   re-initialized with eiinit_malloc(). */
#define eifree(ei) \
do { \
  if (!(ei)->mallocp_) \
    eiinit (ei); \
  else \
    { \
      if ((ei)->data_) \
        xfree ((ei)->data_, Ibyte *); \
      if ((ei)->extdata_) \
        xfree ((ei)->extdata_, Extbyte *); \
      eiinit_malloc (ei); \
    } \
} while (0)
+ − 2083
+ − 2084 int eifind_large_enough_buffer (int oldbufsize, int needed_size);
+ − 2085 void eito_malloc_1 (Eistring *ei);
+ − 2086
+ − 2087 #define eito_malloc(ei) eito_malloc_1 (ei)
+ − 2088
/* Convert a malloc Eistring into a non-malloc one: copy the internal data
   (and the external data, if any) into blocks obtained from
   alloca_ibytes()/alloca_extbytes(), xfree() the old heap blocks and clear
   mallocp_.  Does nothing if EI is already non-malloc.

   This is a statement macro, so it must not `return': doing so would leave
   the *calling* function.  The whole body is guarded by the mallocp_ test
   instead. */
#define eito_alloca(ei) \
do { \
  if ((ei)->mallocp_) \
    { \
      (ei)->mallocp_ = 0; \
      if ((ei)->data_) \
        { \
          Ibyte *ei13newdata; \
          \
          (ei)->max_size_allocated_ = \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1); \
          ei13newdata = alloca_ibytes ((ei)->max_size_allocated_); \
          /* bytelen_ + 1: bring the terminating null along. */ \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1); \
          xfree ((ei)->data_, Ibyte *); \
          (ei)->data_ = ei13newdata; \
        } \
      \
      if ((ei)->extdata_) \
        { \
          Extbyte *ei13newdata = alloca_extbytes ((ei)->extlen_ + 2); \
          \
          memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_); \
          /* Double null-terminate in case of Unicode data */ \
          ei13newdata[(ei)->extlen_] = '\0'; \
          ei13newdata[(ei)->extlen_ + 1] = '\0'; \
          xfree ((ei)->extdata_, Extbyte *); \
          (ei)->extdata_ = ei13newdata; \
        } \
    } \
} while (0)
+ − 2118
+ − 2119
+ − 2120 /* ----- Retrieving the length ----- */
+ − 2121
+ − 2122 #define eilen(ei) ((ei)->bytelen_)
+ − 2123 #define eicharlen(ei) ((ei)->charlen_)
+ − 2124
+ − 2125
+ − 2126 /* ----- Working with positions ----- */
+ − 2127
+ − 2128 #define eicharpos_to_bytepos(ei, charpos) \
+ − 2129 charcount_to_bytecount ((ei)->data_, charpos)
+ − 2130 #define eibytepos_to_charpos(ei, bytepos) \
+ − 2131 bytecount_to_charcount ((ei)->data_, bytepos)
+ − 2132
+ − 2133 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr,
+ − 2134 Bytecount bytepos,
+ − 2135 Charcount n))
+ − 2136 {
867
+ − 2137 Ibyte *pos = eistr->data_ + bytepos;
814
+ − 2138 Charcount i;
771
+ − 2139
800
+ − 2140 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
+ − 2141 text_checking_assert (n >= 0 && n <= eistr->charlen_);
771
+ − 2142 /* We could check N more correctly now, but that would require a
+ − 2143 call to bytecount_to_charcount(), which would be needlessly
+ − 2144 expensive (it would convert O(N) algorithms into O(N^2) algorithms
800
+ − 2145 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are
867
+ − 2146 guaranteed to catch it either inside INC_IBYTEPTR() or in the check
771
+ − 2147 below. */
+ − 2148 for (i = 0; i < n; i++)
867
+ − 2149 INC_IBYTEPTR (pos);
800
+ − 2150 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
771
+ − 2151 return pos - eistr->data_;
+ − 2152 }
+ − 2153
/* Byte position one character (eiincpos) or N characters (eiincpos_n)
   after BYTEPOS in EI.  There must be no space between the macro name and
   `(', otherwise the preprocessor defines an object-like macro. */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
+ − 2156
+ − 2157 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr,
+ − 2158 Bytecount bytepos,
+ − 2159 Charcount n))
+ − 2160 {
867
+ − 2161 Ibyte *pos = eistr->data_ + bytepos;
771
+ − 2162 int i;
+ − 2163
800
+ − 2164 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_);
+ − 2165 text_checking_assert (n >= 0 && n <= eistr->charlen_);
771
+ − 2166 /* We could check N more correctly now, but ... see above. */
+ − 2167 for (i = 0; i < n; i++)
867
+ − 2168 DEC_IBYTEPTR (pos);
800
+ − 2169 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_);
771
+ − 2170 return pos - eistr->data_;
+ − 2171 }
+ − 2172
/* Byte position one character (eidecpos) or N characters (eidecpos_n)
   before BYTEPOS in EI.  There must be no space between the macro name and
   `(', otherwise the preprocessor defines an object-like macro. */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
+ − 2175
+ − 2176
+ − 2177 /* ----- Getting the character at a position ----- */
+ − 2178
+ − 2179 #define eigetch(ei, bytepos) \
867
+ − 2180 itext_ichar ((ei)->data_ + (bytepos))
+ − 2181 #define eigetch_char(ei, charpos) itext_ichar_n ((ei)->data_, charpos)
771
+ − 2182
+ − 2183
+ − 2184 /* ----- Setting the character at a position ----- */
+ − 2185
+ − 2186 #define eisetch(ei, bytepos, chr) \
+ − 2187 eisub_ch (ei, bytepos, -1, -1, 1, chr)
+ − 2188 #define eisetch_char(ei, charpos, chr) \
+ − 2189 eisub_ch (ei, -1, charpos, -1, 1, chr)
+ − 2190
+ − 2191
+ − 2192 /* ----- Concatenation ----- */
+ − 2193
+ − 2194 #define eicat_1(ei, data, bytelen, charlen) \
+ − 2195 do { \
+ − 2196 int ei14oldeibytelen = (ei)->bytelen_; \
+ − 2197 int ei14bytelen = (bytelen); \
+ − 2198 EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen, \
+ − 2199 (ei)->charlen_ + (charlen), 1); \
+ − 2200 memcpy ((ei)->data_ + ei14oldeibytelen, (data), \
+ − 2201 ei14bytelen); \
+ − 2202 } while (0)
+ − 2203
+ − 2204 #define eicat_ei(ei, ei2) \
+ − 2205 do { \
+ − 2206 const Eistring *ei9 = (ei2); \
+ − 2207 eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_); \
+ − 2208 } while (0)
+ − 2209
2421
+ − 2210 #define eicat_ascii(ei, ascstr) \
771
+ − 2211 do { \
2421
+ − 2212 const Ascbyte *ei15 = (ascstr); \
771
+ − 2213 int ei15len = strlen (ei15); \
+ − 2214 \
2367
+ − 2215 ASSERT_ASCTEXT_ASCII_LEN (ei15, ei15len); \
771
+ − 2216 eicat_1 (ei, ei15, ei15len, \
867
+ − 2217 bytecount_to_charcount ((Ibyte *) ei15, ei15len)); \
771
+ − 2218 } while (0)
+ − 2219
+ − 2220 #define eicat_raw(ei, data, len) \
+ − 2221 do { \
+ − 2222 int ei16len = (len); \
867
+ − 2223 const Ibyte *ei16data = (data); \
771
+ − 2224 eicat_1 (ei, ei16data, ei16len, \
+ − 2225 bytecount_to_charcount (ei16data, ei16len)); \
+ − 2226 } while (0)
+ − 2227
+ − 2228 #define eicat_rawz(ei, ptr) \
+ − 2229 do { \
867
+ − 2230 const Ibyte *ei16p5ptr = (ptr); \
771
+ − 2231 eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr)); \
+ − 2232 } while (0)
+ − 2233
+ − 2234 #define eicat_lstr(ei, lisp_string) \
+ − 2235 do { \
+ − 2236 Lisp_Object ei17 = (lisp_string); \
+ − 2237 eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \
826
+ − 2238 string_char_length (ei17)); \
771
+ − 2239 } while (0)
+ − 2240
+ − 2241 #define eicat_ch(ei, ch) \
+ − 2242 do { \
1333
+ − 2243 Ibyte ei22ch[MAX_ICHAR_LEN]; \
867
+ − 2244 Bytecount ei22len = set_itext_ichar (ei22ch, ch); \
771
+ − 2245 eicat_1 (ei, ei22ch, ei22len, 1); \
+ − 2246 } while (0)
+ − 2247
+ − 2248
+ − 2249 /* ----- Replacement ----- */
+ − 2250
+ − 2251 /* Replace the section of an Eistring at (OFF, LEN) with the data at
+ − 2252 SRC of length SRCLEN. All positions have corresponding character values,
+ − 2253 and either can be -1 -- it will be computed from the other. */
+ − 2254
/* Worker for the eisub_*() macros and eidel().  Every argument except EI
   is copied into a local exactly once.  Steps: (1) fill in whichever
   member of each byte/char pair was passed as -1; (2) resize EI with
   EI_ALLOC, passing 0 for NEWZ so that no terminator is written there;
   (3) if the replaced and the replacement byte lengths differ, memmove the
   tail, zero terminator included, to its new place; (4) copy SRC in, but
   only when SRCLEN > 0 -- which is what lets eidel() pass a NULL SRC. */
+ − 2255 #define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
+ − 2256 do { \
+ − 2257 int ei18off = (off); \
+ − 2258 int ei18charoff = (charoff); \
+ − 2259 int ei18len = (len); \
+ − 2260 int ei18charlen = (charlen); \
867
+ − 2261 Ibyte *ei18src = (Ibyte *) (src); \
771
+ − 2262 int ei18srclen = (srclen); \
+ − 2263 int ei18srccharlen = (srccharlen); \
+ − 2264 \
+ − 2265 int ei18oldeibytelen = (ei)->bytelen_; \
+ − 2266 \
+ − 2267 eifixup_bytechar ((ei)->data_, ei18off, ei18charoff); \
+ − 2268 eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen); \
+ − 2269 eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen); \
+ − 2270 \
+ − 2271 EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len, \
+ − 2272 (ei)->charlen_ + ei18srccharlen - ei18charlen, 0); \
+ − 2273 if (ei18len != ei18srclen) \
+ − 2274 memmove ((ei)->data_ + ei18off + ei18srclen, \
+ − 2275 (ei)->data_ + ei18off + ei18len, \
+ − 2276 /* include zero terminator. */ \
+ − 2277 ei18oldeibytelen - (ei18off + ei18len) + 1); \
+ − 2278 if (ei18srclen > 0) \
+ − 2279 memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen); \
+ − 2280 } while (0)
+ − 2281
+ − 2282 #define eisub_ei(ei, off, charoff, len, charlen, ei2) \
+ − 2283 do { \
1333
+ − 2284 const Eistring *ei19 = (ei2); \
771
+ − 2285 eisub_1 (ei, off, charoff, len, charlen, ei19->data_, ei19->bytelen_, \
+ − 2286 ei19->charlen_); \
+ − 2287 } while (0)
+ − 2288
2421
+ − 2289 #define eisub_ascii(ei, off, charoff, len, charlen, ascstr) \
771
+ − 2290 do { \
2421
+ − 2291 const Ascbyte *ei20 = (ascstr); \
771
+ − 2292 int ei20len = strlen (ei20); \
2367
+ − 2293 ASSERT_ASCTEXT_ASCII_LEN (ei20, ei20len); \
771
+ − 2294 eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1); \
+ − 2295 } while (0)
+ − 2296
+ − 2297 #define eisub_ch(ei, off, charoff, len, charlen, ch) \
+ − 2298 do { \
1333
+ − 2299 Ibyte ei21ch[MAX_ICHAR_LEN]; \
867
+ − 2300 Bytecount ei21len = set_itext_ichar (ei21ch, ch); \
771
+ − 2301 eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1); \
+ − 2302 } while (0)
+ − 2303
+ − 2304 #define eidel(ei, off, charoff, len, charlen) \
+ − 2305 eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0)
+ − 2306
+ − 2307
+ − 2308 /* ----- Converting to an external format ----- */
+ − 2309
1333
+ − 2310 #define eito_external(ei, codesys) \
771
+ − 2311 do { \
+ − 2312 if ((ei)->mallocp_) \
+ − 2313 { \
+ − 2314 if ((ei)->extdata_) \
+ − 2315 { \
1726
+ − 2316 xfree ((ei)->extdata_, Extbyte *); \
771
+ − 2317 (ei)->extdata_ = 0; \
+ − 2318 } \
+ − 2319 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
+ − 2320 MALLOC, ((ei)->extdata_, (ei)->extlen_), \
1333
+ − 2321 codesys); \
771
+ − 2322 } \
+ − 2323 else \
+ − 2324 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \
+ − 2325 ALLOCA, ((ei)->extdata_, (ei)->extlen_), \
1318
+ − 2326 codesys); \
771
+ − 2327 } while (0)
+ − 2328
+ − 2329 #define eiextdata(ei) ((ei)->extdata_)
+ − 2330 #define eiextlen(ei) ((ei)->extlen_)
+ − 2331
+ − 2332
+ − 2333 /* ----- Searching in the Eistring for a character ----- */
+ − 2334
+ − 2335 #define eichr(eistr, chr) \
+ − 2336 NOT YET IMPLEMENTED
+ − 2337 #define eichr_char(eistr, chr) \
+ − 2338 NOT YET IMPLEMENTED
+ − 2339 #define eichr_off(eistr, chr, off, charoff) \
+ − 2340 NOT YET IMPLEMENTED
+ − 2341 #define eichr_off_char(eistr, chr, off, charoff) \
+ − 2342 NOT YET IMPLEMENTED
+ − 2343 #define eirchr(eistr, chr) \
+ − 2344 NOT YET IMPLEMENTED
+ − 2345 #define eirchr_char(eistr, chr) \
+ − 2346 NOT YET IMPLEMENTED
+ − 2347 #define eirchr_off(eistr, chr, off, charoff) \
+ − 2348 NOT YET IMPLEMENTED
+ − 2349 #define eirchr_off_char(eistr, chr, off, charoff) \
+ − 2350 NOT YET IMPLEMENTED
+ − 2351
+ − 2352
+ − 2353 /* ----- Searching in the Eistring for a string ----- */
+ − 2354
/* Substring search is not implemented either.  As with the character
   search macros, each of these expands to the bare tokens
   NOT YET IMPLEMENTED, so a use is expected to fail to compile. */

/* Needle given as another Eistring. */
#define eistr_ei(eistr, eistr2)                          NOT YET IMPLEMENTED
#define eistr_ei_char(eistr, eistr2)                     NOT YET IMPLEMENTED
#define eistr_ei_off(eistr, eistr2, off, charoff)        NOT YET IMPLEMENTED
#define eistr_ei_off_char(eistr, eistr2, off, charoff)   NOT YET IMPLEMENTED
#define eirstr_ei(eistr, eistr2)                         NOT YET IMPLEMENTED
#define eirstr_ei_char(eistr, eistr2)                    NOT YET IMPLEMENTED
#define eirstr_ei_off(eistr, eistr2, off, charoff)       NOT YET IMPLEMENTED
#define eirstr_ei_off_char(eistr, eistr2, off, charoff)  NOT YET IMPLEMENTED

/* Needle given as an ASCII C string. */
#define eistr_ascii(eistr, ascstr)                          NOT YET IMPLEMENTED
#define eistr_ascii_char(eistr, ascstr)                     NOT YET IMPLEMENTED
#define eistr_ascii_off(eistr, ascstr, off, charoff)        NOT YET IMPLEMENTED
#define eistr_ascii_off_char(eistr, ascstr, off, charoff)   NOT YET IMPLEMENTED
#define eirstr_ascii(eistr, ascstr)                         NOT YET IMPLEMENTED
#define eirstr_ascii_char(eistr, ascstr)                    NOT YET IMPLEMENTED
#define eirstr_ascii_off(eistr, ascstr, off, charoff)       NOT YET IMPLEMENTED
#define eirstr_ascii_off_char(eistr, ascstr, off, charoff)  NOT YET IMPLEMENTED
+ − 2388
+ − 2389
+ − 2390 /* ----- Comparison ----- */
+ − 2391
+ − 2392 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
867
+ − 2393 Bytecount len, Charcount charlen, const Ibyte *data,
771
+ − 2394 const Eistring *ei2, int is_c, int fold_case);
+ − 2395
/* Compare an Eistring against another Eistring (EISTR2).  The plain
   forms cover the whole of EISTR; the _off_ forms take an explicit
   OFF/CHAROFF/LEN/CHARLEN span.  The last argument handed to eicmp_1()
   selects the flavour: 0 = eicmp, 1 = eicasecmp, 2 = eicasecmp_i18n. */

/* fold_case == 0 */
#define eicmp_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
#define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)

/* fold_case == 1 */
#define eicasecmp_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
#define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)

/* fold_case == 2 */
#define eicasecmp_i18n_ei(eistr, eistr2) \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)
#define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)
+ − 2408
2421
/* Compare an Eistring against an ASCII C string (ASCSTR).  The plain
   forms cover the whole of EISTR; the _off_ forms take an explicit
   OFF/CHAROFF/LEN/CHARLEN span.  ASCSTR is passed to eicmp_1() as its
   DATA argument with IS_C set to 1; the last argument selects the
   flavour: 0 = eicmp, 1 = eicasecmp, 2 = eicasecmp_i18n.

   ASCSTR is parenthesized before the cast so that an argument such as
   `cond ? a : b' is converted as a whole rather than having the cast
   bind to its first operand only. */

/* fold_case == 0 */
#define eicmp_ascii(eistr, ascstr) \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 0)
#define eicmp_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, (const Ibyte *) (ascstr), 0, 1, 0)

/* fold_case == 1 */
#define eicasecmp_ascii(eistr, ascstr) \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, (const Ibyte *) (ascstr), 0, 1, 1)

/* fold_case == 2 */
#define eicasecmp_i18n_ascii(eistr, ascstr) \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 2)
#define eicasecmp_i18n_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen, (const Ibyte *) (ascstr), 0, 1, 2)
771
+ − 2421
+ − 2422
+ − 2423 /* ----- Case-changing the Eistring ----- */
+ − 2424
867
+ − 2425 int eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata,
771
+ − 2426 int downp);
+ − 2427
/* Change the case of the Eistring EI in place; DOWNP is passed through
   to eistr_casefiddle_1().

   A scratch buffer big enough for charlen_ characters of MAX_ICHAR_LEN
   bytes each, plus one extra byte, is obtained from alloca_ibytes() and
   handed to eistr_casefiddle_1().  If that returns nonzero, EI is
   switched over to the scratch buffer; otherwise EI is left untouched.

   NOTE(review): data_ is repointed without consulting mallocp_ (which
   eito_external() does consult) -- confirm this is never applied to a
   malloc-mode Eistring, or that such strings are handled elsewhere.

   EI is expanded several times, so it must not have side effects. */
#define EI_CASECHANGE(ei, downp) \
do { \
  int ei11capacity = (ei)->charlen_ * MAX_ICHAR_LEN + 1; \
  Ibyte *ei11buffer = (Ibyte *) alloca_ibytes (ei11capacity); \
  int ei11bytes = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, \
                                      ei11buffer, downp); \
 \
  if (ei11bytes) \
    { \
      /* charlen_ is not updated: the character count is unchanged. */ \
      (ei)->data_ = ei11buffer; \
      (ei)->bytelen_ = ei11bytes; \
      (ei)->max_size_allocated_ = ei11capacity; \
    } \
} while (0)

/* Public spellings: eilwr() passes DOWNP = 1, eiupr() passes DOWNP = 0. */
#define eilwr(ei) EI_CASECHANGE (ei, 1)
#define eiupr(ei) EI_CASECHANGE (ei, 0)
+ − 2447
1743
+ − 2448 END_C_DECLS
1650
+ − 2449
771
+ − 2450
+ − 2451 /************************************************************************/
+ − 2452 /* */
+ − 2453 /* Converting between internal and external format */
+ − 2454 /* */
+ − 2455 /************************************************************************/
+ − 2456 /*
1318
+ − 2457 The macros below are used for converting data between different formats.
+ − 2458 Generally, the data is textual, and the formats are related to
+ − 2459 internationalization (e.g. converting between internal-format text and
+ − 2460 UTF-8) -- but the mechanism is general, and could be used for anything,
+ − 2461 e.g. decoding gzipped data.
+ − 2462
+ − 2463 In general, conversion involves a source of data, a sink, the existing
+ − 2464 format of the source data, and the desired format of the sink. The
+ − 2465 macros below, however, always require that either the source or sink is
+ − 2466 internal-format text. Therefore, in practice the conversions below
+ − 2467 involve source, sink, an external format (specified by a coding system),
+ − 2468 and the direction of conversion (internal->external or vice-versa).
+ − 2469
+ − 2470 Sources and sinks can be raw data (sized or unsized -- when unsized,
+ − 2471 input data is assumed to be null-terminated [double null-terminated for
+ − 2472 Unicode-format data], and on output the length is not stored anywhere),
+ − 2473 Lisp strings, Lisp buffers, lstreams, and opaque data objects. When the
+ − 2474 output is raw data, the result can be allocated either with alloca() or
+ − 2475 malloc(). (There is currently no provision for writing into a fixed
+ − 2476 buffer. If you want this, use alloca() output and then copy the data --
+ − 2477 but be careful with the size! Unless you are very sure of the encoding
+ − 2478 being used, upper bounds for the size are not in general computable.)
+ − 2479 The obvious restrictions on source and sink types apply (e.g. Lisp
+ − 2480 strings are a source and sink only for internal data).
+ − 2481
+ − 2482 All raw data outputted will contain an extra null byte (two bytes for
+ − 2483 Unicode -- currently, in fact, all output data, whether internal or
+ − 2484 external, is double-null-terminated, but you can't count on this; see
+ − 2485 below). This means that enough space is allocated to contain the extra
+ − 2486 nulls; however, these nulls are not reflected in the returned output
+ − 2487 size.
+ − 2488
+ − 2489 The most basic macros are TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT.
+ − 2490 These can be used to convert between any kinds of sources or sinks.
+ − 2491 However, 99% of conversions involve raw data or Lisp strings as both
+ − 2492 source and sink, and usually data is output as alloca() rather than
+ − 2493 malloc(). For this reason, convenience macros are defined for many types
+ − 2494 of conversions involving raw data and/or Lisp strings, especially when
+ − 2495 the output is an alloca()ed string. (When the destination is a
+ − 2496 Lisp_String, there are other functions that should be used instead --
+ − 2497 build_ext_string() and make_ext_string(), for example.) The convenience
+ − 2498 macros are of two types -- the older kind that store the result into a
+ − 2499 specified variable, and the newer kind that return the result. The newer
+ − 2500 kind of macros don't exist when the output is sized data, because that
+ − 2501 would have two return values. NOTE: All convenience macros are
+ − 2502 ultimately defined in terms of TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT.
+ − 2503 Thus, any comments below about the workings of these macros also apply to
+ − 2504 all convenience macros.
+ − 2505
+ − 2506 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, codesys)
+ − 2507 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, codesys)
771
+ − 2508
+ − 2509 Typical use is
+ − 2510
2367
+ − 2511 TO_EXTERNAL_FORMAT (LISP_STRING, str, C_STRING_MALLOC, ptr, Qfile_name);
+ − 2512
+ − 2513 which means that the contents of the lisp string `str' are written
+ − 2514 to a malloc'ed memory area which will be pointed to by `ptr', after the
+ − 2515 function returns. The conversion will be done using the `file-name'
+ − 2516 coding system (which will be controlled by the user indirectly by
+ − 2517 setting or binding the variable `file-name-coding-system').
+ − 2518
+ − 2519 Some sources and sinks require two C variables to specify. We use
+ − 2520 some preprocessor magic to allow different source and sink types, and
+ − 2521 even different numbers of arguments to specify different types of
+ − 2522 sources and sinks.
+ − 2523
+ − 2524 So we can have a call that looks like
+ − 2525
+ − 2526 TO_INTERNAL_FORMAT (DATA, (ptr, len),
+ − 2527 MALLOC, (ptr, len),
+ − 2528 coding_system);
+ − 2529
+ − 2530 The parenthesized argument pairs are required to make the
+ − 2531 preprocessor magic work.
771
+ − 2532
+ − 2533 NOTE: GC is inhibited during the entire operation of these macros. This
+ − 2534 is because frequently the data to be converted comes from strings but
+ − 2535 gets passed in as just DATA, and GC may move around the string data. If
+ − 2536 we didn't inhibit GC, there'd have to be a lot of messy recoding,
+ − 2537 alloca-copying of strings and other annoying stuff.
+ − 2538
+ − 2539 The source or sink can be specified in one of these ways:
+ − 2540
+ − 2541 DATA, (ptr, len), // input data is a fixed buffer of size len
851
+ − 2542 ALLOCA, (ptr, len), // output data is in an ALLOCA()ed buffer of size len
771
+ − 2543 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
+ − 2544 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
+ − 2545 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
+ − 2546 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr))
+ − 2547 // on input (the Unicode version is used when correct)
+ − 2548 LISP_STRING, string, // input or output is a Lisp_Object of type string
+ − 2549 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
+ − 2550 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream
+ − 2551 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque
+ − 2552
+ − 2553 When specifying the sink, use lvalues, since the macro will assign to them,
+ − 2554 except when the sink is an lstream or a lisp buffer.
+ − 2555
2367
+ − 2556 For the sink types `ALLOCA' and `C_STRING_ALLOCA', the resulting text is
+ − 2557 stored in a stack-allocated buffer, which is automatically freed on
+ − 2558 returning from the function. However, the sink types `MALLOC' and
+ − 2559 `C_STRING_MALLOC' return `xmalloc()'ed memory. The caller is responsible
+ − 2560 for freeing this memory using `xfree()'.
+ − 2561
771
+ − 2562 The macros accept the kinds of sources and sinks appropriate for
+ − 2563 internal and external data representation. See the type_checking_assert
+ − 2564 macros below for the actual allowed types.
+ − 2565
+ − 2566 Since some sources and sinks use one argument (a Lisp_Object) to
+ − 2567 specify them, while others take a (pointer, length) pair, we use
+ − 2568 some C preprocessor trickery to allow pair arguments to be specified
+ − 2569 by parenthesizing them, as in the examples above.
+ − 2570
+ − 2571 Anything prefixed by dfc_ (`data format conversion') is private.
+ − 2572 They are only used to implement these macros.
+ − 2573
+ − 2574 [[Using C_STRING* is appropriate for using with external APIs that
+ − 2575 take null-terminated strings. For internal data, we should try to
+ − 2576 be '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'.
+ − 2577
+ − 2578 Sometime in the future we might allow output to C_STRING_ALLOCA or
+ − 2579 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not
+ − 2580 TO_INTERNAL_FORMAT().]]
+ − 2581
+ − 2582 The above comments are not true. Frequently (most of the time, in
+ − 2583 fact), external strings come as zero-terminated entities, where the
+ − 2584 zero-termination is the only way to find out the length. Even in
+ − 2585 cases where you can get the length, most of the time the system will
+ − 2586 still use the null to signal the end of the string, and there will
+ − 2587 still be no way to either send in or receive a string with embedded
+ − 2588 nulls. In such situations, it's pointless to track the length
+ − 2589 because null bytes can never be in the string. We have a lot of
+ − 2590 operations that make it easy to operate on zero-terminated strings,
+ − 2591 and forcing the user to deal with the length everywhere would only
+ − 2592 make the code uglier and more complicated, for no gain. --ben
+ − 2593
+ − 2594 There is no problem using the same lvalue for source and sink.
+ − 2595
+ − 2596 Also, when pointers are required, the code (currently at least) is
+ − 2597 lax and allows any pointer types, either in the source or the sink.
+ − 2598 This makes it possible, e.g., to deal with internal format data held
+ − 2599 in char *'s or external format data held in WCHAR * (i.e. Unicode).
+ − 2600
+ − 2601 Finally, whenever storage allocation is called for, extra space is
+ − 2602 allocated for a terminating zero, and such a zero is stored in the
+ − 2603 appropriate place, regardless of whether the source data was
+ − 2604 specified using a length or was specified as zero-terminated. This
+ − 2605 allows you to freely pass the resulting data, no matter how
+ − 2606 obtained, to a routine that expects zero termination (modulo, of
+ − 2607 course, that any embedded zeros in the resulting text will cause
+ − 2608 truncation). In fact, currently two terminating zeros are allocated
+ − 2609 and stored after the data result. This is to allow for the
+ − 2610 possibility of storing a Unicode value on output, which needs the
+ − 2611 two zeros. Currently, however, the two zeros are stored regardless
+ − 2612 of whether the conversion is internal or external and regardless of
+ − 2613 whether the external coding system is in fact Unicode. This
+ − 2614 behavior may change in the future, and you cannot rely on this --
+ − 2615 the most you can rely on is that sink data in Unicode format will
+ − 2616 have two terminating nulls, which combine to form one Unicode null
2367
+ − 2617 character.
+ − 2618
+ − 2619 NOTE: You might ask, why are these not written as functions that
+ − 2620 *RETURN* the converted string, since that would allow them to be used
+ − 2621 much more conveniently, without having to constantly declare temporary
+ − 2622 variables? The answer is that in fact I originally did write the
+ − 2623 routines that way, but that required either
+ − 2624
+ − 2625 (a) calling alloca() inside of a function call, or
+ − 2626 (b) using expressions separated by commas and a global temporary variable, or
+ − 2627 (c) using the GCC extension ({ ... }).
+ − 2628
+ − 2629 Turned out that all of the above had bugs, all caused by GCC (hence the
+ − 2630 comments about "those GCC wankers" and "ream gcc up the ass"). As for
+ − 2631 (a), some versions of GCC (especially on Intel platforms) had
+ − 2632 buggy implementations of alloca() that couldn't handle being called
+ − 2633 inside of a function call -- they just decremented the stack right in the
+ − 2634 middle of pushing args. Oops, crash with stack trashing, very bad. (b)
+ − 2635 was an attempt to fix (a), and that led to further GCC crashes, esp. when
+ − 2636 you had two such calls in a single subexpression, because GCC couldn't be
+ − 2637 counted upon to follow even a minimally reasonable order of execution.
+ − 2638 True, you can't count on one argument being evaluated before another, but
+ − 2639 GCC would actually interleave them so that the temp var got stomped on by
+ − 2640 one while the other was accessing it. So I tried (c), which was
+ − 2641 problematic because that GCC extension has more bugs in it than a
+ − 2642 termite's nest.
+ − 2643
+ − 2644 So reluctantly I converted to the current way. Now, that was awhile ago
+ − 2645 (c. 1994), and it appears that the bug involving alloca in function calls
+ − 2646 has long since been fixed. More recently, I defined the new-dfc routines
+ − 2647 down below, which DO allow exactly such convenience of returning your
+ − 2648 args rather than store them in temp variables, and I also wrote a
+ − 2649 configure check to see whether alloca() causes crashes inside of function
+ − 2650 calls, and if so use the portable alloca() implementation in alloca.c.
+ − 2651 If you define TEST_NEW_DFC, the old routines get written in terms of the
+ − 2652 new ones, and I've had a beta put out with this on and it
+ − 2653 appears to cause no problems -- so we should consider
+ − 2654 switching, and feel no compunctions about writing further such function-
+ − 2655 like alloca() routines in lieu of statement-like ones. --ben */
771
+ − 2656
/* Convert internal-format text from SOURCE to the external format
   described by CODESYS and deliver the result to SINK.

   SOURCE_TYPE must be one of DATA, C_STRING, LISP_STRING, LISP_OPAQUE or
   LISP_LSTREAM; SINK_TYPE must be one of ALLOCA, MALLOC, C_STRING_ALLOCA,
   C_STRING_MALLOC, LISP_LSTREAM or LISP_OPAQUE.  Any other combination
   trips the type_checking_assert.  SOURCE and SINK are each either a
   single expression or a parenthesized (ptr, len) pair, whichever the
   corresponding type calls for.

   CODESYS is evaluated exactly once, into dfc_codesys.  All dfc_-prefixed
   names are private to the implementation. */
#define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  Lisp_Object dfc_codesys = (codesys); \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
 \
  /* Only these source and sink kinds are valid in this direction. */ \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
      DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
 \
  /* Fill in dfc_source / dfc_sink and their simplified types. */ \
  DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
 \
  dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, &dfc_sink); \
 \
  /* Hand the result back through the caller's sink lvalue(s). */ \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
+ − 2688
/* Convert external-format data from SOURCE, encoded as described by
   CODESYS, to internal format and deliver the result to SINK.

   SOURCE_TYPE must be one of DATA, C_STRING, LISP_OPAQUE or
   LISP_LSTREAM; SINK_TYPE must be one of ALLOCA, MALLOC, C_STRING_ALLOCA,
   C_STRING_MALLOC, LISP_STRING, LISP_LSTREAM or LISP_BUFFER.  Any other
   combination trips the type_checking_assert.  SOURCE and SINK are each
   either a single expression or a parenthesized (ptr, len) pair,
   whichever the corresponding type calls for.

   CODESYS is evaluated exactly once, into dfc_codesys.  All dfc_-prefixed
   names are private to the implementation. */
#define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \
do { \
  Lisp_Object dfc_codesys = (codesys); \
  dfc_conversion_data dfc_source; \
  dfc_conversion_data dfc_sink; \
  dfc_conversion_type dfc_simplified_source_type; \
  dfc_conversion_type dfc_simplified_sink_type; \
 \
  /* Only these source and sink kinds are valid in this direction. */ \
  type_checking_assert \
    ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
      DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
      DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
     && \
     (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
      DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
      DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
 \
  /* Fill in dfc_source / dfc_sink and their simplified types. */ \
  DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \
  DFC_SINK_##sink_type##_TO_ARGS (sink); \
 \
  dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, \
                                  dfc_codesys, \
                                  dfc_simplified_sink_type, &dfc_sink); \
 \
  /* Hand the result back through the caller's sink lvalue(s). */ \
  DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
} while (0)
+ − 2720
814
+ − 2721 #ifdef __cplusplus
771
+ − 2722
814
+ − 2723 /* Error if you try to use a union here: "member `struct {anonymous
+ − 2724 union}::{anonymous} {anonymous union}::data' with constructor not allowed
+ − 2725 in union" (Bytecount is a class) */
+ − 2726
+ − 2727 typedef struct
+ − 2728 #else
771
+ − 2729 typedef union
814
+ − 2730 #endif
771
+ − 2731 {
+ − 2732 struct { const void *ptr; Bytecount len; } data;
+ − 2733 Lisp_Object lisp_object;
+ − 2734 } dfc_conversion_data;
+ − 2735
+ − 2736 enum dfc_conversion_type
+ − 2737 {
+ − 2738 DFC_TYPE_DATA,
+ − 2739 DFC_TYPE_ALLOCA,
+ − 2740 DFC_TYPE_MALLOC,
+ − 2741 DFC_TYPE_C_STRING,
+ − 2742 DFC_TYPE_C_STRING_ALLOCA,
+ − 2743 DFC_TYPE_C_STRING_MALLOC,
+ − 2744 DFC_TYPE_LISP_STRING,
+ − 2745 DFC_TYPE_LISP_LSTREAM,
+ − 2746 DFC_TYPE_LISP_OPAQUE,
+ − 2747 DFC_TYPE_LISP_BUFFER
+ − 2748 };
+ − 2749 typedef enum dfc_conversion_type dfc_conversion_type;
+ − 2750
1743
+ − 2751 BEGIN_C_DECLS
1650
+ − 2752
771
+ − 2753 /* WARNING: These use a static buffer. This can lead to disaster if
+ − 2754 these functions are not used *very* carefully. Another reason to only use
+ − 2755 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
1632
+ − 2756 MODULE_API void
771
+ − 2757 dfc_convert_to_external_format (dfc_conversion_type source_type,
+ − 2758 dfc_conversion_data *source,
1318
+ − 2759 Lisp_Object codesys,
771
+ − 2760 dfc_conversion_type sink_type,
+ − 2761 dfc_conversion_data *sink);
1632
+ − 2762 MODULE_API void
771
+ − 2763 dfc_convert_to_internal_format (dfc_conversion_type source_type,
+ − 2764 dfc_conversion_data *source,
1318
+ − 2765 Lisp_Object codesys,
771
+ − 2766 dfc_conversion_type sink_type,
+ − 2767 dfc_conversion_data *sink);
/* CPP trickery: a parenthesized pair argument such as `(ptr, len)' is
   written directly after one of these names, so that `DFC_CPP_CAR val'
   expands to DFC_CPP_CAR (ptr, len) and yields the first element, and
   DFC_CPP_CDR likewise yields the second. */
#define DFC_CPP_CAR(x,y) (x)
#define DFC_CPP_CDR(x,y) (y)

/* Convert `source' to args for dfc_convert_to_external_format().

   Each macro fills in dfc_source and dfc_simplified_source_type, which
   the enclosing TO_EXTERNAL_FORMAT() has declared.  CODESYS is accepted
   for symmetry with the DFC_INT_SOURCE_* macros and is not used here. */

/* DATA: VAL is a parenthesized (ptr, len) pair. */
#define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \
  dfc_source.data.ptr = DFC_CPP_CAR val; \
  dfc_source.data.len = DFC_CPP_CDR val; \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)

/* C_STRING: VAL is zero-terminated; its length is taken with strlen().
   The pointer is only read, so it is viewed as const char *. */
#define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
  dfc_source.data.len = \
    strlen ((const char *) (dfc_source.data.ptr = (val))); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)

/* LISP_STRING: VAL is passed through as a Lisp object. */
#define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \
  Lisp_Object dfc_slsta = (val); \
  type_checking_assert (STRINGP (dfc_slsta)); \
  dfc_source.lisp_object = dfc_slsta; \
  dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \
} while (0)

/* LISP_LSTREAM: VAL is passed through as a Lisp object. */
#define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \
  Lisp_Object dfc_sllta = (val); \
  type_checking_assert (LSTREAMP (dfc_sllta)); \
  dfc_source.lisp_object = dfc_sllta; \
  dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \
} while (0)

/* LISP_OPAQUE: reduced to a DATA source over the opaque's contents. */
#define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \
  Lisp_Opaque *dfc_slota = XOPAQUE (val); \
  dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \
  dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)
+ − 2801
/* Convert `source' to args for dfc_convert_to_internal_format().

   Only C_STRING differs from the external-direction macros: the length
   of the zero-terminated external data is obtained from
   dfc_external_data_len(), which is given the coding system, instead of
   from strlen().  The others simply forward to their DFC_EXT_SOURCE_*
   counterparts. */
#define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \
  dfc_source.data.len = dfc_external_data_len (dfc_source.data.ptr = (val), \
                                               codesys); \
  dfc_simplified_source_type = DFC_TYPE_DATA; \
} while (0)

#define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys)
#define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \
  DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys)
+ − 2816
/* Convert `sink' to args for dfc_convert_to_*_format().

   Each macro sets dfc_simplified_sink_type (and, for stream-like sinks,
   dfc_sink), both declared by the enclosing TO_*_FORMAT() macro. */

/* These sinks are all reduced to a DATA sink; VAL is not looked at here
   and is only assigned to later, by the matching *_USE_CONVERTED_DATA
   macro. */
#define DFC_SINK_ALLOCA_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_MALLOC_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_STRING_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA
#define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \
  dfc_simplified_sink_type = DFC_TYPE_DATA

/* An lstream sink is passed through as a Lisp object. */
#define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \
  Lisp_Object dfc_sink_stream = (val); \
  type_checking_assert (LSTREAMP (dfc_sink_stream)); \
  dfc_sink.lisp_object = dfc_sink_stream; \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
} while (0)

/* A buffer sink becomes an lstream sink: an output stream is made on
   the buffer at its BUF_PT position.  The stream is deleted again by
   DFC_LISP_BUFFER_USE_CONVERTED_DATA. */
#define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \
  struct buffer *dfc_sink_buf = XBUFFER (val); \
  dfc_sink.lisp_object = \
    make_lisp_buffer_output_stream \
      (dfc_sink_buf, BUF_PT (dfc_sink_buf), 0); \
  dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
} while (0)
+ − 2843
/* Assign to the `sink' lvalue(s) using the converted data.

   The raw-data sinks copy dfc_sink.data.len + 2 bytes: the "+ 2" covers
   the double zero terminator that follows the converted data (two bytes
   so that it also terminates Unicode output).  The length stored by the
   sized sinks does not include those two bytes. */
typedef union { char c; void *p; } *dfc_aliasing_voidpp;

/* Sized sinks: SINK is a parenthesized (ptr, len) pair of lvalues. */
#define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = ALLOCA (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_copy; \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)
#define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = xmalloc (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_copy; \
  (DFC_CPP_CDR sink) = dfc_sink.data.len; \
} while (0)

/* Unsized sinks: SINK is a single pointer lvalue; no length is stored. */
#define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = ALLOCA (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  VOIDP_CAST (sink) = dfc_sink_copy; \
} while (0)
#define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \
  void *dfc_sink_copy = xmalloc (dfc_sink.data.len + 2); \
  memcpy (dfc_sink_copy, dfc_sink.data.ptr, dfc_sink.data.len + 2); \
  VOIDP_CAST (sink) = dfc_sink_copy; \
} while (0)

/* Lisp object sinks: build the object from the converted bytes. */
#define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
  sink = make_string ((Ibyte *) dfc_sink.data.ptr, dfc_sink.data.len)
#define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
  sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)

/* Stream-like sinks: nothing is assigned.  For a buffer sink, the
   output stream created by DFC_SINK_LISP_BUFFER_TO_ARGS is deleted. */
#define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */
#define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
  Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
+ − 2876
1318
+ − 2877 /* #define TEST_NEW_DFC */
+ − 2878
771
/* Convenience macros for extremely common invocations: the converted
   text is stored into OUT as an unsized, zero-terminated string via the
   C_STRING_ALLOCA sink.

   Normally each one is a direct TO_EXTERNAL_FORMAT / TO_INTERNAL_FORMAT
   call.  When TEST_NEW_DFC is defined they are instead written in terms
   of the value-returning NEW_* macros, with OUT assigned through a cast
   to Extbyte ** (external results) or Ibyte ** (internal results). */
#ifndef TEST_NEW_DFC
#define C_STRING_TO_EXTERNAL(in, out, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, codesys)
#define SIZED_C_STRING_TO_EXTERNAL(in, inlen, out, codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), C_STRING_ALLOCA, out, codesys)
#define EXTERNAL_TO_C_STRING(in, out, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, codesys)
#define SIZED_EXTERNAL_TO_C_STRING(in, inlen, out, codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), C_STRING_ALLOCA, out, codesys)
#define LISP_STRING_TO_EXTERNAL(in, out, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, codesys)
#else /* TEST_NEW_DFC */
#define C_STRING_TO_EXTERNAL(in, out, codesys) \
  do { * (Extbyte **) &(out) = \
       NEW_C_STRING_TO_EXTERNAL (in, codesys); } while (0)
#define SIZED_C_STRING_TO_EXTERNAL(in, inlen, out, codesys) \
  do { * (Extbyte **) &(out) = \
       NEW_SIZED_C_STRING_TO_EXTERNAL (in, inlen, codesys); } while (0)
#define EXTERNAL_TO_C_STRING(in, out, codesys) \
  do { * (Ibyte **) &(out) = \
       NEW_EXTERNAL_TO_C_STRING (in, codesys); } while (0)
#define SIZED_EXTERNAL_TO_C_STRING(in, inlen, out, codesys) \
  do { * (Ibyte **) &(out) = \
       NEW_SIZED_EXTERNAL_TO_C_STRING (in, inlen, codesys); } while (0)
#define LISP_STRING_TO_EXTERNAL(in, out, codesys) \
  do { * (Extbyte **) &(out) = \
       NEW_LISP_STRING_TO_EXTERNAL (in, codesys); } while (0)
#endif /* TEST_NEW_DFC */
+ − 2908
/* Sized-output convenience macros: like the ones above, but going
   through the ALLOCA sink so that the result's length is stored in
   OUTLEN as well as its pointer in OUT. */

/* Internal -> external. */
#define C_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys)
#define SIZED_C_STRING_TO_SIZED_EXTERNAL(in, inlen, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), ALLOCA, (out, outlen), codesys)
#define LISP_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, ALLOCA, (out, outlen), codesys)

/* External -> internal. */
#define EXTERNAL_TO_SIZED_C_STRING(in, out, outlen, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys)
#define SIZED_EXTERNAL_TO_SIZED_C_STRING(in, inlen, out, outlen, codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), ALLOCA, (out, outlen), codesys)
+ − 2919
+ − 2920 /* In place of EXTERNAL_TO_LISP_STRING(), use build_ext_string() and/or
+ − 2921 make_ext_string(). */
+ − 2922
/* malloc() counterparts of the unsized convenience macros: the result
   goes through the C_STRING_MALLOC sink, so OUT receives xmalloc()ed
   memory that the caller must release with xfree().

   As with the alloca versions, defining TEST_NEW_DFC switches these to
   wrappers around the value-returning NEW_*_MALLOC macros. */
#ifndef TEST_NEW_DFC
#define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys)
#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys)
#define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, codesys)
#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), C_STRING_MALLOC, out, codesys)
#define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, codesys)
#else /* TEST_NEW_DFC */
#define C_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  do { * (Extbyte **) &(out) = \
       NEW_C_STRING_TO_EXTERNAL_MALLOC (in, codesys); } while (0)
#define SIZED_C_STRING_TO_EXTERNAL_MALLOC(in, inlen, out, codesys) \
  do { * (Extbyte **) &(out) = \
       NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC (in, inlen, codesys); } \
  while (0)
#define EXTERNAL_TO_C_STRING_MALLOC(in, out, codesys) \
  do { * (Ibyte **) &(out) = \
       NEW_EXTERNAL_TO_C_STRING_MALLOC (in, codesys); } while (0)
#define SIZED_EXTERNAL_TO_C_STRING_MALLOC(in, inlen, out, codesys) \
  do { * (Ibyte **) &(out) = \
       NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC (in, inlen, codesys); } \
  while (0)
#define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, codesys) \
  do { * (Extbyte **) &(out) = \
       NEW_LISP_STRING_TO_EXTERNAL_MALLOC (in, codesys); } while (0)
#endif /* TEST_NEW_DFC */
+ − 2953
2367
/* malloc() counterparts of the sized convenience macros: the result
   goes through the MALLOC sink, so OUT receives xmalloc()ed memory (to
   be released by the caller with xfree()) and OUTLEN its length. */

/* Internal -> external. */
#define C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys)
#define SIZED_C_STRING_TO_SIZED_EXTERNAL_MALLOC(in, inlen, out, outlen, \
                                                codesys) \
  TO_EXTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys)
#define LISP_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \
  TO_EXTERNAL_FORMAT (LISP_STRING, in, MALLOC, (out, outlen), codesys)

/* External -> internal. */
#define EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, out, outlen, codesys) \
  TO_INTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys)
#define SIZED_EXTERNAL_TO_SIZED_C_STRING_MALLOC(in, inlen, out, outlen, \
                                                codesys) \
  TO_INTERNAL_FORMAT (DATA, (in, inlen), MALLOC, (out, outlen), codesys)
+ − 2966
1318
/* Kind of source passed as the TYPE argument of the new_dfc_convert_*()
   functions.  The NEW_*() macros pass a source size of -1 together with
   the unsized kinds and an explicit length with the DFC_SIZED_ kinds;
   DFC_LISP_STRING is used when the source is a Lisp string passed via
   LISP_TO_VOID(). */
enum new_dfc_src_type
{
  DFC_EXTERNAL       = 0,
  DFC_SIZED_EXTERNAL = 1,
  DFC_INTERNAL       = 2,
  DFC_SIZED_INTERNAL = 3,
  DFC_LISP_STRING    = 4
};
+ − 2975
1632
+ − 2976 MODULE_API void *new_dfc_convert_malloc (const void *src, Bytecount src_size,
+ − 2977 enum new_dfc_src_type type,
+ − 2978 Lisp_Object codesys);
2367
+ − 2979 MODULE_API Bytecount new_dfc_convert_size (const char *srctext,
+ − 2980 const void *src,
1632
+ − 2981 Bytecount src_size,
+ − 2982 enum new_dfc_src_type type,
+ − 2983 Lisp_Object codesys);
2367
+ − 2984 MODULE_API void *new_dfc_convert_copy_data (const char *srctext,
+ − 2985 void *alloca_data);
1318
+ − 2986
1743
+ − 2987 END_C_DECLS
1650
+ − 2988
1318
+ − 2989 /* Version of EXTERNAL_TO_C_STRING that *RETURNS* the translated string,
+ − 2990 still in alloca() space. Requires some trickiness to do this, but gets
+ − 2991 it done! */
+ − 2992
+ − 2993 /* NOTE: If you make two invocations of the dfc functions below in the same
+ − 2994 subexpression and use the exact same expression for the source in both
+ − 2995 cases, you will lose. In this unlikely case, you will get an abort, and
+ − 2996 need to rewrite the code.
+ − 2997 */
+ − 2998
+ − 2999 /* We need to use ALLOCA_FUNCALL_OK here. Some compilers have been known
+ − 3000 to choke when alloca() occurs as a funcall argument, and so we check
+ − 3001 this in configure. Rewriting the expressions below to use a temporary
+ − 3002 variable, so that the call to alloca() is outside of
2382
+ − 3003 new_dfc_convert_copy_data(), won't help because the entire NEW_DFC call
1318
+ − 3004 could be inside of a function call. */
+ − 3005
/* Core of the alloca()-based NEW_*() conversions.  The source expression
   is stringified (#src) and handed to both new_dfc_convert_size() and
   new_dfc_convert_copy_data(); the value returned by
   new_dfc_convert_size() goes through ALLOCA_FUNCALL_OK() and the outcome
   becomes the alloca_data argument of new_dfc_convert_copy_data(). */
#define NEW_DFC_CONVERT_1_ALLOCA(src, src_size, type, codesys) \
  new_dfc_convert_copy_data \
    (#src, ALLOCA_FUNCALL_OK (new_dfc_convert_size (#src, src, src_size, \
                                                    type, codesys)))

/* Expression-style conversions: each macro evaluates to the converted
   text, cast to Ibyte * (external -> internal) or Extbyte * (internal or
   Lisp string -> external).  The plain variants go through
   NEW_DFC_CONVERT_1_ALLOCA(); the _MALLOC variants call
   new_dfc_convert_malloc().  Unsized sources pass -1 as the size.

   Every expansion is wrapped in parentheses so that the cast applies to
   the whole call even when the macro is followed by a postfix operator
   such as [] -- without them, the operator would bind to the void *
   result of the call before the cast is applied. */

/* External -> internal. */
#define NEW_EXTERNAL_TO_C_STRING(src, codesys) \
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_EXTERNAL, codesys))
#define NEW_EXTERNAL_TO_C_STRING_MALLOC(src, codesys) \
  ((Ibyte *) new_dfc_convert_malloc (src, -1, DFC_EXTERNAL, codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING(src, len, codesys) \
  ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_EXTERNAL, codesys))
#define NEW_SIZED_EXTERNAL_TO_C_STRING_MALLOC(src, len, codesys) \
  ((Ibyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_EXTERNAL, codesys))

/* Internal -> external. */
#define NEW_C_STRING_TO_EXTERNAL(src, codesys) \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_INTERNAL, codesys))
#define NEW_C_STRING_TO_EXTERNAL_MALLOC(src, codesys) \
  ((Extbyte *) new_dfc_convert_malloc (src, -1, DFC_INTERNAL, codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL(src, len, codesys) \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_INTERNAL, \
                                         codesys))
#define NEW_SIZED_C_STRING_TO_EXTERNAL_MALLOC(src, len, codesys) \
  ((Extbyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_INTERNAL, codesys))

/* Lisp string -> external. */
#define NEW_LISP_STRING_TO_EXTERNAL(src, codesys) \
  ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (LISP_TO_VOID (src), -1, \
                                         DFC_LISP_STRING, codesys))
#define NEW_LISP_STRING_TO_EXTERNAL_MALLOC(src, codesys) \
  ((Extbyte *) new_dfc_convert_malloc (LISP_TO_VOID (src), -1, \
                                       DFC_LISP_STRING, codesys))
771
+ − 3033
2367
/* Stand-ins for various encodings.  Command-line arguments and
   environment variables follow the Wexttext width: Qmswindows_unicode
   when WEXTTEXT_IS_WIDE is defined, Qnative otherwise. */
#ifndef WEXTTEXT_IS_WIDE
# define Qcommand_argument_encoding       Qnative
# define Qenvironment_variable_encoding   Qnative
#else
# define Qcommand_argument_encoding       Qmswindows_unicode
# define Qenvironment_variable_encoding   Qmswindows_unicode
#endif

/* Host and service names. */
#define Qunix_host_name_encoding          Qnative
#define Qunix_service_name_encoding       Qnative
#define Qmswindows_host_name_encoding     Qmswindows_multibyte
#define Qmswindows_service_name_encoding  Qmswindows_multibyte

+ − 3046
2367
+ − 3047 /* Wexttext functions. The type of Wexttext is selected at compile time
+ − 3048 and will sometimes be wchar_t, sometimes char. */
+ − 3049
+ − 3050 int wcscmp_ascii (const wchar_t *s1, const Ascbyte *s2);
+ − 3051 int wcsncmp_ascii (const wchar_t *s1, const Ascbyte *s2, Charcount len);
+ − 3052
/* Wexttext wrappers: one set of wext_*() names that maps onto the
   wide-character C library when WEXTTEXT_IS_WIDE is defined and onto the
   ordinary char functions otherwise.

   Macro arguments that are cast are parenthesized, so that an argument
   such as `p + 1' is cast as a whole rather than having the cast bind to
   `p' alone. */
#ifdef WEXTTEXT_IS_WIDE /* defined under MS Windows i.e. WIN32_NATIVE */
#define WEXTTEXT_ZTERM_SIZE sizeof (wchar_t)
/* Extra indirection needed in case of manifest constant as arg */
#define WEXTSTRING_1(arg) L##arg
#define WEXTSTRING(arg) WEXTSTRING_1(arg)
#define wext_strlen wcslen
#define wext_strcmp wcscmp
#define wext_strncmp wcsncmp
#define wext_strcmp_ascii wcscmp_ascii
#define wext_strncmp_ascii wcsncmp_ascii
#define wext_strcpy wcscpy
#define wext_strncpy wcsncpy
#define wext_strchr wcschr
#define wext_strrchr wcsrchr
#define wext_strdup wcsdup
#define wext_atol(str) wcstol (str, 0, 10)
#define wext_sprintf wsprintfW /* Huh? both wsprintfA and wsprintfW? */
#define wext_getenv _wgetenv
#define build_wext_string(str, cs) build_ext_string ((Extbyte *) (str), cs)
#define WEXTTEXT_TO_8_BIT(arg) WEXTTEXT_TO_MULTIBYTE(arg)
#ifdef WIN32_NATIVE
int XCDECL wext_retry_open (const Wexttext *path, int oflag, ...);
#else
#error Cannot handle Wexttext yet on this system
#endif
#define wext_access _waccess
#define wext_stat _wstat
#else /* not WEXTTEXT_IS_WIDE */
#define WEXTTEXT_ZTERM_SIZE sizeof (char)
#define WEXTSTRING(arg) arg
#define wext_strlen strlen
#define wext_strcmp strcmp
#define wext_strncmp strncmp
#define wext_strcmp_ascii strcmp
#define wext_strncmp_ascii strncmp
#define wext_strcpy strcpy
#define wext_strncpy strncpy
#define wext_strchr strchr
#define wext_strrchr strrchr
#define wext_strdup xstrdup
#define wext_atol(str) atol (str)
#define wext_sprintf sprintf
#define wext_getenv getenv
#define build_wext_string build_ext_string
#define wext_retry_open retry_open
#define wext_access access
#define wext_stat stat
#define WEXTTEXT_TO_8_BIT(arg) ((Extbyte *) (arg))
#endif /* WEXTTEXT_IS_WIDE */
+ − 3102
+ − 3103 /* Standins for various X encodings.
1318
+ − 3104
+ − 3105 About encodings in X:
+ − 3106
+ − 3107 X works with 5 different encodings:
+ − 3108
+ − 3109 -- "Host Portable Character Encoding" == printable ASCII + space, tab,
+ − 3110 newline
+ − 3111
+ − 3112 -- STRING encoding == ASCII + Latin-1 + tab, newline
+ − 3113
+ − 3114 -- Locale-specific encoding
+ − 3115
+ − 3116 -- Compound text == STRING encoding + ISO-2022 escape sequences to
+ − 3117 switch between different locale-specific encodings.
+ − 3118
+ − 3119 -- ANSI C wide-character encoding
+ − 3120
+ − 3121 The Host Portable Character Encoding (HPCE) is used for atom names, font
+ − 3122 names, color names, keysyms, geometry strings, resource manager quarks,
+ − 3123 display names, locale names, and various other things. When describing
+ − 3124 such strings, the X manual typically says "If the ... is not in the Host
+ − 3125 Portable Character Encoding, the result is implementation dependent."
+ − 3126
+ − 3127 The wide-character encoding is used only in the Xwc* functions, which
+ − 3128 are provided as equivalents to Xmb* functions.
+ − 3129
+ − 3130 STRING and compound text are used in the value of string properties and
+ − 3131 selection data, both of which are values with an associated type atom,
+ − 3132 which can be STRING or COMPOUND_TEXT. It can also be a locale name, as
+ − 3133 specified in setlocale() (#### as usual, there is no normalization
+ − 3134 whatsoever of these names).
+ − 3135
+ − 3136 X also defines a type called "TEXT", which is used only as a requested
+ − 3137 type, and produces data in a type "convenient to the owner". However,
+ − 3138 there is some indication that X expects this to be the locale-specific
+ − 3139 encoding.
+ − 3140
+ − 3141 According to the glossary, the locale is used in
+ − 3142
+ − 3143 -- Encoding and processing of input method text
+ − 3144 -- Encoding of resource files and values
+ − 3145 -- Encoding and imaging of text strings
+ − 3146 -- Encoding and decoding for inter-client text communication
+ − 3147
+ − 3148 The functions XmbTextListToTextProperty and XmbTextPropertyToTextList
+ − 3149 (and Xwc* equivalents) can be used to convert between the
+ − 3150 locale-specific encoding (XTextStyle), STRING (XStringStyle), and
+ − 3151 compound text (XCompoundTextStyle), as well as XStdICCTextStyle, which
+ − 3152 converts to STRING if possible, and if not, COMPOUND_TEXT. This is
+ − 3153 used, for example, in XmbSetWMProperties, in the window_name and
+ − 3154 icon_name properties (WM_NAME and WM_ICON_NAME), which are in the
+ − 3155 locale-specific encoding on input, and are stored as STRING if possible,
+ − 3156 COMPOUND_TEXT otherwise.
+ − 3157 */
771
+ − 3158
/* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext?
   Almost certainly the former.  Use a standin for now. */
#define Qlwlib_encoding Qnative

/* The Host Portable Character Encoding. */
#define Qx_hpc_encoding Qnative

/* Everything below is expressed in terms of Qx_hpc_encoding. */
#define Qx_atom_name_encoding Qx_hpc_encoding
#define Qx_font_name_encoding Qx_hpc_encoding
#define Qx_color_name_encoding Qx_hpc_encoding
#define Qx_keysym_encoding Qx_hpc_encoding
#define Qx_geometry_encoding Qx_hpc_encoding
#define Qx_resource_name_encoding Qx_hpc_encoding
#define Qx_application_class_encoding Qx_hpc_encoding
/* the following probably must agree with Qcommand_argument_encoding and
   Qenvironment_variable_encoding */
#define Qx_display_name_encoding Qx_hpc_encoding
#define Qx_xpm_data_encoding Qx_hpc_encoding

/* !!#### Verify these! */
#define Qxt_widget_arg_encoding Qnative
#define Qdt_dnd_encoding Qnative
/* Encoding for strings coming from Offix drag-n-drop.  (Defined exactly
   once; it is one of the encodings that still need verifying.) */
#define Qoffix_dnd_encoding Qnative

/* RedHat 6.2 contains a locale called "Francais" with the C-cedilla
   encoded in ISO2022! */
#define Qlocale_name_encoding Qctext

#define Qstrerror_encoding Qnative

/* !!#### This exists to remind us that our hexify routine is totally
   un-Muleized. */
#define Qdnd_hexify_encoding Qascii

+ − 3195
771
/* Store into VAR a message describing error number NUM.

   NUM is evaluated once.  If strerror() yields a string, it is converted
   with EXTERNAL_TO_C_STRING() using Qstrerror_encoding and the result is
   stored in VAR.  If strerror() yields a null pointer, VAR is instead
   pointed at 99 bytes obtained from alloca_ibytes() and filled with
   "Unknown error <NUM>".  VAR must be an assignable lvalue. */
#define GET_STRERROR(var, num) \
do { \
  int __gsnum__ = (num); \
  Extbyte * __gserr__ = strerror (__gsnum__); \
 \
  if (__gserr__) \
    { \
      EXTERNAL_TO_C_STRING (__gserr__, var, Qstrerror_encoding); \
    } \
  else \
    { \
      /* No system message available: synthesize one. */ \
      var = alloca_ibytes (99); \
      qxesprintf (var, "Unknown error %d", __gsnum__); \
    } \
} while (0)
+ − 3209
+ − 3210 #endif /* INCLUDED_text_h_ */