comparison src/buffer.h @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents fdefd0186b75
children e38acbeb1cae
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 /* Header file for the buffer manipulation primitives. 1 /* Header file for the buffer manipulation primitives.
2 Copyright (C) 1985, 1986, 1992, 1993, 1994, 1995 2 Copyright (C) 1985, 1986, 1992, 1993, 1994, 1995
3 Free Software Foundation, Inc. 3 Free Software Foundation, Inc.
4 Copyright (C) 1995 Sun Microsystems, Inc. 4 Copyright (C) 1995 Sun Microsystems, Inc.
5 Copyright (C) 2001, 2002 Ben Wing.
5 6
6 This file is part of XEmacs. 7 This file is part of XEmacs.
7 8
8 XEmacs is free software; you can redistribute it and/or modify it 9 XEmacs is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the 10 under the terms of the GNU General Public License as published by the
29 Ben Wing: almost completely rewritten for Mule, 19.12. 30 Ben Wing: almost completely rewritten for Mule, 19.12.
30 */ 31 */
31 32
32 #ifndef INCLUDED_buffer_h_ 33 #ifndef INCLUDED_buffer_h_
33 #define INCLUDED_buffer_h_ 34 #define INCLUDED_buffer_h_
34
35 #ifdef MULE
36 #include "mule-charset.h"
37 #endif
38 35
39 #include "casetab.h" 36 #include "casetab.h"
40 #include "chartab.h" 37 #include "chartab.h"
41 38
42 /************************************************************************/ 39 /************************************************************************/
91 changed. */ 88 changed. */
92 long save_modiff; /* Previous value of modiff, as of last 89 long save_modiff; /* Previous value of modiff, as of last
93 time buffer visited or saved a file. */ 90 time buffer visited or saved a file. */
94 91
95 #ifdef MULE 92 #ifdef MULE
96 /* We keep track of a "known" region for very fast access. 93 /* We keep track of a "known" region for very fast access. This
97 This information is text-only so it goes here. */ 94 information is text-only so it goes here. We update this at each
95 change to the buffer, so if it's entirely ASCII, these will always
96 contain the minimum and maximum positions of the buffer. */
98 Charbpos mule_bufmin, mule_bufmax; 97 Charbpos mule_bufmin, mule_bufmax;
99 Bytebpos mule_bytmin, mule_bytmax; 98 Bytebpos mule_bytmin, mule_bytmax;
100 int mule_shifter, mule_three_p; 99 int mule_shifter, mule_three_p;
101 100
102 /* And we also cache 16 positions for fairly fast access near those 101 /* And we also cache 16 positions for fairly fast access near those
103 positions. */ 102 positions. */
104 Charbpos mule_charbpos_cache[16]; 103 Charbpos mule_charbpos_cache[16];
105 Bytebpos mule_bytebpos_cache[16]; 104 Bytebpos mule_bytebpos_cache[16];
105
106 int entirely_ascii_p;
106 #endif 107 #endif
107 108
108 /* Similar to the above, we keep track of positions for which line 109 /* Similar to the above, we keep track of positions for which line
109 number has last been calculated. See line-number.c. */ 110 number has last been calculated. See line-number.c. */
110 Lisp_Object line_number_cache; 111 Lisp_Object line_number_cache;
111 112
112 /* Change data that goes with the text. */ 113 /* Change data that goes with the text. */
113 struct buffer_text_change_data *changes; 114 struct buffer_text_change_data *changes;
114
115 }; 115 };
116 116
117 struct buffer 117 struct buffer
118 { 118 {
119 struct lcrecord_header header; 119 struct lcrecord_header header;
260 && (mps_bufvar = XBUFFER (XCAR (mps_bufcons)), 1) \ 260 && (mps_bufvar = XBUFFER (XCAR (mps_bufcons)), 1) \
261 && (mps_bufcons = XCDR (mps_bufcons), 1)); \ 261 && (mps_bufcons = XCDR (mps_bufcons), 1)); \
262 ) 262 )
263 263
264 264
265
266 /************************************************************************/
267 /* */
268 /* working with raw internal-format data */
269 /* */
270 /************************************************************************/
271
272 /* NOTE: In all the following macros, we follow these rules concerning
273 multiple evaluation of the arguments:
274
275 1) Anything that's an lvalue can be evaluated more than once.
276 2) Anything that's a Lisp Object can be evaluated more than once.
277 This should probably be changed, but this follows the way
278 that all the macros in lisp.h do things.
279 3) 'struct buffer *' arguments can be evaluated more than once.
280 4) Nothing else can be evaluated more than once. Use inline
281 functions, if necessary, to prevent multiple evaluation.
282 5) An exception to (4) is that there are some macros below that
283 may evaluate their arguments more than once. They are all
284 denoted with the word "unsafe" in their name and are generally
285 meant to be called only by other macros that have already
286 stored the calling values in temporary variables.
287
288
289 Use the following functions/macros on contiguous strings of data.
290 If the text you're operating on is known to come from a buffer, use
291 the buffer-level functions below -- they know about the gap and may
292 be more efficient.
293
294
295 (A) For working with charptr's (pointers to internally-formatted text):
296 -----------------------------------------------------------------------
297
298 VALID_CHARPTR_P (ptr):
299 Given a charptr, does it point to the beginning of a character?
300
301 ASSERT_VALID_CHARPTR (ptr):
302 If error-checking is enabled, assert that the given charptr
303 points to the beginning of a character. Otherwise, do nothing.
304
305 INC_CHARPTR (ptr):
306 Given a charptr (assumed to point at the beginning of a character),
307 modify that pointer so it points to the beginning of the next
308 character.
309
310 DEC_CHARPTR (ptr):
311 Given a charptr (assumed to point at the beginning of a
312 character or at the very end of the text), modify that pointer
313 so it points to the beginning of the previous character.
314
315 VALIDATE_CHARPTR_BACKWARD (ptr):
316 Make sure that PTR is pointing to the beginning of a character.
317 If not, back up until this is the case. Note that there are not
318 too many places where it is legitimate to do this sort of thing.
319 It's an error if you're passed an "invalid" char * pointer.
320 NOTE: PTR *must* be pointing to a valid part of the string (i.e.
321 not the very end, unless the string is zero-terminated or
322 something) in order for this function to not cause crashes.
323
324 VALIDATE_CHARPTR_FORWARD (ptr):
325 Make sure that PTR is pointing to the beginning of a character.
326 If not, move forward until this is the case. Note that there
327 are not too many places where it is legitimate to do this sort
328 of thing. It's an error if you're passed an "invalid" char *
329 pointer.
330
331
332 (B) For working with the length (in bytes and characters) of a
333 section of internally-formatted text:
334 --------------------------------------------------------------
335
336 bytecount_to_charcount (ptr, nbi):
337 Given a pointer to a text string and a length in bytes,
338 return the equivalent length in characters.
339
340 charcount_to_bytecount (ptr, nch):
341 Given a pointer to a text string and a length in characters,
342 return the equivalent length in bytes.
343
344 charptr_n_addr (ptr, n):
345 Return a pointer to the beginning of the character offset N
346 (in characters) from PTR.
347
348
349 (C) For retrieving or changing the character pointed to by a charptr:
350 ---------------------------------------------------------------------
351
352 charptr_emchar (ptr):
353 Retrieve the character pointed to by PTR as an Emchar.
354
355 charptr_emchar_n (ptr, n):
356 Retrieve the character at offset N (in characters) from PTR,
357 as an Emchar.
358
359 set_charptr_emchar (ptr, ch):
360 Store the character CH (an Emchar) as internally-formatted
361 text starting at PTR. Return the number of bytes stored.
362
363 charptr_copy_char (ptr, ptr2):
364 Retrieve the character pointed to by PTR and store it as
365 internally-formatted text in PTR2.
366
367
368 (D) For working with Emchars:
369 -----------------------------
370
371 [Note that there are other functions/macros for working with Emchars
372 in mule-charset.h, for retrieving the charset of an Emchar
373 and such. These are only valid when MULE is defined.]
374
375 valid_char_p (ch):
376 Return whether the given Emchar is valid.
377
378 CHARP (ch):
379 Return whether the given Lisp_Object is a character.
380
381 CHECK_CHAR_COERCE_INT (ch):
382 Signal an error if CH is not a valid character or integer Lisp_Object.
383 If CH is an integer Lisp_Object, convert it to a character Lisp_Object,
384 but merely by repackaging, without performing tests for char validity.
385
386 MAX_EMCHAR_LEN:
387 Maximum number of buffer bytes per Emacs character.
388
389 */
390
391
392 /* ---------------------------------------------------------------------- */
393 /* (A) For working with charptr's (pointers to internally-formatted text) */
394 /* ---------------------------------------------------------------------- */
395
396 #ifdef MULE
397 # define VALID_CHARPTR_P(ptr) INTBYTE_FIRST_BYTE_P (* (unsigned char *) ptr)
398 #else
399 # define VALID_CHARPTR_P(ptr) 1
400 #endif
401
402 #ifdef ERROR_CHECK_CHARBPOS
403 # define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr))
404 #else
405 # define ASSERT_VALID_CHARPTR(ptr)
406 #endif
407
408 /* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in
409 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR()
410 trick of looking for a valid first byte because it might run off
411 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR()
412 method because it doesn't have easy access to the first byte of
413 the character it's moving over. */
414
415 #define REAL_INC_CHARPTR(ptr) \
416 ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))))
417
418 #define REAL_INC_CHARBYTEBPOS(ptr, pos) \
419 (pos += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))
420
421 #define REAL_DEC_CHARPTR(ptr) do { \
422 (ptr)--; \
423 } while (!VALID_CHARPTR_P (ptr))
424
425 #ifdef ERROR_CHECK_CHARBPOS
426 #define INC_CHARPTR(ptr) do { \
427 ASSERT_VALID_CHARPTR (ptr); \
428 REAL_INC_CHARPTR (ptr); \
429 } while (0)
430
431 #define INC_CHARBYTEBPOS(ptr, pos) do { \
432 ASSERT_VALID_CHARPTR (ptr); \
433 REAL_INC_CHARBYTEBPOS (ptr, pos); \
434 } while (0)
435
436 #define DEC_CHARPTR(ptr) do { \
437 const Intbyte *dc_ptr1 = (ptr); \
438 const Intbyte *dc_ptr2 = dc_ptr1; \
439 REAL_DEC_CHARPTR (dc_ptr2); \
440 assert (dc_ptr1 - dc_ptr2 == \
441 REP_BYTES_BY_FIRST_BYTE (*dc_ptr2)); \
442 (ptr) = (Intbyte *) dc_ptr2; \
443 } while (0)
444
445 #else /* ! ERROR_CHECK_CHARBPOS */
446 #define INC_CHARBYTEBPOS(ptr, pos) REAL_INC_CHARBYTEBPOS (ptr, pos)
447 #define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr)
448 #define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr)
449 #endif /* ! ERROR_CHECK_CHARBPOS */
450
451 #ifdef MULE
452
453 #define VALIDATE_CHARPTR_BACKWARD(ptr) do { \
454 while (!VALID_CHARPTR_P (ptr)) ptr--; \
455 } while (0)
456
457 /* This needs to be trickier to avoid the possibility of running off
458 the end of the string. */
459
460 #define VALIDATE_CHARPTR_FORWARD(ptr) do { \
461 Intbyte *vcf_ptr = (ptr); \
462 VALIDATE_CHARPTR_BACKWARD (vcf_ptr); \
463 if (vcf_ptr != (ptr)) \
464 { \
465 (ptr) = vcf_ptr; \
466 INC_CHARPTR (ptr); \
467 } \
468 } while (0)
469
470 #else /* not MULE */
471 #define VALIDATE_CHARPTR_BACKWARD(ptr)
472 #define VALIDATE_CHARPTR_FORWARD(ptr)
473 #endif /* not MULE */
474
475 /* -------------------------------------------------------------- */
476 /* (B) For working with the length (in bytes and characters) of a */
477 /* section of internally-formatted text */
478 /* -------------------------------------------------------------- */
479
480 INLINE_HEADER const Intbyte *
481 charptr_n_addr (const Intbyte *ptr, Charcount offset);
482 INLINE_HEADER const Intbyte *
483 charptr_n_addr (const Intbyte *ptr, Charcount offset)
484 {
485 return ptr + charcount_to_bytecount (ptr, offset);
486 }
487
488 /* -------------------------------------------------------------------- */
489 /* (C) For retrieving or changing the character pointed to by a charptr */
490 /* -------------------------------------------------------------------- */
491
492 #define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0])
493 #define simple_set_charptr_emchar(ptr, x) ((ptr)[0] = (Intbyte) (x), 1)
494 #define simple_charptr_copy_char(ptr, ptr2) ((ptr2)[0] = *(ptr), 1)
495
496 #ifdef MULE
497
498 Emchar non_ascii_charptr_emchar (const Intbyte *ptr);
499 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c);
500 Bytecount non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst);
501
502 INLINE_HEADER Emchar charptr_emchar (const Intbyte *ptr);
503 INLINE_HEADER Emchar
504 charptr_emchar (const Intbyte *ptr)
505 {
506 return BYTE_ASCII_P (*ptr) ?
507 simple_charptr_emchar (ptr) :
508 non_ascii_charptr_emchar (ptr);
509 }
510
511 INLINE_HEADER Bytecount set_charptr_emchar (Intbyte *ptr, Emchar x);
512 INLINE_HEADER Bytecount
513 set_charptr_emchar (Intbyte *ptr, Emchar x)
514 {
515 return !CHAR_MULTIBYTE_P (x) ?
516 simple_set_charptr_emchar (ptr, x) :
517 non_ascii_set_charptr_emchar (ptr, x);
518 }
519
520 /* Copy the character pointed to by SRC into DST.
521 Return the number of bytes copied. */
522 INLINE_HEADER Bytecount
523 charptr_copy_char (const Intbyte *src, Intbyte *dst);
524 INLINE_HEADER Bytecount
525 charptr_copy_char (const Intbyte *src, Intbyte *dst)
526 {
527 return BYTE_ASCII_P (*src) ?
528 simple_charptr_copy_char (src, dst) :
529 non_ascii_charptr_copy_char (src, dst);
530 }
531
532 #else /* not MULE */
533
534 # define charptr_emchar(ptr) simple_charptr_emchar (ptr)
535 # define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x)
536 # define charptr_copy_char(ptr, ptr2) simple_charptr_copy_char (ptr, ptr2)
537
538 #endif /* not MULE */
539
540 #define charptr_emchar_n(ptr, offset) \
541 charptr_emchar (charptr_n_addr (ptr, offset))
542
543
544 /* ---------------------------- */
545 /* (D) For working with Emchars */
546 /* ---------------------------- */
547
548 #ifdef MULE
549
550 int non_ascii_valid_char_p (Emchar ch);
551
552 INLINE_HEADER int valid_char_p (Emchar ch);
553 INLINE_HEADER int
554 valid_char_p (Emchar ch)
555 {
556 return (! (ch & ~0xFF)) || non_ascii_valid_char_p (ch);
557 }
558
559 #else /* not MULE */
560
561 #define valid_char_p(ch) (! (ch & ~0xFF))
562
563 #endif /* not MULE */
564
565 #define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x)))
566
567 #define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x))
568
569 INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj);
570 INLINE_HEADER Emchar
571 XCHAR_OR_CHAR_INT (Lisp_Object obj)
572 {
573 return CHARP (obj) ? XCHAR (obj) : XINT (obj);
574 }
575
576 #define CHECK_CHAR_COERCE_INT(x) do { \
577 if (CHARP (x)) \
578 ; \
579 else if (CHAR_INTP (x)) \
580 x = make_char (XINT (x)); \
581 else \
582 x = wrong_type_argument (Qcharacterp, x); \
583 } while (0)
584
585 #ifdef MULE
586 # define MAX_EMCHAR_LEN 4
587 #else
588 # define MAX_EMCHAR_LEN 1
589 #endif
590
591
592 /*----------------------------------------------------------------------*/ 265 /*----------------------------------------------------------------------*/
593 /* Accessor macros for important positions in a buffer */ 266 /* Accessor macros for important positions in a buffer */
594 /*----------------------------------------------------------------------*/ 267 /*----------------------------------------------------------------------*/
595 268
596 /* We put them here because some stuff below wants them before the 269 /* We put them here because some stuff below wants them before the
716 /* Converting between Charbpos's and Bytebpos's, for a buffer-or-string. 389 /* Converting between Charbpos's and Bytebpos's, for a buffer-or-string.
717 For strings, this maps to the bytecount<->charcount converters. */ 390 For strings, this maps to the bytecount<->charcount converters. */
718 391
719 #define buffer_or_string_charbpos_to_bytebpos(obj, pos) \ 392 #define buffer_or_string_charbpos_to_bytebpos(obj, pos) \
720 (BUFFERP (obj) ? charbpos_to_bytebpos (XBUFFER (obj), pos) : \ 393 (BUFFERP (obj) ? charbpos_to_bytebpos (XBUFFER (obj), pos) : \
721 (Bytebpos) charcount_to_bytecount (XSTRING_DATA (obj), pos)) 394 (Bytebpos) XSTRING_INDEX_CHAR_TO_BYTE (obj, pos))
722 395
723 #define buffer_or_string_bytebpos_to_charbpos(obj, ind) \ 396 #define buffer_or_string_bytebpos_to_charbpos(obj, ind) \
724 (BUFFERP (obj) ? bytebpos_to_charbpos (XBUFFER (obj), ind) : \ 397 (BUFFERP (obj) ? bytebpos_to_charbpos (XBUFFER (obj), ind) : \
725 (Charbpos) bytecount_to_charcount (XSTRING_DATA (obj), ind)) 398 (Charbpos) XSTRING_INDEX_BYTE_TO_CHAR (obj, ind))
726 399
727 /* Similar for Charbpos's and Membpos's. */ 400 /* Similar for Charbpos's and Membpos's. */
728 401
729 #define buffer_or_string_charbpos_to_membpos(obj, pos) \ 402 #define buffer_or_string_charbpos_to_membpos(obj, pos) \
730 (BUFFERP (obj) ? charbpos_to_membpos (XBUFFER (obj), pos) : \ 403 (BUFFERP (obj) ? charbpos_to_membpos (XBUFFER (obj), pos) : \
731 (Membpos) charcount_to_bytecount (XSTRING_DATA (obj), pos)) 404 (Membpos) XSTRING_INDEX_CHAR_TO_BYTE (obj, pos))
732 405
733 #define buffer_or_string_membpos_to_charbpos(obj, ind) \ 406 #define buffer_or_string_membpos_to_charbpos(obj, ind) \
734 (BUFFERP (obj) ? membpos_to_charbpos (XBUFFER (obj), ind) : \ 407 (BUFFERP (obj) ? membpos_to_charbpos (XBUFFER (obj), ind) : \
735 (Charbpos) bytecount_to_charcount (XSTRING_DATA (obj), ind)) 408 (Charbpos) XSTRING_INDEX_BYTE_TO_CHAR (obj, ind))
736 409
737 /************************************************************************/ 410 /************************************************************************/
738 /* */ 411 /* */
739 /* working with buffer-level data */ 412 /* working with buffer-level data */
740 /* */ 413 /* */
893 /* Note that in the simplest case (no MULE, no ERROR_CHECK_CHARBPOS), 566 /* Note that in the simplest case (no MULE, no ERROR_CHECK_CHARBPOS),
894 this crap reduces down to simply (x)--. */ 567 this crap reduces down to simply (x)--. */
895 568
896 #define DEC_BYTEBPOS(buf, x) do \ 569 #define DEC_BYTEBPOS(buf, x) do \
897 { \ 570 { \
898 ASSERT_VALID_BYTEBPOS_BACKWARD_UNSAFE (buf, x); \ 571 ASSERT_VALID_BYTEBPOS_BACKWARD_UNSAFE (buf, x); \
899 /* Note that we do the decrement first to \ 572 /* Note that we do the decrement first to \
900 make sure that the pointer in \ 573 make sure that the pointer in \
901 VALIDATE_BYTEBPOS_BACKWARD() ends up on \ 574 VALIDATE_BYTEBPOS_BACKWARD() ends up on \
902 the correct side of the gap */ \ 575 the correct side of the gap */ \
903 (x)--; \ 576 (x)--; \
930 603
931 Bytebpos charbpos_to_bytebpos_func (struct buffer *buf, Charbpos x); 604 Bytebpos charbpos_to_bytebpos_func (struct buffer *buf, Charbpos x);
932 Charbpos bytebpos_to_charbpos_func (struct buffer *buf, Bytebpos x); 605 Charbpos bytebpos_to_charbpos_func (struct buffer *buf, Bytebpos x);
933 606
934 /* The basic algorithm we use is to keep track of a known region of 607 /* The basic algorithm we use is to keep track of a known region of
935 characters in each buffer, all of which are of the same width. We 608 characters in each buffer, all of which are of the same width. We keep
936 keep track of the boundaries of the region in both Charbpos and 609 track of the boundaries of the region in both Charbpos and Bytebpos
937 Bytebpos coordinates and also keep track of the char width, which 610 coordinates and also keep track of the char width, which is 1 - 4 bytes.
938 is 1 - 4 bytes. If the position we're translating is not in 611 If the position we're translating is not in the known region, then we
939 the known region, then we invoke a function to update the known 612 invoke a function to update the known region to surround the position in
940 region to surround the position in question. This assumes 613 question. This assumes locality of reference, which is usually the
941 locality of reference, which is usually the case. 614 case.
942 615
943 Note that the function to update the known region can be simple 616 Note that the function to update the known region can be simple or
944 or complicated depending on how much information we cache. 617 complicated depending on how much information we cache. In addition to
945 For the moment, we don't cache any information, and just move 618 the known region, we always cache the correct conversions for point,
946 linearly forward or back from the known region, with a few 619 BEGV, and ZV, and in addition to this we cache 16 positions where the
947 shortcuts to catch all-ASCII buffers. (Note that this will 620 conversion is known. We only look in the cache or update it when we
948 thrash with bad locality of reference.) A smarter method would 621 need to move the known region more than a certain amount (currently 50
949 be to keep some sort of pseudo-extent layer over the buffer; 622 chars), and then we throw away a "random" value and replace it with the
950 maybe keep track of the charbpos/bytebpos correspondence at the 623 newly calculated value.
951 beginning of each line, which would allow us to do a binary 624
952 search over the pseudo-extents to narrow things down to the 625 Finally, we maintain an extra flag that tracks whether the buffer is
953 correct line, at which point you could use a linear movement 626 entirely ASCII, to speed up the conversions even more. This flag is
954 method. This would also mesh well with efficiently 627 actually of dubious value because in an entirely-ASCII buffer the known
955 implementing a line-numbering scheme. 628 region will always span the entire buffer (in fact, we update the flag
956 629 based on this fact), and so all we're saving is a few machine cycles.
957 Note also that we have to multiply or divide by the char width 630
958 in order to convert the positions. We do some tricks to avoid 631 A potentially smarter method than what we do with known regions and
959 ever actually having to do a multiply or divide, because that 632 cached positions would be to keep some sort of pseudo-extent layer over
960 is typically an expensive operation (esp. divide). Multiplying 633 the buffer; maybe keep track of the charbpos/bytebpos correspondence at the
961 or dividing by 1, 2, or 4 can be implemented simply as a 634 beginning of each line, which would allow us to do a binary search over
962 shift left or shift right, and we keep track of a shifter value 635 the pseudo-extents to narrow things down to the correct line, at which
963 (0, 1, or 2) indicating how much to shift. Multiplying by 3 636 point you could use a linear movement method. This would also mesh well
964 can be implemented by doubling and then adding the original 637 with efficiently implementing a line-numbering scheme. However, you
965 value. Dividing by 3, alas, cannot be implemented in any 638 have to weigh the amount of time spent updating the cache vs. the
966 simple shift/subtract method, as far as I know; so we just 639 savings that result from it. In reality, we modify the buffer far less
967 do a table lookup. For simplicity, we use a table of size 640 often than we access it, so a cache of this sort that provides
968 128K, which indexes the "divide-by-3" values for the first 641 guaranteed LOG (N) performance (or perhaps N * LOG (N), if we set a
969 64K non-negative numbers. (Note that we can increase the 642 maximum on the cache size) would indeed be a win, particularly in very
970 size up to 384K, i.e. indexing the first 192K non-negative 643 large buffers. If we ever implement this, we should probably set a
971 numbers, while still using shorts in the array.) This also 644 reasonably high minimum below which we use the old method, because the
972 means that the size of the known region can be at most 645 time spent updating the fancy cache would likely become dominant when
973 64K for width-three characters. 646 making buffer modifications in smaller buffers.
647
648 Note also that we have to multiply or divide by the char width in order
649 to convert the positions. We do some tricks to avoid ever actually
650 having to do a multiply or divide, because that is typically an
651 expensive operation (esp. divide). Multiplying or dividing by 1, 2, or
652 4 can be implemented simply as a shift left or shift right, and we keep
653 track of a shifter value (0, 1, or 2) indicating how much to shift.
654 Multiplying by 3 can be implemented by doubling and then adding the
655 original value. Dividing by 3, alas, cannot be implemented in any
656 simple shift/subtract method, as far as I know; so we just do a table
657 lookup. For simplicity, we use a table of size 128K, which indexes the
658 "divide-by-3" values for the first 64K non-negative numbers. (Note that
659 we can increase the size up to 384K, i.e. indexing the first 192K
660 non-negative numbers, while still using shorts in the array.) This also
661 means that the size of the known region can be at most 64K for
662 width-three characters.
663
664 !!#### We should investigate the algorithm in GNU Emacs. I think it
665 does something similar, but it may differ in some details, and it's
666 worth seeing if anything can be gleaned.
974 */ 667 */
975 668
976 extern short three_to_one_table[]; 669 extern short three_to_one_table[];
977 670
978 INLINE_HEADER int real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x); 671 INLINE_HEADER Bytebpos real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x);
979 INLINE_HEADER int 672 INLINE_HEADER Bytebpos
980 real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x) 673 real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x)
981 { 674 {
675 if (buf->text->entirely_ascii_p)
676 return (Bytebpos) x;
982 if (x >= buf->text->mule_bufmin && x <= buf->text->mule_bufmax) 677 if (x >= buf->text->mule_bufmin && x <= buf->text->mule_bufmax)
983 return (buf->text->mule_bytmin + 678 return (buf->text->mule_bytmin +
984 ((x - buf->text->mule_bufmin) << buf->text->mule_shifter) + 679 ((x - buf->text->mule_bufmin) << buf->text->mule_shifter) +
985 (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0)); 680 (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0));
986 else 681 else
987 return charbpos_to_bytebpos_func (buf, x); 682 return charbpos_to_bytebpos_func (buf, x);
988 } 683 }
989 684
990 INLINE_HEADER int real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x); 685 INLINE_HEADER Charbpos real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x);
991 INLINE_HEADER int 686 INLINE_HEADER Charbpos
992 real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x) 687 real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x)
993 { 688 {
689 if (buf->text->entirely_ascii_p)
690 return (Charbpos) x;
994 if (x >= buf->text->mule_bytmin && x <= buf->text->mule_bytmax) 691 if (x >= buf->text->mule_bytmin && x <= buf->text->mule_bytmax)
995 return (buf->text->mule_bufmin + 692 return (buf->text->mule_bufmin +
996 ((buf->text->mule_three_p 693 ((buf->text->mule_three_p
997 ? three_to_one_table[x - buf->text->mule_bytmin] 694 ? three_to_one_table[x - buf->text->mule_bytmin]
998 : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter))); 695 : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter)));
1038 # define BI_BUF_CHARPTR_COPY_CHAR(buf, pos, str) \ 735 # define BI_BUF_CHARPTR_COPY_CHAR(buf, pos, str) \
1039 charptr_copy_char (BI_BUF_BYTE_ADDRESS (buf, pos), str) 736 charptr_copy_char (BI_BUF_BYTE_ADDRESS (buf, pos), str)
1040 #define BUF_CHARPTR_COPY_CHAR(buf, pos, str) \ 737 #define BUF_CHARPTR_COPY_CHAR(buf, pos, str) \
1041 BI_BUF_CHARPTR_COPY_CHAR (buf, charbpos_to_bytebpos (buf, pos), str) 738 BI_BUF_CHARPTR_COPY_CHAR (buf, charbpos_to_bytebpos (buf, pos), str)
1042 739
1043
1044 /************************************************************************/
1045 /* */
1046 /* Converting between internal and external format */
1047 /* */
1048 /************************************************************************/
1049 /*
1050 All client code should use only the two macros
1051
1052 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
1053 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system)
1054
1055 Typical use is
1056
1057 TO_INTERNAL_FORMAT (DATA, (ptr, len),
1058 LISP_BUFFER, buffer,
1059 Qfile_name);
1060
1061 The source or sink can be specified in one of these ways:
1062
1063 DATA, (ptr, len), // input data is a fixed buffer of size len
1064 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len
1065 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len
1066 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output
1067 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output
1068 C_STRING, ptr, // equivalent to DATA, (ptr, strlen (ptr) + 1) on input
1069 LISP_STRING, string, // input or output is a Lisp_Object of type string
1070 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer
1071 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream
1072 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque
1073
1074 When specifying the sink, use lvalues, since the macro will assign to them,
1075 except when the sink is an lstream or a lisp buffer.
1076
1077 The macros accept the kinds of sources and sinks appropriate for
1078 internal and external data representation. See the type_checking_assert
1079 macros below for the actual allowed types.
1080
1081 Since some sources and sinks use one argument (a Lisp_Object) to
1082 specify them, while others take a (pointer, length) pair, we use
1083 some C preprocessor trickery to allow pair arguments to be specified
1084 by parenthesizing them, as in the examples above.
1085
1086 Anything prefixed by dfc_ (`data format conversion') is private.
1087 They are only used to implement these macros.
1088
1089 Using C_STRING* is appropriate for using with external APIs that take
1090 null-terminated strings. For internal data, we should try to be
1091 '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'.
1092
1093 Sometime in the future we might allow output to C_STRING_ALLOCA or
1094 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not
1095 TO_INTERNAL_FORMAT(). */
1096
1097 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, coding_system) \
1098 do { \
1099 dfc_conversion_type dfc_simplified_source_type; \
1100 dfc_conversion_type dfc_simplified_sink_type; \
1101 dfc_conversion_data dfc_source; \
1102 dfc_conversion_data dfc_sink; \
1103 \
1104 type_checking_assert \
1105 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
1106 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
1107 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \
1108 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
1109 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
1110 && \
1111 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
1112 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
1113 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
1114 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
1115 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
1116 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \
1117 \
1118 DFC_SOURCE_##source_type##_TO_ARGS (source); \
1119 DFC_SINK_##sink_type##_TO_ARGS (sink); \
1120 \
1121 DFC_CONVERT_TO_EXTERNAL_FORMAT (dfc_simplified_source_type, &dfc_source, \
1122 coding_system, \
1123 dfc_simplified_sink_type, &dfc_sink); \
1124 \
1125 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
1126 } while (0)
1127
1128 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, coding_system) \
1129 do { \
1130 dfc_conversion_type dfc_simplified_source_type; \
1131 dfc_conversion_type dfc_simplified_sink_type; \
1132 dfc_conversion_data dfc_source; \
1133 dfc_conversion_data dfc_sink; \
1134 \
1135 type_checking_assert \
1136 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \
1137 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \
1138 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \
1139 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \
1140 && \
1141 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \
1142 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \
1143 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \
1144 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \
1145 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \
1146 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \
1147 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \
1148 \
1149 DFC_SOURCE_##source_type##_TO_ARGS (source); \
1150 DFC_SINK_##sink_type##_TO_ARGS (sink); \
1151 \
1152 DFC_CONVERT_TO_INTERNAL_FORMAT (dfc_simplified_source_type, &dfc_source, \
1153 coding_system, \
1154 dfc_simplified_sink_type, &dfc_sink); \
1155 \
1156 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \
1157 } while (0)
1158
1159 #ifdef FILE_CODING
1160 #define DFC_CONVERT_TO_EXTERNAL_FORMAT dfc_convert_to_external_format
1161 #define DFC_CONVERT_TO_INTERNAL_FORMAT dfc_convert_to_internal_format
1162 #else
1163 /* ignore coding_system argument */
1164 #define DFC_CONVERT_TO_EXTERNAL_FORMAT(a, b, coding_system, c, d) \
1165 dfc_convert_to_external_format (a, b, c, d)
1166 #define DFC_CONVERT_TO_INTERNAL_FORMAT(a, b, coding_system, c, d) \
1167 dfc_convert_to_internal_format (a, b, c, d)
1168 #endif
1169
1170 typedef union
1171 {
1172 struct { const void *ptr; Bytecount len; } data;
1173 Lisp_Object lisp_object;
1174 } dfc_conversion_data;
1175
1176 enum dfc_conversion_type
1177 {
1178 DFC_TYPE_DATA,
1179 DFC_TYPE_ALLOCA,
1180 DFC_TYPE_MALLOC,
1181 DFC_TYPE_C_STRING,
1182 DFC_TYPE_C_STRING_ALLOCA,
1183 DFC_TYPE_C_STRING_MALLOC,
1184 DFC_TYPE_LISP_STRING,
1185 DFC_TYPE_LISP_LSTREAM,
1186 DFC_TYPE_LISP_OPAQUE,
1187 DFC_TYPE_LISP_BUFFER
1188 };
1189 typedef enum dfc_conversion_type dfc_conversion_type;
1190
1191 /* WARNING: These use a static buffer. This can lead to disaster if
1192 these functions are not used *very* carefully. Another reason to only use
1193 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
1194 void
1195 dfc_convert_to_external_format (dfc_conversion_type source_type,
1196 dfc_conversion_data *source,
1197 #ifdef FILE_CODING
1198 Lisp_Object coding_system,
1199 #endif
1200 dfc_conversion_type sink_type,
1201 dfc_conversion_data *sink);
1202 void
1203 dfc_convert_to_internal_format (dfc_conversion_type source_type,
1204 dfc_conversion_data *source,
1205 #ifdef FILE_CODING
1206 Lisp_Object coding_system,
1207 #endif
1208 dfc_conversion_type sink_type,
1209 dfc_conversion_data *sink);
1210 /* CPP Trickery */
1211 #define DFC_CPP_CAR(x,y) (x)
1212 #define DFC_CPP_CDR(x,y) (y)
1213
1214 /* Convert `source' to args for dfc_convert_to_*_format() */
1215 #define DFC_SOURCE_DATA_TO_ARGS(val) do { \
1216 dfc_source.data.ptr = DFC_CPP_CAR val; \
1217 dfc_source.data.len = DFC_CPP_CDR val; \
1218 dfc_simplified_source_type = DFC_TYPE_DATA; \
1219 } while (0)
1220 #define DFC_SOURCE_C_STRING_TO_ARGS(val) do { \
1221 dfc_source.data.len = \
1222 strlen ((char *) (dfc_source.data.ptr = (val))); \
1223 dfc_simplified_source_type = DFC_TYPE_DATA; \
1224 } while (0)
1225 #define DFC_SOURCE_LISP_STRING_TO_ARGS(val) do { \
1226 Lisp_Object dfc_slsta = (val); \
1227 type_checking_assert (STRINGP (dfc_slsta)); \
1228 dfc_source.lisp_object = dfc_slsta; \
1229 dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \
1230 } while (0)
1231 #define DFC_SOURCE_LISP_LSTREAM_TO_ARGS(val) do { \
1232 Lisp_Object dfc_sllta = (val); \
1233 type_checking_assert (LSTREAMP (dfc_sllta)); \
1234 dfc_source.lisp_object = dfc_sllta; \
1235 dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \
1236 } while (0)
1237 #define DFC_SOURCE_LISP_OPAQUE_TO_ARGS(val) do { \
1238 Lisp_Opaque *dfc_slota = XOPAQUE (val); \
1239 dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \
1240 dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \
1241 dfc_simplified_source_type = DFC_TYPE_DATA; \
1242 } while (0)
1243
1244 /* Convert `sink' to args for dfc_convert_to_*_format() */
1245 #define DFC_SINK_ALLOCA_TO_ARGS(val) \
1246 dfc_simplified_sink_type = DFC_TYPE_DATA
1247 #define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \
1248 dfc_simplified_sink_type = DFC_TYPE_DATA
1249 #define DFC_SINK_MALLOC_TO_ARGS(val) \
1250 dfc_simplified_sink_type = DFC_TYPE_DATA
1251 #define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \
1252 dfc_simplified_sink_type = DFC_TYPE_DATA
1253 #define DFC_SINK_LISP_STRING_TO_ARGS(val) \
1254 dfc_simplified_sink_type = DFC_TYPE_DATA
1255 #define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \
1256 dfc_simplified_sink_type = DFC_TYPE_DATA
1257 #define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \
1258 Lisp_Object dfc_sllta = (val); \
1259 type_checking_assert (LSTREAMP (dfc_sllta)); \
1260 dfc_sink.lisp_object = dfc_sllta; \
1261 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
1262 } while (0)
1263 #define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \
1264 struct buffer *dfc_slbta = XBUFFER (val); \
1265 dfc_sink.lisp_object = \
1266 make_lisp_buffer_output_stream \
1267 (dfc_slbta, BUF_PT (dfc_slbta), 0); \
1268 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \
1269 } while (0)
1270
1271 /* Assign to the `sink' lvalue(s) using the converted data. */
1272 typedef union { char c; void *p; } *dfc_aliasing_voidpp;
1273 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \
1274 void * dfc_sink_ret = alloca (dfc_sink.data.len + 1); \
1275 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \
1276 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
1277 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
1278 } while (0)
1279 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \
1280 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 1); \
1281 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \
1282 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \
1283 (DFC_CPP_CDR sink) = dfc_sink.data.len; \
1284 } while (0)
1285 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \
1286 void * dfc_sink_ret = alloca (dfc_sink.data.len + 1); \
1287 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \
1288 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
1289 } while (0)
1290 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \
1291 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 1); \
1292 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \
1293 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \
1294 } while (0)
1295 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \
1296 sink = make_string ((Intbyte *) dfc_sink.data.ptr, dfc_sink.data.len)
1297 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \
1298 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len)
1299 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */
1300 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \
1301 Lstream_delete (XLSTREAM (dfc_sink.lisp_object))
1302
1303 /* Someday we might want to distinguish between Qnative and Qfile_name
1304 by using coding-system aliases, but for now it suffices to have
1305 these be identical. Qnative can be used as the coding_system
1306 argument to TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */
1307 #define Qnative Qfile_name
1308
1309 #if defined (WIN32_NATIVE) || defined (CYGWIN)
1310 /* #### kludge!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1311 Remove this as soon as my Mule code is integrated. */
1312 #define Qmswindows_tstr Qnative
1313 #endif
1314
1315 /* More stand-ins */
1316 #define Qcommand_argument_encoding Qnative
1317 #define Qenvironment_variable_encoding Qnative
1318
1319 /* Convenience macros for extremely common invocations */
1320 #define C_STRING_TO_EXTERNAL(in, out, coding_system) \
1321 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
1322 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
1323 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
1324 #define EXTERNAL_TO_C_STRING(in, out, coding_system) \
1325 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system)
1326 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, coding_system) \
1327 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system)
1328 #define LISP_STRING_TO_EXTERNAL(in, out, coding_system) \
1329 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, coding_system)
1330 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \
1331 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, coding_system)
1332
1333
1334 /************************************************************************/
1335 /* */
1336 /* fake charset functions */
1337 /* */
1338 /************************************************************************/
1339
1340 /* used when MULE is not defined, so that Charset-type stuff can still
1341 be done */
1342
1343 #ifndef MULE
1344
1345 #define Vcharset_ascii Qnil
1346
1347 #define CHAR_CHARSET(ch) Vcharset_ascii
1348 #define CHAR_LEADING_BYTE(ch) LEADING_BYTE_ASCII
1349 #define LEADING_BYTE_ASCII 0x80
1350 #define NUM_LEADING_BYTES 1
1351 #define MIN_LEADING_BYTE 0x80
1352 #define CHARSETP(cs) 1
1353 #define CHARSET_BY_LEADING_BYTE(lb) Vcharset_ascii
1354 #define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII
1355 #define XCHARSET_GRAPHIC(cs) -1
1356 #define XCHARSET_COLUMNS(cs) 1
1357 #define XCHARSET_DIMENSION(cs) 1
1358 #define REP_BYTES_BY_FIRST_BYTE(fb) 1
1359 #define BREAKUP_CHAR(ch, charset, byte1, byte2) do { \
1360 (charset) = Vcharset_ascii; \
1361 (byte1) = (ch); \
1362 (byte2) = 0; \
1363 } while (0)
1364 #define BYTE_ASCII_P(byte) 1
1365
1366 #endif /* ! MULE */
1367 740
1368 /************************************************************************/ 741 /************************************************************************/
1369 /* */ 742 /* */
1370 /* higher-level buffer-position functions */ 743 /* higher-level buffer-position functions */
1371 /* */ 744 /* */
1446 819
1447 OLD_BI_CEILING_OF(n) = NEW_BI_CEILING_OF(n) - 1 820 OLD_BI_CEILING_OF(n) = NEW_BI_CEILING_OF(n) - 1
1448 OLD_BI_FLOOR_OF(n) = NEW_BI_FLOOR_OF(n + 1) 821 OLD_BI_FLOOR_OF(n) = NEW_BI_FLOOR_OF(n + 1)
1449 822
1450 The definitions were changed because the new definitions are more 823 The definitions were changed because the new definitions are more
1451 consistent with the way everything else works in Emacs. 824 consistent with the way everything else works in XEmacs.
1452 */ 825 */
1453 826
1454 /* Properties of CEILING_OF and FLOOR_OF (also apply to BI_ variants): 827 /* Properties of CEILING_OF and FLOOR_OF (also apply to BI_ variants):
1455 828
1456 1) FLOOR_OF (CEILING_OF (n)) = n 829 1) FLOOR_OF (CEILING_OF (n)) = n
1469 and 842 and
1470 843
1471 [BYTE_ADDRESS (FLOOR_OF (n)), BYTE_ADDRESS_BEFORE (n)] 844 [BYTE_ADDRESS (FLOOR_OF (n)), BYTE_ADDRESS_BEFORE (n)]
1472 845
1473 are contiguous. 846 are contiguous.
1474 */ 847
848 A typical loop using CEILING_OF to process contiguous ranges of text
849 between [from, to) looks like this:
850
851 {
852 Bytebpos pos = from;
853
854 while (pos < to)
855 {
856 Bytebpos ceil;
857
858 ceil = BI_BUF_CEILING_OF (buf, pos);
859 ceil = min (to, ceil);
860 process_intbyte_string (BI_BUF_BYTE_ADDRESS (buf, pos), ceil - pos);
861 pos = ceil;
862 }
863 }
864
865 Currently there will be at most two iterations in the loop, but it is
866 written in such a way that it will still work if the buffer
867 representation is changed to have multiple gaps in it.
868 */
1475 869
1476 870
1477 /* Return the maximum index in the buffer it is safe to scan forwards 871 /* Return the maximum index in the buffer it is safe to scan forwards
1478 past N to. This is used to prevent buffer scans from running into 872 past N to. This is used to prevent buffer scans from running into
1479 the gap (e.g. search.c). All characters between N and CEILING_OF(N) 873 the gap (e.g. search.c). All characters between N and CEILING_OF(N)
1507 (b)->text->gpt : BI_BUF_BEG (b)) 901 (b)->text->gpt : BI_BUF_BEG (b))
1508 #define BUF_FLOOR_OF_IGNORE_ACCESSIBLE(b, n) \ 902 #define BUF_FLOOR_OF_IGNORE_ACCESSIBLE(b, n) \
1509 bytebpos_to_charbpos \ 903 bytebpos_to_charbpos \
1510 (b, BI_BUF_FLOOR_OF_IGNORE_ACCESSIBLE (b, charbpos_to_bytebpos (b, n))) 904 (b, BI_BUF_FLOOR_OF_IGNORE_ACCESSIBLE (b, charbpos_to_bytebpos (b, n)))
1511 905
1512
1513 extern struct buffer *current_buffer;
1514
1515 /* This is the initial (startup) directory, as used for the *scratch* buffer.
1516 We're making this a global to make others aware of the startup directory.
1517 `initial_directory' is stored in external format.
1518 */
1519 extern char initial_directory[];
1520 extern void init_initial_directory (void); /* initialize initial_directory */
1521
1522 EXFUN (Fbuffer_disable_undo, 1);
1523 EXFUN (Fbuffer_modified_p, 1);
1524 EXFUN (Fbuffer_name, 1);
1525 EXFUN (Fcurrent_buffer, 0);
1526 EXFUN (Ferase_buffer, 1);
1527 EXFUN (Fget_buffer, 1);
1528 EXFUN (Fget_buffer_create, 1);
1529 EXFUN (Fget_file_buffer, 1);
1530 EXFUN (Fkill_buffer, 1);
1531 EXFUN (Fother_buffer, 3);
1532 EXFUN (Frecord_buffer, 1);
1533 EXFUN (Fset_buffer, 1);
1534 EXFUN (Fset_buffer_modified_p, 2);
1535
1536 extern Lisp_Object QSscratch, Qafter_change_function, Qafter_change_functions;
1537 extern Lisp_Object Qbefore_change_function, Qbefore_change_functions;
1538 extern Lisp_Object Qbuffer_or_string_p, Qdefault_directory, Qfirst_change_hook;
1539 extern Lisp_Object Qpermanent_local, Vafter_change_function;
1540 extern Lisp_Object Vafter_change_functions, Vbefore_change_function;
1541 extern Lisp_Object Vbefore_change_functions, Vbuffer_alist, Vbuffer_defaults;
1542 extern Lisp_Object Vinhibit_read_only, Vtransient_mark_mode;
1543
1544 /* This structure marks which slots in a buffer have corresponding 906 /* This structure marks which slots in a buffer have corresponding
1545 default values in Vbuffer_defaults. 907 default values in Vbuffer_defaults.
1546 Each such slot has a nonzero value in this structure. 908 Each such slot has a nonzero value in this structure.
1547 The value has only one nonzero bit. 909 The value has only one nonzero bit.
1548 910
1581 /* Avoid excess parentheses, or syntax errors may rear their heads. */ 943 /* Avoid excess parentheses, or syntax errors may rear their heads. */
1582 #define BUFFER_FREE(data) xfree (data) 944 #define BUFFER_FREE(data) xfree (data)
1583 #define R_ALLOC_DECLARE(var,data) 945 #define R_ALLOC_DECLARE(var,data)
1584 946
1585 #endif /* !REL_ALLOC */ 947 #endif /* !REL_ALLOC */
1586
1587 extern Lisp_Object Vbuffer_alist;
1588 void set_buffer_internal (struct buffer *b);
1589 struct buffer *decode_buffer (Lisp_Object buffer, int allow_string);
1590
1591 /* from editfns.c */
1592 void widen_buffer (struct buffer *b, int no_clip);
1593 int beginning_of_line_p (struct buffer *b, Charbpos pt);
1594
1595 /* from insdel.c */
1596 void set_buffer_point (struct buffer *buf, Charbpos pos, Bytebpos bipos);
1597 void find_charsets_in_intbyte_string (unsigned char *charsets,
1598 const Intbyte *str,
1599 Bytecount len);
1600 void find_charsets_in_emchar_string (unsigned char *charsets,
1601 const Emchar *str,
1602 Charcount len);
1603 int intbyte_string_displayed_columns (const Intbyte *str, Bytecount len);
1604 int emchar_string_displayed_columns (const Emchar *str, Charcount len);
1605 void convert_intbyte_string_into_emchar_dynarr (const Intbyte *str,
1606 Bytecount len,
1607 Emchar_dynarr *dyn);
1608 Charcount convert_intbyte_string_into_emchar_string (const Intbyte *str,
1609 Bytecount len,
1610 Emchar *arr);
1611 void convert_emchar_string_into_intbyte_dynarr (Emchar *arr, int nels,
1612 Intbyte_dynarr *dyn);
1613 Intbyte *convert_emchar_string_into_malloced_string (Emchar *arr, int nels,
1614 Bytecount *len_out);
1615 /* from marker.c */
1616 void init_buffer_markers (struct buffer *b);
1617 void uninit_buffer_markers (struct buffer *b);
1618
1619 /* flags for get_buffer_pos_char(), get_buffer_range_char(), etc. */
1620 /* At most one of GB_COERCE_RANGE and GB_NO_ERROR_IF_BAD should be
1621 specified. At most one of GB_NEGATIVE_FROM_END and GB_NO_ERROR_IF_BAD
1622 should be specified. */
1623
1624 #define GB_ALLOW_PAST_ACCESSIBLE (1 << 0)
1625 #define GB_ALLOW_NIL (1 << 1)
1626 #define GB_CHECK_ORDER (1 << 2)
1627 #define GB_COERCE_RANGE (1 << 3)
1628 #define GB_NO_ERROR_IF_BAD (1 << 4)
1629 #define GB_NEGATIVE_FROM_END (1 << 5)
1630 #define GB_HISTORICAL_STRING_BEHAVIOR (GB_NEGATIVE_FROM_END | GB_ALLOW_NIL)
1631
1632 Charbpos get_buffer_pos_char (struct buffer *b, Lisp_Object pos,
1633 unsigned int flags);
1634 Bytebpos get_buffer_pos_byte (struct buffer *b, Lisp_Object pos,
1635 unsigned int flags);
1636 void get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to,
1637 Charbpos *from_out, Charbpos *to_out,
1638 unsigned int flags);
1639 void get_buffer_range_byte (struct buffer *b, Lisp_Object from, Lisp_Object to,
1640 Bytebpos *from_out, Bytebpos *to_out,
1641 unsigned int flags);
1642 Charcount get_string_pos_char (Lisp_Object string, Lisp_Object pos,
1643 unsigned int flags);
1644 Bytecount get_string_pos_byte (Lisp_Object string, Lisp_Object pos,
1645 unsigned int flags);
1646 void get_string_range_char (Lisp_Object string, Lisp_Object from,
1647 Lisp_Object to, Charcount *from_out,
1648 Charcount *to_out, unsigned int flags);
1649 void get_string_range_byte (Lisp_Object string, Lisp_Object from,
1650 Lisp_Object to, Bytecount *from_out,
1651 Bytecount *to_out, unsigned int flags);
1652 Charbpos get_buffer_or_string_pos_char (Lisp_Object object, Lisp_Object pos,
1653 unsigned int flags);
1654 Bytebpos get_buffer_or_string_pos_byte (Lisp_Object object, Lisp_Object pos,
1655 unsigned int flags);
1656 void get_buffer_or_string_range_char (Lisp_Object object, Lisp_Object from,
1657 Lisp_Object to, Charbpos *from_out,
1658 Charbpos *to_out, unsigned int flags);
1659 void get_buffer_or_string_range_byte (Lisp_Object object, Lisp_Object from,
1660 Lisp_Object to, Bytebpos *from_out,
1661 Bytebpos *to_out, unsigned int flags);
1662 Charbpos buffer_or_string_accessible_begin_char (Lisp_Object object);
1663 Charbpos buffer_or_string_accessible_end_char (Lisp_Object object);
1664 Bytebpos buffer_or_string_accessible_begin_byte (Lisp_Object object);
1665 Bytebpos buffer_or_string_accessible_end_byte (Lisp_Object object);
1666 Charbpos buffer_or_string_absolute_begin_char (Lisp_Object object);
1667 Charbpos buffer_or_string_absolute_end_char (Lisp_Object object);
1668 Bytebpos buffer_or_string_absolute_begin_byte (Lisp_Object object);
1669 Bytebpos buffer_or_string_absolute_end_byte (Lisp_Object object);
1670 void record_buffer (Lisp_Object buf);
1671 Lisp_Object get_buffer (Lisp_Object name,
1672 int error_if_deleted_or_does_not_exist);
1673 int map_over_sharing_buffers (struct buffer *buf,
1674 int (*mapfun) (struct buffer *buf,
1675 void *closure),
1676 void *closure);
1677 948
1678 949
1679 /************************************************************************/ 950 /************************************************************************/
1680 /* Case conversion */ 951 /* Case conversion */
1681 /************************************************************************/ 952 /************************************************************************/
1713 TRT_TABLE_OF (Lisp_Object trt, Emchar c) 984 TRT_TABLE_OF (Lisp_Object trt, Emchar c)
1714 { 985 {
1715 return TRT_TABLE_CHAR_1 (trt, c); 986 return TRT_TABLE_CHAR_1 (trt, c);
1716 } 987 }
1717 988
989 INLINE_HEADER Lisp_Object BUFFER_CASE_TABLE (struct buffer *buf);
990 INLINE_HEADER Lisp_Object
991 BUFFER_CASE_TABLE (struct buffer *buf)
992 {
993 return buf ? buf->case_table : Vstandard_case_table;
994 }
995
1718 /* Macros used below. */ 996 /* Macros used below. */
1719 #define DOWNCASE_TABLE_OF(buf, c) \ 997 #define DOWNCASE_TABLE_OF(buf, c) \
1720 TRT_TABLE_OF (XCASE_TABLE_DOWNCASE (buf->case_table), c) 998 TRT_TABLE_OF (XCASE_TABLE_DOWNCASE (BUFFER_CASE_TABLE (buf)), c)
1721 #define UPCASE_TABLE_OF(buf, c) \ 999 #define UPCASE_TABLE_OF(buf, c) \
1722 TRT_TABLE_OF (XCASE_TABLE_UPCASE (buf->case_table), c) 1000 TRT_TABLE_OF (XCASE_TABLE_UPCASE (BUFFER_CASE_TABLE (buf)), c)
1723 1001
1724 /* 1 if CH is upper case. */ 1002 /* 1 if CH is upper case. */
1725 1003
1726 INLINE_HEADER int UPPERCASEP (struct buffer *buf, Emchar ch); 1004 INLINE_HEADER int UPPERCASEP (struct buffer *buf, Emchar ch);
1727 INLINE_HEADER int 1005 INLINE_HEADER int
1764 1042
1765 /* Downcase a character, or make no change if that cannot be done. */ 1043 /* Downcase a character, or make no change if that cannot be done. */
1766 1044
1767 #define DOWNCASE(buf, ch) DOWNCASE_TABLE_OF (buf, ch) 1045 #define DOWNCASE(buf, ch) DOWNCASE_TABLE_OF (buf, ch)
1768 1046
1769 /************************************************************************/
1770 /* Lisp string representation convenience functions */
1771 /************************************************************************/
1772 /* Because the representation of internally formatted data is subject to change,
1773 It's bad style to do something like strcmp (XSTRING_DATA (s), "foo")
1774 Instead, use the portable: intbyte_strcmp (XSTRING_DATA (s), "foo")
1775 or intbyte_memcmp (XSTRING_DATA (s), "foo", 3) */
1776
1777 /* Like strcmp, except first arg points at internally formatted data,
1778 while the second points at a string of only ASCII chars. */
1779 INLINE_HEADER int
1780 intbyte_strcmp (const Intbyte *bp, const char *ascii_string);
1781 INLINE_HEADER int
1782 intbyte_strcmp (const Intbyte *bp, const char *ascii_string)
1783 {
1784 #ifdef MULE
1785 while (1)
1786 {
1787 int diff;
1788 type_checking_assert (BYTE_ASCII_P (*ascii_string));
1789 if ((diff = charptr_emchar (bp) - *(Intbyte *) ascii_string) != 0)
1790 return diff;
1791 if (*ascii_string == '\0')
1792 return 0;
1793 ascii_string++;
1794 INC_CHARPTR (bp);
1795 }
1796 #else
1797 return strcmp ((char *)bp, ascii_string);
1798 #endif
1799 }
1800
1801
1802 /* Like memcmp, except first arg points at internally formatted data,
1803 while the second points at a string of only ASCII chars. */
1804 INLINE_HEADER int
1805 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len);
1806 INLINE_HEADER int
1807 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len)
1808 {
1809 #ifdef MULE
1810 while (len--)
1811 {
1812 int diff = charptr_emchar (bp) - *(Intbyte *) ascii_string;
1813 type_checking_assert (BYTE_ASCII_P (*ascii_string));
1814 if (diff != 0)
1815 return diff;
1816 ascii_string++;
1817 INC_CHARPTR (bp);
1818 }
1819 return 0;
1820 #else
1821 return memcmp (bp, ascii_string, len);
1822 #endif
1823 }
1824
1825 #endif /* INCLUDED_buffer_h_ */ 1047 #endif /* INCLUDED_buffer_h_ */