Mercurial > hg > xemacs-beta
comparison src/buffer.h @ 771:943eaba38521
[xemacs-hg @ 2002-03-13 08:51:24 by ben]
The big ben-mule-21-5 check-in!
Various files were added and deleted. See CHANGES-ben-mule.
There are still some test suite failures. No crashes, though.
Many of the failures have to do with problems in the test suite itself
rather than in the actual code. I'll be addressing these in the next
day or so -- none of the test suite failures are at all critical.
Meanwhile I'll be trying to address the biggest issues -- i.e. build
or run failures, which will almost certainly happen on various platforms.
All comments should be sent to ben@xemacs.org -- use a Cc: if necessary
when sending to mailing lists. There will be pre- and post- tags,
something like
pre-ben-mule-21-5-merge-in, and
post-ben-mule-21-5-merge-in.
author | ben |
---|---|
date | Wed, 13 Mar 2002 08:54:06 +0000 |
parents | fdefd0186b75 |
children | e38acbeb1cae |
comparison
equal
deleted
inserted
replaced
770:336a418893b5 | 771:943eaba38521 |
---|---|
1 /* Header file for the buffer manipulation primitives. | 1 /* Header file for the buffer manipulation primitives. |
2 Copyright (C) 1985, 1986, 1992, 1993, 1994, 1995 | 2 Copyright (C) 1985, 1986, 1992, 1993, 1994, 1995 |
3 Free Software Foundation, Inc. | 3 Free Software Foundation, Inc. |
4 Copyright (C) 1995 Sun Microsystems, Inc. | 4 Copyright (C) 1995 Sun Microsystems, Inc. |
5 Copyright (C) 2001, 2002 Ben Wing. | |
5 | 6 |
6 This file is part of XEmacs. | 7 This file is part of XEmacs. |
7 | 8 |
8 XEmacs is free software; you can redistribute it and/or modify it | 9 XEmacs is free software; you can redistribute it and/or modify it |
9 under the terms of the GNU General Public License as published by the | 10 under the terms of the GNU General Public License as published by the |
29 Ben Wing: almost completely rewritten for Mule, 19.12. | 30 Ben Wing: almost completely rewritten for Mule, 19.12. |
30 */ | 31 */ |
31 | 32 |
32 #ifndef INCLUDED_buffer_h_ | 33 #ifndef INCLUDED_buffer_h_ |
33 #define INCLUDED_buffer_h_ | 34 #define INCLUDED_buffer_h_ |
34 | |
35 #ifdef MULE | |
36 #include "mule-charset.h" | |
37 #endif | |
38 | 35 |
39 #include "casetab.h" | 36 #include "casetab.h" |
40 #include "chartab.h" | 37 #include "chartab.h" |
41 | 38 |
42 /************************************************************************/ | 39 /************************************************************************/ |
91 changed. */ | 88 changed. */ |
92 long save_modiff; /* Previous value of modiff, as of last | 89 long save_modiff; /* Previous value of modiff, as of last |
93 time buffer visited or saved a file. */ | 90 time buffer visited or saved a file. */ |
94 | 91 |
95 #ifdef MULE | 92 #ifdef MULE |
96 /* We keep track of a "known" region for very fast access. | 93 /* We keep track of a "known" region for very fast access. This |
97 This information is text-only so it goes here. */ | 94 information is text-only so it goes here. We update this at each |
95 change to the buffer, so if it's entirely ASCII, these will always | |
96 contain the minimum and maximum positions of the buffer. */ | |
98 Charbpos mule_bufmin, mule_bufmax; | 97 Charbpos mule_bufmin, mule_bufmax; |
99 Bytebpos mule_bytmin, mule_bytmax; | 98 Bytebpos mule_bytmin, mule_bytmax; |
100 int mule_shifter, mule_three_p; | 99 int mule_shifter, mule_three_p; |
101 | 100 |
102 /* And we also cache 16 positions for fairly fast access near those | 101 /* And we also cache 16 positions for fairly fast access near those |
103 positions. */ | 102 positions. */ |
104 Charbpos mule_charbpos_cache[16]; | 103 Charbpos mule_charbpos_cache[16]; |
105 Bytebpos mule_bytebpos_cache[16]; | 104 Bytebpos mule_bytebpos_cache[16]; |
105 | |
106 int entirely_ascii_p; | |
106 #endif | 107 #endif |
107 | 108 |
108 /* Similar to the above, we keep track of positions for which line | 109 /* Similar to the above, we keep track of positions for which line |
109 number has last been calculated. See line-number.c. */ | 110 number has last been calculated. See line-number.c. */ |
110 Lisp_Object line_number_cache; | 111 Lisp_Object line_number_cache; |
111 | 112 |
112 /* Change data that goes with the text. */ | 113 /* Change data that goes with the text. */ |
113 struct buffer_text_change_data *changes; | 114 struct buffer_text_change_data *changes; |
114 | |
115 }; | 115 }; |
116 | 116 |
117 struct buffer | 117 struct buffer |
118 { | 118 { |
119 struct lcrecord_header header; | 119 struct lcrecord_header header; |
260 && (mps_bufvar = XBUFFER (XCAR (mps_bufcons)), 1) \ | 260 && (mps_bufvar = XBUFFER (XCAR (mps_bufcons)), 1) \ |
261 && (mps_bufcons = XCDR (mps_bufcons), 1)); \ | 261 && (mps_bufcons = XCDR (mps_bufcons), 1)); \ |
262 ) | 262 ) |
263 | 263 |
264 | 264 |
265 | |
266 /************************************************************************/ | |
267 /* */ | |
268 /* working with raw internal-format data */ | |
269 /* */ | |
270 /************************************************************************/ | |
271 | |
272 /* NOTE: In all the following macros, we follow these rules concerning | |
273 multiple evaluation of the arguments: | |
274 | |
275 1) Anything that's an lvalue can be evaluated more than once. | |
276 2) Anything that's a Lisp Object can be evaluated more than once. | |
277 This should probably be changed, but this follows the way | |
278 that all the macros in lisp.h do things. | |
279 3) 'struct buffer *' arguments can be evaluated more than once. | |
280 4) Nothing else can be evaluated more than once. Use inline | |
281 functions, if necessary, to prevent multiple evaluation. | |
282 5) An exception to (4) is that there are some macros below that | |
283 may evaluate their arguments more than once. They are all | |
284 denoted with the word "unsafe" in their name and are generally | |
285 meant to be called only by other macros that have already | |
286 stored the calling values in temporary variables. | |
287 | |
288 | |
289 Use the following functions/macros on contiguous strings of data. | |
290 If the text you're operating on is known to come from a buffer, use | |
291 the buffer-level functions below -- they know about the gap and may | |
292 be more efficient. | |
293 | |
294 | |
295 (A) For working with charptr's (pointers to internally-formatted text): | |
296 ----------------------------------------------------------------------- | |
297 | |
298 VALID_CHARPTR_P (ptr): | |
299 Given a charptr, does it point to the beginning of a character? | |
300 | |
301 ASSERT_VALID_CHARPTR (ptr): | |
302 If error-checking is enabled, assert that the given charptr | |
303 points to the beginning of a character. Otherwise, do nothing. | |
304 | |
305 INC_CHARPTR (ptr): | |
306 Given a charptr (assumed to point at the beginning of a character), | |
307 modify that pointer so it points to the beginning of the next | |
308 character. | |
309 | |
310 DEC_CHARPTR (ptr): | |
311 Given a charptr (assumed to point at the beginning of a | |
312 character or at the very end of the text), modify that pointer | |
313 so it points to the beginning of the previous character. | |
314 | |
315 VALIDATE_CHARPTR_BACKWARD (ptr): | |
316 Make sure that PTR is pointing to the beginning of a character. | |
317 If not, back up until this is the case. Note that there are not | |
318 too many places where it is legitimate to do this sort of thing. | |
319 It's an error if you're passed an "invalid" char * pointer. | |
320 NOTE: PTR *must* be pointing to a valid part of the string (i.e. | |
321 not the very end, unless the string is zero-terminated or | |
322 something) in order for this function to not cause crashes. | |
323 | |
324 VALIDATE_CHARPTR_FORWARD (ptr): | |
325 Make sure that PTR is pointing to the beginning of a character. | |
326 If not, move forward until this is the case. Note that there | |
327 are not too many places where it is legitimate to do this sort | |
328 of thing. It's an error if you're passed an "invalid" char * | |
329 pointer. | |
330 | |
331 | |
332 (B) For working with the length (in bytes and characters) of a | |
333 section of internally-formatted text: | |
334 -------------------------------------------------------------- | |
335 | |
336 bytecount_to_charcount (ptr, nbi): | |
337 Given a pointer to a text string and a length in bytes, | |
338 return the equivalent length in characters. | |
339 | |
340 charcount_to_bytecount (ptr, nch): | |
341 Given a pointer to a text string and a length in characters, | |
342 return the equivalent length in bytes. | |
343 | |
344 charptr_n_addr (ptr, n): | |
345 Return a pointer to the beginning of the character offset N | |
346 (in characters) from PTR. | |
347 | |
348 | |
349 (C) For retrieving or changing the character pointed to by a charptr: | |
350 --------------------------------------------------------------------- | |
351 | |
352 charptr_emchar (ptr): | |
353 Retrieve the character pointed to by PTR as an Emchar. | |
354 | |
355 charptr_emchar_n (ptr, n): | |
356 Retrieve the character at offset N (in characters) from PTR, | |
357 as an Emchar. | |
358 | |
359 set_charptr_emchar (ptr, ch): | |
360 Store the character CH (an Emchar) as internally-formatted | |
361 text starting at PTR. Return the number of bytes stored. | |
362 | |
363 charptr_copy_char (ptr, ptr2): | |
364 Retrieve the character pointed to by PTR and store it as | |
365 internally-formatted text in PTR2. | |
366 | |
367 | |
368 (D) For working with Emchars: | |
369 ----------------------------- | |
370 | |
371 [Note that there are other functions/macros for working with Emchars | |
372 in mule-charset.h, for retrieving the charset of an Emchar | |
373 and such. These are only valid when MULE is defined.] | |
374 | |
375 valid_char_p (ch): | |
376 Return whether the given Emchar is valid. | |
377 | |
378 CHARP (ch): | |
379 Return whether the given Lisp_Object is a character. | |
380 | |
381 CHECK_CHAR_COERCE_INT (ch): | |
382 Signal an error if CH is not a valid character or integer Lisp_Object. | |
383 If CH is an integer Lisp_Object, convert it to a character Lisp_Object, | |
384 but merely by repackaging, without performing tests for char validity. | |
385 | |
386 MAX_EMCHAR_LEN: | |
387 Maximum number of buffer bytes per Emacs character. | |
388 | |
389 */ | |
390 | |
391 | |
392 /* ---------------------------------------------------------------------- */ | |
393 /* (A) For working with charptr's (pointers to internally-formatted text) */ | |
394 /* ---------------------------------------------------------------------- */ | |
395 | |
396 #ifdef MULE | |
397 # define VALID_CHARPTR_P(ptr) INTBYTE_FIRST_BYTE_P (* (unsigned char *) ptr) | |
398 #else | |
399 # define VALID_CHARPTR_P(ptr) 1 | |
400 #endif | |
401 | |
402 #ifdef ERROR_CHECK_CHARBPOS | |
403 # define ASSERT_VALID_CHARPTR(ptr) assert (VALID_CHARPTR_P (ptr)) | |
404 #else | |
405 # define ASSERT_VALID_CHARPTR(ptr) | |
406 #endif | |
407 | |
408 /* Note that INC_CHARPTR() and DEC_CHARPTR() have to be written in | |
409 completely separate ways. INC_CHARPTR() cannot use the DEC_CHARPTR() | |
410 trick of looking for a valid first byte because it might run off | |
411 the end of the string. DEC_CHARPTR() can't use the INC_CHARPTR() | |
412 method because it doesn't have easy access to the first byte of | |
413 the character it's moving over. */ | |
414 | |
415 #define REAL_INC_CHARPTR(ptr) \ | |
416 ((void) ((ptr) += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr)))) | |
417 | |
418 #define REAL_INC_CHARBYTEBPOS(ptr, pos) \ | |
419 (pos += REP_BYTES_BY_FIRST_BYTE (* (unsigned char *) (ptr))) | |
420 | |
421 #define REAL_DEC_CHARPTR(ptr) do { \ | |
422 (ptr)--; \ | |
423 } while (!VALID_CHARPTR_P (ptr)) | |
424 | |
425 #ifdef ERROR_CHECK_CHARBPOS | |
426 #define INC_CHARPTR(ptr) do { \ | |
427 ASSERT_VALID_CHARPTR (ptr); \ | |
428 REAL_INC_CHARPTR (ptr); \ | |
429 } while (0) | |
430 | |
431 #define INC_CHARBYTEBPOS(ptr, pos) do { \ | |
432 ASSERT_VALID_CHARPTR (ptr); \ | |
433 REAL_INC_CHARBYTEBPOS (ptr, pos); \ | |
434 } while (0) | |
435 | |
436 #define DEC_CHARPTR(ptr) do { \ | |
437 const Intbyte *dc_ptr1 = (ptr); \ | |
438 const Intbyte *dc_ptr2 = dc_ptr1; \ | |
439 REAL_DEC_CHARPTR (dc_ptr2); \ | |
440 assert (dc_ptr1 - dc_ptr2 == \ | |
441 REP_BYTES_BY_FIRST_BYTE (*dc_ptr2)); \ | |
442 (ptr) = (Intbyte *) dc_ptr2; \ | |
443 } while (0) | |
444 | |
445 #else /* ! ERROR_CHECK_CHARBPOS */ | |
446 #define INC_CHARBYTEBPOS(ptr, pos) REAL_INC_CHARBYTEBPOS (ptr, pos) | |
447 #define INC_CHARPTR(ptr) REAL_INC_CHARPTR (ptr) | |
448 #define DEC_CHARPTR(ptr) REAL_DEC_CHARPTR (ptr) | |
449 #endif /* ! ERROR_CHECK_CHARBPOS */ | |
450 | |
451 #ifdef MULE | |
452 | |
453 #define VALIDATE_CHARPTR_BACKWARD(ptr) do { \ | |
454 while (!VALID_CHARPTR_P (ptr)) ptr--; \ | |
455 } while (0) | |
456 | |
457 /* This needs to be trickier to avoid the possibility of running off | |
458 the end of the string. */ | |
459 | |
460 #define VALIDATE_CHARPTR_FORWARD(ptr) do { \ | |
461 Intbyte *vcf_ptr = (ptr); \ | |
462 VALIDATE_CHARPTR_BACKWARD (vcf_ptr); \ | |
463 if (vcf_ptr != (ptr)) \ | |
464 { \ | |
465 (ptr) = vcf_ptr; \ | |
466 INC_CHARPTR (ptr); \ | |
467 } \ | |
468 } while (0) | |
469 | |
470 #else /* not MULE */ | |
471 #define VALIDATE_CHARPTR_BACKWARD(ptr) | |
472 #define VALIDATE_CHARPTR_FORWARD(ptr) | |
473 #endif /* not MULE */ | |
474 | |
475 /* -------------------------------------------------------------- */ | |
476 /* (B) For working with the length (in bytes and characters) of a */ | |
477 /* section of internally-formatted text */ | |
478 /* -------------------------------------------------------------- */ | |
479 | |
480 INLINE_HEADER const Intbyte * | |
481 charptr_n_addr (const Intbyte *ptr, Charcount offset); | |
482 INLINE_HEADER const Intbyte * | |
483 charptr_n_addr (const Intbyte *ptr, Charcount offset) | |
484 { | |
485 return ptr + charcount_to_bytecount (ptr, offset); | |
486 } | |
487 | |
488 /* -------------------------------------------------------------------- */ | |
489 /* (C) For retrieving or changing the character pointed to by a charptr */ | |
490 /* -------------------------------------------------------------------- */ | |
491 | |
492 #define simple_charptr_emchar(ptr) ((Emchar) (ptr)[0]) | |
493 #define simple_set_charptr_emchar(ptr, x) ((ptr)[0] = (Intbyte) (x), 1) | |
494 #define simple_charptr_copy_char(ptr, ptr2) ((ptr2)[0] = *(ptr), 1) | |
495 | |
496 #ifdef MULE | |
497 | |
498 Emchar non_ascii_charptr_emchar (const Intbyte *ptr); | |
499 Bytecount non_ascii_set_charptr_emchar (Intbyte *ptr, Emchar c); | |
500 Bytecount non_ascii_charptr_copy_char (const Intbyte *src, Intbyte *dst); | |
501 | |
502 INLINE_HEADER Emchar charptr_emchar (const Intbyte *ptr); | |
503 INLINE_HEADER Emchar | |
504 charptr_emchar (const Intbyte *ptr) | |
505 { | |
506 return BYTE_ASCII_P (*ptr) ? | |
507 simple_charptr_emchar (ptr) : | |
508 non_ascii_charptr_emchar (ptr); | |
509 } | |
510 | |
511 INLINE_HEADER Bytecount set_charptr_emchar (Intbyte *ptr, Emchar x); | |
512 INLINE_HEADER Bytecount | |
513 set_charptr_emchar (Intbyte *ptr, Emchar x) | |
514 { | |
515 return !CHAR_MULTIBYTE_P (x) ? | |
516 simple_set_charptr_emchar (ptr, x) : | |
517 non_ascii_set_charptr_emchar (ptr, x); | |
518 } | |
519 | |
520 /* Copy the character pointed to by SRC into DST. | |
521 Return the number of bytes copied. */ | |
522 INLINE_HEADER Bytecount | |
523 charptr_copy_char (const Intbyte *src, Intbyte *dst); | |
524 INLINE_HEADER Bytecount | |
525 charptr_copy_char (const Intbyte *src, Intbyte *dst) | |
526 { | |
527 return BYTE_ASCII_P (*src) ? | |
528 simple_charptr_copy_char (src, dst) : | |
529 non_ascii_charptr_copy_char (src, dst); | |
530 } | |
531 | |
532 #else /* not MULE */ | |
533 | |
534 # define charptr_emchar(ptr) simple_charptr_emchar (ptr) | |
535 # define set_charptr_emchar(ptr, x) simple_set_charptr_emchar (ptr, x) | |
536 # define charptr_copy_char(ptr, ptr2) simple_charptr_copy_char (ptr, ptr2) | |
537 | |
538 #endif /* not MULE */ | |
539 | |
540 #define charptr_emchar_n(ptr, offset) \ | |
541 charptr_emchar (charptr_n_addr (ptr, offset)) | |
542 | |
543 | |
544 /* ---------------------------- */ | |
545 /* (D) For working with Emchars */ | |
546 /* ---------------------------- */ | |
547 | |
548 #ifdef MULE | |
549 | |
550 int non_ascii_valid_char_p (Emchar ch); | |
551 | |
552 INLINE_HEADER int valid_char_p (Emchar ch); | |
553 INLINE_HEADER int | |
554 valid_char_p (Emchar ch) | |
555 { | |
556 return (! (ch & ~0xFF)) || non_ascii_valid_char_p (ch); | |
557 } | |
558 | |
559 #else /* not MULE */ | |
560 | |
561 #define valid_char_p(ch) (! (ch & ~0xFF)) | |
562 | |
563 #endif /* not MULE */ | |
564 | |
565 #define CHAR_INTP(x) (INTP (x) && valid_char_p (XINT (x))) | |
566 | |
567 #define CHAR_OR_CHAR_INTP(x) (CHARP (x) || CHAR_INTP (x)) | |
568 | |
569 INLINE_HEADER Emchar XCHAR_OR_CHAR_INT (Lisp_Object obj); | |
570 INLINE_HEADER Emchar | |
571 XCHAR_OR_CHAR_INT (Lisp_Object obj) | |
572 { | |
573 return CHARP (obj) ? XCHAR (obj) : XINT (obj); | |
574 } | |
575 | |
576 #define CHECK_CHAR_COERCE_INT(x) do { \ | |
577 if (CHARP (x)) \ | |
578 ; \ | |
579 else if (CHAR_INTP (x)) \ | |
580 x = make_char (XINT (x)); \ | |
581 else \ | |
582 x = wrong_type_argument (Qcharacterp, x); \ | |
583 } while (0) | |
584 | |
585 #ifdef MULE | |
586 # define MAX_EMCHAR_LEN 4 | |
587 #else | |
588 # define MAX_EMCHAR_LEN 1 | |
589 #endif | |
590 | |
591 | |
592 /*----------------------------------------------------------------------*/ | 265 /*----------------------------------------------------------------------*/ |
593 /* Accessor macros for important positions in a buffer */ | 266 /* Accessor macros for important positions in a buffer */ |
594 /*----------------------------------------------------------------------*/ | 267 /*----------------------------------------------------------------------*/ |
595 | 268 |
596 /* We put them here because some stuff below wants them before the | 269 /* We put them here because some stuff below wants them before the |
716 /* Converting between Charbpos's and Bytebpos's, for a buffer-or-string. | 389 /* Converting between Charbpos's and Bytebpos's, for a buffer-or-string. |
717 For strings, this maps to the bytecount<->charcount converters. */ | 390 For strings, this maps to the bytecount<->charcount converters. */ |
718 | 391 |
719 #define buffer_or_string_charbpos_to_bytebpos(obj, pos) \ | 392 #define buffer_or_string_charbpos_to_bytebpos(obj, pos) \ |
720 (BUFFERP (obj) ? charbpos_to_bytebpos (XBUFFER (obj), pos) : \ | 393 (BUFFERP (obj) ? charbpos_to_bytebpos (XBUFFER (obj), pos) : \ |
721 (Bytebpos) charcount_to_bytecount (XSTRING_DATA (obj), pos)) | 394 (Bytebpos) XSTRING_INDEX_CHAR_TO_BYTE (obj, pos)) |
722 | 395 |
723 #define buffer_or_string_bytebpos_to_charbpos(obj, ind) \ | 396 #define buffer_or_string_bytebpos_to_charbpos(obj, ind) \ |
724 (BUFFERP (obj) ? bytebpos_to_charbpos (XBUFFER (obj), ind) : \ | 397 (BUFFERP (obj) ? bytebpos_to_charbpos (XBUFFER (obj), ind) : \ |
725 (Charbpos) bytecount_to_charcount (XSTRING_DATA (obj), ind)) | 398 (Charbpos) XSTRING_INDEX_BYTE_TO_CHAR (obj, ind)) |
726 | 399 |
727 /* Similar for Charbpos's and Membpos's. */ | 400 /* Similar for Charbpos's and Membpos's. */ |
728 | 401 |
729 #define buffer_or_string_charbpos_to_membpos(obj, pos) \ | 402 #define buffer_or_string_charbpos_to_membpos(obj, pos) \ |
730 (BUFFERP (obj) ? charbpos_to_membpos (XBUFFER (obj), pos) : \ | 403 (BUFFERP (obj) ? charbpos_to_membpos (XBUFFER (obj), pos) : \ |
731 (Membpos) charcount_to_bytecount (XSTRING_DATA (obj), pos)) | 404 (Membpos) XSTRING_INDEX_CHAR_TO_BYTE (obj, pos)) |
732 | 405 |
733 #define buffer_or_string_membpos_to_charbpos(obj, ind) \ | 406 #define buffer_or_string_membpos_to_charbpos(obj, ind) \ |
734 (BUFFERP (obj) ? membpos_to_charbpos (XBUFFER (obj), ind) : \ | 407 (BUFFERP (obj) ? membpos_to_charbpos (XBUFFER (obj), ind) : \ |
735 (Charbpos) bytecount_to_charcount (XSTRING_DATA (obj), ind)) | 408 (Charbpos) XSTRING_INDEX_BYTE_TO_CHAR (obj, ind)) |
736 | 409 |
737 /************************************************************************/ | 410 /************************************************************************/ |
738 /* */ | 411 /* */ |
739 /* working with buffer-level data */ | 412 /* working with buffer-level data */ |
740 /* */ | 413 /* */ |
893 /* Note that in the simplest case (no MULE, no ERROR_CHECK_CHARBPOS), | 566 /* Note that in the simplest case (no MULE, no ERROR_CHECK_CHARBPOS), |
894 this crap reduces down to simply (x)--. */ | 567 this crap reduces down to simply (x)--. */ |
895 | 568 |
896 #define DEC_BYTEBPOS(buf, x) do \ | 569 #define DEC_BYTEBPOS(buf, x) do \ |
897 { \ | 570 { \ |
898 ASSERT_VALID_BYTEBPOS_BACKWARD_UNSAFE (buf, x); \ | 571 ASSERT_VALID_BYTEBPOS_BACKWARD_UNSAFE (buf, x); \ |
899 /* Note that we do the decrement first to \ | 572 /* Note that we do the decrement first to \ |
900 make sure that the pointer in \ | 573 make sure that the pointer in \ |
901 VALIDATE_BYTEBPOS_BACKWARD() ends up on \ | 574 VALIDATE_BYTEBPOS_BACKWARD() ends up on \ |
902 the correct side of the gap */ \ | 575 the correct side of the gap */ \ |
903 (x)--; \ | 576 (x)--; \ |
930 | 603 |
931 Bytebpos charbpos_to_bytebpos_func (struct buffer *buf, Charbpos x); | 604 Bytebpos charbpos_to_bytebpos_func (struct buffer *buf, Charbpos x); |
932 Charbpos bytebpos_to_charbpos_func (struct buffer *buf, Bytebpos x); | 605 Charbpos bytebpos_to_charbpos_func (struct buffer *buf, Bytebpos x); |
933 | 606 |
934 /* The basic algorithm we use is to keep track of a known region of | 607 /* The basic algorithm we use is to keep track of a known region of |
935 characters in each buffer, all of which are of the same width. We | 608 characters in each buffer, all of which are of the same width. We keep |
936 keep track of the boundaries of the region in both Charbpos and | 609 track of the boundaries of the region in both Charbpos and Bytebpos |
937 Bytebpos coordinates and also keep track of the char width, which | 610 coordinates and also keep track of the char width, which is 1 - 4 bytes. |
938 is 1 - 4 bytes. If the position we're translating is not in | 611 If the position we're translating is not in the known region, then we |
939 the known region, then we invoke a function to update the known | 612 invoke a function to update the known region to surround the position in |
940 region to surround the position in question. This assumes | 613 question. This assumes locality of reference, which is usually the |
941 locality of reference, which is usually the case. | 614 case. |
942 | 615 |
943 Note that the function to update the known region can be simple | 616 Note that the function to update the known region can be simple or |
944 or complicated depending on how much information we cache. | 617 complicated depending on how much information we cache. In addition to |
945 For the moment, we don't cache any information, and just move | 618 the known region, we always cache the correct conversions for point, |
946 linearly forward or back from the known region, with a few | 619 BEGV, and ZV, and in addition to this we cache 16 positions where the |
947 shortcuts to catch all-ASCII buffers. (Note that this will | 620 conversion is known. We only look in the cache or update it when we |
948 thrash with bad locality of reference.) A smarter method would | 621 need to move the known region more than a certain amount (currently 50 |
949 be to keep some sort of pseudo-extent layer over the buffer; | 622 chars), and then we throw away a "random" value and replace it with the |
950 maybe keep track of the charbpos/bytebpos correspondence at the | 623 newly calculated value. |
951 beginning of each line, which would allow us to do a binary | 624 |
952 search over the pseudo-extents to narrow things down to the | 625 Finally, we maintain an extra flag that tracks whether the buffer is |
953 correct line, at which point you could use a linear movement | 626 entirely ASCII, to speed up the conversions even more. This flag is |
954 method. This would also mesh well with efficiently | 627 actually of dubious value because in an entirely-ASCII buffer the known |
955 implementing a line-numbering scheme. | 628 region will always span the entire buffer (in fact, we update the flag |
956 | 629 based on this fact), and so all we're saving is a few machine cycles. |
957 Note also that we have to multiply or divide by the char width | 630 |
958 in order to convert the positions. We do some tricks to avoid | 631 A potentially smarter method than what we do with known regions and |
959 ever actually having to do a multiply or divide, because that | 632 cached positions would be to keep some sort of pseudo-extent layer over |
960 is typically an expensive operation (esp. divide). Multiplying | 633 the buffer; maybe keep track of the charbpos/bytebpos correspondence at the |
961 or dividing by 1, 2, or 4 can be implemented simply as a | 634 beginning of each line, which would allow us to do a binary search over |
962 shift left or shift right, and we keep track of a shifter value | 635 the pseudo-extents to narrow things down to the correct line, at which |
963 (0, 1, or 2) indicating how much to shift. Multiplying by 3 | 636 point you could use a linear movement method. This would also mesh well |
964 can be implemented by doubling and then adding the original | 637 with efficiently implementing a line-numbering scheme. However, you |
965 value. Dividing by 3, alas, cannot be implemented in any | 638 have to weigh the amount of time spent updating the cache vs. the |
966 simple shift/subtract method, as far as I know; so we just | 639 savings that result from it. In reality, we modify the buffer far less |
967 do a table lookup. For simplicity, we use a table of size | 640 often than we access it, so a cache of this sort that provides |
968 128K, which indexes the "divide-by-3" values for the first | 641 guaranteed LOG (N) performance (or perhaps N * LOG (N), if we set a |
969 64K non-negative numbers. (Note that we can increase the | 642 maximum on the cache size) would indeed be a win, particularly in very |
970 size up to 384K, i.e. indexing the first 192K non-negative | 643 large buffers. If we ever implement this, we should probably set a |
971 numbers, while still using shorts in the array.) This also | 644 reasonably high minimum below which we use the old method, because the |
972 means that the size of the known region can be at most | 645 time spent updating the fancy cache would likely become dominant when |
973 64K for width-three characters. | 646 making buffer modifications in smaller buffers. |
647 | |
648 Note also that we have to multiply or divide by the char width in order | |
649 to convert the positions. We do some tricks to avoid ever actually | |
650 having to do a multiply or divide, because that is typically an | |
651 expensive operation (esp. divide). Multiplying or dividing by 1, 2, or | |
652 4 can be implemented simply as a shift left or shift right, and we keep | |
653 track of a shifter value (0, 1, or 2) indicating how much to shift. | |
654 Multiplying by 3 can be implemented by doubling and then adding the | |
655 original value. Dividing by 3, alas, cannot be implemented in any | |
656 simple shift/subtract method, as far as I know; so we just do a table | |
657 lookup. For simplicity, we use a table of size 128K, which indexes the | |
658 "divide-by-3" values for the first 64K non-negative numbers. (Note that | |
659 we can increase the size up to 384K, i.e. indexing the first 192K | |
660 non-negative numbers, while still using shorts in the array.) This also | |
661 means that the size of the known region can be at most 64K for | |
662 width-three characters. | |
663 | |
664 !!#### We should investigate the algorithm in GNU Emacs. I think it | |
665 does something similar, but it may differ in some details, and it's | |
666 worth seeing if anything can be gleaned. | |
974 */ | 667 */ |
975 | 668 |
976 extern short three_to_one_table[]; | 669 extern short three_to_one_table[]; |
977 | 670 |
978 INLINE_HEADER int real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x); | 671 INLINE_HEADER Bytebpos real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x); |
979 INLINE_HEADER int | 672 INLINE_HEADER Bytebpos |
980 real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x) | 673 real_charbpos_to_bytebpos (struct buffer *buf, Charbpos x) |
981 { | 674 { |
675 if (buf->text->entirely_ascii_p) | |
676 return (Bytebpos) x; | |
982 if (x >= buf->text->mule_bufmin && x <= buf->text->mule_bufmax) | 677 if (x >= buf->text->mule_bufmin && x <= buf->text->mule_bufmax) |
983 return (buf->text->mule_bytmin + | 678 return (buf->text->mule_bytmin + |
984 ((x - buf->text->mule_bufmin) << buf->text->mule_shifter) + | 679 ((x - buf->text->mule_bufmin) << buf->text->mule_shifter) + |
985 (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0)); | 680 (buf->text->mule_three_p ? (x - buf->text->mule_bufmin) : 0)); |
986 else | 681 else |
987 return charbpos_to_bytebpos_func (buf, x); | 682 return charbpos_to_bytebpos_func (buf, x); |
988 } | 683 } |
989 | 684 |
990 INLINE_HEADER int real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x); | 685 INLINE_HEADER Charbpos real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x); |
991 INLINE_HEADER int | 686 INLINE_HEADER Charbpos |
992 real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x) | 687 real_bytebpos_to_charbpos (struct buffer *buf, Bytebpos x) |
993 { | 688 { |
689 if (buf->text->entirely_ascii_p) | |
690 return (Charbpos) x; | |
994 if (x >= buf->text->mule_bytmin && x <= buf->text->mule_bytmax) | 691 if (x >= buf->text->mule_bytmin && x <= buf->text->mule_bytmax) |
995 return (buf->text->mule_bufmin + | 692 return (buf->text->mule_bufmin + |
996 ((buf->text->mule_three_p | 693 ((buf->text->mule_three_p |
997 ? three_to_one_table[x - buf->text->mule_bytmin] | 694 ? three_to_one_table[x - buf->text->mule_bytmin] |
998 : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter))); | 695 : (x - buf->text->mule_bytmin) >> buf->text->mule_shifter))); |
1038 # define BI_BUF_CHARPTR_COPY_CHAR(buf, pos, str) \ | 735 # define BI_BUF_CHARPTR_COPY_CHAR(buf, pos, str) \ |
1039 charptr_copy_char (BI_BUF_BYTE_ADDRESS (buf, pos), str) | 736 charptr_copy_char (BI_BUF_BYTE_ADDRESS (buf, pos), str) |
1040 #define BUF_CHARPTR_COPY_CHAR(buf, pos, str) \ | 737 #define BUF_CHARPTR_COPY_CHAR(buf, pos, str) \ |
1041 BI_BUF_CHARPTR_COPY_CHAR (buf, charbpos_to_bytebpos (buf, pos), str) | 738 BI_BUF_CHARPTR_COPY_CHAR (buf, charbpos_to_bytebpos (buf, pos), str) |
1042 | 739 |
1043 | |
1044 /************************************************************************/ | |
1045 /* */ | |
1046 /* Converting between internal and external format */ | |
1047 /* */ | |
1048 /************************************************************************/ | |
1049 /* | |
1050 All client code should use only the two macros | |
1051 | |
1052 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system) | |
1053 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, coding_system) | |
1054 | |
1055 Typical use is | |
1056 | |
1057 TO_EXTERNAL_FORMAT (DATA, (ptr, len), | |
1058 LISP_BUFFER, buffer, | |
1059 Qfile_name); | |
1060 | |
1061 The source or sink can be specified in one of these ways: | |
1062 | |
1063 DATA, (ptr, len), // input data is a fixed buffer of size len | |
1064 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len | |
1065 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len | |
1066 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output | |
1067 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output | |
1068 C_STRING, ptr, // equivalent to DATA, (ptr, strlen (ptr) + 1) on input | |
1069 LISP_STRING, string, // input or output is a Lisp_Object of type string | |
1070 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer | |
1071 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream | |
1072 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque | |
1073 | |
1074 When specifying the sink, use lvalues, since the macro will assign to them, | |
1075 except when the sink is an lstream or a lisp buffer. | |
1076 | |
1077 The macros accept the kinds of sources and sinks appropriate for | |
1078 internal and external data representation. See the type_checking_assert | |
1079 macros below for the actual allowed types. | |
1080 | |
1081 Since some sources and sinks use one argument (a Lisp_Object) to | |
1082 specify them, while others take a (pointer, length) pair, we use | |
1083 some C preprocessor trickery to allow pair arguments to be specified | |
1084 by parenthesizing them, as in the examples above. | |
1085 | |
1086 Anything prefixed by dfc_ (`data format conversion') is private. | |
1087 They are only used to implement these macros. | |
1088 | |
1089 Using C_STRING* is appropriate for using with external APIs that take | |
1090 null-terminated strings. For internal data, we should try to be | |
1091 '\0'-clean - i.e. allow arbitrary data to contain embedded '\0'. | |
1092 | |
1093 Sometime in the future we might allow output to C_STRING_ALLOCA or | |
1094 C_STRING_MALLOC _only_ with TO_EXTERNAL_FORMAT(), not | |
1095 TO_INTERNAL_FORMAT(). */ | |
1096 | |
1097 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, coding_system) \ | |
1098 do { \ | |
1099 dfc_conversion_type dfc_simplified_source_type; \ | |
1100 dfc_conversion_type dfc_simplified_sink_type; \ | |
1101 dfc_conversion_data dfc_source; \ | |
1102 dfc_conversion_data dfc_sink; \ | |
1103 \ | |
1104 type_checking_assert \ | |
1105 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
1106 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
1107 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \ | |
1108 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
1109 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
1110 && \ | |
1111 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
1112 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
1113 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
1114 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
1115 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
1116 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \ | |
1117 \ | |
1118 DFC_SOURCE_##source_type##_TO_ARGS (source); \ | |
1119 DFC_SINK_##sink_type##_TO_ARGS (sink); \ | |
1120 \ | |
1121 DFC_CONVERT_TO_EXTERNAL_FORMAT (dfc_simplified_source_type, &dfc_source, \ | |
1122 coding_system, \ | |
1123 dfc_simplified_sink_type, &dfc_sink); \ | |
1124 \ | |
1125 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \ | |
1126 } while (0) | |
1127 | |
1128 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, coding_system) \ | |
1129 do { \ | |
1130 dfc_conversion_type dfc_simplified_source_type; \ | |
1131 dfc_conversion_type dfc_simplified_sink_type; \ | |
1132 dfc_conversion_data dfc_source; \ | |
1133 dfc_conversion_data dfc_sink; \ | |
1134 \ | |
1135 type_checking_assert \ | |
1136 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
1137 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
1138 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
1139 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
1140 && \ | |
1141 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
1142 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
1143 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
1144 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
1145 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \ | |
1146 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
1147 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \ | |
1148 \ | |
1149 DFC_SOURCE_##source_type##_TO_ARGS (source); \ | |
1150 DFC_SINK_##sink_type##_TO_ARGS (sink); \ | |
1151 \ | |
1152 DFC_CONVERT_TO_INTERNAL_FORMAT (dfc_simplified_source_type, &dfc_source, \ | |
1153 coding_system, \ | |
1154 dfc_simplified_sink_type, &dfc_sink); \ | |
1155 \ | |
1156 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \ | |
1157 } while (0) | |
1158 | |
1159 #ifdef FILE_CODING | |
1160 #define DFC_CONVERT_TO_EXTERNAL_FORMAT dfc_convert_to_external_format | |
1161 #define DFC_CONVERT_TO_INTERNAL_FORMAT dfc_convert_to_internal_format | |
1162 #else | |
1163 /* ignore coding_system argument */ | |
1164 #define DFC_CONVERT_TO_EXTERNAL_FORMAT(a, b, coding_system, c, d) \ | |
1165 dfc_convert_to_external_format (a, b, c, d) | |
1166 #define DFC_CONVERT_TO_INTERNAL_FORMAT(a, b, coding_system, c, d) \ | |
1167 dfc_convert_to_internal_format (a, b, c, d) | |
1168 #endif | |
1169 | |
1170 typedef union | |
1171 { | |
1172 struct { const void *ptr; Bytecount len; } data; | |
1173 Lisp_Object lisp_object; | |
1174 } dfc_conversion_data; | |
1175 | |
1176 enum dfc_conversion_type | |
1177 { | |
1178 DFC_TYPE_DATA, | |
1179 DFC_TYPE_ALLOCA, | |
1180 DFC_TYPE_MALLOC, | |
1181 DFC_TYPE_C_STRING, | |
1182 DFC_TYPE_C_STRING_ALLOCA, | |
1183 DFC_TYPE_C_STRING_MALLOC, | |
1184 DFC_TYPE_LISP_STRING, | |
1185 DFC_TYPE_LISP_LSTREAM, | |
1186 DFC_TYPE_LISP_OPAQUE, | |
1187 DFC_TYPE_LISP_BUFFER | |
1188 }; | |
1189 typedef enum dfc_conversion_type dfc_conversion_type; | |
1190 | |
1191 /* WARNING: These use a static buffer. This can lead to disaster if | |
1192 these functions are not used *very* carefully. Another reason to only use | |
1193 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */ | |
1194 void | |
1195 dfc_convert_to_external_format (dfc_conversion_type source_type, | |
1196 dfc_conversion_data *source, | |
1197 #ifdef FILE_CODING | |
1198 Lisp_Object coding_system, | |
1199 #endif | |
1200 dfc_conversion_type sink_type, | |
1201 dfc_conversion_data *sink); | |
1202 void | |
1203 dfc_convert_to_internal_format (dfc_conversion_type source_type, | |
1204 dfc_conversion_data *source, | |
1205 #ifdef FILE_CODING | |
1206 Lisp_Object coding_system, | |
1207 #endif | |
1208 dfc_conversion_type sink_type, | |
1209 dfc_conversion_data *sink); | |
1210 /* CPP Trickery */ | |
1211 #define DFC_CPP_CAR(x,y) (x) | |
1212 #define DFC_CPP_CDR(x,y) (y) | |
1213 | |
1214 /* Convert `source' to args for dfc_convert_to_*_format() */ | |
1215 #define DFC_SOURCE_DATA_TO_ARGS(val) do { \ | |
1216 dfc_source.data.ptr = DFC_CPP_CAR val; \ | |
1217 dfc_source.data.len = DFC_CPP_CDR val; \ | |
1218 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
1219 } while (0) | |
1220 #define DFC_SOURCE_C_STRING_TO_ARGS(val) do { \ | |
1221 dfc_source.data.len = \ | |
1222 strlen ((char *) (dfc_source.data.ptr = (val))); \ | |
1223 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
1224 } while (0) | |
1225 #define DFC_SOURCE_LISP_STRING_TO_ARGS(val) do { \ | |
1226 Lisp_Object dfc_slsta = (val); \ | |
1227 type_checking_assert (STRINGP (dfc_slsta)); \ | |
1228 dfc_source.lisp_object = dfc_slsta; \ | |
1229 dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \ | |
1230 } while (0) | |
1231 #define DFC_SOURCE_LISP_LSTREAM_TO_ARGS(val) do { \ | |
1232 Lisp_Object dfc_sllta = (val); \ | |
1233 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
1234 dfc_source.lisp_object = dfc_sllta; \ | |
1235 dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \ | |
1236 } while (0) | |
1237 #define DFC_SOURCE_LISP_OPAQUE_TO_ARGS(val) do { \ | |
1238 Lisp_Opaque *dfc_slota = XOPAQUE (val); \ | |
1239 dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \ | |
1240 dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \ | |
1241 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
1242 } while (0) | |
1243 | |
1244 /* Convert `sink' to args for dfc_convert_to_*_format() */ | |
1245 #define DFC_SINK_ALLOCA_TO_ARGS(val) \ | |
1246 dfc_simplified_sink_type = DFC_TYPE_DATA | |
1247 #define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \ | |
1248 dfc_simplified_sink_type = DFC_TYPE_DATA | |
1249 #define DFC_SINK_MALLOC_TO_ARGS(val) \ | |
1250 dfc_simplified_sink_type = DFC_TYPE_DATA | |
1251 #define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \ | |
1252 dfc_simplified_sink_type = DFC_TYPE_DATA | |
1253 #define DFC_SINK_LISP_STRING_TO_ARGS(val) \ | |
1254 dfc_simplified_sink_type = DFC_TYPE_DATA | |
1255 #define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \ | |
1256 dfc_simplified_sink_type = DFC_TYPE_DATA | |
1257 #define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \ | |
1258 Lisp_Object dfc_sllta = (val); \ | |
1259 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
1260 dfc_sink.lisp_object = dfc_sllta; \ | |
1261 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
1262 } while (0) | |
1263 #define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \ | |
1264 struct buffer *dfc_slbta = XBUFFER (val); \ | |
1265 dfc_sink.lisp_object = \ | |
1266 make_lisp_buffer_output_stream \ | |
1267 (dfc_slbta, BUF_PT (dfc_slbta), 0); \ | |
1268 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
1269 } while (0) | |
1270 | |
1271 /* Assign to the `sink' lvalue(s) using the converted data. */ | |
1272 typedef union { char c; void *p; } *dfc_aliasing_voidpp; | |
1273 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
1274 void * dfc_sink_ret = alloca (dfc_sink.data.len + 1); \ | |
1275 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \ | |
1276 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \ | |
1277 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ | |
1278 } while (0) | |
1279 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
1280 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 1); \ | |
1281 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \ | |
1282 ((dfc_aliasing_voidpp) &(DFC_CPP_CAR sink))->p = dfc_sink_ret; \ | |
1283 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ | |
1284 } while (0) | |
1285 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
1286 void * dfc_sink_ret = alloca (dfc_sink.data.len + 1); \ | |
1287 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \ | |
1288 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \ | |
1289 } while (0) | |
1290 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
1291 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 1); \ | |
1292 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 1); \ | |
1293 ((dfc_aliasing_voidpp) &(sink))->p = dfc_sink_ret; \ | |
1294 } while (0) | |
1295 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \ | |
1296 sink = make_string ((Intbyte *) dfc_sink.data.ptr, dfc_sink.data.len) | |
1297 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \ | |
1298 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len) | |
1299 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */ | |
1300 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \ | |
1301 Lstream_delete (XLSTREAM (dfc_sink.lisp_object)) | |
1302 | |
1303 /* Someday we might want to distinguish between Qnative and Qfile_name | |
1304 by using coding-system aliases, but for now it suffices to have | |
1305 these be identical. Qnative can be used as the coding_system | |
1306 argument to TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */ | |
1307 #define Qnative Qfile_name | |
1308 | |
1309 #if defined (WIN32_NATIVE) || defined (CYGWIN) | |
1310 /* #### kludge!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
1311 Remove this as soon as my Mule code is integrated. */ | |
1312 #define Qmswindows_tstr Qnative | |
1313 #endif | |
1314 | |
1315 /* More stand-ins */ | |
1316 #define Qcommand_argument_encoding Qnative | |
1317 #define Qenvironment_variable_encoding Qnative | |
1318 | |
1319 /* Convenience macros for extremely common invocations */ | |
1320 #define C_STRING_TO_EXTERNAL(in, out, coding_system) \ | |
1321 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system) | |
1322 #define C_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \ | |
1323 TO_EXTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system) | |
1324 #define EXTERNAL_TO_C_STRING(in, out, coding_system) \ | |
1325 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_ALLOCA, out, coding_system) | |
1326 #define EXTERNAL_TO_C_STRING_MALLOC(in, out, coding_system) \ | |
1327 TO_INTERNAL_FORMAT (C_STRING, in, C_STRING_MALLOC, out, coding_system) | |
1328 #define LISP_STRING_TO_EXTERNAL(in, out, coding_system) \ | |
1329 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_ALLOCA, out, coding_system) | |
1330 #define LISP_STRING_TO_EXTERNAL_MALLOC(in, out, coding_system) \ | |
1331 TO_EXTERNAL_FORMAT (LISP_STRING, in, C_STRING_MALLOC, out, coding_system) | |
1332 | |
1333 | |
1334 /************************************************************************/ | |
1335 /* */ | |
1336 /* fake charset functions */ | |
1337 /* */ | |
1338 /************************************************************************/ | |
1339 | |
1340 /* used when MULE is not defined, so that Charset-type stuff can still | |
1341 be done */ | |
1342 | |
1343 #ifndef MULE | |
1344 | |
1345 #define Vcharset_ascii Qnil | |
1346 | |
1347 #define CHAR_CHARSET(ch) Vcharset_ascii | |
1348 #define CHAR_LEADING_BYTE(ch) LEADING_BYTE_ASCII | |
1349 #define LEADING_BYTE_ASCII 0x80 | |
1350 #define NUM_LEADING_BYTES 1 | |
1351 #define MIN_LEADING_BYTE 0x80 | |
1352 #define CHARSETP(cs) 1 | |
1353 #define CHARSET_BY_LEADING_BYTE(lb) Vcharset_ascii | |
1354 #define XCHARSET_LEADING_BYTE(cs) LEADING_BYTE_ASCII | |
1355 #define XCHARSET_GRAPHIC(cs) -1 | |
1356 #define XCHARSET_COLUMNS(cs) 1 | |
1357 #define XCHARSET_DIMENSION(cs) 1 | |
1358 #define REP_BYTES_BY_FIRST_BYTE(fb) 1 | |
1359 #define BREAKUP_CHAR(ch, charset, byte1, byte2) do { \ | |
1360 (charset) = Vcharset_ascii; \ | |
1361 (byte1) = (ch); \ | |
1362 (byte2) = 0; \ | |
1363 } while (0) | |
1364 #define BYTE_ASCII_P(byte) 1 | |
1365 | |
1366 #endif /* ! MULE */ | |
1367 | 740 |
1368 /************************************************************************/ | 741 /************************************************************************/ |
1369 /* */ | 742 /* */ |
1370 /* higher-level buffer-position functions */ | 743 /* higher-level buffer-position functions */ |
1371 /* */ | 744 /* */ |
1446 | 819 |
1447 OLD_BI_CEILING_OF(n) = NEW_BI_CEILING_OF(n) - 1 | 820 OLD_BI_CEILING_OF(n) = NEW_BI_CEILING_OF(n) - 1 |
1448 OLD_BI_FLOOR_OF(n) = NEW_BI_FLOOR_OF(n + 1) | 821 OLD_BI_FLOOR_OF(n) = NEW_BI_FLOOR_OF(n + 1) |
1449 | 822 |
1450 The definitions were changed because the new definitions are more | 823 The definitions were changed because the new definitions are more |
1451 consistent with the way everything else works in Emacs. | 824 consistent with the way everything else works in XEmacs. |
1452 */ | 825 */ |
1453 | 826 |
1454 /* Properties of CEILING_OF and FLOOR_OF (also apply to BI_ variants): | 827 /* Properties of CEILING_OF and FLOOR_OF (also apply to BI_ variants): |
1455 | 828 |
1456 1) FLOOR_OF (CEILING_OF (n)) = n | 829 1) FLOOR_OF (CEILING_OF (n)) = n |
1469 and | 842 and |
1470 | 843 |
1471 [BYTE_ADDRESS (FLOOR_OF (n)), BYTE_ADDRESS_BEFORE (n)] | 844 [BYTE_ADDRESS (FLOOR_OF (n)), BYTE_ADDRESS_BEFORE (n)] |
1472 | 845 |
1473 are contiguous. | 846 are contiguous. |
1474 */ | 847 |
848 A typical loop using CEILING_OF to process contiguous ranges of text | |
849 between [from, to) looks like this: | |
850 | |
851 { | |
852 Bytebpos pos = from; | |
853 | |
854 while (pos < to) | |
855 { | |
856 Bytebpos ceil; | |
857 | |
858 ceil = BI_BUF_CEILING_OF (buf, pos); | |
859 ceil = min (to, ceil); | |
860 process_intbyte_string (BI_BUF_BYTE_ADDRESS (buf, pos), ceil - pos); | |
861 pos = ceil; | |
862 } | |
863 } | |
864 | |
865 Currently there will be at most two iterations in the loop, but it is | |
866 written in such a way that it will still work if the buffer | |
867 representation is changed to have multiple gaps in it. | |
868 */ | |
1475 | 869 |
1476 | 870 |
1477 /* Return the maximum index in the buffer it is safe to scan forwards | 871 /* Return the maximum index in the buffer it is safe to scan forwards |
1478 past N to. This is used to prevent buffer scans from running into | 872 past N to. This is used to prevent buffer scans from running into |
1479 the gap (e.g. search.c). All characters between N and CEILING_OF(N) | 873 the gap (e.g. search.c). All characters between N and CEILING_OF(N) |
1507 (b)->text->gpt : BI_BUF_BEG (b)) | 901 (b)->text->gpt : BI_BUF_BEG (b)) |
1508 #define BUF_FLOOR_OF_IGNORE_ACCESSIBLE(b, n) \ | 902 #define BUF_FLOOR_OF_IGNORE_ACCESSIBLE(b, n) \ |
1509 bytebpos_to_charbpos \ | 903 bytebpos_to_charbpos \ |
1510 (b, BI_BUF_FLOOR_OF_IGNORE_ACCESSIBLE (b, charbpos_to_bytebpos (b, n))) | 904 (b, BI_BUF_FLOOR_OF_IGNORE_ACCESSIBLE (b, charbpos_to_bytebpos (b, n))) |
1511 | 905 |
1512 | |
1513 extern struct buffer *current_buffer; | |
1514 | |
1515 /* This is the initial (startup) directory, as used for the *scratch* buffer. | |
1516 We're making this a global to make others aware of the startup directory. | |
1517 `initial_directory' is stored in external format. | |
1518 */ | |
1519 extern char initial_directory[]; | |
1520 extern void init_initial_directory (void); /* initialize initial_directory */ | |
1521 | |
1522 EXFUN (Fbuffer_disable_undo, 1); | |
1523 EXFUN (Fbuffer_modified_p, 1); | |
1524 EXFUN (Fbuffer_name, 1); | |
1525 EXFUN (Fcurrent_buffer, 0); | |
1526 EXFUN (Ferase_buffer, 1); | |
1527 EXFUN (Fget_buffer, 1); | |
1528 EXFUN (Fget_buffer_create, 1); | |
1529 EXFUN (Fget_file_buffer, 1); | |
1530 EXFUN (Fkill_buffer, 1); | |
1531 EXFUN (Fother_buffer, 3); | |
1532 EXFUN (Frecord_buffer, 1); | |
1533 EXFUN (Fset_buffer, 1); | |
1534 EXFUN (Fset_buffer_modified_p, 2); | |
1535 | |
1536 extern Lisp_Object QSscratch, Qafter_change_function, Qafter_change_functions; | |
1537 extern Lisp_Object Qbefore_change_function, Qbefore_change_functions; | |
1538 extern Lisp_Object Qbuffer_or_string_p, Qdefault_directory, Qfirst_change_hook; | |
1539 extern Lisp_Object Qpermanent_local, Vafter_change_function; | |
1540 extern Lisp_Object Vafter_change_functions, Vbefore_change_function; | |
1541 extern Lisp_Object Vbefore_change_functions, Vbuffer_alist, Vbuffer_defaults; | |
1542 extern Lisp_Object Vinhibit_read_only, Vtransient_mark_mode; | |
1543 | |
1544 /* This structure marks which slots in a buffer have corresponding | 906 /* This structure marks which slots in a buffer have corresponding |
1545 default values in Vbuffer_defaults. | 907 default values in Vbuffer_defaults. |
1546 Each such slot has a nonzero value in this structure. | 908 Each such slot has a nonzero value in this structure. |
1547 The value has only one nonzero bit. | 909 The value has only one nonzero bit. |
1548 | 910 |
1581 /* Avoid excess parentheses, or syntax errors may rear their heads. */ | 943 /* Avoid excess parentheses, or syntax errors may rear their heads. */ |
1582 #define BUFFER_FREE(data) xfree (data) | 944 #define BUFFER_FREE(data) xfree (data) |
1583 #define R_ALLOC_DECLARE(var,data) | 945 #define R_ALLOC_DECLARE(var,data) |
1584 | 946 |
1585 #endif /* !REL_ALLOC */ | 947 #endif /* !REL_ALLOC */ |
1586 | |
1587 extern Lisp_Object Vbuffer_alist; | |
1588 void set_buffer_internal (struct buffer *b); | |
1589 struct buffer *decode_buffer (Lisp_Object buffer, int allow_string); | |
1590 | |
1591 /* from editfns.c */ | |
1592 void widen_buffer (struct buffer *b, int no_clip); | |
1593 int beginning_of_line_p (struct buffer *b, Charbpos pt); | |
1594 | |
1595 /* from insdel.c */ | |
1596 void set_buffer_point (struct buffer *buf, Charbpos pos, Bytebpos bipos); | |
1597 void find_charsets_in_intbyte_string (unsigned char *charsets, | |
1598 const Intbyte *str, | |
1599 Bytecount len); | |
1600 void find_charsets_in_emchar_string (unsigned char *charsets, | |
1601 const Emchar *str, | |
1602 Charcount len); | |
1603 int intbyte_string_displayed_columns (const Intbyte *str, Bytecount len); | |
1604 int emchar_string_displayed_columns (const Emchar *str, Charcount len); | |
1605 void convert_intbyte_string_into_emchar_dynarr (const Intbyte *str, | |
1606 Bytecount len, | |
1607 Emchar_dynarr *dyn); | |
1608 Charcount convert_intbyte_string_into_emchar_string (const Intbyte *str, | |
1609 Bytecount len, | |
1610 Emchar *arr); | |
1611 void convert_emchar_string_into_intbyte_dynarr (Emchar *arr, int nels, | |
1612 Intbyte_dynarr *dyn); | |
1613 Intbyte *convert_emchar_string_into_malloced_string (Emchar *arr, int nels, | |
1614 Bytecount *len_out); | |
1615 /* from marker.c */ | |
1616 void init_buffer_markers (struct buffer *b); | |
1617 void uninit_buffer_markers (struct buffer *b); | |
1618 | |
1619 /* flags for get_buffer_pos_char(), get_buffer_range_char(), etc. */ | |
1620 /* At most one of GB_COERCE_RANGE and GB_NO_ERROR_IF_BAD should be | |
1621 specified. At most one of GB_NEGATIVE_FROM_END and GB_NO_ERROR_IF_BAD | |
1622 should be specified. */ | |
1623 | |
1624 #define GB_ALLOW_PAST_ACCESSIBLE (1 << 0) | |
1625 #define GB_ALLOW_NIL (1 << 1) | |
1626 #define GB_CHECK_ORDER (1 << 2) | |
1627 #define GB_COERCE_RANGE (1 << 3) | |
1628 #define GB_NO_ERROR_IF_BAD (1 << 4) | |
1629 #define GB_NEGATIVE_FROM_END (1 << 5) | |
1630 #define GB_HISTORICAL_STRING_BEHAVIOR (GB_NEGATIVE_FROM_END | GB_ALLOW_NIL) | |
1631 | |
1632 Charbpos get_buffer_pos_char (struct buffer *b, Lisp_Object pos, | |
1633 unsigned int flags); | |
1634 Bytebpos get_buffer_pos_byte (struct buffer *b, Lisp_Object pos, | |
1635 unsigned int flags); | |
1636 void get_buffer_range_char (struct buffer *b, Lisp_Object from, Lisp_Object to, | |
1637 Charbpos *from_out, Charbpos *to_out, | |
1638 unsigned int flags); | |
1639 void get_buffer_range_byte (struct buffer *b, Lisp_Object from, Lisp_Object to, | |
1640 Bytebpos *from_out, Bytebpos *to_out, | |
1641 unsigned int flags); | |
1642 Charcount get_string_pos_char (Lisp_Object string, Lisp_Object pos, | |
1643 unsigned int flags); | |
1644 Bytecount get_string_pos_byte (Lisp_Object string, Lisp_Object pos, | |
1645 unsigned int flags); | |
1646 void get_string_range_char (Lisp_Object string, Lisp_Object from, | |
1647 Lisp_Object to, Charcount *from_out, | |
1648 Charcount *to_out, unsigned int flags); | |
1649 void get_string_range_byte (Lisp_Object string, Lisp_Object from, | |
1650 Lisp_Object to, Bytecount *from_out, | |
1651 Bytecount *to_out, unsigned int flags); | |
1652 Charbpos get_buffer_or_string_pos_char (Lisp_Object object, Lisp_Object pos, | |
1653 unsigned int flags); | |
1654 Bytebpos get_buffer_or_string_pos_byte (Lisp_Object object, Lisp_Object pos, | |
1655 unsigned int flags); | |
1656 void get_buffer_or_string_range_char (Lisp_Object object, Lisp_Object from, | |
1657 Lisp_Object to, Charbpos *from_out, | |
1658 Charbpos *to_out, unsigned int flags); | |
1659 void get_buffer_or_string_range_byte (Lisp_Object object, Lisp_Object from, | |
1660 Lisp_Object to, Bytebpos *from_out, | |
1661 Bytebpos *to_out, unsigned int flags); | |
1662 Charbpos buffer_or_string_accessible_begin_char (Lisp_Object object); | |
1663 Charbpos buffer_or_string_accessible_end_char (Lisp_Object object); | |
1664 Bytebpos buffer_or_string_accessible_begin_byte (Lisp_Object object); | |
1665 Bytebpos buffer_or_string_accessible_end_byte (Lisp_Object object); | |
1666 Charbpos buffer_or_string_absolute_begin_char (Lisp_Object object); | |
1667 Charbpos buffer_or_string_absolute_end_char (Lisp_Object object); | |
1668 Bytebpos buffer_or_string_absolute_begin_byte (Lisp_Object object); | |
1669 Bytebpos buffer_or_string_absolute_end_byte (Lisp_Object object); | |
1670 void record_buffer (Lisp_Object buf); | |
1671 Lisp_Object get_buffer (Lisp_Object name, | |
1672 int error_if_deleted_or_does_not_exist); | |
1673 int map_over_sharing_buffers (struct buffer *buf, | |
1674 int (*mapfun) (struct buffer *buf, | |
1675 void *closure), | |
1676 void *closure); | |
1677 | 948 |
1678 | 949 |
1679 /************************************************************************/ | 950 /************************************************************************/ |
1680 /* Case conversion */ | 951 /* Case conversion */ |
1681 /************************************************************************/ | 952 /************************************************************************/ |
1713 TRT_TABLE_OF (Lisp_Object trt, Emchar c) | 984 TRT_TABLE_OF (Lisp_Object trt, Emchar c) |
1714 { | 985 { |
1715 return TRT_TABLE_CHAR_1 (trt, c); | 986 return TRT_TABLE_CHAR_1 (trt, c); |
1716 } | 987 } |
1717 | 988 |
989 INLINE_HEADER Lisp_Object BUFFER_CASE_TABLE (struct buffer *buf); | |
990 INLINE_HEADER Lisp_Object | |
991 BUFFER_CASE_TABLE (struct buffer *buf) | |
992 { | |
993 return buf ? buf->case_table : Vstandard_case_table; | |
994 } | |
995 | |
1718 /* Macros used below. */ | 996 /* Macros used below. */ |
1719 #define DOWNCASE_TABLE_OF(buf, c) \ | 997 #define DOWNCASE_TABLE_OF(buf, c) \ |
1720 TRT_TABLE_OF (XCASE_TABLE_DOWNCASE (buf->case_table), c) | 998 TRT_TABLE_OF (XCASE_TABLE_DOWNCASE (BUFFER_CASE_TABLE (buf)), c) |
1721 #define UPCASE_TABLE_OF(buf, c) \ | 999 #define UPCASE_TABLE_OF(buf, c) \ |
1722 TRT_TABLE_OF (XCASE_TABLE_UPCASE (buf->case_table), c) | 1000 TRT_TABLE_OF (XCASE_TABLE_UPCASE (BUFFER_CASE_TABLE (buf)), c) |
1723 | 1001 |
1724 /* 1 if CH is upper case. */ | 1002 /* 1 if CH is upper case. */ |
1725 | 1003 |
1726 INLINE_HEADER int UPPERCASEP (struct buffer *buf, Emchar ch); | 1004 INLINE_HEADER int UPPERCASEP (struct buffer *buf, Emchar ch); |
1727 INLINE_HEADER int | 1005 INLINE_HEADER int |
1764 | 1042 |
1765 /* Downcase a character, or make no change if that cannot be done. */ | 1043 /* Downcase a character, or make no change if that cannot be done. */ |
1766 | 1044 |
1767 #define DOWNCASE(buf, ch) DOWNCASE_TABLE_OF (buf, ch) | 1045 #define DOWNCASE(buf, ch) DOWNCASE_TABLE_OF (buf, ch) |
1768 | 1046 |
1769 /************************************************************************/ | |
1770 /* Lisp string representation convenience functions */ | |
1771 /************************************************************************/ | |
1772 /* Because the representation of internally formatted data is subject to change, | |
1773 it's bad style to do something like strcmp (XSTRING_DATA (s), "foo"). | |
1774 Instead, use the portable intbyte_strcmp (XSTRING_DATA (s), "foo") | |
1775 or intbyte_memcmp (XSTRING_DATA (s), "foo", 3). */ | |
1776 | |
1777 /* Like strcmp, except first arg points at internally formatted data, | |
1778 while the second points at a string of only ASCII chars. */ | |
1779 INLINE_HEADER int | |
1780 intbyte_strcmp (const Intbyte *bp, const char *ascii_string); | |
1781 INLINE_HEADER int | |
1782 intbyte_strcmp (const Intbyte *bp, const char *ascii_string) | |
1783 { | |
1784 #ifdef MULE | |
1785 while (1) | |
1786 { | |
1787 int diff; | |
1788 type_checking_assert (BYTE_ASCII_P (*ascii_string)); | |
1789 if ((diff = charptr_emchar (bp) - *(Intbyte *) ascii_string) != 0) | |
1790 return diff; | |
1791 if (*ascii_string == '\0') | |
1792 return 0; | |
1793 ascii_string++; | |
1794 INC_CHARPTR (bp); | |
1795 } | |
1796 #else | |
1797 return strcmp ((char *)bp, ascii_string); | |
1798 #endif | |
1799 } | |
1800 | |
1801 | |
1802 /* Like memcmp, except first arg points at internally formatted data, | |
1803 while the second points at a string of only ASCII chars. */ | |
1804 INLINE_HEADER int | |
1805 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len); | |
1806 INLINE_HEADER int | |
1807 intbyte_memcmp (const Intbyte *bp, const char *ascii_string, Bytecount len) | |
1808 { | |
1809 #ifdef MULE | |
1810 while (len--) | |
1811 { | |
1812 int diff = charptr_emchar (bp) - *(Intbyte *) ascii_string; | |
1813 type_checking_assert (BYTE_ASCII_P (*ascii_string)); | |
1814 if (diff != 0) | |
1815 return diff; | |
1816 ascii_string++; | |
1817 INC_CHARPTR (bp); | |
1818 } | |
1819 return 0; | |
1820 #else | |
1821 return memcmp (bp, ascii_string, len); | |
1822 #endif | |
1823 } | |
1824 | |
1825 #endif /* INCLUDED_buffer_h_ */ | 1047 #endif /* INCLUDED_buffer_h_ */ |