comparison src/text.c @ 1292:f3437b56874d

[xemacs-hg @ 2003-02-13 09:57:04 by ben] profile updates profile.c: Major reworking. Keep track of new information -- total function timing (includes descendants), GC usage, total GC usage (includes descendants). New functions to be called appropriately from eval.c, alloc.c to keep track of this information. Keep track of when we're actually in a function vs. in its profile, for more accurate timing counts. Track profile overhead separately. Create new mechanism for specifying "internal sections" that are tracked just like regular Lisp functions and even appear in the backtrace if `backtrace-with-internal-sections' is non-nil (t by default for error-checking builds). Add some KKCC information for the straight (non-Elisp) hash table used by profile, which contains Lisp objects in its keys -- but not used yet. Remove old ad-hoc methods for tracking garbage collection, redisplay (which was incorrect anyway when Lisp was called within these sections). Don't record any tick info when blocking under MS Windows, since the timer there is in real time rather than in process time. Make `start-profiling', `stop-profiling' interactive. Be consistent wrt. recursive functions and functions currently on the stack when starting or stopping -- together these make implementing the `total' values extremely difficult. When we start profiling, we act as if we just entered all the functions currently on the stack. Likewise when exiting. Create vars in_profile for tracking time spent inside of profiling, and profiling_lock for setting exclusive access to the main hash table when reading from it or modifying it. (protects against getting screwed up by the signal handler going off at the same time.) profile.h: New file. Create macros for declaring internal profiling sections. lisp.h: Move profile-related stuff to profile.h. alloc.c: Keep track of total consing, for profile. Tell profile when we are consing. Use new profile-section method for noting garbage-collection. 
alloc.c: Abort if we attempt to call the allocator reentrantly. backtrace.h, eval.c: Add info for use by profile in the backtrace frame and transfer PUSH_BACKTRACE/POP_BACKTRACE from eval.c, for use with profile. elhash.c: Author comment. eval.c, lisp.h: New Lisp var `backtrace-with-internal-sections'. Set to t when error-checking is on. eval.c: When unwinding, eval.c: Report to profile when we are about-to-call and just-called wrt. a function. alloc.c, eval.c: Allow for "fake" backtrace frames, for internal sections (used by profile and `backtrace-with-internal-sections'). event-Xt.c, event-gtk.c, event-msw.c, event-tty.c: Record when we are actually blocking on an event, for profile's sake. event-stream.c: Record internal profiling sections for getting, dispatching events. extents.c: Record internal profiling sections for map_extents. hash.c, hash.h: Add pregrow_hash_table_if_necessary(). (Used in profile code since the signal handler is the main grower but can't allow a realloc(). We make sure, at critical points, that the table is large enough.) lread.c: Create internal profiling sections for `load' (which may be triggered internally by autoload, etc.). redisplay.c: Remove old profile_redisplay_flag. Use new macros to declare internal profiling section for redisplay. text.c: Use new macros to declare internal profiling sections for char-byte conversion and internal-external conversion. SEMI-UNRELATED CHANGES: ----------------------- text.c: Update the long comments.
author ben
date Thu, 13 Feb 2003 09:57:08 +0000
parents e22b0213b713
children 70921960b980
comparison
equal deleted inserted replaced
1291:3d99b5e6c6ec 1292:f3437b56874d
1 /* Buffer manipulation primitives for XEmacs. 1 /* Buffer manipulation primitives for XEmacs.
2 Copyright (C) 1995 Sun Microsystems, Inc. 2 Copyright (C) 1995 Sun Microsystems, Inc.
3 Copyright (C) 1995, 1996, 2000, 2001, 2002 Ben Wing. 3 Copyright (C) 1995, 1996, 2000, 2001, 2002, 2003 Ben Wing.
4 Copyright (C) 1999 Martin Buchholz. 4 Copyright (C) 1999 Martin Buchholz.
5 5
6 This file is part of XEmacs. 6 This file is part of XEmacs.
7 7
8 XEmacs is free software; you can redistribute it and/or modify it 8 XEmacs is free software; you can redistribute it and/or modify it
30 30
31 #include "buffer.h" 31 #include "buffer.h"
32 #include "charset.h" 32 #include "charset.h"
33 #include "file-coding.h" 33 #include "file-coding.h"
34 #include "lstream.h" 34 #include "lstream.h"
35 #include "profile.h"
35 36
36 37
37 /************************************************************************/ 38 /************************************************************************/
38 /* long comments */ 39 /* long comments */
39 /************************************************************************/ 40 /************************************************************************/
40 41
41 /* 42 /*
42 ========================================================================== 43 ==========================================================================
43 1. Character Sets 44 1. Intro to Characters, Character Sets, and Encodings
44 ========================================================================== 45 ==========================================================================
45
46 A character set (or "charset") is an ordered set of characters.
47 46
48 A character (which is, BTW, a surprisingly complex concept) is, in a 47 A character (which is, BTW, a surprisingly complex concept) is, in a
49 written representation of text, the most basic written unit that has a 48 written representation of text, the most basic written unit that has a
50 meaning of its own. It's comparable to a phoneme when analyzing words 49 meaning of its own. It's comparable to a phoneme when analyzing words
51 in spoken speech. Just like with a phoneme (which is an abstract 50 in spoken speech (for example, the sound of `t' in English, which in
52 concept, and is represented in actual spoken speech by one or more 51 fact has different pronunciations in different words -- aspirated in
53 allophones, ...&&#### finish this., a character is actually an abstract 52 `time', unaspirated in `stop', unreleased or even pronounced as a
54 concept 53 glottal stop in `button', etc. -- but logically is a single concept).
54 Like a phoneme, a character is an abstract concept defined by its
55 *meaning*. The character `lowercase f', for example, can always be used
56 to represent the first letter in the word `fill', regardless of whether
57 it's drawn upright or italic, whether the `fi' combination is drawn as a
58 single ligature, whether there are serifs on the bottom of the vertical
59 stroke, etc. (These different appearances of a single character are
60 often called "graphs" or "glyphs".) Our concern when representing text
61 is on representing the abstract characters, and not on their exact
62 appearance.
63
64 A character set (or "charset"), as we define it, is a set of characters,
65 each with an associated number (or set of numbers -- see below), called
66 a "code point". It's important to understand that a character is not
67 defined by any number attached to it, but by its meaning. For example,
68 ASCII and EBCDIC are two charsets containing exactly the same characters
69 (lowercase and uppercase letters, numbers 0 through 9, particular
70 punctuation marks) but with different numberings. The `comma' character
71 in ASCII and EBCDIC, for instance, is the same character despite having
72 a different numbering. Conversely, when comparing ASCII and JIS-Roman,
73 which look the same except that the latter has a yen sign substituted
74 for the backslash, we would say that the backslash and yen sign are
75 *not* the same characters, despite having the same number (92) and
76 despite the fact that all other characters are present in both charsets,
77 with the same numbering. ASCII and JIS-Roman, then, do *not* have
78 exactly the same characters in them (ASCII has a backslash character but
79 no yen-sign character, and vice-versa for JIS-Roman), unlike ASCII and
80 EBCDIC, even though the numberings in ASCII and JIS-Roman are closer.
81
82 It's also important to distinguish between charsets and encodings. For
83 a simple charset like ASCII, there is only one encoding normally used --
84 each character is represented by a single byte, with the same value as
85 its code point. For more complicated charsets, however, things are not
86 so obvious. Unicode version 2, for example, is a large charset with
87 thousands of characters, each indexed by a 16-bit number, often
88 represented in hex, e.g. 0x05D0 for the Hebrew letter "aleph". One
89 obvious encoding uses two bytes per character (actually two encodings,
90 depending on which of the two possible byte orderings is chosen). This
91 encoding is convenient for internal processing of Unicode text; however,
92 it's incompatible with ASCII, so a different encoding, e.g. UTF-8, is
93 usually used for external text, for example files or e-mail. UTF-8
94 represents Unicode characters with one to three bytes (often extended to
95 six bytes to handle characters with up to 31-bit indices). Unicode
96 characters 00 to 7F (identical with ASCII) are directly represented with
97 one byte, and other characters with two or more bytes, each in the range
98 80 to FF.
99
100 In general, a single encoding may be able to represent more than one
101 charset.
102
103 See also man/lispref/mule.texi.
55 104
105 ==========================================================================
106 2. Character Sets
107 ==========================================================================
108
56 A particular character in a charset is indexed using one or 109 A particular character in a charset is indexed using one or
57 more "position codes", which are non-negative integers. 110 more "position codes", which are non-negative integers.
58 The number of position codes needed to identify a particular 111 The number of position codes needed to identify a particular
59 character in a charset is called the "dimension" of the 112 character in a charset is called the "dimension" of the
60 charset. In XEmacs/Mule, all charsets have 1 or 2 dimensions, 113 charset. In XEmacs/Mule, all charsets have 1 or 2 dimensions,
129 160 - 255 Latin-1 32 - 127 182 160 - 255 Latin-1 32 - 127
130 183
131 This is a bit ad-hoc but gets the job done. 184 This is a bit ad-hoc but gets the job done.
132 185
133 ========================================================================== 186 ==========================================================================
134 2. Encodings 187 3. Encodings
135 ========================================================================== 188 ==========================================================================
136 189
137 An "encoding" is a way of numerically representing 190 An "encoding" is a way of numerically representing
138 characters from one or more character sets. If an encoding 191 characters from one or more character sets. If an encoding
139 only encompasses one character set, then the position codes 192 only encompasses one character set, then the position codes
210 0x1B 0x24 0x42 ESC $ B invoke Japanese-JISX0208 263 0x1B 0x24 0x42 ESC $ B invoke Japanese-JISX0208
211 264
212 Initially, Printing-ASCII is invoked. 265 Initially, Printing-ASCII is invoked.
213 266
214 ========================================================================== 267 ==========================================================================
215 3. Internal Mule Encodings 268 4. Internal Mule Encodings
216 ========================================================================== 269 ==========================================================================
217 270
218 In XEmacs/Mule, each character set is assigned a unique number, 271 In XEmacs/Mule, each character set is assigned a unique number,
219 called a "leading byte". This is used in the encodings of a 272 called a "leading byte". This is used in the encodings of a
220 character. Leading bytes are in the range 0x80 - 0xFF 273 character. Leading bytes are in the range 0x80 - 0xFF
334 byte in the textual representation of the character. (This is important 387 byte in the textual representation of the character. (This is important
335 because the Boyer-Moore algorithm works by looking at the last byte 388 because the Boyer-Moore algorithm works by looking at the last byte
336 of the search string and &&#### finish this. 389 of the search string and &&#### finish this.
337 390
338 ========================================================================== 391 ==========================================================================
339 4. Buffer Positions and Other Typedefs 392 5. Buffer Positions and Other Typedefs
340 ========================================================================== 393 ==========================================================================
341 394
342 A. Buffer Positions 395 A. Buffer Positions
343 396
344 There are three possible ways to specify positions in a buffer. All 397 There are three possible ways to specify positions in a buffer. All
381 except in the case of characters at the gap position. 434 except in the case of characters at the gap position.
382 435
383 B. Other Typedefs 436 B. Other Typedefs
384 437
385 Ichar: 438 Ichar:
386 ------- 439 ------
387 This typedef represents a single Emacs character, which can be 440 This typedef represents a single Emacs character, which can be
388 ASCII, ISO-8859, or some extended character, as would typically 441 ASCII, ISO-8859, or some extended character, as would typically
389 be used for Kanji. Note that the representation of a character 442 be used for Kanji. Note that the representation of a character
390 as an Ichar is *not* the same as the representation of that 443 as an Ichar is *not* the same as the representation of that
391 same character in a string; thus, you cannot do the standard 444 same character in a string; thus, you cannot do the standard
403 456
404 This means that Ichar values are upwardly compatible with 457 This means that Ichar values are upwardly compatible with
405 the standard 8-bit representation of ASCII/ISO-8859-1. 458 the standard 8-bit representation of ASCII/ISO-8859-1.
406 459
407 Ibyte: 460 Ibyte:
408 -------- 461 ------
409 The data in a buffer or string is logically made up of Ibyte 462 The data in a buffer or string is logically made up of Ibyte
410 objects, where an Ibyte takes up the same amount of space as a 463 objects, where an Ibyte takes up the same amount of space as a
411 char. (It is declared differently, though, to catch invalid 464 char. (It is declared differently, though, to catch invalid
412 usages.) Strings stored using Ibytes are said to be in 465 usages.) Strings stored using Ibytes are said to be in
413 "internal format". The important characteristics of internal 466 "internal format". The important characteristics of internal
426 character in constant time. 479 character in constant time.
427 -- When searching for a substring or an ASCII character 480 -- When searching for a substring or an ASCII character
428 within the string, you need merely use standard 481 within the string, you need merely use standard
429 searching routines. 482 searching routines.
430 483
431 array of char: 484 Extbyte:
432 -------------- 485 --------
433 Strings that go in or out of Emacs are in "external format", 486 Strings that go in or out of Emacs are in "external format",
434 typedef'ed as an array of char or a char *. There is more 487 typedef'ed as an array of char or a char *. There is more
435 than one external format (JIS, EUC, etc.) but they all 488 than one external format (JIS, EUC, etc.) but they all
436 have similar properties. They are modal encodings, 489 have similar properties. They are modal encodings,
437 which is to say that the meaning of particular bytes is 490 which is to say that the meaning of particular bytes is
513 MAX_BYTEBPOS_GAP_SIZE_3 is a multiple of 3. (As it happens, 566 MAX_BYTEBPOS_GAP_SIZE_3 is a multiple of 3. (As it happens,
514 65535 is a multiple of 3, but this may not always be the 567 65535 is a multiple of 3, but this may not always be the
515 case. #### unfinished 568 case. #### unfinished
516 569
517 ========================================================================== 570 ==========================================================================
518 5. Miscellaneous 571 6. Miscellaneous
519 ========================================================================== 572 ==========================================================================
520 573
521 A. Unicode Support 574 A. Unicode Support
522 575
523 Adding Unicode support is very desirable. Unicode will likely be a 576 Unicode support is very desirable. Currently we know how to handle
524 very common representation in the future, and thus we should 577 externally-encoded Unicode data in various encodings -- UTF-16, UTF-8,
525 represent Unicode characters using three bytes instead of four. 578 etc. However, we really need to represent Unicode characters internally
526 This means we need to find leading bytes for Unicode. Given that 579 as-is, rather than converting to some language-specific character set.
527 there are 65,536 characters in Unicode and we can attach 96x96 = 580 For efficiency, we should represent Unicode characters using 3 bytes
528 9,216 characters per leading byte, we need eight leading bytes for 581 rather than 4. This means we need to find leading bytes for Unicode.
529 Unicode. We currently have four free (0x9A - 0x9D), and with a 582 Given that there are 65,536 characters in Unicode and we can attach
530 little bit of rearranging we can get five: ASCII doesn't really 583 96x96 = 9,216 characters per leading byte, we need eight leading bytes
531 need to take up a leading byte. (We could just as well use 0x7F, 584 for Unicode. We currently have four free (0x9A - 0x9D), and with a
532 with a little change to the functions that assume that 0x80 is the 585 little bit of rearranging we can get five: ASCII doesn't really need to
533 lowest leading byte.) This means we still need to dump three 586 take up a leading byte. (We could just as well use 0x7F, with a little
534 leading bytes and move them into private space. The CNS charsets 587 change to the functions that assume that 0x80 is the lowest leading
535 are good candidates since they are rarely used, and 588 byte.) This means we still need to dump three leading bytes and move
536 JAPANESE_JISX0208_1978 is becoming less and less used and could 589 them into private space. The CNS charsets are good candidates since
537 also be dumped. 590 they are rarely used, and JAPANESE_JISX0208_1978 is becoming less and
591 less used and could also be dumped.
538 592
539 B. Composite Characters 593 B. Composite Characters
540 594
541 Composite characters are characters constructed by overstriking two 595 Composite characters are characters constructed by overstriking two
542 or more regular characters. 596 or more regular characters.
621 static int composite_char_col_next; 675 static int composite_char_col_next;
622 676
623 #endif /* ENABLE_COMPOSITE_CHARS */ 677 #endif /* ENABLE_COMPOSITE_CHARS */
624 678
625 #endif /* MULE */ 679 #endif /* MULE */
680
681 Lisp_Object QSin_char_byte_conversion;
682 Lisp_Object QSin_internal_external_conversion;
626 683
627 684
628 /************************************************************************/ 685 /************************************************************************/
629 /* qxestr***() functions */ 686 /* qxestr***() functions */
630 /************************************************************************/ 687 /************************************************************************/
1597 int size; 1654 int size;
1598 int forward_p; 1655 int forward_p;
1599 Bytebpos retval; 1656 Bytebpos retval;
1600 int diff_so_far; 1657 int diff_so_far;
1601 int add_to_cache = 0; 1658 int add_to_cache = 0;
1659 PROFILE_DECLARE ();
1602 1660
1603 /* Check for some cached positions, for speed. */ 1661 /* Check for some cached positions, for speed. */
1604 if (x == BUF_PT (buf)) 1662 if (x == BUF_PT (buf))
1605 return BYTE_BUF_PT (buf); 1663 return BYTE_BUF_PT (buf);
1606 if (x == BUF_ZV (buf)) 1664 if (x == BUF_ZV (buf))
1607 return BYTE_BUF_ZV (buf); 1665 return BYTE_BUF_ZV (buf);
1608 if (x == BUF_BEGV (buf)) 1666 if (x == BUF_BEGV (buf))
1609 return BYTE_BUF_BEGV (buf); 1667 return BYTE_BUF_BEGV (buf);
1668
1669 PROFILE_RECORD_ENTERING_SECTION (QSin_char_byte_conversion);
1610 1670
1611 bufmin = buf->text->mule_bufmin; 1671 bufmin = buf->text->mule_bufmin;
1612 bufmax = buf->text->mule_bufmax; 1672 bufmax = buf->text->mule_bufmax;
1613 bytmin = buf->text->mule_bytmin; 1673 bytmin = buf->text->mule_bytmin;
1614 bytmax = buf->text->mule_bytmax; 1674 bytmax = buf->text->mule_bytmax;
1856 replace_loc = not_very_random_number & 15; 1916 replace_loc = not_very_random_number & 15;
1857 buf->text->mule_charbpos_cache[replace_loc] = x; 1917 buf->text->mule_charbpos_cache[replace_loc] = x;
1858 buf->text->mule_bytebpos_cache[replace_loc] = retval; 1918 buf->text->mule_bytebpos_cache[replace_loc] = retval;
1859 } 1919 }
1860 1920
1921 PROFILE_RECORD_EXITING_SECTION (QSin_char_byte_conversion);
1922
1861 return retval; 1923 return retval;
1862 } 1924 }
1863 1925
1864 /* The logic in this function is almost identical to the logic in 1926 /* The logic in this function is almost identical to the logic in
1865 the previous function. */ 1927 the previous function. */
1874 int size; 1936 int size;
1875 int forward_p; 1937 int forward_p;
1876 Charbpos retval; 1938 Charbpos retval;
1877 int diff_so_far; 1939 int diff_so_far;
1878 int add_to_cache = 0; 1940 int add_to_cache = 0;
1941 PROFILE_DECLARE ();
1879 1942
1880 /* Check for some cached positions, for speed. */ 1943 /* Check for some cached positions, for speed. */
1881 if (x == BYTE_BUF_PT (buf)) 1944 if (x == BYTE_BUF_PT (buf))
1882 return BUF_PT (buf); 1945 return BUF_PT (buf);
1883 if (x == BYTE_BUF_ZV (buf)) 1946 if (x == BYTE_BUF_ZV (buf))
1884 return BUF_ZV (buf); 1947 return BUF_ZV (buf);
1885 if (x == BYTE_BUF_BEGV (buf)) 1948 if (x == BYTE_BUF_BEGV (buf))
1886 return BUF_BEGV (buf); 1949 return BUF_BEGV (buf);
1950
1951 PROFILE_RECORD_ENTERING_SECTION (QSin_char_byte_conversion);
1887 1952
1888 bufmin = buf->text->mule_bufmin; 1953 bufmin = buf->text->mule_bufmin;
1889 bufmax = buf->text->mule_bufmax; 1954 bufmax = buf->text->mule_bufmax;
1890 bytmin = buf->text->mule_bytmin; 1955 bytmin = buf->text->mule_bytmin;
1891 bytmax = buf->text->mule_bytmax; 1956 bytmax = buf->text->mule_bytmax;
2132 not_very_random_number += 621; 2197 not_very_random_number += 621;
2133 replace_loc = not_very_random_number & 15; 2198 replace_loc = not_very_random_number & 15;
2134 buf->text->mule_charbpos_cache[replace_loc] = retval; 2199 buf->text->mule_charbpos_cache[replace_loc] = retval;
2135 buf->text->mule_bytebpos_cache[replace_loc] = x; 2200 buf->text->mule_bytebpos_cache[replace_loc] = x;
2136 } 2201 }
2202
2203 PROFILE_RECORD_EXITING_SECTION (QSin_char_byte_conversion);
2137 2204
2138 return retval; 2205 return retval;
2139 } 2206 }
2140 2207
2141 /* Text of length BYTELENGTH and CHARLENGTH (in different units) 2208 /* Text of length BYTELENGTH and CHARLENGTH (in different units)
2757 dfc_conversion_data *sink) 2824 dfc_conversion_data *sink)
2758 { 2825 {
2759 /* It's guaranteed that many callers are not prepared for GC here, 2826 /* It's guaranteed that many callers are not prepared for GC here,
2760 esp. given that this code conversion occurs in many very hidden 2827 esp. given that this code conversion occurs in many very hidden
2761 places. */ 2828 places. */
2762 int count = begin_gc_forbidden (); 2829 int count;
2763 Extbyte_dynarr *conversion_out_dynarr; 2830 Extbyte_dynarr *conversion_out_dynarr;
2831 PROFILE_DECLARE ();
2832
2833 PROFILE_RECORD_ENTERING_SECTION (QSin_internal_external_conversion);
2834
2835 count = begin_gc_forbidden ();
2764 2836
2765 type_checking_assert 2837 type_checking_assert
2766 (((source_type == DFC_TYPE_DATA) || 2838 (((source_type == DFC_TYPE_DATA) ||
2767 (source_type == DFC_TYPE_LISP_LSTREAM && LSTREAMP (source->lisp_object)) || 2839 (source_type == DFC_TYPE_LISP_LSTREAM && LSTREAMP (source->lisp_object)) ||
2768 (source_type == DFC_TYPE_LISP_STRING && STRINGP (source->lisp_object))) 2840 (source_type == DFC_TYPE_LISP_STRING && STRINGP (source->lisp_object)))
2943 /* double zero-extend because we may be dealing with Unicode data */ 3015 /* double zero-extend because we may be dealing with Unicode data */
2944 Dynarr_add (conversion_out_dynarr, '\0'); 3016 Dynarr_add (conversion_out_dynarr, '\0');
2945 Dynarr_add (conversion_out_dynarr, '\0'); 3017 Dynarr_add (conversion_out_dynarr, '\0');
2946 sink->data.ptr = Dynarr_atp (conversion_out_dynarr, 0); 3018 sink->data.ptr = Dynarr_atp (conversion_out_dynarr, 0);
2947 } 3019 }
3020
3021 PROFILE_RECORD_EXITING_SECTION (QSin_internal_external_conversion);
2948 } 3022 }
2949 3023
2950 void 3024 void
2951 dfc_convert_to_internal_format (dfc_conversion_type source_type, 3025 dfc_convert_to_internal_format (dfc_conversion_type source_type,
2952 dfc_conversion_data *source, 3026 dfc_conversion_data *source,
2955 dfc_conversion_data *sink) 3029 dfc_conversion_data *sink)
2956 { 3030 {
2957 /* It's guaranteed that many callers are not prepared for GC here, 3031 /* It's guaranteed that many callers are not prepared for GC here,
2958 esp. given that this code conversion occurs in many very hidden 3032 esp. given that this code conversion occurs in many very hidden
2959 places. */ 3033 places. */
2960 int count = begin_gc_forbidden (); 3034 int count;
2961 Ibyte_dynarr *conversion_in_dynarr; 3035 Ibyte_dynarr *conversion_in_dynarr;
3036 PROFILE_DECLARE ();
3037
3038 PROFILE_RECORD_ENTERING_SECTION (QSin_internal_external_conversion);
3039
3040 count = begin_gc_forbidden ();
2962 3041
2963 type_checking_assert 3042 type_checking_assert
2964 ((source_type == DFC_TYPE_DATA || 3043 ((source_type == DFC_TYPE_DATA ||
2965 source_type == DFC_TYPE_LISP_LSTREAM) 3044 source_type == DFC_TYPE_LISP_LSTREAM)
2966 && 3045 &&
3008 #else 3087 #else
3009 Dynarr_add_many (conversion_in_dynarr, source->data.ptr, source->data.len); 3088 Dynarr_add_many (conversion_in_dynarr, source->data.ptr, source->data.len);
3010 #endif 3089 #endif
3011 } 3090 }
3012 #ifdef HAVE_WIN32_CODING_SYSTEMS 3091 #ifdef HAVE_WIN32_CODING_SYSTEMS
3013 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is involved */ 3092 /* Optimize the common case involving Unicode where only ASCII/Latin-1 is
3093 involved */
3014 else if (source_type != DFC_TYPE_LISP_LSTREAM && 3094 else if (source_type != DFC_TYPE_LISP_LSTREAM &&
3015 sink_type != DFC_TYPE_LISP_LSTREAM && 3095 sink_type != DFC_TYPE_LISP_LSTREAM &&
3016 dfc_coding_system_is_unicode (coding_system)) 3096 dfc_coding_system_is_unicode (coding_system))
3017 { 3097 {
3018 const Ibyte *ptr = (const Ibyte *) source->data.ptr + 1; 3098 const Ibyte *ptr = (const Ibyte *) source->data.ptr + 1;
3133 cases. So we add a second zero, just like for external data 3213 cases. So we add a second zero, just like for external data
3134 (in that case, because we may be converting to Unicode). */ 3214 (in that case, because we may be converting to Unicode). */
3135 Dynarr_add (conversion_in_dynarr, '\0'); 3215 Dynarr_add (conversion_in_dynarr, '\0');
3136 sink->data.ptr = Dynarr_atp (conversion_in_dynarr, 0); 3216 sink->data.ptr = Dynarr_atp (conversion_in_dynarr, 0);
3137 } 3217 }
3218
3219 PROFILE_RECORD_EXITING_SECTION (QSin_internal_external_conversion);
3138 } 3220 }
3139 3221
3140 3222
3141 /************************************************************************/ 3223 /************************************************************************/
3142 /* Basic Ichar functions */ 3224 /* Basic Ichar functions */
3666 void 3748 void
3667 vars_of_text (void) 3749 vars_of_text (void)
3668 { 3750 {
3669 reinit_vars_of_text (); 3751 reinit_vars_of_text ();
3670 3752
3753 QSin_char_byte_conversion = build_msg_string ("(in char-byte conversion)");
3754 staticpro (&QSin_char_byte_conversion);
3755 QSin_internal_external_conversion =
3756 build_msg_string ("(in internal-external conversion)");
3757 staticpro (&QSin_internal_external_conversion);
3758
3671 #ifdef ENABLE_COMPOSITE_CHARS 3759 #ifdef ENABLE_COMPOSITE_CHARS
3672 /* #### not dumped properly */ 3760 /* #### not dumped properly */
3673 composite_char_row_next = 32; 3761 composite_char_row_next = 32;
3674 composite_char_col_next = 32; 3762 composite_char_col_next = 32;
3675 3763