Mercurial > hg > xemacs-beta
annotate src/text.h @ 5802:236e4afc565d
Autoload within #'keymapp, as documented.
src/ChangeLog addition:
2014-07-02 Aidan Kehoe <kehoea@parhasard.net>
* keymap.c (Fkeymapp):
Autoload within this, as documented. Our callers are not prepared
to do the intelligent thing if a symbol that is fboundp to an
autoloaded keymap, is not itself #'keymapp.
lisp/ChangeLog addition:
2014-07-02 Aidan Kehoe <kehoea@parhasard.net>
* byte-optimize.el (side-effect-free-fns):
#'keymapp is not side-effect-free, it can autoload.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Wed, 02 Jul 2014 17:45:49 +0100 |
parents | 6355de501637 |
children | b3824b7f5627 |
rev | line source |
---|---|
771 | 1 /* Header file for text manipulation primitives and macros. |
2 Copyright (C) 1985-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
4 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2010 Ben Wing. |
771 | 5 |
6 This file is part of XEmacs. | |
7 | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5254
diff
changeset
|
8 XEmacs is free software: you can redistribute it and/or modify it |
771 | 9 under the terms of the GNU General Public License as published by the |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5254
diff
changeset
|
10 Free Software Foundation, either version 3 of the License, or (at your |
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5254
diff
changeset
|
11 option) any later version. |
771 | 12 |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
5254
diff
changeset
|
19 along with XEmacs. If not, see <http://www.gnu.org/licenses/>. */ |
771 | 20 |
21 /* Synched up with: FSF 19.30. */ | |
22 | |
23 /* Authorship: | |
24 | |
25 Mostly written by Ben Wing, starting around 1995. | |
26 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz, | |
27 designed by Ben Wing based on earlier macros by Ben Wing. | |
28 Separated out June 18, 2000 from buffer.h into text.h. | |
29 */ | |
30 | |
31 #ifndef INCLUDED_text_h_ | |
32 #define INCLUDED_text_h_ | |
33 | |
912 | 34 #ifdef HAVE_WCHAR_H |
771 | 35 #include <wchar.h> |
912 | 36 #else |
1257 | 37 size_t wcslen (const wchar_t *); |
912 | 38 #endif |
1204 | 39 #ifndef HAVE_STRLWR |
1257 | 40 char *strlwr (char *); |
1204 | 41 #endif |
42 #ifndef HAVE_STRUPR | |
1257 | 43 char *strupr (char *); |
1204 | 44 #endif |
771 | 45 |
1743 | 46 BEGIN_C_DECLS |
1650 | 47 |
5200
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
48 /* Forward compatibility from ben-unicode-internal: Following used for |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
49 functions that do character conversion and need to handle errors. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
50 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
51 enum converr |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
52 { |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
53 /* ---- Basic actions ---- */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
54 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
55 /* Do nothing upon failure and return a failure indication. |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
56 Same as what happens when the *_raw() version is called. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
57 CONVERR_FAIL, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
58 /* abort() on failure, i.e. crash. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
59 CONVERR_ABORT, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
60 /* Signal a Lisp error. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
61 CONVERR_ERROR, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
62 /* Try to "recover" and continue processing. Currently this is always |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
63 the same as CONVERR_SUBSTITUTE, where one of the substitution |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
64 characters defined below (CANT_CONVERT_*) is used. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
65 CONVERR_SUCCEED, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
66 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
67 /* ---- More specific actions ---- */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
68 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
69 /* Substitute something (0xFFFD, the Unicode replacement character, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
70 when converting to Unicode or to a Unicode-internal Ichar, JISX0208 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
71 GETA mark when converting to non-Mule Ichar). */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
72 CONVERR_SUBSTITUTE, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
73 /* Use private Unicode space when converting to Unicode. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
74 CONVERR_USE_PRIVATE |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
75 }; |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
76 |
5092
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
77 /************************************************************************/ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
78 /* A short intro to the format of text and of characters */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
79 /************************************************************************/ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
80 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
81 /* |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
82 "internally formatted text" and the term "internal format" in |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
83 general are likely to refer to the format of text in buffers and |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
84 strings; "externally formatted text" and the term "external format" |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
85 refer to any text format used in the O.S. or elsewhere outside of |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
86 XEmacs. The format of text and of a character are related and |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
87 there must be a one-to-one relationship (hopefully through a |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
88 relatively simple algorithmic means of conversion) between a string |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
89 of text and an equivalent array of characters, but the conversion |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
90 between the two is NOT necessarily trivial. |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
91 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
92 In a non-Mule XEmacs, allowed characters are numbered 0 through |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
93 255, where no fixed meaning is assigned to them, but (when |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
94 representing text, rather than bytes in a binary file) in practice |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
95 the lower half represents ASCII and the upper half some other 8-bit |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
96 character set (chosen by setting the font, case tables, syntax |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
97 tables, etc. appropriately for the character set through ad-hoc |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
98 means such as the `iso-8859-1' file and the |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
99 `standard-display-european' function). |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
100 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
101 For more info, see `text.c' and the Internals Manual. |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
102 */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
103 |
771 | 104 /* ---------------------------------------------------------------------- */ |
105 /* Super-basic character properties */ | |
106 /* ---------------------------------------------------------------------- */ | |
107 | |
108 /* These properties define the specifics of how our current encoding fits | |
109 in the basic model used for the encoding. Because this model is the same | |
110 as is used for UTF-8, all these properties could be defined for it, too. | |
111 This would instantly make the rest of this file work with UTF-8 (with | |
112 the exception of a few called functions that would need to be redefined). | |
113 | |
114 (UTF-2000 implementers, take note!) | |
115 */ | |
116 | |
117 /* If you want more than this, you need to include charset.h */ | |
118 | |
119 #ifndef MULE | |
120 | |
826 | 121 #define rep_bytes_by_first_byte(fb) 1 |
122 #define byte_ascii_p(byte) 1 | |
867 | 123 #define MAX_ICHAR_LEN 1 |
771 | 124 |
125 #else /* MULE */ | |
126 | |
127 /* These are carefully designed to work if BYTE is signed or unsigned. */ | |
128 /* Note that SPC and DEL are considered ASCII, not control. */ | |
129 | |
826 | 130 #define byte_ascii_p(byte) (((byte) & ~0x7f) == 0) |
131 #define byte_c0_p(byte) (((byte) & ~0x1f) == 0) | |
132 #define byte_c1_p(byte) (((byte) & ~0x1f) == 0x80) | |
771 | 133 |
134 /* Does BYTE represent the first byte of a character? */ | |
135 | |
826 | 136 #ifdef ERROR_CHECK_TEXT |
137 | |
138 DECLARE_INLINE_HEADER ( | |
139 int | |
867 | 140 ibyte_first_byte_p_1 (int byte, const char *file, int line) |
826 | 141 ) |
142 { | |
143 assert_at_line (byte >= 0 && byte < 256, file, line); | |
144 return byte < 0xA0; | |
145 } | |
146 | |
867 | 147 #define ibyte_first_byte_p(byte) \ |
148 ibyte_first_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 149 |
150 #else | |
151 | |
867 | 152 #define ibyte_first_byte_p(byte) ((byte) < 0xA0) |
826 | 153 |
154 #endif | |
155 | |
156 #ifdef ERROR_CHECK_TEXT | |
771 | 157 |
158 /* Does BYTE represent the first byte of a multi-byte character? */ | |
159 | |
826 | 160 DECLARE_INLINE_HEADER ( |
161 int | |
867 | 162 ibyte_leading_byte_p_1 (int byte, const char *file, int line) |
826 | 163 ) |
164 { | |
165 assert_at_line (byte >= 0 && byte < 256, file, line); | |
166 return byte_c1_p (byte); | |
167 } | |
168 | |
867 | 169 #define ibyte_leading_byte_p(byte) \ |
170 ibyte_leading_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 171 |
172 #else | |
173 | |
867 | 174 #define ibyte_leading_byte_p(byte) byte_c1_p (byte) |
826 | 175 |
176 #endif | |
771 | 177 |
178 /* Table of number of bytes in the string representation of a character | |
179 indexed by the first byte of that representation. | |
180 | |
181 This value can be derived in other ways -- e.g. something like | |
826 | 182 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte)) |
771 | 183 but it's faster this way. */ |
1632 | 184 extern MODULE_API const Bytecount rep_bytes_by_first_byte[0xA0]; |
771 | 185 |
186 /* Number of bytes in the string representation of a character. */ | |
788 | 187 |
800 | 188 #ifdef ERROR_CHECK_TEXT |
788 | 189 |
826 | 190 DECLARE_INLINE_HEADER ( |
191 Bytecount | |
192 rep_bytes_by_first_byte_1 (int fb, const char *file, int line) | |
193 ) | |
771 | 194 { |
826 | 195 assert_at_line (fb >= 0 && fb < 0xA0, file, line); |
771 | 196 return rep_bytes_by_first_byte[fb]; |
197 } | |
198 | |
826 | 199 #define rep_bytes_by_first_byte(fb) \ |
200 rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__) | |
788 | 201 |
800 | 202 #else /* ERROR_CHECK_TEXT */ |
788 | 203 |
826 | 204 #define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb]) |
788 | 205 |
800 | 206 #endif /* ERROR_CHECK_TEXT */ |
788 | 207 |
826 | 208 /* Is this character represented by more than one byte in a string in the |
209 default format? */ | |
210 | |
867 | 211 #define ichar_multibyte_p(c) ((c) >= 0x80) |
212 | |
213 #define ichar_ascii_p(c) (!ichar_multibyte_p (c)) | |
826 | 214 |
215 /* Maximum number of bytes per Emacs character when represented as text, in | |
216 any format. | |
217 */ | |
771 | 218 |
867 | 219 #define MAX_ICHAR_LEN 4 |
771 | 220 |
826 | 221 #endif /* not MULE */ |
222 | |
5092
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
223 #ifdef MULE |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
224 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
225 MODULE_API int non_ascii_valid_ichar_p (Ichar ch); |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
226 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
227 /* Return whether the given Ichar is valid. |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
228 */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
229 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
230 DECLARE_INLINE_HEADER ( |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
231 int |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
232 valid_ichar_p (Ichar ch) |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
233 ) |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
234 { |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
235 return (! (ch & ~0xFF)) || non_ascii_valid_ichar_p (ch); |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
236 } |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
237 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
238 #else /* not MULE */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
239 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
240 /* This works when CH is negative, and correctly returns non-zero only when CH |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
241 is in the range [0, 255], inclusive. */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
/* Non-Mule validity test: non-zero iff every bit of CH above the low eight
   is clear, i.e. CH is in [0, 255]; negative values have high bits set and
   are rejected.  CH is parenthesized so that an argument built from
   low-precedence operators (e.g. `a | b' or `c ? x : y') is masked as a
   whole rather than only its last operand. */
#define valid_ichar_p(ch) (! ((ch) & ~0xFF))
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
243 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
244 #endif /* not MULE */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
245 |
2367 | 246 /* For more discussion, see text.c, "handling non-default formats" */ |
247 | |
826 | 248 typedef enum internal_format |
249 { | |
250 FORMAT_DEFAULT, | |
251 FORMAT_8_BIT_FIXED, | |
252 FORMAT_16_BIT_FIXED, /* not implemented */ | |
253 FORMAT_32_BIT_FIXED /* not implemented */ | |
254 } Internal_Format; | |
255 | |
256 #ifdef MULE | |
257 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to | |
258 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed | |
259 values may depend on the buffer, e.g. depending on what language the | |
260 text in the buffer is in. */ | |
261 | |
867 | 262 /* True if Ichar CH can be represented in 8-bit-fixed format. */ |
263 #define ichar_8_bit_fixed_p(ch, object) (((ch) & ~0xff) == 0) | |
264 /* Convert Ichar CH to an 8-bit int, as will be stored in the buffer. */ | |
265 #define ichar_to_raw_8_bit_fixed(ch, object) ((Ibyte) (ch)) | |
826 | 266 /* Convert the other way. */ |
867 | 267 #define raw_8_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
268 | |
269 #define ichar_16_bit_fixed_p(ch, object) (((ch) & ~0xffff) == 0) | |
270 /* Convert Ichar CH to a 16-bit int, as will be stored in the buffer. */ | |
271 #define ichar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch)) | |
826 | 272 /* Convert the other way. */ |
867 | 273 #define raw_16_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
274 | |
275 /* Convert Ichar CH to a 32-bit int, as will be stored in the buffer. */ | |
276 #define ichar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch)) | |
826 | 277 /* Convert the other way. */ |
867 | 278 #define raw_32_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
826 | 279 |
280 /* Return the "raw value" of a character as stored in the buffer. In the | |
281 default format, this is just the same as the character. In fixed-width | |
282 formats, this is the actual value in the buffer, which will be limited | |
283 to the range as established by the format. This is used when searching | |
284 for a character in a buffer -- it's faster to convert the character to | |
285 the raw value and look for that, than repeatedly convert each raw value | |
286 in the buffer into a character. */ | |
287 | |
288 DECLARE_INLINE_HEADER ( | |
867 | 289 Raw_Ichar |
2286 | 290 ichar_to_raw (Ichar ch, Internal_Format fmt, |
291 Lisp_Object UNUSED (object)) | |
826 | 292 ) |
293 { | |
294 switch (fmt) | |
295 { | |
296 case FORMAT_DEFAULT: | |
867 | 297 return (Raw_Ichar) ch; |
826 | 298 case FORMAT_16_BIT_FIXED: |
867 | 299 text_checking_assert (ichar_16_bit_fixed_p (ch, object)); |
300 return (Raw_Ichar) ichar_to_raw_16_bit_fixed (ch, object); | |
826 | 301 case FORMAT_32_BIT_FIXED: |
867 | 302 return (Raw_Ichar) ichar_to_raw_32_bit_fixed (ch, object); |
826 | 303 default: |
304 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 305 text_checking_assert (ichar_8_bit_fixed_p (ch, object)); |
306 return (Raw_Ichar) ichar_to_raw_8_bit_fixed (ch, object); | |
826 | 307 } |
308 } | |
309 | |
310 /* Return whether CH is representable in the given format in the given | |
311 object. */ | |
312 | |
313 DECLARE_INLINE_HEADER ( | |
314 int | |
2286 | 315 ichar_fits_in_format (Ichar ch, Internal_Format fmt, |
316 Lisp_Object UNUSED (object)) | |
826 | 317 ) |
318 { | |
319 switch (fmt) | |
320 { | |
321 case FORMAT_DEFAULT: | |
322 return 1; | |
323 case FORMAT_16_BIT_FIXED: | |
867 | 324 return ichar_16_bit_fixed_p (ch, object); |
826 | 325 case FORMAT_32_BIT_FIXED: |
326 return 1; | |
327 default: | |
328 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 329 return ichar_8_bit_fixed_p (ch, object); |
826 | 330 } |
331 } | |
332 | |
333 /* Assuming the formats are the same, return whether the two objects | |
334 represent text in exactly the same way. */ | |
335 | |
336 DECLARE_INLINE_HEADER ( | |
337 int | |
2286 | 338 objects_have_same_internal_representation (Lisp_Object UNUSED (srcobj), |
339 Lisp_Object UNUSED (dstobj)) | |
826 | 340 ) |
341 { | |
342 /* &&#### implement this properly when we allow per-object format | |
343 differences */ | |
344 return 1; | |
345 } | |
346 | |
347 #else | |
348 | |
867 | 349 #define ichar_to_raw(ch, fmt, object) ((Raw_Ichar) (ch)) |
350 #define ichar_fits_in_format(ch, fmt, object) 1 | |
826 | 351 #define objects_have_same_internal_representation(srcobj, dstobj) 1 |
352 | |
771 | 353 #endif /* MULE */ |
354 | |
1632 | 355 MODULE_API int dfc_coding_system_is_unicode (Lisp_Object codesys); |
771 | 356 |
357 DECLARE_INLINE_HEADER ( | |
358 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys) | |
359 ) | |
360 { | |
361 if (dfc_coding_system_is_unicode (codesys)) | |
362 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr); | |
363 else | |
364 return strlen ((char *) ptr); | |
365 } | |
366 | |
367 | |
368 /************************************************************************/ | |
369 /* */ | |
370 /* working with raw internal-format data */ | |
371 /* */ | |
372 /************************************************************************/ | |
373 | |
826 | 374 /* |
375 Use the following functions/macros on contiguous text in any of the | |
376 internal formats. Those that take a format arg work on all internal | |
377 formats; the others work only on the default (variable-width under Mule) | |
378 format. If the text you're operating on is known to come from a buffer, | |
379 use the buffer-level functions in buffer.h, which automatically know the | |
380 correct format and handle the gap. | |
381 | |
382 Some terminology: | |
383 | |
867 | 384 "itext" appearing in the macros means "internal-format text" -- type |
385 `Ibyte *'. Operations on such pointers themselves, rather than on the | |
386 text being pointed to, have "itext" instead of "itext" in the macro | |
387 name. "ichar" in the macro names means an Ichar -- the representation | |
826 | 388 of a character as a single integer rather than a series of bytes, as part |
867 | 389 of "itext". Many of the macros below are for converting between the |
826 | 390 two representations of characters. |
391 | |
867 | 392 Note also that we try to consistently distinguish between an "Ichar" and |
826 | 393 a Lisp character. Stuff working with Lisp characters often just says |
867 | 394 "char", so we consistently use "Ichar" when that's what we're working |
826 | 395 with. */ |
396 | |
397 /* The three golden rules of macros: | |
771 | 398 |
399 1) Anything that's an lvalue can be evaluated more than once. | |
826 | 400 |
401 2) Macros where anything else can be evaluated more than once should | |
402 have the word "unsafe" in their name (exceptions may be made for | |
403 large sets of macros that evaluate arguments of certain types more | |
404 than once, e.g. struct buffer * arguments, when clearly indicated in | |
405 the macro documentation). These macros are generally meant to be | |
406 called only by other macros that have already stored the calling | |
407 values in temporary variables. | |
408 | |
409 3) Nothing else can be evaluated more than once. Use inline | |
771 | 410 functions, if necessary, to prevent multiple evaluation. |
826 | 411 |
412 NOTE: The functions and macros below are given full prototypes in their | |
413 docs, even when the implementation is a macro. In such cases, passing | |
414 an argument of a type other than expected will produce undefined | |
415 results. Also, given that macros can do things functions can't (in | |
416 particular, directly modify arguments as if they were passed by | |
417 reference), the declaration syntax has been extended to include the | |
418 call-by-reference syntax from C++, where an & after a type indicates | |
419 that the argument is an lvalue and is passed by reference, i.e. the | |
420 function can modify its value. (This is equivalent in C to passing a | |
421 pointer to the argument, but without the need to explicitly worry about | |
422 pointers.) | |
423 | |
424 When to capitalize macros: | |
425 | |
426 -- Capitalize macros doing stuff obviously impossible with (C) | |
427 functions, e.g. directly modifying arguments as if they were passed by | |
428 reference. | |
429 | |
430 -- Capitalize macros that evaluate *any* argument more than once regardless | |
431 of whether that's "allowed" (e.g. buffer arguments). | |
432 | |
433 -- Capitalize macros that directly access a field in a Lisp_Object or | |
434 its equivalent underlying structure. In such cases, access through the | |
435 Lisp_Object precedes the macro with an X, and access through the underlying | |
436 structure doesn't. | |
437 | |
438 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g. | |
439 FRAMEP, CHECK_FRAME, etc. | |
440 | |
441 -- Try to avoid capitalizing any other macros. | |
771 | 442 */ |
443 | |
444 /* ---------------------------------------------------------------------- */ | |
867 | 445 /* Working with itext's (pointers to internally-formatted text) */ |
771 | 446 /* ---------------------------------------------------------------------- */ |
447 | |
867 | 448 /* Given an itext, does it point to the beginning of a character? |
826 | 449 */ |
450 | |
771 | 451 #ifdef MULE |
867 | 452 # define valid_ibyteptr_p(ptr) ibyte_first_byte_p (* (ptr)) |
771 | 453 #else |
867 | 454 # define valid_ibyteptr_p(ptr) 1 |
771 | 455 #endif |
456 | |
867 | 457 /* If error-checking is enabled, assert that the given itext points to |
826 | 458 the beginning of a character. Otherwise, do nothing. |
459 */ | |
460 | |
867 | 461 #define assert_valid_ibyteptr(ptr) text_checking_assert (valid_ibyteptr_p (ptr)) |
462 | |
463 /* Given an itext (assumed to point at the beginning of a character), |
826 | 464 modify that pointer so it points to the beginning of the next character. |
465 | |
867 | 466 Note that INC_IBYTEPTR() and DEC_IBYTEPTR() have to be written in |
467 completely separate ways. INC_IBYTEPTR() cannot use the DEC_IBYTEPTR() | |
771 | 468 trick of looking for a valid first byte because it might run off |
867 | 469 the end of the string. DEC_IBYTEPTR() can't use the INC_IBYTEPTR() |
771 | 470 method because it doesn't have easy access to the first byte of |
471 the character it's moving over. */ | |
472 | |
867 | 473 #define INC_IBYTEPTR(ptr) do { \ |
474 assert_valid_ibyteptr (ptr); \ | |
826 | 475 (ptr) += rep_bytes_by_first_byte (* (ptr)); \ |
476 } while (0) | |
477 | |
/* Advance PTR past one character of text stored in internal format FMT:
   by the variable-width step of INC_IBYTEPTR() for FORMAT_DEFAULT, by 2 or
   4 bytes for the 16- and 32-bit fixed formats (asserting alignment when
   text checking is enabled), and by one byte otherwise (asserted to be
   FORMAT_8_BIT_FIXED).  PTR must be a modifiable lvalue and is evaluated
   more than once.  FMT is evaluated exactly once: it is cached in
   __icf_fmt, and the assertion tests the cached copy. */
#define INC_IBYTEPTR_FMT(ptr, fmt)					  \
do {									  \
  Internal_Format __icf_fmt = (fmt);					  \
  switch (__icf_fmt)							  \
    {									  \
    case FORMAT_DEFAULT:						  \
      INC_IBYTEPTR (ptr);						  \
      break;								  \
    case FORMAT_16_BIT_FIXED:						  \
      text_checking_assert ((void *) (ptr) ==				  \
			    ALIGN_PTR (ptr, UINT_16_BIT));		  \
      (ptr) += 2;							  \
      break;								  \
    case FORMAT_32_BIT_FIXED:						  \
      text_checking_assert ((void *) (ptr) ==				  \
			    ALIGN_PTR (ptr, UINT_32_BIT));		  \
      (ptr) += 4;							  \
      break;								  \
    default:								  \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED);		  \
      (ptr)++;								  \
      break;								  \
    }									  \
} while (0)
500 | |
867 | 501 /* Given an itext (assumed to point at the beginning of a character or at |
826 | 502 the very end of the text), modify that pointer so it points to the |
503 beginning of the previous character. | |
504 */ | |
771 | 505 |
800 | 506 #ifdef ERROR_CHECK_TEXT |
826 | 507 /* We use a separate definition to avoid warnings about unused dc_ptr1 */ |
867 | 508 #define DEC_IBYTEPTR(ptr) do { \ |
1333 | 509 const Ibyte *dc_ptr1 = (ptr); \ |
826 | 510 do { \ |
511 (ptr)--; \ | |
867 | 512 } while (!valid_ibyteptr_p (ptr)); \ |
826 | 513 text_checking_assert (dc_ptr1 - (ptr) == rep_bytes_by_first_byte (*(ptr))); \ |
771 | 514 } while (0) |
826 | 515 #else |
867 | 516 #define DEC_IBYTEPTR(ptr) do { \ |
826 | 517 do { \ |
518 (ptr)--; \ | |
867 | 519 } while (!valid_ibyteptr_p (ptr)); \ |
771 | 520 } while (0) |
826 | 521 #endif /* ERROR_CHECK_TEXT */ |
522 | |
/* Move PTR, which points to the beginning of a character (or to the very
   end) of text in internal format FMT, back to the beginning of the
   previous character.  FORMAT_DEFAULT text is stepped with
   DEC_IBYTEPTR(); the fixed-width formats are stepped by their character
   size (2, 4 or 1 bytes).  PTR must be a modifiable lvalue.  FMT is
   copied into a local so that it is evaluated only once, including in
   the checking assertion of the default case. */
#define DEC_IBYTEPTR_FMT(ptr, fmt) \
do { \
  Internal_Format __icf_fmt = (fmt); \
  switch (__icf_fmt) \
    { \
    case FORMAT_DEFAULT: \
      DEC_IBYTEPTR (ptr); \
      break; \
    case FORMAT_16_BIT_FIXED: \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) -= 2; \
      break; \
    case FORMAT_32_BIT_FIXED: \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) -= 4; \
      break; \
    default: \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED); \
      (ptr)--; \
      break; \
    } \
} while (0)
545 | |
546 #ifdef MULE | |
547 | |
826 | 548 /* Make sure that PTR is pointing to the beginning of a character. If not, |
549 back up until this is the case. Note that there are not too many places | |
550 where it is legitimate to do this sort of thing. It's an error if | |
551 you're passed an "invalid" char * pointer. NOTE: PTR *must* be pointing | |
552 to a valid part of the string (i.e. not the very end, unless the string | |
553 is zero-terminated or something) in order for this function to not cause | |
554 crashes. | |
555 */ | |
556 | |
771 | 557 /* Note that this reads the byte at *PTR! */ |
558 | |
/* PTR must be a modifiable lvalue.  It is parenthesized in the decrement
   so that an argument such as *pp steps the pointed-to pointer rather
   than pp itself. */
#define VALIDATE_IBYTEPTR_BACKWARD(ptr) do { \
  while (!valid_ibyteptr_p (ptr)) (ptr)--; \
} while (0)
562 | |
826 | 563 /* Make sure that PTR is pointing to the beginning of a character. If not, |
564 move forward until this is the case. Note that there are not too many | |
565 places where it is legitimate to do this sort of thing. It's an error | |
566 if you're passed an "invalid" char * pointer. | |
567 */ | |
771 | 568 |
867 | 569 /* This needs to be trickier than VALIDATE_IBYTEPTR_BACKWARD() to avoid the |
771 | 570 possibility of running off the end of the string. */ |
571 | |
867 | 572 #define VALIDATE_IBYTEPTR_FORWARD(ptr) do { \ |
573 Ibyte *vcf_ptr = (ptr); \ | |
574 VALIDATE_IBYTEPTR_BACKWARD (vcf_ptr); \ | |
771 | 575 if (vcf_ptr != (ptr)) \ |
576 { \ | |
577 (ptr) = vcf_ptr; \ | |
867 | 578 INC_IBYTEPTR (ptr); \ |
771 | 579 } \ |
580 } while (0) | |
581 | |
582 #else /* not MULE */ | |
867 | 583 #define VALIDATE_IBYTEPTR_BACKWARD(ptr) |
584 #define VALIDATE_IBYTEPTR_FORWARD(ptr) | |
826 | 585 #endif /* not MULE */ |
586 | |
587 #ifdef MULE | |
588 | |
867 | 589 /* Given an Ibyte string at PTR of size N, possibly with a partial |
826 | 590 character at the end, return the size of the longest substring of |
591 complete characters. Does not assume that the byte at *(PTR + N) is | |
592 readable. Note that there are not too many places where it is | |
593 legitimate to do this sort of thing. It's an error if you're passed an | |
594 "invalid" offset. */ | |
595 | |
596 DECLARE_INLINE_HEADER ( | |
597 Bytecount | |
867 | 598 validate_ibyte_string_backward (const Ibyte *ptr, Bytecount n) |
826 | 599 ) |
600 { | |
867 | 601 const Ibyte *ptr2; |
826 | 602 |
603 if (n == 0) | |
604 return n; | |
605 ptr2 = ptr + n - 1; | |
867 | 606 VALIDATE_IBYTEPTR_BACKWARD (ptr2); |
826 | 607 if (ptr2 + rep_bytes_by_first_byte (*ptr2) != ptr + n) |
608 return ptr2 - ptr; | |
609 return n; | |
610 } | |
611 | |
612 #else | |
613 | |
867 | 614 #define validate_ibyte_string_backward(ptr, n) (n) |
826 | 615 |
616 #endif /* MULE */ | |
771 | 617 |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
618 /* ASSERT_ASCTEXT_ASCII(ptr): Check that an Ascbyte * pointer points to |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
619 purely ASCII text. Useful for checking that putatively ASCII strings |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
620 (i.e. declared as Ascbyte * or const Ascbyte *) are actually ASCII. |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
621 This is important because otherwise we need to worry about what |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
622 encoding they are in -- internal or some external encoding. |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
623 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
624 ASSERT_ASCTEXT_ASCII_LEN(ptr, len): Same as ASSERT_ASCTEXT_ASCII() |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
625 but where the length has been explicitly given. Useful if the string |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
626 may contain embedded zeroes. |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
627 */ |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
628 |
#ifdef ERROR_CHECK_TEXT
/* Assert that each of the LEN bytes starting at PTR lies in the ASCII
   range 0x00 - 0x7F inclusive (DEL, 0x7F, is ASCII too).  PTR and LEN are
   each evaluated once. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len) \
do { \
  int aia2; \
  const Ascbyte *aia2ptr = (ptr); \
  int aia2len = (len); \
 \
  for (aia2 = 0; aia2 < aia2len; aia2++) \
    assert (aia2ptr[aia2] >= 0x00 && aia2ptr[aia2] <= 0x7F); \
} while (0)
/* Same check for a zero-terminated string; the length is taken with
   strlen(). */
#define ASSERT_ASCTEXT_ASCII(ptr) \
do { \
  const Ascbyte *aiaz2 = (ptr); \
  ASSERT_ASCTEXT_ASCII_LEN (aiaz2, strlen (aiaz2)); \
} while (0)
#else
/* Without ERROR_CHECK_TEXT both checks expand to nothing. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)
#define ASSERT_ASCTEXT_ASCII(ptr)
#endif
648 | |
771 | 649 /* -------------------------------------------------------------- */ |
826 | 650 /* Working with the length (in bytes and characters) of a */ |
651 /* section of internally-formatted text */ | |
771 | 652 /* -------------------------------------------------------------- */ |
653 | |
826 | 654 #ifdef MULE |
655 | |
1632 | 656 MODULE_API Charcount |
657 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len); | |
658 MODULE_API Bytecount | |
659 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len); | |
826 | 660 |
661 /* Given a pointer to a text string and a length in bytes, return | |
662 the equivalent length in characters. */ | |
663 | |
664 DECLARE_INLINE_HEADER ( | |
665 Charcount | |
867 | 666 bytecount_to_charcount (const Ibyte *ptr, Bytecount len) |
826 | 667 ) |
668 { | |
669 if (len < 20) /* Just a random guess, but it should be more or less correct. | |
670 If number of bytes is small, just do a simple loop, | |
671 which should be more efficient. */ | |
672 { | |
673 Charcount count = 0; | |
867 | 674 const Ibyte *end = ptr + len; |
826 | 675 while (ptr < end) |
676 { | |
867 | 677 INC_IBYTEPTR (ptr); |
826 | 678 count++; |
679 } | |
680 /* Bomb out if the specified substring ends in the middle | |
681 of a character. Note that we might have already gotten | |
682 a core dump above from an invalid reference, but at least | |
683 we will get no farther than here. | |
684 | |
685 This also catches len < 0. */ | |
686 text_checking_assert (ptr == end); | |
687 | |
688 return count; | |
689 } | |
690 else | |
691 return bytecount_to_charcount_fun (ptr, len); | |
692 } | |
693 | |
694 /* Given a pointer to a text string and a length in characters, return the | |
695 equivalent length in bytes. | |
696 */ | |
697 | |
698 DECLARE_INLINE_HEADER ( | |
699 Bytecount | |
867 | 700 charcount_to_bytecount (const Ibyte *ptr, Charcount len) |
826 | 701 ) |
702 { | |
703 text_checking_assert (len >= 0); | |
704 if (len < 20) /* See above */ | |
705 { | |
867 | 706 const Ibyte *newptr = ptr; |
826 | 707 while (len > 0) |
708 { | |
867 | 709 INC_IBYTEPTR (newptr); |
826 | 710 len--; |
711 } | |
712 return newptr - ptr; | |
713 } | |
714 else | |
715 return charcount_to_bytecount_fun (ptr, len); | |
716 } | |
717 | |
2367 | 718 MODULE_API Bytecount |
719 charcount_to_bytecount_down_fun (const Ibyte *ptr, Charcount len); | |
720 | |
721 /* Given a pointer to a text string and a length in bytes, return | |
722 the equivalent length in characters of the stretch [PTR - LEN, PTR). */ | |
723 | |
724 DECLARE_INLINE_HEADER ( | |
725 Charcount | |
726 bytecount_to_charcount_down (const Ibyte *ptr, Bytecount len) | |
727 ) | |
728 { | |
729 /* No need to be clever here */ | |
730 return bytecount_to_charcount (ptr - len, len); | |
731 } | |
732 | |
733 /* Given a pointer to a text string and a length in characters, return the | |
734 equivalent length in bytes of the stretch of characters of that length | |
735 BEFORE the pointer. | |
736 */ | |
737 | |
738 DECLARE_INLINE_HEADER ( | |
739 Bytecount | |
740 charcount_to_bytecount_down (const Ibyte *ptr, Charcount len) | |
741 ) | |
742 { | |
743 #define SLEDGEHAMMER_CHECK_TEXT | |
744 #ifdef SLEDGEHAMMER_CHECK_TEXT | |
745 Charcount len1 = len; | |
746 Bytecount ret1, ret2; | |
747 | |
748 /* To test the correctness of the function version, always do the | |
749 calculation both ways and check that the values are the same. */ | |
750 text_checking_assert (len >= 0); | |
751 { | |
752 const Ibyte *newptr = ptr; | |
753 while (len1 > 0) | |
754 { | |
755 DEC_IBYTEPTR (newptr); | |
756 len1--; | |
757 } | |
758 ret1 = ptr - newptr; | |
759 } | |
760 ret2 = charcount_to_bytecount_down_fun (ptr, len); | |
761 text_checking_assert (ret1 == ret2); | |
762 return ret1; | |
763 #else | |
764 text_checking_assert (len >= 0); | |
765 if (len < 20) /* See above */ | |
766 { | |
767 const Ibyte *newptr = ptr; | |
768 while (len > 0) | |
769 { | |
770 DEC_IBYTEPTR (newptr); | |
771 len--; | |
772 } | |
773 return ptr - newptr; | |
774 } | |
775 else | |
776 return charcount_to_bytecount_down_fun (ptr, len); | |
777 #endif /* SLEDGEHAMMER_CHECK_TEXT */ | |
778 } | |
779 | |
826 | 780 /* Given a pointer to a text string in the specified format and a length in |
781 bytes, return the equivalent length in characters. | |
782 */ | |
783 | |
784 DECLARE_INLINE_HEADER ( | |
785 Charcount | |
867 | 786 bytecount_to_charcount_fmt (const Ibyte *ptr, Bytecount len, |
826 | 787 Internal_Format fmt) |
788 ) | |
789 { | |
790 switch (fmt) | |
791 { | |
792 case FORMAT_DEFAULT: | |
793 return bytecount_to_charcount (ptr, len); | |
794 case FORMAT_16_BIT_FIXED: | |
1204 | 795 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 796 return (Charcount) (len << 1); |
797 case FORMAT_32_BIT_FIXED: | |
1204 | 798 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 799 return (Charcount) (len << 2); |
800 default: | |
801 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
802 return (Charcount) len; | |
803 } | |
804 } | |
805 | |
806 /* Given a pointer to a text string in the specified format and a length in | |
807 characters, return the equivalent length in bytes. | |
808 */ | |
809 | |
810 DECLARE_INLINE_HEADER ( | |
811 Bytecount | |
867 | 812 charcount_to_bytecount_fmt (const Ibyte *ptr, Charcount len, |
826 | 813 Internal_Format fmt) |
814 ) | |
815 { | |
816 switch (fmt) | |
817 { | |
818 case FORMAT_DEFAULT: | |
819 return charcount_to_bytecount (ptr, len); | |
820 case FORMAT_16_BIT_FIXED: | |
1204 | 821 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 822 text_checking_assert (!(len & 1)); |
823 return (Bytecount) (len >> 1); | |
824 case FORMAT_32_BIT_FIXED: | |
825 text_checking_assert (!(len & 3)); | |
1204 | 826 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 827 return (Bytecount) (len >> 2); |
828 default: | |
829 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
830 return (Bytecount) len; | |
831 } | |
832 } | |
833 | |
5774
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
834 #ifdef EFFICIENT_INT_128_BIT |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
835 # define STRIDE_TYPE INT_128_BIT |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
836 # define HIGH_BIT_MASK \ |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
837 MAKE_128_BIT_UNSIGNED_CONSTANT (0x80808080808080808080808080808080) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
838 #elif defined (EFFICIENT_INT_64_BIT) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
839 # define STRIDE_TYPE INT_64_BIT |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
840 # define HIGH_BIT_MASK MAKE_64_BIT_UNSIGNED_CONSTANT (0x8080808080808080) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
841 #else |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
842 # define STRIDE_TYPE INT_32_BIT |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
843 # define HIGH_BIT_MASK MAKE_32_BIT_UNSIGNED_CONSTANT (0x80808080) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
844 #endif |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
845 |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
846 #define ALIGN_BITS ((EMACS_UINT) (ALIGNOF (STRIDE_TYPE) - 1)) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
847 #define ALIGN_MASK (~ ALIGN_BITS) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
848 #define ALIGNED(ptr) ((((EMACS_UINT) ptr) & ALIGN_BITS) == 0) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
849 #define STRIDE sizeof (STRIDE_TYPE) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
850 |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
851 /* Skip as many ASCII bytes as possible in the memory block [PTR, END). |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
852 Return pointer to the first non-ASCII byte. Optimized for long |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
853 stretches of ASCII. */ |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
854 DECLARE_INLINE_HEADER ( |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
855 const Ibyte * |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
856 skip_ascii (const Ibyte *ptr, const Ibyte *end) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
857 ) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
858 { |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
859 const unsigned STRIDE_TYPE *ascii_end; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
860 |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
861 /* Need to do in 3 sections -- before alignment start, aligned chunk, |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
862 after alignment end. */ |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
863 while (!ALIGNED (ptr)) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
864 { |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
865 if (ptr == end || !byte_ascii_p (*ptr)) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
866 return ptr; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
867 ptr++; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
868 } |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
869 ascii_end = (const unsigned STRIDE_TYPE *) ptr; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
870 /* This loop screams, because we can detect ASCII |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
871 characters 4 or 8 at a time. */ |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
872 while ((const Ibyte *) ascii_end + STRIDE <= end |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
873 && !(*ascii_end & HIGH_BIT_MASK)) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
874 ascii_end++; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
875 ptr = (Ibyte *) ascii_end; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
876 while (ptr < end && byte_ascii_p (*ptr)) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
877 ptr++; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
878 return ptr; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
879 } |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
880 |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
881 /* Skip as many ASCII bytes as possible in the memory block [END, PTR), |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
882 going downwards. Return pointer to the location above the first |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
883 non-ASCII byte. Optimized for long stretches of ASCII. */ |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
884 DECLARE_INLINE_HEADER ( |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
885 const Ibyte * |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
886 skip_ascii_down (const Ibyte *ptr, const Ibyte *end) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
887 ) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
888 { |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
889 const unsigned STRIDE_TYPE *ascii_end; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
890 |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
891 /* Need to do in 3 sections -- before alignment start, aligned chunk, |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
892 after alignment end. */ |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
893 while (!ALIGNED (ptr)) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
894 { |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
895 if (ptr == end || !byte_ascii_p (*(ptr - 1))) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
896 return ptr; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
897 ptr--; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
898 } |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
899 ascii_end = (const unsigned STRIDE_TYPE *) ptr - 1; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
900 /* This loop screams, because we can detect ASCII |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
901 characters 4 or 8 at a time. */ |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
902 while ((const Ibyte *) ascii_end >= end |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
903 && !(*ascii_end & HIGH_BIT_MASK)) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
904 ascii_end--; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
905 ptr = (Ibyte *) (ascii_end + 1); |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
906 while (ptr > end && byte_ascii_p (*(ptr - 1))) |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
907 ptr--; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
908 return ptr; |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
909 } |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
910 |
5784
0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5774
diff
changeset
|
911 /* Return the character count of an lstream or coding buffer of internal |
0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5774
diff
changeset
|
912 format text, counting partial characters at the beginning of the buffer |
0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5774
diff
changeset
|
913 as whole characters, and *not* counting partial characters at the end of |
0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5774
diff
changeset
|
914 the buffer. */ |
0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5774
diff
changeset
|
915 Charcount buffered_bytecount_to_charcount (const Ibyte *, Bytecount len); |
0cb4f494a548
Have the result of coding_character_tell() reflect str->convert_to, too.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5774
diff
changeset
|
916 |
826 | 917 #else |
918 | |
919 #define bytecount_to_charcount(ptr, len) ((Charcount) (len)) | |
920 #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len)) | |
921 #define charcount_to_bytecount(ptr, len) ((Bytecount) (len)) | |
922 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len)) | |
5774
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
923 #define skip_ascii(ptr, end) end |
7a538e1a4676
Use skip_ascii() in no_conversion_convert() when encoding.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5402
diff
changeset
|
924 #define skip_ascii_down(ptr, end) end |
5786
6355de501637
Correct buffered_bytecount_to_charcount() on non-Mule.
Aidan Kehoe <kehoea@parhasard.net>
parents:
5784
diff
changeset
|
925 #define buffered_bytecount_to_charcount(ptr, len) (len) |
826 | 926 |
927 #endif /* MULE */ | |
928 | |
929 /* Return the length of the first character at PTR. Equivalent to | |
930 charcount_to_bytecount (ptr, 1). | |
931 | |
932 [Since charcount_to_bytecount() is written as inline, a smart compiler | |
933 should really optimize charcount_to_bytecount (ptr, 1) to the same as | |
934 the following, with no error checking. But since this idiom occurs so | |
935 often, we'll be helpful and define a special macro for it.] | |
936 */ | |
937 | |
867 | 938 #define itext_ichar_len(ptr) rep_bytes_by_first_byte (*(ptr)) |
826 | 939 |
940 /* Return the length of the first character at PTR, which is in the | |
941 specified internal format. Equivalent to charcount_to_bytecount_fmt | |
942 (ptr, 1, fmt). | |
943 */ | |
944 | |
945 DECLARE_INLINE_HEADER ( | |
946 Bytecount | |
4853 | 947 itext_ichar_len_fmt (const Ibyte *ptr, Internal_Format fmt) |
826 | 948 ) |
949 { | |
950 switch (fmt) | |
951 { | |
952 case FORMAT_DEFAULT: | |
867 | 953 return itext_ichar_len (ptr); |
826 | 954 case FORMAT_16_BIT_FIXED: |
1204 | 955 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 956 return 2; |
957 case FORMAT_32_BIT_FIXED: | |
1204 | 958 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 959 return 4; |
960 default: | |
961 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
962 return 1; | |
963 } | |
964 } | |
965 | |
966 /* Return a pointer to the beginning of the character offset N (in | |
967 characters) from PTR. | |
968 */ | |
969 | |
970 DECLARE_INLINE_HEADER ( | |
867 | 971 const Ibyte * |
972 itext_n_addr (const Ibyte *ptr, Charcount offset) | |
826 | 973 ) |
771 | 974 { |
975 return ptr + charcount_to_bytecount (ptr, offset); | |
976 } | |
977 | |
/* Given an itext and an offset into the text pointed to by the itext,
   modify the offset so it points to the beginning of the next character.

   The byte at PTR + POS is the one read as the first byte of the
   character being stepped over, so that is the location validated when
   text checking is enabled.  POS must be a modifiable lvalue.
*/

#define INC_BYTECOUNT(ptr, pos) do { \
  assert_valid_ibyteptr ((ptr) + (pos)); \
  (pos) += rep_bytes_by_first_byte (* ((ptr) + (pos))); \
} while (0)
986 | |
771 | 987 /* -------------------------------------------------------------------- */ |
867 | 988 /* Retrieving or changing the character pointed to by a itext */ |
771 | 989 /* -------------------------------------------------------------------- */ |
990 | |
867 | 991 #define simple_itext_ichar(ptr) ((Ichar) (ptr)[0]) |
992 #define simple_set_itext_ichar(ptr, x) \ | |
993 ((ptr)[0] = (Ibyte) (x), (Bytecount) 1) | |
994 #define simple_itext_copy_ichar(src, dst) \ | |
814 | 995 ((dst)[0] = *(src), (Bytecount) 1) |
771 | 996 |
997 #ifdef MULE | |
998 | |
1632 | 999 MODULE_API Ichar non_ascii_itext_ichar (const Ibyte *ptr); |
1000 MODULE_API Bytecount non_ascii_set_itext_ichar (Ibyte *ptr, Ichar c); | |
1001 MODULE_API Bytecount non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst); | |
867 | 1002 |
1003 /* Retrieve the character pointed to by PTR as an Ichar. */ | |
826 | 1004 |
1005 DECLARE_INLINE_HEADER ( | |
867 | 1006 Ichar |
1007 itext_ichar (const Ibyte *ptr) | |
826 | 1008 ) |
771 | 1009 { |
826 | 1010 return byte_ascii_p (*ptr) ? |
867 | 1011 simple_itext_ichar (ptr) : |
1012 non_ascii_itext_ichar (ptr); | |
771 | 1013 } |
1014 | |
826 | 1015 /* Retrieve the character pointed to by PTR (a pointer to text in the |
1016 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an | |
867 | 1017 Ichar. |
826 | 1018 |
1019 Note: For these and other *_fmt() functions, if you pass in a constant | |
1020 FMT, the switch will be optimized out of existence. Therefore, there is | |
1021 no need to create separate versions for the various formats for | |
867 | 1022 "efficiency reasons". In fact, we don't really need itext_ichar() |
826 | 1023 and such written separately, but they are used often so it's simpler |
1024 that way. */ | |
1025 | |
1026 DECLARE_INLINE_HEADER ( | |
867 | 1027 Ichar |
1028 itext_ichar_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 1029 Lisp_Object UNUSED (object)) |
826 | 1030 ) |
1031 { | |
1032 switch (fmt) | |
1033 { | |
1034 case FORMAT_DEFAULT: | |
867 | 1035 return itext_ichar (ptr); |
826 | 1036 case FORMAT_16_BIT_FIXED: |
1204 | 1037 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 1038 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 1039 case FORMAT_32_BIT_FIXED: |
1204 | 1040 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 1041 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 1042 default: |
1043 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 1044 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 1045 } |
1046 } | |
1047 | |
1048 /* Return the character at PTR (which is in format FMT), suitable for | |
1049 comparison with an ASCII character. This guarantees that if the | |
1050 character at PTR is ASCII (range 0 - 127), that character will be | |
1051 returned; otherwise, some character outside of the ASCII range will be | |
1052 returned, but not necessarily the character actually at PTR. This will | |
867 | 1053 be faster than itext_ichar_fmt() for some formats -- in particular, |
826 | 1054 FORMAT_DEFAULT. */ |
1055 | |
1056 DECLARE_INLINE_HEADER ( | |
867 | 1057 Ichar |
1058 itext_ichar_ascii_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 1059 Lisp_Object UNUSED (object)) |
826 | 1060 ) |
1061 { | |
1062 switch (fmt) | |
1063 { | |
1064 case FORMAT_DEFAULT: | |
867 | 1065 return (Ichar) *ptr; |
826 | 1066 case FORMAT_16_BIT_FIXED: |
1204 | 1067 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 1068 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 1069 case FORMAT_32_BIT_FIXED: |
1204 | 1070 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 1071 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 1072 default: |
1073 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 1074 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 1075 } |
1076 } | |
1077 | |
1078 /* Return the "raw value" of the character at PTR, in format FMT. This is | |
1079 useful when searching for a character; convert the character using | |
867 | 1080 ichar_to_raw(). */ |
826 | 1081 |
1082 DECLARE_INLINE_HEADER ( | |
867 | 1083 Raw_Ichar |
1084 itext_ichar_raw_fmt (const Ibyte *ptr, Internal_Format fmt) | |
826 | 1085 ) |
1086 { | |
1087 switch (fmt) | |
1088 { | |
1089 case FORMAT_DEFAULT: | |
867 | 1090 return (Raw_Ichar) itext_ichar (ptr); |
826 | 1091 case FORMAT_16_BIT_FIXED: |
1204 | 1092 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 1093 return (Raw_Ichar) (* (UINT_16_BIT *) ptr); |
826 | 1094 case FORMAT_32_BIT_FIXED: |
1204 | 1095 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 1096 return (Raw_Ichar) (* (UINT_32_BIT *) ptr); |
826 | 1097 default: |
1098 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 1099 return (Raw_Ichar) (*ptr); |
826 | 1100 } |
1101 } | |
1102 | |
867 | 1103 /* Store the character X (an Ichar) as internally-formatted text starting |
826 | 1104 at PTR. Return the number of bytes stored. |
1105 */ | |
1106 | |
1107 DECLARE_INLINE_HEADER ( | |
1108 Bytecount | |
867 | 1109 set_itext_ichar (Ibyte *ptr, Ichar x) |
826 | 1110 ) |
771 | 1111 { |
867 | 1112 return !ichar_multibyte_p (x) ? |
1113 simple_set_itext_ichar (ptr, x) : | |
1114 non_ascii_set_itext_ichar (ptr, x); | |
771 | 1115 } |
1116 | |
867 | 1117 /* Store the character X (an Ichar) as internally-formatted text of |
826 | 1118 format FMT starting at PTR, which comes from OBJECT. Return the number |
1119 of bytes stored. | |
1120 */ | |
1121 | |
1122 DECLARE_INLINE_HEADER ( | |
1123 Bytecount | |
867 | 1124 set_itext_ichar_fmt (Ibyte *ptr, Ichar x, Internal_Format fmt, |
2286 | 1125 Lisp_Object UNUSED (object)) |
826 | 1126 ) |
771 | 1127 { |
826 | 1128 switch (fmt) |
1129 { | |
1130 case FORMAT_DEFAULT: | |
867 | 1131 return set_itext_ichar (ptr, x); |
826 | 1132 case FORMAT_16_BIT_FIXED: |
867 | 1133 text_checking_assert (ichar_16_bit_fixed_p (x, object)); |
1204 | 1134 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 1135 * (UINT_16_BIT *) ptr = ichar_to_raw_16_bit_fixed (x, object); |
826 | 1136 return 2; |
1137 case FORMAT_32_BIT_FIXED: | |
1204 | 1138 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 1139 * (UINT_32_BIT *) ptr = ichar_to_raw_32_bit_fixed (x, object); |
826 | 1140 return 4; |
1141 default: | |
1142 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 1143 text_checking_assert (ichar_8_bit_fixed_p (x, object)); |
1144 *ptr = ichar_to_raw_8_bit_fixed (x, object); | |
826 | 1145 return 1; |
1146 } | |
1147 } | |
1148 | |
1149 /* Retrieve the character pointed to by SRC and store it as | |
1150 internally-formatted text in DST. | |
1151 */ | |
1152 | |
1153 DECLARE_INLINE_HEADER ( | |
1154 Bytecount | |
867 | 1155 itext_copy_ichar (const Ibyte *src, Ibyte *dst) |
826 | 1156 ) |
1157 { | |
1158 return byte_ascii_p (*src) ? | |
867 | 1159 simple_itext_copy_ichar (src, dst) : |
1160 non_ascii_itext_copy_ichar (src, dst); | |
771 | 1161 } |
1162 | |
1163 #else /* not MULE */ | |
1164 | |
867 | 1165 # define itext_ichar(ptr) simple_itext_ichar (ptr) |
1166 # define itext_ichar_fmt(ptr, fmt, object) itext_ichar (ptr) | |
1167 # define itext_ichar_ascii_fmt(ptr, fmt, object) itext_ichar (ptr) | |
1168 # define itext_ichar_raw_fmt(ptr, fmt) itext_ichar (ptr) | |
1169 # define set_itext_ichar(ptr, x) simple_set_itext_ichar (ptr, x) | |
1170 # define set_itext_ichar_fmt(ptr, x, fmt, obj) set_itext_ichar (ptr, x) | |
1171 # define itext_copy_ichar(src, dst) simple_itext_copy_ichar (src, dst) | |
771 | 1172 |
1173 #endif /* not MULE */ | |
1174 | |
826 | 1175 /* Retrieve the character at offset OFFSET (in characters) from PTR, as an |
867 | 1176 Ichar. |
826 | 1177 */ |
1178 | |
867 | 1179 #define itext_ichar_n(ptr, offset) \ |
1180 itext_ichar (itext_n_addr (ptr, offset)) | |
771 | 1181 |
1182 | |
1183 /************************************************************************/ | |
1184 /* */ | |
826 | 1185 /* working with Lisp strings */ |
1186 /* */ | |
1187 /************************************************************************/ | |
1188 | |
1189 #define string_char_length(s) \ | |
1190 string_index_byte_to_char (s, XSTRING_LENGTH (s)) | |
1191 #define string_byte(s, i) (XSTRING_DATA (s)[i] + 0) | |
1192 /* In case we ever allow strings to be in a different format ... */ | |
1193 #define set_string_byte(s, i, c) (XSTRING_DATA (s)[i] = (c)) | |
1194 | |
1195 #define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do { /* X is a char index into S; S and X are evaluated more than once */ \ | |
1196 text_checking_assert ((x) >= 0 && (x) <= string_char_length (s)); \ | |
1197 } while (0) | |
1198 | |
1199 #define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do { /* X is a byte index into S; S and X are evaluated more than once */ \ | |
1200 text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s)); \ | |
867 | 1201 text_checking_assert (valid_ibyteptr_p (string_byte_addr (s, x))); \ |
826 | 1202 } while (0) |
1203 | |
1204 /* Convert offset I in string S to a pointer to text there. */ | |
1205 #define string_byte_addr(s, i) (&(XSTRING_DATA (s)[i])) | |
1206 /* Convert pointer to text in string S into the byte offset to that text. */ | |
1207 #define string_addr_to_byte(s, ptr) ((Bytecount) ((ptr) - XSTRING_DATA (s))) | |
867 | 1208 /* Return the Ichar at *CHARACTER* offset I. */ |
1209 #define string_ichar(s, i) itext_ichar (string_char_addr (s, i)) | |
826 | 1210 |
1211 #ifdef ERROR_CHECK_TEXT | |
1212 #define SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1213 #endif | |
1214 | |
1215 #ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1216 void sledgehammer_check_ascii_begin (Lisp_Object str); | |
1217 #else | |
1218 #define sledgehammer_check_ascii_begin(str) | |
1219 #endif | |
1220 | |
1221 /* Make an alloca'd copy of a Lisp string */ | |
1222 #define LISP_STRING_TO_ALLOCA(s, lval) \ | |
1223 do { \ | |
1315 | 1224 Ibyte **_lta_ = (Ibyte **) &(lval); \ |
826 | 1225 Lisp_Object _lta_2 = (s); \ |
2367 | 1226 *_lta_ = alloca_ibytes (1 + XSTRING_LENGTH (_lta_2)); \ |
826 | 1227 memcpy (*_lta_, XSTRING_DATA (_lta_2), 1 + XSTRING_LENGTH (_lta_2)); \ |
1228 } while (0) | |
1229 | |
1230 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta); | |
1231 | |
1232 /* Convert a byte index into a string into a char index. */ | |
1233 DECLARE_INLINE_HEADER ( | |
1234 Charcount | |
4853 | 1235 string_index_byte_to_char (Lisp_Object s, Bytecount idx) |
826 | 1236 ) |
1237 { | |
1238 Charcount retval; | |
1239 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx); | |
1240 #ifdef MULE | |
1241 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1242 retval = (Charcount) idx; | |
1243 else | |
1244 retval = (XSTRING_ASCII_BEGIN (s) + | |
1245 bytecount_to_charcount (XSTRING_DATA (s) + | |
1246 XSTRING_ASCII_BEGIN (s), | |
1247 idx - XSTRING_ASCII_BEGIN (s))); | |
1248 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1249 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx)); | |
1250 # endif | |
1251 #else | |
1252 retval = (Charcount) idx; | |
1253 #endif | |
1254 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will | |
1255 call string_index_byte_to_char(). */ | |
1256 return retval; | |
1257 } | |
1258 | |
1259 /* Convert a char index into a string into a byte index. */ | |
1260 DECLARE_INLINE_HEADER ( | |
1261 Bytecount | |
4853 | 1262 string_index_char_to_byte (Lisp_Object s, Charcount idx) |
826 | 1263 ) |
1264 { | |
1265 Bytecount retval; | |
1266 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx); | |
1267 #ifdef MULE | |
1268 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s)) | |
1269 retval = (Bytecount) idx; | |
1270 else | |
1271 retval = (XSTRING_ASCII_BEGIN (s) + | |
1272 charcount_to_bytecount (XSTRING_DATA (s) + | |
1273 XSTRING_ASCII_BEGIN (s), | |
1274 idx - XSTRING_ASCII_BEGIN (s))); | |
1275 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1276 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx)); | |
1277 # endif | |
1278 #else | |
1279 retval = (Bytecount) idx; | |
1280 #endif | |
1281 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval); | |
1282 return retval; | |
1283 } | |
1284 | |
1285 /* Convert a substring length (starting at byte offset OFF) from bytes to | |
1286 chars. */ | |
1287 DECLARE_INLINE_HEADER ( | |
1288 Charcount | |
4853 | 1289 string_offset_byte_to_char_len (Lisp_Object s, Bytecount off, Bytecount len) |
826 | 1290 ) |
1291 { | |
1292 Charcount retval; | |
1293 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1294 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len); | |
1295 #ifdef MULE | |
1296 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1297 retval = (Charcount) len; | |
1298 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1299 retval = | |
1300 XSTRING_ASCII_BEGIN (s) - (Charcount) off + | |
1301 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1302 len - (XSTRING_ASCII_BEGIN (s) - off)); | |
1303 else | |
1304 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len); | |
1305 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1306 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len)); | |
1307 # endif | |
1308 #else | |
1309 retval = (Charcount) len; | |
1310 #endif | |
1311 return retval; | |
1312 } | |
1313 | |
1314 /* Convert a substring length (starting at byte offset OFF) from chars to | |
1315 bytes. */ | |
1316 DECLARE_INLINE_HEADER ( | |
1317 Bytecount | |
4853 | 1318 string_offset_char_to_byte_len (Lisp_Object s, Bytecount off, Charcount len) |
826 | 1319 ) |
1320 { | |
1321 Bytecount retval; | |
1322 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1323 #ifdef MULE | |
1324 /* casts to avoid errors from combining Bytecount/Charcount and warnings | |
1325 from signed/unsigned comparisons */ | |
1326 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1327 retval = (Bytecount) len; | |
1328 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1329 retval = | |
1330 XSTRING_ASCII_BEGIN (s) - off + | |
1331 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1332 len - (XSTRING_ASCII_BEGIN (s) - | |
1333 (Charcount) off)); | |
1334 else | |
1335 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len); | |
1336 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1337 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len)); | |
1338 # endif | |
1339 #else | |
1340 retval = (Bytecount) len; | |
1341 #endif | |
1342 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval); | |
1343 return retval; | |
1344 } | |
1345 | |
1346 DECLARE_INLINE_HEADER ( | |
867 | 1347 const Ibyte * |
826 | 1348 string_char_addr (Lisp_Object s, Charcount idx) |
1349 ) | |
1350 { | |
1351 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx); | |
1352 } | |
1353 | |
1354 /* WARNING: If you modify an existing string, you must call | |
1355 bump_string_modiff() afterwards. */ | |
1356 #ifdef MULE | |
867 | 1357 void set_string_char (Lisp_Object s, Charcount i, Ichar c); |
826 | 1358 #else |
1359 #define set_string_char(s, i, c) set_string_byte (s, i, c) | |
1360 #endif /* not MULE */ | |
1361 | |
1362 /* Return index to character before the one at IDX. */ | |
1363 DECLARE_INLINE_HEADER ( | |
1364 Bytecount | |
1365 prev_string_index (Lisp_Object s, Bytecount idx) | |
1366 ) | |
1367 { | |
867 | 1368 const Ibyte *ptr = string_byte_addr (s, idx); |
1369 DEC_IBYTEPTR (ptr); | |
826 | 1370 return string_addr_to_byte (s, ptr); |
1371 } | |
1372 | |
1373 /* Return index to character after the one at IDX. */ | |
1374 DECLARE_INLINE_HEADER ( | |
1375 Bytecount | |
1376 next_string_index (Lisp_Object s, Bytecount idx) | |
1377 ) | |
1378 { | |
867 | 1379 const Ibyte *ptr = string_byte_addr (s, idx); |
1380 INC_IBYTEPTR (ptr); | |
826 | 1381 return string_addr_to_byte (s, ptr); |
1382 } | |
1383 | |
1384 | |
1385 /************************************************************************/ | |
1386 /* */ | |
771 | 1387 /* working with Eistrings */ |
1388 /* */ | |
1389 /************************************************************************/ | |
1390 | |
1391 /* | |
1392 #### NOTE: This is a work in progress. Neither the API nor especially | |
1393 the implementation is finished. | |
1394 | |
1395 NOTE: An Eistring is a structure that makes it easy to work with | |
1396 internally-formatted strings of data. It provides operations similar | |
1397 in feel to the standard strcpy(), strcat(), strlen(), etc., but | |
1398 | |
1399 (a) it is Mule-correct | |
1400 (b) it does dynamic allocation so you never have to worry about size | |
793 | 1401 restrictions |
851 | 1402 (c) it comes in an ALLOCA() variety (all allocation is stack-local, |
793 | 1403 so there is no need to explicitly clean up) as well as a malloc() |
1404 variety | |
1405 (d) it knows its own length, so it does not suffer from standard null | |
1406 byte brain-damage -- but it null-terminates the data anyway, so | |
1407 it can be passed to standard routines | |
1408 (e) it provides a much more powerful set of operations and knows about | |
771 | 1409 all the standard places where string data might reside: Lisp_Objects, |
867 | 1410 other Eistrings, Ibyte * data with or without an explicit length, |
1411 ASCII strings, Ichars, etc. | |
793 | 1412 (f) it provides easy operations to convert to/from externally-formatted |
1413 data, and is easier to use than the standard TO_INTERNAL_FORMAT | |
771 | 1414 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal |
1415 and external version of its data, but the external version is only | |
1416 initialized or changed when you call eito_external().) | |
1417 | |
793 | 1418 The idea is to make it as easy to write Mule-correct string manipulation |
1419 code as it is to write normal string manipulation code. We also make | |
1420 the API sufficiently general that it can handle multiple internal data | |
1421 formats (e.g. some fixed-width optimizing formats and a default variable | |
1422 width format) and allows for *ANY* data format we might choose in the | |
1423 future for the default format, including UCS2. (In other words, we can't | |
1424 assume that the internal format is ASCII-compatible and we can't assume | |
1425 it doesn't have embedded null bytes. We do assume, however, that any | |
1426 chosen format will have the concept of null-termination.) All of this is | |
1427 hidden from the user. | |
771 | 1428 |
1429 #### It is really too bad that we don't have a real object-oriented | |
1430 language, or at least a language with polymorphism! | |
1431 | |
1432 | |
1433 ********************************************** | |
1434 * Declaration * | |
1435 ********************************************** | |
1436 | |
1437 To declare an Eistring, either put one of the following in the local | |
1438 variable section: | |
1439 | |
1440 DECLARE_EISTRING (name); | |
2367 | 1441 Declare a new Eistring and initialize it to the empty string. This |
1442 is a standard local variable declaration and can go anywhere in the | |
1443 variable declaration section. NAME itself is declared as an | |
1444 Eistring *, and its storage declared on the stack. | |
771 | 1445 |
1446 DECLARE_EISTRING_MALLOC (name); | |
2367 | 1447 Declare and initialize a new Eistring, which uses malloc()ed |
1448 instead of ALLOCA()ed data. This is a standard local variable | |
1449 declaration and can go anywhere in the variable declaration | |
1450 section. Once you initialize the Eistring, you will have to free | |
1451 it using eifree() to avoid memory leaks. You will need to use this | |
1452 form if you are passing an Eistring to any function that modifies | |
1453 it (otherwise, the modified data may be in stack space and get | |
1454 overwritten when the function returns). | |
771 | 1455 |
1456 or use | |
1457 | |
793 | 1458 Eistring ei; |
1459 void eiinit (Eistring *ei); | |
1460 void eiinit_malloc (Eistring *einame); | |
771 | 1461 If you need to put an Eistring elsewhere than in a local variable |
1462 declaration (e.g. in a structure), declare it as shown and then | |
1463 call one of the init macros. | |
1464 | |
1465 Also note: | |
1466 | |
793 | 1467 void eifree (Eistring *ei); |
771 | 1468 If you declared an Eistring to use malloc() to hold its data, |
1469 or converted it to the heap using eito_malloc(), then this | |
1470 releases any data in it and afterwards resets the Eistring | |
1471 using eiinit_malloc(). Otherwise, it just resets the Eistring | |
1472 using eiinit(). | |
1473 | |
1474 | |
1475 ********************************************** | |
1476 * Conventions * | |
1477 ********************************************** | |
1478 | |
1479 - The names of the functions have been chosen, where possible, to | |
1480 match the names of str*() functions in the standard C API. | |
1481 - | |
1482 | |
1483 | |
1484 ********************************************** | |
1485 * Initialization * | |
1486 ********************************************** | |
1487 | |
1488 void eireset (Eistring *eistr); | |
1489 Initialize the Eistring to the empty string. | |
1490 | |
1491 void eicpy_* (Eistring *eistr, ...); | |
1492 Initialize the Eistring from somewhere: | |
1493 | |
1494 void eicpy_ei (Eistring *eistr, Eistring *eistr2); | |
1495 ... from another Eistring. | |
1496 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string); | |
1497 ... from a Lisp_Object string. | |
867 | 1498 void eicpy_ch (Eistring *eistr, Ichar ch); |
1499 ... from an Ichar (this can be a conventional C character). | |
771 | 1500 |
1501 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string, | |
1502 Bytecount off, Charcount charoff, | |
1503 Bytecount len, Charcount charlen); | |
1504 ... from a section of a Lisp_Object string. | |
1505 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf, | |
1506 Bytecount off, Charcount charoff, | |
1507 Bytecount len, Charcount charlen); | |
1508 ... from a section of a Lisp_Object buffer. | |
867 | 1509 void eicpy_raw (Eistring *eistr, const Ibyte *data, Bytecount len); |
771 | 1510 ... from raw internal-format data in the default internal format. |
867 | 1511 void eicpy_rawz (Eistring *eistr, const Ibyte *data); |
771 | 1512 ... from raw internal-format data in the default internal format |
1513 that is "null-terminated" (the meaning of this depends on the nature | |
1514 of the default internal format). | |
867 | 1515 void eicpy_raw_fmt (Eistring *eistr, const Ibyte *data, Bytecount len, |
826 | 1516 Internal_Format intfmt, Lisp_Object object); |
771 | 1517 ... from raw internal-format data in the specified format. |
867 | 1518 void eicpy_rawz_fmt (Eistring *eistr, const Ibyte *data, |
826 | 1519 Internal_Format intfmt, Lisp_Object object); |
771 | 1520 ... from raw internal-format data in the specified format that is |
1521 "null-terminated" (the meaning of this depends on the nature of | |
1522 the specific format). | |
2421 | 1523 void eicpy_ascii (Eistring *eistr, const Ascbyte *ascstr); |
771 | 1524 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1525 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
2421 | 1526 void eicpy_ascii_len (Eistring *eistr, const Ascbyte *ascstr, len); |
771 | 1527 ... from an ASCII string, with length specified. Non-ASCII characters |
2500 | 1528 in the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1529 void eicpy_ext (Eistring *eistr, const Extbyte *extdata, |
1318 | 1530 Lisp_Object codesys); |
771 | 1531 ... from external null-terminated data, with coding system specified. |
1532 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata, | |
1318 | 1533 Bytecount extlen, Lisp_Object codesys); |
771 | 1534 ... from external data, with length and coding system specified. |
1535 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream); | |
1536 ... from an lstream; reads data till eof. Data must be in default | |
1537 internal format; otherwise, interpose a decoding lstream. | |
1538 | |
1539 | |
1540 ********************************************** | |
1541 * Getting the data out of the Eistring * | |
1542 ********************************************** | |
1543 | |
867 | 1544 Ibyte *eidata (Eistring *eistr); |
771 | 1545 Return a pointer to the raw data in an Eistring. This is NOT |
1546 a copy. | |
1547 | |
1548 Lisp_Object eimake_string (Eistring *eistr); | |
1549 Make a Lisp string out of the Eistring. | |
1550 | |
1551 Lisp_Object eimake_string_off (Eistring *eistr, | |
1552 Bytecount off, Charcount charoff, | |
1553 Bytecount len, Charcount charlen); | |
1554 Make a Lisp string out of a section of the Eistring. | |
1555 | |
867 | 1556 void eicpyout_alloca (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1557 LVALUE: Bytecount len_out); |
851 | 1558 Make an ALLOCA() copy of the data in the Eistring, using the |
1559 default internal format. Due to the nature of ALLOCA(), this | |
771 | 1560 must be a macro, with all lvalues passed in as parameters. |
793 | 1561 (More specifically, not all compilers correctly handle using |
851 | 1562 ALLOCA() as the argument to a function call -- GCC on x86 |
1563 didn't use to, for example.) A pointer to the ALLOCA()ed data | |
793 | 1564 is stored in PTR_OUT, and the length of the data (not including |
1565 the terminating zero) is stored in LEN_OUT. | |
771 | 1566 |
867 | 1567 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1568 LVALUE: Bytecount len_out, |
826 | 1569 Internal_Format intfmt, Lisp_Object object); |
771 | 1570 Like eicpyout_alloca(), but converts to the specified internal |
1571 format. (No formats other than FORMAT_DEFAULT are currently | |
1572 implemented, and you get an assertion failure if you try.) | |
1573 | |
867 | 1574 Ibyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out); |
771 | 1575 Make a malloc() copy of the data in the Eistring, using the |
1576 default internal format. This is a real function. No lvalues | |
1577 passed in. Returns the new data, and stores the length (not | |
1578 including the terminating zero) using INTLEN_OUT, unless it's | |
1579 a NULL pointer. | |
1580 | |
867 | 1581 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt, |
826 | 1582 Bytecount *intlen_out, Lisp_Object object); |
771 | 1583 Like eicpyout_malloc(), but converts to the specified internal |
1584 format. (No formats other than FORMAT_DEFAULT are currently | |
1585 implemented, and you get an assertion failure if you try.) | |
1586 | |
1587 | |
1588 ********************************************** | |
1589 * Moving to the heap * | |
1590 ********************************************** | |
1591 | |
1592 void eito_malloc (Eistring *eistr); | |
1593 Move this Eistring to the heap. Its data will be stored in a | |
1594 malloc()ed block rather than the stack. Subsequent changes to | |
1595 this Eistring will realloc() the block as necessary. Use this | |
1596 when you want the Eistring to remain in scope past the end of | |
1597 this function call. You will have to manually free the data | |
1598 in the Eistring using eifree(). | |
1599 | |
1600 void eito_alloca (Eistring *eistr); | |
1601 Move this Eistring back to the stack, if it was moved to the | |
1602 heap with eito_malloc(). This will automatically free any | |
1603 heap-allocated data. | |
1604 | |
1605 | |
1606 | |
1607 ********************************************** | |
1608 * Retrieving the length * | |
1609 ********************************************** | |
1610 | |
1611 Bytecount eilen (Eistring *eistr); | |
1612 Return the length of the internal data, in bytes. See also | |
1613 eiextlen(), below. | |
1614 Charcount eicharlen (Eistring *eistr); | |
1615 Return the length of the internal data, in characters. | |
1616 | |
1617 | |
1618 ********************************************** | |
1619 * Working with positions * | |
1620 ********************************************** | |
1621 | |
1622 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos); | |
1623 Convert a char offset to a byte offset. | |
1624 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos); | |
1625 Convert a byte offset to a char offset. | |
1626 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos); | |
1627 Increment the given position by one character. | |
1628 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1629 Increment the given position by N characters. | |
1630 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos); | |
1631 Decrement the given position by one character. | |
1632 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1633 Decrement the given position by N characters. | |
1634 | |
1635 | |
1636 ********************************************** | |
1637 * Getting the character at a position * | |
1638 ********************************************** | |
1639 | |
867 | 1640 Ichar eigetch (Eistring *eistr, Bytecount bytepos); |
771 | 1641 Return the character at a particular byte offset. |
867 | 1642 Ichar eigetch_char (Eistring *eistr, Charcount charpos); |
771 | 1643 Return the character at a particular character offset. |
1644 | |
1645 | |
1646 ********************************************** | |
1647 * Setting the character at a position * | |
1648 ********************************************** | |
1649 | |
867 | 1650 Ichar eisetch (Eistring *eistr, Bytecount bytepos, Ichar chr); |
771 | 1651 Set the character at a particular byte offset. |
867 | 1652 Ichar eisetch_char (Eistring *eistr, Charcount charpos, Ichar chr); |
771 | 1653 Set the character at a particular character offset. |
1654 | |
1655 | |
1656 ********************************************** | |
1657 * Concatenation * | |
1658 ********************************************** | |
1659 | |
1660 void eicat_* (Eistring *eistr, ...); | |
1661 Concatenate onto the end of the Eistring, with data coming from the | |
1662 same places as above: | |
1663 | |
1664 void eicat_ei (Eistring *eistr, Eistring *eistr2); | |
1665 ... from another Eistring. | |
2421 | 1666 void eicat_ascii (Eistring *eistr, Ascbyte *ascstr); |
771 | 1667 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1668 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
867 | 1669 void eicat_raw (ei, const Ibyte *data, Bytecount len); |
771 | 1670 ... from raw internal-format data in the default internal format. |
867 | 1671 void eicat_rawz (ei, const Ibyte *data); |
771 | 1672 ... from raw internal-format data in the default internal format |
1673 that is "null-terminated" (the meaning of this depends on the nature | |
1674 of the default internal format). | |
1675 void eicat_lstr (ei, Lisp_Object lisp_string); | |
1676 ... from a Lisp_Object string. | |
867 | 1677 void eicat_ch (ei, Ichar ch); |
1678 ... from an Ichar. | |
771 | 1679 |
1680 (All except the first variety are convenience functions. | |
1681 In the general case, create another Eistring from the source.) | |
1682 | |
1683 | |
1684 ********************************************** | |
1685 * Replacement * | |
1686 ********************************************** | |
1687 | |
1688 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1689 Bytecount len, Charcount charlen, ...); | |
1690 Replace a section of the Eistring, specifically: | |
1691 | |
1692 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1693 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1694 ... with another Eistring. | |
2421 | 1695 void eisub_ascii (Eistring *eistr, Bytecount off, Charcount charoff, |
1696 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
771 | 1697 ... with an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1698 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1699 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff, |
867 | 1700 Bytecount len, Charcount charlen, Ichar ch); |
1701 ... with an Ichar. | |
771 | 1702 |
1703 void eidel (Eistring *eistr, Bytecount off, Charcount charoff, | |
1704 Bytecount len, Charcount charlen); | |
1705 Delete a section of the Eistring. | |
1706 | |
1707 | |
1708 ********************************************** | |
1709 * Converting to an external format * | |
1710 ********************************************** | |
1711 | |
1318 | 1712 void eito_external (Eistring *eistr, Lisp_Object codesys); |
771 | 1713 Convert the Eistring to an external format and store the result |
1714 in the string. NOTE: Further changes to the Eistring will *NOT* | |
1715 change the external data stored in the string. You will have to | |
1716 call eito_external() again in such a case if you want the external | |
1717 data. | |
1718 | |
1719 Extbyte *eiextdata (Eistring *eistr); | |
1720 Return a pointer to the external data stored in the Eistring as | |
1721 a result of a prior call to eito_external(). | |
1722 | |
1723 Bytecount eiextlen (Eistring *eistr); | |
1724 Return the length in bytes of the external data stored in the | |
1725 Eistring as a result of a prior call to eito_external(). | |
1726 | |
1727 | |
1728 ********************************************** | |
1729 * Searching in the Eistring for a character * | |
1730 ********************************************** | |
1731 | |
867 | 1732 Bytecount eichr (Eistring *eistr, Ichar chr); |
1733 Charcount eichr_char (Eistring *eistr, Ichar chr); | |
1734 Bytecount eichr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1735 Charcount charoff); |
867 | 1736 Charcount eichr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1737 Charcount charoff); |
867 | 1738 Bytecount eirchr (Eistring *eistr, Ichar chr); |
1739 Charcount eirchr_char (Eistring *eistr, Ichar chr); | |
1740 Bytecount eirchr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1741 Charcount charoff); |
867 | 1742 Charcount eirchr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1743 Charcount charoff); |
1744 | |
1745 | |
1746 ********************************************** | |
1747 * Searching in the Eistring for a string * | |
1748 ********************************************** | |
1749 | |
1750 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2); | |
1751 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1752 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1753 Charcount charoff); | |
1754 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1755 Bytecount off, Charcount charoff); | |
1756 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2); | |
1757 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1758 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1759 Charcount charoff); | |
1760 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1761 Bytecount off, Charcount charoff); | |
1762 | |
2421 | 1763 Bytecount eistr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1764 Charcount eistr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1765 Bytecount eistr_ascii_off (Eistring *eistr, Ascbyte *ascstr, Bytecount off, | |
771 | 1766 Charcount charoff); |
2421 | 1767 Charcount eistr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1768 Bytecount off, Charcount charoff); |
2421 | 1769 Bytecount eirstr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1770 Charcount eirstr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1771 Bytecount eirstr_ascii_off (Eistring *eistr, Ascbyte *ascstr, | |
771 | 1772 Bytecount off, Charcount charoff); |
2421 | 1773 Charcount eirstr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1774 Bytecount off, Charcount charoff); |
1775 | |
1776 | |
1777 ********************************************** | |
1778 * Comparison * | |
1779 ********************************************** | |
1780 | |
1781 int eicmp_* (Eistring *eistr, ...); | |
1782 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1783 Bytecount len, Charcount charlen, ...); | |
1784 int eicasecmp_* (Eistring *eistr, ...); | |
1785 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1786 Bytecount len, Charcount charlen, ...); | |
1787 int eicasecmp_i18n_* (Eistring *eistr, ...); | |
1788 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1789 Bytecount len, Charcount charlen, ...); | |
1790 | |
1791 Compare the Eistring with the other data. Return value same as | |
1792 from strcmp. The `*' is either `ei' for another Eistring (in | |
1793 which case `...' is an Eistring), or `ascii' for a pure-ASCII string | |
1794 (in which case `...' is a pointer to that string). For anything | |
1795 more complex, first create an Eistring out of the source. | |
1796 Comparison is either simple (`eicmp_...'), ASCII case-folding | |
1797 (`eicasecmp_...'), or multilingual case-folding | |
1798 (`eicasecmp_i18n_...'). | |
1799 | |
1800 | |
1801 More specifically, the prototypes are: | |
1802 | |
1803 int eicmp_ei (Eistring *eistr, Eistring *eistr2); | |
1804 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1805 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1806 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2); | |
1807 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1808 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1809 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2); | |
1810 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off, | |
1811 Charcount charoff, Bytecount len, | |
1812 Charcount charlen, Eistring *eistr2); | |
1813 | |
2421 | 1814 int eicmp_ascii (Eistring *eistr, Ascbyte *ascstr); |
1815 int eicmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
1816 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
1817 int eicasecmp_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1818 int eicasecmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1819 Bytecount len, Charcount charlen, |
2421 | 1820 Ascbyte *ascstr); |
1821 int eicasecmp_i18n_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1822 int eicasecmp_i18n_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1823 Bytecount len, Charcount charlen, |
2421 | 1824 Ascbyte *ascstr); |
771 | 1825 |
1826 | |
1827 ********************************************** | |
1828 * Case-changing the Eistring * | |
1829 ********************************************** | |
1830 | |
1831 void eilwr (Eistring *eistr); | |
1832 Convert all characters in the Eistring to lowercase. | |
1833 void eiupr (Eistring *eistr); | |
1834 Convert all characters in the Eistring to uppercase. | |
1835 */ | |
1836 | |
1837 | |
1838 /* Principles for writing Eistring functions: | |
1839 | |
1840 (1) Unfortunately, we have to write most of the Eistring functions | |
851 | 1841 as macros, because of the use of ALLOCA(). The principle used |
771 | 1842 below to assure no conflict in local variables is to prefix all |
1843 local variables with "ei" plus a number, which should be unique | |
1844 among macros. In practice, when finding a new number, find the | |
1845 highest so far used, and add 1. | |
1846 | |
1847 (2) We also suffix the Eistring fields with an _ to avoid problems | |
1848 with macro parameters of the same name. (And as the standard | |
1849 signal not to access these fields directly.) | |
1850 | |
1851 (3) We maintain both the length in bytes and chars of the data in | |
1852 the Eistring at all times, for convenient retrieval by outside | |
1853 functions. That means when writing functions that manipulate | |
1854 Eistrings, you too need to keep both lengths up to date for all | |
1855 data that you work with. | |
1856 | |
1857 (4) When writing a new type of operation (e.g. substitution), you | |
1858 will often find yourself working with outside data, and thus | |
1859 have a series of related API's, for different forms that the | |
1860 outside data is in. Generally, you will want to choose a | |
1861 subset of the forms supported by eicpy_*, which has to be | |
1862 totally general because that's the fundamental way to get data | |
1863 into an Eistring, and once the data is in the string, it | |
1864 would be possible to create a whole series of Ei operations that work on | |
1865 nothing but Eistrings. Although theoretically nice, in | |
1866 practice it's a hassle, so we suggest that you provide | |
1867 convenience functions. In particular, there are two paths you | |
1868 can take. One is minimalist -- it only allows other Eistrings | |
867 | 1869 and ASCII data, and Ichars if the particular operation makes |
771 | 1870 sense with a character. The other provides interfaces for the |
1871 most commonly-used forms -- Eistring, ASCII data, Lisp string, | |
1872 raw internal-format string with length, raw internal-format | |
867 | 1873 string without, and possibly Ichar. (In the function names, |
771 | 1874 these are designated `ei', `ascii', `lstr', `raw', `rawz', and |
1875 `ch', respectively.) | |
1876 | |
1877 (5) When coding a new type of operation, such as was discussed in | |
1878 the previous point, the correct approach is to declare a worker | |
1879 function that does the work of everything, and is called by the | |
1880 other "container" macros that handle the different outside data | |
1881 forms. The data coming into the worker function, which | |
1882 typically ends in `_1', is in the form of three parameters: | |
1883 DATA, LEN, CHARLEN. (See point [3] about having two lengths and | |
1884 keeping them in sync.) | |
1885 | |
1886 (6) Handling argument evaluation in macros: We take great care | |
1887 never to evaluate any argument more than once in any macro, | |
1888 except the initial Eistring parameter. This can and will be | |
1889 evaluated multiple times, but it should pretty much always just | |
1890 be a simple variable. This means, for example, that if an | |
1891 Eistring is the second (not first) argument of a macro, it | |
1892 doesn't fall under the "initial Eistring" exemption, so it | |
1893 needs protection against multi-evaluation. (Take the address of | |
1894 the Eistring structure, store it in a temporary variable, and use | |
1895 that temporary variable for all access to the Eistring.) | |
1896 Essentially, we want it to appear as if these Eistring macros | |
1897 are functions -- we would like to declare them as functions but | |
851 | 1898 they use ALLOCA(), so we can't (and we can't make them inline |
1899 functions either -- ALLOCA() is explicitly disallowed in inline | |
771 | 1900 functions.) |
1901 | |
1902 (7) Note that our rules regarding multiple evaluation are *more* | |
1903 strict than the rules listed above under the heading "working | |
1904 with raw internal-format data". | |
1905 */ | |
1906 | |
1907 | |
1908 /* ----- Declaration ----- */ | |
1909 | |
1910 typedef struct | |
1911 { | |
1912 /* Data for the Eistring, stored in the default internal format. | |
1913 Always includes terminating null. */ | |
867 | 1914 Ibyte *data_; |
771 | 1915 /* Total number of bytes allocated in DATA (including null). */ |
1916 Bytecount max_size_allocated_; | |
1917 Bytecount bytelen_; /* Length of DATA in bytes, not counting the terminating null. */ | |
1918 Charcount charlen_; /* Length of DATA in characters; kept in sync with bytelen_. */ | |
1919 int mallocp_; /* Non-zero: DATA is grown with xrealloc() and released with xfree(); zero: DATA lives in ALLOCA() space. */ | |
1920 | |
1921 Extbyte *extdata_; /* External-format data left by a prior eito_external(), if any. */ | |
1922 Bytecount extlen_; /* Length of EXTDATA in bytes. */ | |
1923 } Eistring; | |
1924 | |
1925 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init; | |
1926 | |
1927 #define DECLARE_EISTRING(name) \ | |
1928 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \ | |
1929 Eistring *name = & __ ## name ## __storage__ | |
1930 #define DECLARE_EISTRING_MALLOC(name) \ | |
1931 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \ | |
1932 Eistring *name = & __ ## name ## __storage__ | |
1933 | |
1934 #define eiinit(ei) \ | |
1935 do { \ | |
793 | 1936 *(ei) = the_eistring_zero_init; \ |
771 | 1937 } while (0) |
1938 | |
1939 #define eiinit_malloc(ei) \ | |
1940 do { \ | |
793 | 1941 *(ei) = the_eistring_malloc_zero_init; \ |
771 | 1942 } while (0) |
1943 | |
1944 | |
1945 /* ----- Utility ----- */ | |
1946 | |
1947 /* Make sure both LEN and CHARLEN are specified, in case one is given | |
1948 as -1. PTR evaluated at most once, others multiply. */ | |
1949 #define eifixup_bytechar(ptr, len, charlen) \ | |
1950 do { \ | |
1951 if ((len) == -1) \ | |
1952 (len) = charcount_to_bytecount (ptr, charlen); \ | |
1953 else if ((charlen) == -1) \ | |
1954 (charlen) = bytecount_to_charcount (ptr, len); \ | |
1955 } while (0) | |
1956 | |
1957 /* Make sure LEN is specified, in case it is given as -1. PTR | |
1958 evaluated at most once, others multiply. */ | |
1959 #define eifixup_byte(ptr, len, charlen) \ | |
1960 do { \ | |
1961 if ((len) == -1) \ | |
1962 (len) = charcount_to_bytecount (ptr, charlen); \ | |
1963 } while (0) | |
1964 | |
1965 /* Make sure CHARLEN is specified, in case it is given as -1. PTR | |
1966 evaluated at most once, others multiply. */ | |
1967 #define eifixup_char(ptr, len, charlen) \ | |
1968 do { \ | |
1969 if ((charlen) == -1) \ | |
1970 (charlen) = bytecount_to_charcount (ptr, len); \ | |
1971 } while (0) | |
1972 | |
1973 | |
1974 | |
1975 /* Make sure we can hold NEWBYTELEN bytes (which is NEWCHARLEN chars) | |
1976 plus a zero terminator. Preserve existing data as much as possible, | |
1977 including existing zero terminator. Put a new zero terminator where it | |
1978 should go if NEWZ is non-zero. All args but EI are evaluated only once. */ | |
1979 | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1980 #define EI_ALLOC(ei, newbytelen, newcharlen, newz) \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1981 do { \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1982 int ei1oldeibytelen = (ei)->bytelen_; \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1983 \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1984 (ei)->charlen_ = (newcharlen); \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1985 (ei)->bytelen_ = (newbytelen); \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1986 \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1987 if (ei1oldeibytelen != (ei)->bytelen_) \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1988 { \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1989 int ei1newsize = (ei)->max_size_allocated_; \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1990 while (ei1newsize < (ei)->bytelen_ + 1) \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1991 { \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1992 ei1newsize = (int) (ei1newsize * 1.5); \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1993 if (ei1newsize < 32) \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1994 ei1newsize = 32; \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1995 } \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1996 if (ei1newsize != (ei)->max_size_allocated_) \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1997 { \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
1998 if ((ei)->mallocp_) \ |
771 | 1999 /* xrealloc always preserves existing data as much as possible */ \ |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2000 (ei)->data_ = (Ibyte *) xrealloc ((ei)->data_, ei1newsize); \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2001 else \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2002 { \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2003 /* We don't have realloc, so ALLOCA() more space and copy the \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2004 data into it. */ \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2005 Ibyte *ei1oldeidata = (ei)->data_; \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2006 (ei)->data_ = alloca_ibytes (ei1newsize); \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2007 if (ei1oldeidata) \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2008 memcpy ((ei)->data_, ei1oldeidata, ei1oldeibytelen + 1); \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2009 } \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2010 (ei)->max_size_allocated_ = ei1newsize; \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2011 } \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2012 if (newz) \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2013 (ei)->data_[(ei)->bytelen_] = '\0'; \ |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2014 } \ |
771 | 2015 } while (0) |
2016 | |
2017 #define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen) \ | |
2018 do { \ | |
2019 EI_ALLOC (ei, bytelen, charlen, 1); \ | |
2020 memcpy ((ei)->data_, data, (ei)->bytelen_); \ | |
2021 } while (0) | |
2022 | |
2023 /* ----- Initialization ----- */ | |
2024 | |
2025 #define eicpy_ei(ei, eicpy) \ | |
2026 do { \ | |
2027 const Eistring *ei2 = (eicpy); \ | |
2028 EI_ALLOC_AND_COPY (ei, ei2->data_, ei2->bytelen_, ei2->charlen_); \ | |
2029 } while (0) | |
2030 | |
2031 #define eicpy_lstr(ei, lisp_string) \ | |
2032 do { \ | |
2033 Lisp_Object ei3 = (lisp_string); \ | |
2034 EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3), XSTRING_LENGTH (ei3), \ | |
1333 | 2035 string_char_length (ei3)); \ |
771 | 2036 } while (0) |
2037 | |
2038 #define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen) \ | |
2039 do { \ | |
2040 Lisp_Object ei23lstr = (lisp_string); \ | |
2041 int ei23off = (off); \ | |
2042 int ei23charoff = (charoff); \ | |
2043 int ei23len = (len); \ | |
2044 int ei23charlen = (charlen); \ | |
867 | 2045 const Ibyte *ei23data = XSTRING_DATA (ei23lstr); \ |
771 | 2046 \ |
2047 int ei23oldbytelen = (ei)->bytelen_; \ | |
2048 \ | |
2049 eifixup_byte (ei23data, ei23off, ei23charoff); \ | |
2050 eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen); \ | |
2051 \ | |
2052 EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen); \ | |
2053 } while (0) | |
2054 | |
826 | 2055 #define eicpy_raw_fmt(ei, ptr, len, fmt, object) \ |
771 | 2056 do { \ |
1333 | 2057 const Ibyte *ei12ptr = (ptr); \ |
771 | 2058 Internal_Format ei12fmt = (fmt); \ |
2059 int ei12len = (len); \ | |
2060 assert (ei12fmt == FORMAT_DEFAULT); \ | |
2061 EI_ALLOC_AND_COPY (ei, ei12ptr, ei12len, \ | |
2062 bytecount_to_charcount (ei12ptr, ei12len)); \ | |
2063 } while (0) | |
2064 | |
826 | 2065 #define eicpy_raw(ei, ptr, len) \ |
2066 eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil) | |
2067 | |
2068 #define eicpy_rawz_fmt(ei, ptr, fmt, object) \ | |
2069 do { /* Copy the null-terminated internal-format string PTR into EI. */ \ | |
867 | 2070 const Ibyte *ei12p1ptr = (ptr); \ |
826 | 2071 Internal_Format ei12p1fmt = (fmt); \ |
2072 assert (ei12p1fmt == FORMAT_DEFAULT); \ | |
2073 /* Pass the temporary on, so that FMT is evaluated only once. */ \ | |
2073 eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt, object); \ | |
771 | 2074 } while (0) |
2075 | |
826 | 2076 #define eicpy_rawz(ei, ptr) eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil) |
771 | 2077 |
1333 | 2078 #define eicpy_ch(ei, ch) \ |
2079 do { \ | |
867 | 2080 Ibyte ei12p2[MAX_ICHAR_LEN]; \ |
2081 Bytecount ei12p2len = set_itext_ichar (ei12p2, ch); \ | |
1333 | 2082 EI_ALLOC_AND_COPY (ei, ei12p2, ei12p2len, 1); \ |
771 | 2083 } while (0) |
2084 | |
2421 | 2085 #define eicpy_ascii(ei, ascstr) \ |
771 | 2086 do { \ |
2421 | 2087 const Ascbyte *ei4 = (ascstr); \ |
771 | 2088 \ |
2367 | 2089 ASSERT_ASCTEXT_ASCII (ei4); \ |
771 | 2090 eicpy_ext (ei, ei4, Qbinary); \ |
2091 } while (0) | |
2092 | |
2421 | 2093 #define eicpy_ascii_len(ei, ascstr, c_len) \ |
771 | 2094 do { \ |
2421 | 2095 const Ascbyte *ei6 = (ascstr); \ |
771 | 2096 int ei6len = (c_len); \ |
2097 \ | |
2367 | 2098 ASSERT_ASCTEXT_ASCII_LEN (ei6, ei6len); \ |
771 | 2099 eicpy_ext_len (ei, ei6, ei6len, Qbinary); \ |
2100 } while (0) | |
2101 | |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2102 #define eicpy_ext_len(ei, extdata, extlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2103 do { \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2104 const Extbyte *ei7 = (extdata); \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2105 int ei7len = (extlen); \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2106 \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2107 TO_INTERNAL_FORMAT (DATA, (ei7, ei7len), \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2108 ALLOCA, ((ei)->data_, (ei)->bytelen_), \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2109 codesys); \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2110 (ei)->max_size_allocated_ = (ei)->bytelen_ + 1; \ |
771 | 2111 (ei)->charlen_ = bytecount_to_charcount ((ei)->data_, (ei)->bytelen_); \ |
2112 } while (0) | |
2113 | |
1318 | 2114 #define eicpy_ext(ei, extdata, codesys) \ |
2115 do { \ | |
2116 const Extbyte *ei8 = (extdata); \ | |
2117 \ | |
2118 eicpy_ext_len (ei, ei8, dfc_external_data_len (ei8, codesys), \ | |
2119 codesys); \ | |
771 | 2120 } while (0) |
2121 | |
2122 #define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) \ | |
2123 NOT YET IMPLEMENTED | |
2124 | |
2125 #define eicpy_lstream(eistr, lstream) \ | |
2126 NOT YET IMPLEMENTED | |
2127 | |
867 | 2128 #define eireset(eistr) eicpy_rawz (eistr, (Ibyte *) "") |
771 | 2129 |
2130 /* ----- Getting the data out of the Eistring ----- */ | |
2131 | |
2132 #define eidata(ei) ((ei)->data_) | |
2133 | |
2134 #define eimake_string(ei) make_string (eidata (ei), eilen (ei)) | |
2135 | |
2136 #define eimake_string_off(eistr, off, charoff, len, charlen) \ | |
2137 do { /* Build a Lisp string from the section of EISTR at OFF/CHAROFF of length LEN/CHARLEN; a byte value of -1 is computed from its char counterpart. */ \ | |
2138 Lisp_Object ei24lstr; /* NOTE(review): never used in this macro */ \ | |
2139 int ei24off = (off); \ | |
2140 int ei24charoff = (charoff); \ | |
2141 int ei24len = (len); \ | |
2142 int ei24charlen = (charlen); \ | |
2143 \ | |
2144 eifixup_byte ((eistr)->data_, ei24off, ei24charoff); \ | |
2145 eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen); \ | |
2146 \ | |
2147 /* NOTE(review): this `return' leaves the *enclosing function*, so the macro cannot be used as an expression -- confirm this is intended. */ \ | |
2147 return make_string ((eistr)->data_ + ei24off, ei24len); \ | |
2148 } while (0) | |
2149 | |
2150 #define eicpyout_alloca(eistr, ptrout, lenout) \ | |
826 | 2151 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil) |
771 | 2152 #define eicpyout_malloc(eistr, lenout) \ |
826 | 2153 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil) |
867 | 2154 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, |
826 | 2155 Internal_Format fmt, Lisp_Object object); |
2156 #define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object) \ | |
771 | 2157 do { /* Copy EISTR's data, including its terminating null, into fresh ALLOCA() space; store the pointer in PTROUT and the byte length in LENOUT. */ \ |
2158 Internal_Format ei23fmt = (fmt); \ | |
867 | 2159 Ibyte **ei23ptrout = &(ptrout); /* address of the caller's pointer, so PTROUT is evaluated once */ \ |
771 | 2160 Bytecount *ei23lenout = &(lenout); \ |
2161 \ | |
2162 assert (ei23fmt == FORMAT_DEFAULT); \ | |
2163 \ | |
2164 *ei23lenout = (eistr)->bytelen_; \ | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2165 *ei23ptrout = alloca_ibytes ((eistr)->bytelen_ + 1); \ |
771 | 2166 memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1); \ |
2167 } while (0) | |
2168 | |
2169 /* ----- Moving to the heap ----- */ | |
2170 | |
2171 #define eifree(ei) \ | |
2172 do { \ | |
2173 if ((ei)->mallocp_) \ | |
2174 { \ | |
2175 if ((ei)->data_) \ | |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2176 { \ |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2177 xfree ((ei)->data_); \ |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2178 (ei)->data_ = 0; \ |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2179 } \ |
771 | 2180 if ((ei)->extdata_) \ |
5169
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2181 { \ |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2182 xfree ((ei)->extdata_); \ |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2183 (ei)->extdata_ = 0; \ |
6c6d78781d59
cleanup of code related to xfree(), better KKCC backtrace capabilities, document XD_INLINE_LISP_OBJECT_BLOCK_PTR, fix some memory leaks, other code cleanup
Ben Wing <ben@xemacs.org>
parents:
5092
diff
changeset
|
2184 } \ |
771 | 2185 eiinit_malloc (ei); \ |
2186 } \ | |
2187 else \ | |
2188 eiinit (ei); \ | |
2189 } while (0) | |
2190 | |
2191 int eifind_large_enough_buffer (int oldbufsize, int needed_size); | |
2192 void eito_malloc_1 (Eistring *ei); | |
2193 | |
2194 #define eito_malloc(ei) eito_malloc_1 (ei) | |
2195 | |
2196 #define eito_alloca(ei) \ | |
2197 do { /* Move a malloc()ed Eistring's data and external data into ALLOCA() space, freeing the heap copies. */ \ | |
2198 if (!(ei)->mallocp_) \ | |
2199 break; /* not malloc()ed: leave this macro only; `return' would exit the calling function */ \ | |
2200 (ei)->mallocp_ = 0; \ | |
2201 if ((ei)->data_) \ | |
2202 { \ | |
867 | 2203 Ibyte *ei13newdata; \ |
771 | 2204 \ |
2205 (ei)->max_size_allocated_ = \ | |
2206 eifind_large_enough_buffer (0, (ei)->bytelen_ + 1); \ | |
2367 | 2207 ei13newdata = alloca_ibytes ((ei)->max_size_allocated_); \ |
771 | 2208 memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1); \ |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2209 xfree ((ei)->data_); \ |
771 | 2210 (ei)->data_ = ei13newdata; \ |
2211 } \ | |
2212 \ | |
2213 if ((ei)->extdata_) \ | |
2214 { \ | |
2367 | 2215 Extbyte *ei13newdata = alloca_extbytes ((ei)->extlen_ + 2); \ |
771 | 2216 \ |
2217 memcpy (ei13newdata, (ei)->extdata_, (ei)->extlen_); \ | |
2218 /* Double null-terminate in case of Unicode data */ \ | |
2219 ei13newdata[(ei)->extlen_] = '\0'; \ | |
2220 ei13newdata[(ei)->extlen_ + 1] = '\0'; \ | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2221 xfree ((ei)->extdata_); \ |
771 | 2222 (ei)->extdata_ = ei13newdata; \ |
2223 } \ | |
2224 } while (0) | |
2225 | |
2226 | |
2227 /* ----- Retrieving the length ----- */ | |
2228 | |
2229 #define eilen(ei) ((ei)->bytelen_) | |
2230 #define eicharlen(ei) ((ei)->charlen_) | |
2231 | |
2232 | |
2233 /* ----- Working with positions ----- */ | |
2234 | |
2235 #define eicharpos_to_bytepos(ei, charpos) \ | |
2236 charcount_to_bytecount ((ei)->data_, charpos) | |
2237 #define eibytepos_to_charpos(ei, bytepos) \ | |
2238 bytecount_to_charcount ((ei)->data_, bytepos) | |
2239 | |
2240 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr, | |
2241 Bytecount bytepos, | |
2242 Charcount n)) | |
2243 { | |
867 | 2244 Ibyte *pos = eistr->data_ + bytepos; |
814 | 2245 Charcount i; |
771 | 2246 |
800 | 2247 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2248 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2249 /* We could check N more correctly now, but that would require a |
2250 call to bytecount_to_charcount(), which would be needlessly | |
2251 expensive (it would convert O(N) algorithms into O(N^2) algorithms | |
800 | 2252 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are |
867 | 2253 guaranteed to catch it either inside INC_IBYTEPTR() or in the check |
771 | 2254 below. */ |
2255 for (i = 0; i < n; i++) | |
867 | 2256 INC_IBYTEPTR (pos); |
800 | 2257 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_); |
771 | 2258 return pos - eistr->data_; |
2259 } | |
2260 | |
2261 #define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1) /* byte position one character past BYTEPOS; no space before `(', or this becomes an object-like macro */ | |
2262 #define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n) /* likewise, N characters past BYTEPOS */ | |
2263 | |
2264 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr, | |
2265 Bytecount bytepos, | |
2266 Charcount n)) | |
2267 { /* Step back N times with DEC_IBYTEPTR() from BYTEPOS in EISTR and return the resulting byte offset. */ | |
867 | 2268 Ibyte *pos = eistr->data_ + bytepos; |
771 | 2269 Charcount i; |
2270 | |
800 | 2271 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2272 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2273 /* N is not checked exactly here (that would need a costly bytecount_to_charcount() call, see eiincpos_1); a bad N is caught by the check after the loop. */ |
2274 for (i = 0; i < n; i++) | |
867 | 2275 DEC_IBYTEPTR (pos); |
800 | 2276 text_checking_assert (pos - eistr->data_ >= 0); /* moving backwards can only underrun the start; the upper bound was checked on entry */ |
771 | 2277 return pos - eistr->data_; |
2278 } | |
2279 | |
2280 #define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1) /* byte position one character before BYTEPOS; no space before `(', or this becomes an object-like macro */ | |
2281 #define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n) /* likewise, N characters before BYTEPOS */ | |
2282 | |
2283 | |
2284 /* ----- Getting the character at a position ----- */ | |
2285 | |
2286 #define eigetch(ei, bytepos) \ | |
867 | 2287 itext_ichar ((ei)->data_ + (bytepos)) |
2288 #define eigetch_char(ei, charpos) itext_ichar_n ((ei)->data_, charpos) | |
771 | 2289 |
2290 | |
2291 /* ----- Setting the character at a position ----- */ | |
2292 | |
2293 #define eisetch(ei, bytepos, chr) \ | |
2294 eisub_ch (ei, bytepos, -1, -1, 1, chr) | |
2295 #define eisetch_char(ei, charpos, chr) \ | |
2296 eisub_ch (ei, -1, charpos, -1, 1, chr) | |
2297 | |
2298 | |
2299 /* ----- Concatenation ----- */ | |
2300 | |
2301 #define eicat_1(ei, data, bytelen, charlen) \ | |
2302 do { \ | |
2303 int ei14oldeibytelen = (ei)->bytelen_; \ | |
2304 int ei14bytelen = (bytelen); \ | |
2305 EI_ALLOC (ei, (ei)->bytelen_ + ei14bytelen, \ | |
2306 (ei)->charlen_ + (charlen), 1); \ | |
2307 memcpy ((ei)->data_ + ei14oldeibytelen, (data), \ | |
2308 ei14bytelen); \ | |
2309 } while (0) | |
2310 | |
2311 #define eicat_ei(ei, ei2) \ | |
2312 do { \ | |
2313 const Eistring *ei9 = (ei2); \ | |
2314 eicat_1 (ei, ei9->data_, ei9->bytelen_, ei9->charlen_); \ | |
2315 } while (0) | |
2316 | |
/* Append the C string ASCSTR (of type const Ascbyte *) to EI.
   The byte length comes from strlen (), so ASCSTR must be
   zero-terminated.  ASSERT_ASCTEXT_ASCII_LEN is applied to the string
   and its length (presumably verifying that it is pure ASCII), and the
   character count is obtained from bytecount_to_charcount ().
   ASCSTR is evaluated once. */
2421 | 2317 #define eicat_ascii(ei, ascstr) \ |
771 | 2318 do { \ |
2421 | 2319 const Ascbyte *ei15 = (ascstr); \ |
771 | 2320 int ei15len = strlen (ei15); \ |
2321 \ | |
2367 | 2322 ASSERT_ASCTEXT_ASCII_LEN (ei15, ei15len); \ |
771 | 2323 eicat_1 (ei, ei15, ei15len, \ |
867 | 2324 bytecount_to_charcount ((Ibyte *) ei15, ei15len)); \ |
771 | 2325 } while (0) |
2326 | |
/* Append LEN bytes starting at DATA (a const Ibyte *) to EI.  The
   character count is computed with bytecount_to_charcount ().
   DATA and LEN are each evaluated once.
   NOTE(review): DATA is presumably required to be well-formed
   internal-format text already; nothing here validates it. */
2327 #define eicat_raw(ei, data, len) \ | |
2328 do { \ | |
2329 int ei16len = (len); \ | |
867 | 2330 const Ibyte *ei16data = (data); \ |
771 | 2331 eicat_1 (ei, ei16data, ei16len, \ |
2332 bytecount_to_charcount (ei16data, ei16len)); \ | |
2333 } while (0) | |
2334 | |
/* Like eicat_raw, but the byte length is taken from qxestrlen (PTR)
   instead of being supplied by the caller, so PTR is presumably
   expected to be zero-terminated.  PTR is evaluated once. */
2335 #define eicat_rawz(ei, ptr) \ | |
2336 do { \ | |
867 | 2337 const Ibyte *ei16p5ptr = (ptr); \ |
771 | 2338 eicat_raw (ei, ei16p5ptr, qxestrlen (ei16p5ptr)); \ |
2339 } while (0) | |
2340 | |
/* Append the text of the Lisp string LISP_STRING to EI, using
   XSTRING_DATA and XSTRING_LENGTH for the bytes and
   string_char_length () for the character count.
   LISP_STRING is evaluated once. */
2341 #define eicat_lstr(ei, lisp_string) \ | |
2342 do { \ | |
2343 Lisp_Object ei17 = (lisp_string); \ | |
2344 eicat_1 (ei, XSTRING_DATA (ei17), XSTRING_LENGTH (ei17), \ | |
826 | 2345 string_char_length (ei17)); \ |
771 | 2346 } while (0) |
2347 | |
/* Append the single character CH to EI.  The character is first written
   into a local buffer of MAX_ICHAR_LEN bytes by set_itext_ichar (),
   whose return value is used as the byte length; the character length
   passed on is always 1. */
2348 #define eicat_ch(ei, ch) \ | |
2349 do { \ | |
1333 | 2350 Ibyte ei22ch[MAX_ICHAR_LEN]; \ |
867 | 2351 Bytecount ei22len = set_itext_ichar (ei22ch, ch); \ |
771 | 2352 eicat_1 (ei, ei22ch, ei22len, 1); \ |
2353 } while (0) | |
2354 | |
2355 | |
2356 /* ----- Replacement ----- */ | |
2357 | |
2358 /* Replace the section of an Eistring at (OFF, LEN) with the data at | |
2359 SRC of length SRCLEN. All positions have corresponding character values, | |
2360 and either can be -1 -- it will be computed from the other. */ | |
2361 | |
/* Common worker for the eisub_* macros and eidel.
   Arguments: OFF/CHAROFF give the start of the section to replace,
   LEN/CHARLEN its size, and SRC/SRCLEN/SRCCHARLEN the replacement data,
   each as a (byte, character) pair.
   Steps, in order:
   1. Every argument except EI is copied into a local, so each is
      evaluated exactly once; the current byte length of EI is saved.
   2. Each (byte, character) pair is passed to eifixup_bytechar ()
      (presumably to fill in whichever member was given as -1).
   3. EI_ALLOC is invoked with the byte and character totals adjusted by
      the difference between the new and the replaced section.
   4. If the section changes size, the tail of the old data, including
      the zero terminator, is moved with memmove () to its new place.
   5. If SRCLEN is positive, the replacement bytes are copied in with
      memcpy (); with SRCLEN 0 (as eidel passes) SRC is never read here.
   EI is evaluated several times and must be free of side effects. */
2362 #define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \ | |
2363 do { \ | |
2364 int ei18off = (off); \ | |
2365 int ei18charoff = (charoff); \ | |
2366 int ei18len = (len); \ | |
2367 int ei18charlen = (charlen); \ | |
867 | 2368 Ibyte *ei18src = (Ibyte *) (src); \ |
771 | 2369 int ei18srclen = (srclen); \ |
2370 int ei18srccharlen = (srccharlen); \ | |
2371 \ | |
2372 int ei18oldeibytelen = (ei)->bytelen_; \ | |
2373 \ | |
2374 eifixup_bytechar ((ei)->data_, ei18off, ei18charoff); \ | |
2375 eifixup_bytechar ((ei)->data_ + ei18off, ei18len, ei18charlen); \ | |
2376 eifixup_bytechar (ei18src, ei18srclen, ei18srccharlen); \ | |
2377 \ | |
2378 EI_ALLOC (ei, (ei)->bytelen_ + ei18srclen - ei18len, \ | |
2379 (ei)->charlen_ + ei18srccharlen - ei18charlen, 0); \ | |
2380 if (ei18len != ei18srclen) \ | |
2381 memmove ((ei)->data_ + ei18off + ei18srclen, \ | |
2382 (ei)->data_ + ei18off + ei18len, \ | |
2383 /* include zero terminator. */ \ | |
2384 ei18oldeibytelen - (ei18off + ei18len) + 1); \ | |
2385 if (ei18srclen > 0) \ | |
2386 memcpy ((ei)->data_ + ei18off, ei18src, ei18srclen); \ | |
2387 } while (0) | |
2388 | |
/* Replace the section of EI described by OFF/CHAROFF/LEN/CHARLEN with
   the full contents of the Eistring EI2, using the byte and character
   lengths stored in EI2.  EI2 is evaluated once. */
2389 #define eisub_ei(ei, off, charoff, len, charlen, ei2) \ | |
2390 do { \ | |
1333 | 2391 const Eistring *ei19 = (ei2); \ |
771 | 2392 eisub_1 (ei, off, charoff, len, charlen, ei19->data_, ei19->bytelen_, \ |
2393 ei19->charlen_); \ | |
2394 } while (0) | |
2395 | |
/* Replace the section of EI described by OFF/CHAROFF/LEN/CHARLEN with
   the C string ASCSTR (const Ascbyte *).  The byte length comes from
   strlen (), ASSERT_ASCTEXT_ASCII_LEN is applied to the string, and the
   source character length is passed as -1, leaving it to eisub_1 to
   work out.  ASCSTR is evaluated once. */
2421 | 2396 #define eisub_ascii(ei, off, charoff, len, charlen, ascstr) \ |
771 | 2397 do { \ |
2421 | 2398 const Ascbyte *ei20 = (ascstr); \ |
771 | 2399 int ei20len = strlen (ei20); \ |
2367 | 2400 ASSERT_ASCTEXT_ASCII_LEN (ei20, ei20len); \ |
771 | 2401 eisub_1 (ei, off, charoff, len, charlen, ei20, ei20len, -1); \ |
2402 } while (0) | |
2403 | |
/* Replace the section of EI described by OFF/CHAROFF/LEN/CHARLEN with
   the single character CH.  CH is written into a local buffer of
   MAX_ICHAR_LEN bytes by set_itext_ichar (), whose return value is used
   as the source byte length; the source character length is 1. */
2404 #define eisub_ch(ei, off, charoff, len, charlen, ch) \ | |
2405 do { \ | |
1333 | 2406 Ibyte ei21ch[MAX_ICHAR_LEN]; \ |
867 | 2407 Bytecount ei21len = set_itext_ichar (ei21ch, ch); \ |
771 | 2408 eisub_1 (ei, off, charoff, len, charlen, ei21ch, ei21len, 1); \ |
2409 } while (0) | |
2410 | |
/* Delete the section of EI described by OFF/CHAROFF/LEN/CHARLEN.
   Implemented as a replacement with an empty source: eisub_1 is given a
   NULL source pointer and zero byte and character lengths. */
2411 #define eidel(ei, off, charoff, len, charlen) \ | |
2412 eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0) | |
2413 | |
2414 | |
2415 /* ----- Converting to an external format ----- */ | |
2416 | |
/* Convert the data of EI to external format using the coding system
   CODESYS, storing the result in the extdata_ and extlen_ fields of EI.
   If the mallocp_ field of EI is set, any existing extdata_ is released
   with xfree () and cleared before the conversion, and the result is
   produced with the MALLOC sink type of TO_EXTERNAL_FORMAT; otherwise
   the ALLOCA sink type is used and the old extdata_ is simply
   overwritten.  EI is evaluated several times. */
1333 | 2417 #define eito_external(ei, codesys) \ |
771 | 2418 do { \ |
2419 if ((ei)->mallocp_) \ | |
2420 { \ | |
2421 if ((ei)->extdata_) \ | |
2422 { \ | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2423 xfree ((ei)->extdata_); \ |
771 | 2424 (ei)->extdata_ = 0; \ |
2425 } \ | |
2426 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \ | |
2427 MALLOC, ((ei)->extdata_, (ei)->extlen_), \ | |
1333 | 2428 codesys); \ |
771 | 2429 } \ |
2430 else \ | |
2431 TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_), \ | |
2432 ALLOCA, ((ei)->extdata_, (ei)->extlen_), \ | |
1318 | 2433 codesys); \ |
771 | 2434 } while (0) |
2435 | |
/* Accessors for the extdata_ and extlen_ fields of EI, i.e. the two
   fields that eito_external stores its conversion result into. */
2436 #define eiextdata(ei) ((ei)->extdata_) | |
2437 #define eiextlen(ei) ((ei)->extlen_) | |
2438 | |
2439 | |
2440 /* ----- Searching in the Eistring for a character ----- */ | |
2441 | |
/* Placeholders for searching an Eistring for a character, forward
   (eichr*) and in reverse (eirchr*).  None of them is implemented: each
   expands to the bare tokens NOT YET IMPLEMENTED, presumably on purpose
   so that any use fails to compile. */
2442 #define eichr(eistr, chr) \ | |
2443 NOT YET IMPLEMENTED | |
2444 #define eichr_char(eistr, chr) \ | |
2445 NOT YET IMPLEMENTED | |
2446 #define eichr_off(eistr, chr, off, charoff) \ | |
2447 NOT YET IMPLEMENTED | |
2448 #define eichr_off_char(eistr, chr, off, charoff) \ | |
2449 NOT YET IMPLEMENTED | |
2450 #define eirchr(eistr, chr) \ | |
2451 NOT YET IMPLEMENTED | |
2452 #define eirchr_char(eistr, chr) \ | |
2453 NOT YET IMPLEMENTED | |
2454 #define eirchr_off(eistr, chr, off, charoff) \ | |
2455 NOT YET IMPLEMENTED | |
2456 #define eirchr_off_char(eistr, chr, off, charoff) \ | |
2457 NOT YET IMPLEMENTED | |
2458 | |
2459 | |
2460 /* ----- Searching in the Eistring for a string ----- */ | |
2461 | |
/* Placeholders for searching an Eistring for another Eistring, forward
   (eistr_ei*) and in reverse (eirstr_ei*).  None of them is
   implemented: each expands to the bare tokens NOT YET IMPLEMENTED,
   presumably on purpose so that any use fails to compile. */
2462 #define eistr_ei(eistr, eistr2) \ | |
2463 NOT YET IMPLEMENTED | |
2464 #define eistr_ei_char(eistr, eistr2) \ | |
2465 NOT YET IMPLEMENTED | |
2466 #define eistr_ei_off(eistr, eistr2, off, charoff) \ | |
2467 NOT YET IMPLEMENTED | |
2468 #define eistr_ei_off_char(eistr, eistr2, off, charoff) \ | |
2469 NOT YET IMPLEMENTED | |
2470 #define eirstr_ei(eistr, eistr2) \ | |
2471 NOT YET IMPLEMENTED | |
2472 #define eirstr_ei_char(eistr, eistr2) \ | |
2473 NOT YET IMPLEMENTED | |
2474 #define eirstr_ei_off(eistr, eistr2, off, charoff) \ | |
2475 NOT YET IMPLEMENTED | |
2476 #define eirstr_ei_off_char(eistr, eistr2, off, charoff) \ | |
2477 NOT YET IMPLEMENTED | |
2478 | |
/* Placeholders for searching an Eistring for an ASCII C string, forward
   (eistr_ascii*) and in reverse (eirstr_ascii*).  None of them is
   implemented: each expands to the bare tokens NOT YET IMPLEMENTED,
   presumably on purpose so that any use fails to compile. */
2421 | 2479 #define eistr_ascii(eistr, ascstr) \ |
771 | 2480 NOT YET IMPLEMENTED |
2421 | 2481 #define eistr_ascii_char(eistr, ascstr) \ |
771 | 2482 NOT YET IMPLEMENTED |
2421 | 2483 #define eistr_ascii_off(eistr, ascstr, off, charoff) \ |
771 | 2484 NOT YET IMPLEMENTED |
2421 | 2485 #define eistr_ascii_off_char(eistr, ascstr, off, charoff) \ |
771 | 2486 NOT YET IMPLEMENTED |
2421 | 2487 #define eirstr_ascii(eistr, ascstr) \ |
771 | 2488 NOT YET IMPLEMENTED |
2421 | 2489 #define eirstr_ascii_char(eistr, ascstr) \ |
771 | 2490 NOT YET IMPLEMENTED |
2421 | 2491 #define eirstr_ascii_off(eistr, ascstr, off, charoff) \ |
771 | 2492 NOT YET IMPLEMENTED |
2421 | 2493 #define eirstr_ascii_off_char(eistr, ascstr, off, charoff) \ |
771 | 2494 NOT YET IMPLEMENTED |
2495 | |
2496 | |
2497 /* ----- Comparison ----- */ | |
2498 | |
/* eicmp_1 is the common worker behind all eicmp_* and eicasecmp_*
   macros; it is only declared here.  The section of EI to compare is
   given by OFF/CHAROFF/LEN/CHARLEN, and the other operand is either raw
   DATA or the Eistring EI2.  As used by the wrappers in this file:
   IS_ASCII is 0 when comparing against an Eistring and 1 when comparing
   against an ASCII C string, and FOLD_CASE is 0 for the eicmp_* forms,
   1 for the eicasecmp_* forms and 2 for the eicasecmp_i18n_* forms.
   NOTE(review): the meaning of the int result (presumably
   strcmp-like) is not visible here; confirm at the definition. */
2499 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, | |
867 | 2500 Bytecount len, Charcount charlen, const Ibyte *data, |
2526 | 2501 const Eistring *ei2, int is_ascii, int fold_case); |
771 | 2502 |
/* Comparison of an Eistring against another Eistring.  DATA is passed
   as 0 and IS_ASCII as 0.  The forms without _off pass byte offset 0
   and -1 for the character offset and both lengths; the _off forms pass
   the section given by the caller. */
2503 #define eicmp_ei(eistr, eistr2) \ | |
2504 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0) | |
2505 #define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \ | |
2506 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0) | |
2507 #define eicasecmp_ei(eistr, eistr2) \ | |
2508 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1) | |
2509 #define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2) \ | |
2510 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1) | |
2511 #define eicasecmp_i18n_ei(eistr, eistr2) \ | |
2512 eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2) | |
2513 #define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \ | |
2514 eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2) | |
2515 | |
/* Comparison of an Eistring against an ASCII C string ASCSTR
   (presumably zero-terminated, since no length is passed).  Each macro
   expands to a call to eicmp_1 () with ASCSTR cast to `const Ibyte *'
   as the DATA argument, a null EI2, and IS_ASCII set to 1.  The last
   argument selects the folding mode: 0 for eicmp_*, 1 for eicasecmp_*,
   2 for eicasecmp_i18n_*.  The forms without _off pass byte offset 0
   and -1 for the character offset and both lengths; the _off forms pass
   the section given by the caller.

   ASCSTR is parenthesized before the cast so that the cast applies to
   the whole argument expression (e.g. `c ? a : b') and not just to its
   first operand. */
#define eicmp_ascii(eistr, ascstr)					\
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 0)
#define eicmp_off_ascii(eistr, off, charoff, len, charlen, ascstr)	\
  eicmp_1 (eistr, off, charoff, len, charlen,				\
	   (const Ibyte *) (ascstr), 0, 1, 0)
#define eicasecmp_ascii(eistr, ascstr)					\
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_off_ascii(eistr, off, charoff, len, charlen, ascstr)	\
  eicmp_1 (eistr, off, charoff, len, charlen,				\
	   (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_i18n_ascii(eistr, ascstr)				\
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 2)
#define eicasecmp_i18n_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen,				\
	   (const Ibyte *) (ascstr), 0, 1, 2)
771 | 2528 |
2529 | |
2530 /* ----- Case-changing the Eistring ----- */ | |
2531 | |
/* Worker for EI_CASECHANGE; only declared here.  As used by that macro:
   OLDDATA/LEN is the current text, NEWDATA is a caller-supplied output
   buffer, and DOWNP is 1 for lowercasing (eilwr) and 0 for uppercasing
   (eiupr).  A nonzero return value is taken as the byte length of the
   text written to NEWDATA; a zero return leaves the Eistring as it was
   (presumably meaning that nothing needed to change). */
867 | 2532 int eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata, |
771 | 2533 int downp); |
2534 | |
/* Change the case of the whole Eistring EI; DOWNP is handed through to
   eistr_casefiddle_1 ().
   A scratch buffer of charlen_ * MAX_ICHAR_LEN + 1 bytes is obtained
   from alloca_ibytes () and filled by eistr_casefiddle_1 ().  If that
   returns nonzero, EI is switched over to the scratch buffer: data_,
   bytelen_ and max_size_allocated_ are updated, while charlen_ is left
   alone.  If it returns zero, EI is not modified.
   NOTE(review): data_ is replaced without consulting mallocp_ and
   without releasing the previous buffer; confirm that this is safe for
   Eistrings whose data was allocated with malloc. */
2535 #define EI_CASECHANGE(ei, downp) \ | |
2536 do { \ | |
867 | 2537 int ei11new_allocmax = (ei)->charlen_ * MAX_ICHAR_LEN + 1; \ |
1333 | 2538 Ibyte *ei11storage = \ |
2367 | 2539 (Ibyte *) alloca_ibytes (ei11new_allocmax); \ |
771 | 2540 int ei11newlen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_, \ |
2541 ei11storage, downp); \ | |
2542 \ | |
2543 if (ei11newlen) \ | |
2544 { \ | |
2545 (ei)->max_size_allocated_ = ei11new_allocmax; \ | |
1333 | 2546 (ei)->data_ = ei11storage; \ |
771 | 2547 (ei)->bytelen_ = ei11newlen; \ |
2548 /* charlen is the same. */ \ | |
2549 } \ | |
2550 } while (0) | |
2551 | |
/* Public case-changing entry points: eilwr invokes EI_CASECHANGE with
   DOWNP 1 (lowercase), eiupr with DOWNP 0 (uppercase). */
2552 #define eilwr(ei) EI_CASECHANGE (ei, 1) | |
2553 #define eiupr(ei) EI_CASECHANGE (ei, 0) | |
2554 | |
1743 | 2555 END_C_DECLS |
1650 | 2556 |
771 | 2557 |
2558 /************************************************************************/ | |
2559 /* */ | |
2560 /* Converting between internal and external format */ | |
2561 /* */ | |
2562 /************************************************************************/ | |
2563 /* | |
1318 | 2564 The macros below are used for converting data between different formats. |
2565 Generally, the data is textual, and the formats are related to | |
2566 internationalization (e.g. converting between internal-format text and | |
2567 UTF-8) -- but the mechanism is general, and could be used for anything, | |
2568 e.g. decoding gzipped data. | |
2569 | |
2570 In general, conversion involves a source of data, a sink, the existing | |
2571 format of the source data, and the desired format of the sink. The | |
2572 macros below, however, always require that either the source or sink is | |
2573 internal-format text. Therefore, in practice the conversions below | |
2574 involve source, sink, an external format (specified by a coding system), | |
2575 and the direction of conversion (internal->external or vice-versa). | |
2576 | |
2577 Sources and sinks can be raw data (sized or unsized -- when unsized, | |
2578 input data is assumed to be null-terminated [double null-terminated for | |
2579 Unicode-format data], and on output the length is not stored anywhere), | |
2580 Lisp strings, Lisp buffers, lstreams, and opaque data objects. When the | |
2581 output is raw data, the result can be allocated either with alloca() or | |
2582 malloc(). (There is currently no provision for writing into a fixed | |
2583 buffer. If you want this, use alloca() output and then copy the data -- | |
2584 but be careful with the size! Unless you are very sure of the encoding | |
2585 being used, upper bounds for the size are not in general computable.) | |
2586 The obvious restrictions on source and sink types apply (e.g. Lisp | |
2587 strings are a source and sink only for internal data). | |
2588 | |
2589 All raw data outputted will contain an extra null byte (two bytes for | |
2590 Unicode -- currently, in fact, all output data, whether internal or | |
2591 external, is double-null-terminated, but you can't count on this; see | |
2592 below). This means that enough space is allocated to contain the extra | |
2593 nulls; however, these nulls are not reflected in the returned output | |
2594 size. | |
2595 | |
2596 The most basic macros are TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. | |
2597 These can be used to convert between any kinds of sources or sinks. | |
2598 However, 99% of conversions involve raw data or Lisp strings as both | |
2599 source and sink, and usually data is output as alloca() rather than | |
2600 malloc(). For this reason, convenience macros are defined for many types | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2601 of conversions involving raw data and/or Lisp strings, when the output is |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2602 an alloca()ed or malloc()ed string. (When the destination is a |
1318 | 2603 Lisp_String, there are other functions that should be used instead -- |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2604 build_extstring() and make_extstring(), for example.) In general, the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2605 convenience macros return their result as a return value, even if the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2606 result is an alloca()ed string -- some trickery is required to do this, |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2607 but it's definitely possible. However, for macros whose result is a |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2608 "sized string" (i.e. a string plus a length), there are two values to |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2609 return, and both are returned through parameters. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2610 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2611 The convenience macros have the form: |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2612 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2613 (a) (SIZED_)?EXTERNAL_TO_ITEXT(_MALLOC)? |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2614 (b) (ITEXT|LISP_STRING)_TO_(SIZED_)?EXTERNAL(_MALLOC)? |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2615 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2616 Note also that there are some additional, more specific macros defined |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2617 elsewhere, for example macros like EXTERNAL_TO_TSTR() in syswindows.h for |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2618 conversions that specifically involve the `mswindows-tstr' coding system |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2619 (which is normally an alias of `mswindows-unicode', a variation of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2620 UTF-16). |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2621 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2622 Convenience macros of type (a) are for conversion from external to |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2623 internal, while type (b) macros convert internal to external. A few |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2624 notes: |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2625 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2626 -- The output is an alloca()ed string unless `_MALLOC' is appended, |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2627 in which case it's a malloc()ed string. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2628 -- When the destination says ITEXT, it means internally-formatted text of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2629 type `Ibyte *' (which boils down to `unsigned char *'). |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2630 -- When the destination says EXTERNAL, it means externally-formatted |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2631 text of type `Extbyte *' (which boils down to `char *'). Because |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2632 `Ibyte *' and `Extbyte *' are different underlying types, accidentally |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2633 mixing them will generally lead to a warning under gcc, and an error |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2634 under g++. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2635 -- When SIZED_EXTERNAL is involved, there are two parameters, one for |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2636 the string and one for its length. When SIZED_EXTERNAL is the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2637 destination, these two parameters should be lvalues and will have the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2638 result stored into them. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2639 -- There is no LISP_STRING destination; use `build_extstring' instead of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2640 `EXTERNAL_TO_LISP_STRING' and `make_extstring' instead of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2641 `SIZED_EXTERNAL_TO_LISP_STRING'. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2642 -- There is no SIZED_ITEXT type. If you need this: First, if your data |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2643 is coming from a Lisp string, it would be better to use the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2644 LISP_STRING_TO_* macros. If this doesn't apply or work, call the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2645 TO_EXTERNAL_FORMAT() or TO_INTERNAL_FORMAT() macros directly. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2646 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2647 Note that previously the convenience macros, like the raw TO_*_FORMAT |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2648 macros, were always written to store their arguments into a passed-in |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2649 lvalue rather than return them, due to major bugs in calling alloca() |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2650 inside of a function call on x86 gcc circa version 2.6. This has |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2651 apparently long since been fixed, but just to make sure we have a |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2652 `configure' test for broken alloca() in function calls, and in such case |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2653 the portable xemacs_c_alloca() implementation is substituted instead. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2654 Note that this implementation actually uses malloc() but notes the stack |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2655 pointer at the time of allocation, and at next call any allocations |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2656 belonging to inner stack frames are freed. This isn't perfect but |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2657 more-or-less gets the job done as an emergency backup, and in most |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2658 circumstances it prevents arbitrary memory leakage -- at most you should |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2659 get a fixed amount of leakage. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2660 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2661 NOTE: All convenience macros are ultimately defined in terms of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2662 TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. Thus, any comments below |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2663 about the workings of these macros also apply to all convenience macros. |
1318 | 2664 |
2665 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
2666 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
771 | 2667 |
2668 Typical use is | |
2669 | |
2367 | 2670 TO_EXTERNAL_FORMAT (LISP_STRING, str, C_STRING_MALLOC, ptr, Qfile_name); |
2671 | |
2672 which means that the contents of the lisp string `str' are written | |
2673 to a malloc'ed memory area which will be pointed to by `ptr', after the | |
2674 function returns. The conversion will be done using the `file-name' | |
2675 coding system (which will be controlled by the user indirectly by | |
2676 setting or binding the variable `file-name-coding-system'). | |
2677 | |
2678 Some sources and sinks require two C variables to specify. We use | |
2679 some preprocessor magic to allow different source and sink types, and | |
2680 even different numbers of arguments to specify different types of | |
2681 sources and sinks. | |
2682 | |
2683 So we can have a call that looks like | |
2684 | |
2685 TO_INTERNAL_FORMAT (DATA, (ptr, len), | |
2686 MALLOC, (ptr, len), | |
2687 coding_system); | |
2688 | |
2689 The parenthesized argument pairs are required to make the | |
2690 preprocessor magic work. | |
771 | 2691 |
2692 NOTE: GC is inhibited during the entire operation of these macros. This | |
2693 is because frequently the data to be converted comes from strings but | |
2694 gets passed in as just DATA, and GC may move around the string data. If | |
2695 we didn't inhibit GC, there'd have to be a lot of messy recoding, | |
2696 alloca-copying of strings and other annoying stuff. | |
2697 | |
2698 The source or sink can be specified in one of these ways: | |
2699 | |
2700 DATA, (ptr, len), // input data is a fixed buffer of size len | |
851 | 2701 ALLOCA, (ptr, len), // output data is in an alloca()ed buffer of size len |
771 | 2702 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len |
2703 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output | |
2704 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output | |
2705 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr)) | |
2706 // on input (the Unicode version is used when correct) | |
2707 LISP_STRING, string, // input or output is a Lisp_Object of type string | |
2708 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer | |
2709 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream | |
2710 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque | |
2711 | |
2712 When specifying the sink, use lvalues, since the macro will assign to them, | |
2713 except when the sink is an lstream or a lisp buffer. | |
2714 | |
2367 | 2715 For the sink types `ALLOCA' and `C_STRING_ALLOCA', the resulting text is |
2716 stored in a stack-allocated buffer, which is automatically freed on | |
2717 returning from the function. However, the sink types `MALLOC' and | |
2718 `C_STRING_MALLOC' return `xmalloc()'ed memory. The caller is responsible | |
2719 for freeing this memory using `xfree()'. | |
2720 | |
771 | 2721 The macros accept the kinds of sources and sinks appropriate for |
2722 internal and external data representation. See the type_checking_assert | |
2723 macros below for the actual allowed types. | |
2724 | |
2725 Since some sources and sinks use one argument (a Lisp_Object) to | |
2726 specify them, while others take a (pointer, length) pair, we use | |
2727 some C preprocessor trickery to allow pair arguments to be specified | |
2728 by parenthesizing them, as in the examples above. | |
2729 | |
2730 Anything prefixed by dfc_ (`data format conversion') is private. | |
2731 They are only used to implement these macros. | |
2732 | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2733 Using C_STRING* is appropriate for data that comes from or is going to |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2734 an external API that takes null-terminated strings, or when the string is |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2735 always intended to contain text and never binary data, e.g. file names. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2736 Any time we are dealing with binary or general data, we must be '\0'-clean, |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2737 i.e. allow arbitrary data which might contain embedded '\0', by tracking |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2738 both pointer and length. |
771 | 2739 |
2740 There is no problem using the same lvalue for source and sink. | |
2741 | |
2742 Also, when pointers are required, the code (currently at least) is | |
2743 lax and allows any pointer types, either in the source or the sink. | |
2744 This makes it possible, e.g., to deal with internal format data held | |
2745 in char *'s or external format data held in WCHAR * (i.e. Unicode). | |
2746 | |
2747 Finally, whenever storage allocation is called for, extra space is | |
2748 allocated for a terminating zero, and such a zero is stored in the | |
2749 appropriate place, regardless of whether the source data was | |
2750 specified using a length or was specified as zero-terminated. This | |
2751 allows you to freely pass the resulting data, no matter how | |
2752 obtained, to a routine that expects zero termination (modulo, of | |
2753 course, that any embedded zeros in the resulting text will cause | |
2754 truncation). In fact, currently two terminating zeros are allocated | |
2755 and stored after the data result. This is to allow for the | |
2756 possibility of storing a Unicode value on output, which needs the | |
2757 two zeros. Currently, however, the two zeros are stored regardless | |
2758 of whether the conversion is internal or external and regardless of | |
2759 whether the external coding system is in fact Unicode. This | |
2760 behavior may change in the future, and you cannot rely on this -- | |
2761 the most you can rely on is that sink data in Unicode format will | |
2762 have two terminating nulls, which combine to form one Unicode null | |
2367 | 2763 character. |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2764 */ |
771 | 2765 |
/* Convert data from internal format to the external format given by
   CODESYS.  SOURCE_TYPE and SINK_TYPE are bare tokens that are pasted
   into DFC_* names.  Steps, in order:
   1. Declare the dfc_* locals and evaluate CODESYS once.
   2. type_checking_assert that the source type is one of DATA,
      C_STRING, LISP_STRING, LISP_OPAQUE or LISP_LSTREAM, and that the
      sink type is one of ALLOCA, MALLOC, C_STRING_ALLOCA,
      C_STRING_MALLOC, LISP_LSTREAM or LISP_OPAQUE.
   3. Expand DFC_EXT_SOURCE_<source_type>_TO_ARGS and
      DFC_SINK_<sink_type>_TO_ARGS (presumably these fill in the dfc_*
      locals from SOURCE and SINK).
   4. Call dfc_convert_to_external_format ().
   5. Expand DFC_<sink_type>_USE_CONVERTED_DATA on SINK (presumably
      storing the result into the caller's lvalues). */
2766 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \ | |
2767 do { \ | |
2768 dfc_conversion_type dfc_simplified_source_type; \ | |
2769 dfc_conversion_type dfc_simplified_sink_type; \ | |
2770 dfc_conversion_data dfc_source; \ | |
2771 dfc_conversion_data dfc_sink; \ | |
2772 Lisp_Object dfc_codesys = (codesys); \ | |
2773 \ | |
2774 type_checking_assert \ | |
2775 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
2776 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
2777 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \ | |
2778 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
2779 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
2780 && \ | |
2781 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
2782 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
2783 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
2784 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
2785 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
2786 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \ | |
2787 \ | |
2788 DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \ | |
2789 DFC_SINK_##sink_type##_TO_ARGS (sink); \ | |
2790 \ | |
2791 dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \ | |
2792 dfc_codesys, \ | |
2793 dfc_simplified_sink_type, &dfc_sink); \ | |
2794 \ | |
2795 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \ | |
2796 } while (0) | |
2797 | |
2798 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \ | |
2799 do { /* Convert source to internal format using codesys and deliver the result to sink. */ \ | |
2800 dfc_conversion_type dfc_simplified_source_type; /* set by the source ..._TO_ARGS macro */ \ | |
2801 dfc_conversion_type dfc_simplified_sink_type; /* set by the sink ..._TO_ARGS macro */ \ | |
2802 dfc_conversion_data dfc_source; \ | |
2803 dfc_conversion_data dfc_sink; \ | |
2804 Lisp_Object dfc_codesys = (codesys); /* evaluate codesys only once */ \ | |
2805 \ | |
2806 type_checking_assert /* LISP_STRING is not an accepted source here; LISP_STRING and LISP_BUFFER are accepted sinks */ \ | |
2807 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
2808 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
2809 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
2810 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
2811 && \ | |
2812 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
2813 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
2814 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
2815 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
2816 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \ | |
2817 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
2818 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \ | |
2819 \ | |
2820 DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); /* fills dfc_source and dfc_simplified_source_type */ \ | |
2821 DFC_SINK_##sink_type##_TO_ARGS (sink); /* sets dfc_simplified_sink_type; lstream and buffer sinks also store an object in dfc_sink */ \ | |
2822 \ | |
2823 dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, /* performs the conversion; dfc_sink is passed by address */ \ | |
2824 dfc_codesys, \ | |
2825 dfc_simplified_sink_type, &dfc_sink); \ | |
2826 \ | |
2827 DFC_##sink_type##_USE_CONVERTED_DATA (sink); /* hand the converted data to sink as appropriate for sink_type */ \ | |
2828 } while (0) | |
2829 | |
814 | 2830 #ifdef __cplusplus |
771 | 2831 |
814 | 2832 /* A union cannot be used here under C++; the compiler reports: "member |
2833 `struct {anonymous union}::{anonymous} {anonymous union}::data' with | |
2834 constructor not allowed in union" (Bytecount is a class) */ | |
2835 | |
2836 typedef struct | |
2837 #else | |
771 | 2838 typedef union |
814 | 2839 #endif |
771 | 2840 { |
2841 struct { const void *ptr; Bytecount len; } data; | |
2842 Lisp_Object lisp_object; | |
2843 } dfc_conversion_data; | |
2844 | |
2845 enum dfc_conversion_type /* source and sink kinds named in TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT */ | |
2846 { | |
2847 DFC_TYPE_DATA, /* (ptr, len) pair; source only, and the simplified type of every non-lstream kind except a LISP_STRING source */ | |
2848 DFC_TYPE_ALLOCA, /* sink only: (ptr, len) lvalue pair, result copied into ALLOCA memory */ | |
2849 DFC_TYPE_MALLOC, /* sink only: (ptr, len) lvalue pair, result copied into xmalloc memory */ | |
2850 DFC_TYPE_C_STRING, /* source only: pointer whose length is computed by the ..._TO_ARGS macro */ | |
2851 DFC_TYPE_C_STRING_ALLOCA, /* sink only: single pointer lvalue, result copied into ALLOCA memory */ | |
2852 DFC_TYPE_C_STRING_MALLOC, /* sink only: single pointer lvalue, result copied into xmalloc memory */ | |
2853 DFC_TYPE_LISP_STRING, /* source for TO_EXTERNAL_FORMAT, sink for TO_INTERNAL_FORMAT */ | |
2854 DFC_TYPE_LISP_LSTREAM, /* source or sink in either direction */ | |
2855 DFC_TYPE_LISP_OPAQUE, /* source in either direction; sink for TO_EXTERNAL_FORMAT only */ | |
2856 DFC_TYPE_LISP_BUFFER /* sink for TO_INTERNAL_FORMAT only; wrapped in a buffer output lstream */ | |
2857 }; | |
2858 typedef enum dfc_conversion_type dfc_conversion_type; | |
2859 | |
1743 | 2860 BEGIN_C_DECLS |
1650 | 2861 |
771 | 2862 /* WARNING: These use a static buffer. This can lead to disaster if |
2863 these functions are not used *very* carefully. Another reason to only use | |
2864 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */ | |
1632 | 2865 MODULE_API void |
771 | 2866 dfc_convert_to_external_format (dfc_conversion_type source_type, |
2867 dfc_conversion_data *source, | |
1318 | 2868 Lisp_Object codesys, |
771 | 2869 dfc_conversion_type sink_type, |
2870 dfc_conversion_data *sink); | |
1632 | 2871 MODULE_API void |
771 | 2872 dfc_convert_to_internal_format (dfc_conversion_type source_type, |
2873 dfc_conversion_data *source, | |
1318 | 2874 Lisp_Object codesys, |
771 | 2875 dfc_conversion_type sink_type, |
2876 dfc_conversion_data *sink); | |
2877 /* CPP Trickery */ | |
2878 #define DFC_CPP_CAR(x,y) (x) | |
2879 #define DFC_CPP_CDR(x,y) (y) | |
2880 | |
2881 /* Convert `source' to args for dfc_convert_to_external_format() */ | |
2882 #define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \ | |
2883 dfc_source.data.ptr = DFC_CPP_CAR val; \ | |
2884 dfc_source.data.len = DFC_CPP_CDR val; \ | |
2885 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2886 } while (0) | |
2887 #define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \ | |
2888 dfc_source.data.len = \ | |
2889 strlen ((char *) (dfc_source.data.ptr = (val))); \ | |
2890 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2891 } while (0) | |
2892 #define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \ | |
2893 Lisp_Object dfc_slsta = (val); \ | |
2894 type_checking_assert (STRINGP (dfc_slsta)); \ | |
2895 dfc_source.lisp_object = dfc_slsta; \ | |
2896 dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \ | |
2897 } while (0) | |
2898 #define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \ | |
2899 Lisp_Object dfc_sllta = (val); \ | |
2900 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
2901 dfc_source.lisp_object = dfc_sllta; \ | |
2902 dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \ | |
2903 } while (0) | |
2904 #define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \ | |
2905 Lisp_Opaque *dfc_slota = XOPAQUE (val); \ | |
2906 dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \ | |
2907 dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \ | |
2908 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2909 } while (0) | |
2910 | |
2911 /* Convert `source' to args for dfc_convert_to_internal_format() */ | |
2912 #define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \ | |
2913 DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys) | |
2914 #define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \ | |
2915 dfc_source.data.len = dfc_external_data_len (dfc_source.data.ptr = (val), \ | |
2916 codesys); \ | |
2917 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2918 } while (0) | |
2919 #define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \ | |
2920 DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys) | |
2921 #define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \ | |
2922 DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys) | |
2923 #define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \ | |
2924 DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys) | |
2925 | |
2926 /* Convert `sink' to args for dfc_convert_to_*_format() */ | |
2927 #define DFC_SINK_ALLOCA_TO_ARGS(val) \ | |
2928 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2929 #define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \ | |
2930 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2931 #define DFC_SINK_MALLOC_TO_ARGS(val) \ | |
2932 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2933 #define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \ | |
2934 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2935 #define DFC_SINK_LISP_STRING_TO_ARGS(val) \ | |
2936 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2937 #define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \ | |
2938 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2939 #define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \ | |
2940 Lisp_Object dfc_sllta = (val); \ | |
2941 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
2942 dfc_sink.lisp_object = dfc_sllta; \ | |
2943 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
2944 } while (0) | |
2945 #define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \ | |
2946 struct buffer *dfc_slbta = XBUFFER (val); \ | |
2947 dfc_sink.lisp_object = \ | |
2948 make_lisp_buffer_output_stream \ | |
2949 (dfc_slbta, BUF_PT (dfc_slbta), 0); \ | |
2950 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
2951 } while (0) | |
2952 | |
2953 /* Assign to the `sink' lvalue(s) using the converted data. */ | |
2954 /* The "+ 2" below also copies the two terminating zero bytes that follow the converted data; two are stored so that Unicode output ends in a complete null character. */ | |
2955 typedef union { char c; void *p; } *dfc_aliasing_voidpp; | |
2956 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
851 | 2957 void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2); \ |
771 | 2958 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ |
2367 | 2959 VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret; \ |
771 | 2960 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ |
2961 } while (0) | |
2962 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
2963 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \ | |
2964 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ | |
2367 | 2965 VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret; \ |
771 | 2966 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ |
2967 } while (0) | |
2968 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
851 | 2969 void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2); \ |
771 | 2970 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ |
2367 | 2971 VOIDP_CAST (sink) = dfc_sink_ret; \ |
771 | 2972 } while (0) |
2973 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
2974 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \ | |
2975 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ | |
2367 | 2976 VOIDP_CAST (sink) = dfc_sink_ret; \ |
771 | 2977 } while (0) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2978 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \ |
867 | 2979 sink = make_string ((Ibyte *) dfc_sink.data.ptr, dfc_sink.data.len) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2980 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \ |
771 | 2981 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len) |
2982 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */ | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2983 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \ |
771 | 2984 Lstream_delete (XLSTREAM (dfc_sink.lisp_object)) |
2985 | |
1318 | 2986 enum new_dfc_src_type /* kind of source handed to the new_dfc_convert_* functions */ |
2987 { | |
2988 DFC_EXTERNAL, /* external data, passed with size -1 by EXTERNAL_TO_ITEXT */ | |
2989 DFC_SIZED_EXTERNAL, /* external data with an explicit length (SIZED_EXTERNAL_TO_ITEXT) */ | |
2990 DFC_INTERNAL, /* internal text, passed with size -1 by ITEXT_TO_EXTERNAL */ | |
2991 DFC_SIZED_INTERNAL, /* presumably internal text with an explicit length; not used by the wrapper macros in this header section */ | |
2992 DFC_LISP_STRING /* Lisp string passed through STORE_LISP_IN_VOID (LISP_STRING_TO_EXTERNAL) */ | |
2993 }; | |
2994 | |
1632 | 2995 MODULE_API void *new_dfc_convert_malloc (const void *src, Bytecount src_size, |
2996 enum new_dfc_src_type type, | |
2997 Lisp_Object codesys); | |
2367 | 2998 MODULE_API Bytecount new_dfc_convert_size (const char *srctext, |
2999 const void *src, | |
1632 | 3000 Bytecount src_size, |
3001 enum new_dfc_src_type type, | |
3002 Lisp_Object codesys); | |
2367 | 3003 MODULE_API void *new_dfc_convert_copy_data (const char *srctext, |
3004 void *alloca_data); | |
1318 | 3005 |
1743 | 3006 END_C_DECLS |
1650 | 3007 |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3008 /* Version of EXTERNAL_TO_ITEXT that *RETURNS* the translated string, |
1318 | 3009 still in alloca() space. Requires some trickiness to do this, but gets |
3010 it done! */ | |
3011 | |
3012 /* NOTE: Do not make two invocations of the dfc macros below in the same | |
3013 subexpression using the exact same expression for the source in both. | |
3014 The source text is stringified (#src) and passed to both | |
3015 new_dfc_convert_size() and new_dfc_convert_copy_data(), presumably to pair | |
3016 up the two calls. In this unlikely case you will get an abort, and need to rewrite the code. */ | |
3017 | |
3018 /* We need to use ALLOCA_FUNCALL_OK here. Some compilers have been known | |
3019 to choke when alloca() occurs as a funcall argument, and so we check | |
3020 this in configure. Rewriting the expressions below to use a temporary | |
3021 variable, so that the call to alloca() is outside of | |
2382 | 3022 new_dfc_convert_copy_data(), won't help because the entire NEW_DFC call |
1318 | 3023 could be inside of a function call. */ |
3024 | |
3025 #define NEW_DFC_CONVERT_1_ALLOCA(src, src_size, type, codesys) \ | |
2367 | 3026 new_dfc_convert_copy_data \ |
1318 | 3027 (#src, ALLOCA_FUNCALL_OK (new_dfc_convert_size (#src, src, src_size, \ |
3028 type, codesys))) | |
3029 | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3030 #define EXTERNAL_TO_ITEXT(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3031 ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3032 #define EXTERNAL_TO_ITEXT_MALLOC(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3033 ((Ibyte *) new_dfc_convert_malloc (src, -1, DFC_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3034 #define SIZED_EXTERNAL_TO_ITEXT(src, len, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3035 ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3036 #define SIZED_EXTERNAL_TO_ITEXT_MALLOC(src, len, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3037 ((Ibyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3038 #define ITEXT_TO_EXTERNAL(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3039 ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_INTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3040 #define ITEXT_TO_EXTERNAL_MALLOC(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3041 ((Extbyte *) new_dfc_convert_malloc (src, -1, DFC_INTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3042 #define LISP_STRING_TO_EXTERNAL(src, codesys) \ |
5013 | 3043 ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (STORE_LISP_IN_VOID (src), -1, \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3044 DFC_LISP_STRING, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
3045 #define LISP_STRING_TO_EXTERNAL_MALLOC(src, codesys) \ |
5013 | 3046 ((Extbyte *) new_dfc_convert_malloc (STORE_LISP_IN_VOID (src), -1, \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3047 DFC_LISP_STRING, codesys)) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3048 /* In place of EXTERNAL_TO_LISP_STRING(), use build_extstring() and/or |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3049 make_extstring(). */ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3050 |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3051 /* The next four macros produce two outputs (the converted data and its length), so both are passed as parameters (out, outlen). */ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3052 #define ITEXT_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3053 TO_EXTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3054 #define LISP_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3055 TO_EXTERNAL_FORMAT (LISP_STRING, in, ALLOCA, (out, outlen), codesys) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3056 #define ITEXT_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3057 TO_EXTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3058 #define LISP_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3059 TO_EXTERNAL_FORMAT (LISP_STRING, in, MALLOC, (out, outlen), codesys) |
771 | 3060 |
2367 | 3061 /* Wexttext functions. The type of Wexttext is selected at compile time |
3062 and will sometimes be wchar_t, sometimes char. */ | |
3063 | |
3064 int wcscmp_ascii (const wchar_t *s1, const Ascbyte *s2); | |
3065 int wcsncmp_ascii (const wchar_t *s1, const Ascbyte *s2, Charcount len); | |
3066 | |
3067 #ifdef WEXTTEXT_IS_WIDE /* defined under MS Windows i.e. WIN32_NATIVE */ | |
3068 #define WEXTTEXT_ZTERM_SIZE sizeof (wchar_t) | |
3069 /* Extra indirection needed in case of manifest constant as arg */ | |
3070 #define WEXTSTRING_1(arg) L##arg | |
3071 #define WEXTSTRING(arg) WEXTSTRING_1(arg) | |
3072 #define wext_strlen wcslen | |
3073 #define wext_strcmp wcscmp | |
3074 #define wext_strncmp wcsncmp | |
3075 #define wext_strcmp_ascii wcscmp_ascii | |
3076 #define wext_strncmp_ascii wcsncmp_ascii | |
3077 #define wext_strcpy wcscpy | |
3078 #define wext_strncpy wcsncpy | |
3079 #define wext_strchr wcschr | |
3080 #define wext_strrchr wcsrchr | |
3081 #define wext_strdup wcsdup | |
3082 #define wext_atol(str) wcstol (str, 0, 10) | |
3083 #define wext_sprintf wsprintfW /* Huh? both wsprintfA and wsprintfW? */ | |
3084 #define wext_getenv _wgetenv | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
3085 #define build_wext_string(str, cs) build_extstring ((Extbyte *) str, cs) |
2367 | 3086 #define WEXTTEXT_TO_8_BIT(arg) WEXTTEXT_TO_MULTIBYTE(arg) |
3087 #ifdef WIN32_NATIVE | |
3088 int XCDECL wext_retry_open (const Wexttext *path, int oflag, ...); | |
3089 #else | |
3090 #error Cannot handle Wexttext yet on this system | |
3091 #endif | |
3092 #define wext_access _waccess | |
3093 #define wext_stat _wstat | |
3094 #else | |
3095 #define WEXTTEXT_ZTERM_SIZE sizeof (char) | |
3096 #define WEXTSTRING(arg) arg | |
3097 #define wext_strlen strlen | |
3098 #define wext_strcmp strcmp | |
3099 #define wext_strncmp strncmp | |
3100 #define wext_strcmp_ascii strcmp | |
3101 #define wext_strncmp_ascii strncmp | |
3102 #define wext_strcpy strcpy | |
3103 #define wext_strncpy strncpy | |
3104 #define wext_strchr strchr | |
3105 #define wext_strrchr strrchr | |
3106 #define wext_strdup xstrdup | |
3107 #define wext_atol(str) atol (str) | |
3108 #define wext_sprintf sprintf | |
3109 #define wext_getenv getenv | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
3110 #define build_wext_string build_extstring |
2367 | 3111 #define wext_retry_open retry_open |
3112 #define wext_access access | |
3113 #define wext_stat stat | |
3114 #define WEXTTEXT_TO_8_BIT(arg) ((Extbyte *) arg) | |
3115 #endif | |
3116 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3117 /* Standins for various encodings. |
1318 | 3118 |
3119 About encodings in X: | |
3120 | |
3121 X works with 5 different encodings: | |
3122 | |
3123 -- "Host Portable Character Encoding" == printable ASCII + space, tab, | |
3124 newline | |
3125 | |
3126 -- STRING encoding == ASCII + Latin-1 + tab, newline | |
3127 | |
3128 -- Locale-specific encoding | |
3129 | |
3130 -- Compound text == STRING encoding + ISO-2022 escape sequences to | |
3131 switch between different locale-specific encodings. | |
3132 | |
3133 -- ANSI C wide-character encoding | |
3134 | |
3135 The Host Portable Character Encoding (HPCE) is used for atom names, font | |
3136 names, color names, keysyms, geometry strings, resource manager quarks, | |
3137 display names, locale names, and various other things. When describing | |
3138 such strings, the X manual typically says "If the ... is not in the Host | |
3139 Portable Character Encoding, the result is implementation dependent." | |
3140 | |
3141 The wide-character encoding is used only in the Xwc* functions, which | |
3142 are provided as equivalents to Xmb* functions. | |
3143 | |
3144 STRING and compound text are used in the value of string properties and | |
3145 selection data, both of which are values with an associated type atom, | |
3146 which can be STRING or COMPOUND_TEXT. It can also be a locale name, as | |
3147 specified in setlocale() (#### as usual, there is no normalization | |
3148 whatsoever of these names). | |
3149 | |
3150 X also defines a type called "TEXT", which is used only as a requested | |
3151 type, and produces data in a type "convenient to the owner". However, | |
3152 there is some indication that X expects this to be the locale-specific | |
3153 encoding. | |
3154 | |
3155 According to the glossary, the locale is used in | |
3156 | |
3157 -- Encoding and processing of input method text | |
3158 -- Encoding of resource files and values | |
3159 -- Encoding and imaging of text strings | |
3160 -- Encoding and decoding for inter-client text communication | |
3161 | |
3162 The functions XmbTextListToTextProperty and XmbTextPropertyToTextList | |
3163 (and Xwc* equivalents) can be used to convert between the | |
3164 locale-specific encoding (XTextStyle), STRING (XStringStyle), and | |
3165 compound text (XCompoundTextStyle), as well as XStdICCTextStyle, which | |
3166 converts to STRING if possible, and if not, COMPOUND_TEXT. This is | |
3167 used, for example, in XmbSetWMProperties, in the window_name and | |
3168 icon_name properties (WM_NAME and WM_ICON_NAME), which are in the | |
3169 locale-specific encoding on input, and are stored as STRING if possible, | |
3170 COMPOUND_TEXT otherwise. | |
3171 */ | |
771 | 3172 |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3173 #ifdef WEXTTEXT_IS_WIDE |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3174 #define Qcommand_argument_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3175 #define Qenvironment_variable_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3176 #else |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3177 #define Qcommand_argument_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3178 #define Qenvironment_variable_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3179 #endif |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3180 #define Qunix_host_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3181 #define Qunix_service_name_encoding Qnative |
5254
1537701f08a1
Support Roman month numbers, #'format-time-string
Aidan Kehoe <kehoea@parhasard.net>
parents:
5200
diff
changeset
|
3182 #define Qtime_function_encoding Qbinary |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3183 #define Qtime_zone_encoding Qtime_function_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3184 #define Qmswindows_host_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3185 #define Qmswindows_service_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3186 #define Quser_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3187 #define Qerror_message_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3188 #define Qjpeg_error_message_encoding Qerror_message_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3189 #define Qtooltalk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3190 #define Qgtk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3191 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3192 #define Qdll_symbol_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3193 #define Qdll_function_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3194 #define Qdll_variable_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3195 #define Qdll_filename_encoding Qfile_name |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3196 #define Qemodule_string_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3197 |
771 | 3198 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext? |
3199 Almost certainly the former. Use a standin for now. */ | |
3200 #define Qlwlib_encoding Qnative | |
3201 | |
1318 | 3202 /* The Host Portable Character Encoding. */ |
3203 #define Qx_hpc_encoding Qnative | |
3204 | |
3205 #define Qx_atom_name_encoding Qx_hpc_encoding | |
3206 #define Qx_font_name_encoding Qx_hpc_encoding | |
3207 #define Qx_color_name_encoding Qx_hpc_encoding | |
3208 #define Qx_keysym_encoding Qx_hpc_encoding | |
3209 #define Qx_geometry_encoding Qx_hpc_encoding | |
3210 #define Qx_resource_name_encoding Qx_hpc_encoding | |
3211 #define Qx_application_class_encoding Qx_hpc_encoding | |
771 | 3212 /* the following probably must agree with Qcommand_argument_encoding and |
3213 Qenvironment_variable_encoding */ | |
1318 | 3214 #define Qx_display_name_encoding Qx_hpc_encoding |
3215 #define Qx_xpm_data_encoding Qx_hpc_encoding | |
4834
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4790
diff
changeset
|
3216 #define Qx_error_message_encoding Qx_hpc_encoding |
1318 | 3217 |
2367 | 3218 /* !!#### Verify these! */ |
3219 #define Qxt_widget_arg_encoding Qnative | |
3220 #define Qdt_dnd_encoding Qnative | |
3221 | |
1318 | 3222 /* RedHat 6.2 contains a locale called "Francais" with the C-cedilla |
3223 encoded in ISO2022! */ | |
3224 #define Qlocale_name_encoding Qctext | |
771 | 3225 |
3226 #define Qstrerror_encoding Qnative | |
3227 | |
1318 | 3228 /* !!#### This exists to remind us that our hexify routine is totally |
3229 un-Muleized. */ | |
3230 #define Qdnd_hexify_encoding Qascii | |
3231 | |
771 | 3232 #define GET_STRERROR(var, num) \ |
3233 do { /* Set var to an internal-format message describing error number num. */ \ | |
3234 int __gsnum__ = (num); /* evaluate num only once */ \ | |
3235 Extbyte * __gserr__ = strerror (__gsnum__); \ | |
3236 \ | |
3237 if (!__gserr__) /* strerror gave no message: build a fallback text instead */ \ | |
3238 { \ | |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3239 var = alloca_ibytes (99); /* presumably a 99-Ibyte alloca buffer, large enough for the text formatted below */ \ |
771 | 3240 qxesprintf (var, "Unknown error %d", __gsnum__); \ |
3241 } \ | |
3242 else \ | |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3243 var = EXTERNAL_TO_ITEXT (__gserr__, Qstrerror_encoding); /* converted copy is returned in alloca space */ \ |
771 | 3244 } while (0) |
3245 | |
3246 #endif /* INCLUDED_text_h_ */ |