Mercurial > hg > xemacs-beta
annotate src/text.h @ 5276:dd2976af8783
Add some missing #includes, termcap.c, hopefully fixing Adam Sjoegren's build.
2010-09-18 Aidan Kehoe <kehoea@parhasard.net>
* termcap.c:
Add a couple of missing includes here, which should fix builds
that use this file. (I have no access to such builds, but Mats'
buildbot shows output that indicates they fail at link time since
DEVICE_BAUD_RATE and IS_DIRECTORY_SEP are not available.)
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 18 Sep 2010 15:03:54 +0100 |
parents | 1537701f08a1 |
children | 308d34e9f07d |
rev | line source |
---|---|
771 | 1 /* Header file for text manipulation primitives and macros. |
2 Copyright (C) 1985-1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
4 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2010 Ben Wing. |
771 | 5 |
6 This file is part of XEmacs. | |
7 | |
8 XEmacs is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
10 Free Software Foundation; either version 2, or (at your option) any | |
11 later version. | |
12 | |
13 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with XEmacs; see the file COPYING. If not, write to | |
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 Boston, MA 02111-1307, USA. */ | |
22 | |
23 /* Synched up with: FSF 19.30. */ | |
24 | |
25 /* Authorship: | |
26 | |
27 Mostly written by Ben Wing, starting around 1995. | |
28 Current TO_IN/EXTERNAL_FORMAT macros written by Martin Buchholz, | |
29 designed by Ben Wing based on earlier macros by Ben Wing. | |
30 Separated out June 18, 2000 from buffer.h into text.h. | |
31 */ | |
32 | |
33 #ifndef INCLUDED_text_h_ | |
34 #define INCLUDED_text_h_ | |
35 | |
912 | 36 #ifdef HAVE_WCHAR_H |
771 | 37 #include <wchar.h> |
912 | 38 #else |
1257 | 39 size_t wcslen (const wchar_t *); |
912 | 40 #endif |
1204 | 41 #ifndef HAVE_STRLWR |
1257 | 42 char *strlwr (char *); |
1204 | 43 #endif |
44 #ifndef HAVE_STRUPR | |
1257 | 45 char *strupr (char *); |
1204 | 46 #endif |
771 | 47 |
1743 | 48 BEGIN_C_DECLS |
1650 | 49 |
5200
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
50 /* Forward compatibility from ben-unicode-internal: Following used for |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
51 functions that do character conversion and need to handle errors. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
52 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
53 enum converr |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
54 { |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
55 /* ---- Basic actions ---- */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
56 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
57 /* Do nothing upon failure and return a failure indication. |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
58 Same as what happens when the *_raw() version is called. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
59 CONVERR_FAIL, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
60 /* abort() on failure, i.e. crash. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
61 CONVERR_ABORT, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
62 /* Signal a Lisp error. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
63 CONVERR_ERROR, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
64 /* Try to "recover" and continue processing. Currently this is always |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
65 the same as CONVERR_SUBSTITUTE, where one of the substitution |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
66 characters defined below (CANT_CONVERT_*) is used. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
67 CONVERR_SUCCEED, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
68 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
69 /* ---- More specific actions ---- */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
70 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
71 /* Substitute something (0xFFFD, the Unicode replacement character, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
72 when converting to Unicode or to a Unicode-internal Ichar, JISX0208 |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
73 GETA mark when converting to non-Mule Ichar). */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
74 CONVERR_SUBSTITUTE, |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
75 /* Use private Unicode space when converting to Unicode. */ |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
76 CONVERR_USE_PRIVATE |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
77 }; |
70ed8a0d8da8
port Mule-ization of mule-wnnfns.c from ben-unicode-internal
Ben Wing <ben@xemacs.org>
parents:
5169
diff
changeset
|
78 |
5092
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
79 /************************************************************************/ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
80 /* A short intro to the format of text and of characters */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
81 /************************************************************************/ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
82 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
83 /* |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
84 "internally formatted text" and the term "internal format" in |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
85 general are likely to refer to the format of text in buffers and |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
86 strings; "externally formatted text" and the term "external format" |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
87 refer to any text format used in the O.S. or elsewhere outside of |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
88 XEmacs. The format of text and of a character are related and |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
89 there must be a one-to-one relationship (hopefully through a |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
90 relatively simple algorithmic means of conversion) between a string |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
91 of text and an equivalent array of characters, but the conversion |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
92 between the two is NOT necessarily trivial. |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
93 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
94 In a non-Mule XEmacs, allowed characters are numbered 0 through |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
95 255, where no fixed meaning is assigned to them, but (when |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
96 representing text, rather than bytes in a binary file) in practice |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
97 the lower half represents ASCII and the upper half some other 8-bit |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
98 character set (chosen by setting the font, case tables, syntax |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
99 tables, etc. appropriately for the character set through ad-hoc |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
100 means such as the `iso-8859-1' file and the |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
101 `standard-display-european' function). |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
102 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
103 For more info, see `text.c' and the Internals Manual. |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
104 */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
105 |
771 | 106 /* ---------------------------------------------------------------------- */ |
107 /* Super-basic character properties */ | |
108 /* ---------------------------------------------------------------------- */ | |
109 | |
110 /* These properties define the specifics of how our current encoding fits | |
111 in the basic model used for the encoding. Because this model is the same | |
112 as is used for UTF-8, all these properties could be defined for it, too. | |
113 This would instantly make the rest of this file work with UTF-8 (with | |
114 the exception of a few called functions that would need to be redefined). | |
115 | |
116 (UTF-2000 implementers, take note!) | |
117 */ | |
118 | |
119 /* If you want more than this, you need to include charset.h */ | |
120 | |
121 #ifndef MULE | |
122 | |
826 | 123 #define rep_bytes_by_first_byte(fb) 1 |
124 #define byte_ascii_p(byte) 1 | |
867 | 125 #define MAX_ICHAR_LEN 1 |
771 | 126 |
127 #else /* MULE */ | |
128 | |
129 /* These are carefully designed to work if BYTE is signed or unsigned. */ | |
130 /* Note that SPC and DEL are considered ASCII, not control. */ | |
131 | |
826 | 132 #define byte_ascii_p(byte) (((byte) & ~0x7f) == 0) |
133 #define byte_c0_p(byte) (((byte) & ~0x1f) == 0) | |
134 #define byte_c1_p(byte) (((byte) & ~0x1f) == 0x80) | |
771 | 135 |
136 /* Does BYTE represent the first byte of a character? */ | |
137 | |
826 | 138 #ifdef ERROR_CHECK_TEXT |
139 | |
140 DECLARE_INLINE_HEADER ( | |
141 int | |
867 | 142 ibyte_first_byte_p_1 (int byte, const char *file, int line) |
826 | 143 ) |
144 { | |
145 assert_at_line (byte >= 0 && byte < 256, file, line); | |
146 return byte < 0xA0; | |
147 } | |
148 | |
867 | 149 #define ibyte_first_byte_p(byte) \ |
150 ibyte_first_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 151 |
152 #else | |
153 | |
867 | 154 #define ibyte_first_byte_p(byte) ((byte) < 0xA0) |
826 | 155 |
156 #endif | |
157 | |
158 #ifdef ERROR_CHECK_TEXT | |
771 | 159 |
160 /* Does BYTE represent the first byte of a multi-byte character? */ | |
161 | |
826 | 162 DECLARE_INLINE_HEADER ( |
163 int | |
867 | 164 ibyte_leading_byte_p_1 (int byte, const char *file, int line) |
826 | 165 ) |
166 { | |
167 assert_at_line (byte >= 0 && byte < 256, file, line); | |
168 return byte_c1_p (byte); | |
169 } | |
170 | |
867 | 171 #define ibyte_leading_byte_p(byte) \ |
172 ibyte_leading_byte_p_1 (byte, __FILE__, __LINE__) | |
826 | 173 |
174 #else | |
175 | |
867 | 176 #define ibyte_leading_byte_p(byte) byte_c1_p (byte) |
826 | 177 |
178 #endif | |
771 | 179 |
180 /* Table of number of bytes in the string representation of a character | |
181 indexed by the first byte of that representation. | |
182 | |
183 This value can be derived in other ways -- e.g. something like | |
826 | 184 XCHARSET_REP_BYTES (charset_by_leading_byte (first_byte)) |
771 | 185 but it's faster this way. */ |
1632 | 186 extern MODULE_API const Bytecount rep_bytes_by_first_byte[0xA0]; |
771 | 187 |
188 /* Number of bytes in the string representation of a character. */ | |
788 | 189 |
800 | 190 #ifdef ERROR_CHECK_TEXT |
788 | 191 |
826 | 192 DECLARE_INLINE_HEADER ( |
193 Bytecount | |
194 rep_bytes_by_first_byte_1 (int fb, const char *file, int line) | |
195 ) | |
771 | 196 { |
826 | 197 assert_at_line (fb >= 0 && fb < 0xA0, file, line); |
771 | 198 return rep_bytes_by_first_byte[fb]; |
199 } | |
200 | |
826 | 201 #define rep_bytes_by_first_byte(fb) \ |
202 rep_bytes_by_first_byte_1 (fb, __FILE__, __LINE__) | |
788 | 203 |
800 | 204 #else /* ERROR_CHECK_TEXT */ |
788 | 205 |
826 | 206 #define rep_bytes_by_first_byte(fb) (rep_bytes_by_first_byte[fb]) |
788 | 207 |
800 | 208 #endif /* ERROR_CHECK_TEXT */ |
788 | 209 |
826 | 210 /* Is this character represented by more than one byte in a string in the |
211 default format? */ | |
212 | |
867 | 213 #define ichar_multibyte_p(c) ((c) >= 0x80) |
214 | |
215 #define ichar_ascii_p(c) (!ichar_multibyte_p (c)) | |
826 | 216 |
217 /* Maximum number of bytes per Emacs character when represented as text, in | |
218 any format. | |
219 */ | |
771 | 220 |
867 | 221 #define MAX_ICHAR_LEN 4 |
771 | 222 |
826 | 223 #endif /* not MULE */ |
224 | |
5092
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
225 #ifdef MULE |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
226 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
227 MODULE_API int non_ascii_valid_ichar_p (Ichar ch); |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
228 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
229 /* Return whether the given Ichar is valid. |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
230 */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
231 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
232 DECLARE_INLINE_HEADER ( |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
233 int |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
234 valid_ichar_p (Ichar ch) |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
235 ) |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
236 { |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
237 return (! (ch & ~0xFF)) || non_ascii_valid_ichar_p (ch); |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
238 } |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
239 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
240 #else /* not MULE */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
241 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
242 /* This works when CH is negative, and correctly returns non-zero only when CH |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
243 is in the range [0, 255], inclusive. */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
/* CH is parenthesized so that a compound argument such as (a | b) is
   masked as a whole rather than having ~0xFF bind to its last operand. */
#define valid_ichar_p(ch) (! ((ch) & ~0xFF))
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
245 |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
246 #endif /* not MULE */ |
3aa3888729c3
move inclusion point of text.h to clean things up a bit
Ben Wing <ben@xemacs.org>
parents:
5027
diff
changeset
|
247 |
2367 | 248 /* For more discussion, see text.c, "handling non-default formats" */ |
249 | |
826 | 250 typedef enum internal_format |
251 { | |
252 FORMAT_DEFAULT, | |
253 FORMAT_8_BIT_FIXED, | |
254 FORMAT_16_BIT_FIXED, /* not implemented */ | |
255 FORMAT_32_BIT_FIXED /* not implemented */ | |
256 } Internal_Format; | |
257 | |
258 #ifdef MULE | |
259 /* "OBJECT" below will usually be a buffer, string, or nil. This needs to | |
260 be passed in because the interpretation of 8-bit-fixed and 16-bit-fixed | |
261 values may depend on the buffer, e.g. depending on what language the | |
262 text in the buffer is in. */ | |
263 | |
867 | 264 /* True if Ichar CH can be represented in 8-bit-fixed format. */ |
265 #define ichar_8_bit_fixed_p(ch, object) (((ch) & ~0xff) == 0) | |
266 /* Convert Ichar CH to an 8-bit int, as will be stored in the buffer. */ | |
267 #define ichar_to_raw_8_bit_fixed(ch, object) ((Ibyte) (ch)) | |
826 | 268 /* Convert the other way. */ |
867 | 269 #define raw_8_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
270 | |
271 #define ichar_16_bit_fixed_p(ch, object) (((ch) & ~0xffff) == 0) | |
272 /* Convert Ichar CH to a 16-bit int, as will be stored in the buffer. */ | |
273 #define ichar_to_raw_16_bit_fixed(ch, object) ((UINT_16_BIT) (ch)) | |
826 | 274 /* Convert the other way. */ |
867 | 275 #define raw_16_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
276 | |
277 /* Convert Ichar CH to a 32-bit int, as will be stored in the buffer. */ | |
278 #define ichar_to_raw_32_bit_fixed(ch, object) ((UINT_32_BIT) (ch)) | |
826 | 279 /* Convert the other way. */ |
867 | 280 #define raw_32_bit_fixed_to_ichar(ch, object) ((Ichar) (ch)) |
826 | 281 |
282 /* Return the "raw value" of a character as stored in the buffer. In the | |
283 default format, this is just the same as the character. In fixed-width | |
284 formats, this is the actual value in the buffer, which will be limited | |
285 to the range as established by the format. This is used when searching | |
286 for a character in a buffer -- it's faster to convert the character to | |
287 the raw value and look for that, than repeatedly convert each raw value | |
288 in the buffer into a character. */ | |
289 | |
290 DECLARE_INLINE_HEADER ( | |
867 | 291 Raw_Ichar |
2286 | 292 ichar_to_raw (Ichar ch, Internal_Format fmt, |
293 Lisp_Object UNUSED (object)) | |
826 | 294 ) |
295 { | |
296 switch (fmt) | |
297 { | |
298 case FORMAT_DEFAULT: | |
867 | 299 return (Raw_Ichar) ch; |
826 | 300 case FORMAT_16_BIT_FIXED: |
867 | 301 text_checking_assert (ichar_16_bit_fixed_p (ch, object)); |
302 return (Raw_Ichar) ichar_to_raw_16_bit_fixed (ch, object); | |
826 | 303 case FORMAT_32_BIT_FIXED: |
867 | 304 return (Raw_Ichar) ichar_to_raw_32_bit_fixed (ch, object); |
826 | 305 default: |
306 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 307 text_checking_assert (ichar_8_bit_fixed_p (ch, object)); |
308 return (Raw_Ichar) ichar_to_raw_8_bit_fixed (ch, object); | |
826 | 309 } |
310 } | |
311 | |
312 /* Return whether CH is representable in the given format in the given | |
313 object. */ | |
314 | |
315 DECLARE_INLINE_HEADER ( | |
316 int | |
2286 | 317 ichar_fits_in_format (Ichar ch, Internal_Format fmt, |
318 Lisp_Object UNUSED (object)) | |
826 | 319 ) |
320 { | |
321 switch (fmt) | |
322 { | |
323 case FORMAT_DEFAULT: | |
324 return 1; | |
325 case FORMAT_16_BIT_FIXED: | |
867 | 326 return ichar_16_bit_fixed_p (ch, object); |
826 | 327 case FORMAT_32_BIT_FIXED: |
328 return 1; | |
329 default: | |
330 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 331 return ichar_8_bit_fixed_p (ch, object); |
826 | 332 } |
333 } | |
334 | |
335 /* Assuming the formats are the same, return whether the two objects | |
336 represent text in exactly the same way. */ | |
337 | |
338 DECLARE_INLINE_HEADER ( | |
339 int | |
2286 | 340 objects_have_same_internal_representation (Lisp_Object UNUSED (srcobj), |
341 Lisp_Object UNUSED (dstobj)) | |
826 | 342 ) |
343 { | |
344 /* &&#### implement this properly when we allow per-object format | |
345 differences */ | |
346 return 1; | |
347 } | |
348 | |
349 #else | |
350 | |
867 | 351 #define ichar_to_raw(ch, fmt, object) ((Raw_Ichar) (ch)) |
352 #define ichar_fits_in_format(ch, fmt, object) 1 | |
826 | 353 #define objects_have_same_internal_representation(srcobj, dstobj) 1 |
354 | |
771 | 355 #endif /* MULE */ |
356 | |
1632 | 357 MODULE_API int dfc_coding_system_is_unicode (Lisp_Object codesys); |
771 | 358 |
359 DECLARE_INLINE_HEADER ( | |
360 Bytecount dfc_external_data_len (const void *ptr, Lisp_Object codesys) | |
361 ) | |
362 { | |
363 if (dfc_coding_system_is_unicode (codesys)) | |
364 return sizeof (wchar_t) * wcslen ((wchar_t *) ptr); | |
365 else | |
366 return strlen ((char *) ptr); | |
367 } | |
368 | |
369 | |
370 /************************************************************************/ | |
371 /* */ | |
372 /* working with raw internal-format data */ | |
373 /* */ | |
374 /************************************************************************/ | |
375 | |
826 | 376 /* |
377 Use the following functions/macros on contiguous text in any of the | |
378 internal formats. Those that take a format arg work on all internal | |
379 formats; the others work only on the default (variable-width under Mule) | |
380 format. If the text you're operating on is known to come from a buffer, | |
381 use the buffer-level functions in buffer.h, which automatically know the | |
382 correct format and handle the gap. | |
383 | |
384 Some terminology: | |
385 | |
867 | 386 "itext" appearing in the macros means "internal-format text" -- type |
387 `Ibyte *'. Operations on such pointers themselves, rather than on the | |
388 text being pointed to, have "ibyteptr" instead of "itext" in the macro | |
389 name. "ichar" in the macro names means an Ichar -- the representation | |
826 | 390 of a character as a single integer rather than a series of bytes, as part |
867 | 391 of "itext". Many of the macros below are for converting between the |
826 | 392 two representations of characters. |
393 | |
867 | 394 Note also that we try to consistently distinguish between an "Ichar" and |
826 | 395 a Lisp character. Stuff working with Lisp characters often just says |
867 | 396 "char", so we consistently use "Ichar" when that's what we're working |
826 | 397 with. */ |
398 | |
399 /* The three golden rules of macros: | |
771 | 400 |
401 1) Anything that's an lvalue can be evaluated more than once. | |
826 | 402 |
403 2) Macros where anything else can be evaluated more than once should | |
404 have the word "unsafe" in their name (exceptions may be made for | |
405 large sets of macros that evaluate arguments of certain types more | |
406 than once, e.g. struct buffer * arguments, when clearly indicated in | |
407 the macro documentation). These macros are generally meant to be | |
408 called only by other macros that have already stored the calling | |
409 values in temporary variables. | |
410 | |
411 3) Nothing else can be evaluated more than once. Use inline | |
771 | 412 functions, if necessary, to prevent multiple evaluation. |
826 | 413 |
414 NOTE: The functions and macros below are given full prototypes in their | |
415 docs, even when the implementation is a macro. In such cases, passing | |
416 an argument of a type other than expected will produce undefined | |
417 results. Also, given that macros can do things functions can't (in | |
418 particular, directly modify arguments as if they were passed by | |
419 reference), the declaration syntax has been extended to include the | |
420 call-by-reference syntax from C++, where an & after a type indicates | |
421 that the argument is an lvalue and is passed by reference, i.e. the | |
422 function can modify its value. (This is equivalent in C to passing a | |
423 pointer to the argument, but without the need to explicitly worry about | |
424 pointers.) | |
425 | |
426 When to capitalize macros: | |
427 | |
428 -- Capitalize macros doing stuff obviously impossible with (C) | |
429 functions, e.g. directly modifying arguments as if they were passed by | |
430 reference. | |
431 | |
432 -- Capitalize macros that evaluate *any* argument more than once regardless | |
433 of whether that's "allowed" (e.g. buffer arguments). | |
434 | |
435 -- Capitalize macros that directly access a field in a Lisp_Object or | |
436 its equivalent underlying structure. In such cases, access through the | |
437 Lisp_Object precedes the macro with an X, and access through the underlying | |
438 structure doesn't. | |
439 | |
440 -- Capitalize certain other basic macros relating to Lisp_Objects; e.g. | |
441 FRAMEP, CHECK_FRAME, etc. | |
442 | |
443 -- Try to avoid capitalizing any other macros. | |
771 | 444 */ |
445 | |
446 /* ---------------------------------------------------------------------- */ | |
867 | 447 /* Working with itext's (pointers to internally-formatted text) */ |
771 | 448 /* ---------------------------------------------------------------------- */ |
449 | |
867 | 450 /* Given an itext, does it point to the beginning of a character? |
826 | 451 */ |
452 | |
771 | 453 #ifdef MULE |
867 | 454 # define valid_ibyteptr_p(ptr) ibyte_first_byte_p (* (ptr)) |
771 | 455 #else |
867 | 456 # define valid_ibyteptr_p(ptr) 1 |
771 | 457 #endif |
458 | |
867 | 459 /* If error-checking is enabled, assert that the given itext points to |
826 | 460 the beginning of a character. Otherwise, do nothing. |
461 */ | |
462 | |
867 | 463 #define assert_valid_ibyteptr(ptr) text_checking_assert (valid_ibyteptr_p (ptr)) |
464 | |
465 /* Given an itext (assumed to point at the beginning of a character), | |
826 | 466 modify that pointer so it points to the beginning of the next character. |
467 | |
867 | 468 Note that INC_IBYTEPTR() and DEC_IBYTEPTR() have to be written in |
469 completely separate ways. INC_IBYTEPTR() cannot use the DEC_IBYTEPTR() | |
771 | 470 trick of looking for a valid first byte because it might run off |
867 | 471 the end of the string. DEC_IBYTEPTR() can't use the INC_IBYTEPTR() |
771 | 472 method because it doesn't have easy access to the first byte of |
473 the character it's moving over. */ | |
474 | |
867 | 475 #define INC_IBYTEPTR(ptr) do { \ |
476 assert_valid_ibyteptr (ptr); \ | |
826 | 477 (ptr) += rep_bytes_by_first_byte (* (ptr)); \ |
478 } while (0) | |
479 | |
/* Advance PTR, which points at the beginning of a character in text of
   internal format FMT, to the beginning of the next character.

   FORMAT_DEFAULT defers to INC_IBYTEPTR(); the 16-bit, 32-bit and 8-bit
   fixed formats add 2, 4 and 1 to PTR respectively.  The fixed-width
   cases first pass an alignment check (16/32-bit) or a format check
   (default case) to text_checking_assert().

   FMT is evaluated exactly once (it is cached in a local, which the
   default-case assertion also uses).  PTR must be an lvalue and is
   evaluated more than once. */
#define INC_IBYTEPTR_FMT(ptr, fmt)                                        \
do {                                                                      \
  Internal_Format __icf_fmt = (fmt);                                      \
  switch (__icf_fmt)                                                      \
    {                                                                     \
    case FORMAT_DEFAULT:                                                  \
      INC_IBYTEPTR (ptr);                                                 \
      break;                                                              \
    case FORMAT_16_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) += 2;                                                         \
      break;                                                              \
    case FORMAT_32_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) += 4;                                                         \
      break;                                                              \
    default:                                                              \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED);             \
      (ptr)++;                                                            \
      break;                                                              \
    }                                                                     \
} while (0)
502 | |
867 | 503 /* Given an itext (assumed to point at the beginning of a character or at |
826 | 504 the very end of the text), modify that pointer so it points to the |
505 beginning of the previous character. | |
506 */ | |
771 | 507 |
800 | 508 #ifdef ERROR_CHECK_TEXT |
826 | 509 /* We use a separate definition to avoid warnings about unused dc_ptr1 */ |
867 | 510 #define DEC_IBYTEPTR(ptr) do { \ |
1333 | 511 const Ibyte *dc_ptr1 = (ptr); \ |
826 | 512 do { \ |
513 (ptr)--; \ | |
867 | 514 } while (!valid_ibyteptr_p (ptr)); \ |
826 | 515 text_checking_assert (dc_ptr1 - (ptr) == rep_bytes_by_first_byte (*(ptr))); \ |
771 | 516 } while (0) |
826 | 517 #else |
867 | 518 #define DEC_IBYTEPTR(ptr) do { \ |
826 | 519 do { \ |
520 (ptr)--; \ | |
867 | 521 } while (!valid_ibyteptr_p (ptr)); \ |
771 | 522 } while (0) |
826 | 523 #endif /* ERROR_CHECK_TEXT */ |
524 | |
/* Move PTR, which points at the beginning of a character (or at the very
   end) of text of internal format FMT, back to the beginning of the
   previous character.

   FORMAT_DEFAULT defers to DEC_IBYTEPTR(); the 16-bit, 32-bit and 8-bit
   fixed formats subtract 2, 4 and 1 from PTR respectively.  The
   fixed-width cases first pass an alignment check (16/32-bit) or a format
   check (default case) to text_checking_assert().

   FMT is evaluated exactly once (it is cached in a local, which the
   default-case assertion also uses).  PTR must be an lvalue and is
   evaluated more than once. */
#define DEC_IBYTEPTR_FMT(ptr, fmt)                                        \
do {                                                                      \
  Internal_Format __icf_fmt = (fmt);                                      \
  switch (__icf_fmt)                                                      \
    {                                                                     \
    case FORMAT_DEFAULT:                                                  \
      DEC_IBYTEPTR (ptr);                                                 \
      break;                                                              \
    case FORMAT_16_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_16_BIT)); \
      (ptr) -= 2;                                                         \
      break;                                                              \
    case FORMAT_32_BIT_FIXED:                                             \
      text_checking_assert ((void *) (ptr) == ALIGN_PTR (ptr, UINT_32_BIT)); \
      (ptr) -= 4;                                                         \
      break;                                                              \
    default:                                                              \
      text_checking_assert (__icf_fmt == FORMAT_8_BIT_FIXED);             \
      (ptr)--;                                                            \
      break;                                                              \
    }                                                                     \
} while (0)
547 | |
548 #ifdef MULE | |
549 | |
826 | 550 /* Make sure that PTR is pointing to the beginning of a character. If not, |
551 back up until this is the case. Note that there are not too many places | |
552 where it is legitimate to do this sort of thing. It's an error if | |
553 you're passed an "invalid" char * pointer. NOTE: PTR *must* be pointing | |
554 to a valid part of the string (i.e. not the very end, unless the string | |
555 is zero-terminated or something) in order for this function to not cause | |
556 crashes. | |
557 */ | |
558 | |
771 | 559 /* Note that this reads the byte at *PTR! */ |
560 | |
/* Step PTR (an lvalue) backward one byte at a time until
   valid_ibyteptr_p() accepts it.  The byte at *PTR is read on every
   iteration, including the first, so PTR must point at readable memory. */
#define VALIDATE_IBYTEPTR_BACKWARD(ptr) do {    \
  while (!valid_ibyteptr_p (ptr)) (ptr)--;      \
} while (0)
564 | |
826 | 565 /* Make sure that PTR is pointing to the beginning of a character. If not, |
566 move forward until this is the case. Note that there are not too many | |
567 places where it is legitimate to do this sort of thing. It's an error | |
568 if you're passed an "invalid" char * pointer. | |
569 */ | |
771 | 570 |
867 | 571 /* This needs to be trickier than VALIDATE_IBYTEPTR_BACKWARD() to avoid the |
771 | 572 possibility of running off the end of the string. */ |
573 | |
867 | 574 #define VALIDATE_IBYTEPTR_FORWARD(ptr) do { \ |
575 Ibyte *vcf_ptr = (ptr); \ | |
576 VALIDATE_IBYTEPTR_BACKWARD (vcf_ptr); \ | |
771 | 577 if (vcf_ptr != (ptr)) \ |
578 { \ | |
579 (ptr) = vcf_ptr; \ | |
867 | 580 INC_IBYTEPTR (ptr); \ |
771 | 581 } \ |
582 } while (0) | |
583 | |
584 #else /* not MULE */ | |
867 | 585 #define VALIDATE_IBYTEPTR_BACKWARD(ptr) |
586 #define VALIDATE_IBYTEPTR_FORWARD(ptr) | |
826 | 587 #endif /* not MULE */ |
588 | |
589 #ifdef MULE | |
590 | |
867 | 591 /* Given a Ibyte string at PTR of size N, possibly with a partial |
826 | 592 character at the end, return the size of the longest substring of |
593 complete characters. Does not assume that the byte at *(PTR + N) is | |
594 readable. Note that there are not too many places where it is | |
595 legitimate to do this sort of thing. It's an error if you're passed an | |
596 "invalid" offset. */ | |
597 | |
598 DECLARE_INLINE_HEADER ( | |
599 Bytecount | |
867 | 600 validate_ibyte_string_backward (const Ibyte *ptr, Bytecount n) |
826 | 601 ) |
602 { | |
867 | 603 const Ibyte *ptr2; |
826 | 604 |
605 if (n == 0) | |
606 return n; | |
607 ptr2 = ptr + n - 1; | |
867 | 608 VALIDATE_IBYTEPTR_BACKWARD (ptr2); |
826 | 609 if (ptr2 + rep_bytes_by_first_byte (*ptr2) != ptr + n) |
610 return ptr2 - ptr; | |
611 return n; | |
612 } | |
613 | |
614 #else | |
615 | |
867 | 616 #define validate_ibyte_string_backward(ptr, n) (n) |
826 | 617 |
618 #endif /* MULE */ | |
771 | 619 |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
620 /* ASSERT_ASCTEXT_ASCII(ptr): Check that an Ascbyte * pointer points to |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
621 purely ASCII text. Useful for checking that putatively ASCII strings |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
622 (i.e. declared as Ascbyte * or const Ascbyte *) are actually ASCII. |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
623 This is important because otherwise we need to worry about what |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
624 encoding they are in -- internal or some external encoding. |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
625 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
626 ASSERT_ASCTEXT_ASCII_LEN(ptr, len): Same as ASSERT_ASCTEXT_ASCII() |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
627 but where the length has been explicitly given. Useful if the string |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
628 may contain embedded zeroes. |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
629 */ |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
630 |
#ifdef ERROR_CHECK_TEXT
/* Assert that each of the LEN bytes starting at PTR lies in the 7-bit
   ASCII range 0x00 - 0x7F inclusive.  The upper bound includes DEL
   (0x7F), which is a legitimate ASCII character. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)                      \
do {                                                            \
  int aia2;                                                     \
  const Ascbyte *aia2ptr = (ptr);                               \
  int aia2len = (len);                                          \
                                                                \
  for (aia2 = 0; aia2 < aia2len; aia2++)                        \
    assert (aia2ptr[aia2] >= 0x00 && aia2ptr[aia2] <= 0x7F);    \
} while (0)
/* Same check for a zero-terminated string; the length comes from
   strlen(). */
#define ASSERT_ASCTEXT_ASCII(ptr)                       \
do {                                                    \
  const Ascbyte *aiaz2 = (ptr);                         \
  ASSERT_ASCTEXT_ASCII_LEN (aiaz2, strlen (aiaz2));     \
} while (0)
#else
/* Checking disabled: both macros expand to nothing. */
#define ASSERT_ASCTEXT_ASCII_LEN(ptr, len)
#define ASSERT_ASCTEXT_ASCII(ptr)
#endif
650 | |
771 | 651 /* -------------------------------------------------------------- */ |
826 | 652 /* Working with the length (in bytes and characters) of a */ |
653 /* section of internally-formatted text */ | |
771 | 654 /* -------------------------------------------------------------- */ |
655 | |
826 | 656 #ifdef MULE |
657 | |
1632 | 658 MODULE_API Charcount |
659 bytecount_to_charcount_fun (const Ibyte *ptr, Bytecount len); | |
660 MODULE_API Bytecount | |
661 charcount_to_bytecount_fun (const Ibyte *ptr, Charcount len); | |
826 | 662 |
663 /* Given a pointer to a text string and a length in bytes, return | |
664 the equivalent length in characters. */ | |
665 | |
666 DECLARE_INLINE_HEADER ( | |
667 Charcount | |
867 | 668 bytecount_to_charcount (const Ibyte *ptr, Bytecount len) |
826 | 669 ) |
670 { | |
671 if (len < 20) /* Just a random guess, but it should be more or less correct. | |
672 If number of bytes is small, just do a simple loop, | |
673 which should be more efficient. */ | |
674 { | |
675 Charcount count = 0; | |
867 | 676 const Ibyte *end = ptr + len; |
826 | 677 while (ptr < end) |
678 { | |
867 | 679 INC_IBYTEPTR (ptr); |
826 | 680 count++; |
681 } | |
682 /* Bomb out if the specified substring ends in the middle | |
683 of a character. Note that we might have already gotten | |
684 a core dump above from an invalid reference, but at least | |
685 we will get no farther than here. | |
686 | |
687 This also catches len < 0. */ | |
688 text_checking_assert (ptr == end); | |
689 | |
690 return count; | |
691 } | |
692 else | |
693 return bytecount_to_charcount_fun (ptr, len); | |
694 } | |
695 | |
696 /* Given a pointer to a text string and a length in characters, return the | |
697 equivalent length in bytes. | |
698 */ | |
699 | |
700 DECLARE_INLINE_HEADER ( | |
701 Bytecount | |
867 | 702 charcount_to_bytecount (const Ibyte *ptr, Charcount len) |
826 | 703 ) |
704 { | |
705 text_checking_assert (len >= 0); | |
706 if (len < 20) /* See above */ | |
707 { | |
867 | 708 const Ibyte *newptr = ptr; |
826 | 709 while (len > 0) |
710 { | |
867 | 711 INC_IBYTEPTR (newptr); |
826 | 712 len--; |
713 } | |
714 return newptr - ptr; | |
715 } | |
716 else | |
717 return charcount_to_bytecount_fun (ptr, len); | |
718 } | |
719 | |
2367 | 720 MODULE_API Bytecount |
721 charcount_to_bytecount_down_fun (const Ibyte *ptr, Charcount len); | |
722 | |
723 /* Given a pointer to a text string and a length in bytes, return | |
724 the equivalent length in characters of the stretch [PTR - LEN, PTR). */ | |
725 | |
726 DECLARE_INLINE_HEADER ( | |
727 Charcount | |
728 bytecount_to_charcount_down (const Ibyte *ptr, Bytecount len) | |
729 ) | |
730 { | |
731 /* No need to be clever here */ | |
732 return bytecount_to_charcount (ptr - len, len); | |
733 } | |
734 | |
735 /* Given a pointer to a text string and a length in characters, return the | |
736 equivalent length in bytes of the stretch of characters of that length | |
737 BEFORE the pointer. | |
738 */ | |
739 | |
740 DECLARE_INLINE_HEADER ( | |
741 Bytecount | |
742 charcount_to_bytecount_down (const Ibyte *ptr, Charcount len) | |
743 ) | |
744 { | |
745 #define SLEDGEHAMMER_CHECK_TEXT | |
746 #ifdef SLEDGEHAMMER_CHECK_TEXT | |
747 Charcount len1 = len; | |
748 Bytecount ret1, ret2; | |
749 | |
750 /* To test the correctness of the function version, always do the | |
751 calculation both ways and check that the values are the same. */ | |
752 text_checking_assert (len >= 0); | |
753 { | |
754 const Ibyte *newptr = ptr; | |
755 while (len1 > 0) | |
756 { | |
757 DEC_IBYTEPTR (newptr); | |
758 len1--; | |
759 } | |
760 ret1 = ptr - newptr; | |
761 } | |
762 ret2 = charcount_to_bytecount_down_fun (ptr, len); | |
763 text_checking_assert (ret1 == ret2); | |
764 return ret1; | |
765 #else | |
766 text_checking_assert (len >= 0); | |
767 if (len < 20) /* See above */ | |
768 { | |
769 const Ibyte *newptr = ptr; | |
770 while (len > 0) | |
771 { | |
772 DEC_IBYTEPTR (newptr); | |
773 len--; | |
774 } | |
775 return ptr - newptr; | |
776 } | |
777 else | |
778 return charcount_to_bytecount_down_fun (ptr, len); | |
779 #endif /* SLEDGEHAMMER_CHECK_TEXT */ | |
780 } | |
781 | |
826 | 782 /* Given a pointer to a text string in the specified format and a length in |
783 bytes, return the equivalent length in characters. | |
784 */ | |
785 | |
786 DECLARE_INLINE_HEADER ( | |
787 Charcount | |
867 | 788 bytecount_to_charcount_fmt (const Ibyte *ptr, Bytecount len, |
826 | 789 Internal_Format fmt) |
790 ) | |
791 { | |
792 switch (fmt) | |
793 { | |
794 case FORMAT_DEFAULT: | |
795 return bytecount_to_charcount (ptr, len); | |
796 case FORMAT_16_BIT_FIXED: | |
1204 | 797 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 798 return (Charcount) (len << 1); |
799 case FORMAT_32_BIT_FIXED: | |
1204 | 800 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 801 return (Charcount) (len << 2); |
802 default: | |
803 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
804 return (Charcount) len; | |
805 } | |
806 } | |
807 | |
808 /* Given a pointer to a text string in the specified format and a length in | |
809 characters, return the equivalent length in bytes. | |
810 */ | |
811 | |
812 DECLARE_INLINE_HEADER ( | |
813 Bytecount | |
867 | 814 charcount_to_bytecount_fmt (const Ibyte *ptr, Charcount len, |
826 | 815 Internal_Format fmt) |
816 ) | |
817 { | |
818 switch (fmt) | |
819 { | |
820 case FORMAT_DEFAULT: | |
821 return charcount_to_bytecount (ptr, len); | |
822 case FORMAT_16_BIT_FIXED: | |
1204 | 823 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 824 text_checking_assert (!(len & 1)); |
825 return (Bytecount) (len >> 1); | |
826 case FORMAT_32_BIT_FIXED: | |
827 text_checking_assert (!(len & 3)); | |
1204 | 828 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 829 return (Bytecount) (len >> 2); |
830 default: | |
831 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
832 return (Bytecount) len; | |
833 } | |
834 } | |
835 | |
836 #else | |
837 | |
838 #define bytecount_to_charcount(ptr, len) ((Charcount) (len)) | |
839 #define bytecount_to_charcount_fmt(ptr, len, fmt) ((Charcount) (len)) | |
840 #define charcount_to_bytecount(ptr, len) ((Bytecount) (len)) | |
841 #define charcount_to_bytecount_fmt(ptr, len, fmt) ((Bytecount) (len)) | |
842 | |
843 #endif /* MULE */ | |
844 | |
845 /* Return the length of the first character at PTR. Equivalent to | |
846 charcount_to_bytecount (ptr, 1). | |
847 | |
848 [Since charcount_to_bytecount() is written as inline, a smart compiler |
849 should really optimize charcount_to_bytecount (ptr, 1) to the same as | |
850 the following, with no error checking. But since this idiom occurs so | |
851 often, we'll be helpful and define a special macro for it.] | |
852 */ | |
853 | |
867 | 854 #define itext_ichar_len(ptr) rep_bytes_by_first_byte (*(ptr)) |
826 | 855 |
856 /* Return the length of the first character at PTR, which is in the | |
857 specified internal format. Equivalent to charcount_to_bytecount_fmt | |
858 (ptr, 1, fmt). | |
859 */ | |
860 | |
861 DECLARE_INLINE_HEADER ( | |
862 Bytecount | |
4853 | 863 itext_ichar_len_fmt (const Ibyte *ptr, Internal_Format fmt) |
826 | 864 ) |
865 { | |
866 switch (fmt) | |
867 { | |
868 case FORMAT_DEFAULT: | |
867 | 869 return itext_ichar_len (ptr); |
826 | 870 case FORMAT_16_BIT_FIXED: |
1204 | 871 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
826 | 872 return 2; |
873 case FORMAT_32_BIT_FIXED: | |
1204 | 874 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
826 | 875 return 4; |
876 default: | |
877 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
878 return 1; | |
879 } | |
880 } | |
881 | |
882 /* Return a pointer to the beginning of the character offset N (in | |
883 characters) from PTR. | |
884 */ | |
885 | |
886 DECLARE_INLINE_HEADER ( | |
867 | 887 const Ibyte * |
888 itext_n_addr (const Ibyte *ptr, Charcount offset) | |
826 | 889 ) |
771 | 890 { |
891 return ptr + charcount_to_bytecount (ptr, offset); | |
892 } | |
893 | |
867 | 894 /* Given a itext and an offset into the text pointed to by the itext, |
826 | 895 modify the offset so it points to the beginning of the next character. |
896 */ | |
897 | |
898 #define INC_BYTECOUNT(ptr, pos) do { \ | |
867 | 899 assert_valid_ibyteptr (ptr); \ |
826 | 900 (pos += rep_bytes_by_first_byte (* ((ptr) + (pos)))); \ |
901 } while (0) | |
902 | |
771 | 903 /* -------------------------------------------------------------------- */ |
867 | 904 /* Retrieving or changing the character pointed to by a itext */ |
771 | 905 /* -------------------------------------------------------------------- */ |
906 | |
867 | 907 #define simple_itext_ichar(ptr) ((Ichar) (ptr)[0]) |
908 #define simple_set_itext_ichar(ptr, x) \ | |
909 ((ptr)[0] = (Ibyte) (x), (Bytecount) 1) | |
910 #define simple_itext_copy_ichar(src, dst) \ | |
814 | 911 ((dst)[0] = *(src), (Bytecount) 1) |
771 | 912 |
913 #ifdef MULE | |
914 | |
1632 | 915 MODULE_API Ichar non_ascii_itext_ichar (const Ibyte *ptr); |
916 MODULE_API Bytecount non_ascii_set_itext_ichar (Ibyte *ptr, Ichar c); | |
917 MODULE_API Bytecount non_ascii_itext_copy_ichar (const Ibyte *src, Ibyte *dst); | |
867 | 918 |
919 /* Retrieve the character pointed to by PTR as an Ichar. */ | |
826 | 920 |
921 DECLARE_INLINE_HEADER ( | |
867 | 922 Ichar |
923 itext_ichar (const Ibyte *ptr) | |
826 | 924 ) |
771 | 925 { |
826 | 926 return byte_ascii_p (*ptr) ? |
867 | 927 simple_itext_ichar (ptr) : |
928 non_ascii_itext_ichar (ptr); | |
771 | 929 } |
930 | |
826 | 931 /* Retrieve the character pointed to by PTR (a pointer to text in the |
932 format FMT, coming from OBJECT [a buffer, string?, or nil]) as an | |
867 | 933 Ichar. |
826 | 934 |
935 Note: For these and other *_fmt() functions, if you pass in a constant | |
936 FMT, the switch will be optimized out of existence. Therefore, there is | |
937 no need to create separate versions for the various formats for | |
867 | 938 "efficiency reasons". In fact, we don't really need itext_ichar() |
826 | 939 and such written separately, but they are used often so it's simpler |
940 that way. */ | |
941 | |
942 DECLARE_INLINE_HEADER ( | |
867 | 943 Ichar |
944 itext_ichar_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 945 Lisp_Object UNUSED (object)) |
826 | 946 ) |
947 { | |
948 switch (fmt) | |
949 { | |
950 case FORMAT_DEFAULT: | |
867 | 951 return itext_ichar (ptr); |
826 | 952 case FORMAT_16_BIT_FIXED: |
1204 | 953 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 954 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 955 case FORMAT_32_BIT_FIXED: |
1204 | 956 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 957 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 958 default: |
959 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 960 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 961 } |
962 } | |
963 | |
964 /* Return the character at PTR (which is in format FMT), suitable for | |
965 comparison with an ASCII character. This guarantees that if the | |
966 character at PTR is ASCII (range 0 - 127), that character will be | |
967 returned; otherwise, some character outside of the ASCII range will be | |
968 returned, but not necessarily the character actually at PTR. This will | |
867 | 969 be faster than itext_ichar_fmt() for some formats -- in particular, |
826 | 970 FORMAT_DEFAULT. */ |
971 | |
972 DECLARE_INLINE_HEADER ( | |
867 | 973 Ichar |
974 itext_ichar_ascii_fmt (const Ibyte *ptr, Internal_Format fmt, | |
2286 | 975 Lisp_Object UNUSED (object)) |
826 | 976 ) |
977 { | |
978 switch (fmt) | |
979 { | |
980 case FORMAT_DEFAULT: | |
867 | 981 return (Ichar) *ptr; |
826 | 982 case FORMAT_16_BIT_FIXED: |
1204 | 983 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 984 return raw_16_bit_fixed_to_ichar (* (UINT_16_BIT *) ptr, object); |
826 | 985 case FORMAT_32_BIT_FIXED: |
1204 | 986 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 987 return raw_32_bit_fixed_to_ichar (* (UINT_32_BIT *) ptr, object); |
826 | 988 default: |
989 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 990 return raw_8_bit_fixed_to_ichar (*ptr, object); |
826 | 991 } |
992 } | |
993 | |
994 /* Return the "raw value" of the character at PTR, in format FMT. This is | |
995 useful when searching for a character; convert the character using | |
867 | 996 ichar_to_raw(). */ |
826 | 997 |
998 DECLARE_INLINE_HEADER ( | |
867 | 999 Raw_Ichar |
1000 itext_ichar_raw_fmt (const Ibyte *ptr, Internal_Format fmt) | |
826 | 1001 ) |
1002 { | |
1003 switch (fmt) | |
1004 { | |
1005 case FORMAT_DEFAULT: | |
867 | 1006 return (Raw_Ichar) itext_ichar (ptr); |
826 | 1007 case FORMAT_16_BIT_FIXED: |
1204 | 1008 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 1009 return (Raw_Ichar) (* (UINT_16_BIT *) ptr); |
826 | 1010 case FORMAT_32_BIT_FIXED: |
1204 | 1011 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 1012 return (Raw_Ichar) (* (UINT_32_BIT *) ptr); |
826 | 1013 default: |
1014 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 1015 return (Raw_Ichar) (*ptr); |
826 | 1016 } |
1017 } | |
1018 | |
867 | 1019 /* Store the character X (an Ichar) as internally-formatted text starting |
826 | 1020 at PTR. Return the number of bytes stored. |
1021 */ | |
1022 | |
1023 DECLARE_INLINE_HEADER ( | |
1024 Bytecount | |
867 | 1025 set_itext_ichar (Ibyte *ptr, Ichar x) |
826 | 1026 ) |
771 | 1027 { |
867 | 1028 return !ichar_multibyte_p (x) ? |
1029 simple_set_itext_ichar (ptr, x) : | |
1030 non_ascii_set_itext_ichar (ptr, x); | |
771 | 1031 } |
1032 | |
867 | 1033 /* Store the character X (an Ichar) as internally-formatted text of |
826 | 1034 format FMT starting at PTR, which comes from OBJECT. Return the number |
1035 of bytes stored. | |
1036 */ | |
1037 | |
1038 DECLARE_INLINE_HEADER ( | |
1039 Bytecount | |
867 | 1040 set_itext_ichar_fmt (Ibyte *ptr, Ichar x, Internal_Format fmt, |
2286 | 1041 Lisp_Object UNUSED (object)) |
826 | 1042 ) |
771 | 1043 { |
826 | 1044 switch (fmt) |
1045 { | |
1046 case FORMAT_DEFAULT: | |
867 | 1047 return set_itext_ichar (ptr, x); |
826 | 1048 case FORMAT_16_BIT_FIXED: |
867 | 1049 text_checking_assert (ichar_16_bit_fixed_p (x, object)); |
1204 | 1050 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_16_BIT)); |
867 | 1051 * (UINT_16_BIT *) ptr = ichar_to_raw_16_bit_fixed (x, object); |
826 | 1052 return 2; |
1053 case FORMAT_32_BIT_FIXED: | |
1204 | 1054 text_checking_assert ((void *) ptr == ALIGN_PTR (ptr, UINT_32_BIT)); |
867 | 1055 * (UINT_32_BIT *) ptr = ichar_to_raw_32_bit_fixed (x, object); |
826 | 1056 return 4; |
1057 default: | |
1058 text_checking_assert (fmt == FORMAT_8_BIT_FIXED); | |
867 | 1059 text_checking_assert (ichar_8_bit_fixed_p (x, object)); |
1060 *ptr = ichar_to_raw_8_bit_fixed (x, object); | |
826 | 1061 return 1; |
1062 } | |
1063 } | |
1064 | |
1065 /* Retrieve the character pointed to by SRC and store it as | |
1066 internally-formatted text in DST. | |
1067 */ | |
1068 | |
1069 DECLARE_INLINE_HEADER ( | |
1070 Bytecount | |
867 | 1071 itext_copy_ichar (const Ibyte *src, Ibyte *dst) |
826 | 1072 ) |
1073 { | |
1074 return byte_ascii_p (*src) ? | |
867 | 1075 simple_itext_copy_ichar (src, dst) : |
1076 non_ascii_itext_copy_ichar (src, dst); | |
771 | 1077 } |
1078 | |
1079 #else /* not MULE */ | |
1080 | |
867 | 1081 # define itext_ichar(ptr) simple_itext_ichar (ptr) |
1082 # define itext_ichar_fmt(ptr, fmt, object) itext_ichar (ptr) | |
1083 # define itext_ichar_ascii_fmt(ptr, fmt, object) itext_ichar (ptr) | |
1084 # define itext_ichar_raw_fmt(ptr, fmt) itext_ichar (ptr) | |
1085 # define set_itext_ichar(ptr, x) simple_set_itext_ichar (ptr, x) | |
1086 # define set_itext_ichar_fmt(ptr, x, fmt, obj) set_itext_ichar (ptr, x) | |
1087 # define itext_copy_ichar(src, dst) simple_itext_copy_ichar (src, dst) | |
771 | 1088 |
1089 #endif /* not MULE */ | |
1090 | |
826 | 1091 /* Retrieve the character at offset N (in characters) from PTR, as an |
867 | 1092 Ichar. |
826 | 1093 */ |
1094 | |
867 | 1095 #define itext_ichar_n(ptr, offset) \ |
1096 itext_ichar (itext_n_addr (ptr, offset)) | |
771 | 1097 |
1098 | |
1099 /************************************************************************/ | |
1100 /* */ | |
826 | 1101 /* working with Lisp strings */ |
1102 /* */ | |
1103 /************************************************************************/ | |
1104 | |
1105 #define string_char_length(s) \ | |
1106 string_index_byte_to_char (s, XSTRING_LENGTH (s)) | |
1107 #define string_byte(s, i) (XSTRING_DATA (s)[i] + 0) | |
1108 /* In case we ever allow strings to be in a different format ... */ | |
1109 #define set_string_byte(s, i, c) (XSTRING_DATA (s)[i] = (c)) | |
1110 | |
/* Assert that X is a character index within string S, i.e. between 0 and
   string_char_length (S) inclusive.  X and S are evaluated more than
   once. */
#define ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= string_char_length (s));     \
} while (0)

/* Assert that X is a byte index within string S, i.e. between 0 and
   XSTRING_LENGTH (S) inclusive, and that the byte at that offset passes
   valid_ibyteptr_p().  X and S are evaluated more than once. */
#define ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE(s, x) do {                \
  text_checking_assert ((x) >= 0 && (x) <= XSTRING_LENGTH (s));         \
  text_checking_assert (valid_ibyteptr_p (string_byte_addr (s, x)));    \
} while (0)
1119 | |
1120 /* Convert offset I in string S to a pointer to text there. */ | |
1121 #define string_byte_addr(s, i) (&(XSTRING_DATA (s)[i])) | |
1122 /* Convert pointer to text in string S into the byte offset to that text. */ | |
1123 #define string_addr_to_byte(s, ptr) ((Bytecount) ((ptr) - XSTRING_DATA (s))) | |
867 | 1124 /* Return the Ichar at *CHARACTER* offset I. */ |
1125 #define string_ichar(s, i) itext_ichar (string_char_addr (s, i)) | |
826 | 1126 |
1127 #ifdef ERROR_CHECK_TEXT | |
1128 #define SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1129 #endif | |
1130 | |
1131 #ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1132 void sledgehammer_check_ascii_begin (Lisp_Object str); | |
1133 #else | |
1134 #define sledgehammer_check_ascii_begin(str) | |
1135 #endif | |
1136 | |
1137 /* Make an alloca'd copy of a Lisp string */ | |
1138 #define LISP_STRING_TO_ALLOCA(s, lval) \ | |
1139 do { \ | |
1315 | 1140 Ibyte **_lta_ = (Ibyte **) &(lval); \ |
826 | 1141 Lisp_Object _lta_2 = (s); \ |
2367 | 1142 *_lta_ = alloca_ibytes (1 + XSTRING_LENGTH (_lta_2)); \ |
826 | 1143 memcpy (*_lta_, XSTRING_DATA (_lta_2), 1 + XSTRING_LENGTH (_lta_2)); \ |
1144 } while (0) | |
1145 | |
1146 void resize_string (Lisp_Object s, Bytecount pos, Bytecount delta); | |
1147 | |
1148 /* Convert a byte index into a string into a char index. */ | |
1149 DECLARE_INLINE_HEADER ( | |
1150 Charcount | |
4853 | 1151 string_index_byte_to_char (Lisp_Object s, Bytecount idx) |
826 | 1152 ) |
1153 { | |
1154 Charcount retval; | |
1155 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, idx); | |
1156 #ifdef MULE | |
1157 if (idx <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1158 retval = (Charcount) idx; | |
1159 else | |
1160 retval = (XSTRING_ASCII_BEGIN (s) + | |
1161 bytecount_to_charcount (XSTRING_DATA (s) + | |
1162 XSTRING_ASCII_BEGIN (s), | |
1163 idx - XSTRING_ASCII_BEGIN (s))); | |
1164 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1165 assert (retval == bytecount_to_charcount (XSTRING_DATA (s), idx)); | |
1166 # endif | |
1167 #else | |
1168 retval = (Charcount) idx; | |
1169 #endif | |
1170 /* Don't call ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE() here because it will | |
1171 call string_index_byte_to_char(). */ | |
1172 return retval; | |
1173 } | |
1174 | |
1175 /* Convert a char index into a string into a byte index. */ | |
1176 DECLARE_INLINE_HEADER ( | |
1177 Bytecount | |
4853 | 1178 string_index_char_to_byte (Lisp_Object s, Charcount idx) |
826 | 1179 ) |
1180 { | |
1181 Bytecount retval; | |
1182 ASSERT_VALID_CHAR_STRING_INDEX_UNSAFE (s, idx); | |
1183 #ifdef MULE | |
1184 if (idx <= (Charcount) XSTRING_ASCII_BEGIN (s)) | |
1185 retval = (Bytecount) idx; | |
1186 else | |
1187 retval = (XSTRING_ASCII_BEGIN (s) + | |
1188 charcount_to_bytecount (XSTRING_DATA (s) + | |
1189 XSTRING_ASCII_BEGIN (s), | |
1190 idx - XSTRING_ASCII_BEGIN (s))); | |
1191 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1192 assert (retval == charcount_to_bytecount (XSTRING_DATA (s), idx)); | |
1193 # endif | |
1194 #else | |
1195 retval = (Bytecount) idx; | |
1196 #endif | |
1197 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, retval); | |
1198 return retval; | |
1199 } | |
1200 | |
1201 /* Convert a substring length (starting at byte offset OFF) from bytes to | |
1202 chars. */ | |
1203 DECLARE_INLINE_HEADER ( | |
1204 Charcount | |
4853 | 1205 string_offset_byte_to_char_len (Lisp_Object s, Bytecount off, Bytecount len) |
826 | 1206 ) |
1207 { | |
1208 Charcount retval; | |
1209 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1210 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + len); | |
1211 #ifdef MULE | |
1212 if (off + len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1213 retval = (Charcount) len; | |
1214 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1215 retval = | |
1216 XSTRING_ASCII_BEGIN (s) - (Charcount) off + | |
1217 bytecount_to_charcount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1218 len - (XSTRING_ASCII_BEGIN (s) - off)); | |
1219 else | |
1220 retval = bytecount_to_charcount (XSTRING_DATA (s) + off, len); | |
1221 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1222 assert (retval == bytecount_to_charcount (XSTRING_DATA (s) + off, len)); | |
1223 # endif | |
1224 #else | |
1225 retval = (Charcount) len; | |
1226 #endif | |
1227 return retval; | |
1228 } | |
1229 | |
1230 /* Convert a substring length (starting at byte offset OFF) from chars to | |
1231 bytes. */ | |
1232 DECLARE_INLINE_HEADER ( | |
1233 Bytecount | |
4853 | 1234 string_offset_char_to_byte_len (Lisp_Object s, Bytecount off, Charcount len) |
826 | 1235 ) |
1236 { | |
1237 Bytecount retval; | |
1238 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off); | |
1239 #ifdef MULE | |
1240 /* casts to avoid errors from combining Bytecount/Charcount and warnings | |
1241 from signed/unsigned comparisons */ | |
1242 if (off + (Bytecount) len <= (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1243 retval = (Bytecount) len; | |
1244 else if (off < (Bytecount) XSTRING_ASCII_BEGIN (s)) | |
1245 retval = | |
1246 XSTRING_ASCII_BEGIN (s) - off + | |
1247 charcount_to_bytecount (XSTRING_DATA (s) + XSTRING_ASCII_BEGIN (s), | |
1248 len - (XSTRING_ASCII_BEGIN (s) - | |
1249 (Charcount) off)); | |
1250 else | |
1251 retval = charcount_to_bytecount (XSTRING_DATA (s) + off, len); | |
1252 # ifdef SLEDGEHAMMER_CHECK_ASCII_BEGIN | |
1253 assert (retval == charcount_to_bytecount (XSTRING_DATA (s) + off, len)); | |
1254 # endif | |
1255 #else | |
1256 retval = (Bytecount) len; | |
1257 #endif | |
1258 ASSERT_VALID_BYTE_STRING_INDEX_UNSAFE (s, off + retval); | |
1259 return retval; | |
1260 } | |
1261 | |
1262 DECLARE_INLINE_HEADER ( | |
867 | 1263 const Ibyte * |
826 | 1264 string_char_addr (Lisp_Object s, Charcount idx) |
1265 ) | |
1266 { | |
1267 return XSTRING_DATA (s) + string_index_char_to_byte (s, idx); | |
1268 } | |
1269 | |
1270 /* WARNING: If you modify an existing string, you must call | |
1271 bump_string_modiff() afterwards. */ | |
1272 #ifdef MULE | |
867 | 1273 void set_string_char (Lisp_Object s, Charcount i, Ichar c); |
826 | 1274 #else |
1275 #define set_string_char(s, i, c) set_string_byte (s, i, c) | |
1276 #endif /* not MULE */ | |
1277 | |
1278 /* Return index to character before the one at IDX. */ | |
1279 DECLARE_INLINE_HEADER ( | |
1280 Bytecount | |
1281 prev_string_index (Lisp_Object s, Bytecount idx) | |
1282 ) | |
1283 { | |
867 | 1284 const Ibyte *ptr = string_byte_addr (s, idx); |
1285 DEC_IBYTEPTR (ptr); | |
826 | 1286 return string_addr_to_byte (s, ptr); |
1287 } | |
1288 | |
1289 /* Return index to character after the one at IDX. */ | |
1290 DECLARE_INLINE_HEADER ( | |
1291 Bytecount | |
1292 next_string_index (Lisp_Object s, Bytecount idx) | |
1293 ) | |
1294 { | |
867 | 1295 const Ibyte *ptr = string_byte_addr (s, idx); |
1296 INC_IBYTEPTR (ptr); | |
826 | 1297 return string_addr_to_byte (s, ptr); |
1298 } | |
1299 | |
1300 | |
1301 /************************************************************************/ | |
1302 /* */ | |
771 | 1303 /* working with Eistrings */ |
1304 /* */ | |
1305 /************************************************************************/ | |
1306 | |
1307 /* | |
1308 #### NOTE: This is a work in progress. Neither the API nor especially | |
1309 the implementation is finished. | |
1310 | |
1311 NOTE: An Eistring is a structure that makes it easy to work with | |
1312 internally-formatted strings of data. It provides operations similar | |
1313 in feel to the standard strcpy(), strcat(), strlen(), etc., but | |
1314 | |
1315 (a) it is Mule-correct | |
1316 (b) it does dynamic allocation so you never have to worry about size | |
793 | 1317 restrictions |
851 | 1318 (c) it comes in an ALLOCA() variety (all allocation is stack-local, |
793 | 1319 so there is no need to explicitly clean up) as well as a malloc() |
1320 variety | |
1321 (d) it knows its own length, so it does not suffer from standard null | |
1322 byte brain-damage -- but it null-terminates the data anyway, so | |
1323 it can be passed to standard routines | |
1324 (e) it provides a much more powerful set of operations and knows about | |
771 | 1325 all the standard places where string data might reside: Lisp_Objects, |
867 | 1326 other Eistrings, Ibyte * data with or without an explicit length, |
1327 ASCII strings, Ichars, etc. | |
793 | 1328 (f) it provides easy operations to convert to/from externally-formatted |
1329 data, and is easier to use than the standard TO_INTERNAL_FORMAT | |
771 | 1330 and TO_EXTERNAL_FORMAT macros. (An Eistring can store both the internal |
1331 and external version of its data, but the external version is only | |
1332 initialized or changed when you call eito_external().) | |
1333 | |
793 | 1334 The idea is to make it as easy to write Mule-correct string manipulation |
1335 code as it is to write normal string manipulation code. We also make | |
1336 the API sufficiently general that it can handle multiple internal data | |
1337 formats (e.g. some fixed-width optimizing formats and a default variable | |
1338 width format) and allows for *ANY* data format we might choose in the | |
1339 future for the default format, including UCS2. (In other words, we can't | |
1340 assume that the internal format is ASCII-compatible and we can't assume | |
1341 it doesn't have embedded null bytes. We do assume, however, that any | |
1342 chosen format will have the concept of null-termination.) All of this is | |
1343 hidden from the user. | |
771 | 1344 |
1345 #### It is really too bad that we don't have a real object-oriented | |
1346 language, or at least a language with polymorphism! | |
1347 | |
1348 | |
1349 ********************************************** | |
1350 * Declaration * | |
1351 ********************************************** | |
1352 | |
1353 To declare an Eistring, either put one of the following in the local | |
1354 variable section: | |
1355 | |
1356 DECLARE_EISTRING (name); | |
2367 | 1357 Declare a new Eistring and initialize it to the empty string. This |
1358 is a standard local variable declaration and can go anywhere in the | |
1359 variable declaration section. NAME itself is declared as an | |
1360 Eistring *, and its storage declared on the stack. | |
771 | 1361 |
1362 DECLARE_EISTRING_MALLOC (name); | |
2367 | 1363 Declare and initialize a new Eistring, which uses malloc()ed |
1364 instead of ALLOCA()ed data. This is a standard local variable | |
1365 declaration and can go anywhere in the variable declaration | |
1366 section. Once you initialize the Eistring, you will have to free | |
1367 it using eifree() to avoid memory leaks. You will need to use this | |
1368 form if you are passing an Eistring to any function that modifies | |
1369 it (otherwise, the modified data may be in stack space and get | |
1370 overwritten when the function returns). | |
771 | 1371 |
1372 or use | |
1373 | |
793 | 1374 Eistring ei; |
1375 void eiinit (Eistring *ei); | |
1376 void eiinit_malloc (Eistring *einame); | |
771 | 1377 If you need to put an Eistring elsewhere than in a local variable |
1378 declaration (e.g. in a structure), declare it as shown and then | |
1379 call one of the init macros. | |
1380 | |
1381 Also note: | |
1382 | |
793 | 1383 void eifree (Eistring *ei); |
771 | 1384 If you declared an Eistring to use malloc() to hold its data, |
1385 or converted it to the heap using eito_malloc(), then this | |
1386 releases any data in it and afterwards resets the Eistring | |
1387 using eiinit_malloc(). Otherwise, it just resets the Eistring | |
1388 using eiinit(). | |
1389 | |
1390 | |
1391 ********************************************** | |
1392 * Conventions * | |
1393 ********************************************** | |
1394 | |
1395 - The names of the functions have been chosen, where possible, to | |
1396 match the names of str*() functions in the standard C API. | |
1397 - | |
1398 | |
1399 | |
1400 ********************************************** | |
1401 * Initialization * | |
1402 ********************************************** | |
1403 | |
1404 void eireset (Eistring *eistr); | |
1405 Initialize the Eistring to the empty string. | |
1406 | |
1407 void eicpy_* (Eistring *eistr, ...); | |
1408 Initialize the Eistring from somewhere: | |
1409 | |
1410 void eicpy_ei (Eistring *eistr, Eistring *eistr2); | |
1411 ... from another Eistring. | |
1412 void eicpy_lstr (Eistring *eistr, Lisp_Object lisp_string); | |
1413 ... from a Lisp_Object string. | |
867 | 1414 void eicpy_ch (Eistring *eistr, Ichar ch); |
1415 ... from an Ichar (this can be a conventional C character). | |
771 | 1416 |
1417 void eicpy_lstr_off (Eistring *eistr, Lisp_Object lisp_string, | |
1418 Bytecount off, Charcount charoff, | |
1419 Bytecount len, Charcount charlen); | |
1420 ... from a section of a Lisp_Object string. | |
1421 void eicpy_lbuf (Eistring *eistr, Lisp_Object lisp_buf, | |
1422 Bytecount off, Charcount charoff, | |
1423 Bytecount len, Charcount charlen); | |
1424 ... from a section of a Lisp_Object buffer. | |
867 | 1425 void eicpy_raw (Eistring *eistr, const Ibyte *data, Bytecount len); |
771 | 1426 ... from raw internal-format data in the default internal format. |
867 | 1427 void eicpy_rawz (Eistring *eistr, const Ibyte *data); |
771 | 1428 ... from raw internal-format data in the default internal format |
1429 that is "null-terminated" (the meaning of this depends on the nature | |
1430 of the default internal format). | |
867 | 1431 void eicpy_raw_fmt (Eistring *eistr, const Ibyte *data, Bytecount len, |
826 | 1432 Internal_Format intfmt, Lisp_Object object); |
771 | 1433 ... from raw internal-format data in the specified format. |
867 | 1434 void eicpy_rawz_fmt (Eistring *eistr, const Ibyte *data, |
826 | 1435 Internal_Format intfmt, Lisp_Object object); |
771 | 1436 ... from raw internal-format data in the specified format that is |
1437 "null-terminated" (the meaning of this depends on the nature of | |
1438 the specific format). | |
2421 | 1439 void eicpy_ascii (Eistring *eistr, const Ascbyte *ascstr); |
771 | 1440 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1441 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
2421 | 1442 void eicpy_ascii_len (Eistring *eistr, const Ascbyte *ascstr, len); |
771 | 1443 ... from an ASCII string, with length specified. Non-ASCII characters |
2500 | 1444 in the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1445 void eicpy_ext (Eistring *eistr, const Extbyte *extdata, |
1318 | 1446 Lisp_Object codesys); |
771 | 1447 ... from external null-terminated data, with coding system specified. |
1448 void eicpy_ext_len (Eistring *eistr, const Extbyte *extdata, | |
1318 | 1449 Bytecount extlen, Lisp_Object codesys); |
771 | 1450 ... from external data, with length and coding system specified. |
1451 void eicpy_lstream (Eistring *eistr, Lisp_Object lstream); | |
1452 ... from an lstream; reads data till eof. Data must be in default | |
1453 internal format; otherwise, interpose a decoding lstream. | |
1454 | |
1455 | |
1456 ********************************************** | |
1457 * Getting the data out of the Eistring * | |
1458 ********************************************** | |
1459 | |
867 | 1460 Ibyte *eidata (Eistring *eistr); |
771 | 1461 Return a pointer to the raw data in an Eistring. This is NOT |
1462 a copy. | |
1463 | |
1464 Lisp_Object eimake_string (Eistring *eistr); | |
1465 Make a Lisp string out of the Eistring. | |
1466 | |
1467 Lisp_Object eimake_string_off (Eistring *eistr, | |
1468 Bytecount off, Charcount charoff, | |
1469 Bytecount len, Charcount charlen); | |
1470 Make a Lisp string out of a section of the Eistring. | |
1471 | |
867 | 1472 void eicpyout_alloca (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1473 LVALUE: Bytecount len_out); |
851 | 1474 Make an ALLOCA() copy of the data in the Eistring, using the |
1475 default internal format. Due to the nature of ALLOCA(), this | |
771 | 1476 must be a macro, with all lvalues passed in as parameters. |
793 | 1477 (More specifically, not all compilers correctly handle using |
851 | 1478 ALLOCA() as the argument to a function call -- GCC on x86 |
1479 didn't used to, for example.) A pointer to the ALLOCA()ed data | |
793 | 1480 is stored in PTR_OUT, and the length of the data (not including |
1481 the terminating zero) is stored in LEN_OUT. | |
771 | 1482 |
867 | 1483 void eicpyout_alloca_fmt (Eistring *eistr, LVALUE: Ibyte *ptr_out, |
771 | 1484 LVALUE: Bytecount len_out, |
826 | 1485 Internal_Format intfmt, Lisp_Object object); |
771 | 1486 Like eicpyout_alloca(), but converts to the specified internal |
1487 format. (No formats other than FORMAT_DEFAULT are currently | |
1488 implemented, and you get an assertion failure if you try.) | |
1489 | |
867 | 1490 Ibyte *eicpyout_malloc (Eistring *eistr, Bytecount *intlen_out); |
771 | 1491 Make a malloc() copy of the data in the Eistring, using the |
1492 default internal format. This is a real function. No lvalues | |
1493 passed in. Returns the new data, and stores the length (not | |
1494 including the terminating zero) using INTLEN_OUT, unless it's | |
1495 a NULL pointer. | |
1496 | |
867 | 1497 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Internal_Format intfmt, |
826 | 1498 Bytecount *intlen_out, Lisp_Object object); |
771 | 1499 Like eicpyout_malloc(), but converts to the specified internal |
1500 format. (No formats other than FORMAT_DEFAULT are currently | |
1501 implemented, and you get an assertion failure if you try.) | |
1502 | |
1503 | |
1504 ********************************************** | |
1505 * Moving to the heap * | |
1506 ********************************************** | |
1507 | |
1508 void eito_malloc (Eistring *eistr); | |
1509 Move this Eistring to the heap. Its data will be stored in a | |
1510 malloc()ed block rather than the stack. Subsequent changes to | |
1511 this Eistring will realloc() the block as necessary. Use this | |
1512 when you want the Eistring to remain in scope past the end of | |
1513 this function call. You will have to manually free the data | |
1514 in the Eistring using eifree(). | |
1515 | |
1516 void eito_alloca (Eistring *eistr); | |
1517 Move this Eistring back to the stack, if it was moved to the | |
1518 heap with eito_malloc(). This will automatically free any | |
1519 heap-allocated data. | |
1520 | |
1521 | |
1522 | |
1523 ********************************************** | |
1524 * Retrieving the length * | |
1525 ********************************************** | |
1526 | |
1527 Bytecount eilen (Eistring *eistr); | |
1528 Return the length of the internal data, in bytes. See also | |
1529 eiextlen(), below. | |
1530 Charcount eicharlen (Eistring *eistr); | |
1531 Return the length of the internal data, in characters. | |
1532 | |
1533 | |
1534 ********************************************** | |
1535 * Working with positions * | |
1536 ********************************************** | |
1537 | |
1538 Bytecount eicharpos_to_bytepos (Eistring *eistr, Charcount charpos); | |
1539 Convert a char offset to a byte offset. | |
1540 Charcount eibytepos_to_charpos (Eistring *eistr, Bytecount bytepos); | |
1541 Convert a byte offset to a char offset. | |
1542 Bytecount eiincpos (Eistring *eistr, Bytecount bytepos); | |
1543 Increment the given position by one character. | |
1544 Bytecount eiincpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1545 Increment the given position by N characters. | |
1546 Bytecount eidecpos (Eistring *eistr, Bytecount bytepos); | |
1547 Decrement the given position by one character. | |
1548 Bytecount eidecpos_n (Eistring *eistr, Bytecount bytepos, Charcount n); | |
1549 Decrement the given position by N characters. | |
1550 | |
1551 | |
1552 ********************************************** | |
1553 * Getting the character at a position * | |
1554 ********************************************** | |
1555 | |
867 | 1556 Ichar eigetch (Eistring *eistr, Bytecount bytepos); |
771 | 1557 Return the character at a particular byte offset. |
867 | 1558 Ichar eigetch_char (Eistring *eistr, Charcount charpos); |
771 | 1559 Return the character at a particular character offset. |
1560 | |
1561 | |
1562 ********************************************** | |
1563 * Setting the character at a position * | |
1564 ********************************************** | |
1565 | |
867 | 1566 Ichar eisetch (Eistring *eistr, Bytecount bytepos, Ichar chr); |
771 | 1567 Set the character at a particular byte offset. |
867 | 1568 Ichar eisetch_char (Eistring *eistr, Charcount charpos, Ichar chr); |
771 | 1569 Set the character at a particular character offset. |
1570 | |
1571 | |
1572 ********************************************** | |
1573 * Concatenation * | |
1574 ********************************************** | |
1575 | |
1576 void eicat_* (Eistring *eistr, ...); | |
1577 Concatenate onto the end of the Eistring, with data coming from the | |
1578 same places as above: | |
1579 | |
1580 void eicat_ei (Eistring *eistr, Eistring *eistr2); | |
1581 ... from another Eistring. | |
2421 | 1582 void eicat_ascii (Eistring *eistr, Ascbyte *ascstr); |
771 | 1583 ... from an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1584 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
867 | 1585 void eicat_raw (ei, const Ibyte *data, Bytecount len); |
771 | 1586 ... from raw internal-format data in the default internal format. |
867 | 1587 void eicat_rawz (ei, const Ibyte *data); |
771 | 1588 ... from raw internal-format data in the default internal format |
1589 that is "null-terminated" (the meaning of this depends on the nature | |
1590 of the default internal format). | |
1591 void eicat_lstr (ei, Lisp_Object lisp_string); | |
1592 ... from a Lisp_Object string. | |
867 | 1593 void eicat_ch (ei, Ichar ch); |
1594 ... from an Ichar. | |
771 | 1595 |
1596 (All except the first variety are convenience functions. | |
1597 In the general case, create another Eistring from the source.) | |
1598 | |
1599 | |
1600 ********************************************** | |
1601 * Replacement * | |
1602 ********************************************** | |
1603 | |
1604 void eisub_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1605 Bytecount len, Charcount charlen, ...); | |
1606 Replace a section of the Eistring, specifically: | |
1607 | |
1608 void eisub_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1609 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1610 ... with another Eistring. | |
2421 | 1611 void eisub_ascii (Eistring *eistr, Bytecount off, Charcount charoff, |
1612 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
771 | 1613 ... with an ASCII null-terminated string. Non-ASCII characters in |
2500 | 1614 the string are *ILLEGAL* (read ABORT() with error-checking defined). |
771 | 1615 void eisub_ch (Eistring *eistr, Bytecount off, Charcount charoff, |
867 | 1616 Bytecount len, Charcount charlen, Ichar ch); |
1617 ... with an Ichar. | |
771 | 1618 |
1619 void eidel (Eistring *eistr, Bytecount off, Charcount charoff, | |
1620 Bytecount len, Charcount charlen); | |
1621 Delete a section of the Eistring. | |
1622 | |
1623 | |
1624 ********************************************** | |
1625 * Converting to an external format * | |
1626 ********************************************** | |
1627 | |
1318 | 1628 void eito_external (Eistring *eistr, Lisp_Object codesys); |
771 | 1629 Convert the Eistring to an external format and store the result |
1630 in the string. NOTE: Further changes to the Eistring will *NOT* | |
1631 change the external data stored in the string. You will have to | |
1632 call eito_external() again in such a case if you want the external | |
1633 data. | |
1634 | |
1635 Extbyte *eiextdata (Eistring *eistr); | |
1636 Return a pointer to the external data stored in the Eistring as | |
1637 a result of a prior call to eito_external(). | |
1638 | |
1639 Bytecount eiextlen (Eistring *eistr); | |
1640 Return the length in bytes of the external data stored in the | |
1641 Eistring as a result of a prior call to eito_external(). | |
1642 | |
1643 | |
1644 ********************************************** | |
1645 * Searching in the Eistring for a character * | |
1646 ********************************************** | |
1647 | |
867 | 1648 Bytecount eichr (Eistring *eistr, Ichar chr); |
1649 Charcount eichr_char (Eistring *eistr, Ichar chr); | |
1650 Bytecount eichr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1651 Charcount charoff); |
867 | 1652 Charcount eichr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1653 Charcount charoff); |
867 | 1654 Bytecount eirchr (Eistring *eistr, Ichar chr); |
1655 Charcount eirchr_char (Eistring *eistr, Ichar chr); | |
1656 Bytecount eirchr_off (Eistring *eistr, Ichar chr, Bytecount off, | |
771 | 1657 Charcount charoff); |
867 | 1658 Charcount eirchr_off_char (Eistring *eistr, Ichar chr, Bytecount off, |
771 | 1659 Charcount charoff); |
1660 | |
1661 | |
1662 ********************************************** | |
1663 * Searching in the Eistring for a string * | |
1664 ********************************************** | |
1665 | |
1666 Bytecount eistr_ei (Eistring *eistr, Eistring *eistr2); | |
1667 Charcount eistr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1668 Bytecount eistr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1669 Charcount charoff); | |
1670 Charcount eistr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1671 Bytecount off, Charcount charoff); | |
1672 Bytecount eirstr_ei (Eistring *eistr, Eistring *eistr2); | |
1673 Charcount eirstr_ei_char (Eistring *eistr, Eistring *eistr2); | |
1674 Bytecount eirstr_ei_off (Eistring *eistr, Eistring *eistr2, Bytecount off, | |
1675 Charcount charoff); | |
1676 Charcount eirstr_ei_off_char (Eistring *eistr, Eistring *eistr2, | |
1677 Bytecount off, Charcount charoff); | |
1678 | |
2421 | 1679 Bytecount eistr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1680 Charcount eistr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1681 Bytecount eistr_ascii_off (Eistring *eistr, Ascbyte *ascstr, Bytecount off, | |
771 | 1682 Charcount charoff); |
2421 | 1683 Charcount eistr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1684 Bytecount off, Charcount charoff); |
2421 | 1685 Bytecount eirstr_ascii (Eistring *eistr, Ascbyte *ascstr); |
1686 Charcount eirstr_ascii_char (Eistring *eistr, Ascbyte *ascstr); | |
1687 Bytecount eirstr_ascii_off (Eistring *eistr, Ascbyte *ascstr, | |
771 | 1688 Bytecount off, Charcount charoff); |
2421 | 1689 Charcount eirstr_ascii_off_char (Eistring *eistr, Ascbyte *ascstr, |
771 | 1690 Bytecount off, Charcount charoff); |
1691 | |
1692 | |
1693 ********************************************** | |
1694 * Comparison * | |
1695 ********************************************** | |
1696 | |
1697 int eicmp_* (Eistring *eistr, ...); | |
1698 int eicmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1699 Bytecount len, Charcount charlen, ...); | |
1700 int eicasecmp_* (Eistring *eistr, ...); | |
1701 int eicasecmp_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1702 Bytecount len, Charcount charlen, ...); | |
1703 int eicasecmp_i18n_* (Eistring *eistr, ...); | |
1704 int eicasecmp_i18n_off_* (Eistring *eistr, Bytecount off, Charcount charoff, | |
1705 Bytecount len, Charcount charlen, ...); | |
1706 | |
1707 Compare the Eistring with the other data. Return value same as | |
1708 from strcmp. The `*' is either `ei' for another Eistring (in | |
1709 which case `...' is an Eistring), or `ascii' for a pure-ASCII string | |
1710 (in which case `...' is a pointer to that string). For anything | |
1711 more complex, first create an Eistring out of the source. | |
1712 Comparison is either simple (`eicmp_...'), ASCII case-folding | |
1713 (`eicasecmp_...'), or multilingual case-folding | |
1714 (`eicasecmp_i18n_...'). | |
1715 | |
1716 | |
1717 More specifically, the prototypes are: | |
1718 | |
1719 int eicmp_ei (Eistring *eistr, Eistring *eistr2); | |
1720 int eicmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1721 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1722 int eicasecmp_ei (Eistring *eistr, Eistring *eistr2); | |
1723 int eicasecmp_off_ei (Eistring *eistr, Bytecount off, Charcount charoff, | |
1724 Bytecount len, Charcount charlen, Eistring *eistr2); | |
1725 int eicasecmp_i18n_ei (Eistring *eistr, Eistring *eistr2); | |
1726 int eicasecmp_i18n_off_ei (Eistring *eistr, Bytecount off, | |
1727 Charcount charoff, Bytecount len, | |
1728 Charcount charlen, Eistring *eistr2); | |
1729 | |
2421 | 1730 int eicmp_ascii (Eistring *eistr, Ascbyte *ascstr); |
1731 int eicmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
1732 Bytecount len, Charcount charlen, Ascbyte *ascstr); | |
1733 int eicasecmp_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1734 int eicasecmp_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1735 Bytecount len, Charcount charlen, |
2421 | 1736 Ascbyte *ascstr); |
1737 int eicasecmp_i18n_ascii (Eistring *eistr, Ascbyte *ascstr); | |
1738 int eicasecmp_i18n_off_ascii (Eistring *eistr, Bytecount off, Charcount charoff, | |
771 | 1739 Bytecount len, Charcount charlen, |
2421 | 1740 Ascbyte *ascstr); |
771 | 1741 |
1742 | |
1743 ********************************************** | |
1744 * Case-changing the Eistring * | |
1745 ********************************************** | |
1746 | |
1747 void eilwr (Eistring *eistr); | |
1748 Convert all characters in the Eistring to lowercase. | |
1749 void eiupr (Eistring *eistr); | |
1750 Convert all characters in the Eistring to uppercase. | |
1751 */ | |
1752 | |
1753 | |
1754 /* Principles for writing Eistring functions: | |
1755 | |
1756 (1) Unfortunately, we have to write most of the Eistring functions | |
851 | 1757 as macros, because of the use of ALLOCA(). The principle used |
771 | 1758 below to assure no conflict in local variables is to prefix all |
1759 local variables with "ei" plus a number, which should be unique | |
1760 among macros. In practice, when finding a new number, find the | |
1761 highest so far used, and add 1. | |
1762 | |
1763 (2) We also suffix the Eistring fields with an _ to avoid problems | |
1764 with macro parameters of the same name. (And as the standard | |
1765 signal not to access these fields directly.) | |
1766 | |
1767 (3) We maintain both the length in bytes and chars of the data in | |
1768 the Eistring at all times, for convenient retrieval by outside | |
1769 functions. That means when writing functions that manipulate | |
1770 Eistrings, you too need to keep both lengths up to date for all | |
1771 data that you work with. | |
1772 | |
1773 (4) When writing a new type of operation (e.g. substitution), you | |
1774 will often find yourself working with outside data, and thus | |
1775 have a series of related API's, for different forms that the | |
1776 outside data is in. Generally, you will want to choose a | |
1777 subset of the forms supported by eicpy_*, which has to be | |
1778 totally general because that's the fundamental way to get data | |
1779 into an Eistring, and once the data is into the string, it | |
1780 would be possible to create a whole series of Ei operations that work on | |
1781 nothing but Eistrings. Although theoretically nice, in | |
1782 practice it's a hassle, so we suggest that you provide | |
1783 convenience functions. In particular, there are two paths you | |
1784 can take. One is minimalist -- it only allows other Eistrings | |
867 | 1785 and ASCII data, and Ichars if the particular operation makes |
771 | 1786 sense with a character. The other provides interfaces for the |
1787 most commonly-used forms -- Eistring, ASCII data, Lisp string, | |
1788 raw internal-format string with length, raw internal-format | |
867 | 1789 string without, and possibly Ichar. (In the function names, |
771 | 1790 these are designated `ei', `ascii', `lstr', `raw', `rawz', and |
1791 `ch', respectively.) | |
1792 | |
1793 (5) When coding a new type of operation, such as was discussed in | |
1794 previous section, the correct approach is to declare a worker | |
1795 function that does the work of everything, and is called by the | |
1796 other "container" macros that handle the different outside data | |
1797 forms. The data coming into the worker function, which | |
1798 typically ends in `_1', is in the form of three parameters: | |
1799 DATA, LEN, CHARLEN. (See point [3] about having two lengths and | |
1800 keeping them in sync.) | |
1801 | |
1802 (6) Handling argument evaluation in macros: We take great care | |
1803 never to evaluate any argument more than once in any macro, | |
1804 except the initial Eistring parameter. This can and will be | |
1805 evaluated multiple times, but it should pretty much always just | |
1806 be a simple variable. This means, for example, that if an | |
1807 Eistring is the second (not first) argument of a macro, it | |
1808 doesn't fall under the "initial Eistring" exemption, so it | |
1809 needs protection against multi-evaluation. (Take the address of | |
1810 the Eistring structure, store in a temporary variable, and use | |
1811 temporary variable for all access to the Eistring.) | |
1812 Essentially, we want it to appear as if these Eistring macros | |
1813 are functions -- we would like to declare them as functions but | |
851 | 1814 they use ALLOCA(), so we can't (and we can't make them inline |
1815 functions either -- ALLOCA() is explicitly disallowed in inline | |
771 | 1816 functions.) |
1817 | |
1818 (7) Note that our rules regarding multiple evaluation are *more* | |
1819 strict than the rules listed above under the heading "working | |
1820 with raw internal-format data". | |
1821 */ | |
1822 | |
1823 | |
1824 /* ----- Declaration ----- */ | |
1825 | |
1826 typedef struct | |
1827 { | |
1828 /* Data for the Eistring, stored in the default internal format. | |
1829 Always includes terminating null. */ | |
867 | 1830 Ibyte *data_; |
771 | 1831 /* Total number of bytes allocated in DATA (including null). */ |
1832 Bytecount max_size_allocated_; | |
1833 Bytecount bytelen_; | |
1834 Charcount charlen_; | |
1835 int mallocp_; | |
1836 | |
1837 Extbyte *extdata_; | |
1838 Bytecount extlen_; | |
1839 } Eistring; | |
1840 | |
1841 extern Eistring the_eistring_zero_init, the_eistring_malloc_zero_init; | |
1842 | |
1843 #define DECLARE_EISTRING(name) \ | |
1844 Eistring __ ## name ## __storage__ = the_eistring_zero_init; \ | |
1845 Eistring *name = & __ ## name ## __storage__ | |
1846 #define DECLARE_EISTRING_MALLOC(name) \ | |
1847 Eistring __ ## name ## __storage__ = the_eistring_malloc_zero_init; \ | |
1848 Eistring *name = & __ ## name ## __storage__ | |
1849 | |
/* Overwrite *EI with a copy of the_eistring_zero_init.  This is a plain
   structure assignment; nothing previously held by *EI is released. */
#define eiinit(ei)                              \
  do {                                          \
    *(ei) = the_eistring_zero_init;             \
  } while (0)

/* Overwrite *EI with a copy of the_eistring_malloc_zero_init.  As with
   eiinit(), this is a plain structure assignment. */
#define eiinit_malloc(ei)                       \
  do {                                          \
    *(ei) = the_eistring_malloc_zero_init;      \
  } while (0)
1859 | |
1860 | |
1861 /* ----- Utility ----- */ | |
1862 | |
/* Make sure both LEN and CHARLEN are specified, in case one of them was
   given as -1.  LEN is tested first: if it is -1 it is computed from
   CHARLEN with charcount_to_bytecount(); otherwise, if CHARLEN is -1 it
   is computed from LEN with bytecount_to_charcount().  PTR is evaluated
   at most once; LEN and CHARLEN are evaluated multiple times and are
   assigned to. */
#define eifixup_bytechar(ptr, len, charlen)                     \
do {                                                            \
  if ((len) == -1)                                              \
    {                                                           \
      (len) = charcount_to_bytecount (ptr, charlen);            \
    }                                                           \
  else if ((charlen) == -1)                                     \
    {                                                           \
      (charlen) = bytecount_to_charcount (ptr, len);            \
    }                                                           \
} while (0)

/* Make sure LEN is specified, in case it was given as -1, by computing
   it from CHARLEN.  CHARLEN itself is never modified.  PTR is evaluated
   at most once, the others multiple times. */
#define eifixup_byte(ptr, len, charlen)                         \
do {                                                            \
  if ((len) == -1)                                              \
    {                                                           \
      (len) = charcount_to_bytecount (ptr, charlen);            \
    }                                                           \
} while (0)

/* Make sure CHARLEN is specified, in case it was given as -1, by
   computing it from LEN.  LEN itself is never modified.  PTR is
   evaluated at most once, the others multiple times. */
#define eifixup_char(ptr, len, charlen)                         \
do {                                                            \
  if ((charlen) == -1)                                          \
    {                                                           \
      (charlen) = bytecount_to_charcount (ptr, len);            \
    }                                                           \
} while (0)
1888 | |
1889 | |
1890 | |
/* Make sure we can hold NEWBYTELEN bytes (which is NEWCHARLEN chars)
   plus a zero terminator, and record both lengths in EI.  Preserve
   existing data as much as possible, including existing zero terminator.
   Put a new zero terminator where it should go if NEWZ is non-zero.  All
   args but EI are evalled only once.

   Nothing is allocated, and no terminator is written, when NEWBYTELEN
   equals the byte length EI already has.

   The allocated size grows by half at each step, with a minimum of 32
   bytes.  Sizes are computed in Bytecount -- the type of the bytelen_ and
   max_size_allocated_ fields -- rather than in int or double, so that
   they are never narrowed on the way. */

#define EI_ALLOC(ei, newbytelen, newcharlen, newz)                      \
do {                                                                    \
  Bytecount ei1oldeibytelen = (ei)->bytelen_;                           \
                                                                        \
  (ei)->charlen_ = (newcharlen);                                        \
  (ei)->bytelen_ = (newbytelen);                                        \
                                                                        \
  if (ei1oldeibytelen != (ei)->bytelen_)                                \
    {                                                                   \
      Bytecount ei1newsize = (ei)->max_size_allocated_;                 \
      while (ei1newsize < (ei)->bytelen_ + 1)                           \
        {                                                               \
          /* Same result as truncating ei1newsize * 1.5, but without    \
             a round trip through floating point. */                    \
          ei1newsize += ei1newsize / 2;                                 \
          if (ei1newsize < 32)                                          \
            ei1newsize = 32;                                            \
        }                                                               \
      if (ei1newsize != (ei)->max_size_allocated_)                      \
        {                                                               \
          if ((ei)->mallocp_)                                           \
            /* xrealloc always preserves existing data as much as       \
               possible */                                              \
            (ei)->data_ = (Ibyte *) xrealloc ((ei)->data_, ei1newsize); \
          else                                                          \
            {                                                           \
              /* We don't have realloc, so ALLOCA() more space and copy \
                 the data into it. */                                   \
              Ibyte *ei1oldeidata = (ei)->data_;                        \
              (ei)->data_ = alloca_ibytes (ei1newsize);                 \
              if (ei1oldeidata)                                         \
                /* The + 1 carries the old zero terminator along. */    \
                memcpy ((ei)->data_, ei1oldeidata,                      \
                        ei1oldeibytelen + 1);                           \
            }                                                           \
          (ei)->max_size_allocated_ = ei1newsize;                       \
        }                                                               \
      if (newz)                                                         \
        (ei)->data_[(ei)->bytelen_] = '\0';                             \
    }                                                                   \
} while (0)
1932 | |
/* Size EI through EI_ALLOC (BYTELEN bytes, CHARLEN characters, last
   argument 1), then copy (ei)->bytelen_ bytes from DATA into the
   Eistring's buffer. */
#define EI_ALLOC_AND_COPY(ei, data, bytelen, charlen)   \
do {                                                    \
  EI_ALLOC (ei, bytelen, charlen, 1);                   \
  memcpy ((ei)->data_, (data), (ei)->bytelen_);         \
} while (0)
1938 | |
1939 /* ----- Initialization ----- */ | |
1940 | |
/* Make EI a copy of the Eistring EICPY.  EICPY is evaluated exactly
   once. */
#define eicpy_ei(ei, eicpy)                                     \
do {                                                            \
  const Eistring *ei2_src = (eicpy);                            \
  EI_ALLOC_AND_COPY (ei, ei2_src->data_, ei2_src->bytelen_,     \
                     ei2_src->charlen_);                        \
} while (0)
1946 | |
/* Make EI a copy of the text of the Lisp string LISP_STRING, which is
   evaluated exactly once. */
#define eicpy_lstr(ei, lisp_string)                             \
do {                                                            \
  Lisp_Object ei3_str = (lisp_string);                          \
  EI_ALLOC_AND_COPY (ei, XSTRING_DATA (ei3_str),                \
                     XSTRING_LENGTH (ei3_str),                  \
                     string_char_length (ei3_str));             \
} while (0)
1953 | |
/* Make EI a copy of a section of the Lisp string LISP_STRING.  The
   section is given by a byte offset/length (OFF, LEN) and the
   corresponding character offset/length (CHAROFF, CHARLEN); the values
   are run through eifixup_byte()/eifixup_bytechar() before the copy.
   Each argument other than EI is evaluated exactly once. */
#define eicpy_lstr_off(ei, lisp_string, off, charoff, len, charlen)     \
do {                                                                    \
  Lisp_Object ei23lstr = (lisp_string);                                 \
  int ei23off = (off);                                                  \
  int ei23charoff = (charoff);                                          \
  int ei23len = (len);                                                  \
  int ei23charlen = (charlen);                                          \
  const Ibyte *ei23data = XSTRING_DATA (ei23lstr);                      \
                                                                        \
  eifixup_byte (ei23data, ei23off, ei23charoff);                        \
  eifixup_bytechar (ei23data + ei23off, ei23len, ei23charlen);          \
                                                                        \
  EI_ALLOC_AND_COPY (ei, ei23data + ei23off, ei23len, ei23charlen);     \
} while (0)
1970 | |
/* Make EI a copy of the LEN bytes of text at PTR.  FMT must be
   FORMAT_DEFAULT (asserted); OBJECT is accepted but not used here. */
#define eicpy_raw_fmt(ei, ptr, len, fmt, object)                        \
do {                                                                    \
  const Ibyte *ei12_ptr = (ptr);                                        \
  Internal_Format ei12_fmt = (fmt);                                     \
  int ei12_len = (len);                                                 \
  assert (ei12_fmt == FORMAT_DEFAULT);                                  \
  EI_ALLOC_AND_COPY (ei, ei12_ptr, ei12_len,                            \
                     bytecount_to_charcount (ei12_ptr, ei12_len));      \
} while (0)
1980 | |
/* eicpy_raw_fmt() with the default format and a Qnil object. */
#define eicpy_raw(ei, ptr, len)                                 \
  eicpy_raw_fmt (ei, ptr, len, FORMAT_DEFAULT, Qnil)
1983 | |
/* Make EI a copy of the zero-terminated text at PTR; the length is
   taken with qxestrlen().  FMT must be FORMAT_DEFAULT (asserted).
   PTR and FMT are each evaluated exactly once: the captured copy of
   FMT, not the raw macro argument, is what gets handed on to
   eicpy_raw_fmt(). */
#define eicpy_rawz_fmt(ei, ptr, fmt, object)                            \
do {                                                                    \
  const Ibyte *ei12p1ptr = (ptr);                                       \
  Internal_Format ei12p1fmt = (fmt);                                    \
  assert (ei12p1fmt == FORMAT_DEFAULT);                                 \
  eicpy_raw_fmt (ei, ei12p1ptr, qxestrlen (ei12p1ptr), ei12p1fmt,       \
                 object);                                               \
} while (0)
1991 | |
/* eicpy_rawz_fmt() with the default format and a Qnil object. */
#define eicpy_rawz(ei, ptr)                                     \
  eicpy_rawz_fmt (ei, ptr, FORMAT_DEFAULT, Qnil)
771 | 1993 |
/* Make EI hold the single character CH.  CH is encoded into a local
   buffer with set_itext_ichar() and then copied in as one character. */
#define eicpy_ch(ei, ch)                                                \
do {                                                                    \
  Ibyte ei12p2_buf[MAX_ICHAR_LEN];                                      \
  Bytecount ei12p2_buflen = set_itext_ichar (ei12p2_buf, ch);           \
  EI_ALLOC_AND_COPY (ei, ei12p2_buf, ei12p2_buflen, 1);                 \
} while (0)
2000 | |
/* Make EI a copy of the zero-terminated string ASCSTR, after checking
   it with ASSERT_ASCTEXT_ASCII().  The copy goes through eicpy_ext()
   with Qbinary. */
#define eicpy_ascii(ei, ascstr)                                 \
do {                                                            \
  const Ascbyte *ei4_asc = (ascstr);                            \
                                                                \
  ASSERT_ASCTEXT_ASCII (ei4_asc);                               \
  eicpy_ext (ei, ei4_asc, Qbinary);                             \
} while (0)
2008 | |
/* Sized variant of eicpy_ascii(): make EI a copy of the C_LEN bytes at
   ASCSTR, after checking them with ASSERT_ASCTEXT_ASCII_LEN().  The
   copy goes through eicpy_ext_len() with Qbinary. */
#define eicpy_ascii_len(ei, ascstr, c_len)                      \
do {                                                            \
  const Ascbyte *ei6_asc = (ascstr);                            \
  int ei6_asclen = (c_len);                                     \
                                                                \
  ASSERT_ASCTEXT_ASCII_LEN (ei6_asc, ei6_asclen);               \
  eicpy_ext_len (ei, ei6_asc, ei6_asclen, Qbinary);             \
} while (0)
2017 | |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
/* Set EI from EXTLEN bytes of external data at EXTDATA, converted with
   coding system CODESYS.  The conversion writes its ALLOCA result
   straight into (ei)->data_ / (ei)->bytelen_; the allocated size and
   character length are then recomputed from that result.
   NOTE(review): the ALLOCA sink is used regardless of (ei)->mallocp_ --
   confirm this is intended for malloc()ed Eistrings. */
#define eicpy_ext_len(ei, extdata, extlen, codesys)                     \
do {                                                                    \
  const Extbyte *ei7_ext = (extdata);                                   \
  int ei7_extlen = (extlen);                                            \
                                                                        \
  TO_INTERNAL_FORMAT (DATA, (ei7_ext, ei7_extlen),                      \
                      ALLOCA, ((ei)->data_, (ei)->bytelen_),            \
                      codesys);                                         \
  (ei)->max_size_allocated_ = (ei)->bytelen_ + 1;                       \
  (ei)->charlen_ =                                                      \
    bytecount_to_charcount ((ei)->data_, (ei)->bytelen_);               \
} while (0)
2029 | |
/* Unsized variant of eicpy_ext_len(): the length of EXTDATA is obtained
   from dfc_external_data_len().  Note that CODESYS appears twice in the
   expansion. */
#define eicpy_ext(ei, extdata, codesys)                                 \
do {                                                                    \
  const Extbyte *ei8_ext = (extdata);                                   \
                                                                        \
  eicpy_ext_len (ei, ei8_ext,                                           \
                 dfc_external_data_len (ei8_ext, codesys), codesys);    \
} while (0)
2037 | |
/* Placeholders.  Each expands to the bare tokens NOT YET IMPLEMENTED,
   so any use fails to compile. */
#define eicpy_lbuf(eistr, lisp_buf, off, charoff, len, charlen) NOT YET IMPLEMENTED

#define eicpy_lstream(eistr, lstream) NOT YET IMPLEMENTED
2043 | |
/* Reset EISTR to the empty string by copying "" into it. */
#define eireset(eistr)                                          \
  eicpy_rawz (eistr, (Ibyte *) "")
771 | 2045 |
2046 /* ----- Getting the data out of the Eistring ----- */ | |
2047 | |
/* The Eistring's internal data pointer (usable as an lvalue). */
#define eidata(ei) ((ei)->data_)

/* Build a Lisp string from the Eistring's data and byte length. */
#define eimake_string(ei) make_string (eidata (ei), eilen (ei))
2051 | |
/* Build a Lisp string from the section of EISTR given by byte
   offset/length (OFF, LEN) and character offset/length (CHAROFF,
   CHARLEN), after running both pairs through eifixup_byte().

   NOTE(review): this macro ends in a `return' statement, so it returns
   the new string from the *enclosing function* instead of yielding a
   value -- it can only be used as the last statement of a function
   returning Lisp_Object.  Confirm that is the intended contract before
   relying on it. */
#define eimake_string_off(eistr, off, charoff, len, charlen)            \
do {                                                                    \
  int ei24off = (off);                                                  \
  int ei24charoff = (charoff);                                          \
  int ei24len = (len);                                                  \
  int ei24charlen = (charlen);                                          \
                                                                        \
  eifixup_byte ((eistr)->data_, ei24off, ei24charoff);                  \
  eifixup_byte ((eistr)->data_ + ei24off, ei24len, ei24charlen);        \
                                                                        \
  return make_string ((eistr)->data_ + ei24off, ei24len);               \
} while (0)
2065 | |
2066 #define eicpyout_alloca(eistr, ptrout, lenout) \ | |
826 | 2067 eicpyout_alloca_fmt (eistr, ptrout, lenout, FORMAT_DEFAULT, Qnil) |
771 | 2068 #define eicpyout_malloc(eistr, lenout) \ |
826 | 2069 eicpyout_malloc_fmt (eistr, lenout, FORMAT_DEFAULT, Qnil) |
867 | 2070 Ibyte *eicpyout_malloc_fmt (Eistring *eistr, Bytecount *len_out, |
826 | 2071 Internal_Format fmt, Lisp_Object object); |
/* Copy the data of EISTR, including its zero terminator, into a fresh
   alloca_ibytes() buffer.  The buffer pointer is stored in PTROUT and
   the byte length (terminator excluded) in LENOUT; both must be
   lvalues.  FMT must be FORMAT_DEFAULT (asserted); OBJECT is not used.

   PTROUT is an `Ibyte *' variable, so its address has to be held in an
   `Ibyte **' for the stores below to be well-typed. */
#define eicpyout_alloca_fmt(eistr, ptrout, lenout, fmt, object)         \
do {                                                                    \
  Internal_Format ei23fmt = (fmt);                                      \
  Ibyte **ei23ptrout = &(ptrout);                                       \
  Bytecount *ei23lenout = &(lenout);                                    \
                                                                        \
  assert (ei23fmt == FORMAT_DEFAULT);                                   \
                                                                        \
  *ei23lenout = (eistr)->bytelen_;                                      \
  *ei23ptrout = alloca_ibytes ((eistr)->bytelen_ + 1);                  \
  memcpy (*ei23ptrout, (eistr)->data_, (eistr)->bytelen_ + 1);          \
} while (0)
2084 | |
2085 /* ----- Moving to the heap ----- */ | |
2086 | |
/* Release an Eistring.  For a malloc()ed Eistring, free the internal
   and external buffers if present, clear the pointers, and
   re-initialize with eiinit_malloc(); otherwise just re-initialize
   with eiinit(). */
#define eifree(ei)                                              \
do {                                                            \
  if (!(ei)->mallocp_)                                          \
    {                                                           \
      eiinit (ei);                                              \
    }                                                           \
  else                                                          \
    {                                                           \
      if ((ei)->data_)                                          \
        {                                                       \
          xfree ((ei)->data_);                                  \
          (ei)->data_ = 0;                                      \
        }                                                       \
      if ((ei)->extdata_)                                       \
        {                                                       \
          xfree ((ei)->extdata_);                               \
          (ei)->extdata_ = 0;                                   \
        }                                                       \
      eiinit_malloc (ei);                                       \
    }                                                           \
} while (0)
2106 | |
2107 int eifind_large_enough_buffer (int oldbufsize, int needed_size); | |
2108 void eito_malloc_1 (Eistring *ei); | |
2109 | |
2110 #define eito_malloc(ei) eito_malloc_1 (ei) | |
2111 | |
/* Convert a malloc()ed Eistring into an alloca()ed one: the internal
   and external buffers, if present, are copied into alloca()ed storage
   and the heap copies are freed.  Does nothing when EI is not
   malloc()ed.

   The not-malloc()ed case is handled with a guard and not with a
   `return', because a `return' inside a macro would leave the
   *caller's* function (and would not even compile in a function with a
   non-void return type). */
#define eito_alloca(ei)                                                 \
do {                                                                    \
  if ((ei)->mallocp_)                                                   \
    {                                                                   \
      (ei)->mallocp_ = 0;                                               \
      if ((ei)->data_)                                                  \
        {                                                               \
          Ibyte *ei13newdata;                                           \
                                                                        \
          (ei)->max_size_allocated_ =                                   \
            eifind_large_enough_buffer (0, (ei)->bytelen_ + 1);         \
          ei13newdata = alloca_ibytes ((ei)->max_size_allocated_);      \
          /* Copy the text together with its zero terminator. */        \
          memcpy (ei13newdata, (ei)->data_, (ei)->bytelen_ + 1);        \
          xfree ((ei)->data_);                                          \
          (ei)->data_ = ei13newdata;                                    \
        }                                                               \
                                                                        \
      if ((ei)->extdata_)                                               \
        {                                                               \
          Extbyte *ei13newext = alloca_extbytes ((ei)->extlen_ + 2);    \
                                                                        \
          memcpy (ei13newext, (ei)->extdata_, (ei)->extlen_);           \
          /* Double null-terminate in case of Unicode data */           \
          ei13newext[(ei)->extlen_] = '\0';                             \
          ei13newext[(ei)->extlen_ + 1] = '\0';                         \
          xfree ((ei)->extdata_);                                       \
          (ei)->extdata_ = ei13newext;                                  \
        }                                                               \
    }                                                                   \
} while (0)
2141 | |
2142 | |
2143 /* ----- Retrieving the length ----- */ | |
2144 | |
/* Length of the Eistring in bytes. */
#define eilen(ei)       ((ei)->bytelen_)
/* Length of the Eistring in characters. */
#define eicharlen(ei)   ((ei)->charlen_)
2147 | |
2148 | |
2149 /* ----- Working with positions ----- */ | |
2150 | |
/* Convert a character position within the Eistring to a byte
   position. */
#define eicharpos_to_bytepos(ei, charpos)                       \
  charcount_to_bytecount ((ei)->data_, charpos)
/* Convert a byte position within the Eistring to a character
   position. */
#define eibytepos_to_charpos(ei, bytepos)                       \
  bytecount_to_charcount ((ei)->data_, bytepos)
2155 | |
2156 DECLARE_INLINE_HEADER (Bytecount eiincpos_1 (Eistring *eistr, | |
2157 Bytecount bytepos, | |
2158 Charcount n)) | |
2159 { | |
867 | 2160 Ibyte *pos = eistr->data_ + bytepos; |
814 | 2161 Charcount i; |
771 | 2162 |
800 | 2163 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2164 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2165 /* We could check N more correctly now, but that would require a |
2166 call to bytecount_to_charcount(), which would be needlessly | |
2167 expensive (it would convert O(N) algorithms into O(N^2) algorithms | |
800 | 2168 with ERROR_CHECK_TEXT, which would be bad). If N is bad, we are |
867 | 2169 guaranteed to catch it either inside INC_IBYTEPTR() or in the check |
771 | 2170 below. */ |
2171 for (i = 0; i < n; i++) | |
867 | 2172 INC_IBYTEPTR (pos); |
800 | 2173 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_); |
771 | 2174 return pos - eistr->data_; |
2175 } | |
2176 | |
/* Advance a byte position by one character (eiincpos) or by N
   characters (eiincpos_n).  There must be no space between the macro
   name and its parameter list: with a space the preprocessor defines
   an object-like macro whose replacement text begins with the
   parenthesized "parameters". */
#define eiincpos(ei, bytepos) eiincpos_1 (ei, bytepos, 1)
#define eiincpos_n(ei, bytepos, n) eiincpos_1 (ei, bytepos, n)
2179 | |
2180 DECLARE_INLINE_HEADER (Bytecount eidecpos_1 (Eistring *eistr, | |
2181 Bytecount bytepos, | |
2182 Charcount n)) | |
2183 { | |
867 | 2184 Ibyte *pos = eistr->data_ + bytepos; |
771 | 2185 int i; |
2186 | |
800 | 2187 text_checking_assert (bytepos >= 0 && bytepos <= eistr->bytelen_); |
2188 text_checking_assert (n >= 0 && n <= eistr->charlen_); | |
771 | 2189 /* We could check N more correctly now, but ... see above. */ |
2190 for (i = 0; i < n; i++) | |
867 | 2191 DEC_IBYTEPTR (pos); |
800 | 2192 text_checking_assert (pos - eistr->data_ <= eistr->bytelen_); |
771 | 2193 return pos - eistr->data_; |
2194 } | |
2195 | |
/* Move a byte position back by one character (eidecpos) or by N
   characters (eidecpos_n).  There must be no space between the macro
   name and its parameter list: with a space the preprocessor defines
   an object-like macro whose replacement text begins with the
   parenthesized "parameters". */
#define eidecpos(ei, bytepos) eidecpos_1 (ei, bytepos, 1)
#define eidecpos_n(ei, bytepos, n) eidecpos_1 (ei, bytepos, n)
2198 | |
2199 | |
2200 /* ----- Getting the character at a position ----- */ | |
2201 | |
/* Character at byte position BYTEPOS of the Eistring. */
#define eigetch(ei, bytepos) itext_ichar ((ei)->data_ + (bytepos))
/* Character at character position CHARPOS of the Eistring. */
#define eigetch_char(ei, charpos)                               \
  itext_ichar_n ((ei)->data_, charpos)
771 | 2205 |
2206 | |
2207 /* ----- Setting the character at a position ----- */ | |
2208 | |
/* Replace one character of the Eistring with CHR via eisub_ch().  The
   position is given in bytes (eisetch) or in characters
   (eisetch_char); the other position and the byte length are passed as
   -1, the character length as 1. */
#define eisetch(ei, bytepos, chr) eisub_ch (ei, bytepos, -1, -1, 1, chr)
#define eisetch_char(ei, charpos, chr) eisub_ch (ei, -1, charpos, -1, 1, chr)
2213 | |
2214 | |
2215 /* ----- Concatenation ----- */ | |
2216 | |
/* Append BYTELEN bytes (CHARLEN characters) from DATA to EI.  The old
   byte length is remembered first, since EI_ALLOC is given the new
   totals; the new data is then copied in at the old end. */
#define eicat_1(ei, data, bytelen, charlen)                             \
do {                                                                    \
  int ei14_oldlen = (ei)->bytelen_;                                     \
  int ei14_addlen = (bytelen);                                          \
  EI_ALLOC (ei, (ei)->bytelen_ + ei14_addlen,                           \
            (ei)->charlen_ + (charlen), 1);                             \
  memcpy ((ei)->data_ + ei14_oldlen, (data), ei14_addlen);              \
} while (0)
2226 | |
/* Append the Eistring EI2 to EI.  EI2 is evaluated exactly once. */
#define eicat_ei(ei, ei2)                                               \
do {                                                                    \
  const Eistring *ei9_src = (ei2);                                      \
  eicat_1 (ei, ei9_src->data_, ei9_src->bytelen_, ei9_src->charlen_);   \
} while (0)
2232 | |
/* Append the zero-terminated string ASCSTR to EI, after checking it
   with ASSERT_ASCTEXT_ASCII_LEN(). */
#define eicat_ascii(ei, ascstr)                                         \
do {                                                                    \
  const Ascbyte *ei15_asc = (ascstr);                                   \
  int ei15_asclen = strlen (ei15_asc);                                  \
                                                                        \
  ASSERT_ASCTEXT_ASCII_LEN (ei15_asc, ei15_asclen);                     \
  eicat_1 (ei, ei15_asc, ei15_asclen,                                   \
           bytecount_to_charcount ((Ibyte *) ei15_asc, ei15_asclen));   \
} while (0)
2242 | |
/* Append the LEN bytes of text at DATA to EI; the character count is
   computed with bytecount_to_charcount(). */
#define eicat_raw(ei, data, len)                                        \
do {                                                                    \
  int ei16_len = (len);                                                 \
  const Ibyte *ei16_data = (data);                                      \
  eicat_1 (ei, ei16_data, ei16_len,                                     \
           bytecount_to_charcount (ei16_data, ei16_len));               \
} while (0)
2250 | |
/* Append the zero-terminated text at PTR to EI; the length is taken
   with qxestrlen(). */
#define eicat_rawz(ei, ptr)                                             \
do {                                                                    \
  const Ibyte *ei16p5_ptr = (ptr);                                      \
  eicat_raw (ei, ei16p5_ptr, qxestrlen (ei16p5_ptr));                   \
} while (0)
2256 | |
/* Append the text of the Lisp string LISP_STRING to EI.  LISP_STRING
   is evaluated exactly once. */
#define eicat_lstr(ei, lisp_string)                                     \
do {                                                                    \
  Lisp_Object ei17_str = (lisp_string);                                 \
  eicat_1 (ei, XSTRING_DATA (ei17_str), XSTRING_LENGTH (ei17_str),      \
           string_char_length (ei17_str));                              \
} while (0)
2263 | |
/* Append the single character CH to EI.  CH is encoded into a local
   buffer with set_itext_ichar() and appended as one character. */
#define eicat_ch(ei, ch)                                                \
do {                                                                    \
  Ibyte ei22_buf[MAX_ICHAR_LEN];                                        \
  Bytecount ei22_buflen = set_itext_ichar (ei22_buf, ch);               \
  eicat_1 (ei, ei22_buf, ei22_buflen, 1);                               \
} while (0)
2270 | |
2271 | |
2272 /* ----- Replacement ----- */ | |
2273 | |
/* Replace the section of an Eistring at (OFF, LEN) with the data at
   SRC of length SRCLEN.  Every byte position/length has a
   corresponding character value (CHAROFF, CHARLEN, SRCCHARLEN); all of
   them are passed through eifixup_bytechar() before use. */

#define eisub_1(ei, off, charoff, len, charlen, src, srclen, srccharlen) \
do {                                                                    \
  int ei18_off = (off);                                                 \
  int ei18_charoff = (charoff);                                         \
  int ei18_len = (len);                                                 \
  int ei18_charlen = (charlen);                                         \
  Ibyte *ei18_src = (Ibyte *) (src);                                    \
  int ei18_srclen = (srclen);                                           \
  int ei18_srccharlen = (srccharlen);                                   \
                                                                        \
  /* Byte length before EI_ALLOC changes it; needed to size the tail    \
     move below. */                                                     \
  int ei18_oldbytelen = (ei)->bytelen_;                                 \
                                                                        \
  eifixup_bytechar ((ei)->data_, ei18_off, ei18_charoff);               \
  eifixup_bytechar ((ei)->data_ + ei18_off, ei18_len, ei18_charlen);    \
  eifixup_bytechar (ei18_src, ei18_srclen, ei18_srccharlen);            \
                                                                        \
  EI_ALLOC (ei, (ei)->bytelen_ + ei18_srclen - ei18_len,                \
            (ei)->charlen_ + ei18_srccharlen - ei18_charlen, 0);        \
  /* Shift the tail only when the replacement changes the size. */      \
  if (ei18_len != ei18_srclen)                                          \
    memmove ((ei)->data_ + ei18_off + ei18_srclen,                      \
             (ei)->data_ + ei18_off + ei18_len,                         \
             /* include zero terminator. */                             \
             ei18_oldbytelen - (ei18_off + ei18_len) + 1);              \
  if (ei18_srclen > 0)                                                  \
    memcpy ((ei)->data_ + ei18_off, ei18_src, ei18_srclen);             \
} while (0)
2304 | |
/* Replace a section of EI with the contents of the Eistring EI2, which
   is evaluated exactly once. */
#define eisub_ei(ei, off, charoff, len, charlen, ei2)                   \
do {                                                                    \
  const Eistring *ei19_src = (ei2);                                     \
  eisub_1 (ei, off, charoff, len, charlen, ei19_src->data_,             \
           ei19_src->bytelen_, ei19_src->charlen_);                     \
} while (0)
2311 | |
/* Replace a section of EI with the zero-terminated string ASCSTR,
   after checking it with ASSERT_ASCTEXT_ASCII_LEN().  The source
   character length is passed to eisub_1() as -1. */
#define eisub_ascii(ei, off, charoff, len, charlen, ascstr)             \
do {                                                                    \
  const Ascbyte *ei20_asc = (ascstr);                                   \
  int ei20_asclen = strlen (ei20_asc);                                  \
  ASSERT_ASCTEXT_ASCII_LEN (ei20_asc, ei20_asclen);                     \
  eisub_1 (ei, off, charoff, len, charlen, ei20_asc, ei20_asclen, -1);  \
} while (0)
2319 | |
/* Replace a section of EI with the single character CH.  CH is encoded
   into a local buffer with set_itext_ichar() and substituted as one
   character. */
#define eisub_ch(ei, off, charoff, len, charlen, ch)                    \
do {                                                                    \
  Ibyte ei21_buf[MAX_ICHAR_LEN];                                        \
  Bytecount ei21_buflen = set_itext_ichar (ei21_buf, ch);               \
  eisub_1 (ei, off, charoff, len, charlen, ei21_buf, ei21_buflen, 1);   \
} while (0)
2326 | |
/* Delete a section of EI: a substitution with a NULL source of zero
   byte and character length. */
#define eidel(ei, off, charoff, len, charlen)                           \
  eisub_1(ei, off, charoff, len, charlen, NULL, 0, 0)
2329 | |
2330 | |
2331 /* ----- Converting to an external format ----- */ | |
2332 | |
/* Convert the Eistring's internal data to external format using coding
   system CODESYS, storing the result in (ei)->extdata_ and
   (ei)->extlen_.  A malloc()ed Eistring first frees any previous
   external data and gets a MALLOC result; otherwise the result is
   ALLOCA. */
#define eito_external(ei, codesys)                                      \
do {                                                                    \
  if (!(ei)->mallocp_)                                                  \
    {                                                                   \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),          \
                          ALLOCA, ((ei)->extdata_, (ei)->extlen_),      \
                          codesys);                                     \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      if ((ei)->extdata_)                                               \
        {                                                               \
          xfree ((ei)->extdata_);                                       \
          (ei)->extdata_ = 0;                                           \
        }                                                               \
      TO_EXTERNAL_FORMAT (DATA, ((ei)->data_, (ei)->bytelen_),          \
                          MALLOC, ((ei)->extdata_, (ei)->extlen_),      \
                          codesys);                                     \
    }                                                                   \
} while (0)
2351 | |
/* External-format data pointer stored in the Eistring. */
#define eiextdata(ei)   ((ei)->extdata_)
/* Length stored alongside the external-format data. */
#define eiextlen(ei)    ((ei)->extlen_)
2354 | |
2355 | |
2356 /* ----- Searching in the Eistring for a character ----- */ | |
2357 | |
/* Placeholders for the character-search operations.  Each expands to
   the bare tokens NOT YET IMPLEMENTED, so any use fails to compile. */
#define eichr(eistr, chr) NOT YET IMPLEMENTED
#define eichr_char(eistr, chr) NOT YET IMPLEMENTED
#define eichr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eichr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_char(eistr, chr) NOT YET IMPLEMENTED
#define eirchr_off(eistr, chr, off, charoff) NOT YET IMPLEMENTED
#define eirchr_off_char(eistr, chr, off, charoff) NOT YET IMPLEMENTED
2374 | |
2375 | |
2376 /* ----- Searching in the Eistring for a string ----- */ | |
2377 | |
/* Placeholders for searching an Eistring for another Eistring.  Each
   expands to the bare tokens NOT YET IMPLEMENTED, so any use fails to
   compile. */
#define eistr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eistr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eistr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_char(eistr, eistr2) NOT YET IMPLEMENTED
#define eirstr_ei_off(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ei_off_char(eistr, eistr2, off, charoff) NOT YET IMPLEMENTED
2394 | |
/* Placeholders for searching an Eistring for an ASCII string.  Each
   expands to the bare tokens NOT YET IMPLEMENTED, so any use fails to
   compile. */
#define eistr_ascii(eistr, ascstr) NOT YET IMPLEMENTED
#define eistr_ascii_char(eistr, ascstr) NOT YET IMPLEMENTED
#define eistr_ascii_off(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
#define eistr_ascii_off_char(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ascii(eistr, ascstr) NOT YET IMPLEMENTED
#define eirstr_ascii_char(eistr, ascstr) NOT YET IMPLEMENTED
#define eirstr_ascii_off(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
#define eirstr_ascii_off_char(eistr, ascstr, off, charoff) NOT YET IMPLEMENTED
2411 | |
2412 | |
2413 /* ----- Comparison ----- */ | |
2414 | |
2415 int eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, | |
867 | 2416 Bytecount len, Charcount charlen, const Ibyte *data, |
2526 | 2417 const Eistring *ei2, int is_ascii, int fold_case); |
771 | 2418 |
/* Compare an Eistring (or the section of it given by OFF, CHAROFF,
   LEN, CHARLEN) with another Eistring.  The last argument handed to
   eicmp_1() is 0 for the eicmp_* forms, 1 for eicasecmp_* and 2 for
   eicasecmp_i18n_*. */
#define eicmp_ei(eistr, eistr2)                                         \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 0)
#define eicmp_off_ei(eistr, off, charoff, len, charlen, eistr2)         \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 0)

#define eicasecmp_ei(eistr, eistr2)                                     \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 1)
#define eicasecmp_off_ei(eistr, off, charoff, len, charlen, eistr2)     \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 1)

#define eicasecmp_i18n_ei(eistr, eistr2)                                \
  eicmp_1 (eistr, 0, -1, -1, -1, 0, eistr2, 0, 2)
#define eicasecmp_i18n_off_ei(eistr, off, charoff, len, charlen, eistr2) \
  eicmp_1 (eistr, off, charoff, len, charlen, 0, eistr2, 0, 2)
2431 | |
/* Compare an Eistring (or the section of it given by OFF, CHAROFF,
   LEN, CHARLEN) with the string ASCSTR, which is passed to eicmp_1()
   as its DATA argument with IS_ASCII set to 1.  The last argument
   handed to eicmp_1() is 0 for the eicmp_* forms, 1 for eicasecmp_*
   and 2 for eicasecmp_i18n_*.

   ASCSTR is parenthesized before the cast so that an argument such as
   `flag ? "a" : "b"' is cast as a whole, not just its first operand. */
#define eicmp_ascii(eistr, ascstr)                                      \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 0)
#define eicmp_off_ascii(eistr, off, charoff, len, charlen, ascstr)      \
  eicmp_1 (eistr, off, charoff, len, charlen,                           \
           (const Ibyte *) (ascstr), 0, 1, 0)
#define eicasecmp_ascii(eistr, ascstr)                                  \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_off_ascii(eistr, off, charoff, len, charlen, ascstr)  \
  eicmp_1 (eistr, off, charoff, len, charlen,                           \
           (const Ibyte *) (ascstr), 0, 1, 1)
#define eicasecmp_i18n_ascii(eistr, ascstr)                             \
  eicmp_1 (eistr, 0, -1, -1, -1, (const Ibyte *) (ascstr), 0, 1, 2)
#define eicasecmp_i18n_off_ascii(eistr, off, charoff, len, charlen, ascstr) \
  eicmp_1 (eistr, off, charoff, len, charlen,                           \
           (const Ibyte *) (ascstr), 0, 1, 2)
771 | 2444 |
2445 | |
2446 /* ----- Case-changing the Eistring ----- */ | |
2447 | |
867 | 2448 int eistr_casefiddle_1 (Ibyte *olddata, Bytecount len, Ibyte *newdata, |
771 | 2449 int downp); |
2450 | |
/* Case-change EI through eistr_casefiddle_1(), passing DOWNP along.
   The result is written into a fresh alloca_ibytes() buffer big enough
   for charlen_ * MAX_ICHAR_LEN + 1 bytes.  If eistr_casefiddle_1()
   returns nonzero, that value becomes the new byte length and the
   buffer becomes the Eistring's data; otherwise EI is left untouched.
   NOTE(review): the new buffer is alloca()ed even when (ei)->mallocp_
   is set, and the old data_ pointer is simply overwritten -- confirm
   this is safe for malloc()ed Eistrings. */
#define EI_CASECHANGE(ei, downp)                                        \
do {                                                                    \
  int ei11_allocmax = (ei)->charlen_ * MAX_ICHAR_LEN + 1;               \
  Ibyte *ei11_buf = (Ibyte *) alloca_ibytes (ei11_allocmax);            \
  int ei11_buflen = eistr_casefiddle_1 ((ei)->data_, (ei)->bytelen_,    \
                                        ei11_buf, downp);               \
                                                                        \
  if (ei11_buflen)                                                      \
    {                                                                   \
      (ei)->max_size_allocated_ = ei11_allocmax;                        \
      (ei)->data_ = ei11_buf;                                           \
      (ei)->bytelen_ = ei11_buflen;                                     \
      /* charlen is the same. */                                        \
    }                                                                   \
} while (0)
2467 | |
/* Case-change the Eistring: eilwr passes DOWNP = 1 to EI_CASECHANGE,
   eiupr passes DOWNP = 0. */
#define eilwr(ei)       EI_CASECHANGE (ei, 1)
#define eiupr(ei)       EI_CASECHANGE (ei, 0)
2470 | |
1743 | 2471 END_C_DECLS |
1650 | 2472 |
771 | 2473 |
2474 /************************************************************************/ | |
2475 /* */ | |
2476 /* Converting between internal and external format */ | |
2477 /* */ | |
2478 /************************************************************************/ | |
2479 /* | |
1318 | 2480 The macros below are used for converting data between different formats. |
2481 Generally, the data is textual, and the formats are related to | |
2482 internationalization (e.g. converting between internal-format text and | |
2483 UTF-8) -- but the mechanism is general, and could be used for anything, | |
2484 e.g. decoding gzipped data. | |
2485 | |
2486 In general, conversion involves a source of data, a sink, the existing | |
2487 format of the source data, and the desired format of the sink. The | |
2488 macros below, however, always require that either the source or sink is | |
2489 internal-format text. Therefore, in practice the conversions below | |
2490 involve source, sink, an external format (specified by a coding system), | |
2491 and the direction of conversion (internal->external or vice-versa). | |
2492 | |
2493 Sources and sinks can be raw data (sized or unsized -- when unsized, | |
2494 input data is assumed to be null-terminated [double null-terminated for | |
2495 Unicode-format data], and on output the length is not stored anywhere), | |
2496 Lisp strings, Lisp buffers, lstreams, and opaque data objects. When the | |
2497 output is raw data, the result can be allocated either with alloca() or | |
2498 malloc(). (There is currently no provision for writing into a fixed | |
2499 buffer. If you want this, use alloca() output and then copy the data -- | |
2500 but be careful with the size! Unless you are very sure of the encoding | |
2501 being used, upper bounds for the size are not in general computable.) | |
2502 The obvious restrictions on source and sink types apply (e.g. Lisp | |
2503 strings are a source and sink only for internal data). | |
2504 | |
2505 All raw data outputted will contain an extra null byte (two bytes for | |
2506 Unicode -- currently, in fact, all output data, whether internal or | |
2507 external, is double-null-terminated, but you can't count on this; see | |
2508 below). This means that enough space is allocated to contain the extra | |
2509 nulls; however, these nulls are not reflected in the returned output | |
2510 size. | |
2511 | |
2512 The most basic macros are TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. | |
2513 These can be used to convert between any kinds of sources or sinks. | |
2514 However, 99% of conversions involve raw data or Lisp strings as both | |
2515 source and sink, and usually data is output as alloca() rather than | |
2516 malloc(). For this reason, convenience macros are defined for many types | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2517 of conversions involving raw data and/or Lisp strings, when the output is |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2518 an alloca()ed or malloc()ed string. (When the destination is a |
1318 | 2519 Lisp_String, there are other functions that should be used instead -- |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2520 build_extstring() and make_extstring(), for example.) In general, the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2521 convenience macros return their result as a return value, even if the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2522 result is an alloca()ed string -- some trickery is required to do this, |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2523 but it's definitely possible. However, for macros whose result is a |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2524 "sized string" (i.e. a string plus a length), there are two values to |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2525 return, and both are returned through parameters. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2526 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2527 The convenience macros have the form: |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2528 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2529 (a) (SIZED_)?EXTERNAL_TO_ITEXT(_MALLOC)? |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2530 (b) (ITEXT|LISP_STRING)_TO_(SIZED_)?EXTERNAL(_MALLOC)? |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2531 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2532 Note also that there are some additional, more specific macros defined |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2533 elsewhere, for example macros like EXTERNAL_TO_TSTR() in syswindows.h for |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2534 conversions that specifically involve the `mswindows-tstr' coding system |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2535 (which is normally an alias of `mswindows-unicode', a variation of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2536 UTF-16). |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2537 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2538 Convenience macros of type (a) are for conversion from external to |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2539 internal, while type (b) macros convert internal to external. A few |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2540 notes: |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2541 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2542 -- The output is an alloca()ed string unless `_MALLOC' is appended, |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2543 in which case it's a malloc()ed string. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2544 -- When the destination says ITEXT, it means internally-formatted text of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2545 type `Ibyte *' (which boils down to `unsigned char *'). |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2546 -- When the destination says EXTERNAL, it means externally-formatted |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2547 text of type `Extbyte *' (which boils down to `char *'). Because |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2548 `Ibyte *' and `Extbyte *' are different underlying types, accidentally |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2549 mixing them will generally lead to a warning under gcc, and an error |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2550 under g++. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2551 -- When SIZED_EXTERNAL is involved, there are two parameters, one for |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2552 the string and one for its length. When SIZED_EXTERNAL is the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2553 destination, these two parameters should be lvalues and will have the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2554 result stored into them. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2555 -- There is no LISP_STRING destination; use `build_extstring' instead of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2556 `EXTERNAL_TO_LISP_STRING' and `make_extstring' instead of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2557 `SIZED_EXTERNAL_TO_LISP_STRING'. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2558 -- There is no SIZED_ITEXT type. If you need this: First, if your data |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2559 is coming from a Lisp string, it would be better to use the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2560 LISP_STRING_TO_* macros. If this doesn't apply or work, call the |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2561 TO_EXTERNAL_FORMAT() or TO_INTERNAL_FORMAT() macros directly. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2562 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2563 Note that previously the convenience macros, like the raw TO_*_FORMAT |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2564 macros, were always written to store their arguments into a passed-in |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2565 lvalue rather than return them, due to major bugs in calling alloca() |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2566 inside of a function call on x86 gcc circa version 2.6. This has |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2567 apparently long since been fixed, but just to make sure we have a |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2568 `configure' test for broken alloca() in function calls, and in such case |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2569 the portable xemacs_c_alloca() implementation is substituted instead. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2570 Note that this implementation actually uses malloc() but notes the stack |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2571 pointer at the time of allocation, and at next call any allocations |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2572 belonging to inner stack frames are freed. This isn't perfect but |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2573 more-or-less gets the job done as an emergency backup, and in most |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2574 circumstances it prevents arbitrary memory leakage -- at most you should |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2575 get a fixed amount of leakage. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2576 |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2577 NOTE: All convenience macros are ultimately defined in terms of |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2578 TO_EXTERNAL_FORMAT and TO_INTERNAL_FORMAT. Thus, any comments below |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2579 about the workings of these macros also apply to all convenience macros. |
1318 | 2580 |
2581 TO_EXTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
2582 TO_INTERNAL_FORMAT (source_type, source, sink_type, sink, codesys) | |
771 | 2583 |
2584 Typical use is | |
2585 | |
2367 | 2586 TO_EXTERNAL_FORMAT (LISP_STRING, str, C_STRING_MALLOC, ptr, Qfile_name); |
2587 | |
2588 which means that the contents of the lisp string `str' are written | |
2589 to a malloc'ed memory area which will be pointed to by `ptr', after the | |
2590 function returns. The conversion will be done using the `file-name' | |
2591 coding system (which will be controlled by the user indirectly by | |
2592 setting or binding the variable `file-name-coding-system'). | |
2593 | |
2594 Some sources and sinks require two C variables to specify. We use | |
2595 some preprocessor magic to allow different source and sink types, and | |
2596 even different numbers of arguments to specify different types of | |
2597 sources and sinks. | |
2598 | |
2599 So we can have a call that looks like | |
2600 | |
2601 TO_INTERNAL_FORMAT (DATA, (ptr, len), | |
2602 MALLOC, (ptr, len), | |
2603 coding_system); | |
2604 | |
2605 The parenthesized argument pairs are required to make the | |
2606 preprocessor magic work. | |
771 | 2607 |
2608 NOTE: GC is inhibited during the entire operation of these macros. This | |
2609 is because frequently the data to be converted comes from strings but | |
2610 gets passed in as just DATA, and GC may move around the string data. If | |
2611 we didn't inhibit GC, there'd have to be a lot of messy recoding, | |
2612 alloca-copying of strings and other annoying stuff. | |
2613 | |
2614 The source or sink can be specified in one of these ways: | |
2615 | |
2616 DATA, (ptr, len), // input data is a fixed buffer of size len | |
851 | 2617 ALLOCA, (ptr, len), // output data is in an ALLOCA()ed buffer of size len |
771 | 2618 MALLOC, (ptr, len), // output data is in a malloc()ed buffer of size len |
2619 C_STRING_ALLOCA, ptr, // equivalent to ALLOCA (ptr, len_ignored) on output | |
2620 C_STRING_MALLOC, ptr, // equivalent to MALLOC (ptr, len_ignored) on output | |
2621 C_STRING, ptr, // equivalent to DATA, (ptr, strlen/wcslen (ptr)) | |
2622 // on input (the Unicode version is used when appropriate) | |
2623 LISP_STRING, string, // input or output is a Lisp_Object of type string | |
2624 LISP_BUFFER, buffer, // output is written to (point) in lisp buffer | |
2625 LISP_LSTREAM, lstream, // input or output is a Lisp_Object of type lstream | |
2626 LISP_OPAQUE, object, // input or output is a Lisp_Object of type opaque | |
2627 | |
2628 When specifying the sink, use lvalues, since the macro will assign to them, | |
2629 except when the sink is an lstream or a lisp buffer. | |
2630 | |
2367 | 2631 For the sink types `ALLOCA' and `C_STRING_ALLOCA', the resulting text is |
2632 stored in a stack-allocated buffer, which is automatically freed on | |
2633 returning from the function. However, the sink types `MALLOC' and | |
2634 `C_STRING_MALLOC' return `xmalloc()'ed memory. The caller is responsible | |
2635 for freeing this memory using `xfree()'. | |
2636 | |
771 | 2637 The macros accept the kinds of sources and sinks appropriate for |
2638 internal and external data representation. See the type_checking_assert | |
2639 macros below for the actual allowed types. | |
2640 | |
2641 Since some sources and sinks use one argument (a Lisp_Object) to | |
2642 specify them, while others take a (pointer, length) pair, we use | |
2643 some C preprocessor trickery to allow pair arguments to be specified | |
2644 by parenthesizing them, as in the examples above. | |
2645 | |
2646 Anything prefixed by dfc_ (`data format conversion') is private. | |
2647 They are only used to implement these macros. | |
2648 | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2649 Using C_STRING* is appropriate for data that comes from or is going to |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2650 an external API that takes null-terminated strings, or when the string is |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2651 always intended to contain text and never binary data, e.g. file names. |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2652 Any time we are dealing with binary or general data, we must be '\0'-clean, |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2653 i.e. allow arbitrary data which might contain embedded '\0', by tracking |
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2654 both pointer and length. |
771 | 2655 |
2656 There is no problem using the same lvalue for source and sink. | |
2657 | |
2658 Also, when pointers are required, the code (currently at least) is | |
2659 lax and allows any pointer types, either in the source or the sink. | |
2660 This makes it possible, e.g., to deal with internal format data held | |
2661 in char *'s or external format data held in WCHAR * (i.e. Unicode). | |
2662 | |
2663 Finally, whenever storage allocation is called for, extra space is | |
2664 allocated for a terminating zero, and such a zero is stored in the | |
2665 appropriate place, regardless of whether the source data was | |
2666 specified using a length or was specified as zero-terminated. This | |
2667 allows you to freely pass the resulting data, no matter how | |
2668 obtained, to a routine that expects zero termination (modulo, of | |
2669 course, that any embedded zeros in the resulting text will cause | |
2670 truncation). In fact, currently two terminating zeros are allocated | |
2671 and stored after the data result. This is to allow for the | |
2672 possibility of storing a Unicode value on output, which needs the | |
2673 two zeros. Currently, however, the two zeros are stored regardless | |
2674 of whether the conversion is internal or external and regardless of | |
2675 whether the external coding system is in fact Unicode. This | |
2676 behavior may change in the future, and you cannot rely on this -- | |
2677 the most you can rely on is that sink data in Unicode format will | |
2678 have two terminating nulls, which combine to form one Unicode null | |
2367 | 2679 character. |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2680 */ |
771 | 2681 |
2682 #define TO_EXTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \ | |
2683 do { \ | |
2684 dfc_conversion_type dfc_simplified_source_type; \ | |
2685 dfc_conversion_type dfc_simplified_sink_type; \ | |
2686 dfc_conversion_data dfc_source; \ | |
2687 dfc_conversion_data dfc_sink; \ | |
2688 Lisp_Object dfc_codesys = (codesys); \ | |
2689 \ | |
2690 type_checking_assert \ | |
2691 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
2692 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
2693 DFC_TYPE_##source_type == DFC_TYPE_LISP_STRING || \ | |
2694 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
2695 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
2696 && \ | |
2697 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
2698 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
2699 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
2700 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
2701 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
2702 DFC_TYPE_##sink_type == DFC_TYPE_LISP_OPAQUE)); \ | |
2703 \ | |
2704 DFC_EXT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \ | |
2705 DFC_SINK_##sink_type##_TO_ARGS (sink); \ | |
2706 \ | |
2707 dfc_convert_to_external_format (dfc_simplified_source_type, &dfc_source, \ | |
2708 dfc_codesys, \ | |
2709 dfc_simplified_sink_type, &dfc_sink); \ | |
2710 \ | |
2711 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \ | |
2712 } while (0) | |
2713 | |
2714 #define TO_INTERNAL_FORMAT(source_type, source, sink_type, sink, codesys) \ | |
2715 do { \ | |
2716 dfc_conversion_type dfc_simplified_source_type; \ | |
2717 dfc_conversion_type dfc_simplified_sink_type; \ | |
2718 dfc_conversion_data dfc_source; \ | |
2719 dfc_conversion_data dfc_sink; \ | |
2720 Lisp_Object dfc_codesys = (codesys); \ | |
2721 \ | |
2722 type_checking_assert \ | |
2723 ((DFC_TYPE_##source_type == DFC_TYPE_DATA || \ | |
2724 DFC_TYPE_##source_type == DFC_TYPE_C_STRING || \ | |
2725 DFC_TYPE_##source_type == DFC_TYPE_LISP_OPAQUE || \ | |
2726 DFC_TYPE_##source_type == DFC_TYPE_LISP_LSTREAM) \ | |
2727 && \ | |
2728 (DFC_TYPE_##sink_type == DFC_TYPE_ALLOCA || \ | |
2729 DFC_TYPE_##sink_type == DFC_TYPE_MALLOC || \ | |
2730 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_ALLOCA || \ | |
2731 DFC_TYPE_##sink_type == DFC_TYPE_C_STRING_MALLOC || \ | |
2732 DFC_TYPE_##sink_type == DFC_TYPE_LISP_STRING || \ | |
2733 DFC_TYPE_##sink_type == DFC_TYPE_LISP_LSTREAM || \ | |
2734 DFC_TYPE_##sink_type == DFC_TYPE_LISP_BUFFER)); \ | |
2735 \ | |
2736 DFC_INT_SOURCE_##source_type##_TO_ARGS (source, dfc_codesys); \ | |
2737 DFC_SINK_##sink_type##_TO_ARGS (sink); \ | |
2738 \ | |
2739 dfc_convert_to_internal_format (dfc_simplified_source_type, &dfc_source, \ | |
2740 dfc_codesys, \ | |
2741 dfc_simplified_sink_type, &dfc_sink); \ | |
2742 \ | |
2743 DFC_##sink_type##_USE_CONVERTED_DATA (sink); \ | |
2744 } while (0) | |
2745 | |
814 | 2746 #ifdef __cplusplus |
771 | 2747 |
814 | 2748 /* Error if you try to use a union here: "member `struct {anonymous |
2749 union}::{anonymous} {anonymous union}::data' with constructor not allowed | |
2750 in union" (Bytecount is a class) */ | |
2751 | |
2752 typedef struct | |
2753 #else | |
771 | 2754 typedef union |
814 | 2755 #endif |
771 | 2756 { |
2757 struct { const void *ptr; Bytecount len; } data; | |
2758 Lisp_Object lisp_object; | |
2759 } dfc_conversion_data; | |
2760 | |
2761 enum dfc_conversion_type /* source/sink kinds accepted by TO_EXTERNAL_FORMAT / TO_INTERNAL_FORMAT */ | |
2762 { | |
2763 DFC_TYPE_DATA, /* fixed buffer given as (ptr, len) */ | |
2764 DFC_TYPE_ALLOCA, /* sink: ALLOCA()ed buffer, returned as (ptr, len) */ | |
2765 DFC_TYPE_MALLOC, /* sink: malloc()ed buffer, returned as (ptr, len) */ | |
2766 DFC_TYPE_C_STRING, /* source: zero-terminated string */ | |
2767 DFC_TYPE_C_STRING_ALLOCA, /* sink: ALLOCA()ed zero-terminated string */ | |
2768 DFC_TYPE_C_STRING_MALLOC, /* sink: malloc()ed zero-terminated string */ | |
2769 DFC_TYPE_LISP_STRING, /* Lisp_Object of type string */ | |
2770 DFC_TYPE_LISP_LSTREAM, /* Lisp_Object of type lstream */ | |
2771 DFC_TYPE_LISP_OPAQUE, /* Lisp_Object of type opaque */ | |
2772 DFC_TYPE_LISP_BUFFER /* sink: Lisp buffer, output written at point */ | |
2773 }; | |
2774 typedef enum dfc_conversion_type dfc_conversion_type; | |
2775 | |
1743 | 2776 BEGIN_C_DECLS |
1650 | 2777 |
771 | 2778 /* WARNING: These use a static buffer. This can lead to disaster if |
2779 these functions are not used *very* carefully. Another reason to only use | |
2780 TO_EXTERNAL_FORMAT() and TO_INTERNAL_FORMAT(). */ | |
1632 | 2781 MODULE_API void |
771 | 2782 dfc_convert_to_external_format (dfc_conversion_type source_type, |
2783 dfc_conversion_data *source, | |
1318 | 2784 Lisp_Object codesys, |
771 | 2785 dfc_conversion_type sink_type, |
2786 dfc_conversion_data *sink); | |
1632 | 2787 MODULE_API void |
771 | 2788 dfc_convert_to_internal_format (dfc_conversion_type source_type, |
2789 dfc_conversion_data *source, | |
1318 | 2790 Lisp_Object codesys, |
771 | 2791 dfc_conversion_type sink_type, |
2792 dfc_conversion_data *sink); | |
2793 /* CPP trickery: a pair is passed as one parenthesized argument `(x, y)'; writing `DFC_CPP_CAR val' then expands to a macro call that selects x, and `DFC_CPP_CDR val' selects y. */ | |
2794 #define DFC_CPP_CAR(x,y) (x) | |
2795 #define DFC_CPP_CDR(x,y) (y) | |
2796 | |
2797 /* Convert `source' to args for dfc_convert_to_external_format() */ | |
2798 #define DFC_EXT_SOURCE_DATA_TO_ARGS(val, codesys) do { \ | |
2799 dfc_source.data.ptr = DFC_CPP_CAR val; \ | |
2800 dfc_source.data.len = DFC_CPP_CDR val; \ | |
2801 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2802 } while (0) | |
2803 #define DFC_EXT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \ | |
2804 dfc_source.data.len = \ | |
2805 strlen ((char *) (dfc_source.data.ptr = (val))); \ | |
2806 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2807 } while (0) | |
2808 #define DFC_EXT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) do { \ | |
2809 Lisp_Object dfc_slsta = (val); \ | |
2810 type_checking_assert (STRINGP (dfc_slsta)); \ | |
2811 dfc_source.lisp_object = dfc_slsta; \ | |
2812 dfc_simplified_source_type = DFC_TYPE_LISP_STRING; \ | |
2813 } while (0) | |
2814 #define DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) do { \ | |
2815 Lisp_Object dfc_sllta = (val); \ | |
2816 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
2817 dfc_source.lisp_object = dfc_sllta; \ | |
2818 dfc_simplified_source_type = DFC_TYPE_LISP_LSTREAM; \ | |
2819 } while (0) | |
2820 #define DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) do { \ | |
2821 Lisp_Opaque *dfc_slota = XOPAQUE (val); \ | |
2822 dfc_source.data.ptr = OPAQUE_DATA (dfc_slota); \ | |
2823 dfc_source.data.len = OPAQUE_SIZE (dfc_slota); \ | |
2824 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2825 } while (0) | |
2826 | |
2827 /* Convert `source' to args for dfc_convert_to_internal_format() */ | |
2828 #define DFC_INT_SOURCE_DATA_TO_ARGS(val, codesys) \ | |
2829 DFC_EXT_SOURCE_DATA_TO_ARGS (val, codesys) | |
2830 #define DFC_INT_SOURCE_C_STRING_TO_ARGS(val, codesys) do { \ | |
2831 dfc_source.data.len = dfc_external_data_len (dfc_source.data.ptr = (val), \ | |
2832 codesys); \ | |
2833 dfc_simplified_source_type = DFC_TYPE_DATA; \ | |
2834 } while (0) | |
2835 #define DFC_INT_SOURCE_LISP_STRING_TO_ARGS(val, codesys) \ | |
2836 DFC_EXT_SOURCE_LISP_STRING_TO_ARGS (val, codesys) | |
2837 #define DFC_INT_SOURCE_LISP_LSTREAM_TO_ARGS(val, codesys) \ | |
2838 DFC_EXT_SOURCE_LISP_LSTREAM_TO_ARGS (val, codesys) | |
2839 #define DFC_INT_SOURCE_LISP_OPAQUE_TO_ARGS(val, codesys) \ | |
2840 DFC_EXT_SOURCE_LISP_OPAQUE_TO_ARGS (val, codesys) | |
2841 | |
2842 /* Convert `sink' to args for dfc_convert_to_*_format() */ | |
2843 #define DFC_SINK_ALLOCA_TO_ARGS(val) \ | |
2844 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2845 #define DFC_SINK_C_STRING_ALLOCA_TO_ARGS(val) \ | |
2846 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2847 #define DFC_SINK_MALLOC_TO_ARGS(val) \ | |
2848 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2849 #define DFC_SINK_C_STRING_MALLOC_TO_ARGS(val) \ | |
2850 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2851 #define DFC_SINK_LISP_STRING_TO_ARGS(val) \ | |
2852 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2853 #define DFC_SINK_LISP_OPAQUE_TO_ARGS(val) \ | |
2854 dfc_simplified_sink_type = DFC_TYPE_DATA | |
2855 #define DFC_SINK_LISP_LSTREAM_TO_ARGS(val) do { \ | |
2856 Lisp_Object dfc_sllta = (val); \ | |
2857 type_checking_assert (LSTREAMP (dfc_sllta)); \ | |
2858 dfc_sink.lisp_object = dfc_sllta; \ | |
2859 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
2860 } while (0) | |
2861 #define DFC_SINK_LISP_BUFFER_TO_ARGS(val) do { \ | |
2862 struct buffer *dfc_slbta = XBUFFER (val); \ | |
2863 dfc_sink.lisp_object = \ | |
2864 make_lisp_buffer_output_stream \ | |
2865 (dfc_slbta, BUF_PT (dfc_slbta), 0); \ | |
2866 dfc_simplified_sink_type = DFC_TYPE_LISP_LSTREAM; \ | |
2867 } while (0) | |
2868 | |
2869 /* Assign to the `sink' lvalue(s) using the converted data. */ | |
2870 /* + 2 because we double zero-terminate to account for Unicode conversion */ | |
2871 typedef union { char c; void *p; } *dfc_aliasing_voidpp; | |
2872 #define DFC_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
851 | 2873 void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2); \ |
771 | 2874 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ |
2367 | 2875 VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret; \ |
771 | 2876 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ |
2877 } while (0) | |
2878 #define DFC_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
2879 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \ | |
2880 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ | |
2367 | 2881 VOIDP_CAST (DFC_CPP_CAR sink) = dfc_sink_ret; \ |
771 | 2882 (DFC_CPP_CDR sink) = dfc_sink.data.len; \ |
2883 } while (0) | |
2884 #define DFC_C_STRING_ALLOCA_USE_CONVERTED_DATA(sink) do { \ | |
851 | 2885 void * dfc_sink_ret = ALLOCA (dfc_sink.data.len + 2); \ |
771 | 2886 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ |
2367 | 2887 VOIDP_CAST (sink) = dfc_sink_ret; \ |
771 | 2888 } while (0) |
2889 #define DFC_C_STRING_MALLOC_USE_CONVERTED_DATA(sink) do { \ | |
2890 void * dfc_sink_ret = xmalloc (dfc_sink.data.len + 2); \ | |
2891 memcpy (dfc_sink_ret, dfc_sink.data.ptr, dfc_sink.data.len + 2); \ | |
2367 | 2892 VOIDP_CAST (sink) = dfc_sink_ret; \ |
771 | 2893 } while (0) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2894 #define DFC_LISP_STRING_USE_CONVERTED_DATA(sink) \ |
867 | 2895 sink = make_string ((Ibyte *) dfc_sink.data.ptr, dfc_sink.data.len) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2896 #define DFC_LISP_OPAQUE_USE_CONVERTED_DATA(sink) \ |
771 | 2897 sink = make_opaque (dfc_sink.data.ptr, dfc_sink.data.len) |
2898 #define DFC_LISP_LSTREAM_USE_CONVERTED_DATA(sink) /* data already used */ | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2899 #define DFC_LISP_BUFFER_USE_CONVERTED_DATA(sink) \ |
771 | 2900 Lstream_delete (XLSTREAM (dfc_sink.lisp_object)) |
2901 | |
1318 | 2902 enum new_dfc_src_type /* kind of source passed as `type' to the new_dfc_convert_* functions */ |
2903 { | |
2904 DFC_EXTERNAL, /* external text; EXTERNAL_TO_ITEXT passes it with a size of -1 */ | |
2905 DFC_SIZED_EXTERNAL, /* external text with an explicit length; used by SIZED_EXTERNAL_TO_ITEXT */ | |
2906 DFC_INTERNAL, /* internal text; ITEXT_TO_EXTERNAL passes it with a size of -1 */ | |
2907 DFC_SIZED_INTERNAL, /* NOTE(review): presumably internal text with an explicit length; no user visible in this part of the file -- confirm */ | |
2908 DFC_LISP_STRING /* Lisp string passed through STORE_LISP_IN_VOID; used by LISP_STRING_TO_EXTERNAL */ | |
2909 }; | |
2910 | |
1632 | 2911 MODULE_API void *new_dfc_convert_malloc (const void *src, Bytecount src_size, |
2912 enum new_dfc_src_type type, | |
2913 Lisp_Object codesys); | |
2367 | 2914 MODULE_API Bytecount new_dfc_convert_size (const char *srctext, |
2915 const void *src, | |
1632 | 2916 Bytecount src_size, |
2917 enum new_dfc_src_type type, | |
2918 Lisp_Object codesys); | |
2367 | 2919 MODULE_API void *new_dfc_convert_copy_data (const char *srctext, |
2920 void *alloca_data); | |
1318 | 2921 |
1743 | 2922 END_C_DECLS |
1650 | 2923 |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2924 /* Version of EXTERNAL_TO_ITEXT that *RETURNS* the translated string, |
1318 | 2925 still in alloca() space. Requires some trickiness to do this, but gets |
2926 it done! */ | |
2927 | |
2928 /* NOTE: If you make two invocations of the dfc functions below in the same | |
2929 subexpression and use the exact same expression for the source in both | |
2930 cases, you will lose. In this unlikely case, you will get an abort, and | |
2931 need to rewrite the code. | |
2932 */ | |
2933 | |
2934 /* We need to use ALLOCA_FUNCALL_OK here. Some compilers have been known | |
2935 to choke when alloca() occurs as a funcall argument, and so we check | |
2936 this in configure. Rewriting the expressions below to use a temporary | |
2937 variable, so that the call to alloca() is outside of | |
2382 | 2938 new_dfc_convert_copy_data(), won't help because the entire NEW_DFC call |
1318 | 2939 could be inside of a function call. */ |
2940 | |
2941 #define NEW_DFC_CONVERT_1_ALLOCA(src, src_size, type, codesys) \ | |
2367 | 2942 new_dfc_convert_copy_data \ |
1318 | 2943 (#src, ALLOCA_FUNCALL_OK (new_dfc_convert_size (#src, src, src_size, \ |
2944 type, codesys))) | |
2945 | |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2946 #define EXTERNAL_TO_ITEXT(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2947 ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2948 #define EXTERNAL_TO_ITEXT_MALLOC(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2949 ((Ibyte *) new_dfc_convert_malloc (src, -1, DFC_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2950 #define SIZED_EXTERNAL_TO_ITEXT(src, len, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2951 ((Ibyte *) NEW_DFC_CONVERT_1_ALLOCA (src, len, DFC_SIZED_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2952 #define SIZED_EXTERNAL_TO_ITEXT_MALLOC(src, len, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2953 ((Ibyte *) new_dfc_convert_malloc (src, len, DFC_SIZED_EXTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2954 #define ITEXT_TO_EXTERNAL(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2955 ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (src, -1, DFC_INTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2956 #define ITEXT_TO_EXTERNAL_MALLOC(src, codesys) \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2957 ((Extbyte *) new_dfc_convert_malloc (src, -1, DFC_INTERNAL, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2958 #define LISP_STRING_TO_EXTERNAL(src, codesys) \ |
5013 | 2959 ((Extbyte *) NEW_DFC_CONVERT_1_ALLOCA (STORE_LISP_IN_VOID (src), -1, \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2960 DFC_LISP_STRING, codesys)) |
5026
46cf825f6158
revamp DFC comment in text.h, some whitespace cleanup
Ben Wing <ben@xemacs.org>
parents:
4982
diff
changeset
|
2961 #define LISP_STRING_TO_EXTERNAL_MALLOC(src, codesys) \ |
5013 | 2962 ((Extbyte *) new_dfc_convert_malloc (STORE_LISP_IN_VOID (src), -1, \ |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2963 DFC_LISP_STRING, codesys)) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2964 /* In place of EXTERNAL_TO_LISP_STRING(), use build_extstring() and/or |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2965 make_extstring(). */ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2966 |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2967 /* The next four have two outputs, so we make both of them be parameters */ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2968 #define ITEXT_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2969 TO_EXTERNAL_FORMAT (C_STRING, in, ALLOCA, (out, outlen), codesys) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2970 #define LISP_STRING_TO_SIZED_EXTERNAL(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2971 TO_EXTERNAL_FORMAT (LISP_STRING, in, ALLOCA, (out, outlen), codesys) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2972 #define ITEXT_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2973 TO_EXTERNAL_FORMAT (C_STRING, in, MALLOC, (out, outlen), codesys) |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2974 #define LISP_STRING_TO_SIZED_EXTERNAL_MALLOC(in, out, outlen, codesys) \ |
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
2975 TO_EXTERNAL_FORMAT (LISP_STRING, in, MALLOC, (out, outlen), codesys) |
771 | 2976 |
2367 | 2977 /* Wexttext functions. The type of Wexttext is selected at compile time |
2978 and will sometimes be wchar_t, sometimes char. */ | |
2979 | |
2980 int wcscmp_ascii (const wchar_t *s1, const Ascbyte *s2); | |
2981 int wcsncmp_ascii (const wchar_t *s1, const Ascbyte *s2, Charcount len); | |
2982 | |
2983 #ifdef WEXTTEXT_IS_WIDE /* defined under MS Windows i.e. WIN32_NATIVE */ | |
2984 #define WEXTTEXT_ZTERM_SIZE sizeof (wchar_t) | |
2985 /* Extra indirection needed in case of manifest constant as arg */ | |
2986 #define WEXTSTRING_1(arg) L##arg | |
2987 #define WEXTSTRING(arg) WEXTSTRING_1(arg) | |
2988 #define wext_strlen wcslen | |
2989 #define wext_strcmp wcscmp | |
2990 #define wext_strncmp wcsncmp | |
2991 #define wext_strcmp_ascii wcscmp_ascii | |
2992 #define wext_strncmp_ascii wcsncmp_ascii | |
2993 #define wext_strcpy wcscpy | |
2994 #define wext_strncpy wcsncpy | |
2995 #define wext_strchr wcschr | |
2996 #define wext_strrchr wcsrchr | |
2997 #define wext_strdup wcsdup | |
2998 #define wext_atol(str) wcstol (str, 0, 10) | |
2999 #define wext_sprintf wsprintfW /* Huh? both wsprintfA and wsprintfW? */ | |
3000 #define wext_getenv _wgetenv | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
3001 #define build_wext_string(str, cs) build_extstring ((Extbyte *) str, cs) |
2367 | 3002 #define WEXTTEXT_TO_8_BIT(arg) WEXTTEXT_TO_MULTIBYTE(arg) |
3003 #ifdef WIN32_NATIVE | |
3004 int XCDECL wext_retry_open (const Wexttext *path, int oflag, ...); | |
3005 #else | |
3006 #error Cannot handle Wexttext yet on this system | |
3007 #endif | |
3008 #define wext_access _waccess | |
3009 #define wext_stat _wstat | |
3010 #else | |
3011 #define WEXTTEXT_ZTERM_SIZE sizeof (char) | |
3012 #define WEXTSTRING(arg) arg | |
3013 #define wext_strlen strlen | |
3014 #define wext_strcmp strcmp | |
3015 #define wext_strncmp strncmp | |
3016 #define wext_strcmp_ascii strcmp | |
3017 #define wext_strncmp_ascii strncmp | |
3018 #define wext_strcpy strcpy | |
3019 #define wext_strncpy strncpy | |
3020 #define wext_strchr strchr | |
3021 #define wext_strrchr strrchr | |
3022 #define wext_strdup xstrdup | |
3023 #define wext_atol(str) atol (str) | |
3024 #define wext_sprintf sprintf | |
3025 #define wext_getenv getenv | |
4953
304aebb79cd3
function renamings to track names of char typedefs
Ben Wing <ben@xemacs.org>
parents:
4952
diff
changeset
|
3026 #define build_wext_string build_extstring |
2367 | 3027 #define wext_retry_open retry_open |
3028 #define wext_access access | |
3029 #define wext_stat stat | |
3030 #define WEXTTEXT_TO_8_BIT(arg) ((Extbyte *) arg) | |
3031 #endif | |
3032 | |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3033 /* Stand-ins for various encodings. |
1318 | 3034 |
3035 About encodings in X: | |
3036 | |
3037 X works with 5 different encodings: | |
3038 | |
3039 -- "Host Portable Character Encoding" == printable ASCII + space, tab, | |
3040 newline | |
3041 | |
3042 -- STRING encoding == ASCII + Latin-1 + tab, newline | |
3043 | |
3044 -- Locale-specific encoding | |
3045 | |
3046 -- Compound text == STRING encoding + ISO-2022 escape sequences to | |
3047 switch between different locale-specific encodings. | |
3048 | |
3049 -- ANSI C wide-character encoding | |
3050 | |
3051 The Host Portable Character Encoding (HPCE) is used for atom names, font | |
3052 names, color names, keysyms, geometry strings, resource manager quarks, | |
3053 display names, locale names, and various other things. When describing | |
3054 such strings, the X manual typically says "If the ... is not in the Host | |
3055 Portable Character Encoding, the result is implementation dependent." | |
3056 | |
3057 The wide-character encoding is used only in the Xwc* functions, which | |
3058 are provided as equivalents to Xmb* functions. | |
3059 | |
3060 STRING and compound text are used in the value of string properties and | |
3061 selection data, both of which are values with an associated type atom, | |
3062 which can be STRING or COMPOUND_TEXT. It can also be a locale name, as | |
3063 specified in setlocale() (#### as usual, there is no normalization | |
3064 whatsoever of these names). | |
3065 | |
3066 X also defines a type called "TEXT", which is used only as a requested | |
3067 type, and produces data in a type "convenient to the owner". However, | |
3068 there is some indication that X expects this to be the locale-specific | |
3069 encoding. | |
3070 | |
3071 According to the glossary, the locale is used in | |
3072 | |
3073 -- Encoding and processing of input method text | |
3074 -- Encoding of resource files and values | |
3075 -- Encoding and imaging of text strings | |
3076 -- Encoding and decoding for inter-client text communication | |
3077 | |
3078 The functions XmbTextListToTextProperty and XmbTextPropertyToTextList | |
3079 (and Xwc* equivalents) can be used to convert between the | |
3080 locale-specific encoding (XTextStyle), STRING (XStringStyle), and | |
3081 compound text (XCompoundTextStyle), as well as XStdICCTextStyle, which | |
3082 converts to STRING if possible, and if not, COMPOUND_TEXT. This is | |
3083 used, for example, in XmbSetWMProperties, in the window_name and | |
3084 icon_name properties (WM_NAME and WM_ICON_NAME), which are in the | |
3085 locale-specific encoding on input, and are stored as STRING if possible, | |
3086 COMPOUND_TEXT otherwise. | |
3087 */ | |
771 | 3088 |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3089 #ifdef WEXTTEXT_IS_WIDE |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3090 #define Qcommand_argument_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3091 #define Qenvironment_variable_encoding Qmswindows_unicode |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3092 #else |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3093 #define Qcommand_argument_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3094 #define Qenvironment_variable_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3095 #endif |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3096 #define Qunix_host_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3097 #define Qunix_service_name_encoding Qnative |
5254
1537701f08a1
Support Roman month numbers, #'format-time-string
Aidan Kehoe <kehoea@parhasard.net>
parents:
5200
diff
changeset
|
3098 #define Qtime_function_encoding Qbinary |
4952
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3099 #define Qtime_zone_encoding Qtime_function_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3100 #define Qmswindows_host_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3101 #define Qmswindows_service_name_encoding Qmswindows_multibyte |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3102 #define Quser_name_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3103 #define Qerror_message_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3104 #define Qjpeg_error_message_encoding Qerror_message_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3105 #define Qtooltalk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3106 #define Qgtk_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3107 |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3108 #define Qdll_symbol_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3109 #define Qdll_function_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3110 #define Qdll_variable_name_encoding Qdll_symbol_encoding |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3111 #define Qdll_filename_encoding Qfile_name |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3112 #define Qemodule_string_encoding Qnative |
19a72041c5ed
Mule-izing, various fixes related to char * arguments
Ben Wing <ben@xemacs.org>
parents:
4853
diff
changeset
|
3113 |
771 | 3114 /* !!#### Need to verify the encoding used in lwlib -- Qnative or Qctext? |
3115 Almost certainly the former. Use a stand-in for now. */ | |
3116 #define Qlwlib_encoding Qnative | |
3117 | |
1318 | 3118 /* The Host Portable Character Encoding. */ |
3119 #define Qx_hpc_encoding Qnative | |
3120 | |
3121 #define Qx_atom_name_encoding Qx_hpc_encoding | |
3122 #define Qx_font_name_encoding Qx_hpc_encoding | |
3123 #define Qx_color_name_encoding Qx_hpc_encoding | |
3124 #define Qx_keysym_encoding Qx_hpc_encoding | |
3125 #define Qx_geometry_encoding Qx_hpc_encoding | |
3126 #define Qx_resource_name_encoding Qx_hpc_encoding | |
3127 #define Qx_application_class_encoding Qx_hpc_encoding | |
771 | 3128 /* the following probably must agree with Qcommand_argument_encoding and |
3129 Qenvironment_variable_encoding */ | |
1318 | 3130 #define Qx_display_name_encoding Qx_hpc_encoding |
3131 #define Qx_xpm_data_encoding Qx_hpc_encoding | |
4834
b3ea9c582280
Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
Ben Wing <ben@xemacs.org>
parents:
4790
diff
changeset
|
3132 #define Qx_error_message_encoding Qx_hpc_encoding |
1318 | 3133 |
2367 | 3134 /* !!#### Verify these! */ |
3135 #define Qxt_widget_arg_encoding Qnative | |
3136 #define Qdt_dnd_encoding Qnative | |
3137 | |
1318 | 3138 /* Red Hat 6.2 contains a locale called "Francais" with the C-cedilla |
3139 encoded in ISO-2022! */ | |
3140 #define Qlocale_name_encoding Qctext | |
771 | 3141 |
3142 #define Qstrerror_encoding Qnative | |
3143 | |
1318 | 3144 /* !!#### This exists to remind us that our hexify routine is totally |
3145 un-Muleized. */ | |
3146 #define Qdnd_hexify_encoding Qascii | |
3147 | |
771 | 3148 #define GET_STRERROR(var, num) \ |
3149 do { \ | |
3150 int __gsnum__ = (num); \ | |
3151 Extbyte * __gserr__ = strerror (__gsnum__); \ | |
3152 \ | |
3153 if (!__gserr__) \ | |
3154 { \ | |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3155 var = alloca_ibytes (99); \ |
771 | 3156 qxesprintf (var, "Unknown error %d", __gsnum__); \ |
3157 } \ | |
3158 else \ | |
4981
4aebb0131297
Cleanups/renaming of EXTERNAL_TO_C_STRING and friends
Ben Wing <ben@xemacs.org>
parents:
4953
diff
changeset
|
3159 var = EXTERNAL_TO_ITEXT (__gserr__, Qstrerror_encoding); \ |
771 | 3160 } while (0) |
3161 | |
3162 #endif /* INCLUDED_text_h_ */ |