Mercurial > hg > xemacs-beta
comparison src/text.c @ 5118:e0db3c197671 ben-lisp-object
merge up to latest default branch, doesn't compile yet
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Sat, 26 Dec 2009 21:18:49 -0600 |
parents | 38493c0fb952 |
children | 6ef8256a020a 19a72041c5ed |
comparison
equal
deleted
inserted
replaced
5117:3742ea8250b5 | 5118:e0db3c197671 |
---|---|
213 composite characters under Mule would decrease the number | 213 composite characters under Mule would decrease the number |
214 of possible faces that can exist. Mule already has shrunk | 214 of possible faces that can exist. Mule already has shrunk |
215 this to 2048, and further shrinkage would become uncomfortable. | 215 this to 2048, and further shrinkage would become uncomfortable. |
216 No such problems exist in XEmacs. | 216 No such problems exist in XEmacs. |
217 | 217 |
218 Composite characters could be represented as 0x8D C1 C2 C3, | 218 Composite characters could be represented as 0x8D C1 C2 C3, where each |
219 where each C[1-3] is in the range 0xA0 - 0xFF. This allows | 219 C[1-3] is in the range 0xA0 - 0xFF. This allows for slightly under |
220 for slightly under 2^20 (one million) composite characters | 220 2^20 (one million) composite characters over the XEmacs process |
221 over the XEmacs process lifetime, and you only need to | 221 lifetime. Or you could use 0x8D C1 C2 C3 C4, allowing for about 85 |
222 increase the size of a Mule character from 19 to 21 bits. | 222 million (slightly over 2^26) composite characters. |
223 Or you could use 0x8D C1 C2 C3 C4, allowing for about | |
224 85 million (slightly over 2^26) composite characters. | |
225 | 223 |
226 ========================================================================== | 224 ========================================================================== |
227 10. Internal API's | 225 10. Internal API's |
228 ========================================================================== | 226 ========================================================================== |
229 | 227 |
245 variable. | 243 variable. |
246 | 244 |
247 @item | 245 @item |
248 The Eistring API | 246 The Eistring API |
249 | 247 |
250 (This API is currently under-used) When doing simple things with | 248 \(This API is currently under-used) When doing simple things with |
251 internal text, the basic internal-format API's are enough. But to do | 249 internal text, the basic internal-format API's are enough. But to do |
252 things like delete or replace a substring, concatenate various strings, | 250 things like delete or replace a substring, concatenate various strings, |
253 etc. is difficult to do cleanly because of the allocation issues. | 251 etc. is difficult to do cleanly because of the allocation issues. |
254 The Eistring API is designed to deal with this, and provides a clean | 252 The Eistring API is designed to deal with this, and provides a clean |
255 way of modifying and building up internal text. (Note that the former | 253 way of modifying and building up internal text. (Note that the former |
1998 charsets[ichar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1; | 1996 charsets[ichar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1; |
1999 } | 1997 } |
2000 #endif | 1998 #endif |
2001 } | 1999 } |
2002 | 2000 |
2001 /* A couple of these functions should only be called on a Mule build. */ | |
2002 #ifdef MULE | |
2003 #define ASSERT_BUILT_WITH_MULE() assert(1) | |
2004 #else /* MULE */ | |
2005 #define ASSERT_BUILT_WITH_MULE() assert(0) | |
2006 #endif /* MULE */ | |
2007 | |
2003 int | 2008 int |
2004 ibyte_string_displayed_columns (const Ibyte *str, Bytecount len) | 2009 ibyte_string_displayed_columns (const Ibyte *str, Bytecount len) |
2005 { | 2010 { |
2006 int cols = 0; | 2011 int cols = 0; |
2007 const Ibyte *end = str + len; | 2012 const Ibyte *end = str + len; |
2013 Ichar ch; | |
2014 | |
2015 ASSERT_BUILT_WITH_MULE(); | |
2008 | 2016 |
2009 while (str < end) | 2017 while (str < end) |
2010 { | 2018 { |
2011 #ifdef MULE | 2019 ch = itext_ichar (str); |
2012 Ichar ch = itext_ichar (str); | |
2013 cols += XCHARSET_COLUMNS (ichar_charset (ch)); | 2020 cols += XCHARSET_COLUMNS (ichar_charset (ch)); |
2014 #else | |
2015 cols++; | |
2016 #endif | |
2017 INC_IBYTEPTR (str); | 2021 INC_IBYTEPTR (str); |
2018 } | 2022 } |
2019 | 2023 |
2020 return cols; | 2024 return cols; |
2021 } | 2025 } |
2022 | 2026 |
2023 int | 2027 int |
2024 ichar_string_displayed_columns (const Ichar *USED_IF_MULE (str), Charcount len) | 2028 ichar_string_displayed_columns (const Ichar * USED_IF_MULE(str), Charcount len) |
2025 { | 2029 { |
2026 #ifdef MULE | |
2027 int cols = 0; | 2030 int cols = 0; |
2028 int i; | 2031 int i; |
2029 | 2032 |
2033 ASSERT_BUILT_WITH_MULE(); | |
2034 | |
2030 for (i = 0; i < len; i++) | 2035 for (i = 0; i < len; i++) |
2031 cols += XCHARSET_COLUMNS (ichar_charset (str[i])); | 2036 cols += XCHARSET_COLUMNS (ichar_charset (str[i])); |
2032 | 2037 |
2033 return cols; | 2038 return cols; |
2034 #else /* not MULE */ | |
2035 return len; | |
2036 #endif | |
2037 } | 2039 } |
2038 | 2040 |
2039 Charcount | 2041 Charcount |
2040 ibyte_string_nonascii_chars (const Ibyte *USED_IF_MULE (str), | 2042 ibyte_string_nonascii_chars (const Ibyte *USED_IF_MULE (str), |
2041 Bytecount USED_IF_MULE (len)) | 2043 Bytecount USED_IF_MULE (len)) |
2136 int | 2138 int |
2137 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, | 2139 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, |
2138 Bytecount len, Charcount charlen, const Ibyte *data, | 2140 Bytecount len, Charcount charlen, const Ibyte *data, |
2139 const Eistring *ei2, int is_ascii, int fold_case) | 2141 const Eistring *ei2, int is_ascii, int fold_case) |
2140 { | 2142 { |
2143 assert ((data == 0) != (ei == 0)); | |
2144 assert ((is_ascii != 0) == (data != 0)); | |
2145 assert (fold_case >= 0 && fold_case <= 2); | |
2141 assert ((off < 0) != (charoff < 0)); | 2146 assert ((off < 0) != (charoff < 0)); |
2147 | |
2142 if (off < 0) | 2148 if (off < 0) |
2143 { | 2149 { |
2144 off = charcount_to_bytecount (ei->data_, charoff); | 2150 off = charcount_to_bytecount (ei->data_, charoff); |
2145 if (charlen < 0) | 2151 if (charlen < 0) |
2146 len = -1; | 2152 len = -1; |
2150 if (len < 0) | 2156 if (len < 0) |
2151 len = ei->bytelen_ - off; | 2157 len = ei->bytelen_ - off; |
2152 | 2158 |
2153 assert (off >= 0 && off <= ei->bytelen_); | 2159 assert (off >= 0 && off <= ei->bytelen_); |
2154 assert (len >= 0 && off + len <= ei->bytelen_); | 2160 assert (len >= 0 && off + len <= ei->bytelen_); |
2155 assert ((data == 0) != (ei == 0)); | |
2156 assert ((is_ascii != 0) == (data != 0)); | |
2157 assert (fold_case >= 0 && fold_case <= 2); | |
2158 | 2161 |
2159 { | 2162 { |
2160 Bytecount dstlen; | 2163 Bytecount dstlen; |
2161 const Ibyte *src = ei->data_, *dst; | 2164 const Ibyte *src = ei->data_, *dst; |
2162 | 2165 |
4692 int | 4695 int |
4693 non_ascii_valid_ichar_p (Ichar ch) | 4696 non_ascii_valid_ichar_p (Ichar ch) |
4694 { | 4697 { |
4695 int f1, f2, f3; | 4698 int f1, f2, f3; |
4696 | 4699 |
4697 /* Must have only lowest 19 bits set */ | 4700 /* Must have only lowest 21 bits set */ |
4698 if (ch & ~0x7FFFF) | 4701 if (ch & ~0x1FFFFF) |
4699 return 0; | 4702 return 0; |
4700 | 4703 |
4701 f1 = ichar_field1 (ch); | 4704 f1 = ichar_field1 (ch); |
4702 f2 = ichar_field2 (ch); | 4705 f2 = ichar_field2 (ch); |
4703 f3 = ichar_field3 (ch); | 4706 f3 = ichar_field3 (ch); |
4995 return make_int (octet1); | 4998 return make_int (octet1); |
4996 else | 4999 else |
4997 invalid_constant ("Octet number must be 0 or 1", n); | 5000 invalid_constant ("Octet number must be 0 or 1", n); |
4998 } | 5001 } |
4999 | 5002 |
5003 #endif /* MULE */ | |
5004 | |
5000 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /* | 5005 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /* |
5001 Return list of charset and one or two position-codes of CHAR. | 5006 Return list of charset and one or two position-codes of CHAR. |
5002 */ | 5007 */ |
5003 (character)) | 5008 (character)) |
5004 { | 5009 { |
5011 GCPRO2 (charset, rc); | 5016 GCPRO2 (charset, rc); |
5012 CHECK_CHAR_COERCE_INT (character); | 5017 CHECK_CHAR_COERCE_INT (character); |
5013 | 5018 |
5014 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); | 5019 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); |
5015 | 5020 |
5016 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) | 5021 if (XCHARSET_DIMENSION (charset) == 2) |
5017 { | 5022 { |
5018 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); | 5023 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); |
5019 } | 5024 } |
5020 else | 5025 else |
5021 { | 5026 { |
5023 } | 5028 } |
5024 UNGCPRO; | 5029 UNGCPRO; |
5025 | 5030 |
5026 return rc; | 5031 return rc; |
5027 } | 5032 } |
5028 | |
5029 #endif /* MULE */ | |
5030 | 5033 |
5031 | 5034 |
5032 /************************************************************************/ | 5035 /************************************************************************/ |
5033 /* composite character functions */ | 5036 /* composite character functions */ |
5034 /************************************************************************/ | 5037 /************************************************************************/ |
5123 | 5126 |
5124 void | 5127 void |
5125 syms_of_text (void) | 5128 syms_of_text (void) |
5126 { | 5129 { |
5127 DEFSUBR (Fmake_char); | 5130 DEFSUBR (Fmake_char); |
5131 DEFSUBR (Fsplit_char); | |
5128 | 5132 |
5129 #ifdef MULE | 5133 #ifdef MULE |
5130 DEFSUBR (Fchar_charset); | 5134 DEFSUBR (Fchar_charset); |
5131 DEFSUBR (Fchar_octet); | 5135 DEFSUBR (Fchar_octet); |
5132 DEFSUBR (Fsplit_char); | |
5133 | 5136 |
5134 #ifdef ENABLE_COMPOSITE_CHARS | 5137 #ifdef ENABLE_COMPOSITE_CHARS |
5135 DEFSUBR (Fmake_composite_char); | 5138 DEFSUBR (Fmake_composite_char); |
5136 DEFSUBR (Fcomposite_char_string); | 5139 DEFSUBR (Fcomposite_char_string); |
5137 #endif | 5140 #endif |