comparison src/text.c @ 5118:e0db3c197671 ben-lisp-object

merge up to latest default branch, doesn't compile yet
author Ben Wing <ben@xemacs.org>
date Sat, 26 Dec 2009 21:18:49 -0600
parents 38493c0fb952
children 6ef8256a020a 19a72041c5ed
comparison
equal deleted inserted replaced
5117:3742ea8250b5 5118:e0db3c197671
213 composite characters under Mule would decrease the number 213 composite characters under Mule would decrease the number
214 of possible faces that can exist. Mule already has shrunk 214 of possible faces that can exist. Mule already has shrunk
215 this to 2048, and further shrinkage would become uncomfortable. 215 this to 2048, and further shrinkage would become uncomfortable.
216 No such problems exist in XEmacs. 216 No such problems exist in XEmacs.
217 217
218 Composite characters could be represented as 0x8D C1 C2 C3, 218 Composite characters could be represented as 0x8D C1 C2 C3, where each
219 where each C[1-3] is in the range 0xA0 - 0xFF. This allows 219 C[1-3] is in the range 0xA0 - 0xFF. This allows for slightly under
220 for slightly under 2^20 (one million) composite characters 220 2^20 (one million) composite characters over the XEmacs process
221 over the XEmacs process lifetime, and you only need to 221 lifetime. Or you could use 0x8D C1 C2 C3 C4, allowing for about 85
222 increase the size of a Mule character from 19 to 21 bits. 222 million (slightly over 2^26) composite characters.
223 Or you could use 0x8D C1 C2 C3 C4, allowing for about
224 85 million (slightly over 2^26) composite characters.
225 223
226 ========================================================================== 224 ==========================================================================
227 10. Internal API's 225 10. Internal API's
228 ========================================================================== 226 ==========================================================================
229 227
245 variable. 243 variable.
246 244
247 @item 245 @item
248 The Eistring API 246 The Eistring API
249 247
250 (This API is currently under-used) When doing simple things with 248 \(This API is currently under-used) When doing simple things with
251 internal text, the basic internal-format API's are enough. But to do 249 internal text, the basic internal-format API's are enough. But to do
252 things like delete or replace a substring, concatenate various strings, 250 things like delete or replace a substring, concatenate various strings,
253 etc. is difficult to do cleanly because of the allocation issues. 251 etc. is difficult to do cleanly because of the allocation issues.
254 The Eistring API is designed to deal with this, and provides a clean 252 The Eistring API is designed to deal with this, and provides a clean
255 way of modifying and building up internal text. (Note that the former 253 way of modifying and building up internal text. (Note that the former
1998 charsets[ichar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1; 1996 charsets[ichar_leading_byte (str[i]) - MIN_LEADING_BYTE] = 1;
1999 } 1997 }
2000 #endif 1998 #endif
2001 } 1999 }
2002 2000
2001 /* A couple of these functions should only be called on a Mule build. */
2002 #ifdef MULE
2003 #define ASSERT_BUILT_WITH_MULE() assert(1)
2004 #else /* MULE */
2005 #define ASSERT_BUILT_WITH_MULE() assert(0)
2006 #endif /* MULE */
2007
2003 int 2008 int
2004 ibyte_string_displayed_columns (const Ibyte *str, Bytecount len) 2009 ibyte_string_displayed_columns (const Ibyte *str, Bytecount len)
2005 { 2010 {
2006 int cols = 0; 2011 int cols = 0;
2007 const Ibyte *end = str + len; 2012 const Ibyte *end = str + len;
2013 Ichar ch;
2014
2015 ASSERT_BUILT_WITH_MULE();
2008 2016
2009 while (str < end) 2017 while (str < end)
2010 { 2018 {
2011 #ifdef MULE 2019 ch = itext_ichar (str);
2012 Ichar ch = itext_ichar (str);
2013 cols += XCHARSET_COLUMNS (ichar_charset (ch)); 2020 cols += XCHARSET_COLUMNS (ichar_charset (ch));
2014 #else
2015 cols++;
2016 #endif
2017 INC_IBYTEPTR (str); 2021 INC_IBYTEPTR (str);
2018 } 2022 }
2019 2023
2020 return cols; 2024 return cols;
2021 } 2025 }
2022 2026
2023 int 2027 int
2024 ichar_string_displayed_columns (const Ichar *USED_IF_MULE (str), Charcount len) 2028 ichar_string_displayed_columns (const Ichar * USED_IF_MULE(str), Charcount len)
2025 { 2029 {
2026 #ifdef MULE
2027 int cols = 0; 2030 int cols = 0;
2028 int i; 2031 int i;
2029 2032
2033 ASSERT_BUILT_WITH_MULE();
2034
2030 for (i = 0; i < len; i++) 2035 for (i = 0; i < len; i++)
2031 cols += XCHARSET_COLUMNS (ichar_charset (str[i])); 2036 cols += XCHARSET_COLUMNS (ichar_charset (str[i]));
2032 2037
2033 return cols; 2038 return cols;
2034 #else /* not MULE */
2035 return len;
2036 #endif
2037 } 2039 }
2038 2040
2039 Charcount 2041 Charcount
2040 ibyte_string_nonascii_chars (const Ibyte *USED_IF_MULE (str), 2042 ibyte_string_nonascii_chars (const Ibyte *USED_IF_MULE (str),
2041 Bytecount USED_IF_MULE (len)) 2043 Bytecount USED_IF_MULE (len))
2136 int 2138 int
2137 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff, 2139 eicmp_1 (Eistring *ei, Bytecount off, Charcount charoff,
2138 Bytecount len, Charcount charlen, const Ibyte *data, 2140 Bytecount len, Charcount charlen, const Ibyte *data,
2139 const Eistring *ei2, int is_ascii, int fold_case) 2141 const Eistring *ei2, int is_ascii, int fold_case)
2140 { 2142 {
2143 assert ((data == 0) != (ei == 0));
2144 assert ((is_ascii != 0) == (data != 0));
2145 assert (fold_case >= 0 && fold_case <= 2);
2141 assert ((off < 0) != (charoff < 0)); 2146 assert ((off < 0) != (charoff < 0));
2147
2142 if (off < 0) 2148 if (off < 0)
2143 { 2149 {
2144 off = charcount_to_bytecount (ei->data_, charoff); 2150 off = charcount_to_bytecount (ei->data_, charoff);
2145 if (charlen < 0) 2151 if (charlen < 0)
2146 len = -1; 2152 len = -1;
2150 if (len < 0) 2156 if (len < 0)
2151 len = ei->bytelen_ - off; 2157 len = ei->bytelen_ - off;
2152 2158
2153 assert (off >= 0 && off <= ei->bytelen_); 2159 assert (off >= 0 && off <= ei->bytelen_);
2154 assert (len >= 0 && off + len <= ei->bytelen_); 2160 assert (len >= 0 && off + len <= ei->bytelen_);
2155 assert ((data == 0) != (ei == 0));
2156 assert ((is_ascii != 0) == (data != 0));
2157 assert (fold_case >= 0 && fold_case <= 2);
2158 2161
2159 { 2162 {
2160 Bytecount dstlen; 2163 Bytecount dstlen;
2161 const Ibyte *src = ei->data_, *dst; 2164 const Ibyte *src = ei->data_, *dst;
2162 2165
4692 int 4695 int
4693 non_ascii_valid_ichar_p (Ichar ch) 4696 non_ascii_valid_ichar_p (Ichar ch)
4694 { 4697 {
4695 int f1, f2, f3; 4698 int f1, f2, f3;
4696 4699
4697 /* Must have only lowest 19 bits set */ 4700 /* Must have only lowest 21 bits set */
4698 if (ch & ~0x7FFFF) 4701 if (ch & ~0x1FFFFF)
4699 return 0; 4702 return 0;
4700 4703
4701 f1 = ichar_field1 (ch); 4704 f1 = ichar_field1 (ch);
4702 f2 = ichar_field2 (ch); 4705 f2 = ichar_field2 (ch);
4703 f3 = ichar_field3 (ch); 4706 f3 = ichar_field3 (ch);
4995 return make_int (octet1); 4998 return make_int (octet1);
4996 else 4999 else
4997 invalid_constant ("Octet number must be 0 or 1", n); 5000 invalid_constant ("Octet number must be 0 or 1", n);
4998 } 5001 }
4999 5002
5003 #endif /* MULE */
5004
5000 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /* 5005 DEFUN ("split-char", Fsplit_char, 1, 1, 0, /*
5001 Return list of charset and one or two position-codes of CHAR. 5006 Return list of charset and one or two position-codes of CHAR.
5002 */ 5007 */
5003 (character)) 5008 (character))
5004 { 5009 {
5011 GCPRO2 (charset, rc); 5016 GCPRO2 (charset, rc);
5012 CHECK_CHAR_COERCE_INT (character); 5017 CHECK_CHAR_COERCE_INT (character);
5013 5018
5014 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2); 5019 BREAKUP_ICHAR (XCHAR (character), charset, c1, c2);
5015 5020
5016 if (XCHARSET_DIMENSION (Fget_charset (charset)) == 2) 5021 if (XCHARSET_DIMENSION (charset) == 2)
5017 { 5022 {
5018 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2)); 5023 rc = list3 (XCHARSET_NAME (charset), make_int (c1), make_int (c2));
5019 } 5024 }
5020 else 5025 else
5021 { 5026 {
5023 } 5028 }
5024 UNGCPRO; 5029 UNGCPRO;
5025 5030
5026 return rc; 5031 return rc;
5027 } 5032 }
5028
5029 #endif /* MULE */
5030 5033
5031 5034
5032 /************************************************************************/ 5035 /************************************************************************/
5033 /* composite character functions */ 5036 /* composite character functions */
5034 /************************************************************************/ 5037 /************************************************************************/
5123 5126
5124 void 5127 void
5125 syms_of_text (void) 5128 syms_of_text (void)
5126 { 5129 {
5127 DEFSUBR (Fmake_char); 5130 DEFSUBR (Fmake_char);
5131 DEFSUBR (Fsplit_char);
5128 5132
5129 #ifdef MULE 5133 #ifdef MULE
5130 DEFSUBR (Fchar_charset); 5134 DEFSUBR (Fchar_charset);
5131 DEFSUBR (Fchar_octet); 5135 DEFSUBR (Fchar_octet);
5132 DEFSUBR (Fsplit_char);
5133 5136
5134 #ifdef ENABLE_COMPOSITE_CHARS 5137 #ifdef ENABLE_COMPOSITE_CHARS
5135 DEFSUBR (Fmake_composite_char); 5138 DEFSUBR (Fmake_composite_char);
5136 DEFSUBR (Fcomposite_char_string); 5139 DEFSUBR (Fcomposite_char_string);
5137 #endif 5140 #endif