comparison src/redisplay-x.c @ 3896:7bde3a686fda

[xemacs-hg @ 2007-04-05 02:27:09 by stephent] Refactor separate_textual_runs, part 2 <87648fwhqx.fsf@uwakimon.sk.tsukuba.ac.jp>
author stephent
date Thu, 05 Apr 2007 02:27:11 +0000
parents a3c2418313d5
children 3d2a9b62e044
comparison
equal deleted inserted replaced
3895:a3c2418313d5 3896:7bde3a686fda
76 #define MINL(x,y) ((((unsigned long) (x)) < ((unsigned long) (y))) \ 76 #define MINL(x,y) ((((unsigned long) (x)) < ((unsigned long) (y))) \
77 ? ((unsigned long) (x)) : ((unsigned long) (y))) 77 ? ((unsigned long) (x)) : ((unsigned long) (y)))
78 #endif /* USE_XFT */ 78 #endif /* USE_XFT */
79 79
80 80
81 /* Note: We do not use the Xmb*() functions and XFontSets. 81 /* Note: We do not use the Xmb*() functions and XFontSets, nor the
82 Those functions are generally losing for a number of reasons: 82 Motif XFontLists and CompoundStrings.
83 83 Those functions are generally losing for a number of reasons.
84 1) They only support one locale (e.g. you could display 84 Most important, they only support one locale (e.g. you could
85 Japanese and ASCII text, but not mixed Japanese/Chinese 85 display Japanese and ASCII text, but not mixed Japanese/Chinese
86 text). You could maybe call setlocale() frequently 86 text). You could maybe call setlocale() frequently to try to deal
87 to try to deal with this, but that would generally 87 with this, but that would generally fail because an XFontSet is
88 fail because an XFontSet is tied to one locale and 88 tied to one locale and won't have the other character sets in it.
89 won't have the other character sets in it. 89
90 90 fontconfig (the font database for Xft) has some specifier-like
91 The following aren't true any more, but that doesn't make Xmb*() 91 properties, but it's not sufficient (witness the existence of
92 usable. One wonders about Xft and Pango, etc, tho'. Except they 92 Pango). Pango might do the trick, but it's not a cross-platform
93 aren't cross-platform solutions. FMH, as jwz would say. -- sjt 93 solution; it would need significant advantages to be worth the
94 [[ 94 effort.
95 2) Not all (or even very many) OS's support the useful 95 */
96 locales. For example, as far as I know SunOS and
97 Solaris only support the Japanese locale if you get the
98 special Asian-language version of the OS. Yuck yuck
99 yuck. Linux doesn't support the Japanese locale at
100 all.
101 3) The locale support in X only exists in R5, not in R4.
102 (Not sure how big of a problem this is: how many
103 people are using R4?)
104 4) Who knows if the multi-byte text format (which is locale-
105 specific) is even the same for the same locale on
106 different OS's? It's not even documented anywhere that
107 I can find what the multi-byte text format for the
108 Japanese locale under SunOS and Solaris is, but I assume
109 it's EUC.
110 ]]
111 */
112 96
113 /* #### Break me out into a separate header */ 97 /* #### Break me out into a separate header */
114 struct textual_run 98 struct textual_run
115 { 99 {
116 Lisp_Object charset; 100 Lisp_Object charset;
120 }; 104 };
121 105
122 /* Separate out the text in DYN into a series of textual runs of a 106 /* Separate out the text in DYN into a series of textual runs of a
123 particular charset. Also convert the characters as necessary into 107 particular charset. Also convert the characters as necessary into
124 the format needed by XDrawImageString(), XDrawImageString16(), et 108 the format needed by XDrawImageString(), XDrawImageString16(), et
125 al. (This means converting to one or two byte format, possibly 109 al. This means converting to one or two byte format, possibly
126 tweaking the high bits, and possibly running a CCL program.) You 110 tweaking the high bits, and possibly running a CCL program. You
127 must pre-allocate the space used and pass it in. (This is done so 111 must pre-allocate the space used and pass it in. (This is done so
128 you can ALLOCA () the space.) You need to allocate (2 * len) bytes 112 you can ALLOCA () the space.) (sizeof(bufchar) * len) bytes must be
129 of TEXT_STORAGE and (len * sizeof (struct textual_run)) bytes of 113 allocated for TEXT_STORAGE and (len * sizeof (struct textual_run))
130 RUN_STORAGE, where LEN is the length of the dynarr. 114 bytes of RUN_STORAGE, where LEN is the length of the dynarr.
115
116 bufchar might not be fixed width (in the case of UTF-8).
131 117
132 Returns the number of runs actually used. */ 118 Returns the number of runs actually used. */
133 119
134 /* Notes on Xft implementation 120 /* Notes on Xft implementation
135 121
139 font, rather than the character for the charset, and that's what would 125 font, rather than the character for the charset, and that's what would
140 determine the separation into runs. 126 determine the separation into runs.
141 - The widechar versions of fontconfig (and therefore Xft) functions 127 - The widechar versions of fontconfig (and therefore Xft) functions
142 seem to be just bigendian Unicode. So there's actually no need to use 128 seem to be just bigendian Unicode. So there's actually no need to use
143 the 8-bit versions in computing runs and runes, it would seem. 129 the 8-bit versions in computing runs and runes, it would seem.
144 - Mule won't "just work"; substantially more effort seems needed.
145 */ 130 */
146 131
147 #if !defined(USE_XFT) && !defined(MULE) 132 #if !defined(USE_XFT) && !defined(MULE)
148 static int 133 static int
149 separate_textual_runs_nomule (unsigned char *text_storage, 134 separate_textual_runs_nomule (unsigned char *text_storage,
164 return 1; 149 return 1;
165 } 150 }
166 #endif 151 #endif
167 152
168 #if defined(USE_XFT) && !defined(MULE) 153 #if defined(USE_XFT) && !defined(MULE)
154 /*
155 Note that in this configuration the "Croatian hack" of using an 8-bit,
156 non-Latin-1 font to get localized display without Mule simply isn't
157 available. That's by design -- Unicode does not aid or abet that kind
158 of punning.
159 This means that the cast to XftChar16 gives the correct "conversion" to
160 UCS-2.
161 #### Is there an alignment issue with text_storage?
162 */
169 static int 163 static int
170 separate_textual_runs_xft_nomule (unsigned char *text_storage, 164 separate_textual_runs_xft_nomule (unsigned char *text_storage,
171 struct textual_run *run_storage, 165 struct textual_run *run_storage,
172 const Ichar *str, Charcount len, 166 const Ichar *str, Charcount len,
173 struct face_cachel *UNUSED(cachel)) 167 struct face_cachel *UNUSED(cachel))
195 separate_textual_runs_xft_mule (unsigned char *text_storage, 189 separate_textual_runs_xft_mule (unsigned char *text_storage,
196 struct textual_run *run_storage, 190 struct textual_run *run_storage,
197 const Ichar *str, Charcount len, 191 const Ichar *str, Charcount len,
198 struct face_cachel *UNUSED(cachel)) 192 struct face_cachel *UNUSED(cachel))
199 { 193 {
200 Lisp_Object prev_charset = Qnil; 194 Lisp_Object prev_charset = Qunbound;
201 int runs_so_far = 0, i; 195 int runs_so_far = 0, i;
202 196
203 run_storage[0].ptr = text_storage; 197 run_storage[0].ptr = text_storage;
204 run_storage[0].len = len; 198 run_storage[0].len = len;
205 run_storage[0].dimension = 2; 199 run_storage[0].dimension = 2;
206 run_storage[0].charset = Qnil; 200 run_storage[0].charset = Qnil;
207 201
208 for (i = 0; i < len; i++) 202 for (i = 0; i < len; i++)
209 { 203 {
210 Ichar ch = str[i]; 204 Ichar ch = str[i];
211 Lisp_Object charset = ichar_charset(ch); 205 Lisp_Object charset = ichar_charset(ch);
212 int ucs = ichar_to_unicode(ch); 206 int ucs = ichar_to_unicode(ch);
213 207
214 /* If UCS is less than zero or greater than 0xFFFF, set ucs2 to 208 /* If UCS is less than zero or greater than 0xFFFF, set ucs2 to
215 REPLACEMENT CHARACTER. */ 209 REPLACEMENT CHARACTER. */
236 return runs_so_far; 230 return runs_so_far;
237 } 231 }
238 #endif 232 #endif
239 233
240 #if !defined(USE_XFT) && defined(MULE) 234 #if !defined(USE_XFT) && defined(MULE)
235 /*
236 This is the most complex function of this group, due to the various
237 indexing schemes used by different fonts. For our purposes, they
238 fall into three classes. Some fonts are indexed compatibly with ISO
239 2022; those fonts just use the Mule internal representation directly
240 (typically the high bit must be reset; this is determined by the `graphic'
241 flag). Some fonts are indexed by Unicode, specifically by UCS-2. These
242 are all translated using `ichar_to_unicode'. Finally some fonts have
243 irregular indexes, and must be translated ad hoc. In XEmacs ad hoc
244 translations are accomplished with CCL programs. */
241 static int 245 static int
242 separate_textual_runs_mule (unsigned char *text_storage, 246 separate_textual_runs_mule (unsigned char *text_storage,
243 struct textual_run *run_storage, 247 struct textual_run *run_storage,
244 const Ichar *str, Charcount len, 248 const Ichar *str, Charcount len,
245 struct face_cachel *cachel) 249 struct face_cachel *cachel)
246 { 250 {
247 Lisp_Object prev_charset = Qunbound; /* not Qnil because that is a 251 Lisp_Object prev_charset = Qunbound;
248 possible valid charset when
249 MULE is not defined */
250 int runs_so_far = 0, i; 252 int runs_so_far = 0, i;
251 Ibyte charset_leading_byte = LEADING_BYTE_ASCII; 253 Ibyte charset_leading_byte = LEADING_BYTE_ASCII;
252 int dimension = 1, graphic = 0, need_ccl_conversion = 0; 254 int dimension = 1, graphic = 0, need_ccl_conversion = 0;
253 Lisp_Object ccl_prog; 255 Lisp_Object ccl_prog;
254 struct ccl_program char_converter; 256 struct ccl_program char_converter;
257 259
258 for (i = 0; i < len; i++) 260 for (i = 0; i < len; i++)
259 { 261 {
260 Ichar ch = str[i]; 262 Ichar ch = str[i];
261 Lisp_Object charset; 263 Lisp_Object charset;
262 int byte1, byte2; /* Not UExbytes because BREAKUP_ICHAR takes 264 int byte1, byte2; /* BREAKUP_ICHAR dereferences the addresses
263 the addresses of its arguments and 265 of its arguments as pointer to int. */
264 dereferences those addresses as integer
265 pointers. */
266 BREAKUP_ICHAR (ch, charset, byte1, byte2); 266 BREAKUP_ICHAR (ch, charset, byte1, byte2);
267 267
268 if (!EQ (charset, prev_charset)) 268 if (!EQ (charset, prev_charset))
269 { 269 {
270 run_storage[runs_so_far].ptr = text_storage; 270 /* At this point, `dimension' and `prev_charset' refer to just-
271 run_storage[runs_so_far].charset = charset; 271 completed run. `runs_so_far' and `text_storage' refer to the
272 272 run about to start. */
273 if (runs_so_far) 273 if (runs_so_far)
274 { 274 {
275 /* Update metadata for previous run. */
275 run_storage[runs_so_far - 1].len = 276 run_storage[runs_so_far - 1].len =
276 text_storage - run_storage[runs_so_far - 1].ptr; 277 text_storage - run_storage[runs_so_far - 1].ptr;
277 /* Checks the value for dimension from the previous run. */
278 if (2 == dimension) run_storage[runs_so_far - 1].len >>= 1; 278 if (2 == dimension) run_storage[runs_so_far - 1].len >>= 1;
279 } 279 }
280 280
281 /* Compute metadata for current run.
282 First, classify font.
283 If the font is indexed by UCS-2, set `translate_to_ucs_2'.
284 Else if the charset has a CCL program, set `need_ccl_conversion'.
285 Else if the font is indexed by an ISO 2022 "graphic register",
286 set `graphic'.
287 These flags are almost mutually exclusive, but we're sloppy
288 about resetting "shadowed" flags. So the flags must be checked
289 in the proper order in computing byte1 and byte2, below. */
281 charset_leading_byte = XCHARSET_LEADING_BYTE(charset); 290 charset_leading_byte = XCHARSET_LEADING_BYTE(charset);
282
283 translate_to_ucs_2 = 291 translate_to_ucs_2 =
284 bit_vector_bit(FACE_CACHEL_FONT_FINAL_STAGE 292 bit_vector_bit (FACE_CACHEL_FONT_FINAL_STAGE (cachel),
285 (cachel), 293 charset_leading_byte - MIN_LEADING_BYTE);
286 charset_leading_byte - MIN_LEADING_BYTE);
287
288 if (translate_to_ucs_2) 294 if (translate_to_ucs_2)
289 { 295 {
290 dimension = 2; 296 dimension = 2;
291 run_storage[runs_so_far].dimension = 2;
292 } 297 }
293 else 298 else
294 { 299 {
295 dimension = XCHARSET_DIMENSION (charset); 300 dimension = XCHARSET_DIMENSION (charset);
296 run_storage[runs_so_far].dimension = dimension; 301
297 302 /* Check for CCL charset.
303 If setup_ccl_program fails, we'll get a garbaged display.
304 This should never happen, and even if it does, it should
305 be harmless (unless the X server has buggy handling of
306 characters undefined in the font). It may be marginally
307 more useful to users and debuggers than substituting a
308 fixed replacement character. */
298 ccl_prog = XCHARSET_CCL_PROGRAM (charset); 309 ccl_prog = XCHARSET_CCL_PROGRAM (charset);
299 if ((!NILP (ccl_prog)) 310 if ((!NILP (ccl_prog))
300 && (setup_ccl_program (&char_converter, ccl_prog) >= 0)) 311 && (setup_ccl_program (&char_converter, ccl_prog) >= 0))
301 { 312 {
302 need_ccl_conversion = 1; 313 need_ccl_conversion = 1;
303 } 314 }
304 else 315 else
305 { 316 {
306 /* The graphic property is only relevant if we're neither 317 /* The charset must have an ISO 2022-compatible font index.
307 doing the CCL conversion nor doing the UTF-16 318 There are 2 "registers" (what such fonts use as index).
308 conversion; it's irrelevant otherwise. */ 319 GL (graphic == 0) has the high bit of each octet reset,
320 GR (graphic == 1) has it set. */
309 graphic = XCHARSET_GRAPHIC (charset); 321 graphic = XCHARSET_GRAPHIC (charset);
310 need_ccl_conversion = 0; 322 need_ccl_conversion = 0;
311 } 323 }
312 } 324 }
325
326 /* Initialize metadata for current run. */
327 run_storage[runs_so_far].ptr = text_storage;
328 run_storage[runs_so_far].charset = charset;
329 run_storage[runs_so_far].dimension = dimension;
330
331 /* Update loop variables. */
313 prev_charset = charset; 332 prev_charset = charset;
314
315 runs_so_far++; 333 runs_so_far++;
316 } 334 }
317 335
336 /* Must check flags in this order. See comment above. */
318 if (translate_to_ucs_2) 337 if (translate_to_ucs_2)
319 { 338 {
320 int ucs = ichar_to_unicode(ch); 339 int ucs = ichar_to_unicode(ch);
321 /* If UCS is less than zero or greater than 0xFFFF, set ucs2 to 340 /* If UCS is less than zero or greater than 0xFFFF, set ucs2 to
322 REPLACEMENT CHARACTER. */ 341 REPLACEMENT CHARACTER. */
323 ucs = (ucs & ~0xFFFF) ? 0xFFFD : ucs; 342 ucs = (ucs & ~0xFFFF) ? 0xFFFD : ucs;
324 343
325 /* Ignoring the "graphic" handling. */
326 byte1 = ucs >> 8; 344 byte1 = ucs >> 8;
327 byte2 = ucs; 345 byte2 = ucs;
328 } 346 }
329 else if (need_ccl_conversion) 347 else if (need_ccl_conversion)
330 { 348 {