comparison src/regex.c @ 867:804517e16990

[xemacs-hg @ 2002-06-05 09:54:39 by ben] Textual renaming: text/char names abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, charset.h, chartab.c, chartab.h, cmds.c, console-gtk.h, console-msw.c, console-msw.h, console-stream.c, console-tty.c, console-x.c, console-x.h, console.h, data.c, device-msw.c, device-x.c, dialog-msw.c, dired-msw.c, dired.c, doc.c, doprnt.c, editfns.c, eldap.c, emodules.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, event-unixoid.c, events.c, events.h, file-coding.c, file-coding.h, fileio.c, filelock.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, glyphs-eimage.c, glyphs-msw.c, glyphs-x.c, glyphs.c, glyphs.h, gpmevent.c, gui-x.c, gui-x.h, gui.c, gui.h, hpplay.c, indent.c, insdel.c, insdel.h, intl-win32.c, keymap.c, line-number.c, line-number.h, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, md5.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, ndir.h, nt.c, objects-gtk.c, objects-gtk.h, objects-msw.c, objects-tty.c, objects-x.c, objects.c, objects.h, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, procimpl.h, realpath.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-tty.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, search.c, select-common.h, select-gtk.c, select-x.c, sound.h, symbols.c, syntax.c, syntax.h, sysdep.c, sysdep.h, sysdir.h, sysfile.h, sysproc.h, syspwd.h, systime.h, syswindows.h, termcap.c, tests.c, text.c, text.h, toolbar-common.c, tooltalk.c, ui-gtk.c, unexnt.c, unicode.c, win32.c: Text/char naming rationalization. [a] distinguish between "charptr" when it refers to operations on the pointer itself and when it refers to operations on text; and [b] use consistent naming for everything referring to internal format, i.e. 
Itext == text in internal format; Ibyte == a byte in such text; Ichar == a char as represented in internal character format. Thus, e.g., set_charptr_emchar -> set_itext_ichar. The pre and post tags on either side of this change are: pre-internal-format-textual-renaming and post-internal-format-textual-renaming. See the Internals Manual for details of exactly how this was done, how to handle the change in your workspace, etc.
author ben
date Wed, 05 Jun 2002 09:58:45 +0000
parents 6728e641994e
children ccaf90c5a53a
comparison
equal deleted inserted replaced
866:613552a02607 867:804517e16990
99 { 99 {
100 } 100 }
101 101
102 #endif /* MULE */ 102 #endif /* MULE */
103 103
104 #define RE_TRANSLATE_1(ch) TRT_TABLE_OF (translate, (Emchar) ch) 104 #define RE_TRANSLATE_1(ch) TRT_TABLE_OF (translate, (Ichar) ch)
105 #define TRANSLATE_P(tr) (!NILP (tr)) 105 #define TRANSLATE_P(tr) (!NILP (tr))
106 106
107 /* Converts the pointer to the char to BEG-based offset from the start. */ 107 /* Converts the pointer to the char to BEG-based offset from the start. */
108 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \ 108 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \
109 ? (d) - string1 : (d) - (string2 - size1)) 109 ? (d) - string1 : (d) - (string2 - size1))
140 #endif 140 #endif
141 #endif 141 #endif
142 142
143 #include <stdlib.h> 143 #include <stdlib.h>
144 144
145 #define charptr_emchar(str) ((Emchar) (str)[0]) 145 #define itext_ichar(str) ((Ichar) (str)[0])
146 #define charptr_emchar_fmt(str, fmt, object) ((Emchar) (str)[0]) 146 #define itext_ichar_fmt(str, fmt, object) ((Ichar) (str)[0])
147 #define charptr_emchar_ascii_fmt(str, fmt, object) ((Emchar) (str)[0]) 147 #define itext_ichar_ascii_fmt(str, fmt, object) ((Ichar) (str)[0])
148 148
149 #if (LONGBITS > INTBITS) 149 #if (LONGBITS > INTBITS)
150 # define EMACS_INT long 150 # define EMACS_INT long
151 #else 151 #else
152 # define EMACS_INT int 152 # define EMACS_INT int
153 #endif 153 #endif
154 154
155 typedef int Emchar; 155 typedef int Ichar;
156 156
157 #define INC_CHARPTR(p) ((p)++) 157 #define INC_IBYTEPTR(p) ((p)++)
158 #define INC_CHARPTR_FMT(p, fmt) ((p)++) 158 #define INC_IBYTEPTR_FMT(p, fmt) ((p)++)
159 #define DEC_CHARPTR(p) ((p)--) 159 #define DEC_IBYTEPTR(p) ((p)--)
160 #define DEC_CHARPTR_FMT(p, fmt) ((p)--) 160 #define DEC_IBYTEPTR_FMT(p, fmt) ((p)--)
161 #define charptr_emchar_len(ptr) 1 161 #define itext_ichar_len(ptr) 1
162 #define charptr_emchar_len_fmt(ptr, fmt) 1 162 #define itext_ichar_len_fmt(ptr, fmt) 1
163 163
164 #include <string.h> 164 #include <string.h>
165 165
166 /* Define the syntax stuff for \<, \>, etc. */ 166 /* Define the syntax stuff for \<, \>, etc. */
167 167
1593 /* Fetch the next character in the uncompiled pattern, with no 1593 /* Fetch the next character in the uncompiled pattern, with no
1594 translation. */ 1594 translation. */
1595 #define PATFETCH_RAW(c) \ 1595 #define PATFETCH_RAW(c) \
1596 do {if (p == pend) return REG_EEND; \ 1596 do {if (p == pend) return REG_EEND; \
1597 assert (p < pend); \ 1597 assert (p < pend); \
1598 c = charptr_emchar (p); \ 1598 c = itext_ichar (p); \
1599 INC_CHARPTR (p); \ 1599 INC_IBYTEPTR (p); \
1600 } while (0) 1600 } while (0)
1601 1601
1602 /* Go backwards one character in the pattern. */ 1602 /* Go backwards one character in the pattern. */
1603 #define PATUNFETCH DEC_CHARPTR (p) 1603 #define PATUNFETCH DEC_IBYTEPTR (p)
1604 1604
1605 /* If `translate' is non-null, return translate[D], else just D. We 1605 /* If `translate' is non-null, return translate[D], else just D. We
1606 cast the subscript to translate because some data is declared as 1606 cast the subscript to translate because some data is declared as
1607 `char *', to avoid warnings when a string constant is passed. But 1607 `char *', to avoid warnings when a string constant is passed. But
1608 when we use a character as a subscript we must make it unsigned. */ 1608 when we use a character as a subscript we must make it unsigned. */
3183 BUF_PUSH (c); 3183 BUF_PUSH (c);
3184 (*pending_exact)++; 3184 (*pending_exact)++;
3185 #else 3185 #else
3186 { 3186 {
3187 Bytecount bt_count; 3187 Bytecount bt_count;
3188 Intbyte tmp_buf[MAX_EMCHAR_LEN]; 3188 Ibyte tmp_buf[MAX_ICHAR_LEN];
3189 int i; 3189 int i;
3190 3190
3191 bt_count = set_charptr_emchar (tmp_buf, c); 3191 bt_count = set_itext_ichar (tmp_buf, c);
3192 3192
3193 for (i = 0; i < bt_count; i++) 3193 for (i = 0; i < bt_count; i++)
3194 { 3194 {
3195 BUF_PUSH (tmp_buf[i]); 3195 BUF_PUSH (tmp_buf[i]);
3196 (*pending_exact)++; 3196 (*pending_exact)++;
3400 3400
3401 static reg_errcode_t 3401 static reg_errcode_t
3402 compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, 3402 compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate,
3403 reg_syntax_t syntax, unsigned char *buf_end) 3403 reg_syntax_t syntax, unsigned char *buf_end)
3404 { 3404 {
3405 Emchar this_char; 3405 Ichar this_char;
3406 3406
3407 re_char *p = *p_ptr; 3407 re_char *p = *p_ptr;
3408 int range_start, range_end; 3408 int range_start, range_end;
3409 3409
3410 if (p == pend) 3410 if (p == pend)
3446 static reg_errcode_t 3446 static reg_errcode_t
3447 compile_extended_range (re_char **p_ptr, re_char *pend, 3447 compile_extended_range (re_char **p_ptr, re_char *pend,
3448 RE_TRANSLATE_TYPE translate, 3448 RE_TRANSLATE_TYPE translate,
3449 reg_syntax_t syntax, Lisp_Object rtab) 3449 reg_syntax_t syntax, Lisp_Object rtab)
3450 { 3450 {
3451 Emchar this_char, range_start, range_end; 3451 Ichar this_char, range_start, range_end;
3452 const Intbyte *p; 3452 const Ibyte *p;
3453 3453
3454 if (*p_ptr == pend) 3454 if (*p_ptr == pend)
3455 return REG_ERANGE; 3455 return REG_ERANGE;
3456 3456
3457 p = (const Intbyte *) *p_ptr; 3457 p = (const Ibyte *) *p_ptr;
3458 range_end = charptr_emchar (p); 3458 range_end = itext_ichar (p);
3459 p--; /* back to '-' */ 3459 p--; /* back to '-' */
3460 DEC_CHARPTR (p); /* back to start of range */ 3460 DEC_IBYTEPTR (p); /* back to start of range */
3461 /* We also want to fetch the endpoints without translating them; the 3461 /* We also want to fetch the endpoints without translating them; the
3462 appropriate translation is done in the bit-setting loop below. */ 3462 appropriate translation is done in the bit-setting loop below. */
3463 range_start = charptr_emchar (p); 3463 range_start = itext_ichar (p);
3464 INC_CHARPTR (*p_ptr); 3464 INC_IBYTEPTR (*p_ptr);
3465 3465
3466 /* If the start is after the end, the range is empty. */ 3466 /* If the start is after the end, the range is empty. */
3467 if (range_start > range_end) 3467 if (range_start > range_end)
3468 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 3468 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3469 3469
3470 /* Can't have ranges spanning different charsets, except maybe for 3470 /* Can't have ranges spanning different charsets, except maybe for
3471 ranges entirely within the first 256 chars. */ 3471 ranges entirely within the first 256 chars. */
3472 3472
3473 if ((range_start >= 0x100 || range_end >= 0x100) 3473 if ((range_start >= 0x100 || range_end >= 0x100)
3474 && emchar_leading_byte (range_start) != 3474 && ichar_leading_byte (range_start) !=
3475 emchar_leading_byte (range_end)) 3475 ichar_leading_byte (range_end))
3476 return REG_ERANGESPAN; 3476 return REG_ERANGESPAN;
3477 3477
3478 /* #### This might be way inefficient if the range encompasses 10,000 3478 /* #### This might be way inefficient if the range encompasses 10,000
3479 chars or something. To be efficient, you'd have to do something like 3479 chars or something. To be efficient, you'd have to do something like
3480 this: 3480 this:
3637 for (i = 0; i < nentries; i++) 3637 for (i = 0; i < nentries; i++)
3638 { 3638 {
3639 EMACS_INT first, last; 3639 EMACS_INT first, last;
3640 Lisp_Object dummy_val; 3640 Lisp_Object dummy_val;
3641 int jj; 3641 int jj;
3642 Intbyte strr[MAX_EMCHAR_LEN]; 3642 Ibyte strr[MAX_ICHAR_LEN];
3643 3643
3644 unified_range_table_get_range (p, i, &first, &last, 3644 unified_range_table_get_range (p, i, &first, &last,
3645 &dummy_val); 3645 &dummy_val);
3646 for (jj = first; jj <= last && jj < 0x80; jj++) 3646 for (jj = first; jj <= last && jj < 0x80; jj++)
3647 fastmap[jj] = 1; 3647 fastmap[jj] = 1;
3648 /* Ranges below 0x100 can span charsets, but there 3648 /* Ranges below 0x100 can span charsets, but there
3649 are only two (Control-1 and Latin-1), and 3649 are only two (Control-1 and Latin-1), and
3650 either first or last has to be in them. */ 3650 either first or last has to be in them. */
3651 set_charptr_emchar (strr, first); 3651 set_itext_ichar (strr, first);
3652 fastmap[*strr] = 1; 3652 fastmap[*strr] = 1;
3653 if (last < 0x100) 3653 if (last < 0x100)
3654 { 3654 {
3655 set_charptr_emchar (strr, last); 3655 set_itext_ichar (strr, last);
3656 fastmap[*strr] = 1; 3656 fastmap[*strr] = 1;
3657 } 3657 }
3658 } 3658 }
3659 } 3659 }
3660 break; 3660 break;
4100 return -1; 4100 return -1;
4101 else 4101 else
4102 { 4102 {
4103 d = ((const unsigned char *) 4103 d = ((const unsigned char *)
4104 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4104 (startpos >= size1 ? string2 - size1 : string1) + startpos);
4105 range = charptr_emchar_len_fmt (d, fmt); 4105 range = itext_ichar_len_fmt (d, fmt);
4106 } 4106 }
4107 } 4107 }
4108 4108
4109 #ifdef emacs 4109 #ifdef emacs
4110 /* In a forward search for something that starts with \=. 4110 /* In a forward search for something that starts with \=.
4179 stop_d = d + range - lim; 4179 stop_d = d + range - lim;
4180 4180
4181 /* We want to find the next location (including the current 4181 /* We want to find the next location (including the current
4182 one) where the previous char is a newline, so back up one 4182 one) where the previous char is a newline, so back up one
4183 and search forward for a newline. */ 4183 and search forward for a newline. */
4184 DEC_CHARPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ 4184 DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */
4185 4185
4186 /* Written out as an if-else to avoid testing `translate' 4186 /* Written out as an if-else to avoid testing `translate'
4187 inside the loop. */ 4187 inside the loop. */
4188 if (TRANSLATE_P (translate)) 4188 if (TRANSLATE_P (translate))
4189 while (d < stop_d && 4189 while (d < stop_d &&
4190 RE_TRANSLATE_1 (charptr_emchar_fmt (d, fmt, lispobj)) 4190 RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj))
4191 != '\n') 4191 != '\n')
4192 INC_CHARPTR_FMT (d, fmt); 4192 INC_IBYTEPTR_FMT (d, fmt);
4193 else 4193 else
4194 while (d < stop_d && 4194 while (d < stop_d &&
4195 charptr_emchar_ascii_fmt (d, fmt, lispobj) != '\n') 4195 itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n')
4196 INC_CHARPTR_FMT (d, fmt); 4196 INC_IBYTEPTR_FMT (d, fmt);
4197 4197
4198 /* If we were stopped by a newline, skip forward over it. 4198 /* If we were stopped by a newline, skip forward over it.
4199 Otherwise we will get in an infloop when our start position 4199 Otherwise we will get in an infloop when our start position
4200 was at begline. */ 4200 was at begline. */
4201 if (d < stop_d) 4201 if (d < stop_d)
4202 INC_CHARPTR_FMT (d, fmt); 4202 INC_IBYTEPTR_FMT (d, fmt);
4203 range -= d - orig_d; 4203 range -= d - orig_d;
4204 startpos += d - orig_d; 4204 startpos += d - orig_d;
4205 #if 1 4205 #if 1
4206 assert (!forward_search_p || range >= 0); 4206 assert (!forward_search_p || range >= 0);
4207 #endif 4207 #endif
4208 } 4208 }
4209 else if (range < 0) 4209 else if (range < 0)
4210 { 4210 {
4211 /* We're lazy, like in the fastmap code below */ 4211 /* We're lazy, like in the fastmap code below */
4212 Emchar c; 4212 Ichar c;
4213 4213
4214 d = ((const unsigned char *) 4214 d = ((const unsigned char *)
4215 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4215 (startpos >= size1 ? string2 - size1 : string1) + startpos);
4216 DEC_CHARPTR_FMT (d, fmt); 4216 DEC_IBYTEPTR_FMT (d, fmt);
4217 c = charptr_emchar_fmt (d, fmt, lispobj); 4217 c = itext_ichar_fmt (d, fmt, lispobj);
4218 c = RE_TRANSLATE (c); 4218 c = RE_TRANSLATE (c);
4219 if (c != '\n') 4219 if (c != '\n')
4220 goto advance; 4220 goto advance;
4221 } 4221 }
4222 } 4222 }
4259 { 4259 {
4260 while (range > lim) 4260 while (range > lim)
4261 { 4261 {
4262 re_char *old_d = d; 4262 re_char *old_d = d;
4263 #ifdef MULE 4263 #ifdef MULE
4264 Intbyte tempch[MAX_EMCHAR_LEN]; 4264 Ibyte tempch[MAX_ICHAR_LEN];
4265 Emchar buf_ch = 4265 Ichar buf_ch =
4266 RE_TRANSLATE_1 (charptr_emchar_fmt (d, fmt, lispobj)); 4266 RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj));
4267 set_charptr_emchar (tempch, buf_ch); 4267 set_itext_ichar (tempch, buf_ch);
4268 if (fastmap[*tempch]) 4268 if (fastmap[*tempch])
4269 break; 4269 break;
4270 #else 4270 #else
4271 if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) 4271 if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)])
4272 break; 4272 break;
4273 #endif /* MULE */ 4273 #endif /* MULE */
4274 INC_CHARPTR_FMT (d, fmt); 4274 INC_IBYTEPTR_FMT (d, fmt);
4275 range -= (d - old_d); 4275 range -= (d - old_d);
4276 #if 1 4276 #if 1
4277 assert (!forward_search_p || range >= 0); 4277 assert (!forward_search_p || range >= 0);
4278 #endif 4278 #endif
4279 } 4279 }
4282 else if (fmt != FORMAT_DEFAULT) 4282 else if (fmt != FORMAT_DEFAULT)
4283 { 4283 {
4284 while (range > lim) 4284 while (range > lim)
4285 { 4285 {
4286 re_char *old_d = d; 4286 re_char *old_d = d;
4287 Intbyte tempch[MAX_EMCHAR_LEN]; 4287 Ibyte tempch[MAX_ICHAR_LEN];
4288 Emchar buf_ch = charptr_emchar_fmt (d, fmt, lispobj); 4288 Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj);
4289 set_charptr_emchar (tempch, buf_ch); 4289 set_itext_ichar (tempch, buf_ch);
4290 if (fastmap[*tempch]) 4290 if (fastmap[*tempch])
4291 break; 4291 break;
4292 INC_CHARPTR_FMT (d, fmt); 4292 INC_IBYTEPTR_FMT (d, fmt);
4293 range -= (d - old_d); 4293 range -= (d - old_d);
4294 #if 1 4294 #if 1
4295 assert (!forward_search_p || range >= 0); 4295 assert (!forward_search_p || range >= 0);
4296 #endif 4296 #endif
4297 } 4297 }
4300 else 4300 else
4301 { 4301 {
4302 while (range > lim && !fastmap[*d]) 4302 while (range > lim && !fastmap[*d])
4303 { 4303 {
4304 re_char *old_d = d; 4304 re_char *old_d = d;
4305 INC_CHARPTR (d); 4305 INC_IBYTEPTR (d);
4306 range -= (d - old_d); 4306 range -= (d - old_d);
4307 #if 1 4307 #if 1
4308 assert (!forward_search_p || range >= 0); 4308 assert (!forward_search_p || range >= 0);
4309 #endif 4309 #endif
4310 } 4310 }
4319 since backward searches aren't so common. */ 4319 since backward searches aren't so common. */
4320 d = ((const unsigned char *) 4320 d = ((const unsigned char *)
4321 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4321 (startpos >= size1 ? string2 - size1 : string1) + startpos);
4322 #ifdef MULE 4322 #ifdef MULE
4323 { 4323 {
4324 Intbyte tempch[MAX_EMCHAR_LEN]; 4324 Ibyte tempch[MAX_ICHAR_LEN];
4325 Emchar buf_ch = 4325 Ichar buf_ch =
4326 RE_TRANSLATE (charptr_emchar_fmt (d, fmt, lispobj)); 4326 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj));
4327 set_charptr_emchar (tempch, buf_ch); 4327 set_itext_ichar (tempch, buf_ch);
4328 if (!fastmap[*tempch]) 4328 if (!fastmap[*tempch])
4329 goto advance; 4329 goto advance;
4330 } 4330 }
4331 #else 4331 #else
4332 if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) 4332 if (!fastmap[(unsigned char) RE_TRANSLATE (*d)])
4365 else if (range > 0) 4365 else if (range > 0)
4366 { 4366 {
4367 Bytecount d_size; 4367 Bytecount d_size;
4368 d = ((const unsigned char *) 4368 d = ((const unsigned char *)
4369 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4369 (startpos >= size1 ? string2 - size1 : string1) + startpos);
4370 d_size = charptr_emchar_len_fmt (d, fmt); 4370 d_size = itext_ichar_len_fmt (d, fmt);
4371 range -= d_size; 4371 range -= d_size;
4372 #if 1 4372 #if 1
4373 assert (!forward_search_p || range >= 0); 4373 assert (!forward_search_p || range >= 0);
4374 #endif 4374 #endif
4375 startpos += d_size; 4375 startpos += d_size;
4379 Bytecount d_size; 4379 Bytecount d_size;
4380 /* Note startpos > size1 not >=. If we are on the 4380 /* Note startpos > size1 not >=. If we are on the
4381 string1/string2 boundary, we want to backup into string1. */ 4381 string1/string2 boundary, we want to backup into string1. */
4382 d = ((const unsigned char *) 4382 d = ((const unsigned char *)
4383 (startpos > size1 ? string2 - size1 : string1) + startpos); 4383 (startpos > size1 ? string2 - size1 : string1) + startpos);
4384 DEC_CHARPTR_FMT (d, fmt); 4384 DEC_IBYTEPTR_FMT (d, fmt);
4385 d_size = charptr_emchar_len_fmt (d, fmt); 4385 d_size = itext_ichar_len_fmt (d, fmt);
4386 range += d_size; 4386 range += d_size;
4387 #if 1 4387 #if 1
4388 assert (!forward_search_p || range >= 0); 4388 assert (!forward_search_p || range >= 0);
4389 #endif 4389 #endif
4390 startpos -= d_size; 4390 startpos -= d_size;
4970 { 4970 {
4971 #ifdef MULE 4971 #ifdef MULE
4972 Bytecount pat_len; 4972 Bytecount pat_len;
4973 4973
4974 REGEX_PREFETCH (); 4974 REGEX_PREFETCH ();
4975 if (RE_TRANSLATE_1 (charptr_emchar_fmt (d, fmt, lispobj)) 4975 if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj))
4976 != charptr_emchar (p)) 4976 != itext_ichar (p))
4977 goto fail; 4977 goto fail;
4978 4978
4979 pat_len = charptr_emchar_len (p); 4979 pat_len = itext_ichar_len (p);
4980 p += pat_len; 4980 p += pat_len;
4981 INC_CHARPTR_FMT (d, fmt); 4981 INC_IBYTEPTR_FMT (d, fmt);
4982 4982
4983 mcnt -= pat_len; 4983 mcnt -= pat_len;
4984 #else /* not MULE */ 4984 #else /* not MULE */
4985 REGEX_PREFETCH (); 4985 REGEX_PREFETCH ();
4986 if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) 4986 if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++)
5001 do 5001 do
5002 { 5002 {
5003 Bytecount pat_len; 5003 Bytecount pat_len;
5004 5004
5005 REGEX_PREFETCH (); 5005 REGEX_PREFETCH ();
5006 if (charptr_emchar_fmt (d, fmt, lispobj) != 5006 if (itext_ichar_fmt (d, fmt, lispobj) !=
5007 charptr_emchar (p)) 5007 itext_ichar (p))
5008 goto fail; 5008 goto fail;
5009 5009
5010 pat_len = charptr_emchar_len (p); 5010 pat_len = itext_ichar_len (p);
5011 p += pat_len; 5011 p += pat_len;
5012 INC_CHARPTR_FMT (d, fmt); 5012 INC_IBYTEPTR_FMT (d, fmt);
5013 5013
5014 mcnt -= pat_len; 5014 mcnt -= pat_len;
5015 } 5015 }
5016 while (mcnt > 0); 5016 while (mcnt > 0);
5017 } 5017 }
5036 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 5036 DEBUG_PRINT1 ("EXECUTING anychar.\n");
5037 5037
5038 REGEX_PREFETCH (); 5038 REGEX_PREFETCH ();
5039 5039
5040 if ((!(bufp->syntax & RE_DOT_NEWLINE) && 5040 if ((!(bufp->syntax & RE_DOT_NEWLINE) &&
5041 RE_TRANSLATE (charptr_emchar_fmt (d, fmt, lispobj)) == '\n') 5041 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n')
5042 || (bufp->syntax & RE_DOT_NOT_NULL && 5042 || (bufp->syntax & RE_DOT_NOT_NULL &&
5043 RE_TRANSLATE (charptr_emchar_fmt (d, fmt, lispobj)) == 5043 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) ==
5044 '\000')) 5044 '\000'))
5045 goto fail; 5045 goto fail;
5046 5046
5047 SET_REGS_MATCHED (); 5047 SET_REGS_MATCHED ();
5048 DEBUG_PRINT2 (" Matched `%d'.\n", *d); 5048 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
5049 INC_CHARPTR_FMT (d, fmt); /* XEmacs change */ 5049 INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */
5050 break; 5050 break;
5051 5051
5052 5052
5053 case charset: 5053 case charset:
5054 case charset_not: 5054 case charset_not:
5057 re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; 5057 re_bool not_p = (re_opcode_t) *(p - 1) == charset_not;
5058 5058
5059 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); 5059 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : "");
5060 5060
5061 REGEX_PREFETCH (); 5061 REGEX_PREFETCH ();
5062 c = charptr_emchar_fmt (d, fmt, lispobj); 5062 c = itext_ichar_fmt (d, fmt, lispobj);
5063 c = RE_TRANSLATE (c); /* The character to match. */ 5063 c = RE_TRANSLATE (c); /* The character to match. */
5064 5064
5065 /* Cast to `unsigned int' instead of `unsigned char' in case the 5065 /* Cast to `unsigned int' instead of `unsigned char' in case the
5066 bit list is a full 32 bytes long. */ 5066 bit list is a full 32 bytes long. */
5067 if (c < (unsigned int) (*p * BYTEWIDTH) 5067 if (c < (unsigned int) (*p * BYTEWIDTH)
5071 p += 1 + *p; 5071 p += 1 + *p;
5072 5072
5073 if (!not_p) goto fail; 5073 if (!not_p) goto fail;
5074 5074
5075 SET_REGS_MATCHED (); 5075 SET_REGS_MATCHED ();
5076 INC_CHARPTR_FMT (d, fmt); /* XEmacs change */ 5076 INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */
5077 break; 5077 break;
5078 } 5078 }
5079 5079
5080 #ifdef MULE 5080 #ifdef MULE
5081 case charset_mule: 5081 case charset_mule:
5082 case charset_mule_not: 5082 case charset_mule_not:
5083 { 5083 {
5084 REGISTER Emchar c; 5084 REGISTER Ichar c;
5085 re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; 5085 re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not;
5086 5086
5087 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); 5087 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : "");
5088 5088
5089 REGEX_PREFETCH (); 5089 REGEX_PREFETCH ();
5090 c = charptr_emchar_fmt (d, fmt, lispobj); 5090 c = itext_ichar_fmt (d, fmt, lispobj);
5091 c = RE_TRANSLATE (c); /* The character to match. */ 5091 c = RE_TRANSLATE (c); /* The character to match. */
5092 5092
5093 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) 5093 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil)))
5094 not_p = !not_p; 5094 not_p = !not_p;
5095 5095
5096 p += unified_range_table_bytes_used (p); 5096 p += unified_range_table_bytes_used (p);
5097 5097
5098 if (!not_p) goto fail; 5098 if (!not_p) goto fail;
5099 5099
5100 SET_REGS_MATCHED (); 5100 SET_REGS_MATCHED ();
5101 INC_CHARPTR_FMT (d, fmt); 5101 INC_IBYTEPTR_FMT (d, fmt);
5102 break; 5102 break;
5103 } 5103 }
5104 #endif /* MULE */ 5104 #endif /* MULE */
5105 5105
5106 5106
5382 if (!bufp->not_bol) break; 5382 if (!bufp->not_bol) break;
5383 } 5383 }
5384 else 5384 else
5385 { 5385 {
5386 re_char *d2 = d; 5386 re_char *d2 = d;
5387 DEC_CHARPTR (d2); 5387 DEC_IBYTEPTR (d2);
5388 if (charptr_emchar_ascii_fmt (d2, fmt, lispobj) == '\n' && 5388 if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' &&
5389 bufp->newline_anchor) 5389 bufp->newline_anchor)
5390 break; 5390 break;
5391 } 5391 }
5392 /* In all other cases, we fail. */ 5392 /* In all other cases, we fail. */
5393 goto fail; 5393 goto fail;
5402 if (!bufp->not_eol) break; 5402 if (!bufp->not_eol) break;
5403 } 5403 }
5404 5404
5405 /* We have to ``prefetch'' the next character. */ 5405 /* We have to ``prefetch'' the next character. */
5406 else if ((d == end1 ? 5406 else if ((d == end1 ?
5407 charptr_emchar_ascii_fmt (string2, fmt, lispobj) : 5407 itext_ichar_ascii_fmt (string2, fmt, lispobj) :
5408 charptr_emchar_ascii_fmt (d, fmt, lispobj)) == '\n' 5408 itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n'
5409 && bufp->newline_anchor) 5409 && bufp->newline_anchor)
5410 { 5410 {
5411 break; 5411 break;
5412 } 5412 }
5413 goto fail; 5413 goto fail;
5791 re_char *d_after = POS_AFTER_GAP_UNSAFE (d); 5791 re_char *d_after = POS_AFTER_GAP_UNSAFE (d);
5792 5792
5793 /* emch1 is the character before d, syn1 is the syntax of 5793 /* emch1 is the character before d, syn1 is the syntax of
5794 emch1, emch2 is the character at d, and syn2 is the 5794 emch1, emch2 is the character at d, and syn2 is the
5795 syntax of emch2. */ 5795 syntax of emch2. */
5796 Emchar emch1, emch2; 5796 Ichar emch1, emch2;
5797 int syn1, syn2; 5797 int syn1, syn2;
5798 #ifdef emacs 5798 #ifdef emacs
5799 Charxpos pos_before; 5799 Charxpos pos_before;
5800 #endif 5800 #endif
5801 5801
5802 DEC_CHARPTR_FMT (d_before, fmt); 5802 DEC_IBYTEPTR_FMT (d_before, fmt);
5803 emch1 = charptr_emchar_fmt (d_before, fmt, lispobj); 5803 emch1 = itext_ichar_fmt (d_before, fmt, lispobj);
5804 emch2 = charptr_emchar_fmt (d_after, fmt, lispobj); 5804 emch2 = itext_ichar_fmt (d_after, fmt, lispobj);
5805 5805
5806 #ifdef emacs 5806 #ifdef emacs
5807 pos_before = 5807 pos_before =
5808 offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)) - 1; 5808 offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)) - 1;
5809 UPDATE_SYNTAX_CACHE (scache, pos_before); 5809 UPDATE_SYNTAX_CACHE (scache, pos_before);
5836 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) 5836 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5837 break; 5837 break;
5838 5838
5839 */ 5839 */
5840 re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); 5840 re_char *dtmp = POS_AFTER_GAP_UNSAFE (d);
5841 Emchar emch = charptr_emchar_fmt (dtmp, fmt, lispobj); 5841 Ichar emch = itext_ichar_fmt (dtmp, fmt, lispobj);
5842 #ifdef emacs 5842 #ifdef emacs
5843 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); 5843 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d));
5844 UPDATE_SYNTAX_CACHE (scache, charpos); 5844 UPDATE_SYNTAX_CACHE (scache, charpos);
5845 #endif 5845 #endif
5846 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) 5846 if (SYNTAX_FROM_CACHE (scache, emch) != Sword)
5847 goto fail; 5847 goto fail;
5848 if (AT_STRINGS_BEG (d)) 5848 if (AT_STRINGS_BEG (d))
5849 break; 5849 break;
5850 dtmp = POS_BEFORE_GAP_UNSAFE (d); 5850 dtmp = POS_BEFORE_GAP_UNSAFE (d);
5851 DEC_CHARPTR_FMT (dtmp, fmt); 5851 DEC_IBYTEPTR_FMT (dtmp, fmt);
5852 emch = charptr_emchar_fmt (dtmp, fmt, lispobj); 5852 emch = itext_ichar_fmt (dtmp, fmt, lispobj);
5853 #ifdef emacs 5853 #ifdef emacs
5854 UPDATE_SYNTAX_CACHE_BACKWARD (scache, charpos - 1); 5854 UPDATE_SYNTAX_CACHE_BACKWARD (scache, charpos - 1);
5855 #endif 5855 #endif
5856 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) 5856 if (SYNTAX_FROM_CACHE (scache, emch) != Sword)
5857 break; 5857 break;
5870 break; 5870 break;
5871 5871
5872 The or condition is incorrect (reversed). 5872 The or condition is incorrect (reversed).
5873 */ 5873 */
5874 re_char *dtmp; 5874 re_char *dtmp;
5875 Emchar emch; 5875 Ichar emch;
5876 #ifdef emacs 5876 #ifdef emacs
5877 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); 5877 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d));
5878 UPDATE_SYNTAX_CACHE (scache, charpos); 5878 UPDATE_SYNTAX_CACHE (scache, charpos);
5879 #endif 5879 #endif
5880 dtmp = POS_BEFORE_GAP_UNSAFE (d); 5880 dtmp = POS_BEFORE_GAP_UNSAFE (d);
5881 DEC_CHARPTR_FMT (dtmp, fmt); 5881 DEC_IBYTEPTR_FMT (dtmp, fmt);
5882 emch = charptr_emchar_fmt (dtmp, fmt, lispobj); 5882 emch = itext_ichar_fmt (dtmp, fmt, lispobj);
5883 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) 5883 if (SYNTAX_FROM_CACHE (scache, emch) != Sword)
5884 goto fail; 5884 goto fail;
5885 if (AT_STRINGS_END (d)) 5885 if (AT_STRINGS_END (d))
5886 break; 5886 break;
5887 dtmp = POS_AFTER_GAP_UNSAFE (d); 5887 dtmp = POS_AFTER_GAP_UNSAFE (d);
5888 emch = charptr_emchar_fmt (dtmp, fmt, lispobj); 5888 emch = itext_ichar_fmt (dtmp, fmt, lispobj);
5889 #ifdef emacs 5889 #ifdef emacs
5890 UPDATE_SYNTAX_CACHE_FORWARD (scache, charpos + 1); 5890 UPDATE_SYNTAX_CACHE_FORWARD (scache, charpos + 1);
5891 #endif 5891 #endif
5892 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) 5892 if (SYNTAX_FROM_CACHE (scache, emch) != Sword)
5893 break; 5893 break;
5930 matchsyntax: 5930 matchsyntax:
5931 should_succeed = 1; 5931 should_succeed = 1;
5932 matchornotsyntax: 5932 matchornotsyntax:
5933 { 5933 {
5934 int matches; 5934 int matches;
5935 Emchar emch; 5935 Ichar emch;
5936 5936
5937 REGEX_PREFETCH (); 5937 REGEX_PREFETCH ();
5938 UPDATE_SYNTAX_CACHE 5938 UPDATE_SYNTAX_CACHE
5939 (scache, offset_to_charxpos (lispobj, PTR_TO_OFFSET (d))); 5939 (scache, offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)));
5940 5940
5941 emch = charptr_emchar_fmt (d, fmt, lispobj); 5941 emch = itext_ichar_fmt (d, fmt, lispobj);
5942 matches = (SYNTAX_FROM_CACHE (scache, emch) == 5942 matches = (SYNTAX_FROM_CACHE (scache, emch) ==
5943 (enum syntaxcode) mcnt); 5943 (enum syntaxcode) mcnt);
5944 INC_CHARPTR_FMT (d, fmt); 5944 INC_IBYTEPTR_FMT (d, fmt);
5945 if (matches != should_succeed) 5945 if (matches != should_succeed)
5946 goto fail; 5946 goto fail;
5947 SET_REGS_MATCHED (); 5947 SET_REGS_MATCHED ();
5948 } 5948 }
5949 break; 5949 break;
5964 /* 97/2/17 jhod Mule category code patch */ 5964 /* 97/2/17 jhod Mule category code patch */
5965 case categoryspec: 5965 case categoryspec:
5966 should_succeed = 1; 5966 should_succeed = 1;
5967 matchornotcategory: 5967 matchornotcategory:
5968 { 5968 {
5969 Emchar emch; 5969 Ichar emch;
5970 5970
5971 mcnt = *p++; 5971 mcnt = *p++;
5972 REGEX_PREFETCH (); 5972 REGEX_PREFETCH ();
5973 emch = charptr_emchar_fmt (d, fmt, lispobj); 5973 emch = itext_ichar_fmt (d, fmt, lispobj);
5974 INC_CHARPTR_FMT (d, fmt); 5974 INC_IBYTEPTR_FMT (d, fmt);
5975 if (check_category_char (emch, BUFFER_CATEGORY_TABLE (lispbuf), 5975 if (check_category_char (emch, BUFFER_CATEGORY_TABLE (lispbuf),
5976 mcnt, should_succeed)) 5976 mcnt, should_succeed))
5977 goto fail; 5977 goto fail;
5978 SET_REGS_MATCHED (); 5978 SET_REGS_MATCHED ();
5979 } 5979 }
6324 re_char *p1_end = s1 + len; 6324 re_char *p1_end = s1 + len;
6325 re_char *p2_end = s2 + len; 6325 re_char *p2_end = s2 + len;
6326 6326
6327 while (p1 != p1_end && p2 != p2_end) 6327 while (p1 != p1_end && p2 != p2_end)
6328 { 6328 {
6329 Emchar p1_ch, p2_ch; 6329 Ichar p1_ch, p2_ch;
6330 6330
6331 p1_ch = charptr_emchar_fmt (p1, fmt, lispobj); 6331 p1_ch = itext_ichar_fmt (p1, fmt, lispobj);
6332 p2_ch = charptr_emchar_fmt (p2, fmt, lispobj); 6332 p2_ch = itext_ichar_fmt (p2, fmt, lispobj);
6333 6333
6334 if (RE_TRANSLATE_1 (p1_ch) 6334 if (RE_TRANSLATE_1 (p1_ch)
6335 != RE_TRANSLATE_1 (p2_ch)) 6335 != RE_TRANSLATE_1 (p2_ch))
6336 return 1; 6336 return 1;
6337 INC_CHARPTR_FMT (p1, fmt); 6337 INC_IBYTEPTR_FMT (p1, fmt);
6338 INC_CHARPTR_FMT (p2, fmt); 6338 INC_IBYTEPTR_FMT (p2, fmt);
6339 } 6339 }
6340 #else /* not MULE */ 6340 #else /* not MULE */
6341 while (len) 6341 while (len)
6342 { 6342 {
6343 if (RE_TRANSLATE_1 (*p1++) != RE_TRANSLATE_1 (*p2++)) return 1; 6343 if (RE_TRANSLATE_1 (*p1++) != RE_TRANSLATE_1 (*p2++)) return 1;