Mercurial > hg > xemacs-beta
comparison src/regex.c @ 867:804517e16990
[xemacs-hg @ 2002-06-05 09:54:39 by ben]
Textual renaming: text/char names
abbrev.c, alloc.c, buffer.c, buffer.h, bytecode.c, callint.c, casefiddle.c, casetab.c, charset.h, chartab.c, chartab.h, cmds.c, console-gtk.h, console-msw.c, console-msw.h, console-stream.c, console-tty.c, console-x.c, console-x.h, console.h, data.c, device-msw.c, device-x.c, dialog-msw.c, dired-msw.c, dired.c, doc.c, doprnt.c, editfns.c, eldap.c, emodules.c, eval.c, event-Xt.c, event-gtk.c, event-msw.c, event-stream.c, event-unixoid.c, events.c, events.h, file-coding.c, file-coding.h, fileio.c, filelock.c, fns.c, font-lock.c, frame-gtk.c, frame-msw.c, frame-x.c, frame.c, glyphs-eimage.c, glyphs-msw.c, glyphs-x.c, glyphs.c, glyphs.h, gpmevent.c, gui-x.c, gui-x.h, gui.c, gui.h, hpplay.c, indent.c, insdel.c, insdel.h, intl-win32.c, keymap.c, line-number.c, line-number.h, lisp-disunion.h, lisp-union.h, lisp.h, lread.c, lrecord.h, lstream.c, lstream.h, md5.c, menubar-msw.c, menubar-x.c, menubar.c, minibuf.c, mule-ccl.c, mule-charset.c, mule-coding.c, mule-wnnfns.c, ndir.h, nt.c, objects-gtk.c, objects-gtk.h, objects-msw.c, objects-tty.c, objects-x.c, objects.c, objects.h, postgresql.c, print.c, process-nt.c, process-unix.c, process.c, procimpl.h, realpath.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-tty.c, redisplay-x.c, redisplay.c, redisplay.h, regex.c, search.c, select-common.h, select-gtk.c, select-x.c, sound.h, symbols.c, syntax.c, syntax.h, sysdep.c, sysdep.h, sysdir.h, sysfile.h, sysproc.h, syspwd.h, systime.h, syswindows.h, termcap.c, tests.c, text.c, text.h, toolbar-common.c, tooltalk.c, ui-gtk.c, unexnt.c, unicode.c, win32.c: Text/char naming rationalization.
[a] distinguish between "charptr" when it refers to operations on
the pointer itself and when it refers to operations on text; and
[b] use consistent naming for everything referring to internal
format, i.e.
Itext == text in internal format
Ibyte == a byte in such text
Ichar == a char as represented in internal character format
Thus, for example:
set_charptr_emchar -> set_itext_ichar
The pre and post tags on either side of this change are:
pre-internal-format-textual-renaming
post-internal-format-textual-renaming
See the Internals Manual for details of exactly how this was done,
how to handle the change in your workspace, etc.
author | ben |
---|---|
date | Wed, 05 Jun 2002 09:58:45 +0000 |
parents | 6728e641994e |
children | ccaf90c5a53a |
comparison
equal
deleted
inserted
replaced
866:613552a02607 | 867:804517e16990 |
---|---|
99 { | 99 { |
100 } | 100 } |
101 | 101 |
102 #endif /* MULE */ | 102 #endif /* MULE */ |
103 | 103 |
104 #define RE_TRANSLATE_1(ch) TRT_TABLE_OF (translate, (Emchar) ch) | 104 #define RE_TRANSLATE_1(ch) TRT_TABLE_OF (translate, (Ichar) ch) |
105 #define TRANSLATE_P(tr) (!NILP (tr)) | 105 #define TRANSLATE_P(tr) (!NILP (tr)) |
106 | 106 |
107 /* Converts the pointer to the char to BEG-based offset from the start. */ | 107 /* Converts the pointer to the char to BEG-based offset from the start. */ |
108 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \ | 108 #define PTR_TO_OFFSET(d) (MATCHING_IN_FIRST_STRING \ |
109 ? (d) - string1 : (d) - (string2 - size1)) | 109 ? (d) - string1 : (d) - (string2 - size1)) |
140 #endif | 140 #endif |
141 #endif | 141 #endif |
142 | 142 |
143 #include <stdlib.h> | 143 #include <stdlib.h> |
144 | 144 |
145 #define charptr_emchar(str) ((Emchar) (str)[0]) | 145 #define itext_ichar(str) ((Ichar) (str)[0]) |
146 #define charptr_emchar_fmt(str, fmt, object) ((Emchar) (str)[0]) | 146 #define itext_ichar_fmt(str, fmt, object) ((Ichar) (str)[0]) |
147 #define charptr_emchar_ascii_fmt(str, fmt, object) ((Emchar) (str)[0]) | 147 #define itext_ichar_ascii_fmt(str, fmt, object) ((Ichar) (str)[0]) |
148 | 148 |
149 #if (LONGBITS > INTBITS) | 149 #if (LONGBITS > INTBITS) |
150 # define EMACS_INT long | 150 # define EMACS_INT long |
151 #else | 151 #else |
152 # define EMACS_INT int | 152 # define EMACS_INT int |
153 #endif | 153 #endif |
154 | 154 |
155 typedef int Emchar; | 155 typedef int Ichar; |
156 | 156 |
157 #define INC_CHARPTR(p) ((p)++) | 157 #define INC_IBYTEPTR(p) ((p)++) |
158 #define INC_CHARPTR_FMT(p, fmt) ((p)++) | 158 #define INC_IBYTEPTR_FMT(p, fmt) ((p)++) |
159 #define DEC_CHARPTR(p) ((p)--) | 159 #define DEC_IBYTEPTR(p) ((p)--) |
160 #define DEC_CHARPTR_FMT(p, fmt) ((p)--) | 160 #define DEC_IBYTEPTR_FMT(p, fmt) ((p)--) |
161 #define charptr_emchar_len(ptr) 1 | 161 #define itext_ichar_len(ptr) 1 |
162 #define charptr_emchar_len_fmt(ptr, fmt) 1 | 162 #define itext_ichar_len_fmt(ptr, fmt) 1 |
163 | 163 |
164 #include <string.h> | 164 #include <string.h> |
165 | 165 |
166 /* Define the syntax stuff for \<, \>, etc. */ | 166 /* Define the syntax stuff for \<, \>, etc. */ |
167 | 167 |
1593 /* Fetch the next character in the uncompiled pattern, with no | 1593 /* Fetch the next character in the uncompiled pattern, with no |
1594 translation. */ | 1594 translation. */ |
1595 #define PATFETCH_RAW(c) \ | 1595 #define PATFETCH_RAW(c) \ |
1596 do {if (p == pend) return REG_EEND; \ | 1596 do {if (p == pend) return REG_EEND; \ |
1597 assert (p < pend); \ | 1597 assert (p < pend); \ |
1598 c = charptr_emchar (p); \ | 1598 c = itext_ichar (p); \ |
1599 INC_CHARPTR (p); \ | 1599 INC_IBYTEPTR (p); \ |
1600 } while (0) | 1600 } while (0) |
1601 | 1601 |
1602 /* Go backwards one character in the pattern. */ | 1602 /* Go backwards one character in the pattern. */ |
1603 #define PATUNFETCH DEC_CHARPTR (p) | 1603 #define PATUNFETCH DEC_IBYTEPTR (p) |
1604 | 1604 |
1605 /* If `translate' is non-null, return translate[D], else just D. We | 1605 /* If `translate' is non-null, return translate[D], else just D. We |
1606 cast the subscript to translate because some data is declared as | 1606 cast the subscript to translate because some data is declared as |
1607 `char *', to avoid warnings when a string constant is passed. But | 1607 `char *', to avoid warnings when a string constant is passed. But |
1608 when we use a character as a subscript we must make it unsigned. */ | 1608 when we use a character as a subscript we must make it unsigned. */ |
3183 BUF_PUSH (c); | 3183 BUF_PUSH (c); |
3184 (*pending_exact)++; | 3184 (*pending_exact)++; |
3185 #else | 3185 #else |
3186 { | 3186 { |
3187 Bytecount bt_count; | 3187 Bytecount bt_count; |
3188 Intbyte tmp_buf[MAX_EMCHAR_LEN]; | 3188 Ibyte tmp_buf[MAX_ICHAR_LEN]; |
3189 int i; | 3189 int i; |
3190 | 3190 |
3191 bt_count = set_charptr_emchar (tmp_buf, c); | 3191 bt_count = set_itext_ichar (tmp_buf, c); |
3192 | 3192 |
3193 for (i = 0; i < bt_count; i++) | 3193 for (i = 0; i < bt_count; i++) |
3194 { | 3194 { |
3195 BUF_PUSH (tmp_buf[i]); | 3195 BUF_PUSH (tmp_buf[i]); |
3196 (*pending_exact)++; | 3196 (*pending_exact)++; |
3400 | 3400 |
3401 static reg_errcode_t | 3401 static reg_errcode_t |
3402 compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, | 3402 compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, |
3403 reg_syntax_t syntax, unsigned char *buf_end) | 3403 reg_syntax_t syntax, unsigned char *buf_end) |
3404 { | 3404 { |
3405 Emchar this_char; | 3405 Ichar this_char; |
3406 | 3406 |
3407 re_char *p = *p_ptr; | 3407 re_char *p = *p_ptr; |
3408 int range_start, range_end; | 3408 int range_start, range_end; |
3409 | 3409 |
3410 if (p == pend) | 3410 if (p == pend) |
3446 static reg_errcode_t | 3446 static reg_errcode_t |
3447 compile_extended_range (re_char **p_ptr, re_char *pend, | 3447 compile_extended_range (re_char **p_ptr, re_char *pend, |
3448 RE_TRANSLATE_TYPE translate, | 3448 RE_TRANSLATE_TYPE translate, |
3449 reg_syntax_t syntax, Lisp_Object rtab) | 3449 reg_syntax_t syntax, Lisp_Object rtab) |
3450 { | 3450 { |
3451 Emchar this_char, range_start, range_end; | 3451 Ichar this_char, range_start, range_end; |
3452 const Intbyte *p; | 3452 const Ibyte *p; |
3453 | 3453 |
3454 if (*p_ptr == pend) | 3454 if (*p_ptr == pend) |
3455 return REG_ERANGE; | 3455 return REG_ERANGE; |
3456 | 3456 |
3457 p = (const Intbyte *) *p_ptr; | 3457 p = (const Ibyte *) *p_ptr; |
3458 range_end = charptr_emchar (p); | 3458 range_end = itext_ichar (p); |
3459 p--; /* back to '-' */ | 3459 p--; /* back to '-' */ |
3460 DEC_CHARPTR (p); /* back to start of range */ | 3460 DEC_IBYTEPTR (p); /* back to start of range */ |
3461 /* We also want to fetch the endpoints without translating them; the | 3461 /* We also want to fetch the endpoints without translating them; the |
3462 appropriate translation is done in the bit-setting loop below. */ | 3462 appropriate translation is done in the bit-setting loop below. */ |
3463 range_start = charptr_emchar (p); | 3463 range_start = itext_ichar (p); |
3464 INC_CHARPTR (*p_ptr); | 3464 INC_IBYTEPTR (*p_ptr); |
3465 | 3465 |
3466 /* If the start is after the end, the range is empty. */ | 3466 /* If the start is after the end, the range is empty. */ |
3467 if (range_start > range_end) | 3467 if (range_start > range_end) |
3468 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; | 3468 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; |
3469 | 3469 |
3470 /* Can't have ranges spanning different charsets, except maybe for | 3470 /* Can't have ranges spanning different charsets, except maybe for |
3471 ranges entirely within the first 256 chars. */ | 3471 ranges entirely within the first 256 chars. */ |
3472 | 3472 |
3473 if ((range_start >= 0x100 || range_end >= 0x100) | 3473 if ((range_start >= 0x100 || range_end >= 0x100) |
3474 && emchar_leading_byte (range_start) != | 3474 && ichar_leading_byte (range_start) != |
3475 emchar_leading_byte (range_end)) | 3475 ichar_leading_byte (range_end)) |
3476 return REG_ERANGESPAN; | 3476 return REG_ERANGESPAN; |
3477 | 3477 |
3478 /* #### This might be way inefficient if the range encompasses 10,000 | 3478 /* #### This might be way inefficient if the range encompasses 10,000 |
3479 chars or something. To be efficient, you'd have to do something like | 3479 chars or something. To be efficient, you'd have to do something like |
3480 this: | 3480 this: |
3637 for (i = 0; i < nentries; i++) | 3637 for (i = 0; i < nentries; i++) |
3638 { | 3638 { |
3639 EMACS_INT first, last; | 3639 EMACS_INT first, last; |
3640 Lisp_Object dummy_val; | 3640 Lisp_Object dummy_val; |
3641 int jj; | 3641 int jj; |
3642 Intbyte strr[MAX_EMCHAR_LEN]; | 3642 Ibyte strr[MAX_ICHAR_LEN]; |
3643 | 3643 |
3644 unified_range_table_get_range (p, i, &first, &last, | 3644 unified_range_table_get_range (p, i, &first, &last, |
3645 &dummy_val); | 3645 &dummy_val); |
3646 for (jj = first; jj <= last && jj < 0x80; jj++) | 3646 for (jj = first; jj <= last && jj < 0x80; jj++) |
3647 fastmap[jj] = 1; | 3647 fastmap[jj] = 1; |
3648 /* Ranges below 0x100 can span charsets, but there | 3648 /* Ranges below 0x100 can span charsets, but there |
3649 are only two (Control-1 and Latin-1), and | 3649 are only two (Control-1 and Latin-1), and |
3650 either first or last has to be in them. */ | 3650 either first or last has to be in them. */ |
3651 set_charptr_emchar (strr, first); | 3651 set_itext_ichar (strr, first); |
3652 fastmap[*strr] = 1; | 3652 fastmap[*strr] = 1; |
3653 if (last < 0x100) | 3653 if (last < 0x100) |
3654 { | 3654 { |
3655 set_charptr_emchar (strr, last); | 3655 set_itext_ichar (strr, last); |
3656 fastmap[*strr] = 1; | 3656 fastmap[*strr] = 1; |
3657 } | 3657 } |
3658 } | 3658 } |
3659 } | 3659 } |
3660 break; | 3660 break; |
4100 return -1; | 4100 return -1; |
4101 else | 4101 else |
4102 { | 4102 { |
4103 d = ((const unsigned char *) | 4103 d = ((const unsigned char *) |
4104 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4104 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
4105 range = charptr_emchar_len_fmt (d, fmt); | 4105 range = itext_ichar_len_fmt (d, fmt); |
4106 } | 4106 } |
4107 } | 4107 } |
4108 | 4108 |
4109 #ifdef emacs | 4109 #ifdef emacs |
4110 /* In a forward search for something that starts with \=. | 4110 /* In a forward search for something that starts with \=. |
4179 stop_d = d + range - lim; | 4179 stop_d = d + range - lim; |
4180 | 4180 |
4181 /* We want to find the next location (including the current | 4181 /* We want to find the next location (including the current |
4182 one) where the previous char is a newline, so back up one | 4182 one) where the previous char is a newline, so back up one |
4183 and search forward for a newline. */ | 4183 and search forward for a newline. */ |
4184 DEC_CHARPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ | 4184 DEC_IBYTEPTR_FMT (d, fmt); /* Ok, since startpos != size1. */ |
4185 | 4185 |
4186 /* Written out as an if-else to avoid testing `translate' | 4186 /* Written out as an if-else to avoid testing `translate' |
4187 inside the loop. */ | 4187 inside the loop. */ |
4188 if (TRANSLATE_P (translate)) | 4188 if (TRANSLATE_P (translate)) |
4189 while (d < stop_d && | 4189 while (d < stop_d && |
4190 RE_TRANSLATE_1 (charptr_emchar_fmt (d, fmt, lispobj)) | 4190 RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) |
4191 != '\n') | 4191 != '\n') |
4192 INC_CHARPTR_FMT (d, fmt); | 4192 INC_IBYTEPTR_FMT (d, fmt); |
4193 else | 4193 else |
4194 while (d < stop_d && | 4194 while (d < stop_d && |
4195 charptr_emchar_ascii_fmt (d, fmt, lispobj) != '\n') | 4195 itext_ichar_ascii_fmt (d, fmt, lispobj) != '\n') |
4196 INC_CHARPTR_FMT (d, fmt); | 4196 INC_IBYTEPTR_FMT (d, fmt); |
4197 | 4197 |
4198 /* If we were stopped by a newline, skip forward over it. | 4198 /* If we were stopped by a newline, skip forward over it. |
4199 Otherwise we will get in an infloop when our start position | 4199 Otherwise we will get in an infloop when our start position |
4200 was at begline. */ | 4200 was at begline. */ |
4201 if (d < stop_d) | 4201 if (d < stop_d) |
4202 INC_CHARPTR_FMT (d, fmt); | 4202 INC_IBYTEPTR_FMT (d, fmt); |
4203 range -= d - orig_d; | 4203 range -= d - orig_d; |
4204 startpos += d - orig_d; | 4204 startpos += d - orig_d; |
4205 #if 1 | 4205 #if 1 |
4206 assert (!forward_search_p || range >= 0); | 4206 assert (!forward_search_p || range >= 0); |
4207 #endif | 4207 #endif |
4208 } | 4208 } |
4209 else if (range < 0) | 4209 else if (range < 0) |
4210 { | 4210 { |
4211 /* We're lazy, like in the fastmap code below */ | 4211 /* We're lazy, like in the fastmap code below */ |
4212 Emchar c; | 4212 Ichar c; |
4213 | 4213 |
4214 d = ((const unsigned char *) | 4214 d = ((const unsigned char *) |
4215 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4215 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
4216 DEC_CHARPTR_FMT (d, fmt); | 4216 DEC_IBYTEPTR_FMT (d, fmt); |
4217 c = charptr_emchar_fmt (d, fmt, lispobj); | 4217 c = itext_ichar_fmt (d, fmt, lispobj); |
4218 c = RE_TRANSLATE (c); | 4218 c = RE_TRANSLATE (c); |
4219 if (c != '\n') | 4219 if (c != '\n') |
4220 goto advance; | 4220 goto advance; |
4221 } | 4221 } |
4222 } | 4222 } |
4259 { | 4259 { |
4260 while (range > lim) | 4260 while (range > lim) |
4261 { | 4261 { |
4262 re_char *old_d = d; | 4262 re_char *old_d = d; |
4263 #ifdef MULE | 4263 #ifdef MULE |
4264 Intbyte tempch[MAX_EMCHAR_LEN]; | 4264 Ibyte tempch[MAX_ICHAR_LEN]; |
4265 Emchar buf_ch = | 4265 Ichar buf_ch = |
4266 RE_TRANSLATE_1 (charptr_emchar_fmt (d, fmt, lispobj)); | 4266 RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)); |
4267 set_charptr_emchar (tempch, buf_ch); | 4267 set_itext_ichar (tempch, buf_ch); |
4268 if (fastmap[*tempch]) | 4268 if (fastmap[*tempch]) |
4269 break; | 4269 break; |
4270 #else | 4270 #else |
4271 if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) | 4271 if (fastmap[(unsigned char) RE_TRANSLATE_1 (*d)]) |
4272 break; | 4272 break; |
4273 #endif /* MULE */ | 4273 #endif /* MULE */ |
4274 INC_CHARPTR_FMT (d, fmt); | 4274 INC_IBYTEPTR_FMT (d, fmt); |
4275 range -= (d - old_d); | 4275 range -= (d - old_d); |
4276 #if 1 | 4276 #if 1 |
4277 assert (!forward_search_p || range >= 0); | 4277 assert (!forward_search_p || range >= 0); |
4278 #endif | 4278 #endif |
4279 } | 4279 } |
4282 else if (fmt != FORMAT_DEFAULT) | 4282 else if (fmt != FORMAT_DEFAULT) |
4283 { | 4283 { |
4284 while (range > lim) | 4284 while (range > lim) |
4285 { | 4285 { |
4286 re_char *old_d = d; | 4286 re_char *old_d = d; |
4287 Intbyte tempch[MAX_EMCHAR_LEN]; | 4287 Ibyte tempch[MAX_ICHAR_LEN]; |
4288 Emchar buf_ch = charptr_emchar_fmt (d, fmt, lispobj); | 4288 Ichar buf_ch = itext_ichar_fmt (d, fmt, lispobj); |
4289 set_charptr_emchar (tempch, buf_ch); | 4289 set_itext_ichar (tempch, buf_ch); |
4290 if (fastmap[*tempch]) | 4290 if (fastmap[*tempch]) |
4291 break; | 4291 break; |
4292 INC_CHARPTR_FMT (d, fmt); | 4292 INC_IBYTEPTR_FMT (d, fmt); |
4293 range -= (d - old_d); | 4293 range -= (d - old_d); |
4294 #if 1 | 4294 #if 1 |
4295 assert (!forward_search_p || range >= 0); | 4295 assert (!forward_search_p || range >= 0); |
4296 #endif | 4296 #endif |
4297 } | 4297 } |
4300 else | 4300 else |
4301 { | 4301 { |
4302 while (range > lim && !fastmap[*d]) | 4302 while (range > lim && !fastmap[*d]) |
4303 { | 4303 { |
4304 re_char *old_d = d; | 4304 re_char *old_d = d; |
4305 INC_CHARPTR (d); | 4305 INC_IBYTEPTR (d); |
4306 range -= (d - old_d); | 4306 range -= (d - old_d); |
4307 #if 1 | 4307 #if 1 |
4308 assert (!forward_search_p || range >= 0); | 4308 assert (!forward_search_p || range >= 0); |
4309 #endif | 4309 #endif |
4310 } | 4310 } |
4319 since backward searches aren't so common. */ | 4319 since backward searches aren't so common. */ |
4320 d = ((const unsigned char *) | 4320 d = ((const unsigned char *) |
4321 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4321 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
4322 #ifdef MULE | 4322 #ifdef MULE |
4323 { | 4323 { |
4324 Intbyte tempch[MAX_EMCHAR_LEN]; | 4324 Ibyte tempch[MAX_ICHAR_LEN]; |
4325 Emchar buf_ch = | 4325 Ichar buf_ch = |
4326 RE_TRANSLATE (charptr_emchar_fmt (d, fmt, lispobj)); | 4326 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)); |
4327 set_charptr_emchar (tempch, buf_ch); | 4327 set_itext_ichar (tempch, buf_ch); |
4328 if (!fastmap[*tempch]) | 4328 if (!fastmap[*tempch]) |
4329 goto advance; | 4329 goto advance; |
4330 } | 4330 } |
4331 #else | 4331 #else |
4332 if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) | 4332 if (!fastmap[(unsigned char) RE_TRANSLATE (*d)]) |
4365 else if (range > 0) | 4365 else if (range > 0) |
4366 { | 4366 { |
4367 Bytecount d_size; | 4367 Bytecount d_size; |
4368 d = ((const unsigned char *) | 4368 d = ((const unsigned char *) |
4369 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4369 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
4370 d_size = charptr_emchar_len_fmt (d, fmt); | 4370 d_size = itext_ichar_len_fmt (d, fmt); |
4371 range -= d_size; | 4371 range -= d_size; |
4372 #if 1 | 4372 #if 1 |
4373 assert (!forward_search_p || range >= 0); | 4373 assert (!forward_search_p || range >= 0); |
4374 #endif | 4374 #endif |
4375 startpos += d_size; | 4375 startpos += d_size; |
4379 Bytecount d_size; | 4379 Bytecount d_size; |
4380 /* Note startpos > size1 not >=. If we are on the | 4380 /* Note startpos > size1 not >=. If we are on the |
4381 string1/string2 boundary, we want to backup into string1. */ | 4381 string1/string2 boundary, we want to backup into string1. */ |
4382 d = ((const unsigned char *) | 4382 d = ((const unsigned char *) |
4383 (startpos > size1 ? string2 - size1 : string1) + startpos); | 4383 (startpos > size1 ? string2 - size1 : string1) + startpos); |
4384 DEC_CHARPTR_FMT (d, fmt); | 4384 DEC_IBYTEPTR_FMT (d, fmt); |
4385 d_size = charptr_emchar_len_fmt (d, fmt); | 4385 d_size = itext_ichar_len_fmt (d, fmt); |
4386 range += d_size; | 4386 range += d_size; |
4387 #if 1 | 4387 #if 1 |
4388 assert (!forward_search_p || range >= 0); | 4388 assert (!forward_search_p || range >= 0); |
4389 #endif | 4389 #endif |
4390 startpos -= d_size; | 4390 startpos -= d_size; |
4970 { | 4970 { |
4971 #ifdef MULE | 4971 #ifdef MULE |
4972 Bytecount pat_len; | 4972 Bytecount pat_len; |
4973 | 4973 |
4974 REGEX_PREFETCH (); | 4974 REGEX_PREFETCH (); |
4975 if (RE_TRANSLATE_1 (charptr_emchar_fmt (d, fmt, lispobj)) | 4975 if (RE_TRANSLATE_1 (itext_ichar_fmt (d, fmt, lispobj)) |
4976 != charptr_emchar (p)) | 4976 != itext_ichar (p)) |
4977 goto fail; | 4977 goto fail; |
4978 | 4978 |
4979 pat_len = charptr_emchar_len (p); | 4979 pat_len = itext_ichar_len (p); |
4980 p += pat_len; | 4980 p += pat_len; |
4981 INC_CHARPTR_FMT (d, fmt); | 4981 INC_IBYTEPTR_FMT (d, fmt); |
4982 | 4982 |
4983 mcnt -= pat_len; | 4983 mcnt -= pat_len; |
4984 #else /* not MULE */ | 4984 #else /* not MULE */ |
4985 REGEX_PREFETCH (); | 4985 REGEX_PREFETCH (); |
4986 if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) | 4986 if ((unsigned char) RE_TRANSLATE_1 (*d++) != *p++) |
5001 do | 5001 do |
5002 { | 5002 { |
5003 Bytecount pat_len; | 5003 Bytecount pat_len; |
5004 | 5004 |
5005 REGEX_PREFETCH (); | 5005 REGEX_PREFETCH (); |
5006 if (charptr_emchar_fmt (d, fmt, lispobj) != | 5006 if (itext_ichar_fmt (d, fmt, lispobj) != |
5007 charptr_emchar (p)) | 5007 itext_ichar (p)) |
5008 goto fail; | 5008 goto fail; |
5009 | 5009 |
5010 pat_len = charptr_emchar_len (p); | 5010 pat_len = itext_ichar_len (p); |
5011 p += pat_len; | 5011 p += pat_len; |
5012 INC_CHARPTR_FMT (d, fmt); | 5012 INC_IBYTEPTR_FMT (d, fmt); |
5013 | 5013 |
5014 mcnt -= pat_len; | 5014 mcnt -= pat_len; |
5015 } | 5015 } |
5016 while (mcnt > 0); | 5016 while (mcnt > 0); |
5017 } | 5017 } |
5036 DEBUG_PRINT1 ("EXECUTING anychar.\n"); | 5036 DEBUG_PRINT1 ("EXECUTING anychar.\n"); |
5037 | 5037 |
5038 REGEX_PREFETCH (); | 5038 REGEX_PREFETCH (); |
5039 | 5039 |
5040 if ((!(bufp->syntax & RE_DOT_NEWLINE) && | 5040 if ((!(bufp->syntax & RE_DOT_NEWLINE) && |
5041 RE_TRANSLATE (charptr_emchar_fmt (d, fmt, lispobj)) == '\n') | 5041 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == '\n') |
5042 || (bufp->syntax & RE_DOT_NOT_NULL && | 5042 || (bufp->syntax & RE_DOT_NOT_NULL && |
5043 RE_TRANSLATE (charptr_emchar_fmt (d, fmt, lispobj)) == | 5043 RE_TRANSLATE (itext_ichar_fmt (d, fmt, lispobj)) == |
5044 '\000')) | 5044 '\000')) |
5045 goto fail; | 5045 goto fail; |
5046 | 5046 |
5047 SET_REGS_MATCHED (); | 5047 SET_REGS_MATCHED (); |
5048 DEBUG_PRINT2 (" Matched `%d'.\n", *d); | 5048 DEBUG_PRINT2 (" Matched `%d'.\n", *d); |
5049 INC_CHARPTR_FMT (d, fmt); /* XEmacs change */ | 5049 INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ |
5050 break; | 5050 break; |
5051 | 5051 |
5052 | 5052 |
5053 case charset: | 5053 case charset: |
5054 case charset_not: | 5054 case charset_not: |
5057 re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; | 5057 re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; |
5058 | 5058 |
5059 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); | 5059 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); |
5060 | 5060 |
5061 REGEX_PREFETCH (); | 5061 REGEX_PREFETCH (); |
5062 c = charptr_emchar_fmt (d, fmt, lispobj); | 5062 c = itext_ichar_fmt (d, fmt, lispobj); |
5063 c = RE_TRANSLATE (c); /* The character to match. */ | 5063 c = RE_TRANSLATE (c); /* The character to match. */ |
5064 | 5064 |
5065 /* Cast to `unsigned int' instead of `unsigned char' in case the | 5065 /* Cast to `unsigned int' instead of `unsigned char' in case the |
5066 bit list is a full 32 bytes long. */ | 5066 bit list is a full 32 bytes long. */ |
5067 if (c < (unsigned int) (*p * BYTEWIDTH) | 5067 if (c < (unsigned int) (*p * BYTEWIDTH) |
5071 p += 1 + *p; | 5071 p += 1 + *p; |
5072 | 5072 |
5073 if (!not_p) goto fail; | 5073 if (!not_p) goto fail; |
5074 | 5074 |
5075 SET_REGS_MATCHED (); | 5075 SET_REGS_MATCHED (); |
5076 INC_CHARPTR_FMT (d, fmt); /* XEmacs change */ | 5076 INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ |
5077 break; | 5077 break; |
5078 } | 5078 } |
5079 | 5079 |
5080 #ifdef MULE | 5080 #ifdef MULE |
5081 case charset_mule: | 5081 case charset_mule: |
5082 case charset_mule_not: | 5082 case charset_mule_not: |
5083 { | 5083 { |
5084 REGISTER Emchar c; | 5084 REGISTER Ichar c; |
5085 re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; | 5085 re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; |
5086 | 5086 |
5087 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); | 5087 DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); |
5088 | 5088 |
5089 REGEX_PREFETCH (); | 5089 REGEX_PREFETCH (); |
5090 c = charptr_emchar_fmt (d, fmt, lispobj); | 5090 c = itext_ichar_fmt (d, fmt, lispobj); |
5091 c = RE_TRANSLATE (c); /* The character to match. */ | 5091 c = RE_TRANSLATE (c); /* The character to match. */ |
5092 | 5092 |
5093 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) | 5093 if (EQ (Qt, unified_range_table_lookup (p, c, Qnil))) |
5094 not_p = !not_p; | 5094 not_p = !not_p; |
5095 | 5095 |
5096 p += unified_range_table_bytes_used (p); | 5096 p += unified_range_table_bytes_used (p); |
5097 | 5097 |
5098 if (!not_p) goto fail; | 5098 if (!not_p) goto fail; |
5099 | 5099 |
5100 SET_REGS_MATCHED (); | 5100 SET_REGS_MATCHED (); |
5101 INC_CHARPTR_FMT (d, fmt); | 5101 INC_IBYTEPTR_FMT (d, fmt); |
5102 break; | 5102 break; |
5103 } | 5103 } |
5104 #endif /* MULE */ | 5104 #endif /* MULE */ |
5105 | 5105 |
5106 | 5106 |
5382 if (!bufp->not_bol) break; | 5382 if (!bufp->not_bol) break; |
5383 } | 5383 } |
5384 else | 5384 else |
5385 { | 5385 { |
5386 re_char *d2 = d; | 5386 re_char *d2 = d; |
5387 DEC_CHARPTR (d2); | 5387 DEC_IBYTEPTR (d2); |
5388 if (charptr_emchar_ascii_fmt (d2, fmt, lispobj) == '\n' && | 5388 if (itext_ichar_ascii_fmt (d2, fmt, lispobj) == '\n' && |
5389 bufp->newline_anchor) | 5389 bufp->newline_anchor) |
5390 break; | 5390 break; |
5391 } | 5391 } |
5392 /* In all other cases, we fail. */ | 5392 /* In all other cases, we fail. */ |
5393 goto fail; | 5393 goto fail; |
5402 if (!bufp->not_eol) break; | 5402 if (!bufp->not_eol) break; |
5403 } | 5403 } |
5404 | 5404 |
5405 /* We have to ``prefetch'' the next character. */ | 5405 /* We have to ``prefetch'' the next character. */ |
5406 else if ((d == end1 ? | 5406 else if ((d == end1 ? |
5407 charptr_emchar_ascii_fmt (string2, fmt, lispobj) : | 5407 itext_ichar_ascii_fmt (string2, fmt, lispobj) : |
5408 charptr_emchar_ascii_fmt (d, fmt, lispobj)) == '\n' | 5408 itext_ichar_ascii_fmt (d, fmt, lispobj)) == '\n' |
5409 && bufp->newline_anchor) | 5409 && bufp->newline_anchor) |
5410 { | 5410 { |
5411 break; | 5411 break; |
5412 } | 5412 } |
5413 goto fail; | 5413 goto fail; |
5791 re_char *d_after = POS_AFTER_GAP_UNSAFE (d); | 5791 re_char *d_after = POS_AFTER_GAP_UNSAFE (d); |
5792 | 5792 |
5793 /* emch1 is the character before d, syn1 is the syntax of | 5793 /* emch1 is the character before d, syn1 is the syntax of |
5794 emch1, emch2 is the character at d, and syn2 is the | 5794 emch1, emch2 is the character at d, and syn2 is the |
5795 syntax of emch2. */ | 5795 syntax of emch2. */ |
5796 Emchar emch1, emch2; | 5796 Ichar emch1, emch2; |
5797 int syn1, syn2; | 5797 int syn1, syn2; |
5798 #ifdef emacs | 5798 #ifdef emacs |
5799 Charxpos pos_before; | 5799 Charxpos pos_before; |
5800 #endif | 5800 #endif |
5801 | 5801 |
5802 DEC_CHARPTR_FMT (d_before, fmt); | 5802 DEC_IBYTEPTR_FMT (d_before, fmt); |
5803 emch1 = charptr_emchar_fmt (d_before, fmt, lispobj); | 5803 emch1 = itext_ichar_fmt (d_before, fmt, lispobj); |
5804 emch2 = charptr_emchar_fmt (d_after, fmt, lispobj); | 5804 emch2 = itext_ichar_fmt (d_after, fmt, lispobj); |
5805 | 5805 |
5806 #ifdef emacs | 5806 #ifdef emacs |
5807 pos_before = | 5807 pos_before = |
5808 offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)) - 1; | 5808 offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)) - 1; |
5809 UPDATE_SYNTAX_CACHE (scache, pos_before); | 5809 UPDATE_SYNTAX_CACHE (scache, pos_before); |
5836 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) | 5836 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) |
5837 break; | 5837 break; |
5838 | 5838 |
5839 */ | 5839 */ |
5840 re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); | 5840 re_char *dtmp = POS_AFTER_GAP_UNSAFE (d); |
5841 Emchar emch = charptr_emchar_fmt (dtmp, fmt, lispobj); | 5841 Ichar emch = itext_ichar_fmt (dtmp, fmt, lispobj); |
5842 #ifdef emacs | 5842 #ifdef emacs |
5843 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); | 5843 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); |
5844 UPDATE_SYNTAX_CACHE (scache, charpos); | 5844 UPDATE_SYNTAX_CACHE (scache, charpos); |
5845 #endif | 5845 #endif |
5846 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) | 5846 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) |
5847 goto fail; | 5847 goto fail; |
5848 if (AT_STRINGS_BEG (d)) | 5848 if (AT_STRINGS_BEG (d)) |
5849 break; | 5849 break; |
5850 dtmp = POS_BEFORE_GAP_UNSAFE (d); | 5850 dtmp = POS_BEFORE_GAP_UNSAFE (d); |
5851 DEC_CHARPTR_FMT (dtmp, fmt); | 5851 DEC_IBYTEPTR_FMT (dtmp, fmt); |
5852 emch = charptr_emchar_fmt (dtmp, fmt, lispobj); | 5852 emch = itext_ichar_fmt (dtmp, fmt, lispobj); |
5853 #ifdef emacs | 5853 #ifdef emacs |
5854 UPDATE_SYNTAX_CACHE_BACKWARD (scache, charpos - 1); | 5854 UPDATE_SYNTAX_CACHE_BACKWARD (scache, charpos - 1); |
5855 #endif | 5855 #endif |
5856 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) | 5856 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) |
5857 break; | 5857 break; |
5870 break; | 5870 break; |
5871 | 5871 |
5872 The or condition is incorrect (reversed). | 5872 The or condition is incorrect (reversed). |
5873 */ | 5873 */ |
5874 re_char *dtmp; | 5874 re_char *dtmp; |
5875 Emchar emch; | 5875 Ichar emch; |
5876 #ifdef emacs | 5876 #ifdef emacs |
5877 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); | 5877 Charxpos charpos = offset_to_charxpos (lispobj, PTR_TO_OFFSET (d)); |
5878 UPDATE_SYNTAX_CACHE (scache, charpos); | 5878 UPDATE_SYNTAX_CACHE (scache, charpos); |
5879 #endif | 5879 #endif |
5880 dtmp = POS_BEFORE_GAP_UNSAFE (d); | 5880 dtmp = POS_BEFORE_GAP_UNSAFE (d); |
5881 DEC_CHARPTR_FMT (dtmp, fmt); | 5881 DEC_IBYTEPTR_FMT (dtmp, fmt); |
5882 emch = charptr_emchar_fmt (dtmp, fmt, lispobj); | 5882 emch = itext_ichar_fmt (dtmp, fmt, lispobj); |
5883 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) | 5883 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) |
5884 goto fail; | 5884 goto fail; |
5885 if (AT_STRINGS_END (d)) | 5885 if (AT_STRINGS_END (d)) |
5886 break; | 5886 break; |
5887 dtmp = POS_AFTER_GAP_UNSAFE (d); | 5887 dtmp = POS_AFTER_GAP_UNSAFE (d); |
5888 emch = charptr_emchar_fmt (dtmp, fmt, lispobj); | 5888 emch = itext_ichar_fmt (dtmp, fmt, lispobj); |
5889 #ifdef emacs | 5889 #ifdef emacs |
5890 UPDATE_SYNTAX_CACHE_FORWARD (scache, charpos + 1); | 5890 UPDATE_SYNTAX_CACHE_FORWARD (scache, charpos + 1); |
5891 #endif | 5891 #endif |
5892 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) | 5892 if (SYNTAX_FROM_CACHE (scache, emch) != Sword) |
5893 break; | 5893 break; |
5930 matchsyntax: | 5930 matchsyntax: |
5931 should_succeed = 1; | 5931 should_succeed = 1; |
5932 matchornotsyntax: | 5932 matchornotsyntax: |
5933 { | 5933 { |
5934 int matches; | 5934 int matches; |
5935 Emchar emch; | 5935 Ichar emch; |
5936 | 5936 |
5937 REGEX_PREFETCH (); | 5937 REGEX_PREFETCH (); |
5938 UPDATE_SYNTAX_CACHE | 5938 UPDATE_SYNTAX_CACHE |
5939 (scache, offset_to_charxpos (lispobj, PTR_TO_OFFSET (d))); | 5939 (scache, offset_to_charxpos (lispobj, PTR_TO_OFFSET (d))); |
5940 | 5940 |
5941 emch = charptr_emchar_fmt (d, fmt, lispobj); | 5941 emch = itext_ichar_fmt (d, fmt, lispobj); |
5942 matches = (SYNTAX_FROM_CACHE (scache, emch) == | 5942 matches = (SYNTAX_FROM_CACHE (scache, emch) == |
5943 (enum syntaxcode) mcnt); | 5943 (enum syntaxcode) mcnt); |
5944 INC_CHARPTR_FMT (d, fmt); | 5944 INC_IBYTEPTR_FMT (d, fmt); |
5945 if (matches != should_succeed) | 5945 if (matches != should_succeed) |
5946 goto fail; | 5946 goto fail; |
5947 SET_REGS_MATCHED (); | 5947 SET_REGS_MATCHED (); |
5948 } | 5948 } |
5949 break; | 5949 break; |
5964 /* 97/2/17 jhod Mule category code patch */ | 5964 /* 97/2/17 jhod Mule category code patch */ |
5965 case categoryspec: | 5965 case categoryspec: |
5966 should_succeed = 1; | 5966 should_succeed = 1; |
5967 matchornotcategory: | 5967 matchornotcategory: |
5968 { | 5968 { |
5969 Emchar emch; | 5969 Ichar emch; |
5970 | 5970 |
5971 mcnt = *p++; | 5971 mcnt = *p++; |
5972 REGEX_PREFETCH (); | 5972 REGEX_PREFETCH (); |
5973 emch = charptr_emchar_fmt (d, fmt, lispobj); | 5973 emch = itext_ichar_fmt (d, fmt, lispobj); |
5974 INC_CHARPTR_FMT (d, fmt); | 5974 INC_IBYTEPTR_FMT (d, fmt); |
5975 if (check_category_char (emch, BUFFER_CATEGORY_TABLE (lispbuf), | 5975 if (check_category_char (emch, BUFFER_CATEGORY_TABLE (lispbuf), |
5976 mcnt, should_succeed)) | 5976 mcnt, should_succeed)) |
5977 goto fail; | 5977 goto fail; |
5978 SET_REGS_MATCHED (); | 5978 SET_REGS_MATCHED (); |
5979 } | 5979 } |
6324 re_char *p1_end = s1 + len; | 6324 re_char *p1_end = s1 + len; |
6325 re_char *p2_end = s2 + len; | 6325 re_char *p2_end = s2 + len; |
6326 | 6326 |
6327 while (p1 != p1_end && p2 != p2_end) | 6327 while (p1 != p1_end && p2 != p2_end) |
6328 { | 6328 { |
6329 Emchar p1_ch, p2_ch; | 6329 Ichar p1_ch, p2_ch; |
6330 | 6330 |
6331 p1_ch = charptr_emchar_fmt (p1, fmt, lispobj); | 6331 p1_ch = itext_ichar_fmt (p1, fmt, lispobj); |
6332 p2_ch = charptr_emchar_fmt (p2, fmt, lispobj); | 6332 p2_ch = itext_ichar_fmt (p2, fmt, lispobj); |
6333 | 6333 |
6334 if (RE_TRANSLATE_1 (p1_ch) | 6334 if (RE_TRANSLATE_1 (p1_ch) |
6335 != RE_TRANSLATE_1 (p2_ch)) | 6335 != RE_TRANSLATE_1 (p2_ch)) |
6336 return 1; | 6336 return 1; |
6337 INC_CHARPTR_FMT (p1, fmt); | 6337 INC_IBYTEPTR_FMT (p1, fmt); |
6338 INC_CHARPTR_FMT (p2, fmt); | 6338 INC_IBYTEPTR_FMT (p2, fmt); |
6339 } | 6339 } |
6340 #else /* not MULE */ | 6340 #else /* not MULE */ |
6341 while (len) | 6341 while (len) |
6342 { | 6342 { |
6343 if (RE_TRANSLATE_1 (*p1++) != RE_TRANSLATE_1 (*p2++)) return 1; | 6343 if (RE_TRANSLATE_1 (*p1++) != RE_TRANSLATE_1 (*p2++)) return 1; |