Mercurial > hg > xemacs-beta
comparison src/regex.c @ 446:1ccc32a20af4 r21-2-38
Import from CVS: tag r21-2-38
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:37:21 +0200 |
parents | 576fb035e263 |
children | 98528da0b7fc |
comparison
equal
deleted
inserted
replaced
445:34f3776fcf0e | 446:1ccc32a20af4 |
---|---|
102 void | 102 void |
103 complex_vars_of_regex (void) | 103 complex_vars_of_regex (void) |
104 { | 104 { |
105 } | 105 } |
106 | 106 |
107 #endif /* not MULE */ | 107 #endif /* MULE */ |
108 | |
109 #define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch) | |
110 #define TRANSLATE_P(tr) (!NILP (tr)) | |
108 | 111 |
109 #else /* not emacs */ | 112 #else /* not emacs */ |
110 | 113 |
111 /* If we are not linking with Emacs proper, | 114 /* If we are not linking with Emacs proper, |
112 we can't use the relocating allocator | 115 we can't use the relocating allocator |
171 | 174 |
172 done = 1; | 175 done = 1; |
173 } | 176 } |
174 } | 177 } |
175 | 178 |
176 #endif /* not SYNTAX_TABLE */ | 179 #endif /* SYNTAX_TABLE */ |
177 | 180 |
178 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c] | 181 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c] |
179 | 182 |
180 #endif /* not emacs */ | 183 #define RE_TRANSLATE(c) translate[(unsigned char) (c)] |
184 #define TRANSLATE_P(tr) tr | |
185 | |
186 #endif /* emacs */ | |
181 | 187 |
182 /* Under XEmacs, this is needed because we don't define it elsewhere. */ | 188 /* Under XEmacs, this is needed because we don't define it elsewhere. */ |
183 #ifdef SWITCH_ENUM_BUG | 189 #ifdef SWITCH_ENUM_BUG |
184 #define SWITCH_ENUM_CAST(x) ((int)(x)) | 190 #define SWITCH_ENUM_CAST(x) ((int)(x)) |
185 #else | 191 #else |
286 #include <alloca.h> | 292 #include <alloca.h> |
287 #else /* not __GNUC__ or HAVE_ALLOCA_H */ | 293 #else /* not __GNUC__ or HAVE_ALLOCA_H */ |
288 #ifndef _AIX /* Already did AIX, up at the top. */ | 294 #ifndef _AIX /* Already did AIX, up at the top. */ |
289 void *alloca (); | 295 void *alloca (); |
290 #endif /* not _AIX */ | 296 #endif /* not _AIX */ |
291 #endif /* not HAVE_ALLOCA_H */ | 297 #endif /* HAVE_ALLOCA_H */ |
292 #endif /* not __GNUC__ */ | 298 #endif /* __GNUC__ */ |
293 | 299 |
294 #endif /* not alloca */ | 300 #endif /* not alloca */ |
295 | 301 |
296 #define REGEX_ALLOCATE alloca | 302 #define REGEX_ALLOCATE alloca |
297 | 303 |
302 destination) | 308 destination) |
303 | 309 |
304 /* No need to do anything to free, after alloca. */ | 310 /* No need to do anything to free, after alloca. */ |
305 #define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ | 311 #define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ |
306 | 312 |
307 #endif /* not REGEX_MALLOC */ | 313 #endif /* REGEX_MALLOC */ |
308 | 314 |
309 /* Define how to allocate the failure stack. */ | 315 /* Define how to allocate the failure stack. */ |
310 | 316 |
311 #ifdef REL_ALLOC | 317 #ifdef REL_ALLOC |
312 #define REGEX_ALLOCATE_STACK(size) \ | 318 #define REGEX_ALLOCATE_STACK(size) \ |
331 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ | 337 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ |
332 REGEX_REALLOCATE (source, osize, nsize) | 338 REGEX_REALLOCATE (source, osize, nsize) |
333 /* No need to explicitly free anything. */ | 339 /* No need to explicitly free anything. */ |
334 #define REGEX_FREE_STACK(arg) | 340 #define REGEX_FREE_STACK(arg) |
335 | 341 |
336 #endif /* not REGEX_MALLOC */ | 342 #endif /* REGEX_MALLOC */ |
337 #endif /* not REL_ALLOC */ | 343 #endif /* REL_ALLOC */ |
338 | 344 |
339 | 345 |
340 /* True if `size1' is non-NULL and PTR is pointing anywhere inside | 346 /* True if `size1' is non-NULL and PTR is pointing anywhere inside |
341 `string1' or just past its end. This works if PTR is NULL, which is | 347 `string1' or just past its end. This works if PTR is NULL, which is |
342 a good thing. */ | 348 a good thing. */ |
356 | 362 |
357 #undef MAX | 363 #undef MAX |
358 #undef MIN | 364 #undef MIN |
359 #define MAX(a, b) ((a) > (b) ? (a) : (b)) | 365 #define MAX(a, b) ((a) > (b) ? (a) : (b)) |
360 #define MIN(a, b) ((a) < (b) ? (a) : (b)) | 366 #define MIN(a, b) ((a) < (b) ? (a) : (b)) |
367 | |
368 /* Type of source-pattern and string chars. */ | |
369 typedef const unsigned char re_char; | |
361 | 370 |
362 typedef char boolean; | 371 typedef char boolean; |
363 #define false 0 | 372 #define false 0 |
364 #define true 1 | 373 #define true 1 |
365 | 374 |
552 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ | 561 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ |
553 } while (0) | 562 } while (0) |
554 | 563 |
555 #ifdef DEBUG | 564 #ifdef DEBUG |
556 static void | 565 static void |
557 extract_number (int *dest, unsigned char *source) | 566 extract_number (int *dest, re_char *source) |
558 { | 567 { |
559 int temp = SIGN_EXTEND_CHAR (*(source + 1)); | 568 int temp = SIGN_EXTEND_CHAR (*(source + 1)); |
560 *dest = *source & 0377; | 569 *dest = *source & 0377; |
561 *dest += temp << 8; | 570 *dest += temp << 8; |
562 } | 571 } |
655 | 664 |
656 /* Print a compiled pattern string in human-readable form, starting at | 665 /* Print a compiled pattern string in human-readable form, starting at |
657 the START pointer into it and ending just before the pointer END. */ | 666 the START pointer into it and ending just before the pointer END. */ |
658 | 667 |
659 static void | 668 static void |
660 print_partial_compiled_pattern (unsigned char *start, unsigned char *end) | 669 print_partial_compiled_pattern (re_char *start, re_char *end) |
661 { | 670 { |
662 int mcnt, mcnt2; | 671 int mcnt, mcnt2; |
663 unsigned char *p = start; | 672 unsigned char *p = (unsigned char *) start; |
664 unsigned char *pend = end; | 673 re_char *pend = end; |
665 | 674 |
666 if (start == NULL) | 675 if (start == NULL) |
667 { | 676 { |
668 puts ("(null)"); | 677 puts ("(null)"); |
669 return; | 678 return; |
937 | 946 |
938 | 947 |
939 static void | 948 static void |
940 print_compiled_pattern (struct re_pattern_buffer *bufp) | 949 print_compiled_pattern (struct re_pattern_buffer *bufp) |
941 { | 950 { |
942 unsigned char *buffer = bufp->buffer; | 951 re_char *buffer = bufp->buffer; |
943 | 952 |
944 print_partial_compiled_pattern (buffer, buffer + bufp->used); | 953 print_partial_compiled_pattern (buffer, buffer + bufp->used); |
945 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, | 954 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, |
946 bufp->allocated); | 955 bufp->allocated); |
947 | 956 |
963 /* and maybe the category table? */ | 972 /* and maybe the category table? */ |
964 } | 973 } |
965 | 974 |
966 | 975 |
967 static void | 976 static void |
968 print_double_string (const char *where, const char *string1, int size1, | 977 print_double_string (re_char *where, re_char *string1, int size1, |
969 const char *string2, int size2) | 978 re_char *string2, int size2) |
970 { | 979 { |
971 if (where == NULL) | 980 if (where == NULL) |
972 printf ("(null)"); | 981 printf ("(null)"); |
973 else | 982 else |
974 { | 983 { |
998 #define DEBUG_PRINT3(x1, x2, x3) | 1007 #define DEBUG_PRINT3(x1, x2, x3) |
999 #define DEBUG_PRINT4(x1, x2, x3, x4) | 1008 #define DEBUG_PRINT4(x1, x2, x3, x4) |
1000 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) | 1009 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) |
1001 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) | 1010 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) |
1002 | 1011 |
1003 #endif /* not DEBUG */ | 1012 #endif /* DEBUG */ |
1004 | 1013 |
1005 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can | 1014 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can |
1006 also be assigned to arbitrarily: each pattern buffer stores its own | 1015 also be assigned to arbitrarily: each pattern buffer stores its own |
1007 syntax, so it can be changed between regex compilations. */ | 1016 syntax, so it can be changed between regex compilations. */ |
1008 /* This has no initializer because initialized variables in Emacs | 1017 /* This has no initializer because initialized variables in Emacs |
1121 int re_max_failures = 2000; | 1130 int re_max_failures = 2000; |
1122 #endif | 1131 #endif |
1123 | 1132 |
1124 union fail_stack_elt | 1133 union fail_stack_elt |
1125 { | 1134 { |
1126 unsigned char *pointer; | 1135 re_char *pointer; |
1127 int integer; | 1136 int integer; |
1128 }; | 1137 }; |
1129 | 1138 |
1130 typedef union fail_stack_elt fail_stack_elt_t; | 1139 typedef union fail_stack_elt fail_stack_elt_t; |
1131 | 1140 |
1132 typedef struct | 1141 typedef struct |
1133 { | 1142 { |
1134 fail_stack_elt_t *stack; | 1143 fail_stack_elt_t *stack; |
1135 unsigned size; | 1144 size_t size; |
1136 unsigned avail; /* Offset of next open position. */ | 1145 size_t avail; /* Offset of next open position. */ |
1137 } fail_stack_type; | 1146 } fail_stack_type; |
1138 | 1147 |
1139 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0) | 1148 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0) |
1140 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) | 1149 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) |
1141 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) | 1150 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) |
1307 PUSH_FAILURE_INT (lowest_active_reg); \ | 1316 PUSH_FAILURE_INT (lowest_active_reg); \ |
1308 \ | 1317 \ |
1309 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ | 1318 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ |
1310 PUSH_FAILURE_INT (highest_active_reg); \ | 1319 PUSH_FAILURE_INT (highest_active_reg); \ |
1311 \ | 1320 \ |
1312 DEBUG_PRINT2 (" Pushing pattern 0x%lx: ", (long) pattern_place); \ | 1321 DEBUG_PRINT2 (" Pushing pattern 0x%lx: \n", (long) pattern_place); \ |
1313 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ | 1322 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ |
1314 PUSH_FAILURE_POINTER (pattern_place); \ | 1323 PUSH_FAILURE_POINTER (pattern_place); \ |
1315 \ | 1324 \ |
1316 DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \ | 1325 DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \ |
1317 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ | 1326 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ |
1383 /* If the saved string location is NULL, it came from an \ | 1392 /* If the saved string location is NULL, it came from an \ |
1384 on_failure_keep_string_jump opcode, and we want to throw away the \ | 1393 on_failure_keep_string_jump opcode, and we want to throw away the \ |
1385 saved NULL, thus retaining our current position in the string. */ \ | 1394 saved NULL, thus retaining our current position in the string. */ \ |
1386 string_temp = POP_FAILURE_POINTER (); \ | 1395 string_temp = POP_FAILURE_POINTER (); \ |
1387 if (string_temp != NULL) \ | 1396 if (string_temp != NULL) \ |
1388 str = (const char *) string_temp; \ | 1397 str = string_temp; \ |
1389 \ | 1398 \ |
1390 DEBUG_PRINT2 (" Popping string 0x%lx: `", (long) str); \ | 1399 DEBUG_PRINT2 (" Popping string 0x%lx: `", (long) str); \ |
1391 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ | 1400 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ |
1392 DEBUG_PRINT1 ("'\n"); \ | 1401 DEBUG_PRINT1 ("'\n"); \ |
1393 \ | 1402 \ |
1408 \ | 1417 \ |
1409 reg_info[this_reg].word = POP_FAILURE_ELT (); \ | 1418 reg_info[this_reg].word = POP_FAILURE_ELT (); \ |
1410 DEBUG_PRINT2 (" info: 0x%lx\n", \ | 1419 DEBUG_PRINT2 (" info: 0x%lx\n", \ |
1411 * (long *) &reg_info[this_reg]); \ | 1420 * (long *) &reg_info[this_reg]); \ |
1412 \ | 1421 \ |
1413 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ | 1422 regend[this_reg] = POP_FAILURE_POINTER (); \ |
1414 DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ | 1423 DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ |
1415 \ | 1424 \ |
1416 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ | 1425 regstart[this_reg] = POP_FAILURE_POINTER (); \ |
1417 DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ | 1426 DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ |
1418 } \ | 1427 } \ |
1419 \ | 1428 \ |
1420 set_regs_matched_done = 0; \ | 1429 set_regs_matched_done = 0; \ |
1421 DEBUG_STATEMENT (nfailure_points_popped++); \ | 1430 DEBUG_STATEMENT (nfailure_points_popped++); \ |
1474 } \ | 1483 } \ |
1475 } \ | 1484 } \ |
1476 while (0) | 1485 while (0) |
1477 | 1486 |
1478 /* Registers are set to a sentinel when they haven't yet matched. */ | 1487 /* Registers are set to a sentinel when they haven't yet matched. */ |
1479 static char reg_unset_dummy; | 1488 static unsigned char reg_unset_dummy; |
1480 #define REG_UNSET_VALUE (&reg_unset_dummy) | 1489 #define REG_UNSET_VALUE (&reg_unset_dummy) |
1481 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) | 1490 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) |
1482 | 1491 |
1483 /* Subroutine declarations and macros for regex_compile. */ | 1492 /* Subroutine declarations and macros for regex_compile. */ |
1484 | 1493 |
1485 /* Fetch the next character in the uncompiled pattern---translating it | 1494 /* Fetch the next character in the uncompiled pattern---translating it |
1486 if necessary. Also cast from a signed character in the constant | 1495 if necessary. Also cast from a signed character in the constant |
1487 string passed to us by the user to an unsigned char that we can use | 1496 string passed to us by the user to an unsigned char that we can use |
1488 as an array index (in, e.g., `translate'). */ | 1497 as an array index (in, e.g., `translate'). */ |
1489 #define PATFETCH(c) \ | 1498 #define PATFETCH(c) \ |
1490 do {if (p == pend) return REG_EEND; \ | 1499 do { \ |
1491 assert (p < pend); \ | 1500 PATFETCH_RAW (c); \ |
1492 c = (unsigned char) *p++; \ | 1501 c = TRANSLATE (c); \ |
1493 if (translate) c = (unsigned char) translate[c]; \ | |
1494 } while (0) | 1502 } while (0) |
1495 | 1503 |
1496 /* Fetch the next character in the uncompiled pattern, with no | 1504 /* Fetch the next character in the uncompiled pattern, with no |
1497 translation. */ | 1505 translation. */ |
1498 #define PATFETCH_RAW(c) \ | 1506 #define PATFETCH_RAW(c) \ |
1499 do {if (p == pend) return REG_EEND; \ | 1507 do {if (p == pend) return REG_EEND; \ |
1500 assert (p < pend); \ | 1508 assert (p < pend); \ |
1501 c = (unsigned char) *p++; \ | 1509 c = charptr_emchar (p); \ |
1510 INC_CHARPTR (p); \ | |
1502 } while (0) | 1511 } while (0) |
1503 | 1512 |
1504 /* Go backwards one character in the pattern. */ | 1513 /* Go backwards one character in the pattern. */ |
1505 #define PATUNFETCH p-- | 1514 #define PATUNFETCH DEC_CHARPTR (p) |
1506 | 1515 |
1507 #ifdef MULE | 1516 #ifdef MULE |
1508 | 1517 |
1509 #define PATFETCH_EXTENDED(emch) \ | 1518 #define PATFETCH_EXTENDED(emch) \ |
1510 do {if (p == pend) return REG_EEND; \ | 1519 do {if (p == pend) return REG_EEND; \ |
1511 assert (p < pend); \ | 1520 assert (p < pend); \ |
1512 emch = charptr_emchar ((const Bufbyte *) p); \ | 1521 emch = charptr_emchar ((const Bufbyte *) p); \ |
1513 INC_CHARPTR (p); \ | 1522 INC_CHARPTR (p); \ |
1514 if (translate && emch < 0x80) \ | 1523 if (TRANSLATE_P (translate) && emch < 0x80) \ |
1515 emch = (Emchar) (unsigned char) translate[emch]; \ | 1524 emch = (Emchar) (unsigned char) RE_TRANSLATE (emch); \ |
1516 } while (0) | 1525 } while (0) |
1517 | 1526 |
1518 #define PATFETCH_RAW_EXTENDED(emch) \ | 1527 #define PATFETCH_RAW_EXTENDED(emch) \ |
1519 do {if (p == pend) return REG_EEND; \ | 1528 do {if (p == pend) return REG_EEND; \ |
1520 assert (p < pend); \ | 1529 assert (p < pend); \ |
1552 | 1561 |
1553 #define PATFETCH_EITHER(emch) PATFETCH (emch) | 1562 #define PATFETCH_EITHER(emch) PATFETCH (emch) |
1554 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch) | 1563 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch) |
1555 #define PATUNFETCH_EITHER PATUNFETCH | 1564 #define PATUNFETCH_EITHER PATUNFETCH |
1556 | 1565 |
1557 #endif /* not MULE */ | 1566 #endif /* MULE */ |
1558 | 1567 |
1559 /* If `translate' is non-null, return translate[D], else just D. We | 1568 /* If `translate' is non-null, return translate[D], else just D. We |
1560 cast the subscript to translate because some data is declared as | 1569 cast the subscript to translate because some data is declared as |
1561 `char *', to avoid warnings when a string constant is passed. But | 1570 `char *', to avoid warnings when a string constant is passed. But |
1562 when we use a character as a subscript we must make it unsigned. */ | 1571 when we use a character as a subscript we must make it unsigned. */ |
1563 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) | 1572 #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d)) |
1564 | 1573 |
1565 #ifdef MULE | 1574 #ifdef MULE |
1566 | 1575 |
1567 #define TRANSLATE_EXTENDED_UNSAFE(emch) \ | 1576 #define TRANSLATE_EXTENDED_UNSAFE(emch) \ |
1568 (translate && emch < 0x80 ? translate[emch] : (emch)) | 1577 (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch)) |
1569 | 1578 |
1570 #endif | 1579 #endif |
1571 | 1580 |
1572 /* Macros for outputting the compiled pattern into `buffer'. */ | 1581 /* Macros for outputting the compiled pattern into `buffer'. */ |
1573 | 1582 |
1574 /* If the buffer isn't allocated when it comes in, use this. */ | 1583 /* If the buffer isn't allocated when it comes in, use this. */ |
1575 #define INIT_BUF_SIZE 32 | 1584 #define INIT_BUF_SIZE 32 |
1576 | 1585 |
1577 /* Make sure we have at least N more bytes of space in buffer. */ | 1586 /* Make sure we have at least N more bytes of space in buffer. */ |
1578 #define GET_BUFFER_SPACE(n) \ | 1587 #define GET_BUFFER_SPACE(n) \ |
1579 while (b - bufp->buffer + (n) > bufp->allocated) \ | 1588 while (buf_end - bufp->buffer + (n) > bufp->allocated) \ |
1580 EXTEND_BUFFER () | 1589 EXTEND_BUFFER () |
1581 | 1590 |
1582 /* Make sure we have one more byte of buffer space and then add C to it. */ | 1591 /* Make sure we have one more byte of buffer space and then add C to it. */ |
1583 #define BUF_PUSH(c) \ | 1592 #define BUF_PUSH(c) \ |
1584 do { \ | 1593 do { \ |
1585 GET_BUFFER_SPACE (1); \ | 1594 GET_BUFFER_SPACE (1); \ |
1586 *b++ = (unsigned char) (c); \ | 1595 *buf_end++ = (unsigned char) (c); \ |
1587 } while (0) | 1596 } while (0) |
1588 | 1597 |
1589 | 1598 |
1590 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ | 1599 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ |
1591 #define BUF_PUSH_2(c1, c2) \ | 1600 #define BUF_PUSH_2(c1, c2) \ |
1592 do { \ | 1601 do { \ |
1593 GET_BUFFER_SPACE (2); \ | 1602 GET_BUFFER_SPACE (2); \ |
1594 *b++ = (unsigned char) (c1); \ | 1603 *buf_end++ = (unsigned char) (c1); \ |
1595 *b++ = (unsigned char) (c2); \ | 1604 *buf_end++ = (unsigned char) (c2); \ |
1596 } while (0) | 1605 } while (0) |
1597 | 1606 |
1598 | 1607 |
1599 /* As with BUF_PUSH_2, except for three bytes. */ | 1608 /* As with BUF_PUSH_2, except for three bytes. */ |
1600 #define BUF_PUSH_3(c1, c2, c3) \ | 1609 #define BUF_PUSH_3(c1, c2, c3) \ |
1601 do { \ | 1610 do { \ |
1602 GET_BUFFER_SPACE (3); \ | 1611 GET_BUFFER_SPACE (3); \ |
1603 *b++ = (unsigned char) (c1); \ | 1612 *buf_end++ = (unsigned char) (c1); \ |
1604 *b++ = (unsigned char) (c2); \ | 1613 *buf_end++ = (unsigned char) (c2); \ |
1605 *b++ = (unsigned char) (c3); \ | 1614 *buf_end++ = (unsigned char) (c3); \ |
1606 } while (0) | 1615 } while (0) |
1607 | 1616 |
1608 | 1617 |
1609 /* Store a jump with opcode OP at LOC to location TO. We store a | 1618 /* Store a jump with opcode OP at LOC to location TO. We store a |
1610 relative address offset by the three bytes the jump itself occupies. */ | 1619 relative address offset by the three bytes the jump itself occupies. */ |
1613 | 1622 |
1614 /* Likewise, for a two-argument jump. */ | 1623 /* Likewise, for a two-argument jump. */ |
1615 #define STORE_JUMP2(op, loc, to, arg) \ | 1624 #define STORE_JUMP2(op, loc, to, arg) \ |
1616 store_op2 (op, loc, (to) - (loc) - 3, arg) | 1625 store_op2 (op, loc, (to) - (loc) - 3, arg) |
1617 | 1626 |
1618 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ | 1627 /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the |
1628 buffer end. */ | |
1619 #define INSERT_JUMP(op, loc, to) \ | 1629 #define INSERT_JUMP(op, loc, to) \ |
1620 insert_op1 (op, loc, (to) - (loc) - 3, b) | 1630 insert_op1 (op, loc, (to) - (loc) - 3, buf_end) |
1621 | 1631 |
1622 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ | 1632 /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the |
1633 buffer end. */ | |
1623 #define INSERT_JUMP2(op, loc, to, arg) \ | 1634 #define INSERT_JUMP2(op, loc, to, arg) \ |
1624 insert_op2 (op, loc, (to) - (loc) - 3, arg, b) | 1635 insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end) |
1625 | 1636 |
1626 | 1637 |
1627 /* This is not an arbitrary limit: the arguments which represent offsets | 1638 /* This is not an arbitrary limit: the arguments which represent offsets |
1628 into the pattern are two bytes long. So if 2^16 bytes turns out to | 1639 into the pattern are two bytes long. So if 2^16 bytes turns out to |
1629 be too small, many things would have to change. */ | 1640 be too small, many things would have to change. */ |
1634 reset the pointers that pointed into the old block to point to the | 1645 reset the pointers that pointed into the old block to point to the |
1635 correct places in the new one. If extending the buffer results in it | 1646 correct places in the new one. If extending the buffer results in it |
1636 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ | 1647 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ |
1637 #define EXTEND_BUFFER() \ | 1648 #define EXTEND_BUFFER() \ |
1638 do { \ | 1649 do { \ |
1639 unsigned char *old_buffer = bufp->buffer; \ | 1650 re_char *old_buffer = bufp->buffer; \ |
1640 if (bufp->allocated == MAX_BUF_SIZE) \ | 1651 if (bufp->allocated == MAX_BUF_SIZE) \ |
1641 return REG_ESIZE; \ | 1652 return REG_ESIZE; \ |
1642 bufp->allocated <<= 1; \ | 1653 bufp->allocated <<= 1; \ |
1643 if (bufp->allocated > MAX_BUF_SIZE) \ | 1654 if (bufp->allocated > MAX_BUF_SIZE) \ |
1644 bufp->allocated = MAX_BUF_SIZE; \ | 1655 bufp->allocated = MAX_BUF_SIZE; \ |
1646 if (bufp->buffer == NULL) \ | 1657 if (bufp->buffer == NULL) \ |
1647 return REG_ESPACE; \ | 1658 return REG_ESPACE; \ |
1648 /* If the buffer moved, move all the pointers into it. */ \ | 1659 /* If the buffer moved, move all the pointers into it. */ \ |
1649 if (old_buffer != bufp->buffer) \ | 1660 if (old_buffer != bufp->buffer) \ |
1650 { \ | 1661 { \ |
1651 b = (b - old_buffer) + bufp->buffer; \ | 1662 buf_end = (buf_end - old_buffer) + bufp->buffer; \ |
1652 begalt = (begalt - old_buffer) + bufp->buffer; \ | 1663 begalt = (begalt - old_buffer) + bufp->buffer; \ |
1653 if (fixup_alt_jump) \ | 1664 if (fixup_alt_jump) \ |
1654 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ | 1665 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ |
1655 if (laststart) \ | 1666 if (laststart) \ |
1656 laststart = (laststart - old_buffer) + bufp->buffer; \ | 1667 laststart = (laststart - old_buffer) + bufp->buffer; \ |
1703 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) | 1714 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) |
1704 | 1715 |
1705 | 1716 |
1706 /* Set the bit for character C in a bit vector. */ | 1717 /* Set the bit for character C in a bit vector. */ |
1707 #define SET_LIST_BIT(c) \ | 1718 #define SET_LIST_BIT(c) \ |
1708 (b[((unsigned char) (c)) / BYTEWIDTH] \ | 1719 (buf_end[((unsigned char) (c)) / BYTEWIDTH] \ |
1709 |= 1 << (((unsigned char) c) % BYTEWIDTH)) | 1720 |= 1 << (((unsigned char) c) % BYTEWIDTH)) |
1710 | 1721 |
1711 #ifdef MULE | 1722 #ifdef MULE |
1712 | 1723 |
1713 /* Set the "bit" for character C in a range table. */ | 1724 /* Set the "bit" for character C in a range table. */ |
1760 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); | 1771 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); |
1761 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, | 1772 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, |
1762 unsigned char *end); | 1773 unsigned char *end); |
1763 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, | 1774 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, |
1764 unsigned char *end); | 1775 unsigned char *end); |
1765 static boolean at_begline_loc_p (const char *pattern, const char *p, | 1776 static boolean at_begline_loc_p (re_char *pattern, re_char *p, |
1766 reg_syntax_t syntax); | 1777 reg_syntax_t syntax); |
1767 static boolean at_endline_loc_p (const char *p, const char *pend, int syntax); | 1778 static boolean at_endline_loc_p (re_char *p, re_char *pend, int syntax); |
1768 static boolean group_in_compile_stack (compile_stack_type compile_stack, | 1779 static boolean group_in_compile_stack (compile_stack_type compile_stack, |
1769 regnum_t regnum); | 1780 regnum_t regnum); |
1770 static reg_errcode_t compile_range (const char **p_ptr, const char *pend, | 1781 static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend, |
1771 char *translate, reg_syntax_t syntax, | 1782 RE_TRANSLATE_TYPE translate, |
1783 reg_syntax_t syntax, | |
1772 unsigned char *b); | 1784 unsigned char *b); |
1773 #ifdef MULE | 1785 #ifdef MULE |
1774 static reg_errcode_t compile_extended_range (const char **p_ptr, | 1786 static reg_errcode_t compile_extended_range (re_char **p_ptr, |
1775 const char *pend, | 1787 re_char *pend, |
1776 char *translate, | 1788 RE_TRANSLATE_TYPE translate, |
1777 reg_syntax_t syntax, | 1789 reg_syntax_t syntax, |
1778 Lisp_Object rtab); | 1790 Lisp_Object rtab); |
1779 #endif /* MULE */ | 1791 #endif /* MULE */ |
1780 static boolean group_match_null_string_p (unsigned char **p, | 1792 static boolean group_match_null_string_p (unsigned char **p, |
1781 unsigned char *end, | 1793 unsigned char *end, |
1784 register_info_type *reg_info); | 1796 register_info_type *reg_info); |
1785 static boolean common_op_match_null_string_p (unsigned char **p, | 1797 static boolean common_op_match_null_string_p (unsigned char **p, |
1786 unsigned char *end, | 1798 unsigned char *end, |
1787 register_info_type *reg_info); | 1799 register_info_type *reg_info); |
1788 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2, | 1800 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2, |
1789 REGISTER int len, char *translate); | 1801 REGISTER int len, RE_TRANSLATE_TYPE translate); |
1790 static int re_match_2_internal (struct re_pattern_buffer *bufp, | 1802 static int re_match_2_internal (struct re_pattern_buffer *bufp, |
1791 const char *string1, int size1, | 1803 re_char *string1, int size1, |
1792 const char *string2, int size2, int pos, | 1804 re_char *string2, int size2, int pos, |
1793 struct re_registers *regs, int stop); | 1805 struct re_registers *regs, int stop); |
1794 | 1806 |
1795 #ifndef MATCH_MAY_ALLOCATE | 1807 #ifndef MATCH_MAY_ALLOCATE |
1796 | 1808 |
1797 /* If we cannot allocate large objects within re_match_2_internal, | 1809 /* If we cannot allocate large objects within re_match_2_internal, |
1806 /* Size with which the following vectors are currently allocated. | 1818 /* Size with which the following vectors are currently allocated. |
1807 That is so we can make them bigger as needed, | 1819 That is so we can make them bigger as needed, |
1808 but never make them smaller. */ | 1820 but never make them smaller. */ |
1809 static int regs_allocated_size; | 1821 static int regs_allocated_size; |
1810 | 1822 |
1811 static const char ** regstart, ** regend; | 1823 static re_char ** regstart, ** regend; |
1812 static const char ** old_regstart, ** old_regend; | 1824 static re_char ** old_regstart, ** old_regend; |
1813 static const char **best_regstart, **best_regend; | 1825 static re_char **best_regstart, **best_regend; |
1814 static register_info_type *reg_info; | 1826 static register_info_type *reg_info; |
1815 static const char **reg_dummy; | 1827 static re_char **reg_dummy; |
1816 static register_info_type *reg_info_dummy; | 1828 static register_info_type *reg_info_dummy; |
1817 | 1829 |
1818 /* Make the register vectors big enough for NUM_REGS registers, | 1830 /* Make the register vectors big enough for NUM_REGS registers, |
1819 but don't make them smaller. */ | 1831 but don't make them smaller. */ |
1820 | 1832 |
1821 static | 1833 static |
1822 regex_grow_registers (int num_regs) | 1834 regex_grow_registers (int num_regs) |
1823 { | 1835 { |
1824 if (num_regs > regs_allocated_size) | 1836 if (num_regs > regs_allocated_size) |
1825 { | 1837 { |
1826 RETALLOC_IF (regstart, num_regs, const char *); | 1838 RETALLOC_IF (regstart, num_regs, re_char *); |
1827 RETALLOC_IF (regend, num_regs, const char *); | 1839 RETALLOC_IF (regend, num_regs, re_char *); |
1828 RETALLOC_IF (old_regstart, num_regs, const char *); | 1840 RETALLOC_IF (old_regstart, num_regs, re_char *); |
1829 RETALLOC_IF (old_regend, num_regs, const char *); | 1841 RETALLOC_IF (old_regend, num_regs, re_char *); |
1830 RETALLOC_IF (best_regstart, num_regs, const char *); | 1842 RETALLOC_IF (best_regstart, num_regs, re_char *); |
1831 RETALLOC_IF (best_regend, num_regs, const char *); | 1843 RETALLOC_IF (best_regend, num_regs, re_char *); |
1832 RETALLOC_IF (reg_info, num_regs, register_info_type); | 1844 RETALLOC_IF (reg_info, num_regs, register_info_type); |
1833 RETALLOC_IF (reg_dummy, num_regs, const char *); | 1845 RETALLOC_IF (reg_dummy, num_regs, re_char *); |
1834 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); | 1846 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); |
1835 | 1847 |
1836 regs_allocated_size = num_regs; | 1848 regs_allocated_size = num_regs; |
1837 } | 1849 } |
1838 } | 1850 } |
1860 /* Return, freeing storage we allocated. */ | 1872 /* Return, freeing storage we allocated. */ |
1861 #define FREE_STACK_RETURN(value) \ | 1873 #define FREE_STACK_RETURN(value) \ |
1862 return (free (compile_stack.stack), value) | 1874 return (free (compile_stack.stack), value) |
1863 | 1875 |
1864 static reg_errcode_t | 1876 static reg_errcode_t |
1865 regex_compile (const char *pattern, int size, reg_syntax_t syntax, | 1877 regex_compile (re_char *pattern, int size, reg_syntax_t syntax, |
1866 struct re_pattern_buffer *bufp) | 1878 struct re_pattern_buffer *bufp) |
1867 { | 1879 { |
1868 /* We fetch characters from PATTERN here. We declare these as int | 1880 /* We fetch characters from PATTERN here. We declare these as int |
1869 (or possibly long) so that chars above 127 can be used as | 1881 (or possibly long) so that chars above 127 can be used as |
1870 array indices. The macros that fetch a character from the pattern | 1882 array indices. The macros that fetch a character from the pattern |
1872 get bitten by negative numbers here. */ | 1884 get bitten by negative numbers here. */ |
1873 /* XEmacs change: used to be unsigned char. */ | 1885 /* XEmacs change: used to be unsigned char. */ |
1874 REGISTER EMACS_INT c, c1; | 1886 REGISTER EMACS_INT c, c1; |
1875 | 1887 |
1876 /* A random temporary spot in PATTERN. */ | 1888 /* A random temporary spot in PATTERN. */ |
1877 const char *p1; | 1889 re_char *p1; |
1878 | 1890 |
1879 /* Points to the end of the buffer, where we should append. */ | 1891 /* Points to the end of the buffer, where we should append. */ |
1880 REGISTER unsigned char *b; | 1892 REGISTER unsigned char *buf_end; |
1881 | 1893 |
1882 /* Keeps track of unclosed groups. */ | 1894 /* Keeps track of unclosed groups. */ |
1883 compile_stack_type compile_stack; | 1895 compile_stack_type compile_stack; |
1884 | 1896 |
1885 /* Points to the current (ending) position in the pattern. */ | 1897 /* Points to the current (ending) position in the pattern. */ |
1886 const char *p = pattern; | 1898 re_char *p = pattern; |
1887 const char *pend = pattern + size; | 1899 re_char *pend = pattern + size; |
1888 | 1900 |
1889 /* How to translate the characters in the pattern. */ | 1901 /* How to translate the characters in the pattern. */ |
1890 char *translate = bufp->translate; | 1902 RE_TRANSLATE_TYPE translate = bufp->translate; |
1891 | 1903 |
1892 /* Address of the count-byte of the most recently inserted `exactn' | 1904 /* Address of the count-byte of the most recently inserted `exactn' |
1893 command. This makes it possible to tell if a new exact-match | 1905 command. This makes it possible to tell if a new exact-match |
1894 character can be added to that command or if the character requires | 1906 character can be added to that command or if the character requires |
1895 a new `exactn' command. */ | 1907 a new `exactn' command. */ |
1903 /* Address of beginning of regexp, or inside of last group. */ | 1915 /* Address of beginning of regexp, or inside of last group. */ |
1904 unsigned char *begalt; | 1916 unsigned char *begalt; |
1905 | 1917 |
1906 /* Place in the uncompiled pattern (i.e., the {) to | 1918 /* Place in the uncompiled pattern (i.e., the {) to |
1907 which to go back if the interval is invalid. */ | 1919 which to go back if the interval is invalid. */ |
1908 const char *beg_interval; | 1920 re_char *beg_interval; |
1909 | 1921 |
1910 /* Address of the place where a forward jump should go to the end of | 1922 /* Address of the place where a forward jump should go to the end of |
1911 the containing expression. Each alternative of an `or' -- except the | 1923 the containing expression. Each alternative of an `or' -- except the |
1912 last -- ends with a forward jump of this sort. */ | 1924 last -- ends with a forward jump of this sort. */ |
1913 unsigned char *fixup_alt_jump = 0; | 1925 unsigned char *fixup_alt_jump = 0; |
1970 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); | 1982 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); |
1971 | 1983 |
1972 bufp->allocated = INIT_BUF_SIZE; | 1984 bufp->allocated = INIT_BUF_SIZE; |
1973 } | 1985 } |
1974 | 1986 |
1975 begalt = b = bufp->buffer; | 1987 begalt = buf_end = bufp->buffer; |
1976 | 1988 |
1977 /* Loop through the uncompiled pattern until we're at the end. */ | 1989 /* Loop through the uncompiled pattern until we're at the end. */ |
1978 while (p != pend) | 1990 while (p != pend) |
1979 { | 1991 { |
1980 PATFETCH (c); | 1992 PATFETCH (c); |
2101 3: /jump to 9 | 2113 3: /jump to 9 |
2102 6: /exactn/1/A | 2114 6: /exactn/1/A |
2103 9: end of pattern. | 2115 9: end of pattern. |
2104 */ | 2116 */ |
2105 GET_BUFFER_SPACE (6); | 2117 GET_BUFFER_SPACE (6); |
2106 INSERT_JUMP (jump, laststart, b + 3); | 2118 INSERT_JUMP (jump, laststart, buf_end + 3); |
2107 b += 3; | 2119 buf_end += 3; |
2108 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); | 2120 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); |
2109 b += 3; | 2121 buf_end += 3; |
2110 } | 2122 } |
2111 else if (zero_times_ok) | 2123 else if (zero_times_ok) |
2112 { | 2124 { |
2113 /* "a*?" becomes: | 2125 /* "a*?" becomes: |
2114 0: /jump to 6 | 2126 0: /jump to 6 |
2115 3: /exactn/1/A | 2127 3: /exactn/1/A |
2116 6: /on_failure_jump to 3 | 2128 6: /on_failure_jump to 3 |
2117 9: end of pattern. | 2129 9: end of pattern. |
2118 */ | 2130 */ |
2119 GET_BUFFER_SPACE (6); | 2131 GET_BUFFER_SPACE (6); |
2120 INSERT_JUMP (jump, laststart, b + 3); | 2132 INSERT_JUMP (jump, laststart, buf_end + 3); |
2121 b += 3; | 2133 buf_end += 3; |
2122 STORE_JUMP (on_failure_jump, b, laststart + 3); | 2134 STORE_JUMP (on_failure_jump, buf_end, laststart + 3); |
2123 b += 3; | 2135 buf_end += 3; |
2124 } | 2136 } |
2125 else | 2137 else |
2126 { | 2138 { |
2127 /* "a+?" becomes: | 2139 /* "a+?" becomes: |
2128 0: /exactn/1/A | 2140 0: /exactn/1/A |
2129 3: /on_failure_jump to 0 | 2141 3: /on_failure_jump to 0 |
2130 6: end of pattern. | 2142 6: end of pattern. |
2131 */ | 2143 */ |
2132 GET_BUFFER_SPACE (3); | 2144 GET_BUFFER_SPACE (3); |
2133 STORE_JUMP (on_failure_jump, b, laststart); | 2145 STORE_JUMP (on_failure_jump, buf_end, laststart); |
2134 b += 3; | 2146 buf_end += 3; |
2135 } | 2147 } |
2136 } | 2148 } |
2137 else | 2149 else |
2138 { | 2150 { |
2139 /* Are we optimizing this jump? */ | 2151 /* Are we optimizing this jump? */ |
2140 boolean keep_string_p = false; | 2152 boolean keep_string_p = false; |
2141 | 2153 |
2142 if (many_times_ok) | 2154 if (many_times_ok) |
2143 { /* More than one repetition is allowed, so put in at the | 2155 { /* More than one repetition is allowed, so put in |
2144 end a backward relative jump from `b' to before the next | 2156 at the end a backward relative jump from |
2145 jump we're going to put in below (which jumps from | 2157 `buf_end' to before the next jump we're going |
2146 laststart to after this jump). | 2158 to put in below (which jumps from laststart to |
2159 after this jump). | |
2147 | 2160 |
2148 But if we are at the `*' in the exact sequence `.*\n', | 2161 But if we are at the `*' in the exact sequence `.*\n', |
2149 insert an unconditional jump backwards to the ., | 2162 insert an unconditional jump backwards to the ., |
2150 instead of the beginning of the loop. This way we only | 2163 instead of the beginning of the loop. This way we only |
2151 push a failure point once, instead of every time | 2164 push a failure point once, instead of every time |
2159 pattern, because laststart was nonzero. And we've | 2172 pattern, because laststart was nonzero. And we've |
2160 already incremented `p', by the way, to be the | 2173 already incremented `p', by the way, to be the |
2161 character after the `*'. Do we have to do something | 2174 character after the `*'. Do we have to do something |
2162 analogous here for null bytes, because of | 2175 analogous here for null bytes, because of |
2163 RE_DOT_NOT_NULL? */ | 2176 RE_DOT_NOT_NULL? */ |
2164 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') | 2177 if (*(p - 2) == '.' |
2165 && zero_times_ok | 2178 && zero_times_ok |
2166 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') | 2179 && p < pend && *p == '\n' |
2167 && !(syntax & RE_DOT_NEWLINE)) | 2180 && !(syntax & RE_DOT_NEWLINE)) |
2168 { /* We have .*\n. */ | 2181 { /* We have .*\n. */ |
2169 STORE_JUMP (jump, b, laststart); | 2182 STORE_JUMP (jump, buf_end, laststart); |
2170 keep_string_p = true; | 2183 keep_string_p = true; |
2171 } | 2184 } |
2172 else | 2185 else |
2173 /* Anything else. */ | 2186 /* Anything else. */ |
2174 STORE_JUMP (maybe_pop_jump, b, laststart - 3); | 2187 STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3); |
2175 | 2188 |
2176 /* We've added more stuff to the buffer. */ | 2189 /* We've added more stuff to the buffer. */ |
2177 b += 3; | 2190 buf_end += 3; |
2178 } | 2191 } |
2179 | 2192 |
2180 /* On failure, jump from laststart to b + 3, which will be the | 2193 /* On failure, jump from laststart to buf_end + 3, |
2181 end of the buffer after this jump is inserted. */ | 2194 which will be the end of the buffer after this jump |
2195 is inserted. */ | |
2182 GET_BUFFER_SPACE (3); | 2196 GET_BUFFER_SPACE (3); |
2183 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump | 2197 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump |
2184 : on_failure_jump, | 2198 : on_failure_jump, |
2185 laststart, b + 3); | 2199 laststart, buf_end + 3); |
2186 b += 3; | 2200 buf_end += 3; |
2187 | 2201 |
2188 if (!zero_times_ok) | 2202 if (!zero_times_ok) |
2189 { | 2203 { |
2190 /* At least one repetition is required, so insert a | 2204 /* At least one repetition is required, so insert a |
2191 `dummy_failure_jump' before the initial | 2205 `dummy_failure_jump' before the initial |
2192 `on_failure_jump' instruction of the loop. This | 2206 `on_failure_jump' instruction of the loop. This |
2193 effects a skip over that instruction the first time | 2207 effects a skip over that instruction the first time |
2194 we hit that loop. */ | 2208 we hit that loop. */ |
2195 GET_BUFFER_SPACE (3); | 2209 GET_BUFFER_SPACE (3); |
2196 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); | 2210 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); |
2197 b += 3; | 2211 buf_end += 3; |
2198 } | 2212 } |
2199 } | 2213 } |
2200 pending_exact = 0; | 2214 pending_exact = 0; |
2201 } | 2215 } |
2202 break; | 2216 break; |
2203 | 2217 |
2204 | 2218 |
2205 case '.': | 2219 case '.': |
2206 laststart = b; | 2220 laststart = buf_end; |
2207 BUF_PUSH (anychar); | 2221 BUF_PUSH (anychar); |
2208 break; | 2222 break; |
2209 | 2223 |
2210 | 2224 |
2211 case '[': | 2225 case '[': |
2221 | 2235 |
2222 /* Ensure that we have enough space to push a charset: the | 2236 /* Ensure that we have enough space to push a charset: the |
2223 opcode, the length count, and the bitset; 34 bytes in all. */ | 2237 opcode, the length count, and the bitset; 34 bytes in all. */ |
2224 GET_BUFFER_SPACE (34); | 2238 GET_BUFFER_SPACE (34); |
2225 | 2239 |
2226 laststart = b; | 2240 laststart = buf_end; |
2227 | 2241 |
2228 /* We test `*p == '^' twice, instead of using an if | 2242 /* We test `*p == '^' twice, instead of using an if |
2229 statement, so we only need one BUF_PUSH. */ | 2243 statement, so we only need one BUF_PUSH. */ |
2230 BUF_PUSH (*p == '^' ? charset_not : charset); | 2244 BUF_PUSH (*p == '^' ? charset_not : charset); |
2231 if (*p == '^') | 2245 if (*p == '^') |
2236 | 2250 |
2237 /* Push the number of bytes in the bitmap. */ | 2251 /* Push the number of bytes in the bitmap. */ |
2238 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); | 2252 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); |
2239 | 2253 |
2240 /* Clear the whole map. */ | 2254 /* Clear the whole map. */ |
2241 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); | 2255 memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH); |
2242 | 2256 |
2243 /* charset_not matches newline according to a syntax bit. */ | 2257 /* charset_not matches newline according to a syntax bit. */ |
2244 if ((re_opcode_t) b[-2] == charset_not | 2258 if ((re_opcode_t) buf_end[-2] == charset_not |
2245 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) | 2259 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) |
2246 SET_LIST_BIT ('\n'); | 2260 SET_LIST_BIT ('\n'); |
2247 | 2261 |
2248 #ifdef MULE | 2262 #ifdef MULE |
2249 start_over_with_extended: | 2263 start_over_with_extended: |
2250 if (has_extended_chars) | 2264 if (has_extended_chars) |
2251 { | 2265 { |
2252 /* There are extended chars here, which means we need to start | 2266 /* There are extended chars here, which means we need to start |
2253 over and shift to unified range-table format. */ | 2267 over and shift to unified range-table format. */ |
2254 if (b[-2] == charset) | 2268 if (buf_end[-2] == charset) |
2255 b[-2] = charset_mule; | 2269 buf_end[-2] = charset_mule; |
2256 else | 2270 else |
2257 b[-2] = charset_mule_not; | 2271 buf_end[-2] = charset_mule_not; |
2258 b--; | 2272 buf_end--; |
2259 p = p1; /* go back to the beginning of the charset, after | 2273 p = p1; /* go back to the beginning of the charset, after |
2260 a possible ^. */ | 2274 a possible ^. */ |
2261 rtab = Vthe_lisp_rangetab; | 2275 rtab = Vthe_lisp_rangetab; |
2262 Fclear_range_table (rtab); | 2276 Fclear_range_table (rtab); |
2263 | 2277 |
2264 /* charset_not matches newline according to a syntax bit. */ | 2278 /* charset_not matches newline according to a syntax bit. */ |
2265 if ((re_opcode_t) b[-1] == charset_mule_not | 2279 if ((re_opcode_t) buf_end[-1] == charset_mule_not |
2266 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) | 2280 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) |
2267 SET_EITHER_BIT ('\n'); | 2281 SET_EITHER_BIT ('\n'); |
2268 } | 2282 } |
2269 #endif /* MULE */ | 2283 #endif /* MULE */ |
2270 | 2284 |
2271 /* Read in characters and ranges, setting map bits. */ | 2285 /* Read in characters and ranges, setting map bits. */ |
2272 for (;;) | 2286 for (;;) |
2273 { | 2287 { |
2274 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2288 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
2275 | 2289 |
2276 PATFETCH_EITHER (c); | 2290 PATFETCH (c); |
2277 | 2291 |
2278 #ifdef MULE | 2292 #ifdef MULE |
2279 if (c >= 0x80 && !has_extended_chars) | 2293 if (c >= 0x80 && !has_extended_chars) |
2280 { | 2294 { |
2281 has_extended_chars = 1; | 2295 has_extended_chars = 1; |
2290 /* \ might escape characters inside [...] and [^...]. */ | 2304 /* \ might escape characters inside [...] and [^...]. */ |
2291 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') | 2305 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') |
2292 { | 2306 { |
2293 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); | 2307 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); |
2294 | 2308 |
2295 PATFETCH_EITHER (c1); | 2309 PATFETCH (c1); |
2296 #ifdef MULE | 2310 #ifdef MULE |
2297 if (c1 >= 0x80 && !has_extended_chars) | 2311 if (c1 >= 0x80 && !has_extended_chars) |
2298 { | 2312 { |
2299 has_extended_chars = 1; | 2313 has_extended_chars = 1; |
2300 goto start_over_with_extended; | 2314 goto start_over_with_extended; |
2319 was a character: if this is a hyphen not at the | 2333 was a character: if this is a hyphen not at the |
2320 beginning or the end of a list, then it's the range | 2334 beginning or the end of a list, then it's the range |
2321 operator. */ | 2335 operator. */ |
2322 if (c == '-' | 2336 if (c == '-' |
2323 && !(p - 2 >= pattern && p[-2] == '[') | 2337 && !(p - 2 >= pattern && p[-2] == '[') |
2324 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') | 2338 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') |
2325 && *p != ']') | 2339 && *p != ']') |
2326 { | 2340 { |
2327 reg_errcode_t ret; | 2341 reg_errcode_t ret; |
2328 | 2342 |
2329 #ifdef MULE | 2343 #ifdef MULE |
2335 if (has_extended_chars) | 2349 if (has_extended_chars) |
2336 ret = compile_extended_range (&p, pend, translate, | 2350 ret = compile_extended_range (&p, pend, translate, |
2337 syntax, rtab); | 2351 syntax, rtab); |
2338 else | 2352 else |
2339 #endif /* MULE */ | 2353 #endif /* MULE */ |
2340 ret = compile_range (&p, pend, translate, syntax, b); | 2354 ret = compile_range (&p, pend, translate, syntax, buf_end); |
2341 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); | 2355 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); |
2342 } | 2356 } |
2343 | 2357 |
2344 else if (p[0] == '-' && p[1] != ']') | 2358 else if (p[0] == '-' && p[1] != ']') |
2345 { /* This handles ranges made up of characters only. */ | 2359 { /* This handles ranges made up of characters only. */ |
2357 if (has_extended_chars) | 2371 if (has_extended_chars) |
2358 ret = compile_extended_range (&p, pend, translate, | 2372 ret = compile_extended_range (&p, pend, translate, |
2359 syntax, rtab); | 2373 syntax, rtab); |
2360 else | 2374 else |
2361 #endif /* MULE */ | 2375 #endif /* MULE */ |
2362 ret = compile_range (&p, pend, translate, syntax, b); | 2376 ret = compile_range (&p, pend, translate, syntax, buf_end); |
2363 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); | 2377 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); |
2364 } | 2378 } |
2365 | 2379 |
2366 /* See if we're at the beginning of a possible character | 2380 /* See if we're at the beginning of a possible character |
2367 class. */ | 2381 class. */ |
2376 /* If pattern is `[[:'. */ | 2390 /* If pattern is `[[:'. */ |
2377 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); | 2391 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); |
2378 | 2392 |
2379 for (;;) | 2393 for (;;) |
2380 { | 2394 { |
2381 /* Do not do PATFETCH_EITHER() here. We want | 2395 /* #### This code is unused. |
2382 to just see if the bytes match particular | 2396 Correctness is not checked after TRT |
2383 strings, and we put them all back if not. | 2397 table change. */ |
2384 | |
2385 #### May need to be changed once trt tables | |
2386 are working. */ | |
2387 PATFETCH (c); | 2398 PATFETCH (c); |
2388 if (c == ':' || c == ']' || p == pend | 2399 if (c == ':' || c == ']' || p == pend |
2389 || c1 == CHAR_CLASS_MAX_LENGTH) | 2400 || c1 == CHAR_CLASS_MAX_LENGTH) |
2390 break; | 2401 break; |
2391 str[c1++] = (char) c; | 2402 str[c1++] = (char) c; |
2392 } | 2403 } |
2393 str[c1] = '\0'; | 2404 str[c1] = '\0'; |
2394 | 2405 |
2395 /* If it isn't a word bracketed by `[:' and:`]': | 2406 /* If it isn't a word bracketed by `[:' and `:]': |
2396 undo the ending character, the letters, and leave | 2407 undo the ending character, the letters, and leave |
2397 the leading `:' and `[' (but set bits for them). */ | 2408 the leading `:' and `[' (but set bits for them). */ |
2398 if (c == ':' && *p == ']') | 2409 if (c == ':' && *p == ']') |
2399 { | 2410 { |
2400 int ch; | 2411 int ch; |
2464 { | 2475 { |
2465 /* We have a range table, not a bit vector. */ | 2476 /* We have a range table, not a bit vector. */ |
2466 int bytes_needed = | 2477 int bytes_needed = |
2467 unified_range_table_bytes_needed (rtab); | 2478 unified_range_table_bytes_needed (rtab); |
2468 GET_BUFFER_SPACE (bytes_needed); | 2479 GET_BUFFER_SPACE (bytes_needed); |
2469 unified_range_table_copy_data (rtab, b); | 2480 unified_range_table_copy_data (rtab, buf_end); |
2470 b += unified_range_table_bytes_used (b); | 2481 buf_end += unified_range_table_bytes_used (buf_end); |
2471 break; | 2482 break; |
2472 } | 2483 } |
2473 #endif /* MULE */ | 2484 #endif /* MULE */ |
2474 /* Discard any (non)matching list bytes that are all 0 at the | 2485 /* Discard any (non)matching list bytes that are all 0 at the |
2475 end of the map. Decrease the map-length byte too. */ | 2486 end of the map. Decrease the map-length byte too. */ |
2476 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) | 2487 while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0) |
2477 b[-1]--; | 2488 buf_end[-1]--; |
2478 b += b[-1]; | 2489 buf_end += buf_end[-1]; |
2479 } | 2490 } |
2480 break; | 2491 break; |
2481 | 2492 |
2482 | 2493 |
2483 case '(': | 2494 case '(': |
2533 { | 2544 { |
2534 regnum_t r; | 2545 regnum_t r; |
2535 | 2546 |
2536 if (!(syntax & RE_NO_SHY_GROUPS) | 2547 if (!(syntax & RE_NO_SHY_GROUPS) |
2537 && p != pend | 2548 && p != pend |
2538 && TRANSLATE(*p) == TRANSLATE('?')) | 2549 && *p == '?') |
2539 { | 2550 { |
2540 p++; | 2551 p++; |
2541 PATFETCH(c); | 2552 PATFETCH (c); |
2542 switch (c) | 2553 switch (c) |
2543 { | 2554 { |
2544 case ':': /* shy groups */ | 2555 case ':': /* shy groups */ |
2545 r = MAX_REGNUM + 1; | 2556 r = MAX_REGNUM + 1; |
2546 break; | 2557 break; |
2570 whole pattern moves because of realloc, they will still | 2581 whole pattern moves because of realloc, they will still |
2571 be valid. */ | 2582 be valid. */ |
2572 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; | 2583 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; |
2573 COMPILE_STACK_TOP.fixup_alt_jump | 2584 COMPILE_STACK_TOP.fixup_alt_jump |
2574 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | 2585 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; |
2575 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | 2586 COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer; |
2576 COMPILE_STACK_TOP.regnum = r; | 2587 COMPILE_STACK_TOP.regnum = r; |
2577 | 2588 |
2578 /* We will eventually replace the 0 with the number of | 2589 /* We will eventually replace the 0 with the number of |
2579 groups inner to this one. But do not push a | 2590 groups inner to this one. But do not push a |
2580 start_memory for groups beyond the last one we can | 2591 start_memory for groups beyond the last one we can |
2581 represent in the compiled pattern. */ | 2592 represent in the compiled pattern. */ |
2582 if (r <= MAX_REGNUM) | 2593 if (r <= MAX_REGNUM) |
2583 { | 2594 { |
2584 COMPILE_STACK_TOP.inner_group_offset | 2595 COMPILE_STACK_TOP.inner_group_offset |
2585 = b - bufp->buffer + 2; | 2596 = buf_end - bufp->buffer + 2; |
2586 BUF_PUSH_3 (start_memory, r, 0); | 2597 BUF_PUSH_3 (start_memory, r, 0); |
2587 } | 2598 } |
2588 | 2599 |
2589 compile_stack.avail++; | 2600 compile_stack.avail++; |
2590 | 2601 |
2591 fixup_alt_jump = 0; | 2602 fixup_alt_jump = 0; |
2592 laststart = 0; | 2603 laststart = 0; |
2593 begalt = b; | 2604 begalt = buf_end; |
2594 /* If we've reached MAX_REGNUM groups, then this open | 2605 /* If we've reached MAX_REGNUM groups, then this open |
2595 won't actually generate any code, so we'll have to | 2606 won't actually generate any code, so we'll have to |
2596 clear pending_exact explicitly. */ | 2607 clear pending_exact explicitly. */ |
2597 pending_exact = 0; | 2608 pending_exact = 0; |
2598 } | 2609 } |
2617 `push_dummy_failure' in `re_match_2'. */ | 2628 `push_dummy_failure' in `re_match_2'. */ |
2618 BUF_PUSH (push_dummy_failure); | 2629 BUF_PUSH (push_dummy_failure); |
2619 | 2630 |
2620 /* We allocated space for this jump when we assigned | 2631 /* We allocated space for this jump when we assigned |
2621 to `fixup_alt_jump', in the `handle_alt' case below. */ | 2632 to `fixup_alt_jump', in the `handle_alt' case below. */ |
2622 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); | 2633 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1); |
2623 } | 2634 } |
2624 | 2635 |
2625 /* See similar code for backslashed left paren above. */ | 2636 /* See similar code for backslashed left paren above. */ |
2626 if (COMPILE_STACK_EMPTY) { | 2637 if (COMPILE_STACK_EMPTY) { |
2627 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) | 2638 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) |
2675 goto normal_char; | 2686 goto normal_char; |
2676 | 2687 |
2677 /* Insert before the previous alternative a jump which | 2688 /* Insert before the previous alternative a jump which |
2678 jumps to this alternative if the former fails. */ | 2689 jumps to this alternative if the former fails. */ |
2679 GET_BUFFER_SPACE (3); | 2690 GET_BUFFER_SPACE (3); |
2680 INSERT_JUMP (on_failure_jump, begalt, b + 6); | 2691 INSERT_JUMP (on_failure_jump, begalt, buf_end + 6); |
2681 pending_exact = 0; | 2692 pending_exact = 0; |
2682 b += 3; | 2693 buf_end += 3; |
2683 | 2694 |
2684 /* The alternative before this one has a jump after it | 2695 /* The alternative before this one has a jump after it |
2685 which gets executed if it gets matched. Adjust that | 2696 which gets executed if it gets matched. Adjust that |
2686 jump so it will jump to this alternative's analogous | 2697 jump so it will jump to this alternative's analogous |
2687 jump (put in below, which in turn will jump to the next | 2698 jump (put in below, which in turn will jump to the next |
2696 three-byte space after `a'. We'll put in the jump, set | 2707 three-byte space after `a'. We'll put in the jump, set |
2697 fixup_alt_jump to right after `b', and leave behind three | 2708 fixup_alt_jump to right after `b', and leave behind three |
2698 bytes which we'll fill in when we get to after `c'. */ | 2709 bytes which we'll fill in when we get to after `c'. */ |
2699 | 2710 |
2700 if (fixup_alt_jump) | 2711 if (fixup_alt_jump) |
2701 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | 2712 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); |
2702 | 2713 |
2703 /* Mark and leave space for a jump after this alternative, | 2714 /* Mark and leave space for a jump after this alternative, |
2704 to be filled in later either by next alternative or | 2715 to be filled in later either by next alternative or |
2705 when we know we're at the end of a series of alternatives. */ | 2716 when we know we're at the end of a series of alternatives. */ |
2706 fixup_alt_jump = b; | 2717 fixup_alt_jump = buf_end; |
2707 GET_BUFFER_SPACE (3); | 2718 GET_BUFFER_SPACE (3); |
2708 b += 3; | 2719 buf_end += 3; |
2709 | 2720 |
2710 laststart = 0; | 2721 laststart = 0; |
2711 begalt = b; | 2722 begalt = buf_end; |
2712 break; | 2723 break; |
2713 | 2724 |
2714 | 2725 |
2715 case '{': | 2726 case '{': |
2716 /* If \{ is a literal. */ | 2727 /* If \{ is a literal. */ |
2779 if (!laststart) | 2790 if (!laststart) |
2780 { | 2791 { |
2781 if (syntax & RE_CONTEXT_INVALID_OPS) | 2792 if (syntax & RE_CONTEXT_INVALID_OPS) |
2782 FREE_STACK_RETURN (REG_BADRPT); | 2793 FREE_STACK_RETURN (REG_BADRPT); |
2783 else if (syntax & RE_CONTEXT_INDEP_OPS) | 2794 else if (syntax & RE_CONTEXT_INDEP_OPS) |
2784 laststart = b; | 2795 laststart = buf_end; |
2785 else | 2796 else |
2786 goto unfetch_interval; | 2797 goto unfetch_interval; |
2787 } | 2798 } |
2788 | 2799 |
2789 /* If the upper bound is zero, don't want to succeed at | 2800 /* If the upper bound is zero, don't want to succeed at |
2790 all; jump from `laststart' to `b + 3', which will be | 2801 all; jump from `laststart' to `buf_end + 3', which will be |
2791 the end of the buffer after we insert the jump. */ | 2802 the end of the buffer after we insert the jump. */ |
2792 if (upper_bound == 0) | 2803 if (upper_bound == 0) |
2793 { | 2804 { |
2794 GET_BUFFER_SPACE (3); | 2805 GET_BUFFER_SPACE (3); |
2795 INSERT_JUMP (jump, laststart, b + 3); | 2806 INSERT_JUMP (jump, laststart, buf_end + 3); |
2796 b += 3; | 2807 buf_end += 3; |
2797 } | 2808 } |
2798 | 2809 |
2799 /* Otherwise, we have a nontrivial interval. When | 2810 /* Otherwise, we have a nontrivial interval. When |
2800 we're all done, the pattern will look like: | 2811 we're all done, the pattern will look like: |
2801 set_number_at <jump count> <upper bound> | 2812 set_number_at <jump count> <upper bound> |
2816 though it will be set during matching by its | 2827 though it will be set during matching by its |
2817 attendant `set_number_at' (inserted next), | 2828 attendant `set_number_at' (inserted next), |
2818 because `re_compile_fastmap' needs to know. | 2829 because `re_compile_fastmap' needs to know. |
2819 Jump to the `jump_n' we might insert below. */ | 2830 Jump to the `jump_n' we might insert below. */ |
2820 INSERT_JUMP2 (succeed_n, laststart, | 2831 INSERT_JUMP2 (succeed_n, laststart, |
2821 b + 5 + (upper_bound > 1) * 5, | 2832 buf_end + 5 + (upper_bound > 1) * 5, |
2822 lower_bound); | 2833 lower_bound); |
2823 b += 5; | 2834 buf_end += 5; |
2824 | 2835 |
2825 /* Code to initialize the lower bound. Insert | 2836 /* Code to initialize the lower bound. Insert |
2826 before the `succeed_n'. The `5' is the last two | 2837 before the `succeed_n'. The `5' is the last two |
2827 bytes of this `set_number_at', plus 3 bytes of | 2838 bytes of this `set_number_at', plus 3 bytes of |
2828 the following `succeed_n'. */ | 2839 the following `succeed_n'. */ |
2829 insert_op2 (set_number_at, laststart, 5, lower_bound, b); | 2840 insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end); |
2830 b += 5; | 2841 buf_end += 5; |
2831 | 2842 |
2832 if (upper_bound > 1) | 2843 if (upper_bound > 1) |
2833 { /* More than one repetition is allowed, so | 2844 { /* More than one repetition is allowed, so |
2834 append a backward jump to the `succeed_n' | 2845 append a backward jump to the `succeed_n' |
2835 that starts this interval. | 2846 that starts this interval. |
2836 | 2847 |
2837 When we've reached this during matching, | 2848 When we've reached this during matching, |
2838 we'll have matched the interval once, so | 2849 we'll have matched the interval once, so |
2839 jump back only `upper_bound - 1' times. */ | 2850 jump back only `upper_bound - 1' times. */ |
2840 STORE_JUMP2 (jump_n, b, laststart + 5, | 2851 STORE_JUMP2 (jump_n, buf_end, laststart + 5, |
2841 upper_bound - 1); | 2852 upper_bound - 1); |
2842 b += 5; | 2853 buf_end += 5; |
2843 | 2854 |
2844 /* The location we want to set is the second | 2855 /* The location we want to set is the second |
2845 parameter of the `jump_n'; that is `b-2' as | 2856 parameter of the `jump_n'; that is `buf_end-2' as |
2846 an absolute address. `laststart' will be | 2857 an absolute address. `laststart' will be |
2847 the `set_number_at' we're about to insert; | 2858 the `set_number_at' we're about to insert; |
2853 i.e., b - laststart. | 2864 i.e., buf_end - laststart. |
2854 | 2865 |
2855 We insert this at the beginning of the loop | 2866 We insert this at the beginning of the loop |
2856 so that if we fail during matching, we'll | 2867 so that if we fail during matching, we'll |
2857 reinitialize the bounds. */ | 2868 reinitialize the bounds. */ |
2858 insert_op2 (set_number_at, laststart, b - laststart, | 2869 insert_op2 (set_number_at, laststart, |
2859 upper_bound - 1, b); | 2870 buf_end - laststart, |
2860 b += 5; | 2871 upper_bound - 1, buf_end); |
2872 buf_end += 5; | |
2861 } | 2873 } |
2862 } | 2874 } |
2863 pending_exact = 0; | 2875 pending_exact = 0; |
2864 beg_interval = NULL; | 2876 beg_interval = NULL; |
2865 } | 2877 } |
2887 case '=': | 2899 case '=': |
2888 BUF_PUSH (at_dot); | 2900 BUF_PUSH (at_dot); |
2889 break; | 2901 break; |
2890 | 2902 |
2891 case 's': | 2903 case 's': |
2892 laststart = b; | 2904 laststart = buf_end; |
2893 PATFETCH (c); | 2905 PATFETCH (c); |
2894 /* XEmacs addition */ | 2906 /* XEmacs addition */ |
2895 if (c >= 0x80 || syntax_spec_code[c] == 0377) | 2907 if (c >= 0x80 || syntax_spec_code[c] == 0377) |
2896 FREE_STACK_RETURN (REG_ESYNTAX); | 2908 FREE_STACK_RETURN (REG_ESYNTAX); |
2897 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); | 2909 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); |
2898 break; | 2910 break; |
2899 | 2911 |
2900 case 'S': | 2912 case 'S': |
2901 laststart = b; | 2913 laststart = buf_end; |
2902 PATFETCH (c); | 2914 PATFETCH (c); |
2903 /* XEmacs addition */ | 2915 /* XEmacs addition */ |
2904 if (c >= 0x80 || syntax_spec_code[c] == 0377) | 2916 if (c >= 0x80 || syntax_spec_code[c] == 0377) |
2905 FREE_STACK_RETURN (REG_ESYNTAX); | 2917 FREE_STACK_RETURN (REG_ESYNTAX); |
2906 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); | 2918 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); |
2907 break; | 2919 break; |
2908 | 2920 |
2909 #ifdef MULE | 2921 #ifdef MULE |
2910 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ | 2922 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ |
2911 case 'c': | 2923 case 'c': |
2912 laststart = b; | 2924 laststart = buf_end; |
2913 PATFETCH_RAW (c); | 2925 PATFETCH_RAW (c); |
2914 if (c < 32 || c > 127) | 2926 if (c < 32 || c > 127) |
2915 FREE_STACK_RETURN (REG_ECATEGORY); | 2927 FREE_STACK_RETURN (REG_ECATEGORY); |
2916 BUF_PUSH_2 (categoryspec, c); | 2928 BUF_PUSH_2 (categoryspec, c); |
2917 break; | 2929 break; |
2918 | 2930 |
2919 case 'C': | 2931 case 'C': |
2920 laststart = b; | 2932 laststart = buf_end; |
2921 PATFETCH_RAW (c); | 2933 PATFETCH_RAW (c); |
2922 if (c < 32 || c > 127) | 2934 if (c < 32 || c > 127) |
2923 FREE_STACK_RETURN (REG_ECATEGORY); | 2935 FREE_STACK_RETURN (REG_ECATEGORY); |
2924 BUF_PUSH_2 (notcategoryspec, c); | 2936 BUF_PUSH_2 (notcategoryspec, c); |
2925 break; | 2937 break; |
2927 #endif /* MULE */ | 2939 #endif /* MULE */ |
2928 #endif /* emacs */ | 2940 #endif /* emacs */ |
2929 | 2941 |
2930 | 2942 |
2931 case 'w': | 2943 case 'w': |
2932 laststart = b; | 2944 laststart = buf_end; |
2933 BUF_PUSH (wordchar); | 2945 BUF_PUSH (wordchar); |
2934 break; | 2946 break; |
2935 | 2947 |
2936 | 2948 |
2937 case 'W': | 2949 case 'W': |
2938 laststart = b; | 2950 laststart = buf_end; |
2939 BUF_PUSH (notwordchar); | 2951 BUF_PUSH (notwordchar); |
2940 break; | 2952 break; |
2941 | 2953 |
2942 | 2954 |
2943 case '<': | 2955 case '<': |
2964 BUF_PUSH (endbuf); | 2976 BUF_PUSH (endbuf); |
2965 break; | 2977 break; |
2966 | 2978 |
2967 case '1': case '2': case '3': case '4': case '5': | 2979 case '1': case '2': case '3': case '4': case '5': |
2968 case '6': case '7': case '8': case '9': | 2980 case '6': case '7': case '8': case '9': |
2969 if (syntax & RE_NO_BK_REFS) | 2981 { |
2970 goto normal_char; | 2982 regnum_t reg; |
2971 | 2983 if (syntax & RE_NO_BK_REFS) |
2972 c1 = c - '0'; | 2984 goto normal_char; |
2973 | 2985 |
2974 if (c1 > regnum) | 2986 reg = c - '0'; |
2975 FREE_STACK_RETURN (REG_ESUBREG); | 2987 |
2976 | 2988 if (reg > regnum) |
2977 /* Can't back reference to a subexpression if inside of it. */ | 2989 FREE_STACK_RETURN (REG_ESUBREG); |
2978 if (group_in_compile_stack (compile_stack, c1)) | 2990 |
2979 goto normal_char; | 2991 /* Can't back reference to a subexpression if inside of it. */ |
2980 | 2992 if (group_in_compile_stack (compile_stack, reg)) |
2981 laststart = b; | 2993 goto normal_char; |
2982 BUF_PUSH_2 (duplicate, c1); | 2994 |
2995 laststart = buf_end; | |
2996 BUF_PUSH_2 (duplicate, reg); | |
2997 } | |
2983 break; | 2998 break; |
2984 | 2999 |
2985 | 3000 |
2986 case '+': | 3001 case '+': |
2987 case '?': | 3002 case '?': |
3006 /* `p' points to the location after where `c' came from. */ | 3021 /* `p' points to the location after where `c' came from. */ |
3007 normal_char: | 3022 normal_char: |
3008 { | 3023 { |
3009 /* XEmacs: modifications here for Mule. */ | 3024 /* XEmacs: modifications here for Mule. */ |
3010 /* `q' points to the beginning of the next char. */ | 3025 /* `q' points to the beginning of the next char. */ |
3011 const char *q = p - 1; | 3026 re_char *q = p; |
3012 INC_CHARPTR (q); | |
3013 | 3027 |
3014 /* If no exactn currently being built. */ | 3028 /* If no exactn currently being built. */ |
3015 if (!pending_exact | 3029 if (!pending_exact |
3016 | 3030 |
3017 /* If last exactn not at current position. */ | 3031 /* If last exactn not at current position. */ |
3018 || pending_exact + *pending_exact + 1 != b | 3032 || pending_exact + *pending_exact + 1 != buf_end |
3019 | 3033 |
3020 /* We have only one byte following the exactn for the count. */ | 3034 /* We have only one byte following the exactn for the count. */ |
3021 || ((unsigned int) (*pending_exact + (q - p)) >= | 3035 || ((unsigned int) (*pending_exact + (q - p)) >= |
3022 ((unsigned int) (1 << BYTEWIDTH) - 1)) | 3036 ((unsigned int) (1 << BYTEWIDTH) - 1)) |
3023 | 3037 |
3031 ? *q == '{' | 3045 ? *q == '{' |
3032 : (q[0] == '\\' && q[1] == '{')))) | 3046 : (q[0] == '\\' && q[1] == '{')))) |
3033 { | 3047 { |
3034 /* Start building a new exactn. */ | 3048 /* Start building a new exactn. */ |
3035 | 3049 |
3036 laststart = b; | 3050 laststart = buf_end; |
3037 | 3051 |
3038 BUF_PUSH_2 (exactn, 0); | 3052 BUF_PUSH_2 (exactn, 0); |
3039 pending_exact = b - 1; | 3053 pending_exact = buf_end - 1; |
3040 } | 3054 } |
3041 | 3055 |
3056 #ifndef MULE | |
3042 BUF_PUSH (c); | 3057 BUF_PUSH (c); |
3043 (*pending_exact)++; | 3058 (*pending_exact)++; |
3044 | 3059 #else |
3045 while (p < q) | 3060 { |
3046 { | 3061 Bytecount bt_count; |
3047 PATFETCH (c); | 3062 Bufbyte tmp_buf[MAX_EMCHAR_LEN]; |
3048 BUF_PUSH (c); | 3063 int i; |
3049 (*pending_exact)++; | 3064 |
3050 } | 3065 bt_count = set_charptr_emchar (tmp_buf, c); |
3066 | |
3067 for (i = 0; i < bt_count; i++) | |
3068 { | |
3069 BUF_PUSH (tmp_buf[i]); | |
3070 (*pending_exact)++; | |
3071 } | |
3072 } | |
3073 #endif | |
3051 break; | 3074 break; |
3052 } | 3075 } |
3053 } /* switch (c) */ | 3076 } /* switch (c) */ |
3054 } /* while p != pend */ | 3077 } /* while p != pend */ |
3055 | 3078 |
3056 | 3079 |
3057 /* Through the pattern now. */ | 3080 /* Through the pattern now. */ |
3058 | 3081 |
3059 if (fixup_alt_jump) | 3082 if (fixup_alt_jump) |
3060 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); | 3083 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end); |
3061 | 3084 |
3062 if (!COMPILE_STACK_EMPTY) | 3085 if (!COMPILE_STACK_EMPTY) |
3063 FREE_STACK_RETURN (REG_EPAREN); | 3086 FREE_STACK_RETURN (REG_EPAREN); |
3064 | 3087 |
3065 /* If we don't want backtracking, force success | 3088 /* If we don't want backtracking, force success |
3068 BUF_PUSH (succeed); | 3091 BUF_PUSH (succeed); |
3069 | 3092 |
3070 free (compile_stack.stack); | 3093 free (compile_stack.stack); |
3071 | 3094 |
3072 /* We have succeeded; set the length of the buffer. */ | 3095 /* We have succeeded; set the length of the buffer. */ |
3073 bufp->used = b - bufp->buffer; | 3096 bufp->used = buf_end - bufp->buffer; |
3074 | 3097 |
3075 #ifdef DEBUG | 3098 #ifdef DEBUG |
3076 if (debug) | 3099 if (debug) |
3077 { | 3100 { |
3078 DEBUG_PRINT1 ("\nCompiled pattern: \n"); | 3101 DEBUG_PRINT1 ("\nCompiled pattern: \n"); |
3112 else | 3135 else |
3113 fail_stack.stack | 3136 fail_stack.stack |
3114 = (fail_stack_elt_t *) realloc (fail_stack.stack, | 3137 = (fail_stack_elt_t *) realloc (fail_stack.stack, |
3115 (fail_stack.size | 3138 (fail_stack.size |
3116 * sizeof (fail_stack_elt_t))); | 3139 * sizeof (fail_stack_elt_t))); |
3117 #endif /* not emacs */ | 3140 #endif /* emacs */ |
3118 } | 3141 } |
3119 | 3142 |
3120 regex_grow_registers (num_regs); | 3143 regex_grow_registers (num_regs); |
3121 } | 3144 } |
3122 #endif /* not MATCH_MAY_ALLOCATE */ | 3145 #endif /* not MATCH_MAY_ALLOCATE */ |
3182 /* P points to just after a ^ in PATTERN. Return true if that ^ comes | 3205 /* P points to just after a ^ in PATTERN. Return true if that ^ comes |
3183 after an alternative or a begin-subexpression. We assume there is at | 3206 after an alternative or a begin-subexpression. We assume there is at |
3184 least one character before the ^. */ | 3207 least one character before the ^. */ |
3185 | 3208 |
3186 static boolean | 3209 static boolean |
3187 at_begline_loc_p (const char *pattern, const char *p, reg_syntax_t syntax) | 3210 at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) |
3188 { | 3211 { |
3189 const char *prev = p - 2; | 3212 re_char *prev = p - 2; |
3190 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; | 3213 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; |
3191 | 3214 |
3192 return | 3215 return |
3193 /* After a subexpression? */ | 3216 /* After a subexpression? */ |
3194 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) | 3217 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) |
3199 | 3222 |
3200 /* The dual of at_begline_loc_p. This one is for $. We assume there is | 3223 /* The dual of at_begline_loc_p. This one is for $. We assume there is |
3201 at least one character after the $, i.e., `P < PEND'. */ | 3224 at least one character after the $, i.e., `P < PEND'. */ |
3202 | 3225 |
3203 static boolean | 3226 static boolean |
3204 at_endline_loc_p (const char *p, const char *pend, int syntax) | 3227 at_endline_loc_p (re_char *p, re_char *pend, int syntax) |
3205 { | 3228 { |
3206 const char *next = p; | 3229 re_char *next = p; |
3207 boolean next_backslash = *next == '\\'; | 3230 boolean next_backslash = *next == '\\'; |
3208 const char *next_next = p + 1 < pend ? p + 1 : 0; | 3231 re_char *next_next = p + 1 < pend ? p + 1 : 0; |
3209 | 3232 |
3210 return | 3233 return |
3211 /* Before a subexpression? */ | 3234 /* Before a subexpression? */ |
3212 (syntax & RE_NO_BK_PARENS ? *next == ')' | 3235 (syntax & RE_NO_BK_PARENS ? *next == ')' |
3213 : next_backslash && next_next && *next_next == ')') | 3236 : next_backslash && next_next && *next_next == ')') |
3245 | 3268 |
3246 We use these short variable names so we can use the same macros as | 3269 We use these short variable names so we can use the same macros as |
3247 `regex_compile' itself. */ | 3270 `regex_compile' itself. */ |
3248 | 3271 |
3249 static reg_errcode_t | 3272 static reg_errcode_t |
3250 compile_range (const char **p_ptr, const char *pend, char *translate, | 3273 compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate, |
3251 reg_syntax_t syntax, unsigned char *b) | 3274 reg_syntax_t syntax, unsigned char *buf_end) |
3252 { | 3275 { |
3253 unsigned this_char; | 3276 unsigned this_char; |
3254 | 3277 |
3255 const char *p = *p_ptr; | 3278 re_char *p = *p_ptr; |
3256 int range_start, range_end; | 3279 int range_start, range_end; |
3257 | 3280 |
3258 if (p == pend) | 3281 if (p == pend) |
3259 return REG_ERANGE; | 3282 return REG_ERANGE; |
3260 | 3283 |
3290 } | 3313 } |
3291 | 3314 |
3292 #ifdef MULE | 3315 #ifdef MULE |
3293 | 3316 |
3294 static reg_errcode_t | 3317 static reg_errcode_t |
3295 compile_extended_range (const char **p_ptr, const char *pend, char *translate, | 3318 compile_extended_range (re_char **p_ptr, re_char *pend, |
3319 RE_TRANSLATE_TYPE translate, | |
3296 reg_syntax_t syntax, Lisp_Object rtab) | 3320 reg_syntax_t syntax, Lisp_Object rtab) |
3297 { | 3321 { |
3298 Emchar this_char, range_start, range_end; | 3322 Emchar this_char, range_start, range_end; |
3299 const Bufbyte *p; | 3323 const Bufbyte *p; |
3300 | 3324 |
3412 bufp->can_be_null |= path_can_be_null; | 3436 bufp->can_be_null |= path_can_be_null; |
3413 | 3437 |
3414 /* Reset for next path. */ | 3438 /* Reset for next path. */ |
3415 path_can_be_null = true; | 3439 path_can_be_null = true; |
3416 | 3440 |
3417 p = fail_stack.stack[--fail_stack.avail].pointer; | 3441 p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer; |
3418 | 3442 |
3419 continue; | 3443 continue; |
3420 } | 3444 } |
3421 else | 3445 else |
3422 break; | 3446 break; |
3461 for (j = *p * BYTEWIDTH; j < 0x80; j++) | 3485 for (j = *p * BYTEWIDTH; j < 0x80; j++) |
3462 fastmap[j] = 1; | 3486 fastmap[j] = 1; |
3463 /* And all extended characters must be allowed, too. */ | 3487 /* And all extended characters must be allowed, too. */ |
3464 for (j = 0x80; j < 0xA0; j++) | 3488 for (j = 0x80; j < 0xA0; j++) |
3465 fastmap[j] = 1; | 3489 fastmap[j] = 1; |
3466 #else /* ! MULE */ | 3490 #else /* not MULE */ |
3467 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) | 3491 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) |
3468 fastmap[j] = 1; | 3492 fastmap[j] = 1; |
3469 #endif /* ! MULE */ | 3493 #endif /* MULE */ |
3470 | 3494 |
3471 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) | 3495 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) |
3472 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) | 3496 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) |
3473 fastmap[j] = 1; | 3497 fastmap[j] = 1; |
3474 break; | 3498 break; |
3621 == Sword || multi_p) | 3645 == Sword || multi_p) |
3622 fastmap[j] = 1; | 3646 fastmap[j] = 1; |
3623 } | 3647 } |
3624 } | 3648 } |
3625 } | 3649 } |
3626 #else /* ! MULE */ | 3650 #else /* not MULE */ |
3627 for (j = 0; j < (1 << BYTEWIDTH); j++) | 3651 for (j = 0; j < (1 << BYTEWIDTH); j++) |
3628 if (SYNTAX_UNSAFE | 3652 if (SYNTAX_UNSAFE |
3629 (XCHAR_TABLE | 3653 (XCHAR_TABLE |
3630 (regex_emacs_buffer->mirror_syntax_table), j) == | 3654 (regex_emacs_buffer->mirror_syntax_table), j) == |
3631 (enum syntaxcode) k) | 3655 (enum syntaxcode) k) |
3632 fastmap[j] = 1; | 3656 fastmap[j] = 1; |
3633 #endif /* ! MULE */ | 3657 #endif /* MULE */ |
3634 break; | 3658 break; |
3635 | 3659 |
3636 | 3660 |
3637 case notsyntaxspec: | 3661 case notsyntaxspec: |
3638 k = *p++; | 3662 k = *p++; |
3662 != Sword || multi_p) | 3686 != Sword || multi_p) |
3663 fastmap[j] = 1; | 3687 fastmap[j] = 1; |
3664 } | 3688 } |
3665 } | 3689 } |
3666 } | 3690 } |
3667 #else /* ! MULE */ | 3691 #else /* not MULE */ |
3668 for (j = 0; j < (1 << BYTEWIDTH); j++) | 3692 for (j = 0; j < (1 << BYTEWIDTH); j++) |
3669 if (SYNTAX_UNSAFE | 3693 if (SYNTAX_UNSAFE |
3670 (XCHAR_TABLE | 3694 (XCHAR_TABLE |
3671 (regex_emacs_buffer->mirror_syntax_table), j) != | 3695 (regex_emacs_buffer->mirror_syntax_table), j) != |
3672 (enum syntaxcode) k) | 3696 (enum syntaxcode) k) |
3673 fastmap[j] = 1; | 3697 fastmap[j] = 1; |
3674 #endif /* ! MULE */ | 3698 #endif /* MULE */ |
3675 break; | 3699 break; |
3676 | 3700 |
3677 #ifdef MULE | 3701 #ifdef MULE |
3678 /* 97/2/17 jhod category patch */ | 3702 /* 97/2/17 jhod category patch */ |
3679 case categoryspec: | 3703 case categoryspec: |
3899 We return either the position in the strings at which the match was | 3923 We return either the position in the strings at which the match was |
3900 found, -1 if no match, or -2 if error (such as failure | 3924 found, -1 if no match, or -2 if error (such as failure |
3901 stack overflow). */ | 3925 stack overflow). */ |
3902 | 3926 |
3903 int | 3927 int |
3904 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, | 3928 re_search_2 (struct re_pattern_buffer *bufp, const char *str1, |
3905 int size1, const char *string2, int size2, int startpos, | 3929 int size1, const char *str2, int size2, int startpos, |
3906 int range, struct re_registers *regs, int stop) | 3930 int range, struct re_registers *regs, int stop) |
3907 { | 3931 { |
3908 int val; | 3932 int val; |
3933 re_char *string1 = (re_char *) str1; | |
3934 re_char *string2 = (re_char *) str2; | |
3909 REGISTER char *fastmap = bufp->fastmap; | 3935 REGISTER char *fastmap = bufp->fastmap; |
3910 REGISTER char *translate = bufp->translate; | 3936 REGISTER RE_TRANSLATE_TYPE translate = bufp->translate; |
3911 int total_size = size1 + size2; | 3937 int total_size = size1 + size2; |
3912 int endpos = startpos + range; | 3938 int endpos = startpos + range; |
3913 #ifdef REGEX_BEGLINE_CHECK | 3939 #ifdef REGEX_BEGLINE_CHECK |
3914 int anchored_at_begline = 0; | 3940 int anchored_at_begline = 0; |
3915 #endif | 3941 #endif |
3916 const unsigned char *d; | 3942 re_char *d; |
3917 Charcount d_size; | 3943 Charcount d_size; |
3918 | 3944 |
3919 /* Check for out-of-range STARTPOS. */ | 3945 /* Check for out-of-range STARTPOS. */ |
3920 if (startpos < 0 || startpos > total_size) | 3946 if (startpos < 0 || startpos > total_size) |
3921 return -1; | 3947 return -1; |
3983 d = ((const unsigned char *) | 4009 d = ((const unsigned char *) |
3984 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4010 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
3985 DEC_CHARPTR(d); /* Ok, since startpos != size1. */ | 4011 DEC_CHARPTR(d); /* Ok, since startpos != size1. */ |
3986 d_size = charcount_to_bytecount (d, 1); | 4012 d_size = charcount_to_bytecount (d, 1); |
3987 | 4013 |
3988 if (translate) | 4014 if (TRANSLATE_P (translate)) |
3989 #ifdef MULE | 4015 while (range > lim && *d != '\n') |
3990 while (range > lim && (*d >= 0x80 || translate[*d] != '\n')) | |
3991 #else | |
3992 while (range > lim && translate[*d] != '\n') | |
3993 #endif | |
3994 { | 4016 { |
3995 d += d_size; /* Speedier INC_CHARPTR(d) */ | 4017 d += d_size; /* Speedier INC_CHARPTR(d) */ |
3996 d_size = charcount_to_bytecount (d, 1); | 4018 d_size = charcount_to_bytecount (d, 1); |
3997 range -= d_size; | 4019 range -= d_size; |
3998 } | 4020 } |
4025 d = ((const unsigned char *) | 4047 d = ((const unsigned char *) |
4026 (startpos >= size1 ? string2 - size1 : string1) + startpos); | 4048 (startpos >= size1 ? string2 - size1 : string1) + startpos); |
4027 | 4049 |
4028 /* Written out as an if-else to avoid testing `translate' | 4050 /* Written out as an if-else to avoid testing `translate' |
4029 inside the loop. */ | 4051 inside the loop. */ |
4030 if (translate) | 4052 if (TRANSLATE_P (translate)) |
4031 while (range > lim && | 4053 while (range > lim) |
4054 { | |
4032 #ifdef MULE | 4055 #ifdef MULE |
4033 *d < 0x80 && | 4056 Emchar buf_ch; |
4034 #endif | 4057 |
4035 !fastmap[(unsigned char)translate[*d]]) | 4058 buf_ch = charptr_emchar (d); |
4036 { | 4059 buf_ch = RE_TRANSLATE (buf_ch); |
4060 if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch]) | |
4061 break; | |
4062 #else | |
4063 if (fastmap[(unsigned char)RE_TRANSLATE (*d)]) | |
4064 break; | |
4065 #endif /* MULE */ | |
4037 d_size = charcount_to_bytecount (d, 1); | 4066 d_size = charcount_to_bytecount (d, 1); |
4038 range -= d_size; | 4067 range -= d_size; |
4039 d += d_size; /* Speedier INC_CHARPTR(d) */ | 4068 d += d_size; /* Speedier INC_CHARPTR(d) */ |
4040 } | 4069 } |
4041 else | 4070 else |
4048 | 4077 |
4049 startpos += irange - range; | 4078 startpos += irange - range; |
4050 } | 4079 } |
4051 else /* Searching backwards. */ | 4080 else /* Searching backwards. */ |
4052 { | 4081 { |
4053 unsigned char c = (size1 == 0 || startpos >= size1 | 4082 Emchar c = (size1 == 0 || startpos >= size1 |
4054 ? string2[startpos - size1] | 4083 ? charptr_emchar (string2 + startpos - size1) |
4055 : string1[startpos]); | 4084 : charptr_emchar (string1 + startpos)); |
4085 c = TRANSLATE (c); | |
4056 #ifdef MULE | 4086 #ifdef MULE |
4057 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) | 4087 if (!(c >= 0200 || fastmap[(unsigned char) c])) |
4088 goto advance; | |
4058 #else | 4089 #else |
4059 if (!fastmap[(unsigned char) TRANSLATE (c)]) | 4090 if (!fastmap[(unsigned char) c]) |
4091 goto advance; | |
4060 #endif | 4092 #endif |
4061 goto advance; | |
4062 } | 4093 } |
4063 } | 4094 } |
4064 | 4095 |
4065 /* If can't match the null string, and that's all we have left, fail. */ | 4096 /* If can't match the null string, and that's all we have left, fail. */ |
4066 if (range >= 0 && startpos == total_size && fastmap | 4097 if (range >= 0 && startpos == total_size && fastmap |
4169 FREE_VAR (best_regend); \ | 4200 FREE_VAR (best_regend); \ |
4170 FREE_VAR (reg_info); \ | 4201 FREE_VAR (reg_info); \ |
4171 FREE_VAR (reg_dummy); \ | 4202 FREE_VAR (reg_dummy); \ |
4172 FREE_VAR (reg_info_dummy); \ | 4203 FREE_VAR (reg_info_dummy); \ |
4173 } while (0) | 4204 } while (0) |
4174 #else | 4205 #else /* not MATCH_MAY_ALLOCATE */ |
4175 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ | 4206 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ |
4176 #endif /* not MATCH_MAY_ALLOCATE */ | 4207 #endif /* MATCH_MAY_ALLOCATE */ |
4177 | 4208 |
4178 /* These values must meet several constraints. They must not be valid | 4209 /* These values must meet several constraints. They must not be valid |
4179 register values; since we have a limit of 255 registers (because | 4210 register values; since we have a limit of 255 registers (because |
4180 we use only one byte in the pattern for the register number), we can | 4211 we use only one byte in the pattern for the register number), we can |
4181 use numbers larger than 255. They must differ by 1, because of | 4212 use numbers larger than 255. They must differ by 1, because of |
4192 | 4223 |
4193 int | 4224 int |
4194 re_match (struct re_pattern_buffer *bufp, const char *string, int size, | 4225 re_match (struct re_pattern_buffer *bufp, const char *string, int size, |
4195 int pos, struct re_registers *regs) | 4226 int pos, struct re_registers *regs) |
4196 { | 4227 { |
4197 int result = re_match_2_internal (bufp, NULL, 0, string, size, | 4228 int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size, |
4198 pos, regs, size); | 4229 pos, regs, size); |
4199 alloca (0); | 4230 alloca (0); |
4200 return result; | 4231 return result; |
4201 } | 4232 } |
4202 #endif /* not emacs */ | 4233 #endif /* not emacs */ |
4218 int | 4249 int |
4219 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, | 4250 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, |
4220 int size1, const char *string2, int size2, int pos, | 4251 int size1, const char *string2, int size2, int pos, |
4221 struct re_registers *regs, int stop) | 4252 struct re_registers *regs, int stop) |
4222 { | 4253 { |
4223 int result = re_match_2_internal (bufp, string1, size1, string2, size2, | 4254 int result = re_match_2_internal (bufp, (re_char *) string1, size1, |
4255 (re_char *) string2, size2, | |
4224 pos, regs, stop); | 4256 pos, regs, stop); |
4225 alloca (0); | 4257 alloca (0); |
4226 return result; | 4258 return result; |
4227 } | 4259 } |
4228 | 4260 |
4229 /* This is a separate function so that we can force an alloca cleanup | 4261 /* This is a separate function so that we can force an alloca cleanup |
4230 afterwards. */ | 4262 afterwards. */ |
4231 static int | 4263 static int |
4232 re_match_2_internal (struct re_pattern_buffer *bufp, const char *string1, | 4264 re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, |
4233 int size1, const char *string2, int size2, int pos, | 4265 int size1, re_char *string2, int size2, int pos, |
4234 struct re_registers *regs, int stop) | 4266 struct re_registers *regs, int stop) |
4235 { | 4267 { |
4236 /* General temporaries. */ | 4268 /* General temporaries. */ |
4237 int mcnt; | 4269 int mcnt; |
4238 unsigned char *p1; | 4270 unsigned char *p1; |
4239 int should_succeed; /* XEmacs change */ | 4271 int should_succeed; /* XEmacs change */ |
4240 | 4272 |
4241 /* Just past the end of the corresponding string. */ | 4273 /* Just past the end of the corresponding string. */ |
4242 const char *end1, *end2; | 4274 re_char *end1, *end2; |
4243 | 4275 |
4244 /* Pointers into string1 and string2, just past the last characters in | 4276 /* Pointers into string1 and string2, just past the last characters in |
4245 each to consider matching. */ | 4277 each to consider matching. */ |
4246 const char *end_match_1, *end_match_2; | 4278 re_char *end_match_1, *end_match_2; |
4247 | 4279 |
4248 /* Where we are in the data, and the end of the current string. */ | 4280 /* Where we are in the data, and the end of the current string. */ |
4249 const char *d, *dend; | 4281 re_char *d, *dend; |
4250 | 4282 |
4251 /* Where we are in the pattern, and the end of the pattern. */ | 4283 /* Where we are in the pattern, and the end of the pattern. */ |
4252 unsigned char *p = bufp->buffer; | 4284 unsigned char *p = bufp->buffer; |
4253 REGISTER unsigned char *pend = p + bufp->used; | 4285 REGISTER unsigned char *pend = p + bufp->used; |
4254 | 4286 |
4255 /* Mark the opcode just after a start_memory, so we can test for an | 4287 /* Mark the opcode just after a start_memory, so we can test for an |
4256 empty subpattern when we get to the stop_memory. */ | 4288 empty subpattern when we get to the stop_memory. */ |
4257 unsigned char *just_past_start_mem = 0; | 4289 re_char *just_past_start_mem = 0; |
4258 | 4290 |
4259 /* We use this to map every character in the string. */ | 4291 /* We use this to map every character in the string. */ |
4260 char *translate = bufp->translate; | 4292 RE_TRANSLATE_TYPE translate = bufp->translate; |
4261 | 4293 |
4262 /* Failure point stack. Each place that can handle a failure further | 4294 /* Failure point stack. Each place that can handle a failure further |
4263 down the line pushes a failure point on this stack. It consists of | 4295 down the line pushes a failure point on this stack. It consists of |
4264 restart, regend, and reg_info for all registers corresponding to | 4296 restart, regend, and reg_info for all registers corresponding to |
4265 the subexpressions we're currently inside, plus the number of such | 4297 the subexpressions we're currently inside, plus the number of such |
4297 regnum-th regstart pointer points to where in the pattern we began | 4329 regnum-th regstart pointer points to where in the pattern we began |
4298 matching and the regnum-th regend points to right after where we | 4330 matching and the regnum-th regend points to right after where we |
4299 stopped matching the regnum-th subexpression. (The zeroth register | 4331 stopped matching the regnum-th subexpression. (The zeroth register |
4300 keeps track of what the whole pattern matches.) */ | 4332 keeps track of what the whole pattern matches.) */ |
4301 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4333 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
4302 const char **regstart, **regend; | 4334 re_char **regstart, **regend; |
4303 #endif | 4335 #endif |
4304 | 4336 |
4305 /* If a group that's operated upon by a repetition operator fails to | 4337 /* If a group that's operated upon by a repetition operator fails to |
4306 match anything, then the register for its start will need to be | 4338 match anything, then the register for its start will need to be |
4307 restored because it will have been set to wherever in the string we | 4339 restored because it will have been set to wherever in the string we |
4308 are when we last see its open-group operator. Similarly for a | 4340 are when we last see its open-group operator. Similarly for a |
4309 register's end. */ | 4341 register's end. */ |
4310 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4342 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
4311 const char **old_regstart, **old_regend; | 4343 re_char **old_regstart, **old_regend; |
4312 #endif | 4344 #endif |
4313 | 4345 |
4314 /* The is_active field of reg_info helps us keep track of which (possibly | 4346 /* The is_active field of reg_info helps us keep track of which (possibly |
4315 nested) subexpressions we are currently in. The matched_something | 4347 nested) subexpressions we are currently in. The matched_something |
4316 field of reg_info[reg_num] helps us tell whether or not we have | 4348 field of reg_info[reg_num] helps us tell whether or not we have |
4325 variables when we find a match better than any we've seen before. | 4357 variables when we find a match better than any we've seen before. |
4326 This happens as we backtrack through the failure points, which in | 4358 This happens as we backtrack through the failure points, which in |
4327 turn happens only if we have not yet matched the entire string. */ | 4359 turn happens only if we have not yet matched the entire string. */ |
4328 unsigned best_regs_set = false; | 4360 unsigned best_regs_set = false; |
4329 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4361 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
4330 const char **best_regstart, **best_regend; | 4362 re_char **best_regstart, **best_regend; |
4331 #endif | 4363 #endif |
4332 | 4364 |
4333 /* Logically, this is `best_regend[0]'. But we don't want to have to | 4365 /* Logically, this is `best_regend[0]'. But we don't want to have to |
4334 allocate space for that if we're not allocating space for anything | 4366 allocate space for that if we're not allocating space for anything |
4335 else (see below). Also, we never need info about register 0 for | 4367 else (see below). Also, we never need info about register 0 for |
4336 any of the other register vectors, and it seems rather a kludge to | 4368 any of the other register vectors, and it seems rather a kludge to |
4337 treat `best_regend' differently than the rest. So we keep track of | 4369 treat `best_regend' differently than the rest. So we keep track of |
4338 the end of the best match so far in a separate variable. We | 4370 the end of the best match so far in a separate variable. We |
4339 initialize this to NULL so that when we backtrack the first time | 4371 initialize this to NULL so that when we backtrack the first time |
4340 and need to test it, it's not garbage. */ | 4372 and need to test it, it's not garbage. */ |
4341 const char *match_end = NULL; | 4373 re_char *match_end = NULL; |
4342 | 4374 |
4343 /* This helps SET_REGS_MATCHED avoid doing redundant work. */ | 4375 /* This helps SET_REGS_MATCHED avoid doing redundant work. */ |
4344 int set_regs_matched_done = 0; | 4376 int set_regs_matched_done = 0; |
4345 | 4377 |
4346 /* Used when we pop values we don't care about. */ | 4378 /* Used when we pop values we don't care about. */ |
4347 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ | 4379 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ |
4348 const char **reg_dummy; | 4380 re_char **reg_dummy; |
4349 register_info_type *reg_info_dummy; | 4381 register_info_type *reg_info_dummy; |
4350 #endif | 4382 #endif |
4351 | 4383 |
4352 #ifdef DEBUG | 4384 #ifdef DEBUG |
4353 /* Counts the total number of registers pushed. */ | 4385 /* Counts the total number of registers pushed. */ |
4371 there are groups, we include space for register 0 (the whole | 4403 there are groups, we include space for register 0 (the whole |
4372 pattern), even though we never use it, since it simplifies the | 4404 pattern), even though we never use it, since it simplifies the |
4373 array indexing. We should fix this. */ | 4405 array indexing. We should fix this. */ |
4374 if (bufp->re_nsub) | 4406 if (bufp->re_nsub) |
4375 { | 4407 { |
4376 regstart = REGEX_TALLOC (num_regs, const char *); | 4408 regstart = REGEX_TALLOC (num_regs, re_char *); |
4377 regend = REGEX_TALLOC (num_regs, const char *); | 4409 regend = REGEX_TALLOC (num_regs, re_char *); |
4378 old_regstart = REGEX_TALLOC (num_regs, const char *); | 4410 old_regstart = REGEX_TALLOC (num_regs, re_char *); |
4379 old_regend = REGEX_TALLOC (num_regs, const char *); | 4411 old_regend = REGEX_TALLOC (num_regs, re_char *); |
4380 best_regstart = REGEX_TALLOC (num_regs, const char *); | 4412 best_regstart = REGEX_TALLOC (num_regs, re_char *); |
4381 best_regend = REGEX_TALLOC (num_regs, const char *); | 4413 best_regend = REGEX_TALLOC (num_regs, re_char *); |
4382 reg_info = REGEX_TALLOC (num_regs, register_info_type); | 4414 reg_info = REGEX_TALLOC (num_regs, register_info_type); |
4383 reg_dummy = REGEX_TALLOC (num_regs, const char *); | 4415 reg_dummy = REGEX_TALLOC (num_regs, re_char *); |
4384 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); | 4416 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); |
4385 | 4417 |
4386 if (!(regstart && regend && old_regstart && old_regend && reg_info | 4418 if (!(regstart && regend && old_regstart && old_regend && reg_info |
4387 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) | 4419 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) |
4388 { | 4420 { |
4418 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; | 4450 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; |
4419 IS_ACTIVE (reg_info[mcnt]) = 0; | 4451 IS_ACTIVE (reg_info[mcnt]) = 0; |
4420 MATCHED_SOMETHING (reg_info[mcnt]) = 0; | 4452 MATCHED_SOMETHING (reg_info[mcnt]) = 0; |
4421 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; | 4453 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; |
4422 } | 4454 } |
4423 | |
4424 /* We move `string1' into `string2' if the latter's empty -- but not if | 4455 /* We move `string1' into `string2' if the latter's empty -- but not if |
4425 `string1' is null. */ | 4456 `string1' is null. */ |
4426 if (size2 == 0 && string1 != NULL) | 4457 if (size2 == 0 && string1 != NULL) |
4427 { | 4458 { |
4428 string2 = string1; | 4459 string2 = string1; |
4460 { | 4491 { |
4461 d = string2 + pos - size1; | 4492 d = string2 + pos - size1; |
4462 dend = end_match_2; | 4493 dend = end_match_2; |
4463 } | 4494 } |
4464 | 4495 |
4465 DEBUG_PRINT1 ("The compiled pattern is: "); | 4496 DEBUG_PRINT1 ("The compiled pattern is: \n"); |
4466 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); | 4497 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); |
4467 DEBUG_PRINT1 ("The string to match is: `"); | 4498 DEBUG_PRINT1 ("The string to match is: `"); |
4468 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); | 4499 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); |
4469 DEBUG_PRINT1 ("'\n"); | 4500 DEBUG_PRINT1 ("'\n"); |
4470 | 4501 |
4658 mcnt = *p++; | 4689 mcnt = *p++; |
4659 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); | 4690 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); |
4660 | 4691 |
4661 /* This is written out as an if-else so we don't waste time | 4692 /* This is written out as an if-else so we don't waste time |
4662 testing `translate' inside the loop. */ | 4693 testing `translate' inside the loop. */ |
4663 if (translate) | 4694 if (TRANSLATE_P (translate)) |
4664 { | 4695 { |
4665 do | 4696 do |
4666 { | 4697 { |
4698 #ifdef MULE | |
4699 Emchar pat_ch, buf_ch; | |
4700 Bytecount pat_len; | |
4701 | |
4667 PREFETCH (); | 4702 PREFETCH (); |
4668 if (translate[(unsigned char) *d++] != (char) *p++) | 4703 pat_ch = charptr_emchar (p); |
4704 buf_ch = charptr_emchar (d); | |
4705 if (RE_TRANSLATE (buf_ch) != pat_ch) | |
4669 goto fail; | 4706 goto fail; |
4707 | |
4708 pat_len = charcount_to_bytecount (p, 1); | |
4709 p += pat_len; | |
4710 INC_CHARPTR (d); | |
4711 | |
4712 mcnt -= pat_len; | |
4713 #else /* not MULE */ | |
4714 PREFETCH (); | |
4715 if ((unsigned char) RE_TRANSLATE (*d++) != *p++) | |
4716 goto fail; | |
4717 mcnt--; | |
4718 #endif | |
4670 } | 4719 } |
4671 while (--mcnt); | 4720 while (mcnt > 0); |
4672 } | 4721 } |
4673 else | 4722 else |
4674 { | 4723 { |
4675 do | 4724 do |
4676 { | 4725 { |
4677 PREFETCH (); | 4726 PREFETCH (); |
4678 if (*d++ != (char) *p++) goto fail; | 4727 if (*d++ != *p++) goto fail; |
4679 } | 4728 } |
4680 while (--mcnt); | 4729 while (--mcnt); |
4681 } | 4730 } |
4682 SET_REGS_MATCHED (); | 4731 SET_REGS_MATCHED (); |
4683 break; | 4732 break; |
4948 | 4997 |
4949 /* \<digit> has been turned into a `duplicate' command which is | 4998 /* \<digit> has been turned into a `duplicate' command which is |
4950 followed by the numeric value of <digit> as the register number. */ | 4999 followed by the numeric value of <digit> as the register number. */ |
4951 case duplicate: | 5000 case duplicate: |
4952 { | 5001 { |
4953 REGISTER const char *d2, *dend2; | 5002 REGISTER re_char *d2, *dend2; |
4954 int regno = *p++; /* Get which register to match against. */ | 5003 int regno = *p++; /* Get which register to match against. */ |
4955 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); | 5004 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); |
4956 | 5005 |
4957 /* Can't back reference a group which we've never matched. */ | 5006 /* Can't back reference a group which we've never matched. */ |
4958 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) | 5007 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) |
4996 if (mcnt > dend2 - d2) | 5045 if (mcnt > dend2 - d2) |
4997 mcnt = dend2 - d2; | 5046 mcnt = dend2 - d2; |
4998 | 5047 |
4999 /* Compare that many; failure if mismatch, else move | 5048 /* Compare that many; failure if mismatch, else move |
5000 past them. */ | 5049 past them. */ |
5001 if (translate | 5050 if (TRANSLATE_P (translate) |
5002 ? bcmp_translate ((unsigned char *) d, | 5051 ? bcmp_translate ((unsigned char *) d, |
5003 (unsigned char *) d2, mcnt, translate) | 5052 (unsigned char *) d2, mcnt, translate) |
5004 : memcmp (d, d2, mcnt)) | 5053 : memcmp (d, d2, mcnt)) |
5005 goto fail; | 5054 goto fail; |
5006 d += mcnt, d2 += mcnt; | 5055 d += mcnt, d2 += mcnt; |
5084 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); | 5133 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); |
5085 | 5134 |
5086 EXTRACT_NUMBER_AND_INCR (mcnt, p); | 5135 EXTRACT_NUMBER_AND_INCR (mcnt, p); |
5087 DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt)); | 5136 DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt)); |
5088 | 5137 |
5089 PUSH_FAILURE_POINT (p + mcnt, (char *) 0, -2); | 5138 PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); |
5090 break; | 5139 break; |
5091 | 5140 |
5092 | 5141 |
5093 /* Uses of on_failure_jump: | 5142 /* Uses of on_failure_jump: |
5094 | 5143 |
5304 actual values. Otherwise, we will restore only one | 5353 actual values. Otherwise, we will restore only one |
5305 register from the stack, since lowest will == highest in | 5354 register from the stack, since lowest will == highest in |
5306 `pop_failure_point'. */ | 5355 `pop_failure_point'. */ |
5307 unsigned dummy_low_reg, dummy_high_reg; | 5356 unsigned dummy_low_reg, dummy_high_reg; |
5308 unsigned char *pdummy; | 5357 unsigned char *pdummy; |
5309 const char *sdummy = NULL; | 5358 re_char *sdummy = NULL; |
5310 | 5359 |
5311 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); | 5360 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); |
5312 POP_FAILURE_POINT (sdummy, pdummy, | 5361 POP_FAILURE_POINT (sdummy, pdummy, |
5313 dummy_low_reg, dummy_high_reg, | 5362 dummy_low_reg, dummy_high_reg, |
5314 reg_dummy, reg_dummy, reg_info_dummy); | 5363 reg_dummy, reg_dummy, reg_info_dummy); |
5340 something meaningless for pop_failure_jump to pop. */ | 5389 something meaningless for pop_failure_jump to pop. */ |
5341 case dummy_failure_jump: | 5390 case dummy_failure_jump: |
5342 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); | 5391 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); |
5343 /* It doesn't matter what we push for the string here. What | 5392 /* It doesn't matter what we push for the string here. What |
5344 the code at `fail' tests is the value for the pattern. */ | 5393 the code at `fail' tests is the value for the pattern. */ |
5345 PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2); | 5394 PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); |
5346 goto unconditional_jump; | 5395 goto unconditional_jump; |
5347 | 5396 |
5348 | 5397 |
5349 /* At the end of an alternative, we need to push a dummy failure | 5398 /* At the end of an alternative, we need to push a dummy failure |
5350 point in case we are followed by a `pop_failure_jump', because | 5399 point in case we are followed by a `pop_failure_jump', because |
5353 requires that we match the `ab' alternative. */ | 5402 requires that we match the `ab' alternative. */ |
5354 case push_dummy_failure: | 5403 case push_dummy_failure: |
5355 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); | 5404 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); |
5356 /* See comments just above at `dummy_failure_jump' about the | 5405 /* See comments just above at `dummy_failure_jump' about the |
5357 two zeroes. */ | 5406 two zeroes. */ |
5358 PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2); | 5407 PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); |
5359 break; | 5408 break; |
5360 | 5409 |
5361 /* Have to succeed matching what follows at least n times. | 5410 /* Have to succeed matching what follows at least n times. |
5362 After that, handle like `on_failure_jump'. */ | 5411 After that, handle like `on_failure_jump'. */ |
5363 case succeed_n: | 5412 case succeed_n: |
5609 if (!WORDCHAR_P_UNSAFE ((int) (*d))) | 5658 if (!WORDCHAR_P_UNSAFE ((int) (*d))) |
5610 goto fail; | 5659 goto fail; |
5611 SET_REGS_MATCHED (); | 5660 SET_REGS_MATCHED (); |
5612 d++; | 5661 d++; |
5613 break; | 5662 break; |
5614 #endif /* not emacs */ | 5663 #endif /* emacs */ |
5615 | 5664 |
5616 default: | 5665 default: |
5617 abort (); | 5666 abort (); |
5618 } | 5667 } |
5619 continue; /* Successfully executed one pattern command; keep going. */ | 5668 continue; /* Successfully executed one pattern command; keep going. */ |
5919 | 5968 |
5920 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN | 5969 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN |
5921 bytes; nonzero otherwise. */ | 5970 bytes; nonzero otherwise. */ |
5922 | 5971 |
5923 static int | 5972 static int |
5924 bcmp_translate (const unsigned char *s1, const unsigned char *s2, | 5973 bcmp_translate (re_char *s1, re_char *s2, |
5925 REGISTER int len, char *translate) | 5974 REGISTER int len, RE_TRANSLATE_TYPE translate) |
5926 { | 5975 { |
5927 REGISTER const unsigned char *p1 = s1, *p2 = s2; | 5976 REGISTER const unsigned char *p1 = s1, *p2 = s2; |
5977 #ifdef MULE | |
5978 const unsigned char *p1_end = s1 + len; | |
5979 const unsigned char *p2_end = s2 + len; | |
5980 | |
5981 while (p1 != p1_end && p2 != p2_end) | |
5982 { | |
5983 Emchar p1_ch, p2_ch; | |
5984 | |
5985 p1_ch = charptr_emchar (p1); | |
5986 p2_ch = charptr_emchar (p2); | |
5987 | |
5988 if (RE_TRANSLATE (p1_ch) | |
5989 != RE_TRANSLATE (p2_ch)) | |
5990 return 1; | |
5991 INC_CHARPTR (p1); | |
5992 INC_CHARPTR (p2); | |
5993 } | |
5994 #else /* not MULE */ | |
5928 while (len) | 5995 while (len) |
5929 { | 5996 { |
5930 if (translate[*p1++] != translate[*p2++]) return 1; | 5997 if (RE_TRANSLATE (*p1++) != RE_TRANSLATE (*p2++)) return 1; |
5931 len--; | 5998 len--; |
5932 } | 5999 } |
6000 #endif /* MULE */ | |
5933 return 0; | 6001 return 0; |
5934 } | 6002 } |
5935 | 6003 |
5936 /* Entry points for GNU code. */ | 6004 /* Entry points for GNU code. */ |
5937 | 6005 |
5960 bufp->no_sub = 0; | 6028 bufp->no_sub = 0; |
5961 | 6029 |
5962 /* Match anchors at newline. */ | 6030 /* Match anchors at newline. */ |
5963 bufp->newline_anchor = 1; | 6031 bufp->newline_anchor = 1; |
5964 | 6032 |
5965 ret = regex_compile (pattern, length, re_syntax_options, bufp); | 6033 ret = regex_compile ((unsigned char *) pattern, length, re_syntax_options, bufp); |
5966 | 6034 |
5967 if (!ret) | 6035 if (!ret) |
5968 return NULL; | 6036 return NULL; |
5969 return gettext (re_error_msgid[(int) ret]); | 6037 return gettext (re_error_msgid[(int) ret]); |
5970 } | 6038 } |
6005 don't need to initialize the pattern buffer fields which affect it. */ | 6073 don't need to initialize the pattern buffer fields which affect it. */ |
6006 | 6074 |
6007 /* Match anchors at newlines. */ | 6075 /* Match anchors at newlines. */ |
6008 re_comp_buf.newline_anchor = 1; | 6076 re_comp_buf.newline_anchor = 1; |
6009 | 6077 |
6010 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); | 6078 ret = regex_compile ((unsigned char *)s, strlen (s), re_syntax_options, &re_comp_buf); |
6011 | 6079 |
6012 if (!ret) | 6080 if (!ret) |
6013 return NULL; | 6081 return NULL; |
6014 | 6082 |
6015 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ | 6083 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ |
6111 | 6179 |
6112 preg->no_sub = !!(cflags & REG_NOSUB); | 6180 preg->no_sub = !!(cflags & REG_NOSUB); |
6113 | 6181 |
6114 /* POSIX says a null character in the pattern terminates it, so we | 6182 /* POSIX says a null character in the pattern terminates it, so we |
6115 can use strlen here in compiling the pattern. */ | 6183 can use strlen here in compiling the pattern. */ |
6116 ret = regex_compile (pattern, strlen (pattern), syntax, preg); | 6184 ret = regex_compile ((unsigned char *) pattern, strlen (pattern), syntax, preg); |
6117 | 6185 |
6118 /* POSIX doesn't distinguish between an unmatched open-group and an | 6186 /* POSIX doesn't distinguish between an unmatched open-group and an |
6119 unmatched close-group: both are REG_EPAREN. */ | 6187 unmatched close-group: both are REG_EPAREN. */ |
6120 if (ret == REG_ERPAREN) ret = REG_EPAREN; | 6188 if (ret == REG_ERPAREN) ret = REG_EPAREN; |
6121 | 6189 |