comparison src/regex.c @ 446:1ccc32a20af4 r21-2-38

Import from CVS: tag r21-2-38
author cvs
date Mon, 13 Aug 2007 11:37:21 +0200
parents 576fb035e263
children 98528da0b7fc
comparison
equal deleted inserted replaced
445:34f3776fcf0e 446:1ccc32a20af4
102 void 102 void
103 complex_vars_of_regex (void) 103 complex_vars_of_regex (void)
104 { 104 {
105 } 105 }
106 106
107 #endif /* not MULE */ 107 #endif /* MULE */
108
109 #define RE_TRANSLATE(ch) TRT_TABLE_OF (translate, (Emchar) ch)
110 #define TRANSLATE_P(tr) (!NILP (tr))
108 111
109 #else /* not emacs */ 112 #else /* not emacs */
110 113
111 /* If we are not linking with Emacs proper, 114 /* If we are not linking with Emacs proper,
112 we can't use the relocating allocator 115 we can't use the relocating allocator
171 174
172 done = 1; 175 done = 1;
173 } 176 }
174 } 177 }
175 178
176 #endif /* not SYNTAX_TABLE */ 179 #endif /* SYNTAX_TABLE */
177 180
178 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c] 181 #define SYNTAX_UNSAFE(ignored, c) re_syntax_table[c]
179 182
180 #endif /* not emacs */ 183 #define RE_TRANSLATE(c) translate[(unsigned char) (c)]
184 #define TRANSLATE_P(tr) tr
185
186 #endif /* emacs */
181 187
182 /* Under XEmacs, this is needed because we don't define it elsewhere. */ 188 /* Under XEmacs, this is needed because we don't define it elsewhere. */
183 #ifdef SWITCH_ENUM_BUG 189 #ifdef SWITCH_ENUM_BUG
184 #define SWITCH_ENUM_CAST(x) ((int)(x)) 190 #define SWITCH_ENUM_CAST(x) ((int)(x))
185 #else 191 #else
286 #include <alloca.h> 292 #include <alloca.h>
287 #else /* not __GNUC__ or HAVE_ALLOCA_H */ 293 #else /* not __GNUC__ or HAVE_ALLOCA_H */
288 #ifndef _AIX /* Already did AIX, up at the top. */ 294 #ifndef _AIX /* Already did AIX, up at the top. */
289 void *alloca (); 295 void *alloca ();
290 #endif /* not _AIX */ 296 #endif /* not _AIX */
291 #endif /* not HAVE_ALLOCA_H */ 297 #endif /* HAVE_ALLOCA_H */
292 #endif /* not __GNUC__ */ 298 #endif /* __GNUC__ */
293 299
294 #endif /* not alloca */ 300 #endif /* not alloca */
295 301
296 #define REGEX_ALLOCATE alloca 302 #define REGEX_ALLOCATE alloca
297 303
302 destination) 308 destination)
303 309
304 /* No need to do anything to free, after alloca. */ 310 /* No need to do anything to free, after alloca. */
305 #define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ 311 #define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */
306 312
307 #endif /* not REGEX_MALLOC */ 313 #endif /* REGEX_MALLOC */
308 314
309 /* Define how to allocate the failure stack. */ 315 /* Define how to allocate the failure stack. */
310 316
311 #ifdef REL_ALLOC 317 #ifdef REL_ALLOC
312 #define REGEX_ALLOCATE_STACK(size) \ 318 #define REGEX_ALLOCATE_STACK(size) \
331 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 337 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \
332 REGEX_REALLOCATE (source, osize, nsize) 338 REGEX_REALLOCATE (source, osize, nsize)
333 /* No need to explicitly free anything. */ 339 /* No need to explicitly free anything. */
334 #define REGEX_FREE_STACK(arg) 340 #define REGEX_FREE_STACK(arg)
335 341
336 #endif /* not REGEX_MALLOC */ 342 #endif /* REGEX_MALLOC */
337 #endif /* not REL_ALLOC */ 343 #endif /* REL_ALLOC */
338 344
339 345
340 /* True if `size1' is non-NULL and PTR is pointing anywhere inside 346 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
341 `string1' or just past its end. This works if PTR is NULL, which is 347 `string1' or just past its end. This works if PTR is NULL, which is
342 a good thing. */ 348 a good thing. */
356 362
357 #undef MAX 363 #undef MAX
358 #undef MIN 364 #undef MIN
359 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 365 #define MAX(a, b) ((a) > (b) ? (a) : (b))
360 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 366 #define MIN(a, b) ((a) < (b) ? (a) : (b))
367
368 /* Type of source-pattern and string chars. */
369 typedef const unsigned char re_char;
361 370
362 typedef char boolean; 371 typedef char boolean;
363 #define false 0 372 #define false 0
364 #define true 1 373 #define true 1
365 374
552 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ 561 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
553 } while (0) 562 } while (0)
554 563
555 #ifdef DEBUG 564 #ifdef DEBUG
556 static void 565 static void
557 extract_number (int *dest, unsigned char *source) 566 extract_number (int *dest, re_char *source)
558 { 567 {
559 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 568 int temp = SIGN_EXTEND_CHAR (*(source + 1));
560 *dest = *source & 0377; 569 *dest = *source & 0377;
561 *dest += temp << 8; 570 *dest += temp << 8;
562 } 571 }
655 664
656 /* Print a compiled pattern string in human-readable form, starting at 665 /* Print a compiled pattern string in human-readable form, starting at
657 the START pointer into it and ending just before the pointer END. */ 666 the START pointer into it and ending just before the pointer END. */
658 667
659 static void 668 static void
660 print_partial_compiled_pattern (unsigned char *start, unsigned char *end) 669 print_partial_compiled_pattern (re_char *start, re_char *end)
661 { 670 {
662 int mcnt, mcnt2; 671 int mcnt, mcnt2;
663 unsigned char *p = start; 672 unsigned char *p = (unsigned char *) start;
664 unsigned char *pend = end; 673 re_char *pend = end;
665 674
666 if (start == NULL) 675 if (start == NULL)
667 { 676 {
668 puts ("(null)"); 677 puts ("(null)");
669 return; 678 return;
937 946
938 947
939 static void 948 static void
940 print_compiled_pattern (struct re_pattern_buffer *bufp) 949 print_compiled_pattern (struct re_pattern_buffer *bufp)
941 { 950 {
942 unsigned char *buffer = bufp->buffer; 951 re_char *buffer = bufp->buffer;
943 952
944 print_partial_compiled_pattern (buffer, buffer + bufp->used); 953 print_partial_compiled_pattern (buffer, buffer + bufp->used);
945 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used, 954 printf ("%ld bytes used/%ld bytes allocated.\n", bufp->used,
946 bufp->allocated); 955 bufp->allocated);
947 956
963 /* and maybe the category table? */ 972 /* and maybe the category table? */
964 } 973 }
965 974
966 975
967 static void 976 static void
968 print_double_string (const char *where, const char *string1, int size1, 977 print_double_string (re_char *where, re_char *string1, int size1,
969 const char *string2, int size2) 978 re_char *string2, int size2)
970 { 979 {
971 if (where == NULL) 980 if (where == NULL)
972 printf ("(null)"); 981 printf ("(null)");
973 else 982 else
974 { 983 {
998 #define DEBUG_PRINT3(x1, x2, x3) 1007 #define DEBUG_PRINT3(x1, x2, x3)
999 #define DEBUG_PRINT4(x1, x2, x3, x4) 1008 #define DEBUG_PRINT4(x1, x2, x3, x4)
1000 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 1009 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1001 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) 1010 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1002 1011
1003 #endif /* not DEBUG */ 1012 #endif /* DEBUG */
1004 1013
1005 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can 1014 /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
1006 also be assigned to arbitrarily: each pattern buffer stores its own 1015 also be assigned to arbitrarily: each pattern buffer stores its own
1007 syntax, so it can be changed between regex compilations. */ 1016 syntax, so it can be changed between regex compilations. */
1008 /* This has no initializer because initialized variables in Emacs 1017 /* This has no initializer because initialized variables in Emacs
1121 int re_max_failures = 2000; 1130 int re_max_failures = 2000;
1122 #endif 1131 #endif
1123 1132
1124 union fail_stack_elt 1133 union fail_stack_elt
1125 { 1134 {
1126 unsigned char *pointer; 1135 re_char *pointer;
1127 int integer; 1136 int integer;
1128 }; 1137 };
1129 1138
1130 typedef union fail_stack_elt fail_stack_elt_t; 1139 typedef union fail_stack_elt fail_stack_elt_t;
1131 1140
1132 typedef struct 1141 typedef struct
1133 { 1142 {
1134 fail_stack_elt_t *stack; 1143 fail_stack_elt_t *stack;
1135 unsigned size; 1144 size_t size;
1136 unsigned avail; /* Offset of next open position. */ 1145 size_t avail; /* Offset of next open position. */
1137 } fail_stack_type; 1146 } fail_stack_type;
1138 1147
1139 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0) 1148 #define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
1140 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) 1149 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1141 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) 1150 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1307 PUSH_FAILURE_INT (lowest_active_reg); \ 1316 PUSH_FAILURE_INT (lowest_active_reg); \
1308 \ 1317 \
1309 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ 1318 DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
1310 PUSH_FAILURE_INT (highest_active_reg); \ 1319 PUSH_FAILURE_INT (highest_active_reg); \
1311 \ 1320 \
1312 DEBUG_PRINT2 (" Pushing pattern 0x%lx: ", (long) pattern_place); \ 1321 DEBUG_PRINT2 (" Pushing pattern 0x%lx: \n", (long) pattern_place); \
1313 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 1322 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1314 PUSH_FAILURE_POINTER (pattern_place); \ 1323 PUSH_FAILURE_POINTER (pattern_place); \
1315 \ 1324 \
1316 DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \ 1325 DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \
1317 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 1326 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1383 /* If the saved string location is NULL, it came from an \ 1392 /* If the saved string location is NULL, it came from an \
1384 on_failure_keep_string_jump opcode, and we want to throw away the \ 1393 on_failure_keep_string_jump opcode, and we want to throw away the \
1385 saved NULL, thus retaining our current position in the string. */ \ 1394 saved NULL, thus retaining our current position in the string. */ \
1386 string_temp = POP_FAILURE_POINTER (); \ 1395 string_temp = POP_FAILURE_POINTER (); \
1387 if (string_temp != NULL) \ 1396 if (string_temp != NULL) \
1388 str = (const char *) string_temp; \ 1397 str = string_temp; \
1389 \ 1398 \
1390 DEBUG_PRINT2 (" Popping string 0x%lx: `", (long) str); \ 1399 DEBUG_PRINT2 (" Popping string 0x%lx: `", (long) str); \
1391 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ 1400 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1392 DEBUG_PRINT1 ("'\n"); \ 1401 DEBUG_PRINT1 ("'\n"); \
1393 \ 1402 \
1408 \ 1417 \
1409 reg_info[this_reg].word = POP_FAILURE_ELT (); \ 1418 reg_info[this_reg].word = POP_FAILURE_ELT (); \
1410 DEBUG_PRINT2 (" info: 0x%lx\n", \ 1419 DEBUG_PRINT2 (" info: 0x%lx\n", \
1411 * (long *) &reg_info[this_reg]); \ 1420 * (long *) &reg_info[this_reg]); \
1412 \ 1421 \
1413 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1422 regend[this_reg] = POP_FAILURE_POINTER (); \
1414 DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ 1423 DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \
1415 \ 1424 \
1416 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1425 regstart[this_reg] = POP_FAILURE_POINTER (); \
1417 DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ 1426 DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \
1418 } \ 1427 } \
1419 \ 1428 \
1420 set_regs_matched_done = 0; \ 1429 set_regs_matched_done = 0; \
1421 DEBUG_STATEMENT (nfailure_points_popped++); \ 1430 DEBUG_STATEMENT (nfailure_points_popped++); \
1474 } \ 1483 } \
1475 } \ 1484 } \
1476 while (0) 1485 while (0)
1477 1486
1478 /* Registers are set to a sentinel when they haven't yet matched. */ 1487 /* Registers are set to a sentinel when they haven't yet matched. */
1479 static char reg_unset_dummy; 1488 static unsigned char reg_unset_dummy;
1480 #define REG_UNSET_VALUE (&reg_unset_dummy) 1489 #define REG_UNSET_VALUE (&reg_unset_dummy)
1481 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE) 1490 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1482 1491
1483 /* Subroutine declarations and macros for regex_compile. */ 1492 /* Subroutine declarations and macros for regex_compile. */
1484 1493
1485 /* Fetch the next character in the uncompiled pattern---translating it 1494 /* Fetch the next character in the uncompiled pattern---translating it
1486 if necessary. Also cast from a signed character in the constant 1495 if necessary. Also cast from a signed character in the constant
1487 string passed to us by the user to an unsigned char that we can use 1496 string passed to us by the user to an unsigned char that we can use
1488 as an array index (in, e.g., `translate'). */ 1497 as an array index (in, e.g., `translate'). */
1489 #define PATFETCH(c) \ 1498 #define PATFETCH(c) \
1490 do {if (p == pend) return REG_EEND; \ 1499 do { \
1491 assert (p < pend); \ 1500 PATFETCH_RAW (c); \
1492 c = (unsigned char) *p++; \ 1501 c = TRANSLATE (c); \
1493 if (translate) c = (unsigned char) translate[c]; \
1494 } while (0) 1502 } while (0)
1495 1503
1496 /* Fetch the next character in the uncompiled pattern, with no 1504 /* Fetch the next character in the uncompiled pattern, with no
1497 translation. */ 1505 translation. */
1498 #define PATFETCH_RAW(c) \ 1506 #define PATFETCH_RAW(c) \
1499 do {if (p == pend) return REG_EEND; \ 1507 do {if (p == pend) return REG_EEND; \
1500 assert (p < pend); \ 1508 assert (p < pend); \
1501 c = (unsigned char) *p++; \ 1509 c = charptr_emchar (p); \
1510 INC_CHARPTR (p); \
1502 } while (0) 1511 } while (0)
1503 1512
1504 /* Go backwards one character in the pattern. */ 1513 /* Go backwards one character in the pattern. */
1505 #define PATUNFETCH p-- 1514 #define PATUNFETCH DEC_CHARPTR (p)
1506 1515
1507 #ifdef MULE 1516 #ifdef MULE
1508 1517
1509 #define PATFETCH_EXTENDED(emch) \ 1518 #define PATFETCH_EXTENDED(emch) \
1510 do {if (p == pend) return REG_EEND; \ 1519 do {if (p == pend) return REG_EEND; \
1511 assert (p < pend); \ 1520 assert (p < pend); \
1512 emch = charptr_emchar ((const Bufbyte *) p); \ 1521 emch = charptr_emchar ((const Bufbyte *) p); \
1513 INC_CHARPTR (p); \ 1522 INC_CHARPTR (p); \
1514 if (translate && emch < 0x80) \ 1523 if (TRANSLATE_P (translate) && emch < 0x80) \
1515 emch = (Emchar) (unsigned char) translate[emch]; \ 1524 emch = (Emchar) (unsigned char) RE_TRANSLATE (emch); \
1516 } while (0) 1525 } while (0)
1517 1526
1518 #define PATFETCH_RAW_EXTENDED(emch) \ 1527 #define PATFETCH_RAW_EXTENDED(emch) \
1519 do {if (p == pend) return REG_EEND; \ 1528 do {if (p == pend) return REG_EEND; \
1520 assert (p < pend); \ 1529 assert (p < pend); \
1552 1561
1553 #define PATFETCH_EITHER(emch) PATFETCH (emch) 1562 #define PATFETCH_EITHER(emch) PATFETCH (emch)
1554 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch) 1563 #define PATFETCH_RAW_EITHER(emch) PATFETCH_RAW (emch)
1555 #define PATUNFETCH_EITHER PATUNFETCH 1564 #define PATUNFETCH_EITHER PATUNFETCH
1556 1565
1557 #endif /* not MULE */ 1566 #endif /* MULE */
1558 1567
1559 /* If `translate' is non-null, return translate[D], else just D. We 1568 /* If `translate' is non-null, return translate[D], else just D. We
1560 cast the subscript to translate because some data is declared as 1569 cast the subscript to translate because some data is declared as
1561 `char *', to avoid warnings when a string constant is passed. But 1570 `char *', to avoid warnings when a string constant is passed. But
1562 when we use a character as a subscript we must make it unsigned. */ 1571 when we use a character as a subscript we must make it unsigned. */
1563 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) 1572 #define TRANSLATE(d) (TRANSLATE_P (translate) ? RE_TRANSLATE (d) : (d))
1564 1573
1565 #ifdef MULE 1574 #ifdef MULE
1566 1575
1567 #define TRANSLATE_EXTENDED_UNSAFE(emch) \ 1576 #define TRANSLATE_EXTENDED_UNSAFE(emch) \
1568 (translate && emch < 0x80 ? translate[emch] : (emch)) 1577 (TRANSLATE_P (translate) && emch < 0x80 ? RE_TRANSLATE (emch) : (emch))
1569 1578
1570 #endif 1579 #endif
1571 1580
1572 /* Macros for outputting the compiled pattern into `buffer'. */ 1581 /* Macros for outputting the compiled pattern into `buffer'. */
1573 1582
1574 /* If the buffer isn't allocated when it comes in, use this. */ 1583 /* If the buffer isn't allocated when it comes in, use this. */
1575 #define INIT_BUF_SIZE 32 1584 #define INIT_BUF_SIZE 32
1576 1585
1577 /* Make sure we have at least N more bytes of space in buffer. */ 1586 /* Make sure we have at least N more bytes of space in buffer. */
1578 #define GET_BUFFER_SPACE(n) \ 1587 #define GET_BUFFER_SPACE(n) \
1579 while (b - bufp->buffer + (n) > bufp->allocated) \ 1588 while (buf_end - bufp->buffer + (n) > bufp->allocated) \
1580 EXTEND_BUFFER () 1589 EXTEND_BUFFER ()
1581 1590
1582 /* Make sure we have one more byte of buffer space and then add C to it. */ 1591 /* Make sure we have one more byte of buffer space and then add C to it. */
1583 #define BUF_PUSH(c) \ 1592 #define BUF_PUSH(c) \
1584 do { \ 1593 do { \
1585 GET_BUFFER_SPACE (1); \ 1594 GET_BUFFER_SPACE (1); \
1586 *b++ = (unsigned char) (c); \ 1595 *buf_end++ = (unsigned char) (c); \
1587 } while (0) 1596 } while (0)
1588 1597
1589 1598
1590 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */ 1599 /* Ensure we have two more bytes of buffer space and then append C1 and C2. */
1591 #define BUF_PUSH_2(c1, c2) \ 1600 #define BUF_PUSH_2(c1, c2) \
1592 do { \ 1601 do { \
1593 GET_BUFFER_SPACE (2); \ 1602 GET_BUFFER_SPACE (2); \
1594 *b++ = (unsigned char) (c1); \ 1603 *buf_end++ = (unsigned char) (c1); \
1595 *b++ = (unsigned char) (c2); \ 1604 *buf_end++ = (unsigned char) (c2); \
1596 } while (0) 1605 } while (0)
1597 1606
1598 1607
1599 /* As with BUF_PUSH_2, except for three bytes. */ 1608 /* As with BUF_PUSH_2, except for three bytes. */
1600 #define BUF_PUSH_3(c1, c2, c3) \ 1609 #define BUF_PUSH_3(c1, c2, c3) \
1601 do { \ 1610 do { \
1602 GET_BUFFER_SPACE (3); \ 1611 GET_BUFFER_SPACE (3); \
1603 *b++ = (unsigned char) (c1); \ 1612 *buf_end++ = (unsigned char) (c1); \
1604 *b++ = (unsigned char) (c2); \ 1613 *buf_end++ = (unsigned char) (c2); \
1605 *b++ = (unsigned char) (c3); \ 1614 *buf_end++ = (unsigned char) (c3); \
1606 } while (0) 1615 } while (0)
1607 1616
1608 1617
1609 /* Store a jump with opcode OP at LOC to location TO. We store a 1618 /* Store a jump with opcode OP at LOC to location TO. We store a
1610 relative address offset by the three bytes the jump itself occupies. */ 1619 relative address offset by the three bytes the jump itself occupies. */
1613 1622
1614 /* Likewise, for a two-argument jump. */ 1623 /* Likewise, for a two-argument jump. */
1615 #define STORE_JUMP2(op, loc, to, arg) \ 1624 #define STORE_JUMP2(op, loc, to, arg) \
1616 store_op2 (op, loc, (to) - (loc) - 3, arg) 1625 store_op2 (op, loc, (to) - (loc) - 3, arg)
1617 1626
1618 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 1627 /* Like `STORE_JUMP', but for inserting. Assume `buf_end' is the
1628 buffer end. */
1619 #define INSERT_JUMP(op, loc, to) \ 1629 #define INSERT_JUMP(op, loc, to) \
1620 insert_op1 (op, loc, (to) - (loc) - 3, b) 1630 insert_op1 (op, loc, (to) - (loc) - 3, buf_end)
1621 1631
1622 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 1632 /* Like `STORE_JUMP2', but for inserting. Assume `buf_end' is the
1633 buffer end. */
1623 #define INSERT_JUMP2(op, loc, to, arg) \ 1634 #define INSERT_JUMP2(op, loc, to, arg) \
1624 insert_op2 (op, loc, (to) - (loc) - 3, arg, b) 1635 insert_op2 (op, loc, (to) - (loc) - 3, arg, buf_end)
1625 1636
1626 1637
1627 /* This is not an arbitrary limit: the arguments which represent offsets 1638 /* This is not an arbitrary limit: the arguments which represent offsets
1628 into the pattern are two bytes long. So if 2^16 bytes turns out to 1639 into the pattern are two bytes long. So if 2^16 bytes turns out to
1629 be too small, many things would have to change. */ 1640 be too small, many things would have to change. */
1634 reset the pointers that pointed into the old block to point to the 1645 reset the pointers that pointed into the old block to point to the
1635 correct places in the new one. If extending the buffer results in it 1646 correct places in the new one. If extending the buffer results in it
1636 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 1647 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1637 #define EXTEND_BUFFER() \ 1648 #define EXTEND_BUFFER() \
1638 do { \ 1649 do { \
1639 unsigned char *old_buffer = bufp->buffer; \ 1650 re_char *old_buffer = bufp->buffer; \
1640 if (bufp->allocated == MAX_BUF_SIZE) \ 1651 if (bufp->allocated == MAX_BUF_SIZE) \
1641 return REG_ESIZE; \ 1652 return REG_ESIZE; \
1642 bufp->allocated <<= 1; \ 1653 bufp->allocated <<= 1; \
1643 if (bufp->allocated > MAX_BUF_SIZE) \ 1654 if (bufp->allocated > MAX_BUF_SIZE) \
1644 bufp->allocated = MAX_BUF_SIZE; \ 1655 bufp->allocated = MAX_BUF_SIZE; \
1646 if (bufp->buffer == NULL) \ 1657 if (bufp->buffer == NULL) \
1647 return REG_ESPACE; \ 1658 return REG_ESPACE; \
1648 /* If the buffer moved, move all the pointers into it. */ \ 1659 /* If the buffer moved, move all the pointers into it. */ \
1649 if (old_buffer != bufp->buffer) \ 1660 if (old_buffer != bufp->buffer) \
1650 { \ 1661 { \
1651 b = (b - old_buffer) + bufp->buffer; \ 1662 buf_end = (buf_end - old_buffer) + bufp->buffer; \
1652 begalt = (begalt - old_buffer) + bufp->buffer; \ 1663 begalt = (begalt - old_buffer) + bufp->buffer; \
1653 if (fixup_alt_jump) \ 1664 if (fixup_alt_jump) \
1654 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ 1665 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1655 if (laststart) \ 1666 if (laststart) \
1656 laststart = (laststart - old_buffer) + bufp->buffer; \ 1667 laststart = (laststart - old_buffer) + bufp->buffer; \
1703 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 1714 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1704 1715
1705 1716
1706 /* Set the bit for character C in a bit vector. */ 1717 /* Set the bit for character C in a bit vector. */
1707 #define SET_LIST_BIT(c) \ 1718 #define SET_LIST_BIT(c) \
1708 (b[((unsigned char) (c)) / BYTEWIDTH] \ 1719 (buf_end[((unsigned char) (c)) / BYTEWIDTH] \
1709 |= 1 << (((unsigned char) c) % BYTEWIDTH)) 1720 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1710 1721
1711 #ifdef MULE 1722 #ifdef MULE
1712 1723
1713 /* Set the "bit" for character C in a range table. */ 1724 /* Set the "bit" for character C in a range table. */
1760 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2); 1771 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1761 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, 1772 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg,
1762 unsigned char *end); 1773 unsigned char *end);
1763 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, 1774 static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
1764 unsigned char *end); 1775 unsigned char *end);
1765 static boolean at_begline_loc_p (const char *pattern, const char *p, 1776 static boolean at_begline_loc_p (re_char *pattern, re_char *p,
1766 reg_syntax_t syntax); 1777 reg_syntax_t syntax);
1767 static boolean at_endline_loc_p (const char *p, const char *pend, int syntax); 1778 static boolean at_endline_loc_p (re_char *p, re_char *pend, int syntax);
1768 static boolean group_in_compile_stack (compile_stack_type compile_stack, 1779 static boolean group_in_compile_stack (compile_stack_type compile_stack,
1769 regnum_t regnum); 1780 regnum_t regnum);
1770 static reg_errcode_t compile_range (const char **p_ptr, const char *pend, 1781 static reg_errcode_t compile_range (re_char **p_ptr, re_char *pend,
1771 char *translate, reg_syntax_t syntax, 1782 RE_TRANSLATE_TYPE translate,
1783 reg_syntax_t syntax,
1772 unsigned char *b); 1784 unsigned char *b);
1773 #ifdef MULE 1785 #ifdef MULE
1774 static reg_errcode_t compile_extended_range (const char **p_ptr, 1786 static reg_errcode_t compile_extended_range (re_char **p_ptr,
1775 const char *pend, 1787 re_char *pend,
1776 char *translate, 1788 RE_TRANSLATE_TYPE translate,
1777 reg_syntax_t syntax, 1789 reg_syntax_t syntax,
1778 Lisp_Object rtab); 1790 Lisp_Object rtab);
1779 #endif /* MULE */ 1791 #endif /* MULE */
1780 static boolean group_match_null_string_p (unsigned char **p, 1792 static boolean group_match_null_string_p (unsigned char **p,
1781 unsigned char *end, 1793 unsigned char *end,
1784 register_info_type *reg_info); 1796 register_info_type *reg_info);
1785 static boolean common_op_match_null_string_p (unsigned char **p, 1797 static boolean common_op_match_null_string_p (unsigned char **p,
1786 unsigned char *end, 1798 unsigned char *end,
1787 register_info_type *reg_info); 1799 register_info_type *reg_info);
1788 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2, 1800 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2,
1789 REGISTER int len, char *translate); 1801 REGISTER int len, RE_TRANSLATE_TYPE translate);
1790 static int re_match_2_internal (struct re_pattern_buffer *bufp, 1802 static int re_match_2_internal (struct re_pattern_buffer *bufp,
1791 const char *string1, int size1, 1803 re_char *string1, int size1,
1792 const char *string2, int size2, int pos, 1804 re_char *string2, int size2, int pos,
1793 struct re_registers *regs, int stop); 1805 struct re_registers *regs, int stop);
1794 1806
1795 #ifndef MATCH_MAY_ALLOCATE 1807 #ifndef MATCH_MAY_ALLOCATE
1796 1808
1797 /* If we cannot allocate large objects within re_match_2_internal, 1809 /* If we cannot allocate large objects within re_match_2_internal,
1806 /* Size with which the following vectors are currently allocated. 1818 /* Size with which the following vectors are currently allocated.
1807 That is so we can make them bigger as needed, 1819 That is so we can make them bigger as needed,
1808 but never make them smaller. */ 1820 but never make them smaller. */
1809 static int regs_allocated_size; 1821 static int regs_allocated_size;
1810 1822
1811 static const char ** regstart, ** regend; 1823 static re_char ** regstart, ** regend;
1812 static const char ** old_regstart, ** old_regend; 1824 static re_char ** old_regstart, ** old_regend;
1813 static const char **best_regstart, **best_regend; 1825 static re_char **best_regstart, **best_regend;
1814 static register_info_type *reg_info; 1826 static register_info_type *reg_info;
1815 static const char **reg_dummy; 1827 static re_char **reg_dummy;
1816 static register_info_type *reg_info_dummy; 1828 static register_info_type *reg_info_dummy;
1817 1829
1818 /* Make the register vectors big enough for NUM_REGS registers, 1830 /* Make the register vectors big enough for NUM_REGS registers,
1819 but don't make them smaller. */ 1831 but don't make them smaller. */
1820 1832
1821 static 1833 static
1822 regex_grow_registers (int num_regs) 1834 regex_grow_registers (int num_regs)
1823 { 1835 {
1824 if (num_regs > regs_allocated_size) 1836 if (num_regs > regs_allocated_size)
1825 { 1837 {
1826 RETALLOC_IF (regstart, num_regs, const char *); 1838 RETALLOC_IF (regstart, num_regs, re_char *);
1827 RETALLOC_IF (regend, num_regs, const char *); 1839 RETALLOC_IF (regend, num_regs, re_char *);
1828 RETALLOC_IF (old_regstart, num_regs, const char *); 1840 RETALLOC_IF (old_regstart, num_regs, re_char *);
1829 RETALLOC_IF (old_regend, num_regs, const char *); 1841 RETALLOC_IF (old_regend, num_regs, re_char *);
1830 RETALLOC_IF (best_regstart, num_regs, const char *); 1842 RETALLOC_IF (best_regstart, num_regs, re_char *);
1831 RETALLOC_IF (best_regend, num_regs, const char *); 1843 RETALLOC_IF (best_regend, num_regs, re_char *);
1832 RETALLOC_IF (reg_info, num_regs, register_info_type); 1844 RETALLOC_IF (reg_info, num_regs, register_info_type);
1833 RETALLOC_IF (reg_dummy, num_regs, const char *); 1845 RETALLOC_IF (reg_dummy, num_regs, re_char *);
1834 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); 1846 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1835 1847
1836 regs_allocated_size = num_regs; 1848 regs_allocated_size = num_regs;
1837 } 1849 }
1838 } 1850 }
1860 /* Return, freeing storage we allocated. */ 1872 /* Return, freeing storage we allocated. */
1861 #define FREE_STACK_RETURN(value) \ 1873 #define FREE_STACK_RETURN(value) \
1862 return (free (compile_stack.stack), value) 1874 return (free (compile_stack.stack), value)
1863 1875
1864 static reg_errcode_t 1876 static reg_errcode_t
1865 regex_compile (const char *pattern, int size, reg_syntax_t syntax, 1877 regex_compile (re_char *pattern, int size, reg_syntax_t syntax,
1866 struct re_pattern_buffer *bufp) 1878 struct re_pattern_buffer *bufp)
1867 { 1879 {
1868 /* We fetch characters from PATTERN here. We declare these as int 1880 /* We fetch characters from PATTERN here. We declare these as int
1869 (or possibly long) so that chars above 127 can be used as 1881 (or possibly long) so that chars above 127 can be used as
1870 array indices. The macros that fetch a character from the pattern 1882 array indices. The macros that fetch a character from the pattern
1872 get bitten by negative numbers here. */ 1884 get bitten by negative numbers here. */
1873 /* XEmacs change: used to be unsigned char. */ 1885 /* XEmacs change: used to be unsigned char. */
1874 REGISTER EMACS_INT c, c1; 1886 REGISTER EMACS_INT c, c1;
1875 1887
1876 /* A random temporary spot in PATTERN. */ 1888 /* A random temporary spot in PATTERN. */
1877 const char *p1; 1889 re_char *p1;
1878 1890
1879 /* Points to the end of the buffer, where we should append. */ 1891 /* Points to the end of the buffer, where we should append. */
1880 REGISTER unsigned char *b; 1892 REGISTER unsigned char *buf_end;
1881 1893
1882 /* Keeps track of unclosed groups. */ 1894 /* Keeps track of unclosed groups. */
1883 compile_stack_type compile_stack; 1895 compile_stack_type compile_stack;
1884 1896
1885 /* Points to the current (ending) position in the pattern. */ 1897 /* Points to the current (ending) position in the pattern. */
1886 const char *p = pattern; 1898 re_char *p = pattern;
1887 const char *pend = pattern + size; 1899 re_char *pend = pattern + size;
1888 1900
1889 /* How to translate the characters in the pattern. */ 1901 /* How to translate the characters in the pattern. */
1890 char *translate = bufp->translate; 1902 RE_TRANSLATE_TYPE translate = bufp->translate;
1891 1903
1892 /* Address of the count-byte of the most recently inserted `exactn' 1904 /* Address of the count-byte of the most recently inserted `exactn'
1893 command. This makes it possible to tell if a new exact-match 1905 command. This makes it possible to tell if a new exact-match
1894 character can be added to that command or if the character requires 1906 character can be added to that command or if the character requires
1895 a new `exactn' command. */ 1907 a new `exactn' command. */
1903 /* Address of beginning of regexp, or inside of last group. */ 1915 /* Address of beginning of regexp, or inside of last group. */
1904 unsigned char *begalt; 1916 unsigned char *begalt;
1905 1917
1906 /* Place in the uncompiled pattern (i.e., the {) to 1918 /* Place in the uncompiled pattern (i.e., the {) to
1907 which to go back if the interval is invalid. */ 1919 which to go back if the interval is invalid. */
1908 const char *beg_interval; 1920 re_char *beg_interval;
1909 1921
1910 /* Address of the place where a forward jump should go to the end of 1922 /* Address of the place where a forward jump should go to the end of
1911 the containing expression. Each alternative of an `or' -- except the 1923 the containing expression. Each alternative of an `or' -- except the
1912 last -- ends with a forward jump of this sort. */ 1924 last -- ends with a forward jump of this sort. */
1913 unsigned char *fixup_alt_jump = 0; 1925 unsigned char *fixup_alt_jump = 0;
1970 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); 1982 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1971 1983
1972 bufp->allocated = INIT_BUF_SIZE; 1984 bufp->allocated = INIT_BUF_SIZE;
1973 } 1985 }
1974 1986
1975 begalt = b = bufp->buffer; 1987 begalt = buf_end = bufp->buffer;
1976 1988
1977 /* Loop through the uncompiled pattern until we're at the end. */ 1989 /* Loop through the uncompiled pattern until we're at the end. */
1978 while (p != pend) 1990 while (p != pend)
1979 { 1991 {
1980 PATFETCH (c); 1992 PATFETCH (c);
2101 3: /jump to 9 2113 3: /jump to 9
2102 6: /exactn/1/A 2114 6: /exactn/1/A
2103 9: end of pattern. 2115 9: end of pattern.
2104 */ 2116 */
2105 GET_BUFFER_SPACE (6); 2117 GET_BUFFER_SPACE (6);
2106 INSERT_JUMP (jump, laststart, b + 3); 2118 INSERT_JUMP (jump, laststart, buf_end + 3);
2107 b += 3; 2119 buf_end += 3;
2108 INSERT_JUMP (on_failure_jump, laststart, laststart + 6); 2120 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2109 b += 3; 2121 buf_end += 3;
2110 } 2122 }
2111 else if (zero_times_ok) 2123 else if (zero_times_ok)
2112 { 2124 {
2113 /* "a*?" becomes: 2125 /* "a*?" becomes:
2114 0: /jump to 6 2126 0: /jump to 6
2115 3: /exactn/1/A 2127 3: /exactn/1/A
2116 6: /on_failure_jump to 3 2128 6: /on_failure_jump to 3
2117 9: end of pattern. 2129 9: end of pattern.
2118 */ 2130 */
2119 GET_BUFFER_SPACE (6); 2131 GET_BUFFER_SPACE (6);
2120 INSERT_JUMP (jump, laststart, b + 3); 2132 INSERT_JUMP (jump, laststart, buf_end + 3);
2121 b += 3; 2133 buf_end += 3;
2122 STORE_JUMP (on_failure_jump, b, laststart + 3); 2134 STORE_JUMP (on_failure_jump, buf_end, laststart + 3);
2123 b += 3; 2135 buf_end += 3;
2124 } 2136 }
2125 else 2137 else
2126 { 2138 {
2127 /* "a+?" becomes: 2139 /* "a+?" becomes:
2128 0: /exactn/1/A 2140 0: /exactn/1/A
2129 3: /on_failure_jump to 0 2141 3: /on_failure_jump to 0
2130 6: end of pattern. 2142 6: end of pattern.
2131 */ 2143 */
2132 GET_BUFFER_SPACE (3); 2144 GET_BUFFER_SPACE (3);
2133 STORE_JUMP (on_failure_jump, b, laststart); 2145 STORE_JUMP (on_failure_jump, buf_end, laststart);
2134 b += 3; 2146 buf_end += 3;
2135 } 2147 }
2136 } 2148 }
2137 else 2149 else
2138 { 2150 {
2139 /* Are we optimizing this jump? */ 2151 /* Are we optimizing this jump? */
2140 boolean keep_string_p = false; 2152 boolean keep_string_p = false;
2141 2153
2142 if (many_times_ok) 2154 if (many_times_ok)
2143 { /* More than one repetition is allowed, so put in at the 2155 { /* More than one repetition is allowed, so put in
2144 end a backward relative jump from `b' to before the next 2156 at the end a backward relative jump from
2145 jump we're going to put in below (which jumps from 2157 `buf_end' to before the next jump we're going
2146 laststart to after this jump). 2158 to put in below (which jumps from laststart to
2159 after this jump).
2147 2160
2148 But if we are at the `*' in the exact sequence `.*\n', 2161 But if we are at the `*' in the exact sequence `.*\n',
2149 insert an unconditional jump backwards to the ., 2162 insert an unconditional jump backwards to the .,
2150 instead of the beginning of the loop. This way we only 2163 instead of the beginning of the loop. This way we only
2151 push a failure point once, instead of every time 2164 push a failure point once, instead of every time
2159 pattern, because laststart was nonzero. And we've 2172 pattern, because laststart was nonzero. And we've
2160 already incremented `p', by the way, to be the 2173 already incremented `p', by the way, to be the
2161 character after the `*'. Do we have to do something 2174 character after the `*'. Do we have to do something
2162 analogous here for null bytes, because of 2175 analogous here for null bytes, because of
2163 RE_DOT_NOT_NULL? */ 2176 RE_DOT_NOT_NULL? */
2164 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 2177 if (*(p - 2) == '.'
2165 && zero_times_ok 2178 && zero_times_ok
2166 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 2179 && p < pend && *p == '\n'
2167 && !(syntax & RE_DOT_NEWLINE)) 2180 && !(syntax & RE_DOT_NEWLINE))
2168 { /* We have .*\n. */ 2181 { /* We have .*\n. */
2169 STORE_JUMP (jump, b, laststart); 2182 STORE_JUMP (jump, buf_end, laststart);
2170 keep_string_p = true; 2183 keep_string_p = true;
2171 } 2184 }
2172 else 2185 else
2173 /* Anything else. */ 2186 /* Anything else. */
2174 STORE_JUMP (maybe_pop_jump, b, laststart - 3); 2187 STORE_JUMP (maybe_pop_jump, buf_end, laststart - 3);
2175 2188
2176 /* We've added more stuff to the buffer. */ 2189 /* We've added more stuff to the buffer. */
2177 b += 3; 2190 buf_end += 3;
2178 } 2191 }
2179 2192
2180 /* On failure, jump from laststart to b + 3, which will be the 2193 /* On failure, jump from laststart to buf_end + 3,
2181 end of the buffer after this jump is inserted. */ 2194 which will be the end of the buffer after this jump
2195 is inserted. */
2182 GET_BUFFER_SPACE (3); 2196 GET_BUFFER_SPACE (3);
2183 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 2197 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2184 : on_failure_jump, 2198 : on_failure_jump,
2185 laststart, b + 3); 2199 laststart, buf_end + 3);
2186 b += 3; 2200 buf_end += 3;
2187 2201
2188 if (!zero_times_ok) 2202 if (!zero_times_ok)
2189 { 2203 {
2190 /* At least one repetition is required, so insert a 2204 /* At least one repetition is required, so insert a
2191 `dummy_failure_jump' before the initial 2205 `dummy_failure_jump' before the initial
2192 `on_failure_jump' instruction of the loop. This 2206 `on_failure_jump' instruction of the loop. This
2193 effects a skip over that instruction the first time 2207 effects a skip over that instruction the first time
2194 we hit that loop. */ 2208 we hit that loop. */
2195 GET_BUFFER_SPACE (3); 2209 GET_BUFFER_SPACE (3);
2196 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); 2210 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2197 b += 3; 2211 buf_end += 3;
2198 } 2212 }
2199 } 2213 }
2200 pending_exact = 0; 2214 pending_exact = 0;
2201 } 2215 }
2202 break; 2216 break;
2203 2217
2204 2218
2205 case '.': 2219 case '.':
2206 laststart = b; 2220 laststart = buf_end;
2207 BUF_PUSH (anychar); 2221 BUF_PUSH (anychar);
2208 break; 2222 break;
2209 2223
2210 2224
2211 case '[': 2225 case '[':
2221 2235
2222 /* Ensure that we have enough space to push a charset: the 2236 /* Ensure that we have enough space to push a charset: the
2223 opcode, the length count, and the bitset; 34 bytes in all. */ 2237 opcode, the length count, and the bitset; 34 bytes in all. */
2224 GET_BUFFER_SPACE (34); 2238 GET_BUFFER_SPACE (34);
2225 2239
2226 laststart = b; 2240 laststart = buf_end;
2227 2241
2228 /* We test `*p == '^' twice, instead of using an if 2242 /* We test `*p == '^' twice, instead of using an if
2229 statement, so we only need one BUF_PUSH. */ 2243 statement, so we only need one BUF_PUSH. */
2230 BUF_PUSH (*p == '^' ? charset_not : charset); 2244 BUF_PUSH (*p == '^' ? charset_not : charset);
2231 if (*p == '^') 2245 if (*p == '^')
2236 2250
2237 /* Push the number of bytes in the bitmap. */ 2251 /* Push the number of bytes in the bitmap. */
2238 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 2252 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2239 2253
2240 /* Clear the whole map. */ 2254 /* Clear the whole map. */
2241 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); 2255 memset (buf_end, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2242 2256
2243 /* charset_not matches newline according to a syntax bit. */ 2257 /* charset_not matches newline according to a syntax bit. */
2244 if ((re_opcode_t) b[-2] == charset_not 2258 if ((re_opcode_t) buf_end[-2] == charset_not
2245 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 2259 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2246 SET_LIST_BIT ('\n'); 2260 SET_LIST_BIT ('\n');
2247 2261
2248 #ifdef MULE 2262 #ifdef MULE
2249 start_over_with_extended: 2263 start_over_with_extended:
2250 if (has_extended_chars) 2264 if (has_extended_chars)
2251 { 2265 {
2252 /* There are extended chars here, which means we need to start 2266 /* There are extended chars here, which means we need to start
2253 over and shift to unified range-table format. */ 2267 over and shift to unified range-table format. */
2254 if (b[-2] == charset) 2268 if (buf_end[-2] == charset)
2255 b[-2] = charset_mule; 2269 buf_end[-2] = charset_mule;
2256 else 2270 else
2257 b[-2] = charset_mule_not; 2271 buf_end[-2] = charset_mule_not;
2258 b--; 2272 buf_end--;
2259 p = p1; /* go back to the beginning of the charset, after 2273 p = p1; /* go back to the beginning of the charset, after
2260 a possible ^. */ 2274 a possible ^. */
2261 rtab = Vthe_lisp_rangetab; 2275 rtab = Vthe_lisp_rangetab;
2262 Fclear_range_table (rtab); 2276 Fclear_range_table (rtab);
2263 2277
2264 /* charset_not matches newline according to a syntax bit. */ 2278 /* charset_not matches newline according to a syntax bit. */
2265 if ((re_opcode_t) b[-1] == charset_mule_not 2279 if ((re_opcode_t) buf_end[-1] == charset_mule_not
2266 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 2280 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2267 SET_EITHER_BIT ('\n'); 2281 SET_EITHER_BIT ('\n');
2268 } 2282 }
2269 #endif /* MULE */ 2283 #endif /* MULE */
2270 2284
2271 /* Read in characters and ranges, setting map bits. */ 2285 /* Read in characters and ranges, setting map bits. */
2272 for (;;) 2286 for (;;)
2273 { 2287 {
2274 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2288 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2275 2289
2276 PATFETCH_EITHER (c); 2290 PATFETCH (c);
2277 2291
2278 #ifdef MULE 2292 #ifdef MULE
2279 if (c >= 0x80 && !has_extended_chars) 2293 if (c >= 0x80 && !has_extended_chars)
2280 { 2294 {
2281 has_extended_chars = 1; 2295 has_extended_chars = 1;
2290 /* \ might escape characters inside [...] and [^...]. */ 2304 /* \ might escape characters inside [...] and [^...]. */
2291 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 2305 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2292 { 2306 {
2293 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2307 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2294 2308
2295 PATFETCH_EITHER (c1); 2309 PATFETCH (c1);
2296 #ifdef MULE 2310 #ifdef MULE
2297 if (c1 >= 0x80 && !has_extended_chars) 2311 if (c1 >= 0x80 && !has_extended_chars)
2298 { 2312 {
2299 has_extended_chars = 1; 2313 has_extended_chars = 1;
2300 goto start_over_with_extended; 2314 goto start_over_with_extended;
2319 was a character: if this is a hyphen not at the 2333 was a character: if this is a hyphen not at the
2320 beginning or the end of a list, then it's the range 2334 beginning or the end of a list, then it's the range
2321 operator. */ 2335 operator. */
2322 if (c == '-' 2336 if (c == '-'
2323 && !(p - 2 >= pattern && p[-2] == '[') 2337 && !(p - 2 >= pattern && p[-2] == '[')
2324 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') 2338 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2325 && *p != ']') 2339 && *p != ']')
2326 { 2340 {
2327 reg_errcode_t ret; 2341 reg_errcode_t ret;
2328 2342
2329 #ifdef MULE 2343 #ifdef MULE
2335 if (has_extended_chars) 2349 if (has_extended_chars)
2336 ret = compile_extended_range (&p, pend, translate, 2350 ret = compile_extended_range (&p, pend, translate,
2337 syntax, rtab); 2351 syntax, rtab);
2338 else 2352 else
2339 #endif /* MULE */ 2353 #endif /* MULE */
2340 ret = compile_range (&p, pend, translate, syntax, b); 2354 ret = compile_range (&p, pend, translate, syntax, buf_end);
2341 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2355 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2342 } 2356 }
2343 2357
2344 else if (p[0] == '-' && p[1] != ']') 2358 else if (p[0] == '-' && p[1] != ']')
2345 { /* This handles ranges made up of characters only. */ 2359 { /* This handles ranges made up of characters only. */
2357 if (has_extended_chars) 2371 if (has_extended_chars)
2358 ret = compile_extended_range (&p, pend, translate, 2372 ret = compile_extended_range (&p, pend, translate,
2359 syntax, rtab); 2373 syntax, rtab);
2360 else 2374 else
2361 #endif /* MULE */ 2375 #endif /* MULE */
2362 ret = compile_range (&p, pend, translate, syntax, b); 2376 ret = compile_range (&p, pend, translate, syntax, buf_end);
2363 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); 2377 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2364 } 2378 }
2365 2379
2366 /* See if we're at the beginning of a possible character 2380 /* See if we're at the beginning of a possible character
2367 class. */ 2381 class. */
2376 /* If pattern is `[[:'. */ 2390 /* If pattern is `[[:'. */
2377 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2391 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2378 2392
2379 for (;;) 2393 for (;;)
2380 { 2394 {
2381 /* Do not do PATFETCH_EITHER() here. We want 2395 /* #### This code is unused.
2382 to just see if the bytes match particular 2396 Correctness is not checked after TRT
2383 strings, and we put them all back if not. 2397 table change. */
2384
2385 #### May need to be changed once trt tables
2386 are working. */
2387 PATFETCH (c); 2398 PATFETCH (c);
2388 if (c == ':' || c == ']' || p == pend 2399 if (c == ':' || c == ']' || p == pend
2389 || c1 == CHAR_CLASS_MAX_LENGTH) 2400 || c1 == CHAR_CLASS_MAX_LENGTH)
2390 break; 2401 break;
2391 str[c1++] = (char) c; 2402 str[c1++] = (char) c;
2392 } 2403 }
2393 str[c1] = '\0'; 2404 str[c1] = '\0';
2394 2405
2395 /* If isn't a word bracketed by `[:' and:`]': 2406 /* If isn't a word bracketed by `[:' and `:]':
2396 undo the ending character, the letters, and leave 2407 undo the ending character, the letters, and leave
2397 the leading `:' and `[' (but set bits for them). */ 2408 the leading `:' and `[' (but set bits for them). */
2398 if (c == ':' && *p == ']') 2409 if (c == ':' && *p == ']')
2399 { 2410 {
2400 int ch; 2411 int ch;
2464 { 2475 {
2465 /* We have a range table, not a bit vector. */ 2476 /* We have a range table, not a bit vector. */
2466 int bytes_needed = 2477 int bytes_needed =
2467 unified_range_table_bytes_needed (rtab); 2478 unified_range_table_bytes_needed (rtab);
2468 GET_BUFFER_SPACE (bytes_needed); 2479 GET_BUFFER_SPACE (bytes_needed);
2469 unified_range_table_copy_data (rtab, b); 2480 unified_range_table_copy_data (rtab, buf_end);
2470 b += unified_range_table_bytes_used (b); 2481 buf_end += unified_range_table_bytes_used (buf_end);
2471 break; 2482 break;
2472 } 2483 }
2473 #endif /* MULE */ 2484 #endif /* MULE */
2474 /* Discard any (non)matching list bytes that are all 0 at the 2485 /* Discard any (non)matching list bytes that are all 0 at the
2475 end of the map. Decrease the map-length byte too. */ 2486 end of the map. Decrease the map-length byte too. */
2476 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 2487 while ((int) buf_end[-1] > 0 && buf_end[buf_end[-1] - 1] == 0)
2477 b[-1]--; 2488 buf_end[-1]--;
2478 b += b[-1]; 2489 buf_end += buf_end[-1];
2479 } 2490 }
2480 break; 2491 break;
2481 2492
2482 2493
2483 case '(': 2494 case '(':
2533 { 2544 {
2534 regnum_t r; 2545 regnum_t r;
2535 2546
2536 if (!(syntax & RE_NO_SHY_GROUPS) 2547 if (!(syntax & RE_NO_SHY_GROUPS)
2537 && p != pend 2548 && p != pend
2538 && TRANSLATE(*p) == TRANSLATE('?')) 2549 && *p == '?')
2539 { 2550 {
2540 p++; 2551 p++;
2541 PATFETCH(c); 2552 PATFETCH (c);
2542 switch (c) 2553 switch (c)
2543 { 2554 {
2544 case ':': /* shy groups */ 2555 case ':': /* shy groups */
2545 r = MAX_REGNUM + 1; 2556 r = MAX_REGNUM + 1;
2546 break; 2557 break;
2570 whole pattern moves because of realloc, they will still 2581 whole pattern moves because of realloc, they will still
2571 be valid. */ 2582 be valid. */
2572 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 2583 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2573 COMPILE_STACK_TOP.fixup_alt_jump 2584 COMPILE_STACK_TOP.fixup_alt_jump
2574 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 2585 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2575 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 2586 COMPILE_STACK_TOP.laststart_offset = buf_end - bufp->buffer;
2576 COMPILE_STACK_TOP.regnum = r; 2587 COMPILE_STACK_TOP.regnum = r;
2577 2588
2578 /* We will eventually replace the 0 with the number of 2589 /* We will eventually replace the 0 with the number of
2579 groups inner to this one. But do not push a 2590 groups inner to this one. But do not push a
2580 start_memory for groups beyond the last one we can 2591 start_memory for groups beyond the last one we can
2581 represent in the compiled pattern. */ 2592 represent in the compiled pattern. */
2582 if (r <= MAX_REGNUM) 2593 if (r <= MAX_REGNUM)
2583 { 2594 {
2584 COMPILE_STACK_TOP.inner_group_offset 2595 COMPILE_STACK_TOP.inner_group_offset
2585 = b - bufp->buffer + 2; 2596 = buf_end - bufp->buffer + 2;
2586 BUF_PUSH_3 (start_memory, r, 0); 2597 BUF_PUSH_3 (start_memory, r, 0);
2587 } 2598 }
2588 2599
2589 compile_stack.avail++; 2600 compile_stack.avail++;
2590 2601
2591 fixup_alt_jump = 0; 2602 fixup_alt_jump = 0;
2592 laststart = 0; 2603 laststart = 0;
2593 begalt = b; 2604 begalt = buf_end;
2594 /* If we've reached MAX_REGNUM groups, then this open 2605 /* If we've reached MAX_REGNUM groups, then this open
2595 won't actually generate any code, so we'll have to 2606 won't actually generate any code, so we'll have to
2596 clear pending_exact explicitly. */ 2607 clear pending_exact explicitly. */
2597 pending_exact = 0; 2608 pending_exact = 0;
2598 } 2609 }
2617 `push_dummy_failure' in `re_match_2'. */ 2628 `push_dummy_failure' in `re_match_2'. */
2618 BUF_PUSH (push_dummy_failure); 2629 BUF_PUSH (push_dummy_failure);
2619 2630
2620 /* We allocated space for this jump when we assigned 2631 /* We allocated space for this jump when we assigned
2621 to `fixup_alt_jump', in the `handle_alt' case below. */ 2632 to `fixup_alt_jump', in the `handle_alt' case below. */
2622 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 2633 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end - 1);
2623 } 2634 }
2624 2635
2625 /* See similar code for backslashed left paren above. */ 2636 /* See similar code for backslashed left paren above. */
2626 if (COMPILE_STACK_EMPTY) { 2637 if (COMPILE_STACK_EMPTY) {
2627 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 2638 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2675 goto normal_char; 2686 goto normal_char;
2676 2687
2677 /* Insert before the previous alternative a jump which 2688 /* Insert before the previous alternative a jump which
2678 jumps to this alternative if the former fails. */ 2689 jumps to this alternative if the former fails. */
2679 GET_BUFFER_SPACE (3); 2690 GET_BUFFER_SPACE (3);
2680 INSERT_JUMP (on_failure_jump, begalt, b + 6); 2691 INSERT_JUMP (on_failure_jump, begalt, buf_end + 6);
2681 pending_exact = 0; 2692 pending_exact = 0;
2682 b += 3; 2693 buf_end += 3;
2683 2694
2684 /* The alternative before this one has a jump after it 2695 /* The alternative before this one has a jump after it
2685 which gets executed if it gets matched. Adjust that 2696 which gets executed if it gets matched. Adjust that
2686 jump so it will jump to this alternative's analogous 2697 jump so it will jump to this alternative's analogous
2687 jump (put in below, which in turn will jump to the next 2698 jump (put in below, which in turn will jump to the next
2696 three-byte space after `a'. We'll put in the jump, set 2707 three-byte space after `a'. We'll put in the jump, set
2697 fixup_alt_jump to right after `b', and leave behind three 2708 fixup_alt_jump to right after `b', and leave behind three
2698 bytes which we'll fill in when we get to after `c'. */ 2709 bytes which we'll fill in when we get to after `c'. */
2699 2710
2700 if (fixup_alt_jump) 2711 if (fixup_alt_jump)
2701 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 2712 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end);
2702 2713
2703 /* Mark and leave space for a jump after this alternative, 2714 /* Mark and leave space for a jump after this alternative,
2704 to be filled in later either by next alternative or 2715 to be filled in later either by next alternative or
2705 when know we're at the end of a series of alternatives. */ 2716 when we know we're at the end of a series of alternatives. */
2706 fixup_alt_jump = b; 2717 fixup_alt_jump = buf_end;
2707 GET_BUFFER_SPACE (3); 2718 GET_BUFFER_SPACE (3);
2708 b += 3; 2719 buf_end += 3;
2709 2720
2710 laststart = 0; 2721 laststart = 0;
2711 begalt = b; 2722 begalt = buf_end;
2712 break; 2723 break;
2713 2724
2714 2725
2715 case '{': 2726 case '{':
2716 /* If \{ is a literal. */ 2727 /* If \{ is a literal. */
2779 if (!laststart) 2790 if (!laststart)
2780 { 2791 {
2781 if (syntax & RE_CONTEXT_INVALID_OPS) 2792 if (syntax & RE_CONTEXT_INVALID_OPS)
2782 FREE_STACK_RETURN (REG_BADRPT); 2793 FREE_STACK_RETURN (REG_BADRPT);
2783 else if (syntax & RE_CONTEXT_INDEP_OPS) 2794 else if (syntax & RE_CONTEXT_INDEP_OPS)
2784 laststart = b; 2795 laststart = buf_end;
2785 else 2796 else
2786 goto unfetch_interval; 2797 goto unfetch_interval;
2787 } 2798 }
2788 2799
2789 /* If the upper bound is zero, don't want to succeed at 2800 /* If the upper bound is zero, don't want to succeed at
2790 all; jump from `laststart' to `b + 3', which will be 2801 all; jump from `laststart' to `buf_end + 3', which will be
2791 the end of the buffer after we insert the jump. */ 2802 the end of the buffer after we insert the jump. */
2792 if (upper_bound == 0) 2803 if (upper_bound == 0)
2793 { 2804 {
2794 GET_BUFFER_SPACE (3); 2805 GET_BUFFER_SPACE (3);
2795 INSERT_JUMP (jump, laststart, b + 3); 2806 INSERT_JUMP (jump, laststart, buf_end + 3);
2796 b += 3; 2807 buf_end += 3;
2797 } 2808 }
2798 2809
2799 /* Otherwise, we have a nontrivial interval. When 2810 /* Otherwise, we have a nontrivial interval. When
2800 we're all done, the pattern will look like: 2811 we're all done, the pattern will look like:
2801 set_number_at <jump count> <upper bound> 2812 set_number_at <jump count> <upper bound>
2816 though it will be set during matching by its 2827 though it will be set during matching by its
2817 attendant `set_number_at' (inserted next), 2828 attendant `set_number_at' (inserted next),
2818 because `re_compile_fastmap' needs to know. 2829 because `re_compile_fastmap' needs to know.
2819 Jump to the `jump_n' we might insert below. */ 2830 Jump to the `jump_n' we might insert below. */
2820 INSERT_JUMP2 (succeed_n, laststart, 2831 INSERT_JUMP2 (succeed_n, laststart,
2821 b + 5 + (upper_bound > 1) * 5, 2832 buf_end + 5 + (upper_bound > 1) * 5,
2822 lower_bound); 2833 lower_bound);
2823 b += 5; 2834 buf_end += 5;
2824 2835
2825 /* Code to initialize the lower bound. Insert 2836 /* Code to initialize the lower bound. Insert
2826 before the `succeed_n'. The `5' is the last two 2837 before the `succeed_n'. The `5' is the last two
2827 bytes of this `set_number_at', plus 3 bytes of 2838 bytes of this `set_number_at', plus 3 bytes of
2828 the following `succeed_n'. */ 2839 the following `succeed_n'. */
2829 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 2840 insert_op2 (set_number_at, laststart, 5, lower_bound, buf_end);
2830 b += 5; 2841 buf_end += 5;
2831 2842
2832 if (upper_bound > 1) 2843 if (upper_bound > 1)
2833 { /* More than one repetition is allowed, so 2844 { /* More than one repetition is allowed, so
2834 append a backward jump to the `succeed_n' 2845 append a backward jump to the `succeed_n'
2835 that starts this interval. 2846 that starts this interval.
2836 2847
2837 When we've reached this during matching, 2848 When we've reached this during matching,
2838 we'll have matched the interval once, so 2849 we'll have matched the interval once, so
2839 jump back only `upper_bound - 1' times. */ 2850 jump back only `upper_bound - 1' times. */
2840 STORE_JUMP2 (jump_n, b, laststart + 5, 2851 STORE_JUMP2 (jump_n, buf_end, laststart + 5,
2841 upper_bound - 1); 2852 upper_bound - 1);
2842 b += 5; 2853 buf_end += 5;
2843 2854
2844 /* The location we want to set is the second 2855 /* The location we want to set is the second
2845 parameter of the `jump_n'; that is `b-2' as 2856 parameter of the `jump_n'; that is `buf_end-2' as
2846 an absolute address. `laststart' will be 2857 an absolute address. `laststart' will be
2847 the `set_number_at' we're about to insert; 2858 the `set_number_at' we're about to insert;
2853 i.e., b - laststart. 2864 i.e., buf_end - laststart.
2854 2865
2855 We insert this at the beginning of the loop 2866 We insert this at the beginning of the loop
2856 so that if we fail during matching, we'll 2867 so that if we fail during matching, we'll
2857 reinitialize the bounds. */ 2868 reinitialize the bounds. */
2858 insert_op2 (set_number_at, laststart, b - laststart, 2869 insert_op2 (set_number_at, laststart,
2859 upper_bound - 1, b); 2870 buf_end - laststart,
2860 b += 5; 2871 upper_bound - 1, buf_end);
2872 buf_end += 5;
2861 } 2873 }
2862 } 2874 }
2863 pending_exact = 0; 2875 pending_exact = 0;
2864 beg_interval = NULL; 2876 beg_interval = NULL;
2865 } 2877 }
2887 case '=': 2899 case '=':
2888 BUF_PUSH (at_dot); 2900 BUF_PUSH (at_dot);
2889 break; 2901 break;
2890 2902
2891 case 's': 2903 case 's':
2892 laststart = b; 2904 laststart = buf_end;
2893 PATFETCH (c); 2905 PATFETCH (c);
2894 /* XEmacs addition */ 2906 /* XEmacs addition */
2895 if (c >= 0x80 || syntax_spec_code[c] == 0377) 2907 if (c >= 0x80 || syntax_spec_code[c] == 0377)
2896 FREE_STACK_RETURN (REG_ESYNTAX); 2908 FREE_STACK_RETURN (REG_ESYNTAX);
2897 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 2909 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2898 break; 2910 break;
2899 2911
2900 case 'S': 2912 case 'S':
2901 laststart = b; 2913 laststart = buf_end;
2902 PATFETCH (c); 2914 PATFETCH (c);
2903 /* XEmacs addition */ 2915 /* XEmacs addition */
2904 if (c >= 0x80 || syntax_spec_code[c] == 0377) 2916 if (c >= 0x80 || syntax_spec_code[c] == 0377)
2905 FREE_STACK_RETURN (REG_ESYNTAX); 2917 FREE_STACK_RETURN (REG_ESYNTAX);
2906 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 2918 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2907 break; 2919 break;
2908 2920
2909 #ifdef MULE 2921 #ifdef MULE
2910 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */ 2922 /* 97.2.17 jhod merged in to XEmacs from mule-2.3 */
2911 case 'c': 2923 case 'c':
2912 laststart = b; 2924 laststart = buf_end;
2913 PATFETCH_RAW (c); 2925 PATFETCH_RAW (c);
2914 if (c < 32 || c > 127) 2926 if (c < 32 || c > 127)
2915 FREE_STACK_RETURN (REG_ECATEGORY); 2927 FREE_STACK_RETURN (REG_ECATEGORY);
2916 BUF_PUSH_2 (categoryspec, c); 2928 BUF_PUSH_2 (categoryspec, c);
2917 break; 2929 break;
2918 2930
2919 case 'C': 2931 case 'C':
2920 laststart = b; 2932 laststart = buf_end;
2921 PATFETCH_RAW (c); 2933 PATFETCH_RAW (c);
2922 if (c < 32 || c > 127) 2934 if (c < 32 || c > 127)
2923 FREE_STACK_RETURN (REG_ECATEGORY); 2935 FREE_STACK_RETURN (REG_ECATEGORY);
2924 BUF_PUSH_2 (notcategoryspec, c); 2936 BUF_PUSH_2 (notcategoryspec, c);
2925 break; 2937 break;
2927 #endif /* MULE */ 2939 #endif /* MULE */
2928 #endif /* emacs */ 2940 #endif /* emacs */
2929 2941
2930 2942
2931 case 'w': 2943 case 'w':
2932 laststart = b; 2944 laststart = buf_end;
2933 BUF_PUSH (wordchar); 2945 BUF_PUSH (wordchar);
2934 break; 2946 break;
2935 2947
2936 2948
2937 case 'W': 2949 case 'W':
2938 laststart = b; 2950 laststart = buf_end;
2939 BUF_PUSH (notwordchar); 2951 BUF_PUSH (notwordchar);
2940 break; 2952 break;
2941 2953
2942 2954
2943 case '<': 2955 case '<':
2964 BUF_PUSH (endbuf); 2976 BUF_PUSH (endbuf);
2965 break; 2977 break;
2966 2978
2967 case '1': case '2': case '3': case '4': case '5': 2979 case '1': case '2': case '3': case '4': case '5':
2968 case '6': case '7': case '8': case '9': 2980 case '6': case '7': case '8': case '9':
2969 if (syntax & RE_NO_BK_REFS) 2981 {
2970 goto normal_char; 2982 regnum_t reg;
2971 2983 if (syntax & RE_NO_BK_REFS)
2972 c1 = c - '0'; 2984 goto normal_char;
2973 2985
2974 if (c1 > regnum) 2986 reg = c - '0';
2975 FREE_STACK_RETURN (REG_ESUBREG); 2987
2976 2988 if (reg > regnum)
2977 /* Can't back reference to a subexpression if inside of it. */ 2989 FREE_STACK_RETURN (REG_ESUBREG);
2978 if (group_in_compile_stack (compile_stack, c1)) 2990
2979 goto normal_char; 2991 /* Can't back reference to a subexpression if inside of it. */
2980 2992 if (group_in_compile_stack (compile_stack, reg))
2981 laststart = b; 2993 goto normal_char;
2982 BUF_PUSH_2 (duplicate, c1); 2994
2995 laststart = buf_end;
2996 BUF_PUSH_2 (duplicate, reg);
2997 }
2983 break; 2998 break;
2984 2999
2985 3000
2986 case '+': 3001 case '+':
2987 case '?': 3002 case '?':
3006 /* `p' points to the location after where `c' came from. */ 3021 /* `p' points to the location after where `c' came from. */
3007 normal_char: 3022 normal_char:
3008 { 3023 {
3009 /* XEmacs: modifications here for Mule. */ 3024 /* XEmacs: modifications here for Mule. */
3010 /* `q' points to the beginning of the next char. */ 3025 /* `q' points to the beginning of the next char. */
3011 const char *q = p - 1; 3026 re_char *q = p;
3012 INC_CHARPTR (q);
3013 3027
3014 /* If no exactn currently being built. */ 3028 /* If no exactn currently being built. */
3015 if (!pending_exact 3029 if (!pending_exact
3016 3030
3017 /* If last exactn not at current position. */ 3031 /* If last exactn not at current position. */
3018 || pending_exact + *pending_exact + 1 != b 3032 || pending_exact + *pending_exact + 1 != buf_end
3019 3033
3020 /* We have only one byte following the exactn for the count. */ 3034 /* We have only one byte following the exactn for the count. */
3021 || ((unsigned int) (*pending_exact + (q - p)) >= 3035 || ((unsigned int) (*pending_exact + (q - p)) >=
3022 ((unsigned int) (1 << BYTEWIDTH) - 1)) 3036 ((unsigned int) (1 << BYTEWIDTH) - 1))
3023 3037
3031 ? *q == '{' 3045 ? *q == '{'
3032 : (q[0] == '\\' && q[1] == '{')))) 3046 : (q[0] == '\\' && q[1] == '{'))))
3033 { 3047 {
3034 /* Start building a new exactn. */ 3048 /* Start building a new exactn. */
3035 3049
3036 laststart = b; 3050 laststart = buf_end;
3037 3051
3038 BUF_PUSH_2 (exactn, 0); 3052 BUF_PUSH_2 (exactn, 0);
3039 pending_exact = b - 1; 3053 pending_exact = buf_end - 1;
3040 } 3054 }
3041 3055
3056 #ifndef MULE
3042 BUF_PUSH (c); 3057 BUF_PUSH (c);
3043 (*pending_exact)++; 3058 (*pending_exact)++;
3044 3059 #else
3045 while (p < q) 3060 {
3046 { 3061 Bytecount bt_count;
3047 PATFETCH (c); 3062 Bufbyte tmp_buf[MAX_EMCHAR_LEN];
3048 BUF_PUSH (c); 3063 int i;
3049 (*pending_exact)++; 3064
3050 } 3065 bt_count = set_charptr_emchar (tmp_buf, c);
3066
3067 for (i = 0; i < bt_count; i++)
3068 {
3069 BUF_PUSH (tmp_buf[i]);
3070 (*pending_exact)++;
3071 }
3072 }
3073 #endif
3051 break; 3074 break;
3052 } 3075 }
3053 } /* switch (c) */ 3076 } /* switch (c) */
3054 } /* while p != pend */ 3077 } /* while p != pend */
3055 3078
3056 3079
3057 /* Through the pattern now. */ 3080 /* Through the pattern now. */
3058 3081
3059 if (fixup_alt_jump) 3082 if (fixup_alt_jump)
3060 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 3083 STORE_JUMP (jump_past_alt, fixup_alt_jump, buf_end);
3061 3084
3062 if (!COMPILE_STACK_EMPTY) 3085 if (!COMPILE_STACK_EMPTY)
3063 FREE_STACK_RETURN (REG_EPAREN); 3086 FREE_STACK_RETURN (REG_EPAREN);
3064 3087
3065 /* If we don't want backtracking, force success 3088 /* If we don't want backtracking, force success
3068 BUF_PUSH (succeed); 3091 BUF_PUSH (succeed);
3069 3092
3070 free (compile_stack.stack); 3093 free (compile_stack.stack);
3071 3094
3072 /* We have succeeded; set the length of the buffer. */ 3095 /* We have succeeded; set the length of the buffer. */
3073 bufp->used = b - bufp->buffer; 3096 bufp->used = buf_end - bufp->buffer;
3074 3097
3075 #ifdef DEBUG 3098 #ifdef DEBUG
3076 if (debug) 3099 if (debug)
3077 { 3100 {
3078 DEBUG_PRINT1 ("\nCompiled pattern: \n"); 3101 DEBUG_PRINT1 ("\nCompiled pattern: \n");
3112 else 3135 else
3113 fail_stack.stack 3136 fail_stack.stack
3114 = (fail_stack_elt_t *) realloc (fail_stack.stack, 3137 = (fail_stack_elt_t *) realloc (fail_stack.stack,
3115 (fail_stack.size 3138 (fail_stack.size
3116 * sizeof (fail_stack_elt_t))); 3139 * sizeof (fail_stack_elt_t)));
3117 #endif /* not emacs */ 3140 #endif /* emacs */
3118 } 3141 }
3119 3142
3120 regex_grow_registers (num_regs); 3143 regex_grow_registers (num_regs);
3121 } 3144 }
3122 #endif /* not MATCH_MAY_ALLOCATE */ 3145 #endif /* not MATCH_MAY_ALLOCATE */
3182 /* P points to just after a ^ in PATTERN. Return true if that ^ comes 3205 /* P points to just after a ^ in PATTERN. Return true if that ^ comes
3183 after an alternative or a begin-subexpression. We assume there is at 3206 after an alternative or a begin-subexpression. We assume there is at
3184 least one character before the ^. */ 3207 least one character before the ^. */
3185 3208
3186 static boolean 3209 static boolean
3187 at_begline_loc_p (const char *pattern, const char *p, reg_syntax_t syntax) 3210 at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax)
3188 { 3211 {
3189 const char *prev = p - 2; 3212 re_char *prev = p - 2;
3190 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 3213 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3191 3214
3192 return 3215 return
3193 /* After a subexpression? */ 3216 /* After a subexpression? */
3194 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 3217 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3199 3222
3200 /* The dual of at_begline_loc_p. This one is for $. We assume there is 3223 /* The dual of at_begline_loc_p. This one is for $. We assume there is
3201 at least one character after the $, i.e., `P < PEND'. */ 3224 at least one character after the $, i.e., `P < PEND'. */
3202 3225
3203 static boolean 3226 static boolean
3204 at_endline_loc_p (const char *p, const char *pend, int syntax) 3227 at_endline_loc_p (re_char *p, re_char *pend, int syntax)
3205 { 3228 {
3206 const char *next = p; 3229 re_char *next = p;
3207 boolean next_backslash = *next == '\\'; 3230 boolean next_backslash = *next == '\\';
3208 const char *next_next = p + 1 < pend ? p + 1 : 0; 3231 re_char *next_next = p + 1 < pend ? p + 1 : 0;
3209 3232
3210 return 3233 return
3211 /* Before a subexpression? */ 3234 /* Before a subexpression? */
3212 (syntax & RE_NO_BK_PARENS ? *next == ')' 3235 (syntax & RE_NO_BK_PARENS ? *next == ')'
3213 : next_backslash && next_next && *next_next == ')') 3236 : next_backslash && next_next && *next_next == ')')
3245 3268
3246 We use these short variable names so we can use the same macros as 3269 We use these short variable names so we can use the same macros as
3247 `regex_compile' itself. */ 3270 `regex_compile' itself. */
3248 3271
3249 static reg_errcode_t 3272 static reg_errcode_t
3250 compile_range (const char **p_ptr, const char *pend, char *translate, 3273 compile_range (re_char **p_ptr, re_char *pend, RE_TRANSLATE_TYPE translate,
3251 reg_syntax_t syntax, unsigned char *b) 3274 reg_syntax_t syntax, unsigned char *buf_end)
3252 { 3275 {
3253 unsigned this_char; 3276 unsigned this_char;
3254 3277
3255 const char *p = *p_ptr; 3278 re_char *p = *p_ptr;
3256 int range_start, range_end; 3279 int range_start, range_end;
3257 3280
3258 if (p == pend) 3281 if (p == pend)
3259 return REG_ERANGE; 3282 return REG_ERANGE;
3260 3283
3290 } 3313 }
3291 3314
3292 #ifdef MULE 3315 #ifdef MULE
3293 3316
3294 static reg_errcode_t 3317 static reg_errcode_t
3295 compile_extended_range (const char **p_ptr, const char *pend, char *translate, 3318 compile_extended_range (re_char **p_ptr, re_char *pend,
3319 RE_TRANSLATE_TYPE translate,
3296 reg_syntax_t syntax, Lisp_Object rtab) 3320 reg_syntax_t syntax, Lisp_Object rtab)
3297 { 3321 {
3298 Emchar this_char, range_start, range_end; 3322 Emchar this_char, range_start, range_end;
3299 const Bufbyte *p; 3323 const Bufbyte *p;
3300 3324
3412 bufp->can_be_null |= path_can_be_null; 3436 bufp->can_be_null |= path_can_be_null;
3413 3437
3414 /* Reset for next path. */ 3438 /* Reset for next path. */
3415 path_can_be_null = true; 3439 path_can_be_null = true;
3416 3440
3417 p = fail_stack.stack[--fail_stack.avail].pointer; 3441 p = (unsigned char *) fail_stack.stack[--fail_stack.avail].pointer;
3418 3442
3419 continue; 3443 continue;
3420 } 3444 }
3421 else 3445 else
3422 break; 3446 break;
3461 for (j = *p * BYTEWIDTH; j < 0x80; j++) 3485 for (j = *p * BYTEWIDTH; j < 0x80; j++)
3462 fastmap[j] = 1; 3486 fastmap[j] = 1;
3463 /* And all extended characters must be allowed, too. */ 3487 /* And all extended characters must be allowed, too. */
3464 for (j = 0x80; j < 0xA0; j++) 3488 for (j = 0x80; j < 0xA0; j++)
3465 fastmap[j] = 1; 3489 fastmap[j] = 1;
3466 #else /* ! MULE */ 3490 #else /* not MULE */
3467 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 3491 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3468 fastmap[j] = 1; 3492 fastmap[j] = 1;
3469 #endif /* ! MULE */ 3493 #endif /* MULE */
3470 3494
3471 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 3495 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3472 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 3496 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3473 fastmap[j] = 1; 3497 fastmap[j] = 1;
3474 break; 3498 break;
3621 == Sword || multi_p) 3645 == Sword || multi_p)
3622 fastmap[j] = 1; 3646 fastmap[j] = 1;
3623 } 3647 }
3624 } 3648 }
3625 } 3649 }
3626 #else /* ! MULE */ 3650 #else /* not MULE */
3627 for (j = 0; j < (1 << BYTEWIDTH); j++) 3651 for (j = 0; j < (1 << BYTEWIDTH); j++)
3628 if (SYNTAX_UNSAFE 3652 if (SYNTAX_UNSAFE
3629 (XCHAR_TABLE 3653 (XCHAR_TABLE
3630 (regex_emacs_buffer->mirror_syntax_table), j) == 3654 (regex_emacs_buffer->mirror_syntax_table), j) ==
3631 (enum syntaxcode) k) 3655 (enum syntaxcode) k)
3632 fastmap[j] = 1; 3656 fastmap[j] = 1;
3633 #endif /* ! MULE */ 3657 #endif /* MULE */
3634 break; 3658 break;
3635 3659
3636 3660
3637 case notsyntaxspec: 3661 case notsyntaxspec:
3638 k = *p++; 3662 k = *p++;
3662 != Sword || multi_p) 3686 != Sword || multi_p)
3663 fastmap[j] = 1; 3687 fastmap[j] = 1;
3664 } 3688 }
3665 } 3689 }
3666 } 3690 }
3667 #else /* ! MULE */ 3691 #else /* not MULE */
3668 for (j = 0; j < (1 << BYTEWIDTH); j++) 3692 for (j = 0; j < (1 << BYTEWIDTH); j++)
3669 if (SYNTAX_UNSAFE 3693 if (SYNTAX_UNSAFE
3670 (XCHAR_TABLE 3694 (XCHAR_TABLE
3671 (regex_emacs_buffer->mirror_syntax_table), j) != 3695 (regex_emacs_buffer->mirror_syntax_table), j) !=
3672 (enum syntaxcode) k) 3696 (enum syntaxcode) k)
3673 fastmap[j] = 1; 3697 fastmap[j] = 1;
3674 #endif /* ! MULE */ 3698 #endif /* MULE */
3675 break; 3699 break;
3676 3700
3677 #ifdef MULE 3701 #ifdef MULE
3678 /* 97/2/17 jhod category patch */ 3702 /* 97/2/17 jhod category patch */
3679 case categoryspec: 3703 case categoryspec:
3899 We return either the position in the strings at which the match was 3923 We return either the position in the strings at which the match was
3900 found, -1 if no match, or -2 if error (such as failure 3924 found, -1 if no match, or -2 if error (such as failure
3901 stack overflow). */ 3925 stack overflow). */
3902 3926
3903 int 3927 int
3904 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, 3928 re_search_2 (struct re_pattern_buffer *bufp, const char *str1,
3905 int size1, const char *string2, int size2, int startpos, 3929 int size1, const char *str2, int size2, int startpos,
3906 int range, struct re_registers *regs, int stop) 3930 int range, struct re_registers *regs, int stop)
3907 { 3931 {
3908 int val; 3932 int val;
3933 re_char *string1 = (re_char *) str1;
3934 re_char *string2 = (re_char *) str2;
3909 REGISTER char *fastmap = bufp->fastmap; 3935 REGISTER char *fastmap = bufp->fastmap;
3910 REGISTER char *translate = bufp->translate; 3936 REGISTER RE_TRANSLATE_TYPE translate = bufp->translate;
3911 int total_size = size1 + size2; 3937 int total_size = size1 + size2;
3912 int endpos = startpos + range; 3938 int endpos = startpos + range;
3913 #ifdef REGEX_BEGLINE_CHECK 3939 #ifdef REGEX_BEGLINE_CHECK
3914 int anchored_at_begline = 0; 3940 int anchored_at_begline = 0;
3915 #endif 3941 #endif
3916 const unsigned char *d; 3942 re_char *d;
3917 Charcount d_size; 3943 Charcount d_size;
3918 3944
3919 /* Check for out-of-range STARTPOS. */ 3945 /* Check for out-of-range STARTPOS. */
3920 if (startpos < 0 || startpos > total_size) 3946 if (startpos < 0 || startpos > total_size)
3921 return -1; 3947 return -1;
3983 d = ((const unsigned char *) 4009 d = ((const unsigned char *)
3984 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4010 (startpos >= size1 ? string2 - size1 : string1) + startpos);
3985 DEC_CHARPTR(d); /* Ok, since startpos != size1. */ 4011 DEC_CHARPTR(d); /* Ok, since startpos != size1. */
3986 d_size = charcount_to_bytecount (d, 1); 4012 d_size = charcount_to_bytecount (d, 1);
3987 4013
3988 if (translate) 4014 if (TRANSLATE_P (translate))
3989 #ifdef MULE 4015 while (range > lim && *d != '\n')
3990 while (range > lim && (*d >= 0x80 || translate[*d] != '\n'))
3991 #else
3992 while (range > lim && translate[*d] != '\n')
3993 #endif
3994 { 4016 {
3995 d += d_size; /* Speedier INC_CHARPTR(d) */ 4017 d += d_size; /* Speedier INC_CHARPTR(d) */
3996 d_size = charcount_to_bytecount (d, 1); 4018 d_size = charcount_to_bytecount (d, 1);
3997 range -= d_size; 4019 range -= d_size;
3998 } 4020 }
4025 d = ((const unsigned char *) 4047 d = ((const unsigned char *)
4026 (startpos >= size1 ? string2 - size1 : string1) + startpos); 4048 (startpos >= size1 ? string2 - size1 : string1) + startpos);
4027 4049
4028 /* Written out as an if-else to avoid testing `translate' 4050 /* Written out as an if-else to avoid testing `translate'
4029 inside the loop. */ 4051 inside the loop. */
4030 if (translate) 4052 if (TRANSLATE_P (translate))
4031 while (range > lim && 4053 while (range > lim)
4054 {
4032 #ifdef MULE 4055 #ifdef MULE
4033 *d < 0x80 && 4056 Emchar buf_ch;
4034 #endif 4057
4035 !fastmap[(unsigned char)translate[*d]]) 4058 buf_ch = charptr_emchar (d);
4036 { 4059 buf_ch = RE_TRANSLATE (buf_ch);
4060 if (buf_ch >= 0200 || fastmap[(unsigned char) buf_ch])
4061 break;
4062 #else
4063 if (fastmap[(unsigned char)RE_TRANSLATE (*d)])
4064 break;
4065 #endif /* MULE */
4037 d_size = charcount_to_bytecount (d, 1); 4066 d_size = charcount_to_bytecount (d, 1);
4038 range -= d_size; 4067 range -= d_size;
4039 d += d_size; /* Speedier INC_CHARPTR(d) */ 4068 d += d_size; /* Speedier INC_CHARPTR(d) */
4040 } 4069 }
4041 else 4070 else
4048 4077
4049 startpos += irange - range; 4078 startpos += irange - range;
4050 } 4079 }
4051 else /* Searching backwards. */ 4080 else /* Searching backwards. */
4052 { 4081 {
4053 unsigned char c = (size1 == 0 || startpos >= size1 4082 Emchar c = (size1 == 0 || startpos >= size1
4054 ? string2[startpos - size1] 4083 ? charptr_emchar (string2 + startpos - size1)
4055 : string1[startpos]); 4084 : charptr_emchar (string1 + startpos));
4085 c = TRANSLATE (c);
4056 #ifdef MULE 4086 #ifdef MULE
4057 if (c < 0x80 && !fastmap[(unsigned char) TRANSLATE (c)]) 4087 if (!(c >= 0200 || fastmap[(unsigned char) c]))
4088 goto advance;
4058 #else 4089 #else
4059 if (!fastmap[(unsigned char) TRANSLATE (c)]) 4090 if (!fastmap[(unsigned char) c])
4091 goto advance;
4060 #endif 4092 #endif
4061 goto advance;
4062 } 4093 }
4063 } 4094 }
4064 4095
4065 /* If can't match the null string, and that's all we have left, fail. */ 4096 /* If can't match the null string, and that's all we have left, fail. */
4066 if (range >= 0 && startpos == total_size && fastmap 4097 if (range >= 0 && startpos == total_size && fastmap
4169 FREE_VAR (best_regend); \ 4200 FREE_VAR (best_regend); \
4170 FREE_VAR (reg_info); \ 4201 FREE_VAR (reg_info); \
4171 FREE_VAR (reg_dummy); \ 4202 FREE_VAR (reg_dummy); \
4172 FREE_VAR (reg_info_dummy); \ 4203 FREE_VAR (reg_info_dummy); \
4173 } while (0) 4204 } while (0)
4174 #else 4205 #else /* not MATCH_MAY_ALLOCATE */
4175 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 4206 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
4176 #endif /* not MATCH_MAY_ALLOCATE */ 4207 #endif /* MATCH_MAY_ALLOCATE */
4177 4208
4178 /* These values must meet several constraints. They must not be valid 4209 /* These values must meet several constraints. They must not be valid
4179 register values; since we have a limit of 255 registers (because 4210 register values; since we have a limit of 255 registers (because
4180 we use only one byte in the pattern for the register number), we can 4211 we use only one byte in the pattern for the register number), we can
4181 use numbers larger than 255. They must differ by 1, because of 4212 use numbers larger than 255. They must differ by 1, because of
4192 4223
4193 int 4224 int
4194 re_match (struct re_pattern_buffer *bufp, const char *string, int size, 4225 re_match (struct re_pattern_buffer *bufp, const char *string, int size,
4195 int pos, struct re_registers *regs) 4226 int pos, struct re_registers *regs)
4196 { 4227 {
4197 int result = re_match_2_internal (bufp, NULL, 0, string, size, 4228 int result = re_match_2_internal (bufp, NULL, 0, (re_char *) string, size,
4198 pos, regs, size); 4229 pos, regs, size);
4199 alloca (0); 4230 alloca (0);
4200 return result; 4231 return result;
4201 } 4232 }
4202 #endif /* not emacs */ 4233 #endif /* not emacs */
4218 int 4249 int
4219 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, 4250 re_match_2 (struct re_pattern_buffer *bufp, const char *string1,
4220 int size1, const char *string2, int size2, int pos, 4251 int size1, const char *string2, int size2, int pos,
4221 struct re_registers *regs, int stop) 4252 struct re_registers *regs, int stop)
4222 { 4253 {
4223 int result = re_match_2_internal (bufp, string1, size1, string2, size2, 4254 int result = re_match_2_internal (bufp, (re_char *) string1, size1,
4255 (re_char *) string2, size2,
4224 pos, regs, stop); 4256 pos, regs, stop);
4225 alloca (0); 4257 alloca (0);
4226 return result; 4258 return result;
4227 } 4259 }
4228 4260
4229 /* This is a separate function so that we can force an alloca cleanup 4261 /* This is a separate function so that we can force an alloca cleanup
4230 afterwards. */ 4262 afterwards. */
4231 static int 4263 static int
4232 re_match_2_internal (struct re_pattern_buffer *bufp, const char *string1, 4264 re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
4233 int size1, const char *string2, int size2, int pos, 4265 int size1, re_char *string2, int size2, int pos,
4234 struct re_registers *regs, int stop) 4266 struct re_registers *regs, int stop)
4235 { 4267 {
4236 /* General temporaries. */ 4268 /* General temporaries. */
4237 int mcnt; 4269 int mcnt;
4238 unsigned char *p1; 4270 unsigned char *p1;
4239 int should_succeed; /* XEmacs change */ 4271 int should_succeed; /* XEmacs change */
4240 4272
4241 /* Just past the end of the corresponding string. */ 4273 /* Just past the end of the corresponding string. */
4242 const char *end1, *end2; 4274 re_char *end1, *end2;
4243 4275
4244 /* Pointers into string1 and string2, just past the last characters in 4276 /* Pointers into string1 and string2, just past the last characters in
4245 each to consider matching. */ 4277 each to consider matching. */
4246 const char *end_match_1, *end_match_2; 4278 re_char *end_match_1, *end_match_2;
4247 4279
4248 /* Where we are in the data, and the end of the current string. */ 4280 /* Where we are in the data, and the end of the current string. */
4249 const char *d, *dend; 4281 re_char *d, *dend;
4250 4282
4251 /* Where we are in the pattern, and the end of the pattern. */ 4283 /* Where we are in the pattern, and the end of the pattern. */
4252 unsigned char *p = bufp->buffer; 4284 unsigned char *p = bufp->buffer;
4253 REGISTER unsigned char *pend = p + bufp->used; 4285 REGISTER unsigned char *pend = p + bufp->used;
4254 4286
4255 /* Mark the opcode just after a start_memory, so we can test for an 4287 /* Mark the opcode just after a start_memory, so we can test for an
4256 empty subpattern when we get to the stop_memory. */ 4288 empty subpattern when we get to the stop_memory. */
4257 unsigned char *just_past_start_mem = 0; 4289 re_char *just_past_start_mem = 0;
4258 4290
4259 /* We use this to map every character in the string. */ 4291 /* We use this to map every character in the string. */
4260 char *translate = bufp->translate; 4292 RE_TRANSLATE_TYPE translate = bufp->translate;
4261 4293
4262 /* Failure point stack. Each place that can handle a failure further 4294 /* Failure point stack. Each place that can handle a failure further
4263 down the line pushes a failure point on this stack. It consists of 4295 down the line pushes a failure point on this stack. It consists of
4264 restart, regend, and reg_info for all registers corresponding to 4296 restart, regend, and reg_info for all registers corresponding to
4265 the subexpressions we're currently inside, plus the number of such 4297 the subexpressions we're currently inside, plus the number of such
4297 regnum-th regstart pointer points to where in the pattern we began 4329 regnum-th regstart pointer points to where in the pattern we began
4298 matching and the regnum-th regend points to right after where we 4330 matching and the regnum-th regend points to right after where we
4299 stopped matching the regnum-th subexpression. (The zeroth register 4331 stopped matching the regnum-th subexpression. (The zeroth register
4300 keeps track of what the whole pattern matches.) */ 4332 keeps track of what the whole pattern matches.) */
4301 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4333 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4302 const char **regstart, **regend; 4334 re_char **regstart, **regend;
4303 #endif 4335 #endif
4304 4336
4305 /* If a group that's operated upon by a repetition operator fails to 4337 /* If a group that's operated upon by a repetition operator fails to
4306 match anything, then the register for its start will need to be 4338 match anything, then the register for its start will need to be
4307 restored because it will have been set to wherever in the string we 4339 restored because it will have been set to wherever in the string we
4308 are when we last see its open-group operator. Similarly for a 4340 are when we last see its open-group operator. Similarly for a
4309 register's end. */ 4341 register's end. */
4310 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4342 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4311 const char **old_regstart, **old_regend; 4343 re_char **old_regstart, **old_regend;
4312 #endif 4344 #endif
4313 4345
4314 /* The is_active field of reg_info helps us keep track of which (possibly 4346 /* The is_active field of reg_info helps us keep track of which (possibly
4315 nested) subexpressions we are currently in. The matched_something 4347 nested) subexpressions we are currently in. The matched_something
4316 field of reg_info[reg_num] helps us tell whether or not we have 4348 field of reg_info[reg_num] helps us tell whether or not we have
4325 variables when we find a match better than any we've seen before. 4357 variables when we find a match better than any we've seen before.
4326 This happens as we backtrack through the failure points, which in 4358 This happens as we backtrack through the failure points, which in
4327 turn happens only if we have not yet matched the entire string. */ 4359 turn happens only if we have not yet matched the entire string. */
4328 unsigned best_regs_set = false; 4360 unsigned best_regs_set = false;
4329 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4361 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4330 const char **best_regstart, **best_regend; 4362 re_char **best_regstart, **best_regend;
4331 #endif 4363 #endif
4332 4364
4333 /* Logically, this is `best_regend[0]'. But we don't want to have to 4365 /* Logically, this is `best_regend[0]'. But we don't want to have to
4334 allocate space for that if we're not allocating space for anything 4366 allocate space for that if we're not allocating space for anything
4335 else (see below). Also, we never need info about register 0 for 4367 else (see below). Also, we never need info about register 0 for
4336 any of the other register vectors, and it seems rather a kludge to 4368 any of the other register vectors, and it seems rather a kludge to
4337 treat `best_regend' differently than the rest. So we keep track of 4369 treat `best_regend' differently than the rest. So we keep track of
4338 the end of the best match so far in a separate variable. We 4370 the end of the best match so far in a separate variable. We
4339 initialize this to NULL so that when we backtrack the first time 4371 initialize this to NULL so that when we backtrack the first time
4340 and need to test it, it's not garbage. */ 4372 and need to test it, it's not garbage. */
4341 const char *match_end = NULL; 4373 re_char *match_end = NULL;
4342 4374
4343 /* This helps SET_REGS_MATCHED avoid doing redundant work. */ 4375 /* This helps SET_REGS_MATCHED avoid doing redundant work. */
4344 int set_regs_matched_done = 0; 4376 int set_regs_matched_done = 0;
4345 4377
4346 /* Used when we pop values we don't care about. */ 4378 /* Used when we pop values we don't care about. */
4347 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ 4379 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */
4348 const char **reg_dummy; 4380 re_char **reg_dummy;
4349 register_info_type *reg_info_dummy; 4381 register_info_type *reg_info_dummy;
4350 #endif 4382 #endif
4351 4383
4352 #ifdef DEBUG 4384 #ifdef DEBUG
4353 /* Counts the total number of registers pushed. */ 4385 /* Counts the total number of registers pushed. */
4371 there are groups, we include space for register 0 (the whole 4403 there are groups, we include space for register 0 (the whole
4372 pattern), even though we never use it, since it simplifies the 4404 pattern), even though we never use it, since it simplifies the
4373 array indexing. We should fix this. */ 4405 array indexing. We should fix this. */
4374 if (bufp->re_nsub) 4406 if (bufp->re_nsub)
4375 { 4407 {
4376 regstart = REGEX_TALLOC (num_regs, const char *); 4408 regstart = REGEX_TALLOC (num_regs, re_char *);
4377 regend = REGEX_TALLOC (num_regs, const char *); 4409 regend = REGEX_TALLOC (num_regs, re_char *);
4378 old_regstart = REGEX_TALLOC (num_regs, const char *); 4410 old_regstart = REGEX_TALLOC (num_regs, re_char *);
4379 old_regend = REGEX_TALLOC (num_regs, const char *); 4411 old_regend = REGEX_TALLOC (num_regs, re_char *);
4380 best_regstart = REGEX_TALLOC (num_regs, const char *); 4412 best_regstart = REGEX_TALLOC (num_regs, re_char *);
4381 best_regend = REGEX_TALLOC (num_regs, const char *); 4413 best_regend = REGEX_TALLOC (num_regs, re_char *);
4382 reg_info = REGEX_TALLOC (num_regs, register_info_type); 4414 reg_info = REGEX_TALLOC (num_regs, register_info_type);
4383 reg_dummy = REGEX_TALLOC (num_regs, const char *); 4415 reg_dummy = REGEX_TALLOC (num_regs, re_char *);
4384 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); 4416 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4385 4417
4386 if (!(regstart && regend && old_regstart && old_regend && reg_info 4418 if (!(regstart && regend && old_regstart && old_regend && reg_info
4387 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 4419 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4388 { 4420 {
4418 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 4450 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4419 IS_ACTIVE (reg_info[mcnt]) = 0; 4451 IS_ACTIVE (reg_info[mcnt]) = 0;
4420 MATCHED_SOMETHING (reg_info[mcnt]) = 0; 4452 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4421 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 4453 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4422 } 4454 }
4423
4424 /* We move `string1' into `string2' if the latter's empty -- but not if 4455 /* We move `string1' into `string2' if the latter's empty -- but not if
4425 `string1' is null. */ 4456 `string1' is null. */
4426 if (size2 == 0 && string1 != NULL) 4457 if (size2 == 0 && string1 != NULL)
4427 { 4458 {
4428 string2 = string1; 4459 string2 = string1;
4460 { 4491 {
4461 d = string2 + pos - size1; 4492 d = string2 + pos - size1;
4462 dend = end_match_2; 4493 dend = end_match_2;
4463 } 4494 }
4464 4495
4465 DEBUG_PRINT1 ("The compiled pattern is: "); 4496 DEBUG_PRINT1 ("The compiled pattern is: \n");
4466 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 4497 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4467 DEBUG_PRINT1 ("The string to match is: `"); 4498 DEBUG_PRINT1 ("The string to match is: `");
4468 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 4499 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4469 DEBUG_PRINT1 ("'\n"); 4500 DEBUG_PRINT1 ("'\n");
4470 4501
4658 mcnt = *p++; 4689 mcnt = *p++;
4659 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 4690 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4660 4691
4661 /* This is written out as an if-else so we don't waste time 4692 /* This is written out as an if-else so we don't waste time
4662 testing `translate' inside the loop. */ 4693 testing `translate' inside the loop. */
4663 if (translate) 4694 if (TRANSLATE_P (translate))
4664 { 4695 {
4665 do 4696 do
4666 { 4697 {
4698 #ifdef MULE
4699 Emchar pat_ch, buf_ch;
4700 Bytecount pat_len;
4701
4667 PREFETCH (); 4702 PREFETCH ();
4668 if (translate[(unsigned char) *d++] != (char) *p++) 4703 pat_ch = charptr_emchar (p);
4704 buf_ch = charptr_emchar (d);
4705 if (RE_TRANSLATE (buf_ch) != pat_ch)
4669 goto fail; 4706 goto fail;
4707
4708 pat_len = charcount_to_bytecount (p, 1);
4709 p += pat_len;
4710 INC_CHARPTR (d);
4711
4712 mcnt -= pat_len;
4713 #else /* not MULE */
4714 PREFETCH ();
4715 if ((unsigned char) RE_TRANSLATE (*d++) != *p++)
4716 goto fail;
4717 mcnt--;
4718 #endif
4670 } 4719 }
4671 while (--mcnt); 4720 while (mcnt > 0);
4672 } 4721 }
4673 else 4722 else
4674 { 4723 {
4675 do 4724 do
4676 { 4725 {
4677 PREFETCH (); 4726 PREFETCH ();
4678 if (*d++ != (char) *p++) goto fail; 4727 if (*d++ != *p++) goto fail;
4679 } 4728 }
4680 while (--mcnt); 4729 while (--mcnt);
4681 } 4730 }
4682 SET_REGS_MATCHED (); 4731 SET_REGS_MATCHED ();
4683 break; 4732 break;
4948 4997
4949 /* \<digit> has been turned into a `duplicate' command which is 4998 /* \<digit> has been turned into a `duplicate' command which is
4950 followed by the numeric value of <digit> as the register number. */ 4999 followed by the numeric value of <digit> as the register number. */
4951 case duplicate: 5000 case duplicate:
4952 { 5001 {
4953 REGISTER const char *d2, *dend2; 5002 REGISTER re_char *d2, *dend2;
4954 int regno = *p++; /* Get which register to match against. */ 5003 int regno = *p++; /* Get which register to match against. */
4955 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 5004 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4956 5005
4957 /* Can't back reference a group which we've never matched. */ 5006 /* Can't back reference a group which we've never matched. */
4958 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 5007 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4996 if (mcnt > dend2 - d2) 5045 if (mcnt > dend2 - d2)
4997 mcnt = dend2 - d2; 5046 mcnt = dend2 - d2;
4998 5047
4999 /* Compare that many; failure if mismatch, else move 5048 /* Compare that many; failure if mismatch, else move
5000 past them. */ 5049 past them. */
5001 if (translate 5050 if (TRANSLATE_P (translate)
5002 ? bcmp_translate ((unsigned char *) d, 5051 ? bcmp_translate ((unsigned char *) d,
5003 (unsigned char *) d2, mcnt, translate) 5052 (unsigned char *) d2, mcnt, translate)
5004 : memcmp (d, d2, mcnt)) 5053 : memcmp (d, d2, mcnt))
5005 goto fail; 5054 goto fail;
5006 d += mcnt, d2 += mcnt; 5055 d += mcnt, d2 += mcnt;
5084 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 5133 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
5085 5134
5086 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5135 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5087 DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt)); 5136 DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt));
5088 5137
5089 PUSH_FAILURE_POINT (p + mcnt, (char *) 0, -2); 5138 PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2);
5090 break; 5139 break;
5091 5140
5092 5141
5093 /* Uses of on_failure_jump: 5142 /* Uses of on_failure_jump:
5094 5143
5304 actual values. Otherwise, we will restore only one 5353 actual values. Otherwise, we will restore only one
5305 register from the stack, since lowest will == highest in 5354 register from the stack, since lowest will == highest in
5306 `pop_failure_point'. */ 5355 `pop_failure_point'. */
5307 unsigned dummy_low_reg, dummy_high_reg; 5356 unsigned dummy_low_reg, dummy_high_reg;
5308 unsigned char *pdummy; 5357 unsigned char *pdummy;
5309 const char *sdummy = NULL; 5358 re_char *sdummy = NULL;
5310 5359
5311 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 5360 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5312 POP_FAILURE_POINT (sdummy, pdummy, 5361 POP_FAILURE_POINT (sdummy, pdummy,
5313 dummy_low_reg, dummy_high_reg, 5362 dummy_low_reg, dummy_high_reg,
5314 reg_dummy, reg_dummy, reg_info_dummy); 5363 reg_dummy, reg_dummy, reg_info_dummy);
5340 something meaningless for pop_failure_jump to pop. */ 5389 something meaningless for pop_failure_jump to pop. */
5341 case dummy_failure_jump: 5390 case dummy_failure_jump:
5342 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 5391 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5343 /* It doesn't matter what we push for the string here. What 5392 /* It doesn't matter what we push for the string here. What
5344 the code at `fail' tests is the value for the pattern. */ 5393 the code at `fail' tests is the value for the pattern. */
5345 PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2); 5394 PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2);
5346 goto unconditional_jump; 5395 goto unconditional_jump;
5347 5396
5348 5397
5349 /* At the end of an alternative, we need to push a dummy failure 5398 /* At the end of an alternative, we need to push a dummy failure
5350 point in case we are followed by a `pop_failure_jump', because 5399 point in case we are followed by a `pop_failure_jump', because
5353 requires that we match the `ab' alternative. */ 5402 requires that we match the `ab' alternative. */
5354 case push_dummy_failure: 5403 case push_dummy_failure:
5355 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 5404 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5356 /* See comments just above at `dummy_failure_jump' about the 5405 /* See comments just above at `dummy_failure_jump' about the
5357 two zeroes. */ 5406 two zeroes. */
5358 PUSH_FAILURE_POINT ((unsigned char *) 0, (char *) 0, -2); 5407 PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2);
5359 break; 5408 break;
5360 5409
5361 /* Have to succeed matching what follows at least n times. 5410 /* Have to succeed matching what follows at least n times.
5362 After that, handle like `on_failure_jump'. */ 5411 After that, handle like `on_failure_jump'. */
5363 case succeed_n: 5412 case succeed_n:
5609 if (!WORDCHAR_P_UNSAFE ((int) (*d))) 5658 if (!WORDCHAR_P_UNSAFE ((int) (*d)))
5610 goto fail; 5659 goto fail;
5611 SET_REGS_MATCHED (); 5660 SET_REGS_MATCHED ();
5612 d++; 5661 d++;
5613 break; 5662 break;
5614 #endif /* not emacs */ 5663 #endif /* emacs */
5615 5664
5616 default: 5665 default:
5617 abort (); 5666 abort ();
5618 } 5667 }
5619 continue; /* Successfully executed one pattern command; keep going. */ 5668 continue; /* Successfully executed one pattern command; keep going. */
5919 5968
5920 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN 5969 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5921 bytes; nonzero otherwise. */ 5970 bytes; nonzero otherwise. */
5922 5971
5923 static int 5972 static int
5924 bcmp_translate (const unsigned char *s1, const unsigned char *s2, 5973 bcmp_translate (re_char *s1, re_char *s2,
5925 REGISTER int len, char *translate) 5974 REGISTER int len, RE_TRANSLATE_TYPE translate)
5926 { 5975 {
5927 REGISTER const unsigned char *p1 = s1, *p2 = s2; 5976 REGISTER const unsigned char *p1 = s1, *p2 = s2;
5977 #ifdef MULE
5978 const unsigned char *p1_end = s1 + len;
5979 const unsigned char *p2_end = s2 + len;
5980
5981 while (p1 != p1_end && p2 != p2_end)
5982 {
5983 Emchar p1_ch, p2_ch;
5984
5985 p1_ch = charptr_emchar (p1);
5986 p2_ch = charptr_emchar (p2);
5987
5988 if (RE_TRANSLATE (p1_ch)
5989 != RE_TRANSLATE (p2_ch))
5990 return 1;
5991 INC_CHARPTR (p1);
5992 INC_CHARPTR (p2);
5993 }
5994 #else /* not MULE */
5928 while (len) 5995 while (len)
5929 { 5996 {
5930 if (translate[*p1++] != translate[*p2++]) return 1; 5997 if (RE_TRANSLATE (*p1++) != RE_TRANSLATE (*p2++)) return 1;
5931 len--; 5998 len--;
5932 } 5999 }
6000 #endif /* MULE */
5933 return 0; 6001 return 0;
5934 } 6002 }
5935 6003
5936 /* Entry points for GNU code. */ 6004 /* Entry points for GNU code. */
5937 6005
5960 bufp->no_sub = 0; 6028 bufp->no_sub = 0;
5961 6029
5962 /* Match anchors at newline. */ 6030 /* Match anchors at newline. */
5963 bufp->newline_anchor = 1; 6031 bufp->newline_anchor = 1;
5964 6032
5965 ret = regex_compile (pattern, length, re_syntax_options, bufp); 6033 ret = regex_compile ((unsigned char *) pattern, length, re_syntax_options, bufp);
5966 6034
5967 if (!ret) 6035 if (!ret)
5968 return NULL; 6036 return NULL;
5969 return gettext (re_error_msgid[(int) ret]); 6037 return gettext (re_error_msgid[(int) ret]);
5970 } 6038 }
6005 don't need to initialize the pattern buffer fields which affect it. */ 6073 don't need to initialize the pattern buffer fields which affect it. */
6006 6074
6007 /* Match anchors at newlines. */ 6075 /* Match anchors at newlines. */
6008 re_comp_buf.newline_anchor = 1; 6076 re_comp_buf.newline_anchor = 1;
6009 6077
6010 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); 6078 ret = regex_compile ((unsigned char *)s, strlen (s), re_syntax_options, &re_comp_buf);
6011 6079
6012 if (!ret) 6080 if (!ret)
6013 return NULL; 6081 return NULL;
6014 6082
6015 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ 6083 /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */
6111 6179
6112 preg->no_sub = !!(cflags & REG_NOSUB); 6180 preg->no_sub = !!(cflags & REG_NOSUB);
6113 6181
6114 /* POSIX says a null character in the pattern terminates it, so we 6182 /* POSIX says a null character in the pattern terminates it, so we
6115 can use strlen here in compiling the pattern. */ 6183 can use strlen here in compiling the pattern. */
6116 ret = regex_compile (pattern, strlen (pattern), syntax, preg); 6184 ret = regex_compile ((unsigned char *) pattern, strlen (pattern), syntax, preg);
6117 6185
6118 /* POSIX doesn't distinguish between an unmatched open-group and an 6186 /* POSIX doesn't distinguish between an unmatched open-group and an
6119 unmatched close-group: both are REG_EPAREN. */ 6187 unmatched close-group: both are REG_EPAREN. */
6120 if (ret == REG_ERPAREN) ret = REG_EPAREN; 6188 if (ret == REG_ERPAREN) ret = REG_EPAREN;
6121 6189