Mercurial > hg > xemacs-beta
changeset 5041:efaa6cd845e5
add regexp-debugging
-------------------- ChangeLog entries follow: --------------------
src/ChangeLog addition:
2010-02-15 Ben Wing <ben@xemacs.org>
* regex.c:
* regex.c (DEBUG_FAIL_PRINT1):
* regex.c (PUSH_FAILURE_POINT):
* regex.c (POP_FAILURE_POINT):
* regex.c (regex_compile):
* regex.c (re_match_2_internal):
* regex.h:
* search.c:
* search.c (search_buffer):
* search.c (debug_regexps_changed):
* search.c (vars_of_search):
Add an internal variable debug_regexps and a corresponding Lisp
variable `debug-regexps' that takes a list of areas in which to
display debugging info about regex compilation and matching
(currently three areas exist). Use existing debugging code
already in regex.c and modify it so that it recognizes the
debug_regexps variable and the flags in it.
Rename variable `debug-xemacs-searches' to just `debug-searches',
consistent with other debug vars.
tests/ChangeLog addition:
2010-02-15 Ben Wing <ben@xemacs.org>
* automated/search-tests.el (let):
* automated/search-tests.el (boundp):
debug-xemacs-searches renamed to debug-searches.
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Mon, 15 Feb 2010 21:51:22 -0600 |
parents | 22179cd0fe15 |
children | f395ee7ad844 |
files | src/ChangeLog src/regex.c src/regex.h src/search.c tests/ChangeLog tests/automated/search-tests.el |
diffstat | 6 files changed, 271 insertions(+), 135 deletions(-) [+] |
line wrap: on
line diff
--- a/src/ChangeLog Wed Feb 10 07:25:19 2010 -0600 +++ b/src/ChangeLog Mon Feb 15 21:51:22 2010 -0600 @@ -1,3 +1,26 @@ +2010-02-15 Ben Wing <ben@xemacs.org> + + * regex.c: + * regex.c (DEBUG_FAIL_PRINT1): + * regex.c (PUSH_FAILURE_POINT): + * regex.c (POP_FAILURE_POINT): + * regex.c (regex_compile): + * regex.c (re_match_2_internal): + * regex.h: + * search.c: + * search.c (search_buffer): + * search.c (debug_regexps_changed): + * search.c (vars_of_search): + Add an internal variable debug_regexps and a corresponding Lisp + variable `debug-regexps' that takes a list of areas in which to + display debugging info about regex compilation and matching + (currently three areas exist). Use existing debugging code + already in regex.c and modify it so that it recognizes the + debug_regexps variable and the flags in it. + + Rename variable `debug-xemacs-searches' to just `debug-searches', + consistent with other debug vars. + 2010-02-10 Ben Wing <ben@xemacs.org> * text.h:
--- a/src/regex.c Wed Feb 10 07:25:19 2010 -0600 +++ b/src/regex.c Mon Feb 15 21:51:22 2010 -0600 @@ -5,7 +5,7 @@ Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 1995, 2001, 2002, 2003 Ben Wing. + Copyright (C) 1995, 2001, 2002, 2003, 2010 Ben Wing. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -734,17 +734,48 @@ #include <assert.h> #endif -static int debug = 0; +extern int debug_regexps; #define DEBUG_STATEMENT(e) e -#define DEBUG_PRINT1(x) if (debug) printf (x) -#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) + +#define DEBUG_PRINT1(x) if (debug_regexps) printf (x) +#define DEBUG_PRINT2(x1, x2) if (debug_regexps) printf (x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) if (debug_regexps) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug_regexps) printf (x1, x2, x3, x4) #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) + if (debug_regexps) print_partial_compiled_pattern (s, e) #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) + if (debug_regexps) print_double_string (w, s1, sz1, s2, sz2) + +#define DEBUG_FAIL_PRINT1(x) \ + if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x) +#define DEBUG_FAIL_PRINT2(x1, x2) \ + if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2) +#define DEBUG_FAIL_PRINT3(x1, x2, x3) \ + if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2, x3) +#define DEBUG_FAIL_PRINT4(x1, x2, x3, x4) \ + if (debug_regexps & RE_DEBUG_FAILURE_POINT) printf (x1, x2, x3, x4) +#define DEBUG_FAIL_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug_regexps & RE_DEBUG_FAILURE_POINT) \ + print_partial_compiled_pattern (s, e) 
+#define DEBUG_FAIL_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug_regexps & RE_DEBUG_FAILURE_POINT) \ + print_double_string (w, s1, sz1, s2, sz2) + +#define DEBUG_MATCH_PRINT1(x) \ + if (debug_regexps & RE_DEBUG_MATCHING) printf (x) +#define DEBUG_MATCH_PRINT2(x1, x2) \ + if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2) +#define DEBUG_MATCH_PRINT3(x1, x2, x3) \ + if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3) +#define DEBUG_MATCH_PRINT4(x1, x2, x3, x4) \ + if (debug_regexps & RE_DEBUG_MATCHING) printf (x1, x2, x3, x4) +#define DEBUG_MATCH_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug_regexps & RE_DEBUG_MATCHING) \ + print_partial_compiled_pattern (s, e) +#define DEBUG_MATCH_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug_regexps & RE_DEBUG_MATCHING) \ + print_double_string (w, s1, sz1, s2, sz2) /* Print the fastmap in human-readable form. */ @@ -1133,6 +1164,7 @@ #endif #define DEBUG_STATEMENT(e) + #define DEBUG_PRINT1(x) #define DEBUG_PRINT2(x1, x2) #define DEBUG_PRINT3(x1, x2, x3) @@ -1140,6 +1172,20 @@ #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) +#define DEBUG_FAIL_PRINT1(x) +#define DEBUG_FAIL_PRINT2(x1, x2) +#define DEBUG_FAIL_PRINT3(x1, x2, x3) +#define DEBUG_FAIL_PRINT4(x1, x2, x3, x4) +#define DEBUG_FAIL_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_FAIL_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#define DEBUG_MATCH_PRINT1(x) +#define DEBUG_MATCH_PRINT2(x1, x2) +#define DEBUG_MATCH_PRINT3(x1, x2, x3) +#define DEBUG_MATCH_PRINT4(x1, x2, x3, x4) +#define DEBUG_MATCH_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_MATCH_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + #endif /* DEBUG */ /* Set by `re_set_syntax' to the current regexp syntax to recognize. 
Can @@ -1523,14 +1569,14 @@ \ DEBUG_STATEMENT (failure_id++); \ DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%d:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %ld\n", \ + DEBUG_FAIL_PRINT2 ("\nPUSH_FAILURE_POINT #%d:\n", failure_id); \ + DEBUG_FAIL_PRINT2 (" Before push, next avail: %ld\n", \ (long) (fail_stack).avail); \ - DEBUG_PRINT2 (" size: %ld\n", \ + DEBUG_FAIL_PRINT2 (" size: %ld\n", \ (long) (fail_stack).size); \ \ - DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %ld\n", \ + DEBUG_FAIL_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ + DEBUG_FAIL_PRINT2 (" available: %ld\n", \ (long) REMAINING_AVAIL_SLOTS); \ \ /* Ensure we have enough space allocated for what we will push. */ \ @@ -1544,59 +1590,59 @@ return failure_code; \ } \ END_REGEX_MALLOC_OK (); \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %ld\n", \ + DEBUG_FAIL_PRINT2 ("\n Doubled stack; size now: %ld\n", \ (long) (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %ld\n", \ + DEBUG_FAIL_PRINT2 (" slots available: %ld\n", \ (long) REMAINING_AVAIL_SLOTS); \ \ RE_MATCH_RELOCATE_MOVEABLE_DATA_POINTERS (); \ } \ \ /* Push the info, starting with the registers. 
*/ \ - DEBUG_PRINT1 ("\n"); \ + DEBUG_FAIL_PRINT1 ("\n"); \ \ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ this_reg++) \ { \ - DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ + DEBUG_FAIL_PRINT2 (" Pushing reg: %d\n", this_reg); \ DEBUG_STATEMENT (num_regs_pushed++); \ \ - DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ + DEBUG_FAIL_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ PUSH_FAILURE_POINTER (regstart[this_reg]); \ \ - DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ + DEBUG_FAIL_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ PUSH_FAILURE_POINTER (regend[this_reg]); \ \ - DEBUG_PRINT2 (" info: 0x%lx\n ", \ * (long *) (&reg_info[this_reg])); \ + DEBUG_FAIL_PRINT2 (" info: 0x%lx\n ", \ * (long *) (&reg_info[this_reg])); \ - DEBUG_PRINT2 (" match_null=%d", \ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ + DEBUG_FAIL_PRINT2 (" match_null=%d", \ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_FAIL_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_FAIL_PRINT2 (" matched_something=%d", \ MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched_something=%d", \ + DEBUG_FAIL_PRINT2 (" ever_matched_something=%d", \ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ + DEBUG_FAIL_PRINT1 ("\n"); \ PUSH_FAILURE_ELT (reg_info[this_reg].word); \ } \ \ - DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \ + DEBUG_FAIL_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \ PUSH_FAILURE_INT (lowest_active_reg); \ \ - DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg); \ + DEBUG_FAIL_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg); \ PUSH_FAILURE_INT (highest_active_reg); \ \ - DEBUG_PRINT2 (" Pushing pattern 0x%lx: \n", (long) pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + DEBUG_FAIL_PRINT2 (" Pushing pattern 0x%lx: \n", (long) 
pattern_place); \ + DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ PUSH_FAILURE_POINTER (pattern_place); \ \ - DEBUG_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + DEBUG_FAIL_PRINT2 (" Pushing string 0x%lx: `", (long) string_place); \ + DEBUG_FAIL_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ size2); \ - DEBUG_PRINT1 ("'\n"); \ + DEBUG_FAIL_PRINT1 ("'\n"); \ PUSH_FAILURE_POINTER (string_place); \ \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_FAIL_PRINT2 (" Pushing failure id: %u\n", failure_id); \ DEBUG_PUSH (failure_id); \ } while (0) @@ -1648,16 +1694,16 @@ assert (!FAIL_STACK_EMPTY ()); \ \ /* Remove failure points and point to how many regs pushed. */ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %ld\n", \ + DEBUG_FAIL_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_FAIL_PRINT2 (" Before pop, next avail: %ld\n", \ (long) fail_stack.avail); \ - DEBUG_PRINT2 (" size: %ld\n", \ + DEBUG_FAIL_PRINT2 (" size: %ld\n", \ (long) fail_stack.size); \ \ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ \ DEBUG_POP (&ffailure_id.integer); \ - DEBUG_PRINT2 (" Popping failure id: %d\n", \ + DEBUG_FAIL_PRINT2 (" Popping failure id: %d\n", \ * (int *) &ffailure_id); \ \ /* If the saved string location is NULL, it came from an \ @@ -1667,34 +1713,34 @@ if (string_temp != NULL) \ str = string_temp; \ \ - DEBUG_PRINT2 (" Popping string 0x%lx: `", (long) str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ + DEBUG_FAIL_PRINT2 (" Popping string 0x%lx: `", (long) str); \ + DEBUG_FAIL_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_FAIL_PRINT1 ("'\n"); \ \ pat = (unsigned char *) POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" Popping pattern 0x%lx: ", (long) pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + DEBUG_FAIL_PRINT2 (" 
Popping pattern 0x%lx: ", (long) pat); \ + DEBUG_FAIL_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ \ /* Restore register info. */ \ high_reg = POP_FAILURE_INT (); \ - DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ + DEBUG_FAIL_PRINT2 (" Popping high active reg: %d\n", high_reg); \ \ low_reg = POP_FAILURE_INT (); \ - DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ + DEBUG_FAIL_PRINT2 (" Popping low active reg: %d\n", low_reg); \ \ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ { \ - DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ + DEBUG_FAIL_PRINT2 (" Popping reg: %d\n", this_reg); \ \ reg_info[this_reg].word = POP_FAILURE_ELT (); \ - DEBUG_PRINT2 (" info: 0x%lx\n", \ + DEBUG_FAIL_PRINT2 (" info: 0x%lx\n", \ * (long *) &reg_info[this_reg]); \ \ regend[this_reg] = POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ + DEBUG_FAIL_PRINT2 (" end: 0x%lx\n", (long) regend[this_reg]); \ \ regstart[this_reg] = POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ + DEBUG_FAIL_PRINT2 (" start: 0x%lx\n", (long) regstart[this_reg]); \ } \ \ set_regs_matched_done = 0; \ @@ -2157,11 +2203,11 @@ regnum_t regnum = 0; #ifdef DEBUG - DEBUG_PRINT1 ("\nCompiling pattern: "); - if (debug) + if (debug_regexps & RE_DEBUG_COMPILATION) { int debug_count; + DEBUG_PRINT1 ("\nCompiling pattern: "); for (debug_count = 0; debug_count < size; debug_count++) putchar (pattern[debug_count]); putchar ('\n'); @@ -3405,7 +3451,7 @@ bufp->used = buf_end - bufp->buffer; #ifdef DEBUG - if (debug) + if (debug_regexps & RE_DEBUG_COMPILATION) { DEBUG_PRINT1 ("\nCompiled pattern: \n"); print_compiled_pattern (bufp); @@ -4906,7 +4952,7 @@ #endif #endif /* emacs */ - DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); + DEBUG_MATCH_PRINT1 ("\n\nEntering re_match_2.\n"); BEGIN_REGEX_MALLOC_OK (); INIT_FAIL_STACK (); @@ -5024,18 +5070,18 @@ dend = end_match_2; } - DEBUG_PRINT1 ("The compiled pattern is: \n"); - 
DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); - DEBUG_PRINT1 ("The string to match is: `"); - DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); - DEBUG_PRINT1 ("'\n"); + DEBUG_MATCH_PRINT1 ("The compiled pattern is: \n"); + DEBUG_MATCH_PRINT_COMPILED_PATTERN (bufp, p, pend); + DEBUG_MATCH_PRINT1 ("The string to match is: `"); + DEBUG_MATCH_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); + DEBUG_MATCH_PRINT1 ("'\n"); /* This loops over pattern commands. It exits by returning from the function if the match is complete, or it drops through if the match fails at this starting point in the input data. */ for (;;) { - DEBUG_PRINT2 ("\n0x%lx: ", (long) p); + DEBUG_MATCH_PRINT2 ("\n0x%lx: ", (long) p); #ifdef emacs /* XEmacs added, w/removal of immediate_quit */ if (!no_quit_in_re_search) { @@ -5048,7 +5094,7 @@ if (p == pend) { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("end of pattern ... "); + DEBUG_MATCH_PRINT1 ("end of pattern ... "); /* If we haven't matched the entire string, and we want the longest match, try backtracking. */ @@ -5064,7 +5110,7 @@ else best_match_p = !MATCHING_IN_FIRST_STRING; - DEBUG_PRINT1 ("backtracking.\n"); + DEBUG_MATCH_PRINT1 ("backtracking.\n"); if (!FAIL_STACK_EMPTY ()) { /* More failure points to try. */ @@ -5075,7 +5121,7 @@ best_regs_set = true; match_end = d; - DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); + DEBUG_MATCH_PRINT1 ("\nSAVING match as best so far.\n"); for (mcnt = 1; mcnt < num_regs; mcnt++) { @@ -5097,7 +5143,7 @@ For example, the pattern `x.*y.*z' against the strings `x-' and `y-z-', if the two strings are not consecutive in memory. 
*/ - DEBUG_PRINT1 ("Restoring best registers.\n"); + DEBUG_MATCH_PRINT1 ("Restoring best registers.\n"); d = match_end; dend = ((d >= string1 && d <= end1) @@ -5112,7 +5158,7 @@ } /* d != end_match_2 */ succeed_label: - DEBUG_PRINT1 ("Accepting match.\n"); + DEBUG_MATCH_PRINT1 ("Accepting match.\n"); /* If caller wants register contents data back, do it. */ { @@ -5214,16 +5260,16 @@ for (mcnt = num_nonshy_regs; mcnt < regs->num_regs; mcnt++) regs->start[mcnt] = regs->end[mcnt] = -1; } - DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", + DEBUG_MATCH_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", nfailure_points_pushed, nfailure_points_popped, nfailure_points_pushed - nfailure_points_popped); - DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); + DEBUG_MATCH_PRINT2 ("%u registers pushed.\n", num_regs_pushed); mcnt = d - pos - (MATCHING_IN_FIRST_STRING ? string1 : string2 - size1); - DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); + DEBUG_MATCH_PRINT2 ("Returning %d from re_match_2.\n", mcnt); FREE_VARIABLES (); return mcnt; @@ -5235,11 +5281,11 @@ /* Ignore these. Used to ignore the n of succeed_n's which currently have n == 0. */ case no_op: - DEBUG_PRINT1 ("EXECUTING no_op.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING no_op.\n"); break; case succeed: - DEBUG_PRINT1 ("EXECUTING succeed.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING succeed.\n"); goto succeed_label; /* Match exactly a string of length n in the pattern. The @@ -5248,7 +5294,7 @@ the default internal format.) */ case exactn: mcnt = *p++; - DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); + DEBUG_MATCH_PRINT2 ("EXECUTING exactn %d.\n", mcnt); /* This is written out as an if-else so we don't waste time testing `translate' inside the loop. */ @@ -5321,7 +5367,7 @@ /* Match any character except possibly a newline or a null. 
*/ case anychar: - DEBUG_PRINT1 ("EXECUTING anychar.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING anychar.\n"); REGEX_PREFETCH (); @@ -5333,7 +5379,7 @@ goto fail; SET_REGS_MATCHED (); - DEBUG_PRINT2 (" Matched `%d'.\n", *d); + DEBUG_MATCH_PRINT2 (" Matched `%d'.\n", *d); INC_IBYTEPTR_FMT (d, fmt); /* XEmacs change */ break; @@ -5344,7 +5390,7 @@ REGISTER Ichar c; re_bool not_p = (re_opcode_t) *(p - 1) == charset_not; - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); + DEBUG_MATCH_PRINT2 ("EXECUTING charset%s.\n", not_p ? "_not" : ""); REGEX_PREFETCH (); c = itext_ichar_fmt (d, fmt, lispobj); @@ -5372,7 +5418,7 @@ REGISTER Ichar c; re_bool not_p = (re_opcode_t) *(p - 1) == charset_mule_not; - DEBUG_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); + DEBUG_MATCH_PRINT2 ("EXECUTING charset_mule%s.\n", not_p ? "_not" : ""); REGEX_PREFETCH (); c = itext_ichar_fmt (d, fmt, lispobj); @@ -5398,7 +5444,7 @@ matched within the group is recorded (in the internal registers data structure) under the register number. */ case start_memory: - DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); + DEBUG_MATCH_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); /* Find out if this group can match the empty string. */ p1 = p; /* To send to group_match_null_string_p. */ @@ -5407,7 +5453,7 @@ REG_MATCH_NULL_STRING_P (reg_info[*p]) = group_match_null_string_p (&p1, pend, reg_info); - DEBUG_PRINT2 (" group CAN%s match null string\n", + DEBUG_MATCH_PRINT2 (" group CAN%s match null string\n", REG_MATCH_NULL_STRING_P (reg_info[*p]) ? "NOT" : ""); /* Save the position in the string where we were the last time @@ -5418,11 +5464,11 @@ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) ? REG_UNSET (regstart[*p]) ? 
d : regstart[*p] : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", + DEBUG_MATCH_PRINT2 (" old_regstart: %d\n", POINTER_TO_OFFSET (old_regstart[*p])); regstart[*p] = d; - DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); + DEBUG_MATCH_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); IS_ACTIVE (reg_info[*p]) = 1; MATCHED_SOMETHING (reg_info[*p]) = 0; @@ -5449,7 +5495,7 @@ arguments are the same as start_memory's: the register number, and the number of inner groups. */ case stop_memory: - DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); + DEBUG_MATCH_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); /* We need to save the string position the last time we were at this close-group operator in case the group is operated @@ -5459,11 +5505,11 @@ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) ? REG_UNSET (regend[*p]) ? d : regend[*p] : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", + DEBUG_MATCH_PRINT2 (" old_regend: %d\n", POINTER_TO_OFFSET (old_regend[*p])); regend[*p] = d; - DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); + DEBUG_MATCH_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); /* This register isn't active anymore. */ IS_ACTIVE (reg_info[*p]) = 0; @@ -5599,7 +5645,7 @@ REGISTER re_char *d2, *dend2; /* Get which register to match against. */ int regno = *p++; - DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); + DEBUG_MATCH_PRINT2 ("EXECUTING duplicate %d.\n", regno); /* Can't back reference a group which we've never matched. */ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) @@ -5666,7 +5712,7 @@ (unless `not_bol' is set in `bufp'), and, if `newline_anchor' is set, after newlines. */ case begline: - DEBUG_PRINT1 ("EXECUTING begline.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING begline.\n"); if (AT_STRINGS_BEG (d)) { @@ -5686,7 +5732,7 @@ /* endline is the dual of begline. 
*/ case endline: - DEBUG_PRINT1 ("EXECUTING endline.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING endline.\n"); if (AT_STRINGS_END (d)) { @@ -5706,7 +5752,7 @@ /* Match at the very beginning of the data. */ case begbuf: - DEBUG_PRINT1 ("EXECUTING begbuf.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING begbuf.\n"); if (AT_STRINGS_BEG (d)) break; goto fail; @@ -5714,7 +5760,7 @@ /* Match at the very end of the data. */ case endbuf: - DEBUG_PRINT1 ("EXECUTING endbuf.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING endbuf.\n"); if (AT_STRINGS_END (d)) break; goto fail; @@ -5737,10 +5783,10 @@ `anychar's code to do something besides goto fail in this case; that seems worse than this. */ case on_failure_keep_string_jump: - DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); + DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_keep_string_jump"); EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt)); + DEBUG_MATCH_PRINT3 (" %d (to 0x%lx):\n", mcnt, (long) (p + mcnt)); PUSH_FAILURE_POINT (p + mcnt, (unsigned char *) 0, -2); break; @@ -5760,10 +5806,10 @@ pop_failure_jump back to this on_failure_jump. */ case on_failure_jump: on_failure: - DEBUG_PRINT1 ("EXECUTING on_failure_jump"); + DEBUG_MATCH_PRINT1 ("EXECUTING on_failure_jump"); EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" %d (to 0x%lx)", mcnt, (long) (p + mcnt)); + DEBUG_MATCH_PRINT3 (" %d (to 0x%lx)", mcnt, (long) (p + mcnt)); /* If this on_failure_jump comes right before a group (i.e., the original * applied to a group), save the information @@ -5794,7 +5840,7 @@ lowest_active_reg = *(p1 + 1); } - DEBUG_PRINT1 (":\n"); + DEBUG_MATCH_PRINT1 (":\n"); PUSH_FAILURE_POINT (p + mcnt, d, -2); break; @@ -5803,7 +5849,7 @@ We change it to either `pop_failure_jump' or `jump'. 
*/ case maybe_pop_jump: EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); + DEBUG_MATCH_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); { REGISTER unsigned char *p2 = p; @@ -5849,7 +5895,7 @@ against ":/". I don't really understand this code yet. */ p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 + DEBUG_MATCH_PRINT1 (" End of pattern: change to `pop_failure_jump'.\n"); } @@ -5862,7 +5908,7 @@ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) { p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", c, p1[5]); } @@ -5880,7 +5926,7 @@ if (!not_p) { p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); } } } @@ -5897,7 +5943,7 @@ & (1 << (p1[5] % BYTEWIDTH))))) { p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + DEBUG_MATCH_PRINT3 (" %c != %c => pop_failure_jump.\n", c, p1[5]); } @@ -5915,7 +5961,7 @@ if (idx == p2[1]) { p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); } } else if ((re_opcode_t) p1[3] == charset) @@ -5932,7 +5978,7 @@ if (idx == p2[1] || idx == p1[4]) { p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + DEBUG_MATCH_PRINT1 (" No match => pop_failure_jump.\n"); } } } @@ -5941,7 +5987,7 @@ if ((re_opcode_t) p[-1] != pop_failure_jump) { p[-1] = (unsigned char) jump; - DEBUG_PRINT1 (" Match => jump.\n"); + DEBUG_MATCH_PRINT1 (" Match => jump.\n"); goto unconditional_jump; } /* Note fall through. 
*/ @@ -5964,7 +6010,7 @@ unsigned char *pdummy; re_char *sdummy = NULL; - DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING pop_failure_jump.\n"); POP_FAILURE_POINT (sdummy, pdummy, dummy_low_reg, dummy_high_reg, reg_dummy, reg_dummy, reg_info_dummy); @@ -5976,16 +6022,16 @@ case jump: unconditional_jump: EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ - DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); + DEBUG_MATCH_PRINT2 ("EXECUTING jump %d ", mcnt); p += mcnt; /* Do the jump. */ - DEBUG_PRINT2 ("(to 0x%lx).\n", (long) p); + DEBUG_MATCH_PRINT2 ("(to 0x%lx).\n", (long) p); break; /* We need this opcode so we can detect where alternatives end in `group_match_null_string_p' et al. */ case jump_past_alt: - DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING jump_past_alt.\n"); goto unconditional_jump; @@ -5995,7 +6041,7 @@ are skipping over the on_failure_jump, so we have to push something meaningless for pop_failure_jump to pop. */ case dummy_failure_jump: - DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING dummy_failure_jump.\n"); /* It doesn't matter what we push for the string here. What the code at `fail' tests is the value for the pattern. */ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); @@ -6008,7 +6054,7 @@ popped. For example, matching `(a|ab)*' against `aab' requires that we match the `ab' alternative. */ case push_dummy_failure: - DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING push_dummy_failure.\n"); /* See comments just above at `dummy_failure_jump' about the two zeroes. */ PUSH_FAILURE_POINT ((unsigned char *) 0, (unsigned char *) 0, -2); @@ -6018,7 +6064,7 @@ After that, handle like `on_failure_jump'. 
*/ case succeed_n: EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); + DEBUG_MATCH_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); assert (mcnt >= 0); /* Originally, this is how many times we HAVE to succeed. */ @@ -6027,11 +6073,11 @@ mcnt--; p += 2; STORE_NUMBER_AND_INCR (p, mcnt); - DEBUG_PRINT3 (" Setting 0x%lx to %d.\n", (long) p, mcnt); + DEBUG_MATCH_PRINT3 (" Setting 0x%lx to %d.\n", (long) p, mcnt); } else if (mcnt == 0) { - DEBUG_PRINT2 (" Setting two bytes from 0x%lx to no_op.\n", + DEBUG_MATCH_PRINT2 (" Setting two bytes from 0x%lx to no_op.\n", (long) (p+2)); p[2] = (unsigned char) no_op; p[3] = (unsigned char) no_op; @@ -6041,7 +6087,7 @@ case jump_n: EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); + DEBUG_MATCH_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); /* Originally, this is how many times we CAN jump. */ if (mcnt) @@ -6057,18 +6103,18 @@ case set_number_at: { - DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING set_number_at.\n"); EXTRACT_NUMBER_AND_INCR (mcnt, p); p1 = p + mcnt; EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT3 (" Setting 0x%lx to %d.\n", (long) p1, mcnt); + DEBUG_MATCH_PRINT3 (" Setting 0x%lx to %d.\n", (long) p1, mcnt); STORE_NUMBER (p1, mcnt); break; } case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING wordbound.\n"); should_succeed = 1; matchwordbound: { @@ -6138,12 +6184,12 @@ } case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING notwordbound.\n"); should_succeed = 0; goto matchwordbound; case wordbeg: - DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING wordbeg.\n"); if (AT_STRINGS_END (d)) goto fail; { @@ -6186,7 +6232,7 @@ } case wordend: - DEBUG_PRINT1 ("EXECUTING wordend.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING wordend.\n"); if (AT_STRINGS_BEG (d)) goto fail; { @@ -6235,7 +6281,7 @@ #ifdef emacs case before_dot: - DEBUG_PRINT1 
("EXECUTING before_dot.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING before_dot.\n"); if (!BUFFERP (lispobj) || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) >= BUF_PT (XBUFFER (lispobj)))) @@ -6243,7 +6289,7 @@ break; case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING at_dot.\n"); if (!BUFFERP (lispobj) || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) != BUF_PT (XBUFFER (lispobj)))) @@ -6251,7 +6297,7 @@ break; case after_dot: - DEBUG_PRINT1 ("EXECUTING after_dot.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING after_dot.\n"); if (!BUFFERP (lispobj) || (BUF_PTR_BYTE_POS (XBUFFER (lispobj), (unsigned char *) d) <= BUF_PT (XBUFFER (lispobj)))) @@ -6259,12 +6305,12 @@ break; case syntaxspec: - DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); + DEBUG_MATCH_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); mcnt = *p++; goto matchsyntax; case wordchar: - DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING Emacs wordchar.\n"); mcnt = (int) Sword; matchsyntax: should_succeed = 1; @@ -6294,12 +6340,12 @@ break; case notsyntaxspec: - DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); + DEBUG_MATCH_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); mcnt = *p++; goto matchnotsyntax; case notwordchar: - DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING Emacs notwordchar.\n"); mcnt = (int) Sword; matchnotsyntax: should_succeed = 0; @@ -6331,7 +6377,7 @@ #endif /* MULE */ #else /* not emacs */ case wordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); REGEX_PREFETCH (); if (!WORDCHAR_P ((int) (*d))) goto fail; @@ -6340,7 +6386,7 @@ break; case notwordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); + DEBUG_MATCH_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); REGEX_PREFETCH (); if (!WORDCHAR_P ((int) (*d))) goto fail; @@ -6359,7 +6405,7 @@ fail: if (!FAIL_STACK_EMPTY ()) { /* A restart point is known. 
Restore to that state. */ - DEBUG_PRINT1 ("\nFAIL:\n"); + DEBUG_MATCH_PRINT1 ("\nFAIL:\n"); POP_FAILURE_POINT (d, p, lowest_active_reg, highest_active_reg, regstart, regend, reg_info);
--- a/src/regex.h Wed Feb 10 07:25:19 2010 -0600 +++ b/src/regex.h Mon Feb 15 21:51:22 2010 -0600 @@ -2,7 +2,7 @@ expression library, version 0.12. Copyright (C) 1985, 89, 90, 91, 92, 93, 95 Free Software Foundation, Inc. - Copyright (C) 2002 Ben Wing. + Copyright (C) 2002, 2010 Ben Wing. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -526,4 +526,13 @@ size_t errbuf_size); void regfree (regex_t *preg); +enum regex_debug + { + RE_DEBUG_COMPILATION = 1 << 0, + RE_DEBUG_FAILURE_POINT = 1 << 1, + RE_DEBUG_MATCHING = 1 << 2, + }; + +extern int debug_regexps; + #endif /* INCLUDED_regex_h_ */
--- a/src/search.c Wed Feb 10 07:25:19 2010 -0600 +++ b/src/search.c Mon Feb 15 21:51:22 2010 -0600 @@ -1,7 +1,7 @@ /* String search routines for XEmacs. Copyright (C) 1985, 1986, 1987, 1992-1995 Free Software Foundation, Inc. Copyright (C) 1995 Sun Microsystems, Inc. - Copyright (C) 2001, 2002 Ben Wing. + Copyright (C) 2001, 2002, 2010 Ben Wing. This file is part of XEmacs. @@ -50,10 +50,17 @@ #ifdef DEBUG_XEMACS /* Used in tests/automated/case-tests.el if available. */ -Fixnum debug_xemacs_searches; +Fixnum debug_searches; + +/* Declare as int rather than Bitflags because it's used by regex.c, which + may be used outside of XEmacs (e.g. etags.c). */ +int debug_regexps; +Lisp_Object Vdebug_regexps; Lisp_Object Qsearch_algorithm_used, Qboyer_moore, Qsimple_search; +Lisp_Object Qcompilation, Qfailure_point, Qmatching; + #endif /* If the regexp is non-nil, then the buffer contains the compiled form @@ -1461,7 +1468,7 @@ if (!checked) { #ifdef DEBUG_XEMACS - if (debug_xemacs_searches) + if (debug_searches) { Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); sym->value = Qnil; @@ -1527,7 +1534,7 @@ pat = base_pat = patbuf; #ifdef DEBUG_XEMACS - if (debug_xemacs_searches) + if (debug_searches) { Lisp_Symbol *sym = XSYMBOL (Qsearch_algorithm_used); sym->value = boyer_moore_ok ? 
Qboyer_moore : Qsimple_search; @@ -3333,6 +3340,35 @@ } +#ifdef DEBUG_XEMACS + +static int +debug_regexps_changed (Lisp_Object UNUSED (sym), Lisp_Object *val, + Lisp_Object UNUSED (in_object), + int UNUSED (flags)) +{ + int newval = 0; + + EXTERNAL_LIST_LOOP_2 (elt, *val) + { + CHECK_SYMBOL (elt); + if (EQ (elt, Qcompilation)) + newval |= RE_DEBUG_COMPILATION; + else if (EQ (elt, Qfailure_point)) + newval |= RE_DEBUG_FAILURE_POINT; + else if (EQ (elt, Qmatching)) + newval |= RE_DEBUG_MATCHING; + else + invalid_argument + ("Expected `compilation', `failure-point' or `matching'", elt); + } + debug_regexps = newval; + return 0; +} + +#endif /* DEBUG_XEMACS */ + + /************************************************************************/ /* initialization */ /************************************************************************/ @@ -3421,10 +3457,26 @@ DEFSYMBOL (Qboyer_moore); DEFSYMBOL (Qsimple_search); - DEFVAR_INT ("debug-xemacs-searches", &debug_xemacs_searches /* + DEFSYMBOL (Qcompilation); + DEFSYMBOL (Qfailure_point); + DEFSYMBOL (Qmatching); + + DEFVAR_INT ("debug-searches", &debug_searches /* If non-zero, bind `search-algorithm-used' to `boyer-moore' or `simple-search', depending on the algorithm used for each search. Used for testing. */ ); - debug_xemacs_searches = 0; -#endif + debug_searches = 0; + + DEFVAR_LISP_MAGIC ("debug-regexps", &Vdebug_regexps, /* +List of areas to display debug info about during regexp operation. +The following areas are recognized: + +`compilation' Display the result of compiling a regexp. +`failure-point' Display info about failure points reached. +`matching' Display info about the process of matching a regex against + text. +*/ debug_regexps_changed); + Vdebug_regexps = Qnil; + debug_regexps = 0; +#endif /* DEBUG_XEMACS */ }
--- a/tests/ChangeLog Wed Feb 10 07:25:19 2010 -0600 +++ b/tests/ChangeLog Mon Feb 15 21:51:22 2010 -0600 @@ -1,3 +1,9 @@ +2010-02-15 Ben Wing <ben@xemacs.org> + + * automated/search-tests.el (let): + * automated/search-tests.el (boundp): + debug-xemacs-searches renamed to debug-searches. + 2010-02-05 Jerry James <james@xemacs.org> * DLL/dltest.c: Remove old test. Building and using any module now
--- a/tests/automated/search-tests.el Wed Feb 10 07:25:19 2010 -0600 +++ b/tests/automated/search-tests.el Mon Feb 15 21:51:22 2010 -0600 @@ -166,7 +166,7 @@ (with-temp-buffer (let ((target "M\xe9zard") - (debug-xemacs-searches 1)) + (debug-searches 1)) (Assert (not (search-forward target nil t))) (insert target) (goto-char (point-min)) @@ -180,10 +180,10 @@ (Assert= (1+ (length target)) (search-forward target nil t)))) (Skip-Test-Unless - (boundp 'debug-xemacs-searches) ; normal when we have DEBUG_XEMACS + (boundp 'debug-searches) ; normal when we have DEBUG_XEMACS "not a DEBUG_XEMACS build" "checks that the algorithm chosen by #'search-forward is relatively sane" - (let ((debug-xemacs-searches 1) + (let ((debug-searches 1) newcase) (with-temp-buffer (insert "\n\nDer beruehmte deutsche Fleiss\n\n")