Mercurial > hg > xemacs-beta
comparison src/syntax.h @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | ac2d302a0011 |
children | c5d627a313b1 |
comparison
equal
deleted
inserted
replaced
69:804d1389bcd6 | 70:131b0175ea99 |
---|---|
21 /* Synched up with: FSF 19.28. */ | 21 /* Synched up with: FSF 19.28. */ |
22 | 22 |
23 #ifndef _XEMACS_SYNTAX_H_ | 23 #ifndef _XEMACS_SYNTAX_H_ |
24 #define _XEMACS_SYNTAX_H_ | 24 #define _XEMACS_SYNTAX_H_ |
25 | 25 |
26 #include "chartab.h" | |
27 | |
26 /* The standard syntax table is stored where it will automatically | 28 /* The standard syntax table is stored where it will automatically |
27 be used in all new buffers. */ | 29 be used in all new buffers. */ |
28 extern Lisp_Object Vstandard_syntax_table; | 30 extern Lisp_Object Vstandard_syntax_table; |
29 | 31 |
30 /* A syntax table is a Lisp vector of length 0400, whose elements are integers. | 32 /* A syntax table is a type of char table. |
31 | 33 |
32 The low 7 bits of the integer are a code, as follows. The 8th bit is | 34 The low 7 bits of the integer are a code, as follows. The 8th bit is |
33 used as the prefix bit flag (see below). | 35 used as the prefix bit flag (see below). |
36 | |
37 The values in a syntax table are either integers or conses of | |
38 integers and chars. The lowest 7 bits of the integer are the syntax | |
39 class. If this is Sinherit, then the actual syntax value needs to | |
40 be retrieved from the standard syntax table. | |
41 | |
42 Since the logic involved in finding the actual integer isn't very | |
43 complex, you'd think the time required to retrieve it is not a | |
44 factor. If you thought that, however, you'd be wrong, due to the | |
45 high number of times (many per character) that the syntax value is | |
46 accessed in functions such as scan_lists(). To speed this up, | |
47 we maintain a mirror syntax table that contains the actual | |
48 integers. We can do this successfully because syntax tables are | |
49 now an abstract type, where we control all access. | |
34 */ | 50 */ |
35 | 51 |
36 enum syntaxcode | 52 enum syntaxcode |
37 { | 53 { |
38 Swhitespace, /* whitespace character */ | 54 Swhitespace, /* whitespace character */ |
47 Sescape, /* a character that begins a C-style escape */ | 63 Sescape, /* a character that begins a C-style escape */ |
48 Scharquote, /* a character that quotes the following character */ | 64 Scharquote, /* a character that quotes the following character */ |
49 Scomment, /* a comment-starting character */ | 65 Scomment, /* a comment-starting character */ |
50 Sendcomment, /* a comment-ending character */ | 66 Sendcomment, /* a comment-ending character */ |
51 Sinherit, /* use the standard syntax table for this character */ | 67 Sinherit, /* use the standard syntax table for this character */ |
52 Sextword, /* extended word; works mostly like a word constituent. | |
53 See the comment in syntax.c. */ | |
54 Smax /* Upper bound on codes that are meaningful */ | 68 Smax /* Upper bound on codes that are meaningful */ |
55 }; | 69 }; |
56 | 70 |
57 extern Lisp_Object Qsyntax_table_p; | 71 extern Lisp_Object Qsyntax_table_p; |
58 Lisp_Object Fsyntax_table_p (Lisp_Object); | 72 Lisp_Object Fsyntax_table_p (Lisp_Object); |
59 Lisp_Object Fsyntax_table (Lisp_Object); | 73 Lisp_Object Fsyntax_table (Lisp_Object); |
60 Lisp_Object Fset_syntax_table (Lisp_Object, Lisp_Object); | 74 Lisp_Object Fset_syntax_table (Lisp_Object, Lisp_Object); |
61 | 75 enum syntaxcode charset_syntax (struct buffer *buf, Lisp_Object charset, |
62 /* Return the raw syntax code for a particular character and table */ | 76 int *multi_p_out); |
63 #define RAW_SYNTAX_CODE_UNSAFE(table, c) \ | 77 |
64 (XINT (vector_data (XVECTOR (table))[(unsigned char) (c)])) | 78 /* Return the syntax code for a particular character and mirror table. */ |
65 | 79 |
66 /* Return the syntax code for a particular character and table, taking | 80 #define SYNTAX_CODE_UNSAFE(table, c) \ |
67 into account inheritance. */ | 81 XINT (CHAR_TABLE_VALUE_UNSAFE (table, c)) |
68 | 82 |
69 /* Unfortunately, we cannot write SYNTAX_CODE() as a safe macro in | 83 INLINE int SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c); |
70 general. I tried just using an inline function but that causes | |
71 significant slowdown (esp. in regex routines) because this macro | |
72 is called so many millions of times. So instead we resort to | |
73 SYNTAX_CODE_UNSAFE(), which is used most of the time. Under | |
74 GCC we can actually write this as a safe macro, and we do because | |
75 it's likely to lead to speedups. */ | |
76 | |
77 #ifdef __GNUC__ | |
78 #define SYNTAX_CODE_UNSAFE(table, c) \ | |
79 ({ Emchar _ch_ = (c); \ | |
80 int _rawcode_ = RAW_SYNTAX_CODE_UNSAFE (table, _ch_); \ | |
81 if ((enum syntaxcode) (_rawcode_ & 0177) == Sinherit) \ | |
82 _rawcode_ = RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, _ch_); \ | |
83 _rawcode_; }) | |
84 #else | |
85 #define SYNTAX_CODE_UNSAFE(table, c) \ | |
86 (RAW_SYNTAX_CODE_UNSAFE (table, c) == Sinherit \ | |
87 ? RAW_SYNTAX_CODE_UNSAFE (Vstandard_syntax_table, c) \ | |
88 : RAW_SYNTAX_CODE_UNSAFE (table, c)) | |
89 #endif | |
90 | |
91 INLINE int SYNTAX_CODE (Lisp_Object table, Emchar c); | |
92 INLINE int | 84 INLINE int |
93 SYNTAX_CODE (Lisp_Object table, Emchar c) | 85 SYNTAX_CODE (struct Lisp_Char_Table *table, Emchar c) |
94 { | 86 { |
95 return SYNTAX_CODE_UNSAFE (table, c); | 87 return SYNTAX_CODE_UNSAFE (table, c); |
96 } | 88 } |
97 | 89 |
98 #define SYNTAX_UNSAFE(table, c) \ | 90 #define SYNTAX_UNSAFE(table, c) \ |
99 ((enum syntaxcode) (SYNTAX_CODE_UNSAFE (table, c) & 0177)) | 91 ((enum syntaxcode) (SYNTAX_CODE_UNSAFE (table, c) & 0177)) |
100 | 92 |
101 #define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177)) | 93 #define SYNTAX_FROM_CODE(code) ((enum syntaxcode) ((code) & 0177)) |
102 #define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c)) | 94 #define SYNTAX(table, c) SYNTAX_FROM_CODE (SYNTAX_CODE (table, c)) |
103 | 95 |
104 INLINE int WORD_SYNTAX_P (Lisp_Object table, Emchar c); | 96 INLINE int WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c); |
105 INLINE int | 97 INLINE int |
106 WORD_SYNTAX_P (Lisp_Object table, Emchar c) | 98 WORD_SYNTAX_P (struct Lisp_Char_Table *table, Emchar c) |
107 { | 99 { |
108 int syncode = SYNTAX (table, c); | 100 int syncode = SYNTAX (table, c); |
109 return syncode == Sword || syncode == Sextword; | 101 return syncode == Sword; |
110 } | 102 } |
103 | |
104 /* OK, here's a graphic diagram of the format of the syntax values: | |
105 | |
106 Bit number: | |
107 | |
108 [ 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 ] | |
109 [ 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 ] | |
110 | |
111   <-----> <-----> <-------------> <-------------> ^ <-----------> | |
112    ELisp  unused  |comment bits |     unused      |  syntax code | |
113     tag           | | | | | | | |                 | | |
114    stuff          | | | | | | | |                 | | |
115                   | | | | | | | |                 | | |
116                   | | | | | | | |                 `--> prefix flag | |
117                   | | | | | | | | | |
118                   | | | | | | | `--> comment end style B, second char | |
119                   | | | | | | `----> comment end style A, second char | |
120                   | | | | | `------> comment end style B, first char | |
121                   | | | | `--------> comment end style A, first char | |
122                   | | | `----------> comment start style B, second char | |
123                   | | `------------> comment start style A, second char | |
124                   | `--------------> comment start style B, first char | |
125                   `----------------> comment start style A, first char | |
126 | |
127 In a 64-bit integer, there would be 32 more unused bits between | |
128 the tag and the comment bits. | |
129 | |
130 Clearly, such a scheme will not work for Mule, because the matching | |
131 paren could be any character and as such requires 19 bits, which | |
132 we don't have. | |
133 | |
134 Remember that under Mule we use char tables instead of vectors. | |
135 So what we do is use another char table for the matching paren | |
136 and store a pointer to it in the first char table. (This frees | |
137 code from having to worry about passing two tables around.) | |
138 */ | |
139 | |
111 | 140 |
112 /* The prefix flag bit for backward-prefix-chars is now put into bit 7. */ | 141 /* The prefix flag bit for backward-prefix-chars is now put into bit 7. */ |
113 | 142 |
114 #define SYNTAX_PREFIX_UNSAFE(table, c) \ | 143 #define SYNTAX_PREFIX_UNSAFE(table, c) \ |
115 ((SYNTAX_CODE_UNSAFE (table, c) >> 7) & 1) | 144 ((SYNTAX_CODE_UNSAFE (table, c) >> 7) & 1) |
116 #define SYNTAX_PREFIX(table, c) \ | 145 #define SYNTAX_PREFIX(table, c) \ |
117 ((SYNTAX_CODE (table, c) >> 7) & 1) | 146 ((SYNTAX_CODE (table, c) >> 7) & 1) |
118 | 147 |
119 /* The next 8 bits of the number is a character, | 148 /* Bits 23-16 are used to implement up to two comment styles |
120 the matching delimiter in the case of Sopen or Sclose. */ | |
121 | |
122 #define SYNTAX_MATCH(table, c) \ | |
123 ((SYNTAX_CODE (table, c) >> 8) & 0377) | |
124 | |
125 /* The next 8 bits are used to implement up to two comment styles | |
126 in a single buffer. They have the following meanings: | 149 in a single buffer. They have the following meanings: |
127 | 150 |
128 1. first of a one or two character comment-start sequence of style a. | 151 1. first of a one or two character comment-start sequence of style a. |
129 2. first of a one or two character comment-start sequence of style b. | 152 2. first of a one or two character comment-start sequence of style b. |
130 3. second of a two-character comment-start sequence of style a. | 153 3. second of a two-character comment-start sequence of style a. |
226 Lisp_Object syntax_match (Lisp_Object table, Emchar ch); | 249 Lisp_Object syntax_match (Lisp_Object table, Emchar ch); |
227 | 250 |
228 extern int no_quit_in_re_search; | 251 extern int no_quit_in_re_search; |
229 extern struct buffer *regex_emacs_buffer; | 252 extern struct buffer *regex_emacs_buffer; |
230 | 253 |
254 void update_syntax_table (struct Lisp_Char_Table *ct); | |
255 | |
231 #endif /* _XEMACS_SYNTAX_H_ */ | 256 #endif /* _XEMACS_SYNTAX_H_ */ |