Mercurial > hg > xemacs-beta
comparison lib-src/make-msgfile.lex @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | |
children | 8626e4521993 |
comparison
equal
deleted
inserted
replaced
69:804d1389bcd6 | 70:131b0175ea99 |
---|---|
1 %{ | |
2 | |
3 /* This is a Lex file. */ | |
4 | |
5 /* Localizable-message snarfing. | |
6 Copyright (C) 1994, 1995 Amdahl Corporation. | |
7 | |
8 This file is part of XEmacs. | |
9 | |
10 XEmacs is free software; you can redistribute it and/or modify | |
11 it under the terms of the GNU General Public License as published by | |
12 the Free Software Foundation; either version 2, or (at your option) | |
13 any later version. | |
14 | |
15 XEmacs is distributed in the hope that it will be useful, | |
16 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 GNU General Public License for more details. | |
19 | |
20 You should have received a copy of the GNU General Public License | |
21 along with XEmacs; see the file COPYING. If not, write to | |
22 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 Boston, MA 02111-1307, USA. */ | |
24 | |
25 /* Written by Ben Wing, November 1994. Some code based on earlier | |
26 make-msgfile.c. */ | |
27 | |
28 /* Note: there is still much work to be done on this. | |
29 | |
30 1) Definition of Arg below won't handle a generalized argument | |
31 as might appear in a function call. This is fine for DEFUN | |
32 and friends, because only simple arguments appear there; but | |
33 it might run into problems if Arg is used for other sorts | |
34 of functions. | |
35 2) snarf() should be modified so that it doesn't output null | |
36 strings and non-textual strings (see the comment at the top | |
37 of make-msgfile.c). | |
38 3) parsing of (insert) should snarf all of the arguments. | |
39 4) need to add set-keymap-prompt and deal with gettext of that. | |
40 5) parsing of arguments should snarf all strings anywhere within | |
41 the arguments, rather than just looking for a string as the | |
42 argument. This allows if statements as arguments to get parsed. | |
43 6) begin_paren_counting() et al. should handle recursive entry. | |
44 7) handle set-window-buffer and other such functions that take | |
45 a buffer as the other-than-first argument. | |
46 8) there is a fair amount of work to be done on the C code. | |
47 Look through the code for #### comments associated with | |
48 '#ifdef I18N3' or with an I18N3 nearby. | |
49 9) Deal with `get-buffer-process' et al. | |
50 10) Many of the changes in the Lisp code marked | |
51 'rewritten for I18N3 snarfing' should be undone once (5) is | |
52 implemented. | |
53 11) Go through the Lisp code in prim and make sure that all | |
54 strings are gettexted as necessary. This may reveal more | |
55 things to implement. | |
56 12) Do the equivalent of (8) for the Lisp code. | |
57 13) Deal with parsing of menu specifications. | |
58 | |
59 --ben | |
60 | |
61 */ | |
62 | |
63 /* Long comment from jwz: | |
64 | |
65 (much of this comment is outdated, and a lot of it is actually | |
66 implemented) | |
67 | |
68 | |
69 PROPOSAL FOR HOW THIS ALL OUGHT TO WORK | |
70 this isn't implemented yet, but this is the plan-in-progress | |
71 | |
72 | |
73 In general, it's accepted that the best way to internationalize is for all | |
74 messages to be referred to by a symbolic name (or number) and come out of a | |
75 table or tables, which are easy to change. | |
76 | |
77 However, with Emacs, we've got the task of internationalizing a huge body | |
78 of existing code, which already contains messages internally. | |
79 | |
80 For the C code we've got two options: | |
81 | |
82 - Use a Sun-like gettext() form, which takes an "english" string which | |
83 appears literally in the source, and uses that as a hash key to find | |
84 a translated string; | |
85 - Rip all of the strings out and put them in a table. | |
86 | |
87 In this case, it's desirable to make as few changes as possible to the C | |
88 code, to make it easier to merge the code with the FSF version of emacs | |
89 which won't ever have these changes made to it. So we should go with the | |
90 former option. | |
91 | |
92 The way it has been done (between 19.8 and 19.9) was to use gettext(), but | |
93 *also* to make massive changes to the source code. The goal now is to use | |
94 gettext() at run-time and yet not require a textual change to every line | |
95 in the C code which contains a string constant. A possible way to do this | |
96 is described below. | |
97 | |
98 (gettext() can be implemented in terms of catgets() for non-Sun systems, so | |
99 that in itself isn't a problem.) | |
100 | |
101 For the Lisp code, we've got basically the same options: put everything in | |
102 a table, or translate things implicitly. | |
103 | |
104 Another kink that lisp code introduces is that there are thousands of third- | |
105 party packages, so changing the source for all of those is simply not an | |
106 option. | |
107 | |
108 Is it a goal that if some third party package displays a message which is | |
109 one we know how to translate, then we translate it? I think this is a | |
110 worthy goal. It remains to be seen how well it will work in practice. | |
111 | |
112 So, we should endeavor to minimize the impact on the lisp code. Certain | |
113 primitive lisp routines (the stuff in lisp/prim/, and especially in | |
114 cmdloop.el and minibuf.el) may need to be changed to know about translation, | |
115 but that's an ideologically clean thing to do because those are considered | |
116 a part of the emacs substrate. | |
117 | |
118 However, if we find ourselves wanting to make changes to, say, RMAIL, then | |
119 something has gone wrong. (Except to do things like remove assumptions | |
120 about the order of words within a sentence, or how pluralization works.) | |
121 | |
122 There are two parts to the task of displaying translated strings to the | |
123 user: the first is to extract the strings which need to be translated from | |
124 the sources; and the second is to make some call which will translate those | |
125 strings before they are presented to the user. | |
126 | |
127 The old way was to use the same form to do both, that is, GETTEXT() was both | |
128 the tag that we searched for to build a catalog, and was the form which did | |
129 the translation. The new plan is to separate these two things more: the | |
130 tags that we search for to build the catalog will be stuff that was in there | |
131 already, and the translation will get done in some more centralized, lower | |
132 level place. | |
133 | |
134 This program (make-msgfile.c) addresses the first part, extracting the | |
135 strings. | |
136 | |
137 For the emacs C code, we need to recognise the following patterns: | |
138 | |
139 message ("string" ... ) | |
140 error ("string") | |
141 report_file_error ("string" ... ) | |
142 signal_simple_error ("string" ... ) | |
143 signal_simple_error_2 ("string" ... ) | |
144 | |
145 build_translated_string ("string") | |
146 #### add this and use it instead of build_string() in some places. | |
147 | |
148 yes_or_no_p ("string" ... ) | |
149 #### add this instead of funcalling Qyes_or_no_p directly. | |
150 | |
151 barf_or_query_if_file_exists #### restructure this | |
152 check all callers of Fsignal #### restructure these | |
153 signal_error (Qerror ... ) #### change all of these to error() | |
154 | |
155 And we also parse out the `interactive' prompts from DEFUN() forms. | |
156 | |
157 #### When we've got a string which is a candidate for translation, we | |
158 should ignore it if it contains only format directives, that is, if | |
159 there are no alphabetic characters in it that are not a part of a `%' | |
160 directive. (Careful not to translate either "%s%s" or "%s: ".) | |
161 | |
162 For the emacs Lisp code, we need to recognise the following patterns: | |
163 | |
164 (message "string" ... ) | |
165 (error "string" ... ) | |
166 (format "string" ... ) | |
167 (read-from-minibuffer "string" ... ) | |
168 (read-shell-command "string" ... ) | |
169 (y-or-n-p "string" ... ) | |
170 (yes-or-no-p "string" ... ) | |
171 (read-file-name "string" ... ) | |
172 (temp-minibuffer-message "string") | |
173 (query-replace-read-args "string" ... ) | |
174 | |
175 I expect there will be a lot like the above; basically, any function which | |
176 is a commonly used wrapper around an eventual call to `message' or | |
177 `read-from-minibuffer' needs to be recognised by this program. | |
178 | |
179 | |
180 (dgettext "domain-name" "string") #### do we still need this? | |
181 | |
182 things that should probably be restructured: | |
183 `princ' in cmdloop.el | |
184 `insert' in debug.el | |
185 face-interactive | |
186 help.el, syntax.el all messed up | |
187 | |
188 BPW: (format) is a tricky case. If I use format to create a string | |
189 that I then send to a file, I probably don't want the string translated. | |
190 On the other hand, if the string gets used as an argument to (y-or-n-p) | |
191 or some such function, I do want it translated, and it needs to be | |
192 translated before the %s and such are replaced. The proper solution | |
193 here is for (format) and other functions that call gettext but don't | |
194 immediately output the string to the user to add the translated (and | |
195 formatted) string as a string property of the object, and have | |
196 functions that output potentially translated strings look for a | |
197 "translated string" property. Of course, this will fail if someone | |
198 does something like | |
199 | |
200 (y-or-n-p (concat (if you-p "Do you " "Does he ") | |
201 (format "want to delete %s? " filename))) | |
202 | |
203 But you shouldn't be doing things like this anyway. | |
204 | |
205 BPW: Also, to avoid excessive translating, strings should be marked | |
206 as translated once they get translated, and further calls to gettext | |
207 don't do any more translating. Otherwise, a call like | |
208 | |
209 (y-or-n-p (format "Delete %s? " filename)) | |
210 | |
211 would cause translation on both the pre-formatted and post-formatted | |
212 strings, which could lead to weird results in some cases (y-or-n-p | |
213 has to translate its argument because someone could pass a string to | |
214 it directly). Note that the "translating too much" solution outlined | |
215 below could be implemented by just marking all strings that don't | |
216 come from a .el or .elc file as already translated. | |
217 | |
218 Menu descriptors: one way to extract the strings in menu labels would be | |
219 to teach this program about "^(defvar .*menu\n" forms; that's probably | |
220 kind of hard, though, so perhaps a better approach would be to make this | |
221 program recognise lines of the form | |
222 | |
223 "string" ... ;###translate | |
224 | |
225 where the magic token ";###translate" on a line means that the string | |
226 constant on this line should go into the message catalog. This is analogous | |
227 to the magic ";###autoload" comments, and to the magic comments used in the | |
228 EPSF structuring conventions. | |
229 | |
230 ----- | |
231 So this program manages to build up a catalog of strings to be translated. | |
232 To address the second part of the problem, of actually looking up the | |
233 translations, there are hooks in a small number of low level places in | |
234 emacs. | |
235 | |
236 Assume the existence of a C function gettext(str) which returns the | |
237 translation of `str' if there is one, otherwise returns `str'. | |
238 | |
239 - message() takes a char* as its argument, and always filters it through | |
240 gettext() before displaying it. | |
241 | |
242 - errors are printed by running the lisp function `display-error' which | |
243 doesn't call `message' directly (it princ's to streams), so it must be | |
244 carefully coded to translate its arguments. This is only a few lines | |
245 of code. | |
246 | |
247 - Fread_minibuffer_internal() is the lowest level interface to all minibuf | |
248 interactions, so it is responsible for translating the value that will go | |
249 into Vminibuf_prompt. | |
250 | |
251 - Fpopup_menu filters the menu titles through gettext(). | |
252 | |
253 The above take care of 99% of all messages the user ever sees. | |
254 | |
255 - The lisp function temp-minibuffer-message translates its arg. | |
256 | |
257 - query-replace-read-args is funny; it does | |
258 (setq from (read-from-minibuffer (format "%s: " string) ... )) | |
259 (setq to (read-from-minibuffer (format "%s %s with: " string from) ... )) | |
260 | |
261 What should we do about this? We could hack query-replace-read-args to | |
262 translate its args, but might this be a more general problem? I don't | |
263 think we ought to translate all calls to format. We could just change | |
264 the calling sequence, since this is odd in that the first %s wants to be | |
265 translated but the second doesn't. | |
266 | |
267 | |
268 Solving the "translating too much" problem: | |
269 The concern has been raised that in this situation: | |
270 - "Help" is a string for which we know a translation; | |
271 - someone visits a file called Help, and someone does something | |
272 contrived like (error buffer-file-name) | |
273 then we would display the translation of Help, which would not be correct. | |
274 We can solve this by adding a bit to Lisp_String objects which identifies | |
275 them as having been read as literal constants from a .el or .elc file (as | |
276 opposed to having been constructed at run time as it would in the above | |
277 case.) To solve this: | |
278 | |
279 - Fmessage() takes a lisp string as its first argument. | |
280 If that string is a constant, that is, was read from a source file | |
281 as a literal, then it calls message() with it, which translates. | |
282 Otherwise, it calls message_no_translate(), which does not translate. | |
283 | |
284 - Ferror() (actually, Fsignal() when condition is Qerror) works similarly. | |
285 */ | |
286 | |
287 /* Some notes: | |
288 | |
289 -- {Arg} below could get confused by commas inside of quotes. | |
290 -- {LispToken} below can match some things that are not tokens (e.g. | |
291 numbers) but for all practical purposes it should be fine. | |
292 */ | |
293 | |
#include <stdio.h>
#include <string.h>
295 | |
/* Start state that the CSNARF and LSNARF rules switch back to once the
   closing quote of a snarfed string has been written out. */
int snarf_return_state;

/* Helpers defined in the user-code section at the end of this file.
   They are called from rule actions, which precede their definitions,
   so declare them here instead of relying on implicit declarations. */
int snarf (void);
int bad_c_defun (void);
int bad_lisp_def (void);
int inc_paren (void);
int dec_paren (void);
int begin_paren_counting (int return_state);
int lisp_whitespace (void);
297 | |
298 %} | |
299 | |
%p 6000
%e 2000
%n 1000
%a 4000
%s C_QUOTE C_COMMENT LQUO LCOM
%s CSNARF LSNARF
%s DO_C DO_LISP DEFUN
%s DEFUN2 DEFUN3 LDEF

	/* Single characters and character classes. */
W	[ \t\n]
Any	(.|"\n")
Q	"\""
NQ	[^"]
NT	[^A-Za-z_0-9]
LP	"("
RP	")"
BS	"\\"
	/* A backslash followed by any character, newline included. */
Esc	({BS}{Any})
Wh	({W}*)
	/* A Lisp comment: a semicolon through the end of the line. */
LCom	(";"({Esc}|.)*)
	/* Lisp whitespace: blanks and comments.  Definition names are
	   case-sensitive, so this must say {LCom}, not {Lcom}. */
LWh	(({W}|{LCom})*)
Open	({Wh}{LP})
	/* An open paren followed by the opening quote of a string. */
OpWQ	({Open}{Wh}{Q})
String	({Q}({Esc}|{NQ})*{Q})
	/* Everything up to and including the next comma. */
Arg	([^,]*",")
StringArg	({Wh}{String}{Wh}",")
OpenString	({Open}{StringArg})
LispToken	(({Esc}|[-A-Za-z0-9!@$%^&*_=+|{}`~,<.>/?])+)
328 %% | |
329 | |
	/* DO_C: scanning ordinary C text.  Each rule below that calls
	   snarf () matches a known function or macro name, preceded by a
	   character that cannot be part of an identifier ({NT}), up to and
	   including the opening double quote of the string to extract. */
330 <DO_C>{NT}"GETTEXT"{OpWQ} { snarf (); } | |
331 <DO_C>{NT}"DEFER_GETTEXT"{OpWQ} { snarf (); } | |
332 <DO_C>{NT}"build_translated_string"{OpWQ} { snarf (); } | |
333 <DO_C>{NT}"insert_string"{OpWQ} { snarf (); } | |
334 <DO_C>{NT}"message"{OpWQ} { snarf (); } | |
335 <DO_C>{NT}"warn_when_safe"{OpWQ} { snarf (); } | |
336 <DO_C>{NT}"error"{OpWQ} { snarf (); } | |
337 <DO_C>{NT}"continuable_error"{OpWQ} { snarf (); } | |
338 <DO_C>{NT}"signal_simple_error"{OpWQ} { snarf (); } | |
339 <DO_C>{NT}"signal_simple_error_2"{OpWQ} { snarf (); } | |
340 <DO_C>{NT}"signal_simple_continuable_error"{OpWQ} { snarf (); } | |
341 <DO_C>{NT}"signal_simple_continuable_error_2"{OpWQ} { snarf (); } | |
342 <DO_C>{NT}"report_file_error"{OpWQ} { snarf (); } | |
343 <DO_C>{NT}"signal_file_error"{OpWQ} { snarf (); } | |
344 <DO_C>{NT}"signal_double_file_error"{OpWQ} { snarf (); } | |
345 <DO_C>{NT}"signal_double_file_error_2"{OpWQ} { snarf (); } | |
346 <DO_C>{NT}"syntax_error"{OpWQ} { snarf (); } | |
347 <DO_C>{NT}"continuable_syntax_error"{OpWQ} { snarf (); } | |
348 <DO_C>{NT}"CTB_ERROR"{OpWQ} { snarf (); } | |
349 <DO_C>{NT}"fatal"{OpWQ} { snarf (); } | |
350 <DO_C>{NT}"stdout_out"{OpWQ} { snarf (); } | |
351 <DO_C>{NT}"stderr_out"{OpWQ} { snarf (); } | |
352 <DO_C>{NT}"with_output_to_temp_buffer"{OpWQ} { snarf (); } | |
353 | |
	/* DEFVAR_*: skip the leading string argument and one more
	   comma-terminated argument, then snarf the string that follows.
	   deferror: skip one argument and one string argument first. */
354 <DO_C>{NT}"DEFVAR_BOOL"{OpenString}{Arg}{Wh}{Q} { snarf (); } | |
355 <DO_C>{NT}"DEFVAR_LISP"{OpenString}{Arg}{Wh}{Q} { snarf (); } | |
356 <DO_C>{NT}"DEFVAR_SPECIFIER"{OpenString}{Arg}{Wh}{Q} { snarf (); } | |
357 <DO_C>{NT}"DEFVAR_INT"{OpenString}{Arg}{Wh}{Q} { snarf (); } | |
358 <DO_C>{NT}"DEFVAR_BUFFER_LOCAL"{OpenString}{Arg}{Wh}{Q} { snarf (); } | |
359 <DO_C>{NT}"DEFVAR_BUFFER_DEFAULTS"{OpenString}{Arg}{Wh}{Q} { snarf (); } | |
360 <DO_C>{NT}"deferror"{Open}{Arg}{StringArg}{Wh}{Q} { snarf (); } | |
361 | |
362 <DO_C>{NT}"barf_or_query_if_file_exists"{Open}{Arg}{Wh}{Q} { | |
363 /* #### see comment above about use of Arg */ | |
364 snarf (); | |
365 } | |
366 | |
	/* After `DEFUN (', hand the argument list over to the DEFUN
	   states. */
367 <DO_C>{NT}"DEFUN"{Open} { BEGIN DEFUN; } | |
368 | |
	/* The start of a C comment is consumed but the state does not
	   change, so comment bodies are scanned like any other C text. */
369 <DO_C>"/*" { | |
370 /* This is hateful, but doc strings are sometimes put inside of comments | |
371 (to get around limits in cpp), so we can't ignore stuff inside of | |
372 comments. */ | |
373 /* BEGIN C_COMMENT; */ | |
374 } | |
	/* Any other string literal is skipped in C_QUOTE; any other
	   character is discarded. */
375 <DO_C>{Q} { BEGIN C_QUOTE; } | |
376 <DO_C>{Any} { } | |
377 | |
	/* DEFUN: entered right after `DEFUN ('.  Skip a string argument
	   and four more comma-terminated arguments, then look for the
	   interactive specification in DEFUN2.  Any other input is
	   reported through bad_c_defun (), which returns to DO_C. */
378 <DEFUN>{StringArg}{Arg}{Arg}{Arg}{Arg}{Wh} { BEGIN DEFUN2; } | |
379 <DEFUN>{Any} { bad_c_defun (); } | |
380 | |
	/* DEFUN2: a double quote here starts the interactive
	   specification; it is snarfed and scanning resumes in DEFUN3. */
381 <DEFUN2>{Q} { | |
382 /* We found an interactive specification. */ | |
383 snarf_return_state = DEFUN3; | |
384 snarf (); | |
385 } | |
386 <DEFUN2>[^,]* { | |
387 /* This function doesn't have an interactive specification. | |
388 Don't use {Arg} in the specification because DEFUN3 looks | |
389 for the comma. */ | |
390 BEGIN DEFUN3; | |
391 } | |
392 | |
	/* DEFUN3: expect a comma and then the opening quote of the next
	   string; snarf it and return to DO_C afterwards. */
393 <DEFUN3>{Wh}","{Wh}{Q} { | |
394 snarf_return_state = DO_C; | |
395 snarf (); | |
396 } | |
397 <DEFUN3>{Any} { bad_c_defun (); } | |
398 | |
	/* C_QUOTE: inside a C string that is not being snarfed.  Skip
	   escaped characters and everything else until the closing
	   quote, then return to DO_C. */
399 <C_QUOTE>{Esc} { } | |
400 <C_QUOTE>{Q} { BEGIN DO_C; } | |
401 <C_QUOTE>{Any} { } | |
402 | |
	/* C_COMMENT: discard text up to the comment terminator.  The
	   only `BEGIN C_COMMENT' in the DO_C rules is commented out. */
403 <C_COMMENT>"*/" { BEGIN DO_C; } | |
404 <C_COMMENT>{Any} { } | |
405 | |
	/* DO_LISP: scanning ordinary Lisp text.  Each rule below that
	   calls snarf () matches an open paren, a known function name
	   and the opening double quote of its first argument.
	   inc_paren () accounts for the paren the pattern consumed. */
406 <DO_LISP>{LP}{LWh}"gettext"{LWh}{Q} { inc_paren (); snarf (); } | |
407 <DO_LISP>{LP}{LWh}"purecopy"{LWh}{Q} { inc_paren (); snarf (); } | |
408 <DO_LISP>{LP}{LWh}"interactive"{LWh}{Q} { inc_paren (); snarf (); } | |
409 <DO_LISP>{LP}{LWh}"message"{LWh}{Q} { inc_paren (); snarf (); } | |
410 <DO_LISP>{LP}{LWh}"error"{LWh}{Q} { inc_paren (); snarf (); } | |
411 <DO_LISP>{LP}{LWh}"warn"{LWh}{Q} { inc_paren (); snarf (); } | |
412 <DO_LISP>{LP}{LWh}"format"{LWh}{Q} { inc_paren (); snarf (); } | |
413 <DO_LISP>{LP}{LWh}"substitute-command-keys"{LWh}{Q} { inc_paren (); snarf (); } | |
414 <DO_LISP>{LP}{LWh}"temp-minibuffer-message"{LWh}{Q} { inc_paren (); snarf (); } | |
415 <DO_LISP>{LP}{LWh}"momentary-string-display"{LWh}{Q} { inc_paren (); snarf (); } | |
416 <DO_LISP>{LP}{LWh}"princ"{LWh}{Q} { inc_paren (); snarf (); } | |
417 <DO_LISP>{LP}{LWh}"prin1"{LWh}{Q} { inc_paren (); snarf (); } | |
418 <DO_LISP>{LP}{LWh}"prin1-to-string"{LWh}{Q} { inc_paren (); snarf (); } | |
419 <DO_LISP>{LP}{LWh}"print"{LWh}{Q} { inc_paren (); snarf (); } | |
420 <DO_LISP>{LP}{LWh}"insert"{LWh}{Q} { inc_paren (); snarf (); } | |
421 <DO_LISP>{LP}{LWh}"insert-before-markers"{LWh}{Q} { inc_paren (); snarf (); } | |
422 | |
	/* Functions whose first argument is a buffer name. */
423 <DO_LISP>{LP}{LWh}"get-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
424 <DO_LISP>{LP}{LWh}"get-buffer-create"{LWh}{Q} { inc_paren (); snarf (); } | |
425 <DO_LISP>{LP}{LWh}"generate-new-buffer-name"{LWh}{Q} { inc_paren (); snarf (); } | |
426 <DO_LISP>{LP}{LWh}"rename-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
427 <DO_LISP>{LP}{LWh}"set-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
428 <DO_LISP>{LP}{LWh}"switch-to-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
429 <DO_LISP>{LP}{LWh}"pop-to-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
430 <DO_LISP>{LP}{LWh}"with-output-to-temp-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
431 <DO_LISP>{LP}{LWh}"buffer-enable-undo"{LWh}{Q} { inc_paren (); snarf (); } | |
432 <DO_LISP>{LP}{LWh}"buffer-disable-undo"{LWh}{Q} { inc_paren (); snarf (); } | |
433 <DO_LISP>{LP}{LWh}"get-buffer-window"{LWh}{Q} { inc_paren (); snarf (); } | |
434 <DO_LISP>{LP}{LWh}"delete-windows-on"{LWh}{Q} { inc_paren (); snarf (); } | |
435 <DO_LISP>{LP}{LWh}"replace-buffer-in-windows"{LWh}{Q} { inc_paren (); snarf (); } | |
436 <DO_LISP>{LP}{LWh}"display-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
437 <DO_LISP>{LP}{LWh}"other-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
438 | |
	/* Minibuffer readers and queries; the string is the prompt. */
439 <DO_LISP>{LP}{LWh}"read-from-minibuffer"{LWh}{Q} { inc_paren (); snarf (); } | |
440 <DO_LISP>{LP}{LWh}"read-shell-command"{LWh}{Q} { inc_paren (); snarf (); } | |
441 <DO_LISP>{LP}{LWh}"read-file-name"{LWh}{Q} { inc_paren (); snarf (); } | |
442 <DO_LISP>{LP}{LWh}"read-buffer"{LWh}{Q} { inc_paren (); snarf (); } | |
443 <DO_LISP>{LP}{LWh}"read-variable"{LWh}{Q} { inc_paren (); snarf (); } | |
444 <DO_LISP>{LP}{LWh}"read-command"{LWh}{Q} { inc_paren (); snarf (); } | |
445 <DO_LISP>{LP}{LWh}"read-function"{LWh}{Q} { inc_paren (); snarf (); } | |
446 <DO_LISP>{LP}{LWh}"read-directory-name"{LWh}{Q} { inc_paren (); snarf (); } | |
447 <DO_LISP>{LP}{LWh}"read-string"{LWh}{Q} { inc_paren (); snarf (); } | |
448 <DO_LISP>{LP}{LWh}"read-number"{LWh}{Q} { inc_paren (); snarf (); } | |
449 <DO_LISP>{LP}{LWh}"read-minibuffer"{LWh}{Q} { inc_paren (); snarf (); } | |
450 <DO_LISP>{LP}{LWh}"read-quoted-char"{LWh}{Q} { inc_paren (); snarf (); } | |
451 <DO_LISP>{LP}{LWh}"read-face-name"{LWh}{Q} { inc_paren (); snarf (); } | |
452 <DO_LISP>{LP}{LWh}"read-itimer"{LWh}{Q} { inc_paren (); snarf (); } | |
453 <DO_LISP>{LP}{LWh}"completing-read"{LWh}{Q} { inc_paren (); snarf (); } | |
454 <DO_LISP>{LP}{LWh}"y-or-n-p"{LWh}{Q} { inc_paren (); snarf (); } | |
455 <DO_LISP>{LP}{LWh}"yes-or-no-p"{LWh}{Q} { inc_paren (); snarf (); } | |
456 <DO_LISP>{LP}{LWh}"query-replace-read-args"{LWh}{Q} { inc_paren (); snarf (); } | |
457 <DO_LISP>{LP}{LWh}"eval-minibuffer"{LWh}{Q} { inc_paren (); snarf (); } | |
458 <DO_LISP>{LP}{LWh}"edit-and-eval-command"{LWh}{Q} { inc_paren (); snarf (); } | |
459 | |
	/* Definition forms: consume the defining keyword and the name,
	   then start paren counting with LDEF as the state to enter when
	   dec_paren () or lisp_whitespace () decides counting is over. */
460 <DO_LISP>{LP}{LWh}"defvar"{LWh}{LispToken}{LWh} { | |
461 inc_paren (); begin_paren_counting (LDEF); | |
462 } | |
463 <DO_LISP>{LP}{LWh}"defconst"{LWh}{LispToken}{LWh} { | |
464 inc_paren (); begin_paren_counting (LDEF); | |
465 } | |
466 <DO_LISP>{LP}{LWh}"defun"{LWh}{LispToken}{LWh} { | |
467 inc_paren (); begin_paren_counting (LDEF); | |
468 } | |
469 <DO_LISP>{LP}{LWh}"defmacro"{LWh}{LispToken}{LWh} { | |
470 inc_paren (); begin_paren_counting (LDEF); | |
471 } | |
472 <DO_LISP>{LP}{LWh}"defsubst"{LWh}{LispToken}{LWh} { | |
473 inc_paren (); begin_paren_counting (LDEF); | |
474 } | |
475 | |
	/* Everything else: skip other strings and comments in their own
	   states, keep the paren count up to date, and discard the
	   rest. */
476 <DO_LISP>{Q} { BEGIN LQUO; } | |
477 <DO_LISP>";" { BEGIN LCOM; } | |
478 <DO_LISP>{LP} { inc_paren (); } | |
479 <DO_LISP>{RP} { dec_paren (); } | |
480 <DO_LISP>{Esc} { } | |
481 <DO_LISP>{W} { lisp_whitespace (); } | |
482 <DO_LISP>{Any} { } | |
483 | |
	/* LQUO: inside a Lisp string that is not being snarfed; skip to
	   the closing quote and return to DO_LISP. */
484 <LQUO>{Esc} { } | |
485 <LQUO>{Q} { BEGIN DO_LISP; } | |
486 <LQUO>{Any} { } | |
487 | |
	/* LCOM: inside a Lisp comment; skip to the end of the line. */
488 <LCOM>"\n" { BEGIN DO_LISP; } | |
489 <LCOM>{Any} { } | |
490 | |
	/* LDEF: entered when paren counting started by a definition form
	   finishes.  If the next thing after whitespace and comments is
	   a double quote, snarf that string; otherwise consume one
	   character and return to DO_LISP. */
491 <LDEF>{LWh}{Q} { snarf (); } | |
492 <LDEF>{Any} { BEGIN DO_LISP; } | |
493 | |
	/* CSNARF: copying the body of a C string to yyout.  At the
	   closing quote, echo it, write a close paren and a newline, and
	   switch to snarf_return_state. */
494 <CSNARF>{Esc} { ECHO; } | |
495 <CSNARF>{Q} { ECHO; fprintf (yyout, ")\n"); BEGIN snarf_return_state; } | |
496 <CSNARF>{Any} { ECHO; } | |
497 | |
	/* LSNARF: the same for a Lisp string, except that a newline in
	   the string is written as a backslash, `n', a backslash and a
	   real newline. */
498 <LSNARF>{Esc} { ECHO; } | |
499 <LSNARF>"\n" { fprintf (yyout, "\\n\\\n"); } | |
500 <LSNARF>{Q} { ECHO; fprintf (yyout, ")\n"); BEGIN snarf_return_state; } | |
501 <LSNARF>{Any} { ECHO; } | |
502 | |
503 %% | |
504 | |
/* Kind of input file, as decided from the file name by scan_file (). */
enum filetype
{
  C_FILE,
  LISP_FILE,
  INVALID_FILE
};

/* some brain-dead headers define this ... */
#undef FALSE
#undef TRUE
enum boolean
{
  FALSE,
  TRUE
};

void scan_file (char *filename);
void process_C_file (void);
void process_Lisp_file (void);

/* Nonzero while a C file is being scanned, zero for a Lisp file;
   snarf () uses it to choose between CSNARF and LSNARF. */
int in_c;

/* State used by inc_paren (), dec_paren (), begin_paren_counting () and
   lisp_whitespace (): whether counting is active, the current nesting
   depth, and the start state to enter when counting ends. */
int in_paren_counting;
int paren_count;
int paren_return_state;
518 | |
519 snarf () | |
520 { | |
521 fprintf (yyout, "gettext(\""); | |
522 if (in_c) | |
523 BEGIN CSNARF; | |
524 else | |
525 BEGIN LSNARF; | |
526 } | |
527 | |
528 bad_c_defun () | |
529 { | |
530 fprintf (stderr, "Warning: Invalid DEFUN encountered in C, line %d.\n", | |
531 yylineno); | |
532 snarf_return_state = DO_C; | |
533 BEGIN DO_C; | |
534 /* REJECT; Sun's lex is broken! Use Flex! */ | |
535 } | |
536 | |
537 bad_lisp_def () | |
538 { | |
539 fprintf (stderr, | |
540 "Warning: Invalid defmumble encountered in Lisp, line %d.\n", | |
541 yylineno); | |
542 snarf_return_state = DO_LISP; | |
543 BEGIN DO_LISP; | |
544 /* REJECT; Sun's lex is broken! Use Flex! */ | |
545 } | |
546 | |
547 inc_paren () | |
548 { | |
549 if (in_paren_counting) | |
550 paren_count++; | |
551 } | |
552 | |
553 dec_paren () | |
554 { | |
555 if (in_paren_counting) | |
556 { | |
557 /* If we find a right paren without a matching left paren, it usually | |
558 just indicates a statement like | |
559 | |
560 (defvar foo-mumble nil) | |
561 | |
562 where 'nil' is the sexp we are skipping over, and there's no | |
563 doc string. */ | |
564 if (paren_count > 0) | |
565 paren_count--; | |
566 else | |
567 unput (')'); | |
568 if (paren_count == 0) | |
569 { | |
570 in_paren_counting = 0; | |
571 BEGIN paren_return_state; | |
572 } | |
573 } | |
574 } | |
575 | |
576 /* #### begin_paren_counting () does not handle recursive entries */ | |
577 | |
578 begin_paren_counting (int return_state) | |
579 { | |
580 in_paren_counting = 1; | |
581 paren_count = 0; | |
582 paren_return_state = return_state; | |
583 } | |
584 | |
585 lisp_whitespace () | |
586 { | |
587 if (in_paren_counting && !paren_count) | |
588 { | |
589 /* We got to the end of a token and we're not in a parenthesized | |
590 expression, so we're at the end of an sexp. */ | |
591 in_paren_counting = 0; | |
592 BEGIN paren_return_state; | |
593 } | |
594 } | |
595 | |
/* End-of-input hook for the scanner.  Always returns 1; scan_file ()
   is what opens each successive input file. */
int
yywrap (void)
{
  return 1;
}
600 | |
601 main (int argc, char *argv[]) | |
602 { | |
603 register int i; | |
604 | |
605 yyout = stdout; | |
606 | |
607 /* If first two args are -o FILE, output to FILE. */ | |
608 i = 1; | |
609 if (argc > i + 1 && strcmp (argv[i], "-o") == 0) { | |
610 yyout = fopen (argv[++i], "w"); | |
611 ++i; | |
612 } | |
613 /* ...Or if args are -a FILE, append to FILE. */ | |
614 if (argc > i + 1 && strcmp (argv[i], "-a") == 0) { | |
615 yyout = fopen (argv[++i], "a"); | |
616 ++i; | |
617 } | |
618 if (!yyout) { | |
619 fprintf (stderr, "Unable to open output file %s\n", argv[--i]); | |
620 return; | |
621 } | |
622 | |
623 for (; i < argc; i++) | |
624 scan_file (argv[i]); | |
625 | |
626 return 0; | |
627 } | |
628 | |
629 | |
630 void scan_file (char *filename) | |
631 { | |
632 enum filetype type = INVALID_FILE; | |
633 register char *p = filename + strlen (filename); | |
634 | |
635 if (strcmp (p - 4, ".elc") == 0) { | |
636 *--p = '\0'; /* Use .el file instead */ | |
637 type = LISP_FILE; | |
638 } else if (strcmp (p - 3, ".el") == 0) | |
639 type = LISP_FILE; | |
640 else if (strcmp (p - 2, ".o") == 0) { | |
641 *--p = 'c'; /* Use .c file instead */ | |
642 type = C_FILE; | |
643 } else if (strcmp (p - 2, ".c") == 0) | |
644 type = C_FILE; | |
645 | |
646 if (type == INVALID_FILE) { | |
647 fprintf (stderr, "File %s being ignored\n", filename); | |
648 return; | |
649 } | |
650 yyin = fopen (filename, "r"); | |
651 if (!yyin) { | |
652 fprintf (stderr, "Unable to open input file %s\n", filename); | |
653 return; | |
654 } | |
655 | |
656 fprintf (yyout, "/* %s */\n", filename); | |
657 if (type == C_FILE) | |
658 process_C_file (); | |
659 else | |
660 process_Lisp_file (); | |
661 fputc ('\n', yyout); | |
662 | |
663 fclose (yyin); | |
664 } | |
665 | |
666 void process_C_file () | |
667 { | |
668 snarf_return_state = DO_C; | |
669 in_c = 1; | |
670 BEGIN DO_C; | |
671 yylex (); | |
672 } | |
673 | |
674 void process_Lisp_file () | |
675 { | |
676 snarf_return_state = DO_LISP; | |
677 in_c = 0; | |
678 BEGIN DO_LISP; | |
679 yylex (); | |
680 } | |
681 |