Mercurial > hg > xemacs-beta
comparison lib-src/make-msgfile.c @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | |
children | 8626e4521993 |
comparison
equal
deleted
inserted
replaced
69:804d1389bcd6 | 70:131b0175ea99 |
---|---|
1 /* | |
2 | |
3 | |
4 PROPOSAL FOR HOW THIS ALL OUGHT TO WORK | |
5 this isn't implemented yet, but this is the plan-in-progress | |
6 | |
7 | |
8 In general, it's accepted that the best way to internationalize is for all | |
9 messages to be referred to by a symbolic name (or number) and come out of a | |
10 table or tables, which are easy to change. | |
11 | |
12 However, with Emacs, we've got the task of internationalizing a huge body | |
13 of existing code, which already contains messages internally. | |
14 | |
15 For the C code we've got two options: | |
16 | |
17 - Use a Sun-like gettext() form, which takes an "english" string which | |
18 appears literally in the source, and uses that as a hash key to find | |
19 a translated string; | |
20 - Rip all of the strings out and put them in a table. | |
21 | |
22 In this case, it's desirable to make as few changes as possible to the C | |
23 code, to make it easier to merge the code with the FSF version of emacs | |
24 which won't ever have these changes made to it. So we should go with the | |
25 former option. | |
26 | |
27 The way it has been done (between 19.8 and 19.9) was to use gettext(), but | |
28 *also* to make massive changes to the source code. The goal now is to use | |
29 gettext() at run-time and yet not require a textual change to every line | |
30 in the C code which contains a string constant. A possible way to do this | |
31 is described below. | |
32 | |
33 (gettext() can be implemented in terms of catgets() for non-Sun systems, so | |
34 that in itself isn't a problem.) | |
35 | |
36 For the Lisp code, we've got basically the same options: put everything in | |
37 a table, or translate things implicitly. | |
38 | |
39 Another kink that lisp code introduces is that there are thousands of third- | |
40 party packages, so changing the source for all of those is simply not an | |
41 option. | |
42 | |
43 Is it a goal that if some third party package displays a message which is | |
44 one we know how to translate, then we translate it? I think this is a | |
45 worthy goal. It remains to be seen how well it will work in practice. | |
46 | |
47 So, we should endeavor to minimize the impact on the lisp code. Certain | |
48 primitive lisp routines (the stuff in lisp/prim/, and especially in | |
49 cmdloop.el and minibuf.el) may need to be changed to know about translation, | |
50 but that's an ideologically clean thing to do because those are considered | |
51 a part of the emacs substrate. | |
52 | |
53 However, if we find ourselves wanting to make changes to, say, RMAIL, then | |
54 something has gone wrong. (Except to do things like remove assumptions | |
55 about the order of words within a sentence, or how pluralization works.) | |
56 | |
57 There are two parts to the task of displaying translated strings to the | |
58 user: the first is to extract the strings which need to be translated from | |
59 the sources; and the second is to make some call which will translate those | |
60 strings before they are presented to the user. | |
61 | |
62 The old way was to use the same form to do both, that is, GETTEXT() was both | |
63 the tag that we searched for to build a catalog, and was the form which did | |
64 the translation. The new plan is to separate these two things more: the | |
65 tags that we search for to build the catalog will be stuff that was in there | |
66 already, and the translation will get done in some more centralized, lower | |
67 level place. | |
68 | |
69 This program (make-msgfile.c) addresses the first part, extracting the | |
70 strings. | |
71 | |
72 For the emacs C code, we need to recognise the following patterns: | |
73 | |
74 message ("string" ... ) | |
75 error ("string") | |
76 report_file_error ("string" ... ) | |
77 signal_simple_error ("string" ... ) | |
78 signal_simple_error_2 ("string" ... ) | |
79 | |
80 build_translated_string ("string") | |
81 #### add this and use it instead of build_string() in some places. | |
82 | |
83 yes_or_no_p ("string" ... ) | |
84 #### add this instead of funcalling Qyes_or_no_p directly. | |
85 | |
86 barf_or_query_if_file_exists #### restructure this | |
87 check all callers of Fsignal #### restructure these | |
88 signal_error (Qerror ... ) #### change all of these to error() | |
89 | |
90 And we also parse out the `interactive' prompts from DEFUN() forms. | |
91 | |
92 #### When we've got a string which is a candidate for translation, we | |
93 should ignore it if it contains only format directives, that is, if | |
94 there are no alphabetic characters in it that are not a part of a `%' | |
95 directive. (Careful not to translate either "%s%s" or "%s: ".) | |
96 | |
97 For the emacs Lisp code, we need to recognise the following patterns: | |
98 | |
99 (message "string" ... ) | |
100 (error "string" ... ) | |
101 (format "string" ... ) | |
102 (read-from-minibuffer "string" ... ) | |
103 (read-shell-command "string" ... ) | |
104 (y-or-n-p "string" ... ) | |
105 (yes-or-no-p "string" ... ) | |
106 (read-file-name "string" ... ) | |
107 (temp-minibuffer-message "string") | |
108 (query-replace-read-args "string" ... ) | |
109 | |
110 I expect there will be a lot like the above; basically, any function which | |
111 is a commonly used wrapper around an eventual call to `message' or | |
112 `read-from-minibuffer' needs to be recognised by this program. | |
113 | |
114 | |
115 (dgettext "domain-name" "string") #### do we still need this? | |
116 | |
117 things that should probably be restructured: | |
118 `princ' in cmdloop.el | |
119 `insert' in debug.el | |
120 face-interactive | |
121 help.el, syntax.el all messed up | |
122 | |
123 | |
124 Menu descriptors: one way to extract the strings in menu labels would be | |
125 to teach this program about "^(defvar .*menu\n" forms; that's probably | |
126 kind of hard, though, so perhaps a better approach would be to make this | |
127 program recognise lines of the form | |
128 | |
129 "string" ... ;###translate | |
130 | |
131 where the magic token ";###translate" on a line means that the string | |
132 constant on this line should go into the message catalog. This is analogous | |
133 to the magic ";###autoload" comments, and to the magic comments used in the | |
134 EPSF structuring conventions. | |
135 | |
136 ----- | |
137 So this program manages to build up a catalog of strings to be translated. | |
138 To address the second part of the problem, of actually looking up the | |
139 translations, there are hooks in a small number of low level places in | |
140 emacs. | |
141 | |
142 Assume the existence of a C function gettext(str) which returns the | |
143 translation of `str' if there is one, otherwise returns `str'. | |
144 | |
145 - message() takes a char* as its argument, and always filters it through | |
146 gettext() before displaying it. | |
147 | |
148 - errors are printed by running the lisp function `display-error' which | |
149 doesn't call `message' directly (it princ's to streams), so it must be | |
150 carefully coded to translate its arguments. This is only a few lines | |
151 of code. | |
152 | |
153 - Fread_minibuffer_internal() is the lowest level interface to all minibuf | |
154 interactions, so it is responsible for translating the value that will go | |
155 into Vminibuf_prompt. | |
156 | |
157 - Fpopup_menu filters the menu titles through gettext(). | |
158 | |
159 The above take care of 99% of all messages the user ever sees. | |
160 | |
161 - The lisp function temp-minibuffer-message translates its arg. | |
162 | |
163 - query-replace-read-args is funny; it does | |
164 (setq from (read-from-minibuffer (format "%s: " string) ... )) | |
165 (setq to (read-from-minibuffer (format "%s %s with: " string from) ... )) | |
166 | |
167 What should we do about this? We could hack query-replace-read-args to | |
168 translate its args, but might this be a more general problem? I don't | |
169 think we ought to translate all calls to format. We could just change | |
170 the calling sequence, since this is odd in that the first %s wants to be | |
171 translated but the second doesn't. | |
172 | |
173 | |
174 Solving the "translating too much" problem: | |
175 The concern has been raised that in this situation: | |
176 - "Help" is a string for which we know a translation; | |
177 - someone visits a file called Help, and someone does something | |
178 contrived like (error buffer-file-name) | |
179 then we would display the translation of Help, which would not be correct. | |
180 We can solve this by adding a bit to Lisp_String objects which identifies | |
181 them as having been read as literal constants from a .el or .elc file (as | |
182 opposed to having been constructed at run time as it would be in the above | |
183 case.) To solve this: | |
184 | |
185 - Fmessage() takes a lisp string as its first argument. | |
186 If that string is a constant, that is, was read from a source file | |
187 as a literal, then it calls message() with it, which translates. | |
188 Otherwise, it calls message_no_translate(), which does not translate. | |
189 | |
190 - Ferror() (actually, Fsignal() when condition is Qerror) works similarly. | |
191 */ | |
192 | |
193 | |
194 | |
195 | |
196 /* Scan specified C and Lisp files, extracting the following messages: | |
197 | |
198 C files: | |
199 GETTEXT (...) | |
200 DEFER_GETTEXT (...) | |
201 DEFUN interactive prompts | |
202 Lisp files: | |
203 (gettext ...) | |
204 (dgettext "domain-name" ...) | |
205 (defer-gettext ...) | |
206 (interactive ...) | |
207 | |
208 The arguments given to this program are all the C and Lisp source files | |
209 of GNU Emacs. .el and .c files are allowed. There is no support for .elc | |
210 files at this time, but they may be specified; the corresponding .el file | |
211 will be used. Similarly, .o files can also be specified, and the corresponding | |
212 .c file will be used. This helps the makefile pass the correct list of files. | |
213 | |
214 The results, which go to standard output or to a file specified with -a or -o | |
215 (-a to append, -o to start from nothing), are quoted strings wrapped in | |
216 gettext(...). The results can be passed to xgettext to produce a .po message | |
217 file. | |
218 */ | |
219 | |
220 #include <stdio.h> | |
221 #include <string.h> | |
222 | |
/* Size of the shared line buffer, in bytes (including the terminator). */
#define LINESIZE 256

/* Read the next chunk of the current input file into `line'.  Evaluates
   to `line' on success and to NULL at end of file or on a read error. */
#define GET_LINE fgets (line, LINESIZE, infile)

/* If P has reached the end of the buffered text, refill the buffer and
   restart P at its beginning.  P becomes NULL at end of file.  Wrapped in
   do/while so that it behaves as a single statement, e.g. inside an
   unbraced if/else. */
#define CHECK_EOL(p) do { if (*(p) == '\0') (p) = GET_LINE; } while (0)

/* Advance P over spaces and tabs (but not newlines). */
#define SKIP_BLANKS(p) \
  do { while (*(p) == ' ' || *(p) == '\t') (p)++; } while (0)

enum filetype { C_FILE, LISP_FILE, INVALID_FILE };
/* some brain-dead headers define this ... */
#undef FALSE
#undef TRUE
enum boolean { FALSE, TRUE };

FILE *infile;         /* file currently being scanned */
FILE *outfile;        /* where the extracted messages are written */
char line[LINESIZE];  /* most recently read chunk of infile */


void scan_file (char *filename);
void process_C_file (void);
void process_Lisp_file (void);
char *copy_up_to_paren (register char *p);
char *copy_quoted_string (register char *p);
enum boolean no_interactive_prompt (register char *q);
char *skip_blanks (register char *p);
246 | |
247 | |
248 main (int argc, char *argv[]) | |
249 { | |
250 register int i; | |
251 | |
252 outfile = stdout; | |
253 | |
254 /* If first two args are -o FILE, output to FILE. */ | |
255 i = 1; | |
256 if (argc > i + 1 && strcmp (argv[i], "-o") == 0) { | |
257 outfile = fopen (argv[++i], "w"); | |
258 ++i; | |
259 } | |
260 /* ...Or if args are -a FILE, append to FILE. */ | |
261 if (argc > i + 1 && strcmp (argv[i], "-a") == 0) { | |
262 outfile = fopen (argv[++i], "a"); | |
263 ++i; | |
264 } | |
265 if (!outfile) { | |
266 fprintf (stderr, "Unable to open output file %s\n", argv[--i]); | |
267 return; | |
268 } | |
269 | |
270 for (; i < argc; i++) | |
271 scan_file (argv[i]); | |
272 | |
273 return 0; | |
274 } | |
275 | |
276 | |
277 void scan_file (char *filename) | |
278 { | |
279 enum filetype type = INVALID_FILE; | |
280 register char *p = filename + strlen (filename); | |
281 | |
282 if (strcmp (p - 4, ".elc") == 0) { | |
283 *--p = '\0'; /* Use .el file instead */ | |
284 type = LISP_FILE; | |
285 } else if (strcmp (p - 3, ".el") == 0) | |
286 type = LISP_FILE; | |
287 else if (strcmp (p - 2, ".o") == 0) { | |
288 *--p = 'c'; /* Use .c file instead */ | |
289 type = C_FILE; | |
290 } else if (strcmp (p - 2, ".c") == 0) | |
291 type = C_FILE; | |
292 | |
293 if (type == INVALID_FILE) { | |
294 fprintf (stderr, "File %s being ignored\n", filename); | |
295 return; | |
296 } | |
297 infile = fopen (filename, "r"); | |
298 if (!infile) { | |
299 fprintf (stderr, "Unable to open input file %s\n", filename); | |
300 return; | |
301 } | |
302 | |
303 fprintf (outfile, "/* %s */\n", filename); | |
304 if (type == C_FILE) | |
305 process_C_file (); | |
306 else | |
307 process_Lisp_file (); | |
308 fputc ('\n', outfile); | |
309 | |
310 fclose (infile); | |
311 } | |
312 | |
313 | |
314 void process_C_file (void) | |
315 { | |
316 register char *p; | |
317 char *gettext, *defun; | |
318 | |
319 while (p = GET_LINE) { | |
320 gettext = strstr (p, "GETTEXT"); | |
321 defun = strstr (p, "DEFUN"); | |
322 if (gettext || defun) { | |
323 if (gettext) { | |
324 p = gettext; | |
325 p += 7; /* Skip over "GETTEXT" */ | |
326 } | |
327 else if (defun) { | |
328 p = defun; | |
329 p += 5; /* Skip over "DEFUN" */ | |
330 } | |
331 | |
332 p = skip_blanks (p); | |
333 if (*p++ != '(') | |
334 continue; | |
335 | |
336 if (defun) { | |
337 register int i; | |
338 | |
339 for (i = 0; i < 5; i++) /* Skip over commas to doc string */ | |
340 while (*p++ != ',') | |
341 CHECK_EOL (p); | |
342 if (*p == '\n') | |
343 p = GET_LINE; | |
344 } | |
345 | |
346 p = skip_blanks (p); | |
347 if (*p != '\"') /* Make sure there is a quoted string */ | |
348 continue; | |
349 | |
350 if (defun && no_interactive_prompt (p)) | |
351 continue; | |
352 | |
353 fprintf (outfile, "gettext("); | |
354 if (gettext) | |
355 p = copy_up_to_paren (p); | |
356 else | |
357 p = copy_quoted_string (p); | |
358 fprintf (outfile, ")\n"); | |
359 } | |
360 } | |
361 } | |
362 | |
363 | |
364 void process_Lisp_file (void) | |
365 { | |
366 register char *p; | |
367 char *gettext, *interactive; | |
368 enum boolean dgettext = FALSE; | |
369 | |
370 while (p = GET_LINE) { | |
371 gettext = strstr (p, "gettext"); | |
372 interactive = strstr (p, "(interactive"); | |
373 if (gettext || interactive) { | |
374 if (!interactive) | |
375 p = gettext; | |
376 else if (!gettext) | |
377 p = interactive; | |
378 else if (gettext < interactive) { | |
379 p = gettext; | |
380 interactive = NULL; | |
381 } else { | |
382 p = interactive; | |
383 gettext = NULL; | |
384 } | |
385 | |
386 if (gettext) { | |
387 if (p > line && *(p-1) == 'd') | |
388 dgettext = TRUE; | |
389 p += 7; /* Skip over "gettext" */ | |
390 } else | |
391 p += 12; /* Skip over "(interactive" */ | |
392 | |
393 p = skip_blanks (p); | |
394 if (*p != '\"') /* Make sure there is a quoted string */ | |
395 continue; | |
396 | |
397 if (dgettext) { /* Skip first quoted string (domain name) */ | |
398 while (*++p != '"') | |
399 ; /* null statement */ | |
400 ++p; | |
401 p = skip_blanks (p); | |
402 if (*p != '\"') /* Check for second quoted string (message) */ | |
403 continue; | |
404 } | |
405 | |
406 if (interactive && no_interactive_prompt (p)) | |
407 continue; | |
408 | |
409 fprintf (outfile, "gettext("); | |
410 p = copy_up_to_paren (p); | |
411 fprintf (outfile, ")\n"); | |
412 } | |
413 } | |
414 } | |
415 | |
416 | |
417 /* Assuming p points to some character beyond an opening parenthesis, copy | |
418 everything to outfile up to but not including the closing parenthesis. | |
419 */ | |
420 char *copy_up_to_paren (register char *p) | |
421 { | |
422 for (;;) { | |
423 SKIP_BLANKS (p); /* We don't call skip_blanks() in order to */ | |
424 CHECK_EOL (p); /* preserve blanks at the beginning of the line */ | |
425 if (*p == ')') | |
426 break; | |
427 | |
428 if (*p == '\"') | |
429 p = copy_quoted_string (p); | |
430 else | |
431 fputc (*p++, outfile); | |
432 } | |
433 return p; | |
434 } | |
435 | |
436 | |
437 /* Assuming p points to a quote character, copy the quoted string to outfile. | |
438 */ | |
439 char *copy_quoted_string (register char *p) | |
440 { | |
441 do { | |
442 if (*p == '\\') | |
443 fputc (*p++, outfile); | |
444 fputc (*p++, outfile); | |
445 CHECK_EOL (p); | |
446 } while (*p != '\"'); | |
447 | |
448 fputc (*p++, outfile); | |
449 return p; | |
450 } | |
451 | |
452 | |
453 /* Return TRUE if the interactive specification consists only | |
454 of code letters and no prompt. | |
455 */ | |
456 enum boolean no_interactive_prompt (register char *q) | |
457 { | |
458 while (++q, *q == '*' || *q == '@') | |
459 ; /* null statement */ | |
460 if (*q == '\"') | |
461 return TRUE; | |
462 skip_code_letter: | |
463 if (*++q == '\"') | |
464 return TRUE; | |
465 if (*q == '\\' && *++q == 'n') { | |
466 ++q; | |
467 goto skip_code_letter; | |
468 } | |
469 return FALSE; | |
470 } | |
471 | |
472 | |
473 char *skip_blanks (register char *p) | |
474 { | |
475 while (*p == ' ' || *p == '\t' || *p == '\n') { | |
476 p++; | |
477 CHECK_EOL (p); | |
478 } | |
479 return p; | |
480 } |