442
|
1 /*
|
|
2
|
|
3
|
428
|
4 PROPOSAL FOR HOW THIS ALL OUGHT TO WORK
|
|
5 this isn't implemented yet, but this is the plan-in-progress
|
|
6
|
442
|
7
|
428
|
8 In general, it's accepted that the best way to internationalize is for all
|
|
9 messages to be referred to by a symbolic name (or number) and come out of a
|
|
10 table or tables, which are easy to change.
|
|
11
|
|
12 However, with Emacs, we've got the task of internationalizing a huge body
|
|
13 of existing code, which already contains messages internally.
|
|
14
|
|
15 For the C code we've got two options:
|
|
16
|
|
17 - Use a Sun-like gettext() form, which takes an "English" string which
|
|
18 appears literally in the source, and uses that as a hash key to find
|
|
19 a translated string;
|
|
20 - Rip all of the strings out and put them in a table.
|
|
21
|
|
22 In this case, it's desirable to make as few changes as possible to the C
|
|
23 code, to make it easier to merge the code with the FSF version of emacs
|
|
24 which won't ever have these changes made to it. So we should go with the
|
|
25 former option.
|
|
26
|
|
27 The way it has been done (between 19.8 and 19.9) was to use gettext(), but
|
|
28 *also* to make massive changes to the source code. The goal now is to use
|
|
29 gettext() at run-time and yet not require a textual change to every line
|
|
30 in the C code which contains a string constant. A possible way to do this
|
|
31 is described below.
|
|
32
|
|
33 (gettext() can be implemented in terms of catgets() for non-Sun systems, so
|
|
34 that in itself isn't a problem.)
|
|
35
|
|
36 For the Lisp code, we've got basically the same options: put everything in
|
|
37 a table, or translate things implicitly.
|
|
38
|
|
39 Another kink that lisp code introduces is that there are thousands of third-
|
|
40 party packages, so changing the source for all of those is simply not an
|
|
41 option.
|
|
42
|
|
43 Is it a goal that if some third party package displays a message which is
|
|
44 one we know how to translate, then we translate it? I think this is a
|
|
45 worthy goal. It remains to be seen how well it will work in practice.
|
|
46
|
|
47 So, we should endeavor to minimize the impact on the lisp code. Certain
|
|
48 primitive lisp routines (the stuff in lisp/prim/, and especially in
|
|
49 cmdloop.el and minibuf.el) may need to be changed to know about translation,
|
|
50 but that's an ideologically clean thing to do because those are considered
|
|
51 a part of the emacs substrate.
|
|
52
|
|
53 However, if we find ourselves wanting to make changes to, say, RMAIL, then
|
|
54 something has gone wrong. (Except to do things like remove assumptions
|
|
55 about the order of words within a sentence, or how pluralization works.)
|
|
56
|
442
|
57 There are two parts to the task of displaying translated strings to the
|
428
|
58 user: the first is to extract the strings which need to be translated from
|
|
59 the sources; and the second is to make some call which will translate those
|
|
60 strings before they are presented to the user.
|
442
|
61
|
428
|
62 The old way was to use the same form to do both, that is, GETTEXT() was both
|
|
63 the tag that we searched for to build a catalog, and was the form which did
|
|
64 the translation. The new plan is to separate these two things more: the
|
|
65 tags that we search for to build the catalog will be stuff that was in there
|
|
66 already, and the translation will get done in some more centralized, lower
|
|
67 level place.
|
|
68
|
442
|
69 This program (make-msgfile.c) addresses the first part, extracting the
|
428
|
70 strings.
|
442
|
71
|
428
|
72 For the emacs C code, we need to recognize the following patterns:
|
442
|
73
|
428
|
74 message ("string" ... )
|
|
75 error ("string")
|
|
76 report_file_error ("string" ... )
|
|
77 signal_simple_error ("string" ... )
|
|
78 signal_simple_error_2 ("string" ... )
|
442
|
79
|
428
|
80 build_translated_string ("string")
|
|
81 #### add this and use it instead of build_string() in some places.
|
442
|
82
|
428
|
83 yes_or_no_p ("string" ... )
|
|
84 #### add this instead of funcalling Qyes_or_no_p directly.
|
|
85
|
|
86 barf_or_query_if_file_exists #### restructure this
|
|
87 check all callers of Fsignal #### restructure these
|
|
88 signal_error (Qerror ... ) #### change all of these to error()
|
442
|
89
|
428
|
90 And we also parse out the `interactive' prompts from DEFUN() forms.
|
442
|
91
|
428
|
92 #### When we've got a string which is a candidate for translation, we
|
|
93 should ignore it if it contains only format directives, that is, if
|
|
94 there are no alphabetic characters in it that are not a part of a `%'
|
|
95 directive. (Careful not to translate either "%s%s" or "%s: ".)
|
|
96
|
|
97 For the emacs Lisp code, we need to recognize the following patterns:
|
442
|
98
|
428
|
99 (message "string" ... )
|
|
100 (error "string" ... )
|
|
101 (format "string" ... )
|
|
102 (read-from-minibuffer "string" ... )
|
|
103 (read-shell-command "string" ... )
|
|
104 (y-or-n-p "string" ... )
|
|
105 (yes-or-no-p "string" ... )
|
|
106 (read-file-name "string" ... )
|
|
107 (temp-minibuffer-message "string")
|
|
108 (query-replace-read-args "string" ... )
|
442
|
109
|
428
|
110 I expect there will be a lot like the above; basically, any function which
|
|
111 is a commonly used wrapper around an eventual call to `message' or
|
|
112 `read-from-minibuffer' needs to be recognized by this program.
|
|
113
|
|
114
|
|
115 (dgettext "domain-name" "string") #### do we still need this?
|
442
|
116
|
428
|
117 things that should probably be restructured:
|
|
118 `princ' in cmdloop.el
|
|
119 `insert' in debug.el
|
|
120 face-interactive
|
|
121 help.el, syntax.el all messed up
|
442
|
122
|
428
|
123
|
|
124 Menu descriptors: one way to extract the strings in menu labels would be
|
|
125 to teach this program about "^(defvar .*menu\n" forms; that's probably
|
|
126 kind of hard, though, so perhaps a better approach would be to make this
|
|
127 program recognize lines of the form
|
|
128
|
|
129 "string" ... ;###translate
|
|
130
|
442
|
131 where the magic token ";###translate" on a line means that the string
|
|
132 constant on this line should go into the message catalog. This is analogous
|
428
|
133 to the magic ";###autoload" comments, and to the magic comments used in the
|
|
134 EPSF structuring conventions.
|
|
135
|
|
136 -----
|
|
137 So this program manages to build up a catalog of strings to be translated.
|
|
138 To address the second part of the problem, of actually looking up the
|
|
139 translations, there are hooks in a small number of low level places in
|
|
140 emacs.
|
|
141
|
442
|
142 Assume the existence of a C function gettext(str) which returns the
|
428
|
143 translation of `str' if there is one, otherwise returns `str'.
|
|
144
|
|
145 - message() takes a char* as its argument, and always filters it through
|
|
146 gettext() before displaying it.
|
|
147
|
|
148 - errors are printed by running the lisp function `display-error' which
|
|
149 doesn't call `message' directly (it princ's to streams), so it must be
|
|
150 carefully coded to translate its arguments. This is only a few lines
|
|
151 of code.
|
|
152
|
|
153 - Fread_minibuffer_internal() is the lowest level interface to all minibuf
|
|
154 interactions, so it is responsible for translating the value that will go
|
|
155 into Vminibuf_prompt.
|
|
156
|
|
157 - Fpopup_menu filters the menu titles through gettext().
|
|
158
|
|
159 The above take care of 99% of all messages the user ever sees.
|
|
160
|
|
161 - The lisp function temp-minibuffer-message translates its arg.
|
|
162
|
|
163 - query-replace-read-args is funny; it does
|
|
164 (setq from (read-from-minibuffer (format "%s: " string) ... ))
|
|
165 (setq to (read-from-minibuffer (format "%s %s with: " string from) ... ))
|
|
166
|
|
167 What should we do about this? We could hack query-replace-read-args to
|
|
168 translate its args, but might this be a more general problem? I don't
|
|
169 think we ought to translate all calls to format. We could just change
|
|
170 the calling sequence, since this is odd in that the first %s wants to be
|
|
171 translated but the second doesn't.
|
|
172
|
|
173
|
|
174 Solving the "translating too much" problem:
|
|
175 The concern has been raised that in this situation:
|
|
176 - "Help" is a string for which we know a translation;
|
442
|
177 - someone visits a file called Help, and someone does something
|
428
|
178 contrived like (error buffer-file-name)
|
|
179 then we would display the translation of Help, which would not be correct.
|
|
180 We can solve this by adding a bit to Lisp_String objects which identifies
|
|
181 them as having been read as literal constants from a .el or .elc file (as
|
442
|
182 opposed to having been constructed at run time as it would be in the above
|
428
|
183 case.) To solve this:
|
|
184
|
|
185 - Fmessage() takes a lisp string as its first argument.
|
|
186 If that string is a constant, that is, was read from a source file
|
|
187 as a literal, then it calls message() with it, which translates.
|
|
188 Otherwise, it calls message_no_translate(), which does not translate.
|
|
189
|
|
190 - Ferror() (actually, Fsignal() when condition is Qerror) works similarly.
|
|
191 */
|
|
192
|
|
193
|
|
194
|
|
195
|
|
196 /* Scan specified C and Lisp files, extracting the following messages:
|
|
197
|
|
198 C files:
|
|
199 GETTEXT (...)
|
|
200 DEFER_GETTEXT (...)
|
|
201 DEFUN interactive prompts
|
|
202 Lisp files:
|
|
203 (gettext ...)
|
|
204 (dgettext "domain-name" ...)
|
|
205 (defer-gettext ...)
|
|
206 (interactive ...)
|
|
207
|
|
208 The arguments given to this program are all the C and Lisp source files
|
|
209 of GNU Emacs. .el and .c files are allowed. There is no support for .elc
|
|
210 files at this time, but they may be specified; the corresponding .el file
|
|
211 will be used. Similarly, .o files can also be specified, and the corresponding
|
|
212 .c file will be used. This helps the makefile pass the correct list of files.
|
|
213
|
|
214 The results, which go to standard output or to a file specified with -a or -o
|
|
215 (-a to append, -o to start from nothing), are quoted strings wrapped in
|
|
216 gettext(...). The results can be passed to xgettext to produce a .po message
|
|
217 file.
|
|
218 */
|
|
219
|
|
220 #include <stdio.h>
|
|
221 #include <string.h>
|
|
222
|
|
/* Size of the line buffer handed to fgets, terminating NUL included. */
#define LINESIZE 256

/* Read the next chunk of `infile' into `line'.  Evaluates to `line', or to
   NULL when fgets reports end of file or an error. */
#define GET_LINE fgets (line, LINESIZE, infile)

/* If P has reached the end of the buffered text, refill the buffer and
   restart P at its beginning.  P becomes NULL at end of file, so callers
   must test it before dereferencing.  Wrapped in do/while so the macro is
   a single statement and cannot capture a following `else'. */
#define CHECK_EOL(p) \
  do { if (*(p) == '\0') (p) = GET_LINE; } while (0)

/* Advance P over spaces and tabs (but not newlines). */
#define SKIP_BLANKS(p) \
  do { while (*(p) == ' ' || *(p) == '\t') (p)++; } while (0)

/* Kind of source file, as decided from the file name suffix. */
enum filetype { C_FILE, LISP_FILE, INVALID_FILE };
/* some brain-dead headers define this ... */
#undef FALSE
#undef TRUE
enum boolean { FALSE, TRUE };

FILE *infile;			/* Source file currently being scanned. */
FILE *outfile;			/* Destination for the extracted messages. */
char line[LINESIZE];		/* Buffer filled by GET_LINE. */


void scan_file (char *filename);
void process_C_file (void);
void process_Lisp_file (void);
char *copy_up_to_paren (register char *p);
char *copy_quoted_string (register char *p);
enum boolean no_interactive_prompt (register char *q);
char *skip_blanks (register char *p);
|
|
246
|
|
247
|
|
248 main (int argc, char *argv[])
|
|
249 {
|
|
250 register int i;
|
|
251
|
|
252 outfile = stdout;
|
|
253
|
|
254 /* If first two args are -o FILE, output to FILE. */
|
|
255 i = 1;
|
|
256 if (argc > i + 1 && strcmp (argv[i], "-o") == 0) {
|
|
257 outfile = fopen (argv[++i], "w");
|
|
258 ++i;
|
|
259 }
|
|
260 /* ...Or if args are -a FILE, append to FILE. */
|
|
261 if (argc > i + 1 && strcmp (argv[i], "-a") == 0) {
|
|
262 outfile = fopen (argv[++i], "a");
|
|
263 ++i;
|
|
264 }
|
|
265 if (!outfile) {
|
|
266 fprintf (stderr, "Unable to open output file %s\n", argv[--i]);
|
|
267 return;
|
|
268 }
|
|
269
|
|
270 for (; i < argc; i++)
|
|
271 scan_file (argv[i]);
|
|
272
|
|
273 return 0;
|
|
274 }
|
|
275
|
|
276
|
|
277 void scan_file (char *filename)
|
|
278 {
|
|
279 enum filetype type = INVALID_FILE;
|
|
280 register char *p = filename + strlen (filename);
|
|
281
|
|
282 if (strcmp (p - 4, ".elc") == 0) {
|
|
283 *--p = '\0'; /* Use .el file instead */
|
|
284 type = LISP_FILE;
|
|
285 } else if (strcmp (p - 3, ".el") == 0)
|
|
286 type = LISP_FILE;
|
|
287 else if (strcmp (p - 2, ".o") == 0) {
|
|
288 *--p = 'c'; /* Use .c file instead */
|
|
289 type = C_FILE;
|
|
290 } else if (strcmp (p - 2, ".c") == 0)
|
|
291 type = C_FILE;
|
|
292
|
|
293 if (type == INVALID_FILE) {
|
|
294 fprintf (stderr, "File %s being ignored\n", filename);
|
|
295 return;
|
|
296 }
|
|
297 infile = fopen (filename, "r");
|
|
298 if (!infile) {
|
|
299 fprintf (stderr, "Unable to open input file %s\n", filename);
|
|
300 return;
|
|
301 }
|
|
302
|
|
303 fprintf (outfile, "/* %s */\n", filename);
|
|
304 if (type == C_FILE)
|
|
305 process_C_file ();
|
|
306 else
|
|
307 process_Lisp_file ();
|
|
308 fputc ('\n', outfile);
|
442
|
309
|
428
|
310 fclose (infile);
|
|
311 }
|
|
312
|
|
313
|
|
314 void process_C_file (void)
|
|
315 {
|
|
316 register char *p;
|
|
317 char *gettext, *defun;
|
|
318
|
|
319 while (p = GET_LINE) {
|
|
320 gettext = strstr (p, "GETTEXT");
|
|
321 defun = strstr (p, "DEFUN");
|
|
322 if (gettext || defun) {
|
|
323 if (gettext) {
|
|
324 p = gettext;
|
|
325 p += 7; /* Skip over "GETTEXT" */
|
|
326 }
|
|
327 else if (defun) {
|
|
328 p = defun;
|
|
329 p += 5; /* Skip over "DEFUN" */
|
|
330 }
|
|
331
|
|
332 p = skip_blanks (p);
|
|
333 if (*p++ != '(')
|
|
334 continue;
|
|
335
|
|
336 if (defun) {
|
|
337 register int i;
|
|
338
|
|
339 for (i = 0; i < 5; i++) /* Skip over commas to doc string */
|
|
340 while (*p++ != ',')
|
|
341 CHECK_EOL (p);
|
|
342 if (*p == '\n')
|
|
343 p = GET_LINE;
|
|
344 }
|
|
345
|
|
346 p = skip_blanks (p);
|
|
347 if (*p != '\"') /* Make sure there is a quoted string */
|
|
348 continue;
|
|
349
|
|
350 if (defun && no_interactive_prompt (p))
|
|
351 continue;
|
|
352
|
|
353 fprintf (outfile, "gettext(");
|
|
354 if (gettext)
|
|
355 p = copy_up_to_paren (p);
|
|
356 else
|
|
357 p = copy_quoted_string (p);
|
|
358 fprintf (outfile, ")\n");
|
|
359 }
|
|
360 }
|
|
361 }
|
|
362
|
|
363
|
|
364 void process_Lisp_file (void)
|
|
365 {
|
|
366 register char *p;
|
|
367 char *gettext, *interactive;
|
|
368 enum boolean dgettext = FALSE;
|
|
369
|
|
370 while (p = GET_LINE) {
|
|
371 gettext = strstr (p, "gettext");
|
|
372 interactive = strstr (p, "(interactive");
|
|
373 if (gettext || interactive) {
|
|
374 if (!interactive)
|
|
375 p = gettext;
|
|
376 else if (!gettext)
|
|
377 p = interactive;
|
|
378 else if (gettext < interactive) {
|
|
379 p = gettext;
|
|
380 interactive = NULL;
|
|
381 } else {
|
|
382 p = interactive;
|
|
383 gettext = NULL;
|
|
384 }
|
|
385
|
|
386 if (gettext) {
|
|
387 if (p > line && *(p-1) == 'd')
|
|
388 dgettext = TRUE;
|
|
389 p += 7; /* Skip over "gettext" */
|
|
390 } else
|
|
391 p += 12; /* Skip over "(interactive" */
|
|
392
|
|
393 p = skip_blanks (p);
|
|
394 if (*p != '\"') /* Make sure there is a quoted string */
|
|
395 continue;
|
|
396
|
|
397 if (dgettext) { /* Skip first quoted string (domain name) */
|
|
398 while (*++p != '"')
|
|
399 ; /* null statement */
|
|
400 ++p;
|
|
401 p = skip_blanks (p);
|
|
402 if (*p != '\"') /* Check for second quoted string (message) */
|
|
403 continue;
|
|
404 }
|
|
405
|
|
406 if (interactive && no_interactive_prompt (p))
|
|
407 continue;
|
|
408
|
|
409 fprintf (outfile, "gettext(");
|
|
410 p = copy_up_to_paren (p);
|
|
411 fprintf (outfile, ")\n");
|
|
412 }
|
|
413 }
|
|
414 }
|
|
415
|
|
416
|
|
417 /* Assuming p points to some character beyond an opening parenthesis, copy
|
|
418 everything to outfile up to but not including the closing parenthesis.
|
|
419 */
|
|
420 char *copy_up_to_paren (register char *p)
|
|
421 {
|
|
422 for (;;) {
|
|
423 SKIP_BLANKS (p); /* We don't call skip_blanks() in order to */
|
|
424 CHECK_EOL (p); /* preserve blanks at the beginning of the line */
|
|
425 if (*p == ')')
|
|
426 break;
|
|
427
|
|
428 if (*p == '\"')
|
|
429 p = copy_quoted_string (p);
|
|
430 else
|
|
431 fputc (*p++, outfile);
|
|
432 }
|
|
433 return p;
|
|
434 }
|
|
435
|
|
436
|
|
437 /* Assuming p points to a quote character, copy the quoted string to outfile.
|
|
438 */
|
|
439 char *copy_quoted_string (register char *p)
|
|
440 {
|
|
441 do {
|
|
442 if (*p == '\\')
|
|
443 fputc (*p++, outfile);
|
|
444 fputc (*p++, outfile);
|
|
445 CHECK_EOL (p);
|
|
446 } while (*p != '\"');
|
|
447
|
|
448 fputc (*p++, outfile);
|
|
449 return p;
|
|
450 }
|
|
451
|
|
452
|
|
453 /* Return TRUE if the interactive specification consists only
|
|
454 of code letters and no prompt.
|
|
455 */
|
|
456 enum boolean no_interactive_prompt (register char *q)
|
|
457 {
|
|
458 while (++q, *q == '*' || *q == '@')
|
|
459 ; /* null statement */
|
|
460 if (*q == '\"')
|
|
461 return TRUE;
|
|
462 skip_code_letter:
|
|
463 if (*++q == '\"')
|
|
464 return TRUE;
|
|
465 if (*q == '\\' && *++q == 'n') {
|
|
466 ++q;
|
|
467 goto skip_code_letter;
|
|
468 }
|
|
469 return FALSE;
|
|
470 }
|
|
471
|
|
472
|
|
473 char *skip_blanks (register char *p)
|
|
474 {
|
|
475 while (*p == ' ' || *p == '\t' || *p == '\n') {
|
|
476 p++;
|
|
477 CHECK_EOL (p);
|
|
478 }
|
|
479 return p;
|
|
480 }
|