771
|
1 : #-*- Perl -*-
|
|
2
|
|
3 ### make-mswin-unicode --- generate Unicode-encapsulation code for MS Windows
|
|
4
|
|
5 ## Copyright (C) 2001, 2002 Ben Wing.
|
|
6
|
|
7 ## Author: Ben Wing <ben@xemacs.org>
|
|
8 ## Maintainer: Ben Wing <ben@xemacs.org>
|
|
9 ## Current Version: 1.0, August 24, 2001
|
|
10
|
|
11 ## This file is part of XEmacs.
|
|
12
|
|
13 ## XEmacs is free software; you can redistribute it and/or modify it
|
|
14 ## under the terms of the GNU General Public License as published by
|
|
15 ## the Free Software Foundation; either version 2, or (at your option)
|
|
16 ## any later version.
|
|
17
|
|
18 ## XEmacs is distributed in the hope that it will be useful, but
|
|
19 ## WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
21 ## General Public License for more details.
|
|
22
|
|
23 ## You should have received a copy of the GNU General Public License
|
|
24 ## along with XEmacs; see the file COPYING. If not, write to the Free
|
|
25 ## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
26 ## 02111-1307, USA.
|
|
27
|
|
28 eval 'exec perl -w -S $0 ${1+"$@"}'
|
|
29 if 0;
|
|
30
|
|
31 use strict;
|
|
32 use File::Basename;
|
|
33 use Getopt::Long;
|
|
34
|
|
35 my ($myName, $myPath) = fileparse ($0);
|
|
36
|
|
37 my $usage="
|
|
38 Usage: $myName [--c-output FILE] [--h-output FILE] [--help] [FILES ...]
|
|
39
|
|
40 The purpose of this script is to auto-generate Unicode-encapsulation
|
|
41 code for MS Windows library functions that come in two versions (ANSI
|
|
42 and Unicode). The MS Windows header files provide a way of
|
|
43 automatically calling the right version, but only at compile-time,
|
|
44 which is *NOT* sufficient for any real-world program. The solution is
|
|
45 run-time Unicode encapsulation, which is not conceptually difficult
|
|
46 but is time-consuming, and is not supported standardly only due to
|
|
47 evil marketing decisions made by Microsoft. See src/intl-win32.c
|
|
48 for more information.
|
|
49
|
|
50 This script processes the specified files, looking for commands
|
|
51 indicating library routines to Unicode-encapsulate, as follows:
|
|
52
|
|
53 Portions of the files that should be processed are enclosed in lines
|
|
54 consisting only of the words \"begin-unicode-encapsulation-script\"
|
|
55 and \"end-unicode-encapsulation-script\". More than one section can
|
|
56 occur in a single file. Processed lines begin with a command word,
|
|
57 followed by one or more args (no quotes are necessary for spaces):
|
|
58
|
|
59 dir sets the directory for include files.
|
|
60 file specifies a file to start reading from.
|
|
61 yes indicates a function to be automatically Unicode-encapsulated.
|
|
62 (All parameters either need no special processing or are LPTSTR or
|
|
63 LPCTSTR.)
|
|
64 soon indicates a function that should be automatically Unicode-encapsulated,
|
|
65 but we're not ready to process it yet.
|
|
66 no indicates a function we don't support (it will be #defined to cause
|
|
67 a compile error, with the text after the function included in the
|
|
68 erroneous definition to indicate why we don't support it).
|
|
69 skip indicates a function we support manually; only a comment about this
|
|
70 will be generated.
|
|
71 split indicates a function with a split structure (different versions
|
|
72 for Unicode and ANSI), but where the only difference is in pointer
|
|
73 types, and the actual size does not differ. The structure name
|
|
74 should follow the function name, and it will be automatically
|
|
75 Unicode-encapsulated with appropriate casts.
|
|
76 begin-bracket indicates a #if statement to be inserted here.
|
|
77 end-bracket indicates the corresponding #endif statement.
|
|
78 blank lines and lines beginning with // are ignored.
|
|
79 ";
|
|
80
|
|
81 # ------------------ process command-line options ------------------
|
|
82
|
|
83 my %options;
|
|
84 my @SAVE_ARGV = @ARGV;
|
|
85
|
|
86 $Getopt::Long::ignorecase = 0;
|
|
87 &GetOptions (
|
|
88 \%options,
|
|
89 'c-output=s',
|
|
90 'h-output=s',
|
|
91 'help',
|
|
92 );
|
|
93
|
|
94 die $usage if $options{"help"};
|
|
95
|
|
96 my $in_script;
|
|
97 my $slurp;
|
|
98
|
|
99 my ($cout, $hout) = ($options{"c-output"}, $options{"h-output"});
|
|
100
|
|
101 open (COUT, ">$cout") or die "Can't open C output file $cout: $!";
|
|
102 open (HOUT, ">$hout") or die "Can't open C output file $hout: $!";
|
|
103
|
|
104 select (STDOUT); $| = 1;
|
|
105
|
|
106 print COUT "/* Automatically-generated Unicode-encapsulation file,
|
|
107 using the command
|
|
108
|
|
109 $myPath$myName @SAVE_ARGV
|
|
110
|
|
111 Do not edit. See `$myName'.
|
|
112 */
|
|
113
|
|
114 #include <config.h>
|
|
115 #include \"lisp.h\"
|
|
116
|
|
117 #include \"syswindows.h\"
|
|
118
|
|
119 ";
|
|
120 print HOUT "/* Automatically-generated Unicode-encapsulation header file.
|
|
121 Do not edit. See `$myName'.
|
|
122 */\n\n";
|
|
123
|
|
124 my %files;
|
|
125 my %processed;
|
|
126 my %bracket;
|
|
127
|
|
128 my $current_file;
|
|
129 my @current_bracket;
|
|
130
|
|
131 while (<>)
|
|
132 {
|
|
133 chomp;
|
|
134
|
|
135 if (/^begin-unicode-encapsulation-script$/)
|
|
136 {
|
|
137 $in_script = 1;
|
|
138 }
|
|
139 elsif (/^end-unicode-encapsulation-script$/)
|
|
140 {
|
|
141 $in_script = 0;
|
|
142 }
|
|
143 elsif ($in_script)
|
|
144 {
|
|
145 next if (m!^//!);
|
|
146 next if (/^[ \t]*$/);
|
|
147 if (/(dir|file|yes|soon|no|skip|split|begin-bracket|end-bracket)(?: (.*))?/)
|
|
148 {
|
|
149 my ($command, $parms) = ($1, $2);
|
|
150 if ($command eq "dir")
|
|
151 {
|
|
152 chdir $parms or die "Can't chdir to $parms: $!";
|
|
153 }
|
|
154 elsif ($command eq "file")
|
|
155 {
|
|
156 $current_file = $parms;
|
|
157 }
|
|
158 elsif ($command eq "begin-bracket")
|
|
159 {
|
|
160 my $current_bracket = $current_bracket[$#current_bracket];
|
|
161 if (defined ($current_bracket))
|
|
162 {
|
|
163 $current_bracket .= "&& $parms";
|
|
164 }
|
|
165 else
|
|
166 {
|
|
167 $current_bracket = "$parms";
|
|
168 }
|
|
169 push @current_bracket, $current_bracket;
|
|
170 }
|
|
171 elsif ($command eq "end-bracket")
|
|
172 {
|
|
173 pop @current_bracket;
|
|
174 }
|
|
175 else
|
|
176 {
|
|
177 my ($fun, $reason) = split /\s+/, $parms, 2;
|
|
178 $files{$current_file}{$fun} = [$command, $reason];
|
|
179 $bracket{$current_file}{$fun} =
|
|
180 $current_bracket[$#current_bracket];
|
|
181 }
|
|
182 }
|
|
183 else
|
|
184 {
|
|
185 print "WARNING: Unknown line $_\n";
|
|
186 }
|
|
187 }
|
|
188 }
|
|
189
|
|
190
|
|
191 foreach my $file (keys %files)
|
|
192 {
|
|
193 $slurp = &FileContents ($file);
|
|
194 print "Processing file $file\n";
|
|
195 print HOUT "\n/* Processing file $file */\n\n";
|
|
196 my $totalspace = 70 - length ("Processing file $file");
|
|
197 $totalspace = 0 if $totalspace < 0;
|
|
198 my $alignspaceleft = $totalspace / 2;
|
|
199 my $alignspaceright = ($totalspace + 1) / 2;
|
|
200 print COUT "
|
|
201 /*----------------------------------------------------------------------*/
|
|
202 /*" . (" " x $alignspaceleft) . "Processing file $file" .
|
|
203 (" " x $alignspaceright) . "*/
|
|
204 /*----------------------------------------------------------------------*/
|
|
205
|
|
206 ";
|
|
207
|
|
208 my ($ws_re, $must_ws_re, $tok_ch) =
|
|
209 ("\\s*", "\\s+", "\\w");
|
|
210 # unfortunately there is no surefire way short of
|
|
211 # parsing all include files for typedefs to
|
|
212 # distinguish types from parameters, and prototypes
|
|
213 # appear in the include files both with and without
|
|
214 # parameters -- the latter kinds appear in a very
|
|
215 # different style and were obviously added later. so
|
|
216 # we rely on the fact that defined types are all
|
|
217 # upper-case, and parameters generally are not, and
|
|
218 # special-case the exceptions.
|
|
219 my $typeword_re =
|
|
220 # note the negative lookahead assertions: the first
|
|
221 # one excludes the words "X" and "Y" from type
|
|
222 # words, since they appear as parameter names in
|
|
223 # CreateWindowEx; the second prevents "void
|
|
224 # *Argument" from being parsed as a type "void *A"
|
|
225 # followed by a parameter "rgument".
|
|
226 "(?:(?!(?:X\\b|Y\\b))(?:unsigned|int|long|short|va_list|[A-Z_0-9]+)(?!${tok_ch}))";
|
|
227 my $typetoken_re = "(?:$typeword_re$ws_re\\**$ws_re)";
|
|
228 my $arg_re = "(?:($typetoken_re+)(${tok_ch}+)?(?: OPTIONAL)?)";
|
|
229 my $fun_re = "(SHSTDAPI_\\(${tok_ch}+\\)|${tok_ch}" . "[A-Za-z_0-9 \t\n\r\f]*?${tok_ch})${ws_re}(${tok_ch}+)W${ws_re}\\(((${ws_re}${arg_re}${ws_re},)*${ws_re}${arg_re}${ws_re})\\);";
|
|
230
|
|
231 # print "regexp: $fun_re\n";
|
|
232 while ($slurp =~ /$fun_re/g)
|
|
233 {
|
|
234 my ($rettype, $fun, $args) = ($1, $2, $3);
|
|
235 $processed{$fun} = 1;
|
|
236 print "Processing: $fun";
|
|
237
|
|
238 my ($command, $reason) = ($files{$file}{$fun}[0], $files{$file}{$fun}[1]);
|
|
239 if (!defined ($command))
|
|
240 {
|
|
241 print " (no command found)\n";
|
|
242 }
|
|
243 else
|
|
244 {
|
|
245 print "\n";
|
|
246 my $bracket = $bracket{$file}{$fun};
|
|
247 if (defined ($bracket))
|
|
248 {
|
|
249 print HOUT "#if $bracket\n";
|
|
250 print COUT "#if $bracket\n\n";
|
|
251 }
|
|
252 if ($command eq "no")
|
|
253 {
|
|
254 if (!defined ($reason))
|
|
255 {
|
|
256 print "WARNING: No reason given for `no' with function $fun\n";
|
|
257 $reason = "";
|
|
258 }
|
|
259
|
|
260 print HOUT "#undef $fun\n";
|
|
261 print HOUT "#define $fun error $reason\n";
|
|
262 print COUT "/* Error if $fun used: $reason */\n\n";
|
|
263 }
|
|
264 elsif ($command eq "skip")
|
|
265 {
|
|
266 if (!defined ($reason))
|
|
267 {
|
|
268 print "WARNING: No reason given for `skip' with function $fun\n";
|
|
269 $reason = "";
|
|
270 }
|
|
271
|
|
272 print HOUT "/* Skipping $fun because $reason */\n";
|
|
273 print COUT "/* Skipping $fun because $reason */\n\n";
|
|
274 }
|
|
275 elsif ($command eq "soon")
|
|
276 {
|
|
277 $reason = "" if !defined ($reason);
|
|
278
|
|
279 print HOUT "/* Not yet: $fun $reason */\n";
|
|
280 print COUT "/* Not yet: $fun $reason */\n\n";
|
|
281 }
|
|
282 else
|
|
283 {
|
|
284 my (@args, %argtype, %ansiarg, %xarg, $split_struct,
|
|
285 $split_rettype);
|
|
286 if ($command eq "split")
|
|
287 {
|
|
288 ($split_struct, $reason) = split /\s+/, $reason, 2;
|
|
289 }
|
|
290 my $argno = 0;
|
|
291 while ($args =~ /$arg_re/g)
|
|
292 {
|
|
293 $argno++;
|
|
294 my ($argtype, $argname) = ($1, $2);
|
|
295 $argtype =~ s/\s*$//;
|
|
296 next if $argtype eq "void" || $argtype eq "VOID";
|
|
297 $argname = "arg$argno" if !defined ($argname);
|
|
298 $argtype{$argname} = $argtype;
|
|
299 $ansiarg{$argname} = $argtype;
|
|
300 $ansiarg{$argname} =~ s/\bLPWSTR\b/LPSTR/;
|
|
301 $ansiarg{$argname} =~ s/\bLPCWSTR\b/LPCSTR/;
|
|
302 $xarg{$argname} = $argtype;
|
|
303 $xarg{$argname} =~ s/\bLPWSTR\b/Extbyte */;
|
|
304 $xarg{$argname} =~ s/\bLPCWSTR\b/const Extbyte */;
|
|
305 if (defined ($split_struct))
|
|
306 {
|
|
307 my $fuck_cperl1 = "\\b${split_struct}W\\b";
|
|
308 my $fuck_cperl2 = "${split_struct}A";
|
|
309 $ansiarg{$argname} =~ s/$fuck_cperl1/$fuck_cperl2/;
|
|
310 }
|
|
311 push @args, $argname;
|
|
312 }
|
|
313 $rettype =~ s/\bSHSTDAPI_\((.*)\)/$1/;
|
|
314 $rettype =~ s/\s*WIN\w*?API\s*//g;
|
|
315 $rettype =~ s/\bAPIENTRY\b//;
|
|
316 $rettype =~ s/\bSHSTDAPI\b/HRESULT/;
|
|
317 if ($rettype =~ /LPC?WSTR/)
|
|
318 {
|
|
319 $split_rettype = 1;
|
|
320 $rettype =~ s/\bLPWSTR\b/Extbyte */;
|
|
321 $rettype =~ s/\bLPCWSTR\b/const Extbyte */;
|
|
322 }
|
|
323 if (defined ($reason))
|
|
324 {
|
|
325 print COUT "/* NOTE: $reason */\n";
|
|
326 }
|
|
327 print COUT "$rettype\nqxe$fun (";
|
|
328 print HOUT "$rettype qxe$fun (";
|
|
329 my $first = 1;
|
|
330 if (!@args)
|
|
331 {
|
|
332 print COUT "void";
|
|
333 print HOUT "void";
|
|
334 }
|
|
335 else
|
|
336 {
|
|
337 foreach my $x (@args)
|
|
338 {
|
|
339 print COUT ", " if !$first;
|
|
340 print HOUT ", " if !$first;
|
|
341 $first = 0;
|
|
342 print COUT "$xarg{$x} $x";
|
|
343 print HOUT "$xarg{$x} $x";
|
|
344 }
|
|
345 }
|
|
346 print HOUT ");\n";
|
|
347 print COUT ")\n{\n if (XEUNICODE_P)\n ";
|
|
348 if ($rettype ne "void" && $rettype ne "VOID")
|
|
349 {
|
|
350 print COUT "return ";
|
|
351 print COUT "($rettype) " if $split_rettype;
|
|
352 }
|
|
353 print COUT "${fun}W (";
|
|
354 $first = 1;
|
|
355 foreach my $x (@args)
|
|
356 {
|
|
357 print COUT ", " if !$first;
|
|
358 $first = 0;
|
|
359 print COUT ($argtype{$x} eq $xarg{$x} ? $x :
|
|
360 "($argtype{$x}) $x");
|
|
361 }
|
|
362 print COUT ");\n else\n ";
|
|
363 if ($rettype ne "void" && $rettype ne "VOID")
|
|
364 {
|
|
365 print COUT "return ";
|
|
366 print COUT "($rettype) " if $split_rettype;
|
|
367 }
|
|
368 print COUT "${fun}A (";
|
|
369 $first = 1;
|
|
370 foreach my $x (@args)
|
|
371 {
|
|
372 print COUT ", " if !$first;
|
|
373 $first = 0;
|
|
374 print COUT ($argtype{$x} eq $ansiarg{$x} ? $x :
|
|
375 "($ansiarg{$x}) $x");
|
|
376 }
|
|
377 print COUT ");\n}\n\n";
|
|
378 }
|
|
379 if (defined ($bracket))
|
|
380 {
|
|
381 print HOUT "#endif /* $bracket */\n";
|
|
382 print COUT "#endif /* $bracket */\n\n";
|
|
383 }
|
|
384 }
|
|
385 }
|
|
386 }
|
|
387
|
|
388 foreach my $file (keys %files)
|
|
389 {
|
|
390 foreach my $fun (keys %{$files{$file}})
|
|
391 {
|
|
392 if (!$processed{$fun} && $files{$file}{$fun}[0] =~ /^(yes|soon|split)$/)
|
|
393 {
|
|
394 print "WARNING: Can't locate prototype for $fun\n";
|
|
395 }
|
|
396 }
|
|
397 }
|
|
398
|
|
399
|
|
400 sub FileContents
|
|
401 {
|
|
402 local $/ = undef;
|
|
403 open (FILE, "< $_[0]") or die "$_[0]: $!";
|
|
404 my $retval = scalar <FILE>;
|
|
405 # must hack away CRLF junk.
|
|
406 $retval =~ s/\r\n/\n/g;
|
|
407 return $retval;
|
|
408 }
|