comparison lib-src/make-mswin-unicode.pl @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children 2923009caf47
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 : #-*- Perl -*-
2
3 ### make-mswin-unicode --- generate Unicode-encapsulation code for MS Windows
4
5 ## Copyright (C) 2001, 2002 Ben Wing.
6
7 ## Author: Ben Wing <ben@xemacs.org>
8 ## Maintainer: Ben Wing <ben@xemacs.org>
9 ## Current Version: 1.0, August 24, 2001
10
11 ## This file is part of XEmacs.
12
13 ## XEmacs is free software; you can redistribute it and/or modify it
14 ## under the terms of the GNU General Public License as published by
15 ## the Free Software Foundation; either version 2, or (at your option)
16 ## any later version.
17
18 ## XEmacs is distributed in the hope that it will be useful, but
19 ## WITHOUT ANY WARRANTY; without even the implied warranty of
20 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 ## General Public License for more details.
22
23 ## You should have received a copy of the GNU General Public License
24 ## along with XEmacs; see the file COPYING. If not, write to the Free
25 ## Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
26 ## 02111-1307, USA.
27
28 eval 'exec perl -w -S $0 ${1+"$@"}'
29 if 0;
30
31 use strict;
32 use File::Basename;
33 use Getopt::Long;
34
35 my ($myName, $myPath) = fileparse ($0);
36
37 my $usage="
38 Usage: $myName [--c-output FILE] [--h-output FILE] [--help] [FILES ...]
39
40 The purpose of this script is to auto-generate Unicode-encapsulation
41 code for MS Windows library functions that come in two versions (ANSI
42 and Unicode). The MS Windows header files provide a way of
43 automatically calling the right version, but only at compile-time,
44 which is *NOT* sufficient for any real-world program. The solution is
45 run-time Unicode encapsulation, which is not conceptually difficult
46 but is time-consuming, and is not supported standardly only due to
47 evil marketing decisions made by Microsoft. See src/intl-win32.c
48 for more information.
49
50 This script processes the specified files, looking for commands
51 indicating library routines to Unicode-encapsulate, as follows:
52
53 Portions of the files that should be processed are enclosed in lines
54 consisting only of the words \"begin-unicode-encapsulation-script\"
55 and \"end-unicode-encapsulation-script\". More than one section can
56 occur in a single file. Processed lines begin with a command word,
57 followed by one or more args (no quotes are necessary for spaces):
58
59 dir sets the directory for include files.
60 file specifies a file to start reading from.
61 yes indicates a function to be automatically Unicode-encapsulated.
62 (All parameters either need no special processing or are LPTSTR or
63 LPCTSTR.)
64 soon indicates a function that should be automatically Unicode-encapsulated,
65 but we're not ready to process it yet.
66 no indicates a function we don't support (it will be #defined to cause
67 a compile error, with the text after the function included in the
68 erroneous definition to indicate why we don't support it).
69 skip indicates a function we support manually; only a comment about this
70 will be generated.
71 split indicates a function with a split structure (different versions
72 for Unicode and ANSI), but where the only difference is in pointer
73 types, and the actual size does not differ. The structure name
74 should follow the function name, and it will be automatically
75 Unicode-encapsulated with appropriate casts.
76 begin-bracket indicates a #if statement to be inserted here.
77 end-bracket indicates the corresponding #endif statement.
78 blank lines and lines beginning with // are ignored.
79 ";
80
81 # ------------------ process command-line options ------------------
82
83 my %options;
84 my @SAVE_ARGV = @ARGV;
85
86 $Getopt::Long::ignorecase = 0;
87 &GetOptions (
88 \%options,
89 'c-output=s',
90 'h-output=s',
91 'help',
92 );
93
94 die $usage if $options{"help"};
95
96 my $in_script;
97 my $slurp;
98
99 my ($cout, $hout) = ($options{"c-output"}, $options{"h-output"});
100
101 open (COUT, ">$cout") or die "Can't open C output file $cout: $!";
102 open (HOUT, ">$hout") or die "Can't open C output file $hout: $!";
103
104 select (STDOUT); $| = 1;
105
106 print COUT "/* Automatically-generated Unicode-encapsulation file,
107 using the command
108
109 $myPath$myName @SAVE_ARGV
110
111 Do not edit. See `$myName'.
112 */
113
114 #include <config.h>
115 #include \"lisp.h\"
116
117 #include \"syswindows.h\"
118
119 ";
120 print HOUT "/* Automatically-generated Unicode-encapsulation header file.
121 Do not edit. See `$myName'.
122 */\n\n";
123
124 my %files;
125 my %processed;
126 my %bracket;
127
128 my $current_file;
129 my @current_bracket;
130
131 while (<>)
132 {
133 chomp;
134
135 if (/^begin-unicode-encapsulation-script$/)
136 {
137 $in_script = 1;
138 }
139 elsif (/^end-unicode-encapsulation-script$/)
140 {
141 $in_script = 0;
142 }
143 elsif ($in_script)
144 {
145 next if (m!^//!);
146 next if (/^[ \t]*$/);
147 if (/(dir|file|yes|soon|no|skip|split|begin-bracket|end-bracket)(?: (.*))?/)
148 {
149 my ($command, $parms) = ($1, $2);
150 if ($command eq "dir")
151 {
152 chdir $parms or die "Can't chdir to $parms: $!";
153 }
154 elsif ($command eq "file")
155 {
156 $current_file = $parms;
157 }
158 elsif ($command eq "begin-bracket")
159 {
160 my $current_bracket = $current_bracket[$#current_bracket];
161 if (defined ($current_bracket))
162 {
163 $current_bracket .= "&& $parms";
164 }
165 else
166 {
167 $current_bracket = "$parms";
168 }
169 push @current_bracket, $current_bracket;
170 }
171 elsif ($command eq "end-bracket")
172 {
173 pop @current_bracket;
174 }
175 else
176 {
177 my ($fun, $reason) = split /\s+/, $parms, 2;
178 $files{$current_file}{$fun} = [$command, $reason];
179 $bracket{$current_file}{$fun} =
180 $current_bracket[$#current_bracket];
181 }
182 }
183 else
184 {
185 print "WARNING: Unknown line $_\n";
186 }
187 }
188 }
189
190
191 foreach my $file (keys %files)
192 {
193 $slurp = &FileContents ($file);
194 print "Processing file $file\n";
195 print HOUT "\n/* Processing file $file */\n\n";
196 my $totalspace = 70 - length ("Processing file $file");
197 $totalspace = 0 if $totalspace < 0;
198 my $alignspaceleft = $totalspace / 2;
199 my $alignspaceright = ($totalspace + 1) / 2;
200 print COUT "
201 /*----------------------------------------------------------------------*/
202 /*" . (" " x $alignspaceleft) . "Processing file $file" .
203 (" " x $alignspaceright) . "*/
204 /*----------------------------------------------------------------------*/
205
206 ";
207
208 my ($ws_re, $must_ws_re, $tok_ch) =
209 ("\\s*", "\\s+", "\\w");
210 # unfortunately there is no surefire way short of
211 # parsing all include files for typedefs to
212 # distinguish types from parameters, and prototypes
213 # appear in the include files both with and without
214 # parameters -- the latter kinds appear in a very
215 # different style and were obviously added later. so
216 # we rely on the fact that defined types are all
217 # upper-case, and parameters generally are not, and
218 # special-case the exceptions.
219 my $typeword_re =
220 # note the negative lookahead assertions: the first
221 # one excludes the words "X" and "Y" from type
222 # words, since they appear as parameter names in
223 # CreateWindowEx; the second prevents "void
224 # *Argument" from being parsed as a type "void *A"
225 # followed by a parameter "rgument".
226 "(?:(?!(?:X\\b|Y\\b))(?:unsigned|int|long|short|va_list|[A-Z_0-9]+)(?!${tok_ch}))";
227 my $typetoken_re = "(?:$typeword_re$ws_re\\**$ws_re)";
228 my $arg_re = "(?:($typetoken_re+)(${tok_ch}+)?(?: OPTIONAL)?)";
229 my $fun_re = "(SHSTDAPI_\\(${tok_ch}+\\)|${tok_ch}" . "[A-Za-z_0-9 \t\n\r\f]*?${tok_ch})${ws_re}(${tok_ch}+)W${ws_re}\\(((${ws_re}${arg_re}${ws_re},)*${ws_re}${arg_re}${ws_re})\\);";
230
231 # print "regexp: $fun_re\n";
232 while ($slurp =~ /$fun_re/g)
233 {
234 my ($rettype, $fun, $args) = ($1, $2, $3);
235 $processed{$fun} = 1;
236 print "Processing: $fun";
237
238 my ($command, $reason) = ($files{$file}{$fun}[0], $files{$file}{$fun}[1]);
239 if (!defined ($command))
240 {
241 print " (no command found)\n";
242 }
243 else
244 {
245 print "\n";
246 my $bracket = $bracket{$file}{$fun};
247 if (defined ($bracket))
248 {
249 print HOUT "#if $bracket\n";
250 print COUT "#if $bracket\n\n";
251 }
252 if ($command eq "no")
253 {
254 if (!defined ($reason))
255 {
256 print "WARNING: No reason given for `no' with function $fun\n";
257 $reason = "";
258 }
259
260 print HOUT "#undef $fun\n";
261 print HOUT "#define $fun error $reason\n";
262 print COUT "/* Error if $fun used: $reason */\n\n";
263 }
264 elsif ($command eq "skip")
265 {
266 if (!defined ($reason))
267 {
268 print "WARNING: No reason given for `skip' with function $fun\n";
269 $reason = "";
270 }
271
272 print HOUT "/* Skipping $fun because $reason */\n";
273 print COUT "/* Skipping $fun because $reason */\n\n";
274 }
275 elsif ($command eq "soon")
276 {
277 $reason = "" if !defined ($reason);
278
279 print HOUT "/* Not yet: $fun $reason */\n";
280 print COUT "/* Not yet: $fun $reason */\n\n";
281 }
282 else
283 {
284 my (@args, %argtype, %ansiarg, %xarg, $split_struct,
285 $split_rettype);
286 if ($command eq "split")
287 {
288 ($split_struct, $reason) = split /\s+/, $reason, 2;
289 }
290 my $argno = 0;
291 while ($args =~ /$arg_re/g)
292 {
293 $argno++;
294 my ($argtype, $argname) = ($1, $2);
295 $argtype =~ s/\s*$//;
296 next if $argtype eq "void" || $argtype eq "VOID";
297 $argname = "arg$argno" if !defined ($argname);
298 $argtype{$argname} = $argtype;
299 $ansiarg{$argname} = $argtype;
300 $ansiarg{$argname} =~ s/\bLPWSTR\b/LPSTR/;
301 $ansiarg{$argname} =~ s/\bLPCWSTR\b/LPCSTR/;
302 $xarg{$argname} = $argtype;
303 $xarg{$argname} =~ s/\bLPWSTR\b/Extbyte */;
304 $xarg{$argname} =~ s/\bLPCWSTR\b/const Extbyte */;
305 if (defined ($split_struct))
306 {
307 my $fuck_cperl1 = "\\b${split_struct}W\\b";
308 my $fuck_cperl2 = "${split_struct}A";
309 $ansiarg{$argname} =~ s/$fuck_cperl1/$fuck_cperl2/;
310 }
311 push @args, $argname;
312 }
313 $rettype =~ s/\bSHSTDAPI_\((.*)\)/$1/;
314 $rettype =~ s/\s*WIN\w*?API\s*//g;
315 $rettype =~ s/\bAPIENTRY\b//;
316 $rettype =~ s/\bSHSTDAPI\b/HRESULT/;
317 if ($rettype =~ /LPC?WSTR/)
318 {
319 $split_rettype = 1;
320 $rettype =~ s/\bLPWSTR\b/Extbyte */;
321 $rettype =~ s/\bLPCWSTR\b/const Extbyte */;
322 }
323 if (defined ($reason))
324 {
325 print COUT "/* NOTE: $reason */\n";
326 }
327 print COUT "$rettype\nqxe$fun (";
328 print HOUT "$rettype qxe$fun (";
329 my $first = 1;
330 if (!@args)
331 {
332 print COUT "void";
333 print HOUT "void";
334 }
335 else
336 {
337 foreach my $x (@args)
338 {
339 print COUT ", " if !$first;
340 print HOUT ", " if !$first;
341 $first = 0;
342 print COUT "$xarg{$x} $x";
343 print HOUT "$xarg{$x} $x";
344 }
345 }
346 print HOUT ");\n";
347 print COUT ")\n{\n if (XEUNICODE_P)\n ";
348 if ($rettype ne "void" && $rettype ne "VOID")
349 {
350 print COUT "return ";
351 print COUT "($rettype) " if $split_rettype;
352 }
353 print COUT "${fun}W (";
354 $first = 1;
355 foreach my $x (@args)
356 {
357 print COUT ", " if !$first;
358 $first = 0;
359 print COUT ($argtype{$x} eq $xarg{$x} ? $x :
360 "($argtype{$x}) $x");
361 }
362 print COUT ");\n else\n ";
363 if ($rettype ne "void" && $rettype ne "VOID")
364 {
365 print COUT "return ";
366 print COUT "($rettype) " if $split_rettype;
367 }
368 print COUT "${fun}A (";
369 $first = 1;
370 foreach my $x (@args)
371 {
372 print COUT ", " if !$first;
373 $first = 0;
374 print COUT ($argtype{$x} eq $ansiarg{$x} ? $x :
375 "($ansiarg{$x}) $x");
376 }
377 print COUT ");\n}\n\n";
378 }
379 if (defined ($bracket))
380 {
381 print HOUT "#endif /* $bracket */\n";
382 print COUT "#endif /* $bracket */\n\n";
383 }
384 }
385 }
386 }
387
388 foreach my $file (keys %files)
389 {
390 foreach my $fun (keys %{$files{$file}})
391 {
392 if (!$processed{$fun} && $files{$file}{$fun}[0] =~ /^(yes|soon|split)$/)
393 {
394 print "WARNING: Can't locate prototype for $fun\n";
395 }
396 }
397 }
398
399
400 sub FileContents
401 {
402 local $/ = undef;
403 open (FILE, "< $_[0]") or die "$_[0]: $!";
404 my $retval = scalar <FILE>;
405 # must hack away CRLF junk.
406 $retval =~ s/\r\n/\n/g;
407 return $retval;
408 }