comparison src/intl-win32.c @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children a5954632b187
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 /* Win32 internationalization functions.
2 Copyright (C) 2000, 2001, 2002 Ben Wing.
3 Copyright (C) 2000 IKEYAMA Tomonori.
4
5 This file is part of XEmacs.
6
7 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 2, or (at your option) any
10 later version.
11
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with XEmacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 /* Synched up with: Not in FSF. */
23
24 /* Authorship:
25
26 Current primary author: Ben Wing <ben@xemacs.org>
27
28 Created summer 2000 by Ben Wing. Almost completely written by Ben Wing.
29 Little bits of code in some of the Lisp primitives from FSF Emacs.
30 Versions of wcscpy, wcsncpy from Cygwin newlib.
31
32 Coding systems written by Ben Wing in file-coding.c; moved here Sep 2001.
33 */
34
35 #include <config.h>
36 #include "lisp.h"
37
38 #include "elhash.h"
39 #include "faces.h"
40 #include "file-coding.h"
41 #include "frame.h"
42 #include "window.h"
43
44 #include "console-msw.h"
45 #include "objects-msw.h"
46
47 #ifndef CYGWIN_HEADERS
48 # include <mbctype.h>
49 #elif defined (MINGW)
50 int _setmbcp (int);
51 int _getmbcp (void);
52 #else
53 # define NO_EXT_MULTIBYTE_FEATURES
54 #endif
55
/* Global Lisp objects used by this file.  NOTE(review): by XEmacs naming
   convention the Q prefix denotes symbols, and their initialization is
   not visible in this part of the file -- confirm where they are set up. */
56 Lisp_Object Qmswindows_multibyte, Qmswindows_multibyte_to_unicode;
57 Lisp_Object Qmswindows_tstr, Qmswindows_unicode;
58 Lisp_Object Qmswindows_multibyte_system_default;
59
/* NOTE(review): the names suggest the code-page categories (Ansi, OEM,
   Mac, EBCDIC) -- confirm against their users. */
60 Lisp_Object Qansi, Qoem, Qmac, Qebcdic;
61 /* Qcode_page, Qlocale, Qcurrent, Quser_default, Qsystem_default in
62 general-slots.h */
63
64 #ifdef MULE
65
/* NOTE(review): not referenced in the visible part of this file;
   presumably a table relating Mule charsets to Windows code pages --
   confirm against its users. */
66 static Lisp_Object Vmswindows_charset_code_page_table;
67
/* The locale most recently installed by set_current_lcid().  It is
   returned by mswindows_current_locale() instead of asking Windows for
   the thread locale. */
68 LCID current_locale;
69
70
71 /************************************************************************/
72 /* Language/locale/code page conversion functions */
73 /************************************************************************/
74
75 /* There are various different ways of representing the vague concept
76 of "language", and it can be very confusing. So:
77
78 -- The C library has the concept of "locale", which is a
79 combination of language and country, and which controls the way
80 currency and dates are displayed, the encoding of data, etc.
81
82 -- XEmacs has the concept of "language environment", more or less
83 like a locale; although currently in most cases it just refers to
84 the language, and no sub-language distinctions are
85 made. (Exceptions are with Chinese, which has different language
86 environments for Taiwan and mainland China, due to the different
87 encodings and writing systems.)
88
89 -- Windows has a number of different language concepts:
90
91 1. There are "languages" and "sublanguages", which correspond to
92 the languages and countries of the C library -- e.g. LANG_ENGLISH
93 and SUBLANG_ENGLISH_US. These are identified by 8-bit integers,
94 called the "primary language identifier" and "sublanguage
95 identifier", respectively. These are combined into a 16-bit
96 integer or "language identifier" by MAKELANGID().
97
98 2. The language identifier in turn is combined with a "sort
99 identifier" (and optionally a "sort version") to yield a 32-bit
100 integer called a "locale identifier" (type LCID), which identifies
101 locales -- the primary means of distinguishing language/regional
102 settings and similar to C library locales.
103
104 3. "Code pages" identify different text encodings (i.e. a set of
105 supported characters, an enumeration of those characters [i.e. an
106 association of character with number or number pair; there may be
107 disjoint ranges of numbers supported], and a way of encoding a
108 stream of those characters into an 8-bit data stream). All of the
109 encodings are either one-byte or mixed one-byte/two-byte encodings,
110 all non-modal; in the mixed encodings, two-byte characters have the
111 first byte >= 128, although the second byte may or may not be
112 restricted to this range, depending on the encoding. Code pages
113 are similar to XEmacs "charsets"; the latter also define a set of
114 supported characters and an enumeration of those characters (but
115 code pages additionally define an encoding, which charsets don't
116 do). Code pages often function in Windows like charsets in XEmacs.
117
118 4. Every Windows locale has a specific code page associated with
119 it; more than one locale can share a code page -- e.g. all the
120 Western European languages, including English, do.
121
122 5. Windows also has an "input locale identifier" (aka "keyboard
123 layout id") or HKL, which is a 32-bit integer composed of the
124 16-bit language identifier and a 16-bit "device identifier", which
125 originally specified a particular keyboard layout (e.g. the locale
126 "US English" can have the QWERTY layout, the Dvorak layout, etc.),
127 but has been expanded to include speech-to-text converters and
128 other non-keyboard ways of inputting text. Note that both the HKL
129 and LCID share the language identifier in the lower 16 bits, and in
130 both cases a 0 in the upper 16 bits means "default" (sort order or
131 device), providing a way to convert between HKL's, LCID's, and
132 language identifiers (i.e. language/sublanguage pairs). The
133 default keyboard layout for a language is (as far as I can
134 determine) established using the Regional Settings control panel
135 applet, where you can add input locales as combinations of language
136 (actually language/sublanguage) and layout; presumably if you list
137 only one input locale with a particular language, the corresponding
138 layout is the default for that language. But what if you list more
139 than one? You can specify a single default input locale, but there
140 appears to be no way to do so on a per-language basis.
141 */
142
/* One entry of a code <-> name table.

   CODE is a Windows LANG_* or SUBLANG_* value and STRING is the name
   used for it on the Lisp side.  In sublang_to_string_table, an entry
   whose STRING is 0 is a section header: its CODE is a LANG_* value and
   the entries that follow, up to the next header, are that language's
   sublanguages. */
143 struct lang_to_string
144 {
145 int code;
146 char *string;
147 };
148
/* Table of Windows primary-language codes (LANG_*) and their names.

   Passed to lang_to_langcode() to turn a name into a code and to
   langcode_to_lang() to turn a code back into a name.  Every entry has a
   non-null string; lang_to_langcode() relies on that because it hands the
   string straight to strcmp().  Each entry sits in its own #ifdef, so a
   constant that the compiler's headers do not define simply drops out of
   the table. */
149 struct lang_to_string lang_to_string_table[] =
150 {
151 /* These names change from version to version of VC++, so it's easiest
152 just to bracket them all with ifdefs. */
153 #ifdef LANG_AFRIKAANS
154 { LANG_AFRIKAANS, "AFRIKAANS" },
155 #endif
156 #ifdef LANG_ALBANIAN
157 { LANG_ALBANIAN, "ALBANIAN" },
158 #endif
159 #ifdef LANG_ARABIC
160 { LANG_ARABIC, "ARABIC" },
161 #endif
162 #ifdef LANG_ARMENIAN
163 { LANG_ARMENIAN, "ARMENIAN" },
164 #endif
165 #ifdef LANG_ASSAMESE
166 { LANG_ASSAMESE, "ASSAMESE" },
167 #endif
168 #ifdef LANG_AZERI
169 { LANG_AZERI, "AZERI" },
170 #endif
171 #ifdef LANG_BASQUE
172 { LANG_BASQUE, "BASQUE" },
173 #endif
174 #ifdef LANG_BELARUSIAN
175 { LANG_BELARUSIAN, "BELARUSIAN" },
176 #endif
177 #ifdef LANG_BENGALI
178 { LANG_BENGALI, "BENGALI" },
179 #endif
180 #ifdef LANG_BULGARIAN
181 { LANG_BULGARIAN, "BULGARIAN" },
182 #endif
183 #ifdef LANG_CATALAN
184 { LANG_CATALAN, "CATALAN" },
185 #endif
186 #ifdef LANG_CHINESE
187 { LANG_CHINESE, "CHINESE" },
188 #endif
189 #ifdef LANG_CROATIAN
190 { LANG_CROATIAN, "CROATIAN" },
191 #endif
192 #ifdef LANG_CZECH
193 { LANG_CZECH, "CZECH" },
194 #endif
195 #ifdef LANG_DANISH
196 { LANG_DANISH, "DANISH" },
197 #endif
198 #ifdef LANG_DUTCH
199 { LANG_DUTCH, "DUTCH" },
200 #endif
201 #ifdef LANG_ENGLISH
202 { LANG_ENGLISH, "ENGLISH" },
203 #endif
204 #ifdef LANG_ESTONIAN
205 { LANG_ESTONIAN, "ESTONIAN" },
206 #endif
207 #ifdef LANG_FAEROESE
208 { LANG_FAEROESE, "FAEROESE" },
209 #endif
210 #ifdef LANG_FARSI
211 { LANG_FARSI, "FARSI" },
212 #endif
213 #ifdef LANG_FINNISH
214 { LANG_FINNISH, "FINNISH" },
215 #endif
216 #ifdef LANG_FRENCH
217 { LANG_FRENCH, "FRENCH" },
218 #endif
219 #ifdef LANG_GEORGIAN
220 { LANG_GEORGIAN, "GEORGIAN" },
221 #endif
222 #ifdef LANG_GERMAN
223 { LANG_GERMAN, "GERMAN" },
224 #endif
225 #ifdef LANG_GREEK
226 { LANG_GREEK, "GREEK" },
227 #endif
228 #ifdef LANG_GUJARATI
229 { LANG_GUJARATI, "GUJARATI" },
230 #endif
231 #ifdef LANG_HEBREW
232 { LANG_HEBREW, "HEBREW" },
233 #endif
234 #ifdef LANG_HINDI
235 { LANG_HINDI, "HINDI" },
236 #endif
237 #ifdef LANG_HUNGARIAN
238 { LANG_HUNGARIAN, "HUNGARIAN" },
239 #endif
240 #ifdef LANG_ICELANDIC
241 { LANG_ICELANDIC, "ICELANDIC" },
242 #endif
243 #ifdef LANG_INDONESIAN
244 { LANG_INDONESIAN, "INDONESIAN" },
245 #endif
246 #ifdef LANG_ITALIAN
247 { LANG_ITALIAN, "ITALIAN" },
248 #endif
249 #ifdef LANG_JAPANESE
250 { LANG_JAPANESE, "JAPANESE" },
251 #endif
252 #ifdef LANG_KANNADA
253 { LANG_KANNADA, "KANNADA" },
254 #endif
255 #ifdef LANG_KASHMIRI
256 { LANG_KASHMIRI, "KASHMIRI" },
257 #endif
258 #ifdef LANG_KAZAK
259 { LANG_KAZAK, "KAZAK" },
260 #endif
261 #ifdef LANG_KONKANI
262 { LANG_KONKANI, "KONKANI" },
263 #endif
264 #ifdef LANG_KOREAN
265 { LANG_KOREAN, "KOREAN" },
266 #endif
267 #ifdef LANG_LATVIAN
268 { LANG_LATVIAN, "LATVIAN" },
269 #endif
270 #ifdef LANG_LITHUANIAN
271 { LANG_LITHUANIAN, "LITHUANIAN" },
272 #endif
273 #ifdef LANG_MACEDONIAN
274 { LANG_MACEDONIAN, "MACEDONIAN" },
275 #endif
276 #ifdef LANG_MALAY
277 { LANG_MALAY, "MALAY" },
278 #endif
279 #ifdef LANG_MALAYALAM
280 { LANG_MALAYALAM, "MALAYALAM" },
281 #endif
282 #ifdef LANG_MANIPURI
283 { LANG_MANIPURI, "MANIPURI" },
284 #endif
285 #ifdef LANG_MARATHI
286 { LANG_MARATHI, "MARATHI" },
287 #endif
288 #ifdef LANG_NEPALI
289 { LANG_NEPALI, "NEPALI" },
290 #endif
291 #ifdef LANG_NEUTRAL
292 { LANG_NEUTRAL, "NEUTRAL" },
293 #endif
294 #ifdef LANG_NORWEGIAN
295 { LANG_NORWEGIAN, "NORWEGIAN" },
296 #endif
297 #ifdef LANG_ORIYA
298 { LANG_ORIYA, "ORIYA" },
299 #endif
300 #ifdef LANG_POLISH
301 { LANG_POLISH, "POLISH" },
302 #endif
303 #ifdef LANG_PORTUGUESE
304 { LANG_PORTUGUESE, "PORTUGUESE" },
305 #endif
306 #ifdef LANG_PUNJABI
307 { LANG_PUNJABI, "PUNJABI" },
308 #endif
309 #ifdef LANG_ROMANIAN
310 { LANG_ROMANIAN, "ROMANIAN" },
311 #endif
312 #ifdef LANG_RUSSIAN
313 { LANG_RUSSIAN, "RUSSIAN" },
314 #endif
315 #ifdef LANG_SANSKRIT
316 { LANG_SANSKRIT, "SANSKRIT" },
317 #endif
318 #ifdef LANG_SERBIAN
319 { LANG_SERBIAN, "SERBIAN" },
320 #endif
321 #ifdef LANG_SINDHI
322 { LANG_SINDHI, "SINDHI" },
323 #endif
324 #ifdef LANG_SLOVAK
325 { LANG_SLOVAK, "SLOVAK" },
326 #endif
327 #ifdef LANG_SLOVENIAN
328 { LANG_SLOVENIAN, "SLOVENIAN" },
329 #endif
330 #ifdef LANG_SPANISH
331 { LANG_SPANISH, "SPANISH" },
332 #endif
333 #ifdef LANG_SWAHILI
334 { LANG_SWAHILI, "SWAHILI" },
335 #endif
336 #ifdef LANG_SWEDISH
337 { LANG_SWEDISH, "SWEDISH" },
338 #endif
339 #ifdef LANG_TAMIL
340 { LANG_TAMIL, "TAMIL" },
341 #endif
342 #ifdef LANG_TATAR
343 { LANG_TATAR, "TATAR" },
344 #endif
345 #ifdef LANG_TELUGU
346 { LANG_TELUGU, "TELUGU" },
347 #endif
348 #ifdef LANG_THAI
349 { LANG_THAI, "THAI" },
350 #endif
351 #ifdef LANG_TURKISH
352 { LANG_TURKISH, "TURKISH" },
353 #endif
354 #ifdef LANG_UKRAINIAN
355 { LANG_UKRAINIAN, "UKRAINIAN" },
356 #endif
357 #ifdef LANG_URDU
358 { LANG_URDU, "URDU" },
359 #endif
360 #ifdef LANG_UZBEK
361 { LANG_UZBEK, "UZBEK" },
362 #endif
363 #ifdef LANG_VIETNAMESE
364 { LANG_VIETNAMESE, "VIETNAMESE" },
365 #endif
366 };
367
368 struct lang_to_string sublang_to_string_table[] =
369 {
370 { LANG_ARABIC, 0 },
371 #ifdef SUBLANG_ARABIC_ALGERIA
372 { SUBLANG_ARABIC_ALGERIA, "ARABIC_ALGERIA" },
373 #endif
374 #ifdef SUBLANG_ARABIC_BAHRAIN
375 { SUBLANG_ARABIC_BAHRAIN, "ARABIC_BAHRAIN" },
376 #endif
377 #ifdef SUBLANG_ARABIC_EGYPT
378 { SUBLANG_ARABIC_EGYPT, "ARABIC_EGYPT" },
379 #endif
380 #ifdef SUBLANG_ARABIC_IRAQ
381 { SUBLANG_ARABIC_IRAQ, "ARABIC_IRAQ" },
382 #endif
383 #ifdef SUBLANG_ARABIC_JORDAN
384 { SUBLANG_ARABIC_JORDAN, "ARABIC_JORDAN" },
385 #endif
386 #ifdef SUBLANG_ARABIC_KUWAIT
387 { SUBLANG_ARABIC_KUWAIT, "ARABIC_KUWAIT" },
388 #endif
389 #ifdef SUBLANG_ARABIC_LEBANON
390 { SUBLANG_ARABIC_LEBANON, "ARABIC_LEBANON" },
391 #endif
392 #ifdef SUBLANG_ARABIC_LIBYA
393 { SUBLANG_ARABIC_LIBYA, "ARABIC_LIBYA" },
394 #endif
395 #ifdef SUBLANG_ARABIC_MOROCCO
396 { SUBLANG_ARABIC_MOROCCO, "ARABIC_MOROCCO" },
397 #endif
398 #ifdef SUBLANG_ARABIC_OMAN
399 { SUBLANG_ARABIC_OMAN, "ARABIC_OMAN" },
400 #endif
401 #ifdef SUBLANG_ARABIC_QATAR
402 { SUBLANG_ARABIC_QATAR, "ARABIC_QATAR" },
403 #endif
404 #ifdef SUBLANG_ARABIC_SAUDI_ARABIA
405 { SUBLANG_ARABIC_SAUDI_ARABIA, "ARABIC_SAUDI_ARABIA" },
406 #endif
407 #ifdef SUBLANG_ARABIC_SYRIA
408 { SUBLANG_ARABIC_SYRIA, "ARABIC_SYRIA" },
409 #endif
410 #ifdef SUBLANG_ARABIC_TUNISIA
411 { SUBLANG_ARABIC_TUNISIA, "ARABIC_TUNISIA" },
412 #endif
413 #ifdef SUBLANG_ARABIC_UAE
414 { SUBLANG_ARABIC_UAE, "ARABIC_UAE" },
415 #endif
416 #ifdef SUBLANG_ARABIC_YEMEN
417 { SUBLANG_ARABIC_YEMEN, "ARABIC_YEMEN" },
418 #endif
419 { LANG_AZERI, 0 },
420 #ifdef SUBLANG_AZERI_CYRILLIC
421 { SUBLANG_AZERI_CYRILLIC, "AZERI_CYRILLIC" },
422 #endif
423 #ifdef SUBLANG_AZERI_LATIN
424 { SUBLANG_AZERI_LATIN, "AZERI_LATIN" },
425 #endif
426 { LANG_CHINESE, 0 },
427 #ifdef SUBLANG_CHINESE_HONGKONG
428 { SUBLANG_CHINESE_HONGKONG, "CHINESE_HONGKONG" },
429 #endif
430 #ifdef SUBLANG_CHINESE_MACAU
431 { SUBLANG_CHINESE_MACAU, "CHINESE_MACAU" },
432 #endif
433 #ifdef SUBLANG_CHINESE_SIMPLIFIED
434 { SUBLANG_CHINESE_SIMPLIFIED, "CHINESE_SIMPLIFIED" },
435 #endif
436 #ifdef SUBLANG_CHINESE_SINGAPORE
437 { SUBLANG_CHINESE_SINGAPORE, "CHINESE_SINGAPORE" },
438 #endif
439 #ifdef SUBLANG_CHINESE_TRADITIONAL
440 { SUBLANG_CHINESE_TRADITIONAL, "CHINESE_TRADITIONAL" },
441 #endif
442 { LANG_DUTCH, 0 },
443 #ifdef SUBLANG_DUTCH
444 { SUBLANG_DUTCH, "DUTCH" },
445 #endif
446 #ifdef SUBLANG_DUTCH_BELGIAN
447 { SUBLANG_DUTCH_BELGIAN, "DUTCH_BELGIAN" },
448 #endif
449 { LANG_ENGLISH, 0 },
450 #ifdef SUBLANG_ENGLISH_AUS
451 { SUBLANG_ENGLISH_AUS, "ENGLISH_AUS" },
452 #endif
453 #ifdef SUBLANG_ENGLISH_BELIZE
454 { SUBLANG_ENGLISH_BELIZE, "ENGLISH_BELIZE" },
455 #endif
456 #ifdef SUBLANG_ENGLISH_CAN
457 { SUBLANG_ENGLISH_CAN, "ENGLISH_CAN" },
458 #endif
459 #ifdef SUBLANG_ENGLISH_CARIBBEAN
460 { SUBLANG_ENGLISH_CARIBBEAN, "ENGLISH_CARIBBEAN" },
461 #endif
462 #ifdef SUBLANG_ENGLISH_EIRE
463 { SUBLANG_ENGLISH_EIRE, "ENGLISH_EIRE" },
464 #endif
465 #ifdef SUBLANG_ENGLISH_JAMAICA
466 { SUBLANG_ENGLISH_JAMAICA, "ENGLISH_JAMAICA" },
467 #endif
468 #ifdef SUBLANG_ENGLISH_NZ
469 { SUBLANG_ENGLISH_NZ, "ENGLISH_NZ" },
470 #endif
471 #ifdef SUBLANG_ENGLISH_PHILIPPINES
472 { SUBLANG_ENGLISH_PHILIPPINES, "ENGLISH_PHILIPPINES" },
473 #endif
474 #ifdef SUBLANG_ENGLISH_SOUTH_AFRICA
475 { SUBLANG_ENGLISH_SOUTH_AFRICA, "ENGLISH_SOUTH_AFRICA" },
476 #endif
477 #ifdef SUBLANG_ENGLISH_TRINIDAD
478 { SUBLANG_ENGLISH_TRINIDAD, "ENGLISH_TRINIDAD" },
479 #endif
480 #ifdef SUBLANG_ENGLISH_UK
481 { SUBLANG_ENGLISH_UK, "ENGLISH_UK" },
482 #endif
483 #ifdef SUBLANG_ENGLISH_US
484 { SUBLANG_ENGLISH_US, "ENGLISH_US" },
485 #endif
486 #ifdef SUBLANG_ENGLISH_ZIMBABWE
487 { SUBLANG_ENGLISH_ZIMBABWE, "ENGLISH_ZIMBABWE" },
488 #endif
489 { LANG_FRENCH, 0 },
490 #ifdef SUBLANG_FRENCH
491 { SUBLANG_FRENCH, "FRENCH" },
492 #endif
493 #ifdef SUBLANG_FRENCH_BELGIAN
494 { SUBLANG_FRENCH_BELGIAN, "FRENCH_BELGIAN" },
495 #endif
496 #ifdef SUBLANG_FRENCH_CANADIAN
497 { SUBLANG_FRENCH_CANADIAN, "FRENCH_CANADIAN" },
498 #endif
499 #ifdef SUBLANG_FRENCH_LUXEMBOURG
500 { SUBLANG_FRENCH_LUXEMBOURG, "FRENCH_LUXEMBOURG" },
501 #endif
502 #ifdef SUBLANG_FRENCH_MONACO
503 { SUBLANG_FRENCH_MONACO, "FRENCH_MONACO" },
504 #endif
505 #ifdef SUBLANG_FRENCH_SWISS
506 { SUBLANG_FRENCH_SWISS, "FRENCH_SWISS" },
507 #endif
508 { LANG_GERMAN, 0 },
509 #ifdef SUBLANG_GERMAN
510 { SUBLANG_GERMAN, "GERMAN" },
511 #endif
512 #ifdef SUBLANG_GERMAN_AUSTRIAN
513 { SUBLANG_GERMAN_AUSTRIAN, "GERMAN_AUSTRIAN" },
514 #endif
515 #ifdef SUBLANG_GERMAN_LIECHTENSTEIN
516 { SUBLANG_GERMAN_LIECHTENSTEIN, "GERMAN_LIECHTENSTEIN" },
517 #endif
518 #ifdef SUBLANG_GERMAN_LUXEMBOURG
519 { SUBLANG_GERMAN_LUXEMBOURG, "GERMAN_LUXEMBOURG" },
520 #endif
521 #ifdef SUBLANG_GERMAN_SWISS
522 { SUBLANG_GERMAN_SWISS, "GERMAN_SWISS" },
523 #endif
524 { LANG_ITALIAN, 0 },
525 #ifdef SUBLANG_ITALIAN
526 { SUBLANG_ITALIAN, "ITALIAN" },
527 #endif
528 #ifdef SUBLANG_ITALIAN_SWISS
529 { SUBLANG_ITALIAN_SWISS, "ITALIAN_SWISS" },
530 #endif
531 { LANG_KASHMIRI, 0 },
532 #ifdef SUBLANG_KASHMIRI_INDIA
533 { SUBLANG_KASHMIRI_INDIA, "KASHMIRI_INDIA" },
534 #endif
535 { LANG_KOREAN, 0 },
536 #ifdef SUBLANG_KOREAN
537 { SUBLANG_KOREAN, "KOREAN" },
538 #endif
539 #ifdef SUBLANG_KOREAN_JOHAB
540 /* NOTE: Omitted in more recent versions of VC++ (e.g. v6.0) */
541 { SUBLANG_KOREAN_JOHAB, "KOREAN_JOHAB" },
542 #endif
543 { LANG_LITHUANIAN, 0 },
544 #ifdef SUBLANG_LITHUANIAN
545 { SUBLANG_LITHUANIAN, "LITHUANIAN" },
546 #endif
547 #ifdef SUBLANG_LITHUANIAN_CLASSIC
548 { SUBLANG_LITHUANIAN_CLASSIC, "LITHUANIAN_CLASSIC" },
549 #endif
550 { LANG_MALAY, 0 },
551 #ifdef SUBLANG_MALAY_BRUNEI_DARUSSALAM
552 { SUBLANG_MALAY_BRUNEI_DARUSSALAM, "MALAY_BRUNEI_DARUSSALAM" },
553 #endif
554 #ifdef SUBLANG_MALAY_MALAYSIA
555 { SUBLANG_MALAY_MALAYSIA, "MALAY_MALAYSIA" },
556 #endif
557 { LANG_NEPALI, 0 },
558 #ifdef SUBLANG_NEPALI_INDIA
559 { SUBLANG_NEPALI_INDIA, "NEPALI_INDIA" },
560 #endif
561 { LANG_NEUTRAL, 0 },
562 #ifdef SUBLANG_NEUTRAL
563 { SUBLANG_NEUTRAL, "NEUTRAL" },
564 #endif
565 { LANG_NORWEGIAN, 0 },
566 #ifdef SUBLANG_NORWEGIAN_BOKMAL
567 { SUBLANG_NORWEGIAN_BOKMAL, "NORWEGIAN_BOKMAL" },
568 #endif
569 #ifdef SUBLANG_NORWEGIAN_NYNORSK
570 { SUBLANG_NORWEGIAN_NYNORSK, "NORWEGIAN_NYNORSK" },
571 #endif
572 { LANG_PORTUGUESE, 0 },
573 #ifdef SUBLANG_PORTUGUESE
574 { SUBLANG_PORTUGUESE, "PORTUGUESE" },
575 #endif
576 #ifdef SUBLANG_PORTUGUESE_BRAZILIAN
577 { SUBLANG_PORTUGUESE_BRAZILIAN, "PORTUGUESE_BRAZILIAN" },
578 #endif
579 { LANG_SERBIAN, 0 },
580 #ifdef SUBLANG_SERBIAN_CYRILLIC
581 { SUBLANG_SERBIAN_CYRILLIC, "SERBIAN_CYRILLIC" },
582 #endif
583 #ifdef SUBLANG_SERBIAN_LATIN
584 { SUBLANG_SERBIAN_LATIN, "SERBIAN_LATIN" },
585 #endif
586 { LANG_SPANISH, 0 },
587 #ifdef SUBLANG_SPANISH
588 { SUBLANG_SPANISH, "SPANISH" },
589 #endif
590 #ifdef SUBLANG_SPANISH_ARGENTINA
591 { SUBLANG_SPANISH_ARGENTINA, "SPANISH_ARGENTINA" },
592 #endif
593 #ifdef SUBLANG_SPANISH_BOLIVIA
594 { SUBLANG_SPANISH_BOLIVIA, "SPANISH_BOLIVIA" },
595 #endif
596 #ifdef SUBLANG_SPANISH_CHILE
597 { SUBLANG_SPANISH_CHILE, "SPANISH_CHILE" },
598 #endif
599 #ifdef SUBLANG_SPANISH_COLOMBIA
600 { SUBLANG_SPANISH_COLOMBIA, "SPANISH_COLOMBIA" },
601 #endif
602 #ifdef SUBLANG_SPANISH_COSTA_RICA
603 { SUBLANG_SPANISH_COSTA_RICA, "SPANISH_COSTA_RICA" },
604 #endif
605 #ifdef SUBLANG_SPANISH_DOMINICAN_REPUBLIC
606 { SUBLANG_SPANISH_DOMINICAN_REPUBLIC, "SPANISH_DOMINICAN_REPUBLIC" },
607 #endif
608 #ifdef SUBLANG_SPANISH_ECUADOR
609 { SUBLANG_SPANISH_ECUADOR, "SPANISH_ECUADOR" },
610 #endif
611 #ifdef SUBLANG_SPANISH_EL_SALVADOR
612 { SUBLANG_SPANISH_EL_SALVADOR, "SPANISH_EL_SALVADOR" },
613 #endif
614 #ifdef SUBLANG_SPANISH_GUATEMALA
615 { SUBLANG_SPANISH_GUATEMALA, "SPANISH_GUATEMALA" },
616 #endif
617 #ifdef SUBLANG_SPANISH_HONDURAS
618 { SUBLANG_SPANISH_HONDURAS, "SPANISH_HONDURAS" },
619 #endif
620 #ifdef SUBLANG_SPANISH_MEXICAN
621 { SUBLANG_SPANISH_MEXICAN, "SPANISH_MEXICAN" },
622 #endif
623 #ifdef SUBLANG_SPANISH_MODERN
624 { SUBLANG_SPANISH_MODERN, "SPANISH_MODERN" },
625 #endif
626 #ifdef SUBLANG_SPANISH_NICARAGUA
627 { SUBLANG_SPANISH_NICARAGUA, "SPANISH_NICARAGUA" },
628 #endif
629 #ifdef SUBLANG_SPANISH_PANAMA
630 { SUBLANG_SPANISH_PANAMA, "SPANISH_PANAMA" },
631 #endif
632 #ifdef SUBLANG_SPANISH_PARAGUAY
633 { SUBLANG_SPANISH_PARAGUAY, "SPANISH_PARAGUAY" },
634 #endif
635 #ifdef SUBLANG_SPANISH_PERU
636 { SUBLANG_SPANISH_PERU, "SPANISH_PERU" },
637 #endif
638 #ifdef SUBLANG_SPANISH_PUERTO_RICO
639 { SUBLANG_SPANISH_PUERTO_RICO, "SPANISH_PUERTO_RICO" },
640 #endif
641 #ifdef SUBLANG_SPANISH_URUGUAY
642 { SUBLANG_SPANISH_URUGUAY, "SPANISH_URUGUAY" },
643 #endif
644 #ifdef SUBLANG_SPANISH_VENEZUELA
645 { SUBLANG_SPANISH_VENEZUELA, "SPANISH_VENEZUELA" },
646 #endif
647 { LANG_SWEDISH, 0 },
648 #ifdef SUBLANG_SWEDISH
649 { SUBLANG_SWEDISH, "SWEDISH" },
650 #endif
651 #ifdef SUBLANG_SWEDISH_FINLAND
652 { SUBLANG_SWEDISH_FINLAND, "SWEDISH_FINLAND" },
653 #endif
654 { LANG_URDU, 0 },
655 #ifdef SUBLANG_URDU_INDIA
656 { SUBLANG_URDU_INDIA, "URDU_INDIA" },
657 #endif
658 #ifdef SUBLANG_URDU_PAKISTAN
659 { SUBLANG_URDU_PAKISTAN, "URDU_PAKISTAN" },
660 #endif
661 { LANG_UZBEK, 0 },
662 #ifdef SUBLANG_UZBEK_CYRILLIC
663 { SUBLANG_UZBEK_CYRILLIC, "UZBEK_CYRILLIC" },
664 #endif
665 #ifdef SUBLANG_UZBEK_LATIN
666 { SUBLANG_UZBEK_LATIN, "UZBEK_LATIN" },
667 #endif
668 };
669
670 static int
671 lang_to_langcode (Lisp_Object lang, struct lang_to_string *table,
672 int table_size)
673 {
674 int i;
675
676 for (i = 0; i < table_size; i++)
677 if (!strcmp ((char *) XSTRING_DATA (lang), table[i].string))
678 return table[i].code;
679 return -1;
680 }
681
682 static int
683 sublang_to_langcode (Lisp_Object lang, struct lang_to_string *table,
684 int table_size)
685 {
686 int i;
687
688 for (i = 0; i < table_size; i++)
689 if (table[i].string &&
690 !strcmp ((char *) XSTRING_DATA (lang), table[i].string))
691 return table[i].code;
692
693 if (!strcmp ((char *) XSTRING_DATA (lang), "NEUTRAL"))
694 return SUBLANG_NEUTRAL;
695 if (!strcmp ((char *) XSTRING_DATA (lang), "DEFAULT"))
696 return SUBLANG_DEFAULT;
697 if (!strcmp ((char *) XSTRING_DATA (lang), "SYS_DEFAULT"))
698 return SUBLANG_SYS_DEFAULT;
699
700 return -1;
701 }
702
703 static Lisp_Object
704 langcode_to_lang (int code, struct lang_to_string *table,
705 int table_size)
706 {
707 int i;
708
709 for (i = 0; i < table_size; i++)
710 if (code == table[i].code)
711 return build_string (table[i].string);
712 return Qnil;
713 }
714
715 static Lisp_Object
716 sublangcode_to_lang (int lang, int sublang, struct lang_to_string *table,
717 int table_size)
718 {
719 int i;
720 int found_lang = 0;
721
722 for (i = 0; i < table_size; i++)
723 {
724 if (found_lang)
725 {
726 if (!table[i].string)
727 break;
728 if (sublang == table[i].code)
729 return build_string (table[i].string);
730 }
731 else if (!table[i].string && lang == table[i].code)
732 found_lang = 1;
733 }
734
735 switch (sublang)
736 {
737 case SUBLANG_NEUTRAL:
738 return build_string ("NEUTRAL");
739 case SUBLANG_DEFAULT:
740 return build_string ("DEFAULT");
741 case SUBLANG_SYS_DEFAULT:
742 return build_string ("SYS_DEFAULT");
743 }
744
745 return Qnil;
746 }
747
748 static LCID
749 locale_to_lcid (Lisp_Object locale)
750 {
751 int langcode, sublangcode;
752 Lisp_Object lang, sublang;
753
754 if (STRINGP (locale))
755 {
756 lang = locale;
757 sublang = Qnil;
758 }
759 else if (CONSP (locale))
760 {
761 CHECK_STRING (XCAR (locale));
762 CHECK_STRING (XCDR (locale));
763 lang = XCAR (locale);
764 sublang = XCDR (locale);
765 }
766 else
767 invalid_argument ("Locale must be LANG or (LANG . SUBLANG)", locale);
768
769 langcode = lang_to_langcode (lang, lang_to_string_table,
770 countof (lang_to_string_table));
771
772 if (langcode < 0)
773 invalid_constant ("Unrecognized language", lang);
774
775 if (!NILP (sublang))
776 {
777 sublangcode = sublang_to_langcode (sublang, sublang_to_string_table,
778 countof (sublang_to_string_table));
779 if (sublangcode < 0)
780 invalid_constant ("Unrecognized sublanguage", sublang);
781 }
782 else
783 sublangcode = SUBLANG_DEFAULT;
784
785 return MAKELCID (MAKELANGID (langcode, sublangcode),
786 SORT_DEFAULT);
787 }
788
789 static Lisp_Object
790 lcid_to_locale (LCID lcid)
791 {
792 int langid = LANGIDFROMLCID (lcid);
793 int langcode = PRIMARYLANGID (langid);
794 int sublangcode = SUBLANGID (langid);
795
796 return Fcons (langcode_to_lang (langcode, lang_to_string_table,
797 countof (lang_to_string_table)),
798 sublangcode_to_lang (langcode, sublangcode,
799 sublang_to_string_table,
800 countof (sublang_to_string_table)));
801 }
802
803 int
804 mswindows_locale_to_code_page (LCID lcid)
805 {
806 char codepagestr[10];
807
808 GetLocaleInfoA (lcid, LOCALE_IDEFAULTANSICODEPAGE, codepagestr, 10);
809 return atoi (codepagestr);
810 }
811
812 int
813 mswindows_locale_to_oem_code_page (LCID lcid)
814 {
815 char codepagestr[10];
816
817 GetLocaleInfoA (lcid, LOCALE_IDEFAULTCODEPAGE, codepagestr, 10);
818 return atoi (codepagestr);
819 }
820
821 static void
822 set_current_lcid (LCID lcid)
823 {
824 int cp;
825
826 /* This will fail under Win9x, so we remember our own locale rather than
827 consulting GetThreadLocale. */
828 SetThreadLocale (lcid);
829 current_locale = lcid;
830 cp = mswindows_locale_to_code_page (lcid);
831 #ifndef NO_EXT_MULTIBYTE_FEATURES
832 _setmbcp (cp);
833 #endif
834 }
835
836 DEFUN ("mswindows-set-current-locale", Fmswindows_set_current_locale,
837 1, 1, 0, /*
838 Set the current MS Windows locale.
839
840 LOCALE should a language string, or a cons (LANG . SUBLANG).
841 If SUBLANG is omitted, "SUBLANG_DEFAULT" is used.
842
843 Recognized language names are
844 (some may not be recognized if the compiler is older than VC++ 6.0)
845
846 "AFRIKAANS"
847 "ALBANIAN"
848 "ARABIC"
849 "ARMENIAN"
850 "ASSAMESE"
851 "AZERI"
852 "BASQUE"
853 "BELARUSIAN"
854 "BENGALI"
855 "BULGARIAN"
856 "CATALAN"
857 "CHINESE"
858 "CROATIAN"
859 "CZECH"
860 "DANISH"
861 "DUTCH"
862 "ENGLISH"
863 "ESTONIAN"
864 "FAEROESE"
865 "FARSI"
866 "FINNISH"
867 "FRENCH"
868 "GEORGIAN"
869 "GERMAN"
870 "GREEK"
871 "GUJARATI"
872 "HEBREW"
873 "HINDI"
874 "HUNGARIAN"
875 "ICELANDIC"
876 "INDONESIAN"
877 "ITALIAN"
878 "JAPANESE"
879 "KANNADA"
880 "KASHMIRI"
881 "KAZAK"
882 "KONKANI"
883 "KOREAN"
884 "LATVIAN"
885 "LITHUANIAN"
886 "MACEDONIAN"
887 "MALAY"
888 "MALAYALAM"
889 "MANIPURI"
890 "MARATHI"
891 "NEPALI"
892 "NEUTRAL"
893 "NORWEGIAN"
894 "ORIYA"
895 "POLISH"
896 "PORTUGUESE"
897 "PUNJABI"
898 "ROMANIAN"
899 "RUSSIAN"
900 "SANSKRIT"
901 "SERBIAN"
902 "SINDHI"
903 "SLOVAK"
904 "SLOVENIAN"
905 "SPANISH"
906 "SWAHILI"
907 "SWEDISH"
908 "TAMIL"
909 "TATAR"
910 "TELUGU"
911 "THAI"
912 "TURKISH"
913 "UKRAINIAN"
914 "URDU"
915 "UZBEK"
916 "VIETNAMESE"
917
918 Recognized sub-language names are
919 (some may not be recognized if the compiler is older than VC++ 6.0)
920
921 "ARABIC_ALGERIA"
922 "ARABIC_BAHRAIN"
923 "ARABIC_EGYPT"
924 "ARABIC_IRAQ"
925 "ARABIC_JORDAN"
926 "ARABIC_KUWAIT"
927 "ARABIC_LEBANON"
928 "ARABIC_LIBYA"
929 "ARABIC_MOROCCO"
930 "ARABIC_OMAN"
931 "ARABIC_QATAR"
932 "ARABIC_SAUDI_ARABIA"
933 "ARABIC_SYRIA"
934 "ARABIC_TUNISIA"
935 "ARABIC_UAE"
936 "ARABIC_YEMEN"
937 "AZERI_CYRILLIC"
938 "AZERI_LATIN"
939 "CHINESE_HONGKONG"
940 "CHINESE_MACAU"
941 "CHINESE_SIMPLIFIED"
942 "CHINESE_SINGAPORE"
943 "CHINESE_TRADITIONAL"
944 "DEFAULT"
945 "DUTCH"
946 "DUTCH_BELGIAN"
947 "ENGLISH_AUS"
948 "ENGLISH_BELIZE"
949 "ENGLISH_CAN"
950 "ENGLISH_CARIBBEAN"
951 "ENGLISH_EIRE"
952 "ENGLISH_JAMAICA"
953 "ENGLISH_NZ"
954 "ENGLISH_PHILIPPINES"
955 "ENGLISH_SOUTH_AFRICA"
956 "ENGLISH_TRINIDAD"
957 "ENGLISH_UK"
958 "ENGLISH_US"
959 "ENGLISH_ZIMBABWE"
960 "FRENCH"
961 "FRENCH_BELGIAN"
962 "FRENCH_CANADIAN"
963 "FRENCH_LUXEMBOURG"
964 "FRENCH_MONACO"
965 "FRENCH_SWISS"
966 "GERMAN"
967 "GERMAN_AUSTRIAN"
968 "GERMAN_LIECHTENSTEIN"
969 "GERMAN_LUXEMBOURG"
970 "GERMAN_SWISS"
971 "ITALIAN"
972 "ITALIAN_SWISS"
973 "KASHMIRI_INDIA"
974 "KOREAN"
975 "KOREAN_JOHAB" (NOTE: omitted in Visual C++ 6.0 and later)
976 "LITHUANIAN"
977 "LITHUANIAN_CLASSIC"
978 "MALAY_BRUNEI_DARUSSALAM"
979 "MALAY_MALAYSIA"
980 "NEPALI_INDIA"
981 "NEUTRAL"
982 "NORWEGIAN_BOKMAL"
983 "NORWEGIAN_NYNORSK"
984 "PORTUGUESE"
985 "PORTUGUESE_BRAZILIAN"
986 "SERBIAN_CYRILLIC"
987 "SERBIAN_LATIN"
988 "SPANISH"
989 "SPANISH_ARGENTINA"
990 "SPANISH_BOLIVIA"
991 "SPANISH_CHILE"
992 "SPANISH_COLOMBIA"
993 "SPANISH_COSTA_RICA"
994 "SPANISH_DOMINICAN_REPUBLIC"
995 "SPANISH_ECUADOR"
996 "SPANISH_EL_SALVADOR"
997 "SPANISH_GUATEMALA"
998 "SPANISH_HONDURAS"
999 "SPANISH_MEXICAN"
1000 "SPANISH_MODERN"
1001 "SPANISH_NICARAGUA"
1002 "SPANISH_PANAMA"
1003 "SPANISH_PARAGUAY"
1004 "SPANISH_PERU"
1005 "SPANISH_PUERTO_RICO"
1006 "SPANISH_URUGUAY"
1007 "SPANISH_VENEZUELA"
1008 "SWEDISH"
1009 "SWEDISH_FINLAND"
1010 "SYS_DEFAULT"
1011 "URDU_INDIA"
1012 "URDU_PAKISTAN"
1013 "UZBEK_CYRILLIC"
1014 "UZBEK_LATIN"
1015 */
1016 (locale))
1017 {
1018 LCID lcid = locale_to_lcid (locale);
1019
1020 set_current_lcid (lcid);
1021 return Qnil;
1022 }
1023
#ifdef DEBUG_XEMACS

/* Thin wrapper around GetACP(), compiled only when DEBUG_XEMACS is
   defined.  It has internal linkage. */
static int
getacp (void)
{
  return GetACP ();
}

#endif /* DEBUG_XEMACS */
1034
1035 LCID
1036 mswindows_current_locale (void)
1037 {
1038 /* Even if SetThreadLocale() failed, return the right locale anyway */
1039 return current_locale;
1040 }
1041
1042 DEFUN ("mswindows-current-locale", Fmswindows_current_locale,
1043 0, 0, 0, /*
1044 Return the current MS Windows locale.
1045
1046 The return value will be a cons (LANG . SUBLANG). See
1047 `mswindows-set-current-locale' for more info.
1048 */
1049 ())
1050 {
1051 return lcid_to_locale (mswindows_current_locale ());
1052 }
1053
1054 DEFUN ("mswindows-user-default-locale", Fmswindows_user_default_locale,
1055 0, 0, 0, /*
1056 Return the MS Windows user-default locale.
1057 */
1058 ())
1059 {
1060 return lcid_to_locale (GetUserDefaultLCID ());
1061 }
1062
1063 DEFUN ("mswindows-system-default-locale", Fmswindows_system_default_locale,
1064 0, 0, 0, /*
1065 Return the MS Windows system-default locale.
1066 */
1067 ())
1068 {
1069 return lcid_to_locale (GetSystemDefaultLCID ());
1070 }
1071
1072 DEFUN ("mswindows-locale-code-page", Fmswindows_locale_code_page,
1073 0, 1, 0, /*
1074 Return the (ANSI) code page of the specified MS Windows locale.
1075 If LOCALE is nil or omitted, the current locale is used.
1076 */
1077 (locale))
1078 {
1079 LCID lcid = NILP (locale) ? current_locale : locale_to_lcid (locale);
1080 return make_int (mswindows_locale_to_code_page (lcid));
1081 }
1082
1083 DEFUN ("mswindows-locale-oem-code-page", Fmswindows_locale_oem_code_page,
1084 0, 1, 0, /*
1085 Return the OEM code page of the specified MS Windows locale.
1086 If LOCALE is nil or omitted, the current locale is used.
1087 */
1088 (locale))
1089 {
1090 LCID lcid = NILP (locale) ? current_locale : locale_to_lcid (locale);
1091 return make_int (mswindows_locale_to_oem_code_page (lcid));
1092 }
1093
1094 static DWORD
1095 int_from_hex (Char_ASCII *s)
1096 {
1097 DWORD val = 0;
1098 static Char_ASCII hex[] = "0123456789abcdefABCDEF";
1099 Char_ASCII *p;
1100
1101 while (*s && (p = strchr (hex, *s)) != NULL)
1102 {
1103 int digit = p - hex;
1104 if (digit > 15)
1105 digit -= 6;
1106 val = val * 16 + digit;
1107 s++;
1108 }
1109 return val;
1110 }
1111
1112 /* We need to build a global list, since the EnumSystemLocale callback
1113 function isn't given a context pointer. */
1114 static Lisp_Object Vmswindows_valid_locales;
1115
1116 static BOOL CALLBACK
1117 enum_locale_fn (Char_ASCII *localeNum)
1118 {
1119 DWORD id = int_from_hex (localeNum);
1120 Vmswindows_valid_locales =
1121 Fcons (lcid_to_locale ((LCID) id), Vmswindows_valid_locales);
1122 return TRUE;
1123 }
1124
1125 DEFUN ("mswindows-supported-locales", Fmswindows_supported_locales,
1126 0, 0, 0, /*
1127 Return a list of the supported MS Windows locales on this system.
1128 */
1129 ())
1130 {
1131 Vmswindows_valid_locales = Qnil;
1132
1133 /* Use the ANSI version because the return value is just a hex number. */
1134 EnumSystemLocalesA (enum_locale_fn, LCID_SUPPORTED);
1135
1136 Vmswindows_valid_locales = Fnreverse (Vmswindows_valid_locales);
1137 return Vmswindows_valid_locales;
1138 }
1139
1140 /************************************************************************/
1141 /* Mule functions */
1142 /************************************************************************/
1143
1144 DEFUN ("mswindows-charset-code-page",
1145 Fmswindows_charset_code_page, 1, 1, 0, /*
1146 Return the code page for the CHARSET.
1147
1148 #### This function may be changed in the near future.
1149
1150 Currently defined Windows code pages include (along with their status
1151 as Ansi, OEM, Mac, EBCDIC, or some combination):
1152
1153 EBCDIC 037 EBCDIC
1154 OEM 437 MS-DOS United States
1155 EBCDIC 500 EBCDIC "500V1"
1156 OEM 708 Arabic (ASMO 708)
1157 OEM 709 Arabic (ASMO 449+, BCON V4)
1158 OEM 710 Arabic (Transparent Arabic)
1159 OEM 720 Arabic (Transparent ASMO)
1160 OEM 737 Greek (formerly 437G)
1161 OEM 775 Baltic
1162 OEM 850 MS-DOS Multilingual (Latin I)
1163 OEM 852 MS-DOS Slavic (Latin II)
1164 OEM 855 IBM Cyrillic (primarily Russian)
1165 OEM 857 IBM Turkish
1166 OEM 860 MS-DOS Portuguese
1167 OEM 861 MS-DOS Icelandic
1168 OEM 862 Hebrew
1169 OEM 863 MS-DOS Canadian-French
1170 OEM 864 Arabic
1171 OEM 865 MS-DOS Nordic
1172 OEM 866 MS-DOS Russian
1173 OEM 869 IBM Modern Greek
1174 Ansi/OEM 874 Thai
1175 EBCDIC 875 EBCDIC
1176 Ansi/OEM 932 Japanese
1177 Ansi/OEM 936 Chinese (PRC, Singapore)
1178 Ansi/OEM 949 Korean
1179 Ansi/OEM 950 Chinese (Taiwan; Hong Kong SAR, PRC)
1180 EBCDIC 1026 EBCDIC
1181 ANSI 1200 Unicode (BMP of ISO 10646)
1182 ANSI 1250 Windows 3.1 Eastern European
1183 ANSI 1251 Windows 3.1 Cyrillic
1184 ANSI 1252 Windows 3.1 US (ANSI)
1185 ANSI 1253 Windows 3.1 Greek
1186 ANSI 1254 Windows 3.1 Turkish
1187 ANSI 1255 Hebrew
1188 ANSI 1256 Arabic
1189 ANSI 1257 Baltic
1190 ANSI 1258 VietNam
1191 Ansi/OEM 1361 Korean (Johab)
1192 Mac 10000 Macintosh Roman
1193 Mac 10001 Macintosh Japanese
1194 Mac 10006 Macintosh Greek I
1195 Mac 10007 Macintosh Cyrillic
1196 Mac 10029 Macintosh Latin 2
1197 Mac 10079 Macintosh Icelandic
1198 Mac 10081 Macintosh Turkish
1199
1200 A code page is a set of characters, along with an enumeration of these
1201 characters and an encoding of them in a byte stream. Thus, in XEmacs
1202 parlance it defines both a "charset" and a "coding system" for this
1203 charset. Traditional encodings are either simple one-byte encodings, or
1204 combination one-byte/two-byte encodings (aka MBCS encodings, where MBCS
1205 stands for "Multibyte Character Set") with the following properties:
1206
1207 -- all characters are encoded as a one-byte or two-byte sequence
1208 -- the encoding is stateless (non-modal)
1209 -- the lower 128 bytes are compatible with ASCII
1210 -- in the higher bytes, the value of the first byte ("lead byte")
1211 determines whether a second byte follows
1212 -- the values used for second bytes may overlap those used for first bytes,
1213 and (in some encodings) include values in the low half; thus, moving
1214 backwards is hard, and pure-ASCII algorithms (e.g. finding the next slash)
1215 will fail unless rewritten to be MBCS-aware (neither of these problems
1216 exist in UTF-8 or in the XEmacs internal string encoding)
1217
1218 Recent code pages, however, do not necessarily follow these properties --
1219 code pages have been expanded to include arbitrary encodings, such as UTF-8
1220 \(may have more than two bytes per character) and ISO-2022-JP (complex modal
1221 encoding).
1222
1223 Every locale has four associated code pages: ANSI (an international
1224 standard or some Microsoft-created approximation; the native code page
1225 under Windows), OEM (a DOS encoding, still used in the FAT file system),
1226 Mac (an encoding used on the Macintosh) and EBCDIC (a non-ASCII-compatible
1227 encoding used on IBM mainframes, originally based on the BCD or
1228 "binary-coded decimal" encoding of numbers). All code pages associated
1229 with a locale follow (as far as I know) the properties listed above for
1230 traditional code pages.
1231 */
1232 (charset))
1233 {
1234 charset = Fget_charset (charset);
1235 return Fgethash (charset, Vmswindows_charset_code_page_table, Qnil);
1236 }
1237
1238 DEFUN ("mswindows-set-charset-code-page",
1239 Fmswindows_set_charset_code_page, 2, 2, 0, /*
1240 Set the CODE-PAGE for the CHARSET.
1241
1242 #### This function may be changed once full Unicode support is present.
1243 */
1244 (charset, code_page))
1245 {
1246 charset = Fget_charset (charset);
1247 CHECK_INT (code_page);
1248 Fputhash (charset, code_page, Vmswindows_charset_code_page_table);
1249 return Qnil;
1250 }
1251
1252 Lisp_Object
1253 mswindows_get_code_page_charset (int code_page)
1254 {
1255 Lisp_Object charset_tail;
1256 Lisp_Object charset = Qunbound;
1257
1258 LIST_LOOP (charset_tail, Fcharset_list ())
1259 {
1260 Lisp_Object charset_code_page;
1261
1262 charset_code_page = Fmswindows_charset_code_page (XCAR (charset_tail));
1263 if (INTP (charset_code_page) &&
1264 code_page == XINT (charset_code_page))
1265 {
1266 charset = Fget_charset (XCAR (charset_tail));
1267 break;
1268 }
1269 }
1270 return charset;
1271 }
1272
1273
1274
1275 #if 0 /* #### from Emacs 20.6; consider porting */
1276
1277 xxDEFUN ("mswindows-get-locale-info", Fmswindows_get_locale_info, 1, 2, 0, /*
1278 Return information about the Windows locale LCID.
1279 By default, return a three letter locale code which encodes the default
1280 language as the first two characters, and the country or regionial variant
1281 as the third letter. For example, ENU refers to `English (United States)',
1282 while ENC means `English (Canadian)'.
1283
1284 If the optional argument LONGFORM is t, the long form of the locale
1285 name is returned, e.g. `English (United States)' instead; if LONGFORM
1286 is a number, it is interpreted as an LCTYPE constant and the corresponding
1287 locale information is returned.
1288
1289 If LCID (a 16-bit number) is not a valid locale, the result is nil.
1290 */
1291 (lcid, longform))
1292 {
1293 int got_abbrev;
1294 int got_full;
1295 char abbrev_name[32] = { 0 };
1296 char full_name[256] = { 0 };
1297
1298 CHECK_INT (lcid);
1299
1300 if (!IsValidLocale (XINT (lcid), LCID_SUPPORTED))
1301 return Qnil;
1302
1303 if (NILP (longform))
1304 {
1305 got_abbrev = GetLocaleInfo (XINT (lcid),
1306 LOCALE_SABBREVLANGNAME | LOCALE_USE_CP_ACP,
1307 abbrev_name, sizeof (abbrev_name));
1308 if (got_abbrev)
1309 return build_string (abbrev_name);
1310 }
1311 else if (EQ (longform, Qt))
1312 {
1313 got_full = GetLocaleInfo (XINT (lcid),
1314 LOCALE_SLANGUAGE | LOCALE_USE_CP_ACP,
1315 full_name, sizeof (full_name));
1316 if (got_full)
1317 return build_string (full_name);
1318 }
1319 else if (NUMBERP (longform))
1320 {
1321 got_full = GetLocaleInfo (XINT (lcid),
1322 XINT (longform),
1323 full_name, sizeof (full_name));
1324 if (got_full)
1325 return make_unibyte_string (full_name, got_full);
1326 }
1327
1328 return Qnil;
1329 }
1330
1331 /* We need to build a global list, since the EnumCodePages callback
1332 function isn't given a context pointer. */
1333 Lisp_Object Vmswindows_valid_code_pages;
1334
1335 BOOL CALLBACK enum_code_page_fn (LPTSTR codepageNum)
1336 {
1337 DWORD id = atoi (codepageNum);
1338 Vmswindows_valid_code_pages = Fcons (make_int (id), Vmswindows_valid_code_pages);
1339 return TRUE;
1340 }
1341
1342 xxDEFUN ("mswindows-get-valid-code-pages", Fmswindows_get_valid_code_pages, 0, 0, 0, /*
1343 Return list of all valid Windows code pages.
1344 */
1345 ())
1346 {
1347 Vmswindows_valid_code_pages = Qnil;
1348
1349 EnumSystemCodePages (enum_code_page_fn, CP_SUPPORTED);
1350
1351 Vmswindows_valid_code_pages = Fnreverse (Vmswindows_valid_code_pages);
1352 return Vmswindows_valid_code_pages;
1353 }
1354
1355 xxDEFUN ("mswindows-get-console-code-page", Fmswindows_get_console_code_page, 0, 0, 0, /*
1356 Return current Windows code page for console input.
1357 */
1358 ())
1359 {
1360 return make_int (GetConsoleCP ());
1361 }
1362
1363 xxDEFUN ("mswindows-set-console-code-page", Fmswindows_set_console_code_page, 1, 1, 0, /*
1364 Make Windows code page CP be the current code page setting for Emacs.
1365 The code page setting affects keyboard input and display in tty mode.
1366 If successful, the new CP is returned, otherwise nil.
1367 */
1368 (cp))
1369 {
1370 CHECK_INT (cp);
1371
1372 if (!IsValidCodePage (XINT (cp)))
1373 return Qnil;
1374
1375 if (!SetConsoleCP (XINT (cp)))
1376 return Qnil;
1377
1378 return make_int (GetConsoleCP ());
1379 }
1380
1381 xxDEFUN ("mswindows-get-console-output-code-page", Fmswindows_get_console_output_code_page, 0, 0, 0, /*
1382 Return current Windows code page for console output.
1383 */
1384 ())
1385 {
1386 return make_int (GetConsoleOutputCP ());
1387 }
1388
1389 xxDEFUN ("mswindows-set-console-output-code-page", Fmswindows_set_console_output_code_page, 1, 1, 0, /*
1390 Make Windows code page CP be the current code page setting for Emacs.
1391 The code page setting affects keyboard input and display in tty mode.
1392 If successful, the new CP is returned, otherwise nil.
1393 */
1394 (cp))
1395 {
1396 CHECK_INT (cp);
1397
1398 if (!IsValidCodePage (XINT (cp)))
1399 return Qnil;
1400
1401 if (!SetConsoleOutputCP (XINT (cp)))
1402 return Qnil;
1403
1404 return make_int (GetConsoleOutputCP ());
1405 }
1406
1407 xxDEFUN ("mswindows-get-code-page-charset", Fmswindows_get_code_page_charset, 1, 1, 0, /*
1408 Return charset of code page CP.
1409 Returns nil if the code page is not valid.
1410 */
1411 (cp))
1412 {
1413 CHARSETINFO info;
1414
1415 CHECK_INT (cp);
1416
1417 if (!IsValidCodePage (XINT (cp)))
1418 return Qnil;
1419
1420 if (TranslateCharsetInfo ((DWORD *) XINT (cp), &info, TCI_SRCCODEPAGE))
1421 return make_int (info.ciCharset);
1422
1423 return Qnil;
1424 }
1425
1426 xxDEFUN ("mswindows-get-valid-keyboard-layouts", Fmswindows_get_valid_keyboard_layouts, 0, 0, 0, /*
1427 Return list of Windows keyboard languages and layouts.
1428 The return value is a list of pairs of language id and layout id.
1429 */
1430 ())
1431 {
1432 int num_layouts = GetKeyboardLayoutList (0, NULL);
1433 HKL * layouts = (HKL *) alloca (num_layouts * sizeof (HKL));
1434 Lisp_Object obj = Qnil;
1435
1436 if (GetKeyboardLayoutList (num_layouts, layouts) == num_layouts)
1437 {
1438 while (--num_layouts >= 0)
1439 {
1440 DWORD kl = (DWORD) layouts[num_layouts];
1441
1442 obj = Fcons (Fcons (make_int (kl & 0xffff),
1443 make_int ((kl >> 16) & 0xffff)),
1444 obj);
1445 }
1446 }
1447
1448 return obj;
1449 }
1450
1451 xxDEFUN ("mswindows-get-keyboard-layout", Fmswindows_get_keyboard_layout, 0, 0, 0, /*
1452 Return current Windows keyboard language and layout.
1453 The return value is the cons of the language id and the layout id.
1454 */
1455 ())
1456 {
1457 DWORD kl = (DWORD) GetKeyboardLayout (dwWindowsThreadId);
1458
1459 return Fcons (make_int (kl & 0xffff),
1460 make_int ((kl >> 16) & 0xffff));
1461 }
1462
1463 xxDEFUN ("mswindows-set-keyboard-layout", Fmswindows_set_keyboard_layout, 1, 1, 0, /*
1464 Make LAYOUT be the current keyboard layout for Emacs.
1465 The keyboard layout setting affects interpretation of keyboard input.
1466 If successful, the new layout id is returned, otherwise nil.
1467 */
1468 (layout))
1469 {
1470 DWORD kl;
1471
1472 CHECK_CONS (layout);
1473 CHECK_INT (XCAR (layout)));
1474 CHECK_INT (XCDR (layout)));
1475
1476 kl = (XINT (XCAR (layout))) & 0xffff)
1477 | (XINT (XCDR (layout))) << 16);
1478
1479 if (!ActivateKeyboardLayout ((HKL) kl, 0))
1480 return Qnil;
1481
1482 return Fmswindows_get_keyboard_layout ();
1483 }
1484
1485 #endif /* 0 */
1486
1487
1488 /* input method functions. */
1489
1490 #ifdef HAVE_MS_WINDOWS
1491
1492 void
1493 mswindows_start_ime_composition (struct frame *f)
1494 {
1495 COMPOSITIONFORM form;
1496 HWND hwnd = FRAME_MSWINDOWS_HANDLE (f);
1497 HIMC himc = ImmGetContext (hwnd);
1498
1499 /* Set a position of composition window. */
1500 xzero (form);
1501 form.dwStyle = CFS_POINT;
1502 form.ptCurrentPos.x = FRAME_MSWINDOWS_CURSOR_X (f);
1503 form.ptCurrentPos.y = FRAME_MSWINDOWS_CURSOR_Y (f);
1504 ImmSetCompositionWindow (himc, &form);
1505
1506 /* Set composition window font same as current face one. */
1507 {
1508 LOGFONTW old_logfont;
1509 CHARSETINFO info;
1510 Lisp_Object charset;
1511
1512 /* Get Mule charset from current ime font charset. */
1513 qxeImmGetCompositionFont (himc, &old_logfont);
1514 TranslateCharsetInfo ((DWORD *) (DWORD) old_logfont.lfCharSet, &info,
1515 TCI_SRCCHARSET);
1516 charset = mswindows_get_code_page_charset (info.ciACP);
1517
1518 if (CHARSETP (charset))
1519 {
1520 Lisp_Object window = FRAME_SELECTED_WINDOW (f);
1521 struct window *w = XWINDOW (window);
1522 face_index findex = FRAME_MSWINDOWS_CURSOR_FINDEX (f);
1523 struct face_cachel *cachel = WINDOW_FACE_CACHEL (w, findex);
1524 Lisp_Object face_font = FACE_CACHEL_FONT (cachel, charset);
1525
1526 if (!FONT_INSTANCEP (face_font))
1527 face_font =
1528 ensure_face_cachel_contains_charset (cachel, window, charset);
1529
1530 if (!EQ (face_font, Vthe_null_font_instance))
1531 {
1532 LOGFONTW new_logfont;
1533
1534 /* Get LOGFONT from the face font */
1535 if (qxeGetObject (FONT_INSTANCE_MSWINDOWS_HFONT_VARIANT
1536 (XFONT_INSTANCE (face_font),
1537 cachel->underline, cachel->strikethru),
1538 sizeof (LOGFONTW), (void*) &new_logfont))
1539 qxeImmSetCompositionFont (himc, &new_logfont);
1540 }
1541 }
1542 }
1543 ImmReleaseContext (hwnd, himc);
1544 return;
1545 }
1546
1547 #endif /* HAVE_MS_WINDOWS */
1548
1549 #else /* not MULE */
1550
1551 int
1552 mswindows_locale_to_code_page (LCID lcid)
1553 {
1554 return CP_ACP;
1555 }
1556
1557 #endif /* MULE */
1558
1559
1560 #ifdef CYGWIN
1561
1562 /* based on newlib strncpy, strcpy */
1563
/* Copy at most COUNT wide characters from SRC0 to DST0.  If SRC0 is
   shorter than COUNT, the remainder of DST0 (up to COUNT characters) is
   filled with null characters; if it is not shorter, DST0 is not
   null-terminated.  Returns DST0. */
wchar_t *
wcsncpy (wchar_t *dst0, const wchar_t *src0, size_t count)
{
  size_t i = 0;

  /* Copy characters up to and including the terminator, but never more
     than COUNT of them. */
  while (i < count)
    {
      wchar_t ch = src0[i];

      dst0[i++] = ch;
      if (ch == '\0')
        break;
    }

  /* Pad whatever is left of the COUNT characters with nulls. */
  while (i < count)
    dst0[i++] = '\0';

  return dst0;
}
1583
/* Copy the wide string SRC0, including its terminating null character,
   to DST0.  Returns DST0. */
wchar_t *
wcscpy (wchar_t *dst0, const wchar_t *src0)
{
  wchar_t *out = dst0;
  const wchar_t *in = src0;

  /* Copy one character per iteration; stop after the null has been
     stored. */
  do
    *out = *in++;
  while (*out++ != '\0');

  return dst0;
}
1594
/* Return a newly allocated (via xmalloc) copy of the wide string STR,
   including its terminating null character.  Returns 0 if xmalloc()
   returns 0. */
wchar_t *
wcsdup (const wchar_t *str)
{
  /* Keep the size in size_t throughout; storing the length in an int
     would truncate it for very long strings. */
  size_t nbytes = (wcslen (str) + 1) * sizeof (wchar_t);
  void *val = xmalloc (nbytes);

  if (val == 0) return 0;
  return (wchar_t *) memcpy (val, str, nbytes);
}
1604
1605 #endif /* CYGWIN */
1606
1607
1608 /************************************************************************/
1609 /* MS Windows multibyte-to-unicode methods */
1610 /************************************************************************/
1611
DEFINE_CODING_SYSTEM_TYPE (mswindows_multibyte_to_unicode);

/* Which kind of code page a multibyte-to-unicode coding system uses.
   Set from the `code-page' property values `ansi', `oem', `ebcdic' and
   `mac' respectively. */
enum mswindows_multibyte_cp_type
{
  MULTIBYTE_ANSI,               /* ANSI code page (the init default) */
  MULTIBYTE_OEM,                /* OEM code page */
  MULTIBYTE_EBCDIC,             /* EBCDIC code page */
  MULTIBYTE_MAC                 /* Mac code page */
};
1621
/* How a multibyte-to-unicode coding system picks the locale (or the code
   page directly) that it converts with. */
enum mswindows_multibyte_locale_type
{
  MULTIBYTE_SPECIFIED_LOCALE,   /* an explicitly given locale */
  MULTIBYTE_SPECIFIED_CODE_PAGE, /* an explicitly given code page number */
  MULTIBYTE_CURRENT,            /* the current locale (the init default) */
  MULTIBYTE_USER_DEFAULT,       /* the user-default locale */
  MULTIBYTE_SYSTEM_DEFAULT      /* the system-default locale */
};
1630
1631 struct mswindows_multibyte_to_unicode_coding_system
1632 {
1633 enum mswindows_multibyte_cp_type cp_type;
1634 enum mswindows_multibyte_locale_type locale_type;
1635 LCID locale; /* if locale_type is MULTIBYTE_SPECIFIED_LOCALE */
1636 int cp; /* if locale_type is MULTIBYTE_SPECIFIED_CODE_PAGE */
1637 };
1638
/* Per-stream state for the mswindows-multibyte-to-unicode converter. */
struct mswindows_multibyte_to_unicode_coding_stream
{
  /* Trailing byte held back from the previous chunk because the
     character it belongs to was split across the chunk boundary. */
  int partial_byte;
  /* Nonzero when partial_byte holds a pending byte. */
  int partial_byte_present;
  /* Code page used for conversion; 0 means not yet determined. */
  int cp;
};
1645
1646 static const struct lrecord_description
1647 mswindows_multibyte_to_unicode_coding_system_description[] = {
1648 { XD_END }
1649 };
1650
1651 static void
1652 mswindows_multibyte_to_unicode_init (Lisp_Object codesys)
1653 {
1654 struct mswindows_multibyte_to_unicode_coding_system *data =
1655 XCODING_SYSTEM_TYPE_DATA (codesys, mswindows_multibyte_to_unicode);
1656
1657 data->cp_type = MULTIBYTE_ANSI;
1658 data->locale_type = MULTIBYTE_CURRENT;
1659 }
1660
1661 static Lisp_Object
1662 lcid_to_locale_mule_or_no (LCID lcid)
1663 {
1664 #ifdef MULE
1665 return lcid_to_locale (lcid);
1666 #else
1667 return Fcons (build_string ("NEUTRAL"), build_string ("DEFAULT"));
1668 #endif
1669 }
1670
1671 static int
1672 determine_code_page (Lisp_Object codesys)
1673 {
1674 #ifdef MULE
1675 LCID locale;
1676 struct mswindows_multibyte_to_unicode_coding_system *data =
1677 XCODING_SYSTEM_TYPE_DATA (codesys, mswindows_multibyte_to_unicode);
1678
1679 switch (data->locale_type)
1680 {
1681 case MULTIBYTE_SPECIFIED_CODE_PAGE:
1682 return data->cp;
1683 case MULTIBYTE_SPECIFIED_LOCALE:
1684 locale = data->locale; break;
1685 case MULTIBYTE_CURRENT:
1686 locale = mswindows_current_locale (); break;
1687 case MULTIBYTE_USER_DEFAULT:
1688 locale = GetUserDefaultLCID (); break;
1689 case MULTIBYTE_SYSTEM_DEFAULT:
1690 locale = GetSystemDefaultLCID (); break;
1691 default:
1692 abort (); locale = 0;
1693 }
1694
1695 switch (data->cp_type)
1696 {
1697 case MULTIBYTE_ANSI:
1698 return mswindows_locale_to_code_page (locale);
1699 case MULTIBYTE_OEM:
1700 return mswindows_locale_to_oem_code_page (locale);
1701 case MULTIBYTE_EBCDIC:
1702 #ifdef LOCALE_IDEFAULTEBCDICCODEPAGE /* Doesn't exist under Cygwin */
1703 {
1704 char codepagestr[10];
1705 GetLocaleInfoA (locale, LOCALE_IDEFAULTEBCDICCODEPAGE, codepagestr,
1706 10);
1707 return atoi (codepagestr);
1708 }
1709 #else
1710 invalid_operation ("Unable to determine EBCDIC code page for locale",
1711 lcid_to_locale (locale));
1712 return 0;
1713 #endif
1714 case MULTIBYTE_MAC:
1715 #ifdef LOCALE_IDEFAULTMACCODEPAGE /* Doesn't exist under Cygwin */
1716 {
1717 char codepagestr[10];
1718 GetLocaleInfoA (locale, LOCALE_IDEFAULTMACCODEPAGE, codepagestr,
1719 10);
1720 return atoi (codepagestr);
1721 }
1722 #else
1723 invalid_operation ("Unable to determine Mac code page for locale",
1724 lcid_to_locale (locale));
1725 return 0;
1726 #endif
1727 default:
1728 abort (); return 0;
1729 }
1730 #else /* not MULE */
1731 return CP_ACP;
1732 #endif
1733 }
1734
1735 static int
1736 mswindows_multibyte_to_unicode_putprop (Lisp_Object codesys,
1737 Lisp_Object key,
1738 Lisp_Object value)
1739 {
1740 struct mswindows_multibyte_to_unicode_coding_system *data =
1741 XCODING_SYSTEM_TYPE_DATA (codesys, mswindows_multibyte_to_unicode);
1742
1743 if (EQ (key, Qcode_page))
1744 {
1745 if (EQ (value, Qansi))
1746 data->cp_type = MULTIBYTE_ANSI;
1747 else if (EQ (value, Qoem))
1748 data->cp_type = MULTIBYTE_OEM;
1749 else if (EQ (value, Qebcdic))
1750 data->cp_type = MULTIBYTE_EBCDIC;
1751 else if (EQ (value, Qmac))
1752 data->cp_type = MULTIBYTE_MAC;
1753 else
1754 {
1755 CHECK_NATNUM (value);
1756 data->locale_type = MULTIBYTE_SPECIFIED_CODE_PAGE;
1757 data->cp = XINT (value);
1758 }
1759 }
1760 else if (EQ (key, Qlocale))
1761 {
1762 if (EQ (value, Qcurrent))
1763 data->locale_type = MULTIBYTE_CURRENT;
1764 else if (EQ (value, Quser_default))
1765 data->locale_type = MULTIBYTE_USER_DEFAULT;
1766 else if (EQ (value, Qsystem_default))
1767 data->locale_type = MULTIBYTE_SYSTEM_DEFAULT;
1768 else
1769 {
1770 data->locale_type = MULTIBYTE_SPECIFIED_LOCALE;
1771 #ifdef MULE
1772 data->locale = locale_to_lcid (value);
1773 #else
1774 data->locale = 0;
1775 #endif
1776 }
1777 }
1778 else
1779 return 0;
1780 return 1;
1781 }
1782
1783 static Lisp_Object
1784 mswindows_multibyte_to_unicode_getprop (Lisp_Object coding_system,
1785 Lisp_Object prop)
1786 {
1787 struct mswindows_multibyte_to_unicode_coding_system *data =
1788 XCODING_SYSTEM_TYPE_DATA (coding_system, mswindows_multibyte_to_unicode);
1789
1790 if (EQ (prop, Qcode_page))
1791 {
1792 if (data->locale_type == MULTIBYTE_SPECIFIED_CODE_PAGE)
1793 return make_int (data->cp);
1794 else
1795 switch (data->cp_type)
1796 {
1797 case MULTIBYTE_ANSI: return Qansi;
1798 case MULTIBYTE_OEM: return Qoem;
1799 case MULTIBYTE_EBCDIC: return Qebcdic;
1800 case MULTIBYTE_MAC: return Qmac;
1801 default: abort ();
1802 }
1803 }
1804 else if (EQ (prop, Qlocale))
1805 {
1806 switch (data->locale_type)
1807 {
1808 case MULTIBYTE_CURRENT: return Qcurrent;
1809 case MULTIBYTE_USER_DEFAULT: return Quser_default;
1810 case MULTIBYTE_SYSTEM_DEFAULT: return Qsystem_default;
1811 case MULTIBYTE_SPECIFIED_LOCALE:
1812 return lcid_to_locale_mule_or_no (data->locale);
1813
1814 case MULTIBYTE_SPECIFIED_CODE_PAGE:
1815 return Qnil;
1816 default: abort ();
1817 }
1818 }
1819
1820 return Qunbound;
1821 }
1822
1823 static void
1824 mswindows_multibyte_to_unicode_print (Lisp_Object cs,
1825 Lisp_Object printcharfun, int escapeflag)
1826 {
1827 struct mswindows_multibyte_to_unicode_coding_system *data =
1828 XCODING_SYSTEM_TYPE_DATA (cs, mswindows_multibyte_to_unicode);
1829
1830 write_c_string ("(", printcharfun);
1831 if (data->locale_type == MULTIBYTE_SPECIFIED_CODE_PAGE)
1832 print_internal (make_int (data->cp), printcharfun, 1);
1833 else
1834 {
1835 print_internal (mswindows_multibyte_to_unicode_getprop (cs, Qlocale),
1836 printcharfun, 0);
1837 write_c_string (", ", printcharfun);
1838 print_internal (mswindows_multibyte_to_unicode_getprop (cs, Qcode_page),
1839 printcharfun, 0);
1840 }
1841 write_c_string (")", printcharfun);
1842 }
1843
1844 /* Convert multibyte to Unicode according to the specified code page
1845 and return the value as a malloc()ed string. This currently exists
1846 because the TO_INTERNAL_FORMAT() mechanism -- the normal way to do
1847 such conversions -- has no way of passing in a parameter to control
1848 the operation. We could use a global variable to pass this value
1849 in, but that runs the risk of causing problems due to reentrancy.
1850 (You might say, yeah, right, how can TO_INTERNAL_FORMAT() get
1851 called recursively merely when I'm doing a simple conversion
1852 operation? It turns out this can and does happen, consistently, as
1853 a result of calling QUIT -- it happens consistently for complicated
1854 reasons outlined in event-msw.c, WM_KEYDOWN handling.) */
1855
1856 Extbyte *
1857 convert_multibyte_to_unicode_malloc (const Extbyte *src, Bytecount n,
1858 int cp, Bytecount *size_out)
1859 {
1860 Bytecount nout = MultiByteToWideChar (cp, 0, src, n, 0, 0);
1861 Extbyte *outp = xnew_array (Extbyte, nout * sizeof (WCHAR));
1862
1863 MultiByteToWideChar (cp, 0, src, n, (LPWSTR) outp, nout);
1864 if (size_out)
1865 *size_out = nout * sizeof (WCHAR);
1866 return outp;
1867 }
1868
1869 /* Convert MS Windows multibyte to internal, with specified code page.
1870 See above for why this exists, and the TO_INTERNAL_FORMAT() macros
1871 aren't just used. */
1872
1873 Intbyte *
1874 convert_multibyte_to_internal_malloc (const Extbyte *src, Bytecount n,
1875 int cp, Bytecount *size_out)
1876 {
1877 Bytecount size;
1878 Extbyte *unidata = convert_multibyte_to_unicode_malloc (src, n, cp, &size);
1879 Intbyte *intdata;
1880
1881 TO_INTERNAL_FORMAT (DATA, (unidata, size), MALLOC, (intdata, size),
1882 Qmswindows_unicode);
1883
1884 xfree (unidata);
1885
1886 if (size_out)
1887 *size_out = size;
1888
1889 return intdata;
1890 }
1891
1892 /* Convert multibyte to Unicode according to the specified code page
1893 and append the results onto the specified Dynarr. See above. */
1894
1895 void
1896 convert_multibyte_to_unicode_dynarr (const Extbyte *src, Bytecount n,
1897 int cp, unsigned_char_dynarr *dst)
1898 {
1899 Bytecount nout = MultiByteToWideChar (cp, 0, src, n, 0, 0);
1900 void *outp;
1901
1902 Dynarr_add_many (dst, 0, nout * sizeof (WCHAR));
1903 /* dynarr's buffer may be realloc()ed by call above, so access it after */
1904 outp = Dynarr_atp (dst, Dynarr_length (dst) - nout * sizeof (WCHAR));
1905 MultiByteToWideChar (cp, 0, src, n, (LPWSTR) outp, nout);
1906 }
1907
1908 /* Convert MS Windows multibyte to Unicode. */
1909
1910 static Bytecount
1911 mswindows_multibyte_to_unicode_convert (struct coding_stream *str,
1912 const unsigned char *src,
1913 unsigned_char_dynarr *dst,
1914 Bytecount n)
1915 {
1916 unsigned char *new_src = (unsigned char *) src;
1917 int i;
1918 struct mswindows_multibyte_to_unicode_coding_stream *data =
1919 CODING_STREAM_TYPE_DATA (str, mswindows_multibyte_to_unicode);
1920 Bytecount orign = n;
1921
1922 if (data->cp == 0)
1923 data->cp = determine_code_page (str->codesys);
1924 if (data->partial_byte_present)
1925 {
1926 new_src = alloca_array (unsigned char, n + 1);
1927 memcpy (new_src + 1, src, n);
1928 new_src[0] =
1929 (unsigned char) data->partial_byte;
1930 n++;
1931 }
1932
1933 if (str->direction == CODING_DECODE)
1934 {
1935 for (i = n - 1; i >= 0; i--)
1936 {
1937 if (!IsDBCSLeadByteEx (data->cp, new_src[i]))
1938 break;
1939 }
1940
1941 i++;
1942
1943 for (; i < n; i++)
1944 {
1945 if (IsDBCSLeadByteEx (data->cp, new_src[i]))
1946 i++;
1947 }
1948
1949 if (i > n)
1950 {
1951 /* a char is split across the boundary */
1952 data->partial_byte = new_src[n - 1];
1953 data->partial_byte_present = 1;
1954 n--;
1955 }
1956 else
1957 data->partial_byte_present = 0;
1958
1959 convert_multibyte_to_unicode_dynarr ((Extbyte *) new_src, n, data->cp,
1960 dst);
1961 }
1962 else
1963 {
1964 if (n & 1)
1965 {
1966 /* a char is split across the boundary */
1967 data->partial_byte = new_src[n - 1];
1968 data->partial_byte_present = 1;
1969 n--;
1970 }
1971 else
1972 data->partial_byte_present = 0;
1973
1974 {
1975 int nout = WideCharToMultiByte (data->cp, WC_COMPOSITECHECK,
1976 (LPWSTR) new_src, n / sizeof (WCHAR),
1977 0, 0, "~", 0);
1978 void *outp;
1979
1980 Dynarr_add_many (dst, 0, nout);
1981 /* dynarr's buffer may be realloc()ed by call above, so access it
1982 after */
1983 outp = Dynarr_atp (dst, Dynarr_length (dst) - nout);
1984 WideCharToMultiByte (data->cp, WC_COMPOSITECHECK, (LPWSTR) new_src,
1985 n / sizeof (WCHAR),
1986 (LPSTR) outp, nout, "~", 0);
1987 }
1988 }
1989 return orign;
1990 }
1991
1992 static enum source_sink_type
1993 mswindows_multibyte_to_unicode_conversion_end_type (Lisp_Object codesys)
1994 {
1995 return DECODES_BYTE_TO_BYTE;
1996 }
1997
1998
1999 /************************************************************************/
2000 /* MS Windows Multibyte methods */
2001 /************************************************************************/
2002
2003 DEFINE_CODING_SYSTEM_TYPE (mswindows_multibyte);
2004
2005 struct mswindows_multibyte_coding_system
2006 {
2007 Lisp_Object code_page;
2008 Lisp_Object locale;
2009 };
2010
/* Per-stream data for the mswindows-multibyte type.  Only a placeholder
   member is present. */
struct mswindows_multibyte_coding_stream
{
  int dummy;
};
2015
2016 static const struct lrecord_description
2017 mswindows_multibyte_coding_system_description[] = {
2018 { XD_LISP_OBJECT,
2019 coding_system_data_offset +
2020 offsetof (struct mswindows_multibyte_coding_system, code_page) },
2021 { XD_LISP_OBJECT,
2022 coding_system_data_offset +
2023 offsetof (struct mswindows_multibyte_coding_system, locale) },
2024 { XD_END }
2025 };
2026
2027 static Bytecount
2028 mswindows_multibyte_convert (struct coding_stream *str,
2029 const UExtbyte *src,
2030 unsigned_char_dynarr *dst, Bytecount n)
2031 {
2032 Bytecount orign = n;
2033 /* should never be called; is preprocessed away in the
2034 canonicalize method */
2035 abort ();
2036 return orign;
2037 }
2038
2039 static void
2040 mswindows_multibyte_init (Lisp_Object codesys)
2041 {
2042 struct mswindows_multibyte_coding_system *data =
2043 XCODING_SYSTEM_TYPE_DATA (codesys, mswindows_multibyte);
2044
2045 data->code_page = Qnil;
2046 data->locale = Qnil;
2047 }
2048
2049 static void
2050 mswindows_multibyte_mark (Lisp_Object codesys)
2051 {
2052 struct mswindows_multibyte_coding_system *data =
2053 XCODING_SYSTEM_TYPE_DATA (codesys, mswindows_multibyte);
2054
2055 mark_object (data->code_page);
2056 mark_object (data->locale);
2057 }
2058
2059 static int
2060 mswindows_multibyte_putprop (Lisp_Object codesys,
2061 Lisp_Object key,
2062 Lisp_Object value)
2063 {
2064 struct mswindows_multibyte_coding_system *data =
2065 XCODING_SYSTEM_TYPE_DATA (codesys, mswindows_multibyte);
2066
2067 if (EQ (key, Qcode_page))
2068 data->code_page = value;
2069 else if (EQ (key, Qlocale))
2070 data->locale = value;
2071 else
2072 return 0;
2073 return 1;
2074 }
2075
2076 static Lisp_Object
2077 mswindows_multibyte_getprop (Lisp_Object coding_system,
2078 Lisp_Object prop)
2079 {
2080 struct mswindows_multibyte_coding_system *data =
2081 XCODING_SYSTEM_TYPE_DATA (coding_system, mswindows_multibyte);
2082
2083 if (EQ (prop, Qcode_page))
2084 return data->code_page;
2085 else if (EQ (prop, Qlocale))
2086 return data->locale;
2087 else
2088 return Qunbound;
2089 }
2090
2091 /* Convert this coding system into the proper chain. */
2092
2093 static Lisp_Object
2094 mswindows_multibyte_canonicalize (Lisp_Object codesys)
2095 {
2096 struct mswindows_multibyte_coding_system *data =
2097 XCODING_SYSTEM_TYPE_DATA (codesys, mswindows_multibyte);
2098 Lisp_Object m2u;
2099
2100 m2u =
2101 make_internal_coding_system
2102 (Qnil,
2103 "internal-mswindows-multibyte-to-unicode",
2104 Qmswindows_multibyte_to_unicode,
2105 Qnil, NILP (data->locale) ?
2106 list2 (Qcode_page, data->code_page) :
2107 list4 (Qcode_page, data->code_page, Qlocale, data->locale));
2108
2109 return make_internal_coding_system (codesys,
2110 "internal-mswindows-multibyte-chain",
2111 Qchain, Qunbound,
2112 list4 (Qchain,
2113 list2 (m2u, Qmswindows_unicode),
2114 Qcanonicalize_after_coding,
2115 codesys));
2116 }
2117
2118
2119 void
2120 syms_of_intl_win32 (void)
2121 {
2122 #ifdef MULE
2123 DEFSUBR (Fmswindows_set_current_locale);
2124 DEFSUBR (Fmswindows_current_locale);
2125 DEFSUBR (Fmswindows_user_default_locale);
2126 DEFSUBR (Fmswindows_system_default_locale);
2127 DEFSUBR (Fmswindows_locale_code_page);
2128 DEFSUBR (Fmswindows_locale_oem_code_page);
2129 DEFSUBR (Fmswindows_supported_locales);
2130 DEFSUBR (Fmswindows_charset_code_page);
2131 DEFSUBR (Fmswindows_set_charset_code_page);
2132
2133 #if 0
2134 DEFSUBR (Fmswindows_get_locale_info);
2135 DEFSUBR (Fmswindows_get_current_locale_id);
2136 DEFSUBR (Fmswindows_get_default_locale_id);
2137 DEFSUBR (Fmswindows_get_valid_locale_ids);
2138 DEFSUBR (Fmswindows_set_current_locale);
2139
2140 DEFSUBR (Fmswindows_get_console_code_page);
2141 DEFSUBR (Fmswindows_set_console_code_page);
2142 DEFSUBR (Fmswindows_get_console_output_code_page);
2143 DEFSUBR (Fmswindows_set_console_output_code_page);
2144 DEFSUBR (Fmswindows_get_valid_code_pages);
2145 DEFSUBR (Fmswindows_get_code_page_charset);
2146
2147 DEFSUBR (Fmswindows_get_valid_keyboard_layouts);
2148 DEFSUBR (Fmswindows_get_keyboard_layout);
2149 DEFSUBR (Fmswindows_set_keyboard_layout);
2150 #endif
2151 #endif /* MULE */
2152
2153 DEFSYMBOL (Qmswindows_tstr);
2154 DEFSYMBOL (Qmswindows_multibyte);
2155 DEFSYMBOL (Qmswindows_multibyte_to_unicode);
2156 DEFSYMBOL (Qmswindows_unicode);
2157 DEFSYMBOL (Qmswindows_multibyte_system_default);
2158
2159 DEFSYMBOL (Qansi);
2160 DEFSYMBOL (Qoem);
2161 DEFSYMBOL (Qmac);
2162 DEFSYMBOL (Qebcdic);
2163 }
2164
2165 void
2166 coding_system_type_create_intl_win32 (void)
2167 {
2168 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA
2169 (mswindows_multibyte_to_unicode,
2170 "mswindows-multibyte-to-unicode-coding-system-p");
2171 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte_to_unicode, init);
2172 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte_to_unicode, print);
2173 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte_to_unicode, convert);
2174 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte_to_unicode, getprop);
2175 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte_to_unicode, putprop);
2176 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte_to_unicode,
2177 conversion_end_type);
2178
2179 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA
2180 (mswindows_multibyte,
2181 "mswindows-multibyte-coding-system-p");
2182 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte, convert);
2183 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte, init);
2184 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte, mark);
2185 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte, getprop);
2186 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte, putprop);
2187 CODING_SYSTEM_HAS_METHOD (mswindows_multibyte, canonicalize);
2188 }
2189
2190 void
2191 reinit_coding_system_type_create_intl_win32 (void)
2192 {
2193 REINITIALIZE_CODING_SYSTEM_TYPE (mswindows_multibyte_to_unicode);
2194 REINITIALIZE_CODING_SYSTEM_TYPE (mswindows_multibyte);
2195 }
2196
/* Create this file's Lisp-level variables.  In a MULE build that is the
   hash table stored in Vmswindows_charset_code_page_table; without MULE
   this function does nothing. */
void
vars_of_intl_win32 (void)
{
#ifdef MULE
  /* Size argument passed to make_lisp_hash_table. */
  enum { charset_code_page_table_size = 50 };

  Vmswindows_charset_code_page_table =
    make_lisp_hash_table (charset_code_page_table_size,
                          HASH_TABLE_NON_WEAK, HASH_TABLE_EQ);
  /* The table lives in a C variable, so register that variable with
     staticpro. */
  staticpro (&Vmswindows_charset_code_page_table);
#endif /* MULE */
}
2206
2207 static void
2208 determine_if_using_unicode (void)
2209 {
2210 if (XEUNICODE_P)
2211 Fdefine_coding_system_alias (Qmswindows_tstr, Qmswindows_unicode);
2212 else
2213 Fdefine_coding_system_alias (Qmswindows_tstr,
2214 Qmswindows_multibyte_system_default);
2215 }
2216
2217 void
2218 complex_vars_of_intl_win32 (void)
2219 {
2220 Fmake_coding_system
2221 (Qmswindows_unicode, Qunicode,
2222 build_msg_string ("MS Windows Unicode"),
2223 nconc2 (list4 (Qdocumentation,
2224 build_msg_string (
2225 "Converts to the Unicode encoding for Windows API calls.\n"
2226 "This encoding is equivalent to standard UTF16, little-endian."
2227 ),
2228 Qmnemonic, build_string ("MSW-U")),
2229 list4 (Qtype, Qutf_16,
2230 Qlittle_endian, Qt)));
2231
2232 #ifdef MULE
2233 /* Just temporarily. This will get fixed in mule-msw-init.el. */
2234 Fdefine_coding_system_alias (Qmswindows_multibyte_system_default,
2235 Qraw_text);
2236 #else
2237 /* Not temporarily. These may be referenced by Lisp code so we need to
2238 define them. */
2239 Fdefine_coding_system_alias (Qmswindows_multibyte,
2240 Qraw_text);
2241 Fdefine_coding_system_alias (Qmswindows_multibyte_system_default,
2242 Qraw_text);
2243 Fdefine_coding_system_alias (intern ("mswindows-multibyte-user-default"),
2244 Qraw_text);
2245 Fdefine_coding_system_alias (intern ("mswindows-multibyte-oem"),
2246 Qraw_text);
2247 Fdefine_coding_system_alias (intern
2248 ("mswindows-multibyte-oem-system-default"),
2249 Qraw_text);
2250 Fdefine_coding_system_alias (intern ("mswindows-multibyte-oem-user-default"),
2251 Qraw_text);
2252 #endif /* MULE */
2253
2254 determine_if_using_unicode ();
2255 }
2256
2257 void
2258 init_intl_win32 (void)
2259 {
2260 #ifdef MULE
2261 set_current_lcid (GetUserDefaultLCID ());
2262 #endif /* MULE */
2263
2264 if (initialized)
2265 /* If not initialized, we also call this, but early -- see the
2266 previous function. */
2267 determine_if_using_unicode ();
2268 }