comparison lisp/unicode.el @ 4317:15d36164ebd7

Eliminate lost docstring warnings on 21.5. 2007-12-09 Aidan Kehoe <kehoea@parhasard.net> * unicode.el (load-unicode-mapping-tables): Call #'set-default-unicode-precedence-list wrapped with #'declare-fboundp, to avoid warnings on non-Mule builds. * unicode.el (ccl-encode-to-ucs-2): * unicode.el (unicode-error-sequence-regexp-range): * unicode.el (frob-unicode-errors-region): * unicode.el (unicode-error-translate-region): Unconditionally provide these functions and variables at top level in the code, to make them available to make-docfile. For the INITVALUE args to #'defvar, conditionalise on (featurep 'mule); ditto for the code that tests the lookup tables and provides the WGL4 characters as jit-ucs-charset-0 characters. Unintern the function and variable symbols if (featurep 'mule) is not true, so their function definitions and so on get garbage collected at dump time in non-Mule builds. * obsolete.el (add-menu-item): * obsolete.el (add-menu): * obsolete.el (add-menu): * obsolete.el (package-get-download-menu): Provide these functions at top level, in order to make them available to make-docfile.c, which has trouble interpreting byte code. Unintern their symbols if the menubar feature is not available, which means they will be garbage collected on non-menubar builds.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 09 Dec 2007 14:55:03 +0100
parents 75d0292c1bff
children a78d697ccd2c 68d1ca56cffa
comparison
equal deleted inserted replaced
4316:2e528ccfe690 4317:15d36164ebd7
147 parse-args) 147 parse-args)
148 ;; The default-unicode-precedence-list. We set this here to default to 148 ;; The default-unicode-precedence-list. We set this here to default to
149 ;; *not* mapping various European characters to East Asian characters; 149 ;; *not* mapping various European characters to East Asian characters;
150 ;; otherwise the default-unicode-precedence-list is numerically ordered 150 ;; otherwise the default-unicode-precedence-list is numerically ordered
151 ;; by charset ID. 151 ;; by charset ID.
152 (set-default-unicode-precedence-list 152 (declare-fboundp
153 '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15 153 (set-default-unicode-precedence-list
154 greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5 154 '(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15
155 latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9 155 greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5
156 vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6 156 latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9
157 jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978 157 vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6
158 japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2 158 jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978
159 chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2 159 japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2
160 indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2 160 chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2
161 chinese-isoir165 arabic-1-column arabic-2-column arabic-digit 161 indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2
162 composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0 162 chinese-isoir165 arabic-1-column arabic-2-column arabic-digit
163 katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column 163 composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0
164 latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4 164 katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column
165 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)))) 165 latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4
166 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)))))
166 167
167 (make-coding-system 168 (make-coding-system
168 'utf-16 'unicode 169 'utf-16 'unicode
169 "UTF-16" 170 "UTF-16"
170 '(mnemonic "UTF-16" 171 '(mnemonic "UTF-16"
325 The second argument must be 'ucs, the third argument is ignored. " 326 The second argument must be 'ucs, the third argument is ignored. "
326 (assert (eq quote-ucs 'ucs) t 327 (assert (eq quote-ucs 'ucs) t
327 "Sorry, encode-char doesn't yet support anything but the UCS. ") 328 "Sorry, encode-char doesn't yet support anything but the UCS. ")
328 (char-to-unicode char)) 329 (char-to-unicode char))
329 330
331 (defconst ccl-encode-to-ucs-2
332 (eval-when-compile
333 (let ((pre-existing
334 ;; This is the compiled CCL program from the assert
335 ;; below. Since this file is dumped and ccl.el isn't (and
336 ;; even when it was, it was dumped much later than this
337 ;; one), we can't compile the program at dump time. We can
338 ;; check at byte compile time that the program is as
339 ;; expected, though.
340 [1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028
341 147513 8 82009 255 22]))
342 (when (featurep 'mule)
343 ;; Check that the pre-existing constant reflects the intended
344 ;; CCL program.
345 (assert
346 (equal pre-existing
347 (ccl-compile
348 `(1
349 ( ;; mule-to-unicode's first argument is the
350 ;; charset ID, the second its first byte
351 ;; left shifted by 7 bits masked with its
352 ;; second byte.
353 (r1 = (r1 << 7))
354 (r1 = (r1 | r2))
355 (mule-to-unicode r0 r1)
356 (if (r0 & ,(lognot #xFFFF))
357 ;; Redisplay looks in r1 and r2 for the first
358 ;; and second bytes of the X11 font,
359 ;; respectively. For non-BMP characters we
360 ;; display U+FFFD.
361 ((r1 = #xFF)
362 (r2 = #xFD))
363 ((r1 = (r0 >> 8))
364 (r2 = (r0 & #xFF))))))))
365 nil
366 "The pre-compiled CCL program appears broken. "))
367 pre-existing))
368 "CCL program to transform Mule characters to UCS-2.")
369
330 (when (featurep 'mule) 370 (when (featurep 'mule)
331 (let ((prog 371 (put 'ccl-encode-to-ucs-2 'ccl-program-idx
332 (eval-when-compile 372 (declare-fboundp
333 (let ((pre-existing 373 (register-ccl-program 'ccl-encode-to-ucs-2 ccl-encode-to-ucs-2))))
334 ;; This is the compiled CCL program from the assert 374
335 ;; below. Since this file is dumped and ccl.el isn't (and 375 ;; Now, create jit-ucs-charset-0 entries for those characters in Windows
336 ;; even when it was, it was dumped much later than this 376 ;; Glyph List 4 that would otherwise end up in East Asian character sets.
337 ;; one), we can't compile the program at dump time. We can 377 ;;
338 ;; check at byte compile time that the program is as 378 ;; WGL4 is a character repertoire from Microsoft that gives a guideline
339 ;; expected, though. 379 ;; for font implementors as to what characters are sufficient for
340 [1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028 380 ;; pan-European support. The intention of this code is to avoid the
341 147513 8 82009 255 22])) 381 ;; situation where these characters end up mapping to East Asian XEmacs
342 (when (featurep 'mule) 382 ;; characters, which generally clash strongly with European characters
343 ;; Check that the pre-existing constant reflects the intended 383 ;; both in font choice and character width; jit-ucs-charset-0 is a
344 ;; CCL program. 384 ;; single-width character set which comes before the East Asian character
345 (assert 385 ;; sets in the default-unicode-precedence-list above.
346 (equal pre-existing 386 (loop for (ucs ascii-or-latin-1)
347 (ccl-compile 387 in '((#x2013 ?-) ;; U+2013 EN DASH
348 `(1 388 (#x2014 ?-) ;; U+2014 EM DASH
349 (;; mule-to-unicode's first argument is the 389 (#x2105 ?%) ;; U+2105 CARE OF
350 ;; charset ID, the second its first byte 390 (#x203e ?-) ;; U+203E OVERLINE
351 ;; left shifted by 7 bits masked with its 391 (#x221f ?|) ;; U+221F RIGHT ANGLE
352 ;; second byte. 392 (#x2584 ?|) ;; U+2584 LOWER HALF BLOCK
353 (r1 = (r1 << 7)) 393 (#x2588 ?|) ;; U+2588 FULL BLOCK
354 (r1 = (r1 | r2)) 394 (#x258c ?|) ;; U+258C LEFT HALF BLOCK
355 (mule-to-unicode r0 r1) 395 (#x2550 ?|) ;; U+2550 BOX DRAWINGS DOUBLE HORIZONTAL
356 (if (r0 & ,(lognot #xFFFF)) 396 (#x255e ?|) ;; U+255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
357 ;; Redisplay looks in r1 and r2 for the first 397 (#x256a ?|) ;; U+256A BOX DRAWINGS VERTICAL SINGLE & HORIZONTAL DOUBLE
358 ;; and second bytes of the X11 font, 398 (#x2561 ?|) ;; U+2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
359 ;; respectively. For non-BMP characters we 399 (#x2215 ?/) ;; U+2215 DIVISION SLASH
360 ;; display U+FFFD. 400 (#x02c9 ?`) ;; U+02C9 MODIFIER LETTER MACRON
361 ((r1 = #xFF) 401 (#x2211 ?s) ;; U+2211 N-ARY SUMMATION
362 (r2 = #xFD)) 402 (#x220f ?s) ;; U+220F N-ARY PRODUCT
363 ((r1 = (r0 >> 8)) 403 (#x2248 ?=) ;; U+2248 ALMOST EQUAL TO
364 (r2 = (r0 & #xFF)))))))) 404 (#x2264 ?=) ;; U+2264 LESS-THAN OR EQUAL TO
365 nil 405 (#x2265 ?=) ;; U+2265 GREATER-THAN OR EQUAL TO
366 "The pre-compiled CCL program appears broken. ")) 406 (#x201c ?') ;; U+201C LEFT DOUBLE QUOTATION MARK
367 pre-existing)))) 407 (#x2026 ?.) ;; U+2026 HORIZONTAL ELLIPSIS
368 (defconst ccl-encode-to-ucs-2 prog 408 (#x2212 ?-) ;; U+2212 MINUS SIGN
369 "CCL program to transform Mule characters to UCS-2.") 409 (#x2260 ?=) ;; U+2260 NOT EQUAL TO
370 (put 'ccl-encode-to-ucs-2 'ccl-program-idx 410 (#x221e ?=) ;; U+221E INFINITY
371 (register-ccl-program 'ccl-encode-to-ucs-2 prog))) 411 (#x2642 ?=) ;; U+2642 MALE SIGN
372 412 (#x2640 ?=) ;; U+2640 FEMALE SIGN
373 ;; Now, create jit-ucs-charset-0 entries for those characters in Windows 413 (#x2032 ?=) ;; U+2032 PRIME
374 ;; Glyph List 4 that would otherwise end up in East Asian character sets. 414 (#x2033 ?=) ;; U+2033 DOUBLE PRIME
375 ;; 415 (#x25cb ?=) ;; U+25CB WHITE CIRCLE
376 ;; WGL4 is a character repertoire from Microsoft that gives a guideline 416 (#x25cf ?=) ;; U+25CF BLACK CIRCLE
377 ;; for font implementors as to what characters are sufficient for 417 (#x25a1 ?=) ;; U+25A1 WHITE SQUARE
378 ;; pan-European support. The intention of this code is to avoid the 418 (#x25a0 ?=) ;; U+25A0 BLACK SQUARE
379 ;; situation where these characters end up mapping to East Asian XEmacs 419 (#x25b2 ?=) ;; U+25B2 BLACK UP-POINTING TRIANGLE
380 ;; characters, which generally clash strongly with European characters 420 (#x25bc ?=) ;; U+25BC BLACK DOWN-POINTING TRIANGLE
381 ;; both in font choice and character width; jit-ucs-charset-0 is a 421 (#x2192 ?=) ;; U+2192 RIGHTWARDS ARROW
382 ;; single-width character set which comes before the East Asian character 422 (#x2190 ?=) ;; U+2190 LEFTWARDS ARROW
383 ;; sets in the default-unicode-precedence-list above. 423 (#x2191 ?=) ;; U+2191 UPWARDS ARROW
384 (loop for (ucs ascii-or-latin-1) 424 (#x2193 ?=) ;; U+2193 DOWNWARDS ARROW
385 in '((#x2013 ?-) ;; U+2013 EN DASH 425 (#x2229 ?=) ;; U+2229 INTERSECTION
386 (#x2014 ?-) ;; U+2014 EM DASH 426 (#x2202 ?=) ;; U+2202 PARTIAL DIFFERENTIAL
387 (#x2105 ?%) ;; U+2105 CARE OF 427 (#x2261 ?=) ;; U+2261 IDENTICAL TO
388 (#x203e ?-) ;; U+203E OVERLINE 428 (#x221a ?=) ;; U+221A SQUARE ROOT
389 (#x221f ?|) ;; U+221F RIGHT ANGLE 429 (#x222b ?=) ;; U+222B INTEGRAL
390 (#x2584 ?|) ;; U+2584 LOWER HALF BLOCK 430 (#x2030 ?=) ;; U+2030 PER MILLE SIGN
391 (#x2588 ?|) ;; U+2588 FULL BLOCK 431 (#x266a ?=) ;; U+266A EIGHTH NOTE
392 (#x258c ?|) ;; U+258C LEFT HALF BLOCK 432 (#x2020 ?*) ;; U+2020 DAGGER
393 (#x2550 ?|) ;; U+2550 BOX DRAWINGS DOUBLE HORIZONTAL 433 (#x2021 ?*) ;; U+2021 DOUBLE DAGGER
394 (#x255e ?|) ;; U+255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE 434 (#x2500 ?|) ;; U+2500 BOX DRAWINGS LIGHT HORIZONTAL
395 (#x256a ?|) ;; U+256A BOX DRAWINGS VERTICAL SINGLE & HORIZONTAL DOUBLE 435 (#x2502 ?|) ;; U+2502 BOX DRAWINGS LIGHT VERTICAL
396 (#x2561 ?|) ;; U+2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE 436 (#x250c ?|) ;; U+250C BOX DRAWINGS LIGHT DOWN AND RIGHT
397 (#x2215 ?/) ;; U+2215 DIVISION SLASH 437 (#x2510 ?|) ;; U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT
398 (#x02c9 ?`) ;; U+02C9 MODIFIER LETTER MACRON 438 (#x2518 ?|) ;; U+2518 BOX DRAWINGS LIGHT UP AND LEFT
399 (#x2211 ?s) ;; U+2211 N-ARY SUMMATION 439 (#x2514 ?|) ;; U+2514 BOX DRAWINGS LIGHT UP AND RIGHT
400 (#x220f ?s) ;; U+220F N-ARY PRODUCT 440 (#x251c ?|) ;; U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT
401 (#x2248 ?=) ;; U+2248 ALMOST EQUAL TO 441 (#x252c ?|) ;; U+252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
402 (#x2264 ?=) ;; U+2264 LESS-THAN OR EQUAL TO 442 (#x2524 ?|) ;; U+2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT
403 (#x2265 ?=) ;; U+2265 GREATER-THAN OR EQUAL TO 443 (#x2534 ?|) ;; U+2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL
404 (#x201c ?') ;; U+201C LEFT DOUBLE QUOTATION MARK 444 (#x253c ?|) ;; U+253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
405 (#x2026 ?.) ;; U+2026 HORIZONTAL ELLIPSIS 445 (#x02da ?^) ;; U+02DA RING ABOVE
406 (#x2212 ?-) ;; U+2212 MINUS SIGN 446 (#x2122 ?\xa9) ;; U+2122 TRADE MARK SIGN, ?©
407 (#x2260 ?=) ;; U+2260 NOT EQUAL TO 447
408 (#x221e ?=) ;; U+221E INFINITY 448 (#x0132 ?\xe6) ;; U+0132 LATIN CAPITAL LIGATURE IJ, ?æ
409 (#x2642 ?=) ;; U+2642 MALE SIGN 449 (#x013f ?\xe6) ;; U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT, ?æ
410 (#x2640 ?=) ;; U+2640 FEMALE SIGN 450
411 (#x2032 ?=) ;; U+2032 PRIME 451 (#x0133 ?\xe6) ;; U+0133 LATIN SMALL LIGATURE IJ, ?æ
412 (#x2033 ?=) ;; U+2033 DOUBLE PRIME 452 (#x0140 ?\xe6) ;; U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT, ?æ
413 (#x25cb ?=) ;; U+25CB WHITE CIRCLE 453 (#x0149 ?\xe6) ;; U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, ?æ
414 (#x25cf ?=) ;; U+25CF BLACK CIRCLE 454
415 (#x25a1 ?=) ;; U+25A1 WHITE SQUARE 455 (#x2194 ?|) ;; U+2194 LEFT RIGHT ARROW
416 (#x25a0 ?=) ;; U+25A0 BLACK SQUARE 456 (#x2660 ?*) ;; U+2660 BLACK SPADE SUIT
417 (#x25b2 ?=) ;; U+25B2 BLACK UP-POINTING TRIANGLE 457 (#x2665 ?*) ;; U+2665 BLACK HEART SUIT
418 (#x25bc ?=) ;; U+25BC BLACK DOWN-POINTING TRIANGLE 458 (#x2663 ?*) ;; U+2663 BLACK CLUB SUIT
419 (#x2192 ?=) ;; U+2192 RIGHTWARDS ARROW 459 (#x2592 ?|) ;; U+2592 MEDIUM SHADE
420 (#x2190 ?=) ;; U+2190 LEFTWARDS ARROW 460 (#x2195 ?|) ;; U+2195 UP DOWN ARROW
421 (#x2191 ?=) ;; U+2191 UPWARDS ARROW 461
422 (#x2193 ?=) ;; U+2193 DOWNWARDS ARROW 462 (#x2113 ?\xb9) ;; U+2113 SCRIPT SMALL L, ?¹
423 (#x2229 ?=) ;; U+2229 INTERSECTION 463 (#x215b ?\xbe) ;; U+215B VULGAR FRACTION ONE EIGHTH, ?¾
424 (#x2202 ?=) ;; U+2202 PARTIAL DIFFERENTIAL 464 (#x215c ?\xbe) ;; U+215C VULGAR FRACTION THREE EIGHTHS, ?¾
425 (#x2261 ?=) ;; U+2261 IDENTICAL TO 465 (#x215d ?\xbe) ;; U+215D VULGAR FRACTION FIVE EIGHTHS, ?¾
426 (#x221a ?=) ;; U+221A SQUARE ROOT 466 (#x215e ?\xbe) ;; U+215E VULGAR FRACTION SEVEN EIGHTHS, ?¾
427 (#x222b ?=) ;; U+222B INTEGRAL 467 (#x207f ?\xbe) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¾
428 (#x2030 ?=) ;; U+2030 PER MILLE SIGN
429 (#x266a ?=) ;; U+266A EIGHTH NOTE
430 (#x2020 ?*) ;; U+2020 DAGGER
431 (#x2021 ?*) ;; U+2021 DOUBLE DAGGER
432 (#x2500 ?|) ;; U+2500 BOX DRAWINGS LIGHT HORIZONTAL
433 (#x2502 ?|) ;; U+2502 BOX DRAWINGS LIGHT VERTICAL
434 (#x250c ?|) ;; U+250C BOX DRAWINGS LIGHT DOWN AND RIGHT
435 (#x2510 ?|) ;; U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT
436 (#x2518 ?|) ;; U+2518 BOX DRAWINGS LIGHT UP AND LEFT
437 (#x2514 ?|) ;; U+2514 BOX DRAWINGS LIGHT UP AND RIGHT
438 (#x251c ?|) ;; U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT
439 (#x252c ?|) ;; U+252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
440 (#x2524 ?|) ;; U+2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT
441 (#x2534 ?|) ;; U+2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL
442 (#x253c ?|) ;; U+253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
443 (#x02da ?^) ;; U+02DA RING ABOVE
444 (#x2122 ?\xa9) ;; U+2122 TRADE MARK SIGN, ?©
445
446 (#x0132 ?\xe6) ;; U+0132 LATIN CAPITAL LIGATURE IJ, ?æ
447 (#x013f ?\xe6) ;; U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT, ?æ
448
449 (#x0133 ?\xe6) ;; U+0133 LATIN SMALL LIGATURE IJ, ?æ
450 (#x0140 ?\xe6) ;; U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT, ?æ
451 (#x0149 ?\xe6) ;; U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPHE, ?æ
452
453 (#x2194 ?|) ;; U+2194 LEFT RIGHT ARROW
454 (#x2660 ?*) ;; U+2660 BLACK SPADE SUIT
455 (#x2665 ?*) ;; U+2665 BLACK HEART SUIT
456 (#x2663 ?*) ;; U+2663 BLACK CLUB SUIT
457 (#x2592 ?|) ;; U+2592 MEDIUM SHADE
458 (#x2195 ?|) ;; U+2195 UP DOWN ARROW
459
460 (#x2113 ?\xb9) ;; U+2113 SCRIPT SMALL L, ?¹
461 (#x215b ?\xbe) ;; U+215B VULGAR FRACTION ONE EIGHTH, ?¾
462 (#x215c ?\xbe) ;; U+215C VULGAR FRACTION THREE EIGHTHS, ?¾
463 (#x215d ?\xbe) ;; U+215D VULGAR FRACTION FIVE EIGHTHS, ?¾
464 (#x215e ?\xbe) ;; U+215E VULGAR FRACTION SEVEN EIGHTHS, ?¾
465 (#x207f ?\xbe) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¾
466 468
467 ;; These are not in WGL 4, but are IPA characters that should not 469 ;; These are not in WGL 4, but are IPA characters that should not
468 ;; be double width. They are the only IPA characters that both 470 ;; be double width. They are the only IPA characters that both
469 ;; occur in packages/mule-packages/leim/ipa.el and end up in East 471 ;; occur in packages/mule-packages/leim/ipa.el and end up in East
470 ;; Asian character sets when that file is loaded in an XEmacs 472 ;; Asian character sets when that file is loaded in an XEmacs
471 ;; without packages. 473 ;; without packages.
472 (#x2197 ?|) ;; U+2197 NORTH EAST ARROW 474 (#x2197 ?|) ;; U+2197 NORTH EAST ARROW
473 (#x2199 ?|) ;; U+2199 SOUTH WEST ARROW 475 (#x2199 ?|) ;; U+2199 SOUTH WEST ARROW
474 (#x2191 ?|) ;; U+2191 UPWARDS ARROW 476 (#x2191 ?|) ;; U+2191 UPWARDS ARROW
475 (#x207f ?\xb9));; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¹ 477 (#x207f ?\xb9)) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?¹
476 with decoded = nil 478 with decoded = nil
477 with syntax-table = (standard-syntax-table) 479 with syntax-table = (standard-syntax-table)
478 ;; This creates jit-ucs-charset-0 entries because: 480 initially (unless (featurep 'mule) (return))
479 ;; 481 ;; This creates jit-ucs-charset-0 entries because:
480 ;; 1. If the tables are dumped, it is run at dump time before they are 482 ;;
481 ;; dumped, and as such before the relevant conversions are available 483 ;; 1. If the tables are dumped, it is run at dump time before they are
482 ;; (they are made available in mule/general-late.el). 484 ;; dumped, and as such before the relevant conversions are available
483 ;; 485 ;; (they are made available in mule/general-late.el).
484 ;; 2. If the tables are not dumped, it is run at dump time, long before 486 ;;
485 ;; any of the other mappings are available. 487 ;; 2. If the tables are not dumped, it is run at dump time, long before
486 ;; 488 ;; any of the other mappings are available.
487 do 489 ;;
488 (setq decoded (decode-char 'ucs ucs)) 490 do
489 (assert (eq (char-charset decoded) 491 (setq decoded (decode-char 'ucs ucs))
490 'jit-ucs-charset-0) nil 492 (assert (eq (declare-fboundp (char-charset decoded))
491 "Unexpected Unicode decoding behavior. ") 493 'jit-ucs-charset-0) nil
492 (modify-syntax-entry decoded 494 "Unexpected Unicode decoding behavior. ")
493 (string 495 (modify-syntax-entry decoded
494 (char-syntax ascii-or-latin-1)) 496 (string
495 syntax-table)) 497 (char-syntax ascii-or-latin-1))
498 syntax-table))
496 499
497 ;; *Sigh*, declarations need to be at the start of the line to be picked up 500 ;; *Sigh*, declarations need to be at the start of the line to be picked up
498 ;; by make-docfile. Not so much an issue with ccl-encode-to-ucs-2, which we 501 ;; by make-docfile. Not so much an issue with ccl-encode-to-ucs-2, which we
499 ;; don't necessarily want to advertise, but the following are important. 502 ;; don't necessarily want to advertise, but the following are important.
500 503
503 ;; point). Make them available to user code. 506 ;; point). Make them available to user code.
504 (defvar unicode-error-default-translation-table 507 (defvar unicode-error-default-translation-table
505 (loop 508 (loop
506 with char-table = (make-char-table 'char) 509 with char-table = (make-char-table 'char)
507 for i from ?\x00 to ?\xFF 510 for i from ?\x00 to ?\xFF
511 initially (unless (featurep 'mule) (return))
508 do 512 do
509 (put-char-table (aref 513 (put-char-table (aref
510 ;; #xd800 is the first leading surrogate; 514 ;; #xd800 is the first leading surrogate;
511 ;; trailing surrogates must be in the range 515 ;; trailing surrogates must be in the range
512 ;; #xdc00-#xdfff. These examples are not, so we 516 ;; #xdc00-#xdfff. These examples are not, so we
521 525
522 To transform XEmacs Unicode error sequences to the Latin-1 characters that 526 To transform XEmacs Unicode error sequences to the Latin-1 characters that
523 correspond to the octets on disk, you can use this variable. ") 527 correspond to the octets on disk, you can use this variable. ")
524 528
525 (defvar unicode-error-sequence-regexp-range 529 (defvar unicode-error-sequence-regexp-range
526 (format "%c%c-%c" 530 (and (featurep 'mule)
527 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0) 531 (format "%c%c-%c"
528 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3) 532 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0)
529 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)) 533 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)
534 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)))
530 "Regular expression range to match Unicode error sequences in XEmacs. 535 "Regular expression range to match Unicode error sequences in XEmacs.
531 536
532 Invalid Unicode sequences on input are represented as XEmacs 537 Invalid Unicode sequences on input are represented as XEmacs
533 characters with values stored as the keys in 538 characters with values stored as the keys in
534 `unicode-error-default-translation-table', one character for each 539 `unicode-error-default-translation-table', one character for each
535 invalid octet. You can use this variable (with `re-search-forward' or 540 invalid octet. You can use this variable (with `re-search-forward' or
536 `skip-chars-forward') to search for such characters; see also 541 `skip-chars-forward') to search for such characters; see also
537 `unicode-error-translate-region'. ") 542 `unicode-error-translate-region'. ")
538 543
539 ;; Check that the lookup table is correct, and that all the actual error 544 ;; Check that the lookup table is correct, and that all the actual error
540 ;; sequences are caught by the regexp. 545 ;; sequences are caught by the regexp.
541 (with-temp-buffer 546 (with-temp-buffer
542 (loop 547 (loop
543 for i from ?\x00 to ?\xFF 548 for i from ?\x00 to ?\xFF
544 with to-check = (make-string 20 ?\x20) 549 with to-check = (make-string 20 ?\x20)
545 do 550 initially (unless (featurep 'mule) (return))
546 (delete-region (point-min) (point-max)) 551 do
547 (insert to-check) 552 (delete-region (point-min) (point-max))
548 (goto-char 10) 553 (insert to-check)
549 (insert (decode-coding-string (format "\xd8\x00\x00%c" i) 554 (goto-char 10)
550 'utf-16-be)) 555 (insert (decode-coding-string (format "\xd8\x00\x00%c" i)
551 (backward-char) 556 'utf-16-be))
552 (assert (= i (get-char-table (char-after (point)) 557 (backward-char)
553 unicode-error-default-translation-table)) 558 (assert (= i (get-char-table (char-after (point))
554 (format "Char ?\\x%x not the expected error sequence!" 559 unicode-error-default-translation-table))
555 i)) 560 (format "Char ?\\x%x not the expected error sequence!"
556 561 i))
557 (goto-char (point-min)) 562
558 ;; Comment out until the issue in 563 (goto-char (point-min))
559 ;; 18179.49815.622843.336527@parhasard.net is fixed. 564 ;; Comment out until the issue in
560 (assert t ;(re-search-forward (concat "[" 565 ;; 18179.49815.622843.336527@parhasard.net is fixed.
561 ; unicode-error-sequence-regexp-range 566 (assert t ; (re-search-forward (concat "["
562 ; "]")) 567 ; unicode-error-sequence-regexp-range
563 nil 568 ; "]"))
564 (format "Could not find char ?\\x%x in buffer" i)))) 569 nil
570 (format "Could not find char ?\\x%x in buffer" i))))
565 571
566 (defun frob-unicode-errors-region (frob-function begin end &optional buffer) 572 (defun frob-unicode-errors-region (frob-function begin end &optional buffer)
567 "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END. 573 "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END.
568 574
569 Optional argument BUFFER specifies the buffer that should be examined for 575 Optional argument BUFFER specifies the buffer that should be examined for
588 unicode-error-sequence-regexp-range) 594 unicode-error-sequence-regexp-range)
589 (point)))) 595 (point))))
590 (if end 596 (if end
591 (funcall frob-function begin end)))))) 597 (funcall frob-function begin end))))))
592 598
593 (defun unicode-error-translate-region (begin end &optional buffer table) 599 (defun unicode-error-translate-region (begin end &optional buffer table)
594 "Translate the Unicode error sequences in BUFFER between BEGIN and END. 600 "Translate the Unicode error sequences in BUFFER between BEGIN and END.
595 601
596 The error sequences are transformed, by default, into the ASCII, 602 The error sequences are transformed, by default, into the ASCII,
597 control-1 and latin-iso8859-1 characters with the numeric values 603 control-1 and latin-iso8859-1 characters with the numeric values
598 corresponding to the incorrect octets encountered. This is achieved 604 corresponding to the incorrect octets encountered. This is achieved
599 by using `unicode-error-default-translation-table' (which see) for 605 by using `unicode-error-default-translation-table' (which see) for
601 mapping from the error sequences to the desired characters. " 607 mapping from the error sequences to the desired characters. "
602 (unless table (setq table unicode-error-default-translation-table)) 608 (unless table (setq table unicode-error-default-translation-table))
603 (frob-unicode-errors-region 609 (frob-unicode-errors-region
604 (lambda (start finish) 610 (lambda (start finish)
605 (translate-region start finish table)) 611 (translate-region start finish table))
606 begin end buffer))) 612 begin end buffer))
613
614 (unless (featurep 'mule)
615 ;; We do this in such a roundabout way--instead of having the above defun
616 ;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
617 ;; make-docfile.c pick up symbol and function documentation correctly. An
618 ;; alternative approach would be to fix make-docfile.c to be able to read
619 ;; Lisp.
620 (mapcar #'unintern
621 '(ccl-encode-to-ucs-2 unicode-error-default-translation-table
622 unicode-error-sequence-regexp-range
623 frob-unicode-errors-region unicode-error-translate-region)))
607 624
608 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's 625 ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's
609 ;; an implementation in appendix A.1 of the Unicode Standard, Version 626 ;; an implementation in appendix A.1 of the Unicode Standard, Version
610 ;; 2.0, but I don't know its licensing characteristics. 627 ;; 2.0, but I don't know its licensing characteristics.
611 628