xemacs-beta: lisp/unicode.el comparison

comparison lisp/unicode.el @ 4317:15d36164ebd7

Eliminate lost docstring warnings on 21.5. 2007-12-09 Aidan Kehoe <kehoea@parhasard.net> * unicode.el (load-unicode-mapping-tables): Call #'set-default-unicode-precedence-list wrapped with #'declare-fboundp, to avoid warnings on non-Mule builds. * unicode.el (ccl-encode-to-ucs-2): * unicode.el (unicode-error-sequence-regexp-range): * unicode.el (frob-unicode-errors-region): * unicode.el (unicode-error-translate-region): Unconditionally provide these functions and variables at top level in the code, to make them available to make-docfile. For the INITVALUE args to #'defvar, conditionalise on (featurep 'mule); ditto for the code that tests the lookup tables and provides the WGL4 characters as jit-ucs-charset-0 characters. Unintern the function and variable symbols if (featurep 'mule) is not true, so their function definitions and so on get garbage collected at dump time in non-Mule builds. * obsolete.el (add-menu-item): * obsolete.el (add-menu): * obsolete.el (add-menu): * obsolete.el (package-get-download-menu): Provide these functions at top level, in order to make them available to make-docfile.c, which has trouble interpreting byte code. Unintern their symbols if the menubar feature is not available, which means they will be garbage collected on non-menubar builds.

author	Aidan Kehoe <kehoea@parhasard.net>
date	Sun, 09 Dec 2007 14:55:03 +0100
parents	75d0292c1bff
children	a78d697ccd2c 68d1ca56cffa

comparison

equal deleted inserted replaced

-:2e528ccfe690
+:15d36164ebd7
 	    parse-args)
 ;; The default-unicode-precedence-list. We set this here to default to
 ;; *not* mapping various European characters to East Asian characters;
 ;; otherwise the default-unicode-precedence-list is numerically ordered
 ;; by charset ID.
-(set-default-unicode-precedence-list
+(declare-fboundp
-'(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15
+(set-default-unicode-precedence-list
-greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5
+'(ascii control-1 latin-iso8859-1 latin-iso8859-2 latin-iso8859-15
-latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9
+	greek-iso8859-7 hebrew-iso8859-8 ipa cyrillic-iso8859-5
-vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6
+	latin-iso8859-16 latin-iso8859-3 latin-iso8859-4 latin-iso8859-9
-jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978
+	vietnamese-viscii-lower vietnamese-viscii-upper arabic-iso8859-6
-japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2
+	jit-ucs-charset-0 japanese-jisx0208 japanese-jisx0208-1978
-chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2
+	japanese-jisx0212 japanese-jisx0213-1 japanese-jisx0213-2
-indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2
+	chinese-gb2312 chinese-sisheng chinese-big5-1 chinese-big5-2
-chinese-isoir165 arabic-1-column arabic-2-column arabic-digit
+	indian-is13194 korean-ksc5601 chinese-cns11643-1 chinese-cns11643-2
-composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0
+	chinese-isoir165 arabic-1-column arabic-2-column arabic-digit
-katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column
+	composite ethiopic indian-1-column indian-2-column jit-ucs-charset-0
-latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4
+	katakana-jisx0201 lao thai-tis620 thai-xtis tibetan tibetan-1-column
-chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7))))
+	latin-jisx0201 chinese-cns11643-3 chinese-cns11643-4
+	chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7)))))
 (make-coding-system
 'utf-16 'unicode
 "UTF-16"
 '(mnemonic "UTF-16"
 The second argument must be 'ucs, the third argument is ignored.  "
 (assert (eq quote-ucs 'ucs) t
 	  "Sorry, encode-char doesn't yet support anything but the UCS.  ")
 (char-to-unicode char))
+(defconst ccl-encode-to-ucs-2
+(eval-when-compile
+(let ((pre-existing
+;; This is the compiled CCL program from the assert
+;; below. Since this file is dumped and ccl.el isn't (and
+;; even when it was, it was dumped much later than this
+;; one), we can't compile the program at dump time. We can
+;; check at byte compile time that the program is as
+;; expected, though.
+[1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028
+147513 8 82009 255 22]))
+(when (featurep 'mule)
+;; Check that the pre-existing constant reflects the intended
+;; CCL program.
+(assert
+(equal pre-existing
+(ccl-compile
+`(1
+( ;; mule-to-unicode's first argument is the
+;; charset ID, the second its first byte
+;; left shifted by 7 bits masked with its
+;; second byte.
+(r1 = (r1 << 7))
+(r1 = (r1 | r2))
+(mule-to-unicode r0 r1)
+(if (r0 & ,(lognot #xFFFF))
+;; Redisplay looks in r1 and r2 for the first
+;; and second bytes of the X11 font,
+;; respectively. For non-BMP characters we
+;; display U+FFFD.
+((r1 = #xFF)
+(r2 = #xFD))
+((r1 = (r0 >> 8))
+(r2 = (r0 & #xFF))))))))
+nil
+"The pre-compiled CCL program appears broken. "))
+pre-existing))
+"CCL program to transform Mule characters to UCS-2.")
 (when (featurep 'mule)
-(let ((prog
+(put 'ccl-encode-to-ucs-2 'ccl-program-idx
-(eval-when-compile
+(declare-fboundp
-(let ((pre-existing
+	(register-ccl-program 'ccl-encode-to-ucs-2 ccl-encode-to-ucs-2))))
-;; This is the compiled CCL program from the assert
-;; below. Since this file is dumped and ccl.el isn't (and
+;; Now, create jit-ucs-charset-0 entries for those characters in Windows
-;; even when it was, it was dumped much later than this
+;; Glyph List 4 that would otherwise end up in East Asian character sets.
-;; one), we can't compile the program at dump time. We can
+;;
-;; check at byte compile time that the program is as
+;; WGL4 is a character repertoire from Microsoft that gives a guideline
-;; expected, though.
+;; for font implementors as to what characters are sufficient for
-[1 16 131127 7 98872 65823 1307 5 -65536 65313 64833 1028
+;; pan-European support.  The intention of this code is to avoid the
-147513 8 82009 255 22]))
+;; situation where these characters end up mapping to East Asian XEmacs
-(when (featurep 'mule)
+;; characters, which generally clash strongly with European characters
-;; Check that the pre-existing constant reflects the intended
+;; both in font choice and character width; jit-ucs-charset-0 is a
-;; CCL program.
+;; single-width character set which comes before the East Asian character
-(assert
+;; sets in the default-unicode-precedence-list above.
-(equal pre-existing
+(loop for (ucs ascii-or-latin-1)
-(ccl-compile
+in '((#x2013 ?-) ;; U+2013 EN DASH
-`(1
+(#x2014 ?-) ;; U+2014 EM DASH
-(;; mule-to-unicode's first argument is the
+(#x2105 ?%) ;; U+2105 CARE OF
-;; charset ID, the second its first byte
+(#x203e ?-) ;; U+203E OVERLINE
-;; left shifted by 7 bits masked with its
+(#x221f ?|) ;; U+221F RIGHT ANGLE
-;; second byte.
+(#x2584 ?|) ;; U+2584 LOWER HALF BLOCK
-(r1 = (r1 << 7))
+(#x2588 ?|) ;; U+2588 FULL BLOCK
-(r1 = (r1 | r2))
+(#x258c ?|) ;; U+258C LEFT HALF BLOCK
-(mule-to-unicode r0 r1)
+(#x2550 ?|) ;; U+2550 BOX DRAWINGS DOUBLE HORIZONTAL
-(if (r0 & ,(lognot #xFFFF))
+(#x255e ?|) ;; U+255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-;; Redisplay looks in r1 and r2 for the first
+(#x256a ?|) ;; U+256A BOX DRAWINGS VERTICAL SINGLE & HORIZONTAL DOUBLE
-;; and second bytes of the X11 font,
+(#x2561 ?|) ;; U+2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-;; respectively. For non-BMP characters we
+(#x2215 ?/) ;; U+2215 DIVISION SLASH
-;; display U+FFFD.
+(#x02c9 ?`) ;; U+02C9 MODIFIER LETTER MACRON
-((r1 = #xFF)
+(#x2211 ?s) ;; U+2211 N-ARY SUMMATION
-(r2 = #xFD))
+(#x220f ?s) ;; U+220F N-ARY PRODUCT
-((r1 = (r0 >> 8))
+(#x2248 ?=) ;; U+2248 ALMOST EQUAL TO
-(r2 = (r0 & #xFF))))))))
+(#x2264 ?=) ;; U+2264 LESS-THAN OR EQUAL TO
-nil
+(#x2265 ?=) ;; U+2265 GREATER-THAN OR EQUAL TO
-"The pre-compiled CCL program appears broken. "))
+(#x201c ?') ;; U+201C LEFT DOUBLE QUOTATION MARK
-pre-existing))))
+(#x2026 ?.) ;; U+2026 HORIZONTAL ELLIPSIS
-(defconst ccl-encode-to-ucs-2 prog
+(#x2212 ?-) ;; U+2212 MINUS SIGN
-"CCL program to transform Mule characters to UCS-2.")
+(#x2260 ?=) ;; U+2260 NOT EQUAL TO
-(put 'ccl-encode-to-ucs-2 'ccl-program-idx
+(#x221e ?=) ;; U+221E INFINITY
-(register-ccl-program 'ccl-encode-to-ucs-2 prog)))
+(#x2642 ?=) ;; U+2642 MALE SIGN
+(#x2640 ?=) ;; U+2640 FEMALE SIGN
-;; Now, create jit-ucs-charset-0 entries for those characters in Windows
+(#x2032 ?=) ;; U+2032 PRIME
-;; Glyph List 4 that would otherwise end up in East Asian character sets.
+(#x2033 ?=) ;; U+2033 DOUBLE PRIME
-;;
+(#x25cb ?=) ;; U+25CB WHITE CIRCLE
-;; WGL4 is a character repertoire from Microsoft that gives a guideline
+(#x25cf ?=) ;; U+25CF BLACK CIRCLE
-;; for font implementors as to what characters are sufficient for
+(#x25a1 ?=) ;; U+25A1 WHITE SQUARE
-;; pan-European support.  The intention of this code is to avoid the
+(#x25a0 ?=) ;; U+25A0 BLACK SQUARE
-;; situation where these characters end up mapping to East Asian XEmacs
+(#x25b2 ?=) ;; U+25B2 BLACK UP-POINTING TRIANGLE
-;; characters, which generally clash strongly with European characters
+(#x25bc ?=) ;; U+25BC BLACK DOWN-POINTING TRIANGLE
-;; both in font choice and character width; jit-ucs-charset-0 is a
+(#x2192 ?=) ;; U+2192 RIGHTWARDS ARROW
-;; single-width character set which comes before the East Asian character
+(#x2190 ?=) ;; U+2190 LEFTWARDS ARROW
-;; sets in the default-unicode-precedence-list above.
+(#x2191 ?=) ;; U+2191 UPWARDS ARROW
-(loop for (ucs ascii-or-latin-1)
+(#x2193 ?=) ;; U+2193 DOWNWARDS ARROW
-in '((#x2013 ?-) ;; U+2013 EN DASH
+(#x2229 ?=) ;; U+2229 INTERSECTION
-(#x2014 ?-) ;; U+2014 EM DASH
+(#x2202 ?=) ;; U+2202 PARTIAL DIFFERENTIAL
-(#x2105 ?%) ;; U+2105 CARE OF
+(#x2261 ?=) ;; U+2261 IDENTICAL TO
-(#x203e ?-) ;; U+203E OVERLINE
+(#x221a ?=) ;; U+221A SQUARE ROOT
-(#x221f ?|) ;; U+221F RIGHT ANGLE
+(#x222b ?=) ;; U+222B INTEGRAL
-(#x2584 ?|) ;; U+2584 LOWER HALF BLOCK
+(#x2030 ?=) ;; U+2030 PER MILLE SIGN
-(#x2588 ?|) ;; U+2588 FULL BLOCK
+(#x266a ?=) ;; U+266A EIGHTH NOTE
-(#x258c ?|) ;; U+258C LEFT HALF BLOCK
+(#x2020 ?*) ;; U+2020 DAGGER
-(#x2550 ?|) ;; U+2550 BOX DRAWINGS DOUBLE HORIZONTAL
+(#x2021 ?*) ;; U+2021 DOUBLE DAGGER
-(#x255e ?|) ;; U+255E BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+(#x2500 ?|) ;; U+2500 BOX DRAWINGS LIGHT HORIZONTAL
-(#x256a ?|) ;; U+256A BOX DRAWINGS VERTICAL SINGLE & HORIZONTAL DOUBLE
+(#x2502 ?|) ;; U+2502 BOX DRAWINGS LIGHT VERTICAL
-(#x2561 ?|) ;; U+2561 BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+(#x250c ?|) ;; U+250C BOX DRAWINGS LIGHT DOWN AND RIGHT
-(#x2215 ?/) ;; U+2215 DIVISION SLASH
+(#x2510 ?|) ;; U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT
-(#x02c9 ?`) ;; U+02C9 MODIFIER LETTER MACRON
+(#x2518 ?|) ;; U+2518 BOX DRAWINGS LIGHT UP AND LEFT
-(#x2211 ?s) ;; U+2211 N-ARY SUMMATION
+(#x2514 ?|) ;; U+2514 BOX DRAWINGS LIGHT UP AND RIGHT
-(#x220f ?s) ;; U+220F N-ARY PRODUCT
+(#x251c ?|) ;; U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-(#x2248 ?=) ;; U+2248 ALMOST EQUAL TO
+(#x252c ?|) ;; U+252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-(#x2264 ?=) ;; U+2264 LESS-THAN OR EQUAL TO
+(#x2524 ?|) ;; U+2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT
-(#x2265 ?=) ;; U+2265 GREATER-THAN OR EQUAL TO
+(#x2534 ?|) ;; U+2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL
-(#x201c ?') ;; U+201C LEFT DOUBLE QUOTATION MARK
+(#x253c ?|) ;; U+253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-(#x2026 ?.) ;; U+2026 HORIZONTAL ELLIPSIS
+(#x02da ?^) ;; U+02DA RING ABOVE
-(#x2212 ?-) ;; U+2212 MINUS SIGN
+(#x2122 ?\xa9) ;; U+2122 TRADE MARK SIGN, ?,A)(B
-(#x2260 ?=) ;; U+2260 NOT EQUAL TO
-(#x221e ?=) ;; U+221E INFINITY
+(#x0132 ?\xe6) ;; U+0132 LATIN CAPITAL LIGATURE IJ, ?,Af(B
-(#x2642 ?=) ;; U+2642 MALE SIGN
+(#x013f ?\xe6) ;; U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT, ?,Af(B
-(#x2640 ?=) ;; U+2640 FEMALE SIGN
-(#x2032 ?=) ;; U+2032 PRIME
+(#x0133 ?\xe6) ;; U+0133 LATIN SMALL LIGATURE IJ, ?,Af(B
-(#x2033 ?=) ;; U+2033 DOUBLE PRIME
+(#x0140 ?\xe6) ;; U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT, ?,Af(B
-(#x25cb ?=) ;; U+25CB WHITE CIRCLE
+(#x0149 ?\xe6) ;; U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPH,?,Af(B
-(#x25cf ?=) ;; U+25CF BLACK CIRCLE
-(#x25a1 ?=) ;; U+25A1 WHITE SQUARE
+(#x2194 ?|) ;; U+2194 LEFT RIGHT ARROW
-(#x25a0 ?=) ;; U+25A0 BLACK SQUARE
+(#x2660 ?*) ;; U+2660 BLACK SPADE SUIT
-(#x25b2 ?=) ;; U+25B2 BLACK UP-POINTING TRIANGLE
+(#x2665 ?*) ;; U+2665 BLACK HEART SUIT
-(#x25bc ?=) ;; U+25BC BLACK DOWN-POINTING TRIANGLE
+(#x2663 ?*) ;; U+2663 BLACK CLUB SUIT
-(#x2192 ?=) ;; U+2192 RIGHTWARDS ARROW
+(#x2592 ?|) ;; U+2592 MEDIUM SHADE
-(#x2190 ?=) ;; U+2190 LEFTWARDS ARROW
+(#x2195 ?|) ;; U+2195 UP DOWN ARROW
-(#x2191 ?=) ;; U+2191 UPWARDS ARROW
-(#x2193 ?=) ;; U+2193 DOWNWARDS ARROW
+(#x2113 ?\xb9) ;; U+2113 SCRIPT SMALL L, ?,A9(B
-(#x2229 ?=) ;; U+2229 INTERSECTION
+(#x215b ?\xbe) ;; U+215B VULGAR FRACTION ONE EIGHTH, ?,A>(B
-(#x2202 ?=) ;; U+2202 PARTIAL DIFFERENTIAL
+(#x215c ?\xbe) ;; U+215C VULGAR FRACTION THREE EIGHTHS, ?,A>(B
-(#x2261 ?=) ;; U+2261 IDENTICAL TO
+(#x215d ?\xbe) ;; U+215D VULGAR FRACTION FIVE EIGHTHS, ?,A>(B
-(#x221a ?=) ;; U+221A SQUARE ROOT
+(#x215e ?\xbe) ;; U+215E VULGAR FRACTION SEVEN EIGHTHS, ?,A>(B
-(#x222b ?=) ;; U+222B INTEGRAL
+(#x207f ?\xbe) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?,A>(B
-(#x2030 ?=) ;; U+2030 PER MILLE SIGN
-(#x266a ?=) ;; U+266A EIGHTH NOTE
-(#x2020 ?*) ;; U+2020 DAGGER
-(#x2021 ?*) ;; U+2021 DOUBLE DAGGER
-(#x2500 ?|) ;; U+2500 BOX DRAWINGS LIGHT HORIZONTAL
-(#x2502 ?|) ;; U+2502 BOX DRAWINGS LIGHT VERTICAL
-(#x250c ?|) ;; U+250C BOX DRAWINGS LIGHT DOWN AND RIGHT
-(#x2510 ?|) ;; U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT
-(#x2518 ?|) ;; U+2518 BOX DRAWINGS LIGHT UP AND LEFT
-(#x2514 ?|) ;; U+2514 BOX DRAWINGS LIGHT UP AND RIGHT
-(#x251c ?|) ;; U+251C BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-(#x252c ?|) ;; U+252C BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-(#x2524 ?|) ;; U+2524 BOX DRAWINGS LIGHT VERTICAL AND LEFT
-(#x2534 ?|) ;; U+2534 BOX DRAWINGS LIGHT UP AND HORIZONTAL
-(#x253c ?|) ;; U+253C BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-(#x02da ?^) ;; U+02DA RING ABOVE
-(#x2122 ?\xa9) ;; U+2122 TRADE MARK SIGN, ?,A)(B
-(#x0132 ?\xe6) ;; U+0132 LATIN CAPITAL LIGATURE IJ, ?,Af(B
-(#x013f ?\xe6) ;; U+013F LATIN CAPITAL LETTER L WITH MIDDLE DOT, ?,Af(B
-(#x0133 ?\xe6) ;; U+0133 LATIN SMALL LIGATURE IJ, ?,Af(B
-(#x0140 ?\xe6) ;; U+0140 LATIN SMALL LETTER L WITH MIDDLE DOT, ?,Af(B
-(#x0149 ?\xe6) ;; U+0149 LATIN SMALL LETTER N PRECEDED BY APOSTROPH,?,Af(B
-(#x2194 ?|) ;; U+2194 LEFT RIGHT ARROW
-(#x2660 ?*) ;; U+2660 BLACK SPADE SUIT
-(#x2665 ?*) ;; U+2665 BLACK HEART SUIT
-(#x2663 ?*) ;; U+2663 BLACK CLUB SUIT
-(#x2592 ?|) ;; U+2592 MEDIUM SHADE
-(#x2195 ?|) ;; U+2195 UP DOWN ARROW
-(#x2113 ?\xb9) ;; U+2113 SCRIPT SMALL L, ?,A9(B
-(#x215b ?\xbe) ;; U+215B VULGAR FRACTION ONE EIGHTH, ?,A>(B
-(#x215c ?\xbe) ;; U+215C VULGAR FRACTION THREE EIGHTHS, ?,A>(B
-(#x215d ?\xbe) ;; U+215D VULGAR FRACTION FIVE EIGHTHS, ?,A>(B
-(#x215e ?\xbe) ;; U+215E VULGAR FRACTION SEVEN EIGHTHS, ?,A>(B
-(#x207f ?\xbe) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?,A>(B
 ;; These are not in WGL 4, but are IPA characters that should not
 ;; be double width. They are the only IPA characters that both
 ;; occur in packages/mule-packages/leim/ipa.el and end up in East
 ;; Asian character sets when that file is loaded in an XEmacs
 ;; without packages.
 (#x2197 ?|) ;; U+2197 NORTH EAST ARROW
 (#x2199 ?|) ;; U+2199 SOUTH WEST ARROW
 (#x2191 ?|) ;; U+2191 UPWARDS ARROW
-(#x207f ?\xb9));; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?,A9(B
+(#x207f ?\xb9)) ;; U+207F SUPERSCRIPT LATIN SMALL LETTER N, ?,A9(B
 with decoded = nil
 with syntax-table = (standard-syntax-table)
-;; This creates jit-ucs-charset-0 entries because:
+initially (unless (featurep 'mule) (return))
-;;
+;; This creates jit-ucs-charset-0 entries because:
-;;    1. If the tables are dumped, it is run at dump time before they are
+;;
-;;    dumped, and as such before the relevant conversions are available
+;;    1. If the tables are dumped, it is run at dump time before they are
-;;    (they are made available in mule/general-late.el).
+;;    dumped, and as such before the relevant conversions are available
-;;
+;;    (they are made available in mule/general-late.el).
-;;    2. If the tables are not dumped, it is run at dump time, long before
+;;
-;;    any of the other mappings are available.
+;;    2. If the tables are not dumped, it is run at dump time, long before
-;;
+;;    any of the other mappings are available.
-do
+;;
-(setq decoded (decode-char 'ucs ucs))
+do
-(assert (eq (char-charset decoded)
+(setq decoded (decode-char 'ucs ucs))
-'jit-ucs-charset-0) nil
+(assert (eq (declare-fboundp (char-charset decoded))
-"Unexpected Unicode decoding behavior.  ")
+'jit-ucs-charset-0) nil
-(modify-syntax-entry decoded
+"Unexpected Unicode decoding behavior.  ")
-(string
+(modify-syntax-entry decoded
-(char-syntax ascii-or-latin-1))
+(string
-syntax-table))
+(char-syntax ascii-or-latin-1))
+syntax-table))
 ;; *Sigh*, declarations need to be at the start of the line to be picked up
 ;; by make-docfile. Not so much an issue with ccl-encode-to-ucs-2, which we
 ;; don't necessarily want to advertise, but the following are important.
 ;; point). Make them available to user code.
 (defvar unicode-error-default-translation-table
 (loop
 with char-table = (make-char-table 'char)
 for i from ?\x00 to ?\xFF
+initially (unless (featurep 'mule) (return))
 do
 (put-char-table (aref
 ;; #xd800 is the first leading surrogate;
 ;; trailing surrogates must be in the range
 ;; #xdc00-#xdfff. These examples are not, so we
 To transform XEmacs Unicode error sequences to the Latin-1 characters that
 correspond to the octets on disk, you can use this variable.  ")
 (defvar unicode-error-sequence-regexp-range
-(format "%c%c-%c"
+(and (featurep 'mule)
-(aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0)
+(format "%c%c-%c"
-(aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)
+(aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0)
-(aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3))
+(aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)
+(aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)))
 "Regular expression range to match Unicode error sequences in XEmacs.
 Invalid Unicode sequences on input are represented as XEmacs
 characters with values stored as the keys in
 `unicode-error-default-translation-table', one character for each
 invalid octet.  You can use this variable (with `re-search-forward' or
 `skip-chars-forward') to search for such characters; see also
 `unicode-error-translate-region'.  ")
 ;; Check that the lookup table is correct, and that all the actual error
 ;; sequences are caught by the regexp.
 (with-temp-buffer
 (loop
 for i from ?\x00 to ?\xFF
 with to-check = (make-string 20 ?\x20)
-do
+initially (unless (featurep 'mule) (return))
-(delete-region (point-min) (point-max))
+do
-(insert to-check)
+(delete-region (point-min) (point-max))
-(goto-char 10)
+(insert to-check)
-(insert (decode-coding-string (format "\xd8\x00\x00%c" i)
+(goto-char 10)
-				    'utf-16-be))
+(insert (decode-coding-string (format "\xd8\x00\x00%c" i)
-(backward-char)
+'utf-16-be))
-(assert (= i (get-char-table (char-after (point))
+(backward-char)
-				   unicode-error-default-translation-table))
+(assert (= i (get-char-table (char-after (point))
-	      (format "Char ?\\x%x not the expected error sequence!"
+unicode-error-default-translation-table))
-		      i))
+(format "Char ?\\x%x not the expected error sequence!"
+i))
-(goto-char (point-min))
-;; Comment out until the issue in
+(goto-char (point-min))
-;; 18179.49815.622843.336527@parhasard.net is fixed.
+;; Comment out until the issue in
-(assert t ;(re-search-forward (concat "["
+;; 18179.49815.622843.336527@parhasard.net is fixed.
-		;			 unicode-error-sequence-regexp-range
+(assert t ; (re-search-forward (concat "["
-		;			 "]"))
+;                        unicode-error-sequence-regexp-range
-	      nil
+;                        "]"))
-	      (format "Could not find char ?\\x%x in buffer" i))))
+nil
+(format "Could not find char ?\\x%x in buffer" i))))
 (defun frob-unicode-errors-region (frob-function begin end &optional buffer)
 "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END.
 Optional argument BUFFER specifies the buffer that should be examined for
 			    unicode-error-sequence-regexp-range)
 			   (point))))
 	  (if end
 	      (funcall frob-function begin end))))))
 (defun unicode-error-translate-region (begin end &optional buffer table)
 "Translate the Unicode error sequences in BUFFER between BEGIN and END.
 The error sequences are transformed, by default, into the ASCII,
 control-1 and latin-iso8859-1 characters with the numeric values
 corresponding to the incorrect octets encountered.  This is achieved
 by using `unicode-error-default-translation-table' (which see) for
 mapping from the error sequences to the desired characters.  "
 (unless table (setq table unicode-error-default-translation-table))
 (frob-unicode-errors-region
 (lambda (start finish)
 (translate-region start finish table))
-begin end buffer)))
+begin end buffer))
+(unless (featurep 'mule)
+;; We do this in such a roundabout way--instead of having the above defun
+;; and defvar calls inside a (when (featurep 'mule) ...) form--to have
+;; make-docfile.c pick up symbol and function documentation correctly. An
+;; alternative approach would be to fix make-docfile.c to be able to read
+;; Lisp.
+(mapcar #'unintern
+'(ccl-encode-to-ucs-2 unicode-error-default-translation-table
+unicode-error-sequence-regexp-range
+frob-unicode-errors-region unicode-error-translate-region)))
 ;; #### UTF-7 is not yet implemented, and it's tricky to do.  There's
 ;; an implementation in appendix A.1 of the Unicode Standard, Version
 ;; 2.0, but I don't know its licensing characteristics.

Mercurial > hg > xemacs-beta

comparison lisp/unicode.el @ 4317:15d36164ebd7