Mercurial > hg > xemacs-beta
changeset 4072:aa28d959af41
[xemacs-hg @ 2007-07-22 22:03:29 by aidan]
Add support for non-ISO2022 8 bit fixed-width coding-systems
author | aidan |
---|---|
date | Sun, 22 Jul 2007 22:04:14 +0000 |
parents | d607d13fca67 |
children | c098c0d9125f |
files | lisp/ChangeLog lisp/dumped-lisp.el lisp/mule/chinese.el lisp/mule/cyrillic.el lisp/mule/english.el lisp/mule/ethiopic.el lisp/mule/japanese.el lisp/mule/korean.el lisp/mule/latin.el lisp/mule/mule-ccl.el lisp/mule/mule-charset.el lisp/mule/mule-coding.el lisp/mule/viet-util.el lisp/mule/vietnamese.el lisp/unicode.el src/ChangeLog src/elhash.c src/elhash.h src/file-coding.c src/mule-ccl.c |
diffstat | 20 files changed, 2271 insertions(+), 741 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/ChangeLog Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/ChangeLog Sun Jul 22 22:04:14 2007 +0000 @@ -1,3 +1,136 @@ +2007-07-21 Aidan Kehoe <kehoea@parhasard.net> + + * mule/chinese.el: + * mule/cyrillic.el: + * mule/cyrillic.el (cyrillic-koi8-r-decode-table): Removed. + * mule/cyrillic.el (cyrillic-koi8-r-encode-table): Removed. + * mule/cyrillic.el (cyrillic-alternativnyj-decode-table): Removed. + * mule/cyrillic.el (cyrillic-alternativnyj-encode-table): Removed. + * mule/cyrillic.el (cyrillic-koi8-r-to-external-code-table): + Removed. + + * mule/cyrillic.el (ccl-decode-koi8): Removed. + * mule/cyrillic.el (ccl-encode-koi8): Removed. + * mule/cyrillic.el (ccl-decode-alternativnyj): Removed. + * mule/cyrillic.el (ccl-encode-alternativnyj): Removed. + * mule/cyrillic.el + (cyrillic-alternativnyj-to-external-code-table): Removed. + * mule/cyrillic.el ("Cyrillic-ISO"): Langenv added. + * mule/cyrillic.el ("Ukrainian"): Langenv added. + * mule/cyrillic.el ("Bulgarian"): Langenv added. + * mule/cyrillic.el ("Belarusian"): Langenv added. + + * mule/cyrillic.el (koi8-r): + * mule/cyrillic.el (koi8-ru): + * mule/cyrillic.el (koi8-t): + * mule/cyrillic.el (koi8-c): + * mule/english.el ("English"): + * mule/ethiopic.el (ccl-encode-ethio-font): + * mule/ethiopic.el (ethiopic): + * mule/japanese.el (for): + * mule/korean.el (for): + * mule/latin.el (iso-8859-15): + Use a normal quote; there's no need for a backquote. + * mule/mule-ccl.el: + * mule/mule-ccl.el (ccl-command-table): + * mule/mule-ccl.el (ccl-extended-code-table): + * mule/mule-ccl.el (ccl-embed-data): + * mule/mule-ccl.el (ccl-compile-lookup-integer): New. + * mule/mule-ccl.el (ccl-compile-lookup-character): New. + * mule/mule-ccl.el (ccl-dump-ex-cmd): + * mule/mule-ccl.el (ccl-dump-lookup-int-const-tbl): New. + * mule/mule-ccl.el (ccl-dump-lookup-char-const-tbl): New. + * mule/mule-ccl.el (define-ccl-program): + * mule/mule-ccl.el (ccl): + * mule/mule-ccl.el (ccl-execute-with-args): Removed. 
+ * mule/mule-charset.el: + * mule/mule-charset.el ('charset-columns): Removed. + * mule/mule-charset.el ('charset-final): Removed. + * mule/mule-charset.el ('charset-graphic): Removed. + * mule/mule-charset.el ('charset-doc-string): Removed. + * mule/mule-coding.el: + * mule/mule-coding.el (define-translation-hash-table): New. + * mule/mule-coding.el (make-8-bit-private-use-start): New. + * mule/mule-coding.el (make-8-bit-generate-helper): New. + * mule/mule-coding.el (make-8-bit-generate-encode-program): New. + * mule/mule-coding.el (make-8-bit-create-decode-encode-tables): New. + * mule/mule-coding.el (make-8-bit-generate-decode-program): New. + * mule/mule-coding.el (make-8-bit-coding-system): New. + * mule/viet-util.el (viet-encode-viscii-char): + * mule/vietnamese.el: + * mule/vietnamese.el (?v): + * mule/vietnamese.el (viscii): + * mule/vietnamese.el (viet-viscii-decode-table): Removed. + * mule/vietnamese.el (viet-viscii-encode-table): Removed. + * mule/vietnamese.el (viet-vscii-decode-table): Removed. + * mule/vietnamese.el (viet-vscii-encode-table): Removed. + * mule/vietnamese.el (ccl-decode-viscii): Removed. + * mule/vietnamese.el (leading-code-private-11): Removed. + * mule/vietnamese.el (ccl-encode-viscii): Removed. + * mule/vietnamese.el (ccl-encode-viscii-font): Removed. + * mule/vietnamese.el (ccl-decode-vscii): Removed. + * mule/vietnamese.el (ccl-encode-vscii): Removed. + * mule/vietnamese.el (ccl-encode-vscii-font): Removed. + * mule/vietnamese.el (viet-viscii-to-external-code-table): + Removed. + + Provide make-8-bit-coding-system, a way to construct a fixed-width + XEmacs CCL coding system given a map from 8-bit characters to + Unicode. Such generated CCL coding systems do not have the Latin + deunification problem of iso-8859-15, for example. For most users, + most of the time, only a small minority of code conversions will + require a hash table lookup, so performance is good. 
+ + Provide a compiler macro version of same, used when + make-8-bit-coding-system is called with literal arguments, since + at dump time the unicode conversion tables are not available, + whereas during compilation of dumped files they are. + + Rework cyrillic.el to use make-8-bit-coding-system. Add support + for previously trashed characters in KOI8-R, add support for + KOI8-u, re-add support for Windows-1251 when not building on + win32, add support for previously trashed characters in + Alternativnyj, add support for koi8-ru (unified Russian, + Belarusian and Ukrainian), add support for koi8-t (Tajik), koi8-c + \(languages of the Caucasus), koi8-o (archaic Russian, Old Church + Slavonic). Add language environments for Russian, Bulgarian, + Belarusian and Cyrillic-KOI8RU. + + Remove the ascii-right-to-left charset, which never worked. + + Move to syntactic integers in CCL, when semantic integers were + represented by character. + + Remove superfluous calls to (modify-syntax-entry [...] "w"), since + character syntax now defaults to word. + + Add support for lookup-integer, lookup-character in mule-ccl.el, + based on Dave Love's work for GNU. + + Remove ccl-execute-with-args; ccl-execute-on-string ably does the + same job, and no-one's using it on google.com/codesearch. + + Correct the documentation for read-multibyte-character and + write-multibyte-character. + + Rework vietnamese.el to use make-8-bit-coding-system. Remove + the viqr coding system, since it never worked. Don't use a CCL + program to display VISCII; rely on our Unicode conversion + instead. + + Rework viet-encode-viscii-char, now + viet-viscii-to-external-code-table is no longer available. + +2007-07-21 Aidan Kehoe <kehoea@parhasard.net> + + * dumped-lisp.el (preloaded-file-list): + It's not necessary to dump mule-ccl.el; the functionality from it + that's used in other dumped files is available in macro form. 
+ * unicode.el (ccl-encode-to-ucs-2): + Update ccl-encode-to-ucs-2 to reflect the changes made to + mule-ccl.c. NB; this change needs to be made to + fsf-compat-unicode.el too. + 2007-07-16 Aidan Kehoe <kehoea@parhasard.net> * isearch-mode.el (isearch-exit):
--- a/lisp/dumped-lisp.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/dumped-lisp.el Sun Jul 22 22:04:14 2007 +0000 @@ -178,7 +178,6 @@ ;; All files after this can have extended characters in them. (when (featurep 'mule) '("mule/mule-category" - "mule/mule-ccl" "mule/kinsoku" )) @@ -212,10 +211,12 @@ "mule/devanagari" ; must be loaded after indian.el "mule/japanese" "mule/korean" - "mule/lao" + "mule/lao" ; sucks. "mule/latin" "mule/misc-lang" ;; "thai" #### merge thai and thai-xtis!!! + ;; #### Even better; take out thai-xtis! It's not even a + ;; standard, and no-one uses it. "mule/thai-xtis" "mule/tibetan" "mule/vietnamese" @@ -223,17 +224,6 @@ ;; Specialized language support (when (featurep 'mule) "mule/canna-leim") -;; Egg/Its is now a package -; (when (featurep '(and mule wnn)) -; '("egg-leim" "egg-kwnn-leim" "egg-cwnn-leim")) -; (when (featurep 'mule) "mule/egg-sj3-leim") -;; SKK is now a package -; (when (featurep 'mule) "mule/skk-leim") - -;; Enable Mule capability for Gnus, mail, etc... -;; Moved to sunpro-load.el - the default only for Sun. -;;(pureload "mime-setup") - ;; needs access to the charsets created by the above ;; language-specific files. (when (and (featurep 'mule) (valid-console-type-p 'mswindows))
--- a/lisp/mule/chinese.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/chinese.el Sun Jul 22 22:04:14 2007 +0000 @@ -31,109 +31,11 @@ ;;; Code: -(eval-when-compile - (require 'china-util)) - -; (make-charset 'chinese-gb2312 -; "GB2312 Chinese simplified: ISO-IR-58" -; '(dimension -; 2 -; registry "GB2312.1980" -; chars 94 -; columns 2 -; direction l2r -; final ?A -; graphic 0 -; short-name "GB2312" -; long-name "GB2312: ISO-IR-58" -; )) - -; (make-charset 'chinese-cns11643-1 -; "CNS11643 Plane 1 Chinese traditional: ISO-IR-171" -; '(dimension -; 2 -; registry "CNS11643.1992-1" -; chars 94 -; columns 2 -; direction l2r -; final ?G -; graphic 0 -; short-name "CNS11643-1" -; long-name "CNS11643-1 (Chinese traditional): ISO-IR-171" -; )) - -; (make-charset 'chinese-cns11643-2 -; "CNS11643 Plane 2 Chinese traditional: ISO-IR-172" -; '(dimension -; 2 -; registry "CNS11643.1992-2" -; chars 94 -; columns 2 -; direction l2r -; final ?H -; graphic 0 -; short-name "CNS11643-2" -; long-name "CNS11643-2 (Chinese traditional): ISO-IR-172" -; )) - -; (make-charset 'chinese-big5-1 -; "Frequently used part (A141-C67F) of Big5 (Chinese traditional)" -; '(dimension -; 2 -; registry "Big5" -; chars 94 -; columns 2 -; direction l2r -; final ?0 -; graphic 0 -; short-name "Big5 (Level-1)" -; long-name "Big5 (Level-1) A141-C67F" -; )) - -; (make-charset 'chinese-big5-2 -; "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)" -; '(dimension -; 2 -; registry "Big5" -; chars 94 -; columns 2 -; direction l2r -; final ?1 -; graphic 0 -; short-name "Big5 (Level-2)" -; long-name "Big5 (Level-2) C940-FEFE" -; )) +(eval-when-compile (progn (require 'ccl "mule-ccl") (require 'china-util))) ;; Syntax of Chinese characters. 
-(modify-syntax-entry 'chinese-gb2312 "w") (loop for row in '(33 34 41) do (modify-syntax-entry `[chinese-gb2312 ,row] ".")) -;;(loop for row from 35 to 40 -;; do (modify-syntax-entry `[chinese-gb2312 ,row] "w")) -;;(loop for row from 42 to 126 -;; do (modify-syntax-entry `[chinese-gb2312 ,row] "w")) - -(modify-syntax-entry 'chinese-cns11643-1 "w") -(modify-syntax-entry 'chinese-cns11643-2 "w") -(modify-syntax-entry 'chinese-big5-1 "w") -(modify-syntax-entry 'chinese-big5-2 "w") - -; ;; Chinese CNS11643 Plane3 thru Plane7. Although these are official -; ;; character sets, the use is rare and don't have to be treated -; ;; space-efficiently in the buffer. -; (make-charset 'chinese-cns11643-3 -; "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183" -; '(dimension -; 2 -; registry "CNS11643.1992-3" -; chars 94 -; columns 2 -; direction l2r -; final ?I -; graphic 0 -; short-name "CNS11643-3" -; long-name "CNS11643-3 (Chinese traditional): ISO-IR-183" -; )) ;; CNS11643 Plane3 thru Plane7 ;; These represent more and more obscure Chinese characters. @@ -378,16 +280,13 @@ ;; R2:position code 2 ;; Out: R1:font code point 1 ;; R2:font code point 2 - ((r2 = ((((r1 - ?\x21) * 94) + r2) - ?\x21)) + ((r2 = ((((r1 - #x21) * 94) + r2) - #x21)) (if (r0 == ,(charset-id 'chinese-big5-2)) (r2 += 6280)) - (r1 = ((r2 / 157) + ?\xA1)) + (r1 = ((r2 / 157) + #xA1)) (r2 %= 157) - (if (r2 < ?\x3F) (r2 += ?\x40) (r2 += ?\x62)))) + (if (r2 < #x3F) (r2 += #x40) (r2 += #x62)))) "CCL program to encode a Big5 code to code point of Big5 font.") -;; (setq font-ccl-encoder-alist -;; (cons (cons "big5" ccl-encode-big5-font) font-ccl-encoder-alist)) - (set-charset-ccl-program 'chinese-big5-1 'ccl-encode-big5-font) (set-charset-ccl-program 'chinese-big5-2 'ccl-encode-big5-font)
--- a/lisp/mule/cyrillic.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/cyrillic.el Sun Jul 22 22:04:14 2007 +0000 @@ -4,6 +4,7 @@ ;; Licensed to the Free Software Foundation. ;; Copyright (C) 1997 MORIOKA Tomohiko ;; Copyright (C) 2001, 2002 Ben Wing. +;; Copyright (C) 2007 Free Software Foundation ;; Keywords: multilingual, Cyrillic @@ -29,16 +30,6 @@ ;; The character set ISO8859-5 is supported. KOI-8 and ALTERNATIVNYJ are ;; converted to ISO8859-5 internally. -;; [Windows-1251 support deleted because XEmacs has automatic support.] - -;; #### We only have automatic support on Windows; that needs to be put -;; back. Also, the Russian Wikipedia articles on KOI-8 list several other -;; related encodings--KOI8-U (Ukrainian), KOI8-RU (simultaneous support for -;; Russian, Belorussian, and Ukrainian), KOI8-C (for languages of the -;; Caucasus), KOI8-O (Old Church Slavonic)--and it would be nice to have -;; them. Beyond that, we're currently trashing lots of code points with -;; KOI-8 R; it would be nice to leverage the Unicode support to not do that. - ;;; Code: ;; Case table: @@ -93,8 +84,8 @@ with case-table = (standard-case-table) do (put-case-table-pair (make-char 'cyrillic-iso8859-5 upper) - (make-char 'cyrillic-iso8859-5 lower) - case-table)) + (make-char 'cyrillic-iso8859-5 lower) + case-table)) ;; The default character syntax is now word. Pay attention to the ;; exceptions in ISO-8859-5, copying them from ISO-8859-1. @@ -114,6 +105,7 @@ (string (char-syntax ?\# (standard-syntax-table))) (standard-syntax-table)) +;; And create the coding system. (make-coding-system 'iso-8859-5 'iso2022 "ISO-8859-5 (Cyrillic)" @@ -125,224 +117,1226 @@ (set-language-info-alist "Cyrillic-ISO" '((charset cyrillic-iso8859-5) - (tutorial . "TUTORIAL.ru") - (coding-system iso-8859-5) - (native-coding-system iso-8859-5) - (coding-priority iso-8859-5) - (input-method . "cyrillic-yawerty") - (features cyril-util) - (sample-text . 
"Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") - (documentation . "Support for Cyrillic ISO-8859-5.")) + (tutorial . "TUTORIAL.ru") + (coding-system iso-8859-5) + (native-coding-system iso-8859-5) + (coding-priority iso-8859-5) + (input-method . "cyrillic-yawerty") + (features cyril-util) + (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") + (documentation . "Support for Cyrillic ISO-8859-5.")) '("Cyrillic")) -;; KOI-8 - -(eval-and-compile - -(defvar cyrillic-koi8-r-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ?$B(!(B ?$B("(B ?$B(#(B ?$B($(B ?$B(&(B ?$B(%(B ?$B('(B ?$B()(B ?$B(((B ?$B(*(B ?$B(+(B 32 ?$(G#'(B ?$(G#+(B ?$(G#/(B 32 - 32 ?$(C"F(B 32 32 ?$B"#(B 32 ?$B"e(B ?$A!V(B ?$A!\(B ?$A!](B ?,L (B 32 ?,A0(B ?,A2(B ?,A7(B ?,Aw(B - ?$(G#D(B 32 32 ?,Lq(B 32 32 32 32 32 32 32 32 32 32 32 ?$(G#E(B - 32 32 ?$(G#G(B ?,L!(B 32 32 32 32 32 32 32 32 ?$(G#F(B 32 32 ?,A)(B - ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B - ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B - ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B - ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B ] - "Cyrillic KOI8-R decoding table.") - -(defvar cyrillic-koi8-r-encode-table - (let ((table (make-vector 256 32)) - (i 0)) - (while (< i 256) - (let* ((ch (aref cyrillic-koi8-r-decode-table i)) - (split (split-char ch))) - (cond ((eq (car split) 'cyrillic-iso8859-5) - 
(aset table (logior (nth 1 split) 128) i)) - ((eq ch 32)) - ((eq (car split) 'ascii) - (aset table ch i)))) - (setq i (1+ i))) - table) - "Cyrillic KOI8-R encoding table.") - -) +;; KOI8-R, the most common encoding for Cyrillic on Unix and Usenet. +(make-8-bit-coding-system + 'koi8-r + '((#x80 ?\u2500) ;; BOX DRAWINGS LIGHT HORIZONTAL + (#x81 ?\u2502) ;; BOX DRAWINGS LIGHT VERTICAL + (#x82 ?\u250C) ;; BOX DRAWINGS LIGHT DOWN AND RIGHT + (#x83 ?\u2510) ;; BOX DRAWINGS LIGHT DOWN AND LEFT + (#x84 ?\u2514) ;; BOX DRAWINGS LIGHT UP AND RIGHT + (#x85 ?\u2518) ;; BOX DRAWINGS LIGHT UP AND LEFT + (#x86 ?\u251C) ;; BOX DRAWINGS LIGHT VERTICAL AND RIGHT + (#x87 ?\u2524) ;; BOX DRAWINGS LIGHT VERTICAL AND LEFT + (#x88 ?\u252C) ;; BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + (#x89 ?\u2534) ;; BOX DRAWINGS LIGHT UP AND HORIZONTAL + (#x8A ?\u253C) ;; BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + (#x8B ?\u2580) ;; UPPER HALF BLOCK + (#x8C ?\u2584) ;; LOWER HALF BLOCK + (#x8D ?\u2588) ;; FULL BLOCK + (#x8E ?\u258C) ;; LEFT HALF BLOCK + (#x8F ?\u2590) ;; RIGHT HALF BLOCK + (#x90 ?\u2591) ;; LIGHT SHADE + (#x91 ?\u2592) ;; MEDIUM SHADE + (#x92 ?\u2593) ;; DARK SHADE + (#x93 ?\u2320) ;; TOP HALF INTEGRAL + (#x94 ?\u25A0) ;; BLACK SQUARE + (#x95 ?\u2219) ;; BULLET OPERATOR + (#x96 ?\u221A) ;; SQUARE ROOT + (#x97 ?\u2248) ;; ALMOST EQUAL TO + (#x98 ?\u2264) ;; LESS-THAN OR EQUAL TO + (#x99 ?\u2265) ;; GREATER-THAN OR EQUAL TO + (#x9A ?\u00A0) ;; NO-BREAK SPACE + (#x9B ?\u2321) ;; BOTTOM HALF INTEGRAL + (#x9C ?\u00B0) ;; DEGREE SIGN + (#x9D ?\u00B2) ;; SUPERSCRIPT TWO + (#x9E ?\u00B7) ;; MIDDLE DOT + (#x9F ?\u00F7) ;; DIVISION SIGN + (#xA0 ?\u2550) ;; BOX DRAWINGS DOUBLE HORIZONTAL + (#xA1 ?\u2551) ;; BOX DRAWINGS DOUBLE VERTICAL + (#xA2 ?\u2552) ;; BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + (#xA3 ?\u0451) ;; CYRILLIC SMALL LETTER IO + (#xA4 ?\u2553) ;; BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + (#xA5 ?\u2554) ;; BOX DRAWINGS DOUBLE DOWN AND RIGHT + (#xA6 ?\u2555) ;; BOX DRAWINGS DOWN 
SINGLE AND LEFT DOUBLE + (#xA7 ?\u2556) ;; BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + (#xA8 ?\u2557) ;; BOX DRAWINGS DOUBLE DOWN AND LEFT + (#xA9 ?\u2558) ;; BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + (#xAA ?\u2559) ;; BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + (#xAB ?\u255A) ;; BOX DRAWINGS DOUBLE UP AND RIGHT + (#xAC ?\u255B) ;; BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + (#xAD ?\u255C) ;; BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + (#xAE ?\u255D) ;; BOX DRAWINGS DOUBLE UP AND LEFT + (#xAF ?\u255E) ;; BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + (#xB0 ?\u255F) ;; BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + (#xB1 ?\u2560) ;; BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + (#xB2 ?\u2561) ;; BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + (#xB3 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO + (#xB4 ?\u2562) ;; BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + (#xB5 ?\u2563) ;; BOX DRAWINGS DOUBLE VERTICAL AND LEFT + (#xB6 ?\u2564) ;; BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + (#xB7 ?\u2565) ;; BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + (#xB8 ?\u2566) ;; BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + (#xB9 ?\u2567) ;; BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + (#xBA ?\u2568) ;; BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + (#xBB ?\u2569) ;; BOX DRAWINGS DOUBLE UP AND HORIZONTAL + (#xBC ?\u256A) ;; BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + (#xBD ?\u256B) ;; BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + (#xBE ?\u256C) ;; BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + (#xBF ?\u00A9) ;; COPYRIGHT SIGN + (#xC0 ?\u044E) ;; CYRILLIC SMALL LETTER YU + (#xC1 ?\u0430) ;; CYRILLIC SMALL LETTER A + (#xC2 ?\u0431) ;; CYRILLIC SMALL LETTER BE + (#xC3 ?\u0446) ;; CYRILLIC SMALL LETTER TSE + (#xC4 ?\u0434) ;; CYRILLIC SMALL LETTER DE + (#xC5 ?\u0435) ;; CYRILLIC SMALL LETTER IE + (#xC6 ?\u0444) ;; CYRILLIC SMALL LETTER EF + (#xC7 ?\u0433) ;; CYRILLIC SMALL LETTER GHE + (#xC8 ?\u0445) ;; CYRILLIC SMALL LETTER HA + (#xC9 ?\u0438) ;; CYRILLIC SMALL LETTER I + (#xCA ?\u0439) ;; 
CYRILLIC SMALL LETTER SHORT I + (#xCB ?\u043A) ;; CYRILLIC SMALL LETTER KA + (#xCC ?\u043B) ;; CYRILLIC SMALL LETTER EL + (#xCD ?\u043C) ;; CYRILLIC SMALL LETTER EM + (#xCE ?\u043D) ;; CYRILLIC SMALL LETTER EN + (#xCF ?\u043E) ;; CYRILLIC SMALL LETTER O + (#xD0 ?\u043F) ;; CYRILLIC SMALL LETTER PE + (#xD1 ?\u044F) ;; CYRILLIC SMALL LETTER YA + (#xD2 ?\u0440) ;; CYRILLIC SMALL LETTER ER + (#xD3 ?\u0441) ;; CYRILLIC SMALL LETTER ES + (#xD4 ?\u0442) ;; CYRILLIC SMALL LETTER TE + (#xD5 ?\u0443) ;; CYRILLIC SMALL LETTER U + (#xD6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE + (#xD7 ?\u0432) ;; CYRILLIC SMALL LETTER VE + (#xD8 ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN + (#xD9 ?\u044B) ;; CYRILLIC SMALL LETTER YERU + (#xDA ?\u0437) ;; CYRILLIC SMALL LETTER ZE + (#xDB ?\u0448) ;; CYRILLIC SMALL LETTER SHA + (#xDC ?\u044D) ;; CYRILLIC SMALL LETTER E + (#xDD ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA + (#xDE ?\u0447) ;; CYRILLIC SMALL LETTER CHE + (#xDF ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN + (#xE0 ?\u042E) ;; CYRILLIC CAPITAL LETTER YU + (#xE1 ?\u0410) ;; CYRILLIC CAPITAL LETTER A + (#xE2 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE + (#xE3 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE + (#xE4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE + (#xE5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE + (#xE6 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF + (#xE7 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE + (#xE8 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA + (#xE9 ?\u0418) ;; CYRILLIC CAPITAL LETTER I + (#xEA ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I + (#xEB ?\u041A) ;; CYRILLIC CAPITAL LETTER KA + (#xEC ?\u041B) ;; CYRILLIC CAPITAL LETTER EL + (#xED ?\u041C) ;; CYRILLIC CAPITAL LETTER EM + (#xEE ?\u041D) ;; CYRILLIC CAPITAL LETTER EN + (#xEF ?\u041E) ;; CYRILLIC CAPITAL LETTER O + (#xF0 ?\u041F) ;; CYRILLIC CAPITAL LETTER PE + (#xF1 ?\u042F) ;; CYRILLIC CAPITAL LETTER YA + (#xF2 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER + (#xF3 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES + (#xF4 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE + (#xF5 
?\u0423) ;; CYRILLIC CAPITAL LETTER U + (#xF6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE + (#xF7 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE + (#xF8 ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN + (#xF9 ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU + (#xFA ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE + (#xFB ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA + (#xFC ?\u042D) ;; CYRILLIC CAPITAL LETTER E + (#xFD ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA + (#xFE ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE + (#xFF ?\u042A));; CYRILLIC CAPITAL LETTER HARD SIGN + "KOI8-R (,L:^T(B ,L>Q\U]P(B ,L8]d^`\PfXUY(B, 8 ,LQXb(B) for Russian and Bulgarian." + '(mnemonic "KOI8" + documentation + "This is the ASCII-compatible encoding documented in RFC 1341. +,L:>8(B8-,L@(B is very common across the Cyrillic-using internet and in Unix +implementations; it shares the useful property with the other ,L:>8(B8 +encodings that when the high bit is stripped, encoded text remains +readable (Cyrillic characters get mapped to corresponding Roman +character of the opposite case). 
" + aliases (cyrillic-koi8 koi8 cp878))) -(define-ccl-program ccl-decode-koi8 - `(3 - ((read r0) - (loop - (write-read-repeat r0 ,cyrillic-koi8-r-decode-table)))) - "CCL program to decode KOI8.") - -(define-ccl-program ccl-encode-koi8 - `(1 - ((read r0) - (loop - (if (r0 != ,(charset-id 'cyrillic-iso8859-5)) - (write-read-repeat r0) - ((read r0) - (write-read-repeat r0 , cyrillic-koi8-r-encode-table)))))) - "CCL program to encode KOI8.") - -;; (define-coding-system-alias 'koi8-r 'cyrillic-koi8) -;; (define-coding-system-alias 'koi8 'cyrillic-koi8) - -(make-coding-system - 'koi8-r 'ccl - "KOI8-R (Cyrillic)" - '(decode ccl-decode-koi8 - encode ccl-encode-koi8 - mnemonic "KOI8")) - -;; `iso-8-1' is not correct, but XEmacs doesn't have a `ccl' category -(coding-system-put 'koi8-r 'category 'iso-8-1) - -;; (define-ccl-program ccl-encode-koi8-font -;; `(0 -;; ((r1 |= 128) -;; (r1 = r1 ,cyrillic-koi8-r-encode-table))) -;; "CCL program to encode Cyrillic chars to KOI font.") - -;; (setq font-ccl-encoder-alist -;; (cons (cons "koi8" ccl-encode-koi8-font) font-ccl-encoder-alist)) - -(defvar cyrillic-koi8-r-to-external-code-table - (let ((table (make-char-table 'generic)) - (i 0) - (len (length cyrillic-koi8-r-decode-table))) - (while (< i len) - (let ((ch (aref cyrillic-koi8-r-decode-table i))) - (if (characterp ch) - (put-char-table ch i table))) - (incf i))) - "Table to convert from characters to their Koi8-R code.") - +;; Create a corresponding language environment. (set-language-info-alist "Cyrillic-KOI8" '((charset cyrillic-iso8859-5) - (coding-system koi8-r) - (native-coding-system koi8-r) - (coding-priority koi8-r) - (input-method . "cyrillic-yawerty") - (features cyril-util) - (locale "ru") - (mswindows-locale . "RUSSIAN") - (tutorial . "TUTORIAL.ru") - (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") - (documentation . "Support for Cyrillic KOI8-R.")) + (coding-system koi8-r) + (native-coding-system koi8-r) + (coding-priority koi8-r) + (input-method . 
"cyrillic-yawerty") + (features cyril-util) + (locale "ru") + (mswindows-locale . "RUSSIAN") + (tutorial . "TUTORIAL.ru") + (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") + (documentation . "Support for Cyrillic KOI8-R.")) + '("Cyrillic")) + +;; Alias it to Russian. +(set-language-info-alist + "Russian" + (cdr (assoc "Cyrillic-KOI8" language-info-alist)) + '("Cyrillic")) + +;; KOI8-U, for Ukrainian. +(make-8-bit-coding-system + 'koi8-u + '((#x80 ?\u2500) ;; BOX DRAWINGS LIGHT HORIZONTAL + (#x81 ?\u2502) ;; BOX DRAWINGS LIGHT VERTICAL + (#x82 ?\u250C) ;; BOX DRAWINGS LIGHT DOWN AND RIGHT + (#x83 ?\u2510) ;; BOX DRAWINGS LIGHT DOWN AND LEFT + (#x84 ?\u2514) ;; BOX DRAWINGS LIGHT UP AND RIGHT + (#x85 ?\u2518) ;; BOX DRAWINGS LIGHT UP AND LEFT + (#x86 ?\u251C) ;; BOX DRAWINGS LIGHT VERTICAL AND RIGHT + (#x87 ?\u2524) ;; BOX DRAWINGS LIGHT VERTICAL AND LEFT + (#x88 ?\u252C) ;; BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + (#x89 ?\u2534) ;; BOX DRAWINGS LIGHT UP AND HORIZONTAL + (#x8A ?\u253C) ;; BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + (#x8B ?\u2580) ;; UPPER HALF BLOCK + (#x8C ?\u2584) ;; LOWER HALF BLOCK + (#x8D ?\u2588) ;; FULL BLOCK + (#x8E ?\u258C) ;; LEFT HALF BLOCK + (#x8F ?\u2590) ;; RIGHT HALF BLOCK + (#x90 ?\u2591) ;; LIGHT SHADE + (#x91 ?\u2592) ;; MEDIUM SHADE + (#x92 ?\u2593) ;; DARK SHADE + (#x93 ?\u2320) ;; TOP HALF INTEGRAL + (#x94 ?\u25A0) ;; BLACK SQUARE + (#x95 ?\u2022) ;; BULLET + (#x96 ?\u221A) ;; SQUARE ROOT + (#x97 ?\u2248) ;; ALMOST EQUAL TO + (#x98 ?\u2264) ;; LESS-THAN OR EQUAL TO + (#x99 ?\u2265) ;; GREATER-THAN OR EQUAL TO + (#x9A ?\u00A0) ;; NO-BREAK SPACE + (#x9B ?\u2321) ;; BOTTOM HALF INTEGRAL + (#x9C ?\u00B0) ;; DEGREE SIGN + (#x9D ?\u00B2) ;; SUPERSCRIPT TWO + (#x9E ?\u00B7) ;; MIDDLE DOT + (#x9F ?\u00F7) ;; DIVISION SIGN + (#xA0 ?\u2550) ;; BOX DRAWINGS DOUBLE HORIZONTAL + (#xA1 ?\u2551) ;; BOX DRAWINGS DOUBLE VERTICAL + (#xA2 ?\u2552) ;; BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + (#xA3 ?\u0451) ;; CYRILLIC SMALL LETTER IO + 
(#xA4 ?\u0454) ;; CYRILLIC SMALL LETTER UKRAINIAN IE + (#xA5 ?\u2554) ;; BOX DRAWINGS DOUBLE DOWN AND RIGHT + (#xA6 ?\u0456) ;; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + (#xA7 ?\u0457) ;; CYRILLIC SMALL LETTER YI + (#xA8 ?\u2557) ;; BOX DRAWINGS DOUBLE DOWN AND LEFT + (#xA9 ?\u2558) ;; BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + (#xAA ?\u2559) ;; BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + (#xAB ?\u255A) ;; BOX DRAWINGS DOUBLE UP AND RIGHT + (#xAC ?\u255B) ;; BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + (#xAD ?\u0491) ;; CYRILLIC SMALL LETTER GHE WITH UPTURN + (#xAE ?\u255D) ;; BOX DRAWINGS DOUBLE UP AND LEFT + (#xAF ?\u255E) ;; BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + (#xB0 ?\u255F) ;; BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + (#xB1 ?\u2560) ;; BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + (#xB2 ?\u2561) ;; BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + (#xB3 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO + (#xB4 ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE + (#xB5 ?\u2563) ;; BOX DRAWINGS DOUBLE VERTICAL AND LEFT + (#xB6 ?\u0406) ;; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + (#xB7 ?\u0407) ;; CYRILLIC CAPITAL LETTER YI + (#xB8 ?\u2566) ;; BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + (#xB9 ?\u2567) ;; BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + (#xBA ?\u2568) ;; BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + (#xBB ?\u2569) ;; BOX DRAWINGS DOUBLE UP AND HORIZONTAL + (#xBC ?\u256A) ;; BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + (#xBD ?\u0490) ;; CYRILLIC CAPITAL LETTER GHE WITH UPTURN + (#xBE ?\u256C) ;; BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + (#xBF ?\u00A9) ;; COPYRIGHT SIGN + (#xC0 ?\u044E) ;; CYRILLIC SMALL LETTER YU + (#xC1 ?\u0430) ;; CYRILLIC SMALL LETTER A + (#xC2 ?\u0431) ;; CYRILLIC SMALL LETTER BE + (#xC3 ?\u0446) ;; CYRILLIC SMALL LETTER TSE + (#xC4 ?\u0434) ;; CYRILLIC SMALL LETTER DE + (#xC5 ?\u0435) ;; CYRILLIC SMALL LETTER IE + (#xC6 ?\u0444) ;; CYRILLIC SMALL LETTER EF + (#xC7 ?\u0433) ;; CYRILLIC SMALL LETTER GHE + (#xC8 
?\u0445) ;; CYRILLIC SMALL LETTER HA + (#xC9 ?\u0438) ;; CYRILLIC SMALL LETTER I + (#xCA ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I + (#xCB ?\u043A) ;; CYRILLIC SMALL LETTER KA + (#xCC ?\u043B) ;; CYRILLIC SMALL LETTER EL + (#xCD ?\u043C) ;; CYRILLIC SMALL LETTER EM + (#xCE ?\u043D) ;; CYRILLIC SMALL LETTER EN + (#xCF ?\u043E) ;; CYRILLIC SMALL LETTER O + (#xD0 ?\u043F) ;; CYRILLIC SMALL LETTER PE + (#xD1 ?\u044F) ;; CYRILLIC SMALL LETTER YA + (#xD2 ?\u0440) ;; CYRILLIC SMALL LETTER ER + (#xD3 ?\u0441) ;; CYRILLIC SMALL LETTER ES + (#xD4 ?\u0442) ;; CYRILLIC SMALL LETTER TE + (#xD5 ?\u0443) ;; CYRILLIC SMALL LETTER U + (#xD6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE + (#xD7 ?\u0432) ;; CYRILLIC SMALL LETTER VE + (#xD8 ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN + (#xD9 ?\u044B) ;; CYRILLIC SMALL LETTER YERU + (#xDA ?\u0437) ;; CYRILLIC SMALL LETTER ZE + (#xDB ?\u0448) ;; CYRILLIC SMALL LETTER SHA + (#xDC ?\u044D) ;; CYRILLIC SMALL LETTER E + (#xDD ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA + (#xDE ?\u0447) ;; CYRILLIC SMALL LETTER CHE + (#xDF ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN + (#xE0 ?\u042E) ;; CYRILLIC CAPITAL LETTER YU + (#xE1 ?\u0410) ;; CYRILLIC CAPITAL LETTER A + (#xE2 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE + (#xE3 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE + (#xE4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE + (#xE5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE + (#xE6 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF + (#xE7 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE + (#xE8 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA + (#xE9 ?\u0418) ;; CYRILLIC CAPITAL LETTER I + (#xEA ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I + (#xEB ?\u041A) ;; CYRILLIC CAPITAL LETTER KA + (#xEC ?\u041B) ;; CYRILLIC CAPITAL LETTER EL + (#xED ?\u041C) ;; CYRILLIC CAPITAL LETTER EM + (#xEE ?\u041D) ;; CYRILLIC CAPITAL LETTER EN + (#xEF ?\u041E) ;; CYRILLIC CAPITAL LETTER O + (#xF0 ?\u041F) ;; CYRILLIC CAPITAL LETTER PE + (#xF1 ?\u042F) ;; CYRILLIC CAPITAL LETTER YA + (#xF2 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER + (#xF3 
?\u0421) ;; CYRILLIC CAPITAL LETTER ES + (#xF4 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE + (#xF5 ?\u0423) ;; CYRILLIC CAPITAL LETTER U + (#xF6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE + (#xF7 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE + (#xF8 ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN + (#xF9 ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU + (#xFA ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE + (#xFB ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA + (#xFC ?\u042D) ;; CYRILLIC CAPITAL LETTER E + (#xFD ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA + (#xFE ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE + (#xFF ?\u042A)) ;; CYRILLIC CAPITAL LETTER HARD SIGN + "KOI8-U, for Ukrainian" + '(mnemonic ",L:>8(B8,LC(B" + documentation + "KOI8-U, a KOI-8-compatible encoding for Ukrainian, described in RFC2319. +This has GHE WITH UPTURN, BYELORUSSIAN-UKRAINIAN I, UKRAINIAN IE and +YI instead of some box-drawing characters. Russian in this encoding +(without using box-drawing characters) is bit-for-bit compatible with +Russian in KOI8-R. ")) + +;; Case support, for the new characters. +(loop + for (upper lower) + in '((?\u0404 ?\u0454) ; UKRAINIAN IE + (?\u0406 ?\u0456) ; BYELORUSSIAN-UKRAINIAN I + (?\u0407 ?\u0457) ; YI + (?\u0490 ?\u0491)); GHE WITH UPTURN + with case-table = (standard-case-table) + do + (put-case-table-pair upper lower case-table)) + +(set-language-info-alist + "Ukrainian" '((coding-system koi8-u) + (coding-priority koi8-u) + (input-method . "cyrillic-ukrainian") + (documentation + . 
"Support for Ukrainian with KOI8-U character set.")) '("Cyrillic")) -;;; WINDOWS-1251 deleted; we support it automatically in XEmacs - -;;; ALTERNATIVNYJ - -(eval-and-compile - -(defvar cyrillic-alternativnyj-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ?,L0(B ?,L1(B ?,L2(B ?,L3(B ?,L4(B ?,L5(B ?,L6(B ?,L7(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B ?,L?(B - ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,LD(B ?,LE(B ?,LF(B ?,LG(B ?,LH(B ?,LI(B ?,LJ(B ?,LK(B ?,LL(B ?,LM(B ?,LN(B ?,LO(B - ?,LP(B ?,LQ(B ?,LR(B ?,LS(B ?,LT(B ?,LU(B ?,LV(B ?,LW(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B ?,L_(B - 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 - 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 - 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 - ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,Ld(B ?,Le(B ?,Lf(B ?,Lg(B ?,Lh(B ?,Li(B ?,Lj(B ?,Lk(B ?,Ll(B ?,Lm(B ?,Ln(B ?,Lo(B - ?,L!(B ?,Lq(B 32 32 32 32 32 32 32 32 32 32 32 32 32 ?,Lp(B] - "Cyrillic ALTERNATIVNYJ decoding table.") - -(defvar cyrillic-alternativnyj-encode-table - (let ((table (make-vector 256 32)) - (i 0)) - (while (< i 256) - (let* ((ch (aref cyrillic-alternativnyj-decode-table i)) - (split (split-char ch))) - (if (eq (car split) 'cyrillic-iso8859-5) - (aset table (logior (nth 1 split) 128) i) - (if (/= ch 32) - (aset table ch i)))) - (setq i (1+ i))) - table) - "Cyrillic ALTERNATIVNYJ encoding table.") - -) - +;; Windows 1251 may be provide automatically on Windows, in which case +;; we don't need to. 
+(unless (find-coding-system 'windows-1251) + (make-8-bit-coding-system + 'windows-1251 + '((#x80 ?\u0402) ;; CYRILLIC CAPITAL LETTER DJE + (#x81 ?\u0403) ;; CYRILLIC CAPITAL LETTER GJE + (#x82 ?\u201A) ;; SINGLE LOW-9 QUOTATION MARK + (#x83 ?\u0453) ;; CYRILLIC SMALL LETTER GJE + (#x84 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK + (#x85 ?\u2026) ;; HORIZONTAL ELLIPSIS + (#x86 ?\u2020) ;; DAGGER + (#x87 ?\u2021) ;; DOUBLE DAGGER + (#x88 ?\u20AC) ;; EURO SIGN + (#x89 ?\u2030) ;; PER MILLE SIGN + (#x8A ?\u0409) ;; CYRILLIC CAPITAL LETTER LJE + (#x8B ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK + (#x8C ?\u040A) ;; CYRILLIC CAPITAL LETTER NJE + (#x8D ?\u040C) ;; CYRILLIC CAPITAL LETTER KJE + (#x8E ?\u040B) ;; CYRILLIC CAPITAL LETTER TSHE + (#x8F ?\u040F) ;; CYRILLIC CAPITAL LETTER DZHE + (#x90 ?\u0452) ;; CYRILLIC SMALL LETTER DJE + (#x91 ?\u2018) ;; LEFT SINGLE QUOTATION MARK + (#x92 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK + (#x93 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK + (#x94 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK + (#x95 ?\u2022) ;; BULLET + (#x96 ?\u2013) ;; EN DASH + (#x97 ?\u2014) ;; EM DASH + (#x99 ?\u2122) ;; TRADE MARK SIGN + (#x9A ?\u0459) ;; CYRILLIC SMALL LETTER LJE + (#x9B ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + (#x9C ?\u045A) ;; CYRILLIC SMALL LETTER NJE + (#x9D ?\u045C) ;; CYRILLIC SMALL LETTER KJE + (#x9E ?\u045B) ;; CYRILLIC SMALL LETTER TSHE + (#x9F ?\u045F) ;; CYRILLIC SMALL LETTER DZHE + (#xA0 ?\u00A0) ;; NO-BREAK SPACE + (#xA1 ?\u040E) ;; CYRILLIC CAPITAL LETTER SHORT U + (#xA2 ?\u045E) ;; CYRILLIC SMALL LETTER SHORT U + (#xA3 ?\u0408) ;; CYRILLIC CAPITAL LETTER JE + (#xA4 ?\u00A4) ;; CURRENCY SIGN + (#xA5 ?\u0490) ;; CYRILLIC CAPITAL LETTER GHE WITH UPTURN + (#xA6 ?\u00A6) ;; BROKEN BAR + (#xA7 ?\u00A7) ;; SECTION SIGN + (#xA8 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO + (#xA9 ?\u00A9) ;; COPYRIGHT SIGN + (#xAA ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE + (#xAB ?\u00AB) ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + (#xAC 
?\u00AC) ;; NOT SIGN + (#xAD ?\u00AD) ;; SOFT HYPHEN + (#xAE ?\u00AE) ;; REGISTERED SIGN + (#xAF ?\u0407) ;; CYRILLIC CAPITAL LETTER YI + (#xB0 ?\u00B0) ;; DEGREE SIGN + (#xB1 ?\u00B1) ;; PLUS-MINUS SIGN + (#xB2 ?\u0406) ;; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + (#xB3 ?\u0456) ;; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + (#xB4 ?\u0491) ;; CYRILLIC SMALL LETTER GHE WITH UPTURN + (#xB5 ?\u00B5) ;; MICRO SIGN + (#xB6 ?\u00B6) ;; PILCROW SIGN + (#xB7 ?\u00B7) ;; MIDDLE DOT + (#xB8 ?\u0451) ;; CYRILLIC SMALL LETTER IO + (#xB9 ?\u2116) ;; NUMERO SIGN + (#xBA ?\u0454) ;; CYRILLIC SMALL LETTER UKRAINIAN IE + (#xBB ?\u00BB) ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + (#xBC ?\u0458) ;; CYRILLIC SMALL LETTER JE + (#xBD ?\u0405) ;; CYRILLIC CAPITAL LETTER DZE + (#xBE ?\u0455) ;; CYRILLIC SMALL LETTER DZE + (#xBF ?\u0457) ;; CYRILLIC SMALL LETTER YI + (#xC0 ?\u0410) ;; CYRILLIC CAPITAL LETTER A + (#xC1 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE + (#xC2 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE + (#xC3 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE + (#xC4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE + (#xC5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE + (#xC6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE + (#xC7 ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE + (#xC8 ?\u0418) ;; CYRILLIC CAPITAL LETTER I + (#xC9 ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I + (#xCA ?\u041A) ;; CYRILLIC CAPITAL LETTER KA + (#xCB ?\u041B) ;; CYRILLIC CAPITAL LETTER EL + (#xCC ?\u041C) ;; CYRILLIC CAPITAL LETTER EM + (#xCD ?\u041D) ;; CYRILLIC CAPITAL LETTER EN + (#xCE ?\u041E) ;; CYRILLIC CAPITAL LETTER O + (#xCF ?\u041F) ;; CYRILLIC CAPITAL LETTER PE + (#xD0 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER + (#xD1 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES + (#xD2 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE + (#xD3 ?\u0423) ;; CYRILLIC CAPITAL LETTER U + (#xD4 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF + (#xD5 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA + (#xD6 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE + (#xD7 ?\u0427) ;; CYRILLIC CAPITAL 
LETTER CHE + (#xD8 ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA + (#xD9 ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA + (#xDA ?\u042A) ;; CYRILLIC CAPITAL LETTER HARD SIGN + (#xDB ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU + (#xDC ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN + (#xDD ?\u042D) ;; CYRILLIC CAPITAL LETTER E + (#xDE ?\u042E) ;; CYRILLIC CAPITAL LETTER YU + (#xDF ?\u042F) ;; CYRILLIC CAPITAL LETTER YA + (#xE0 ?\u0430) ;; CYRILLIC SMALL LETTER A + (#xE1 ?\u0431) ;; CYRILLIC SMALL LETTER BE + (#xE2 ?\u0432) ;; CYRILLIC SMALL LETTER VE + (#xE3 ?\u0433) ;; CYRILLIC SMALL LETTER GHE + (#xE4 ?\u0434) ;; CYRILLIC SMALL LETTER DE + (#xE5 ?\u0435) ;; CYRILLIC SMALL LETTER IE + (#xE6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE + (#xE7 ?\u0437) ;; CYRILLIC SMALL LETTER ZE + (#xE8 ?\u0438) ;; CYRILLIC SMALL LETTER I + (#xE9 ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I + (#xEA ?\u043A) ;; CYRILLIC SMALL LETTER KA + (#xEB ?\u043B) ;; CYRILLIC SMALL LETTER EL + (#xEC ?\u043C) ;; CYRILLIC SMALL LETTER EM + (#xED ?\u043D) ;; CYRILLIC SMALL LETTER EN + (#xEE ?\u043E) ;; CYRILLIC SMALL LETTER O + (#xEF ?\u043F) ;; CYRILLIC SMALL LETTER PE + (#xF0 ?\u0440) ;; CYRILLIC SMALL LETTER ER + (#xF1 ?\u0441) ;; CYRILLIC SMALL LETTER ES + (#xF2 ?\u0442) ;; CYRILLIC SMALL LETTER TE + (#xF3 ?\u0443) ;; CYRILLIC SMALL LETTER U + (#xF4 ?\u0444) ;; CYRILLIC SMALL LETTER EF + (#xF5 ?\u0445) ;; CYRILLIC SMALL LETTER HA + (#xF6 ?\u0446) ;; CYRILLIC SMALL LETTER TSE + (#xF7 ?\u0447) ;; CYRILLIC SMALL LETTER CHE + (#xF8 ?\u0448) ;; CYRILLIC SMALL LETTER SHA + (#xF9 ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA + (#xFA ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN + (#xFB ?\u044B) ;; CYRILLIC SMALL LETTER YERU + (#xFC ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN + (#xFD ?\u044D) ;; CYRILLIC SMALL LETTER E + (#xFE ?\u044E) ;; CYRILLIC SMALL LETTER YU + (#xFF ?\u044F)) ;; CYRILLIC SMALL LETTER YA + "Microsoft's Code Page 1251, for Russian, Bulgarian, Serbian and others. 
" + '(mnemonic "CyrW" + documentation + "This ASCII-compatible encoding is unfortunately not compatible at +the code point level with the KOI8 family of encodings, but it +provides several widely-used punctuation and quotation marks that +KOI-8R and its relatives don't, and has become widely used. " + aliases (cp1251)))) -(define-ccl-program ccl-decode-alternativnyj - `(3 - ((read r0) - (loop - (write-read-repeat r0 ,cyrillic-alternativnyj-decode-table)))) - "CCL program to decode Alternativnyj.") +(set-language-info-alist + "Bulgarian" '((coding-system windows-1251) + (coding-priority windows-1251) + (input-method . "bulgarian-bds") + (documentation + . "Support for Bulgarian with windows-1251 character set.") + (tutorial . "TUTORIAL.bg")) + '("Cyrillic")) -(define-ccl-program ccl-encode-alternativnyj - `(1 - ((read r0) - (loop - (if (r0 != ,(charset-id 'cyrillic-iso8859-5)) - (write-read-repeat r0) - ((read r0) - (write-read-repeat r0 ,cyrillic-alternativnyj-encode-table)))))) - "CCL program to encode Alternativnyj.") - -;; (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) +(set-language-info-alist + "Belarusian" '((coding-system windows-1251) + (coding-priority windows-1251) + (input-method . "belarusian") + (documentation + . "Support for Belarusian with windows-1251 character set. 
+\(The name Belarusian replaced Byelorussian in the early 1990s.)")) + '("Cyrillic")) -(make-coding-system - 'alternativnyj 'ccl - "Alternativnyj (Cyrillic)" - '(decode ccl-decode-alternativnyj - encode ccl-encode-alternativnyj - mnemonic "Cy.Alt")) - -;; `iso-8-1' is not correct, but XEmacs doesn't have `ccl' category -(coding-system-put 'alternativnyj 'category 'iso-8-1) - -;; (define-ccl-program ccl-encode-alternativnyj-font -;; '(0 -;; ((r1 |= 128) -;; (r1 = r1 ,cyrillic-alternativnyj-encode-table))) -;; "CCL program to encode Cyrillic chars to Alternativnyj font.") - -;; (setq font-ccl-encoder-alist -;; (cons (cons "alternativnyj" ccl-encode-alternativnyj-font) -;; font-ccl-encoder-alist)) - -(defvar cyrillic-alternativnyj-to-external-code-table - (let ((table (make-char-table 'generic)) - (i 0) - (len (length cyrillic-alternativnyj-decode-table))) - (while (< i len) - (let ((ch (aref cyrillic-alternativnyj-decode-table i))) - (if (characterp ch) - (put-char-table ch i table))) - (incf i))) - "Table to convert from characters to their Alternativnyj code.") +;;; Alternativnyj +(make-8-bit-coding-system + 'alternativnyj + '((#x80 ?\u0410) ;; CYRILLIC CAPITAL LETTER A + (#x81 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE + (#x82 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE + (#x83 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE + (#x84 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE + (#x85 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE + (#x86 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE + (#x87 ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE + (#x88 ?\u0418) ;; CYRILLIC CAPITAL LETTER I + (#x89 ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I + (#x8A ?\u041A) ;; CYRILLIC CAPITAL LETTER KA + (#x8B ?\u041B) ;; CYRILLIC CAPITAL LETTER EL + (#x8C ?\u041C) ;; CYRILLIC CAPITAL LETTER EM + (#x8D ?\u041D) ;; CYRILLIC CAPITAL LETTER EN + (#x8E ?\u041E) ;; CYRILLIC CAPITAL LETTER O + (#x8F ?\u041F) ;; CYRILLIC CAPITAL LETTER PE + (#x90 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER + (#x91 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES + 
(#x92 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE + (#x93 ?\u0423) ;; CYRILLIC CAPITAL LETTER U + (#x94 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF + (#x95 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA + (#x96 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE + (#x97 ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE + (#x98 ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA + (#x99 ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA + (#x9A ?\u042A) ;; CYRILLIC CAPITAL LETTER HARD SIGN + (#x9B ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU + (#x9C ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN + (#x9D ?\u042D) ;; CYRILLIC CAPITAL LETTER E + (#x9E ?\u042E) ;; CYRILLIC CAPITAL LETTER YU + (#x9F ?\u042F) ;; CYRILLIC CAPITAL LETTER YA + (#xA0 ?\u0430) ;; CYRILLIC SMALL LETTER A + (#xA1 ?\u0431) ;; CYRILLIC SMALL LETTER BE + (#xA2 ?\u0432) ;; CYRILLIC SMALL LETTER VE + (#xA3 ?\u0433) ;; CYRILLIC SMALL LETTER GHE + (#xA4 ?\u0434) ;; CYRILLIC SMALL LETTER DE + (#xA5 ?\u0435) ;; CYRILLIC SMALL LETTER IE + (#xA6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE + (#xA7 ?\u0437) ;; CYRILLIC SMALL LETTER ZE + (#xA8 ?\u0438) ;; CYRILLIC SMALL LETTER I + (#xA9 ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I + (#xAA ?\u043A) ;; CYRILLIC SMALL LETTER KA + (#xAB ?\u043B) ;; CYRILLIC SMALL LETTER EL + (#xAC ?\u043C) ;; CYRILLIC SMALL LETTER EM + (#xAD ?\u043D) ;; CYRILLIC SMALL LETTER EN + (#xAE ?\u043E) ;; CYRILLIC SMALL LETTER O + (#xAF ?\u043F) ;; CYRILLIC SMALL LETTER PE + (#xB0 ?\u2591) ;; LIGHT SHADE + (#xB1 ?\u2592) ;; MEDIUM SHADE + (#xB2 ?\u2593) ;; DARK SHADE + (#xB3 ?\u2502) ;; BOX DRAWINGS LIGHT VERTICAL + (#xB4 ?\u2524) ;; BOX DRAWINGS LIGHT VERTICAL AND LEFT + (#xB5 ?\u2561) ;; BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + (#xB6 ?\u2562) ;; BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE + (#xB7 ?\u2556) ;; BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE + (#xB8 ?\u2555) ;; BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE + (#xB9 ?\u2563) ;; BOX DRAWINGS DOUBLE VERTICAL AND LEFT + (#xBA ?\u2551) ;; BOX DRAWINGS DOUBLE VERTICAL + (#xBB ?\u2557) ;; BOX DRAWINGS DOUBLE 
DOWN AND LEFT + (#xBC ?\u255D) ;; BOX DRAWINGS DOUBLE UP AND LEFT + (#xBD ?\u255C) ;; BOX DRAWINGS UP DOUBLE AND LEFT SINGLE + (#xBE ?\u255B) ;; BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + (#xBF ?\u2510) ;; BOX DRAWINGS LIGHT DOWN AND LEFT + (#xC0 ?\u2514) ;; BOX DRAWINGS LIGHT UP AND RIGHT + (#xC1 ?\u2534) ;; BOX DRAWINGS LIGHT UP AND HORIZONTAL + (#xC2 ?\u252C) ;; BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + (#xC3 ?\u251C) ;; BOX DRAWINGS LIGHT VERTICAL AND RIGHT + (#xC4 ?\u2500) ;; BOX DRAWINGS LIGHT HORIZONTAL + (#xC5 ?\u253C) ;; BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + (#xC6 ?\u255E) ;; BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + (#xC7 ?\u255F) ;; BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + (#xC8 ?\u255A) ;; BOX DRAWINGS DOUBLE UP AND RIGHT + (#xC9 ?\u2554) ;; BOX DRAWINGS DOUBLE DOWN AND RIGHT + (#xCA ?\u2569) ;; BOX DRAWINGS DOUBLE UP AND HORIZONTAL + (#xCB ?\u2566) ;; BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + (#xCC ?\u2560) ;; BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + (#xCD ?\u2550) ;; BOX DRAWINGS DOUBLE HORIZONTAL + (#xCE ?\u256C) ;; BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL + (#xCF ?\u2567) ;; BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + (#xD0 ?\u2568) ;; BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + (#xD1 ?\u2564) ;; BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE + (#xD2 ?\u2565) ;; BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE + (#xD3 ?\u2559) ;; BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + (#xD4 ?\u2558) ;; BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + (#xD5 ?\u2552) ;; BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + (#xD6 ?\u2553) ;; BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE + (#xD7 ?\u256B) ;; BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE + (#xD8 ?\u256A) ;; BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + (#xD9 ?\u2518) ;; BOX DRAWINGS LIGHT UP AND LEFT + (#xDA ?\u250C) ;; BOX DRAWINGS LIGHT DOWN AND RIGHT + (#xDB ?\u2588) ;; FULL BLOCK + (#xDC ?\u2584) ;; LOWER HALF BLOCK + (#xDD ?\u258C) ;; LEFT HALF BLOCK + (#xDE ?\u2590) ;; RIGHT HALF 
BLOCK + (#xDF ?\u2580) ;; UPPER HALF BLOCK + (#xE0 ?\u0440) ;; CYRILLIC SMALL LETTER ER + (#xE1 ?\u0441) ;; CYRILLIC SMALL LETTER ES + (#xE2 ?\u0442) ;; CYRILLIC SMALL LETTER TE + (#xE3 ?\u0443) ;; CYRILLIC SMALL LETTER U + (#xE4 ?\u0444) ;; CYRILLIC SMALL LETTER EF + (#xE5 ?\u0445) ;; CYRILLIC SMALL LETTER HA + (#xE6 ?\u0446) ;; CYRILLIC SMALL LETTER TSE + (#xE7 ?\u0447) ;; CYRILLIC SMALL LETTER CHE + (#xE8 ?\u0448) ;; CYRILLIC SMALL LETTER SHA + (#xE9 ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA + (#xEA ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN + (#xEB ?\u044B) ;; CYRILLIC SMALL LETTER YERU + (#xEC ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN + (#xED ?\u044D) ;; CYRILLIC SMALL LETTER E + (#xEE ?\u044E) ;; CYRILLIC SMALL LETTER YU + (#xEF ?\u044F) ;; CYRILLIC SMALL LETTER YA + (#xF0 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO + (#xF1 ?\u0451) ;; CYRILLIC SMALL LETTER IO + (#xF2 ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE + (#xF3 ?\u0454) ;; CYRILLIC SMALL LETTER UKRAINIAN IE + (#xF4 ?\u0407) ;; CYRILLIC CAPITAL LETTER YI + (#xF5 ?\u0457) ;; CYRILLIC SMALL LETTER YI + (#xF6 ?\u040E) ;; CYRILLIC CAPITAL LETTER SHORT U + (#xF7 ?\u045E) ;; CYRILLIC SMALL LETTER SHORT U + (#xF8 ?\u00B0) ;; DEGREE SIGN + (#xF9 ?\u2022) ;; BULLET + (#xFA ?\u00B7) ;; MIDDLE DOT + (#xFB ?\u221A) ;; SQUARE ROOT + (#xFC ?\u2116) ;; NUMERO SIGN + (#xFD ?\u00A4) ;; CURRENCY SIGN + (#xFE ?\u25A0) ;; BLACK SQUARE + (#xFF ?\u00A0)) ;; NO-BREAK SPACE + "Alternativnyj (Cyrillic). Microsoft's Code Page 866. " + '(mnemonic "Cy.Alt" + aliases (cp866))) (set-language-info-alist "Cyrillic-ALT" '((charset cyrillic-iso8859-5) - (coding-system alternativnyj) - (native-coding-system alternativnyj) - (coding-priority alternativnyj) - (input-method . "cyrillic-yawerty") - (features cyril-util) - (tutorial . "TUTORIAL.ru") - (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") - (documentation . 
"Support for Cyrillic ALTERNATIVNYJ.")) + (coding-system alternativnyj) + (native-coding-system alternativnyj) + (coding-priority alternativnyj) + (input-method . "cyrillic-yawerty") + (features cyril-util) + (tutorial . "TUTORIAL.ru") + (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") + (documentation . "Support for Cyrillic ALTERNATIVNYJ.")) + '("Cyrillic")) + +(make-8-bit-coding-system + 'koi8-ru + '((#x80 ?\u2500) ;; BOX DRAWINGS LIGHT HORIZONTAL + (#x81 ?\u2502) ;; BOX DRAWINGS LIGHT VERTICAL + (#x82 ?\u250C) ;; BOX DRAWINGS LIGHT DOWN AND RIGHT + (#x83 ?\u2510) ;; BOX DRAWINGS LIGHT DOWN AND LEFT + (#x84 ?\u2514) ;; BOX DRAWINGS LIGHT UP AND RIGHT + (#x85 ?\u2518) ;; BOX DRAWINGS LIGHT UP AND LEFT + (#x86 ?\u251C) ;; BOX DRAWINGS LIGHT VERTICAL AND RIGHT + (#x87 ?\u2524) ;; BOX DRAWINGS LIGHT VERTICAL AND LEFT + (#x88 ?\u252C) ;; BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + (#x89 ?\u2534) ;; BOX DRAWINGS LIGHT UP AND HORIZONTAL + (#x8A ?\u253C) ;; BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + (#x8B ?\u2580) ;; UPPER HALF BLOCK + (#x8C ?\u2584) ;; LOWER HALF BLOCK + (#x8D ?\u2588) ;; FULL BLOCK + (#x8E ?\u258C) ;; LEFT HALF BLOCK + (#x8F ?\u2590) ;; RIGHT HALF BLOCK + (#x90 ?\u2591) ;; LIGHT SHADE + (#x91 ?\u2592) ;; MEDIUM SHADE + (#x92 ?\u2593) ;; DARK SHADE + (#x93 ?\u2320) ;; TOP HALF INTEGRAL + (#x94 ?\u25A0) ;; BLACK SQUARE + (#x95 ?\u2219) ;; BULLET OPERATOR + (#x96 ?\u221A) ;; SQUARE ROOT + (#x97 ?\u2248) ;; ALMOST EQUAL TO + (#x98 ?\u2264) ;; LESS-THAN OR EQUAL TO + (#x99 ?\u2265) ;; GREATER-THAN OR EQUAL TO + (#x9A ?\u00A0) ;; NO-BREAK SPACE + (#x9B ?\u2321) ;; BOTTOM HALF INTEGRAL + (#x9C ?\u00B0) ;; DEGREE SIGN + (#x9D ?\u00B2) ;; SUPERSCRIPT TWO + (#x9E ?\u00B7) ;; MIDDLE DOT + (#x9F ?\u00F7) ;; DIVISION SIGN + (#xA0 ?\u2550) ;; BOX DRAWINGS DOUBLE HORIZONTAL + (#xA1 ?\u2551) ;; BOX DRAWINGS DOUBLE VERTICAL + (#xA2 ?\u2552) ;; BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE + (#xA3 ?\u0451) ;; CYRILLIC SMALL LETTER IO + (#xA4 ?\u0454) ;; CYRILLIC 
SMALL LETTER UKRAINIAN IE + (#xA5 ?\u2554) ;; BOX DRAWINGS DOUBLE DOWN AND RIGHT + (#xA6 ?\u0456) ;; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I + (#xA7 ?\u0457) ;; CYRILLIC SMALL LETTER YI + (#xA8 ?\u2557) ;; BOX DRAWINGS DOUBLE DOWN AND LEFT + (#xA9 ?\u2558) ;; BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE + (#xAA ?\u2559) ;; BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE + (#xAB ?\u255A) ;; BOX DRAWINGS DOUBLE UP AND RIGHT + (#xAC ?\u255B) ;; BOX DRAWINGS UP SINGLE AND LEFT DOUBLE + (#xAD ?\u0491) ;; CYRILLIC SMALL LETTER GHE WITH UPTURN + (#xAE ?\u045E) ;; CYRILLIC SMALL LETTER SHORT U + (#xAF ?\u255E) ;; BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE + (#xB0 ?\u255F) ;; BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE + (#xB1 ?\u2560) ;; BOX DRAWINGS DOUBLE VERTICAL AND RIGHT + (#xB2 ?\u2561) ;; BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE + (#xB3 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO + (#xB4 ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE + (#xB5 ?\u2563) ;; BOX DRAWINGS DOUBLE VERTICAL AND LEFT + (#xB6 ?\u0406) ;; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I + (#xB7 ?\u0407) ;; CYRILLIC CAPITAL LETTER YI + (#xB8 ?\u2566) ;; BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL + (#xB9 ?\u2567) ;; BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE + (#xBA ?\u2568) ;; BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE + (#xBB ?\u2569) ;; BOX DRAWINGS DOUBLE UP AND HORIZONTAL + (#xBC ?\u256A) ;; BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE + (#xBD ?\u0490) ;; CYRILLIC CAPITAL LETTER GHE WITH UPTURN + (#xBE ?\u040E) ;; CYRILLIC CAPITAL LETTER SHORT U + (#xBF ?\u00A9) ;; COPYRIGHT SIGN + (#xC0 ?\u044E) ;; CYRILLIC SMALL LETTER YU + (#xC1 ?\u0430) ;; CYRILLIC SMALL LETTER A + (#xC2 ?\u0431) ;; CYRILLIC SMALL LETTER BE + (#xC3 ?\u0446) ;; CYRILLIC SMALL LETTER TSE + (#xC4 ?\u0434) ;; CYRILLIC SMALL LETTER DE + (#xC5 ?\u0435) ;; CYRILLIC SMALL LETTER IE + (#xC6 ?\u0444) ;; CYRILLIC SMALL LETTER EF + (#xC7 ?\u0433) ;; CYRILLIC SMALL LETTER GHE + (#xC8 ?\u0445) ;; CYRILLIC SMALL LETTER HA + 
(#xC9 ?\u0438) ;; CYRILLIC SMALL LETTER I + (#xCA ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I + (#xCB ?\u043A) ;; CYRILLIC SMALL LETTER KA + (#xCC ?\u043B) ;; CYRILLIC SMALL LETTER EL + (#xCD ?\u043C) ;; CYRILLIC SMALL LETTER EM + (#xCE ?\u043D) ;; CYRILLIC SMALL LETTER EN + (#xCF ?\u043E) ;; CYRILLIC SMALL LETTER O + (#xD0 ?\u043F) ;; CYRILLIC SMALL LETTER PE + (#xD1 ?\u044F) ;; CYRILLIC SMALL LETTER YA + (#xD2 ?\u0440) ;; CYRILLIC SMALL LETTER ER + (#xD3 ?\u0441) ;; CYRILLIC SMALL LETTER ES + (#xD4 ?\u0442) ;; CYRILLIC SMALL LETTER TE + (#xD5 ?\u0443) ;; CYRILLIC SMALL LETTER U + (#xD6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE + (#xD7 ?\u0432) ;; CYRILLIC SMALL LETTER VE + (#xD8 ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN + (#xD9 ?\u044B) ;; CYRILLIC SMALL LETTER YERU + (#xDA ?\u0437) ;; CYRILLIC SMALL LETTER ZE + (#xDB ?\u0448) ;; CYRILLIC SMALL LETTER SHA + (#xDC ?\u044D) ;; CYRILLIC SMALL LETTER E + (#xDD ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA + (#xDE ?\u0447) ;; CYRILLIC SMALL LETTER CHE + (#xDF ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN + (#xE0 ?\u042E) ;; CYRILLIC CAPITAL LETTER YU + (#xE1 ?\u0410) ;; CYRILLIC CAPITAL LETTER A + (#xE2 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE + (#xE3 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE + (#xE4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE + (#xE5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE + (#xE6 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF + (#xE7 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE + (#xE8 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA + (#xE9 ?\u0418) ;; CYRILLIC CAPITAL LETTER I + (#xEA ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I + (#xEB ?\u041A) ;; CYRILLIC CAPITAL LETTER KA + (#xEC ?\u041B) ;; CYRILLIC CAPITAL LETTER EL + (#xED ?\u041C) ;; CYRILLIC CAPITAL LETTER EM + (#xEE ?\u041D) ;; CYRILLIC CAPITAL LETTER EN + (#xEF ?\u041E) ;; CYRILLIC CAPITAL LETTER O + (#xF0 ?\u041F) ;; CYRILLIC CAPITAL LETTER PE + (#xF1 ?\u042F) ;; CYRILLIC CAPITAL LETTER YA + (#xF2 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER + (#xF3 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES 
+ (#xF4 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE + (#xF5 ?\u0423) ;; CYRILLIC CAPITAL LETTER U + (#xF6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE + (#xF7 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE + (#xF8 ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN + (#xF9 ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU + (#xFA ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE + (#xFB ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA + (#xFC ?\u042D) ;; CYRILLIC CAPITAL LETTER E + (#xFD ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA + (#xFE ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE + (#xFF ?\u042A));; CYRILLIC CAPITAL LETTER HARD SIGN + "KOI8-RU, a mostly-compatible superset of KOI8-R. +Also known as Windows code page 21866; has Ukrainian and Belarusian support. " + '(mnemonic ",L@C(B" + aliases (cp21866))) + +(set-language-info-alist + "Cyrillic-KOI8RU" '((charset cyrillic-iso8859-5) + (coding-system koi8-ru) + (native-coding-system koi8-ru) + (coding-priority koi8-ru) + (input-method . "cyrillic-yawerty") + (tutorial . "TUTORIAL.ru") + (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") + (documentation . "Support for Cyrillic KOI8-RU.")) '("Cyrillic")) -;;; cyrillic.el ends here +;; We should provide an input method and the corresponding language +;; environments for the next three coding systems. 
+ +(make-8-bit-coding-system + 'koi8-t + '((#x80 ?\u049B) ;; CYRILLIC SMALL LETTER KA WITH DESCENDER + (#x81 ?\u0493) ;; CYRILLIC SMALL LETTER GHE WITH STROKE + (#x82 ?\u201A) ;; SINGLE LOW-9 QUOTATION MARK + (#x83 ?\u0492) ;; CYRILLIC CAPITAL LETTER GHE WITH STROKE + (#x84 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK + (#x85 ?\u2026) ;; HORIZONTAL ELLIPSIS + (#x86 ?\u2020) ;; DAGGER + (#x87 ?\u2021) ;; DOUBLE DAGGER + (#x89 ?\u2030) ;; PER MILLE SIGN + (#x8A ?\u04B3) ;; CYRILLIC SMALL LETTER HA WITH DESCENDER + (#x8B ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK + (#x8C ?\u04B2) ;; CYRILLIC CAPITAL LETTER HA WITH DESCENDER + (#x8D ?\u04B7) ;; CYRILLIC SMALL LETTER CHE WITH DESCENDER + (#x8E ?\u04B6) ;; CYRILLIC CAPITAL LETTER CHE WITH DESCENDER + (#x90 ?\u049A) ;; CYRILLIC CAPITAL LETTER KA WITH DESCENDER + (#x91 ?\u2018) ;; LEFT SINGLE QUOTATION MARK + (#x92 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK + (#x93 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK + (#x94 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK + (#x95 ?\u2022) ;; BULLET + (#x96 ?\u2013) ;; EN DASH + (#x97 ?\u2014) ;; EM DASH + (#x99 ?\u2122) ;; TRADE MARK SIGN + (#x9B ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + (#xA1 ?\u04EF) ;; CYRILLIC SMALL LETTER U WITH MACRON + (#xA2 ?\u04EE) ;; CYRILLIC CAPITAL LETTER U WITH MACRON + (#xA3 ?\u0451) ;; CYRILLIC SMALL LETTER IO + (#xA4 ?\u00A4) ;; CURRENCY SIGN + (#xA5 ?\u04E3) ;; CYRILLIC SMALL LETTER I WITH MACRON + (#xA6 ?\u00A6) ;; BROKEN BAR + (#xA7 ?\u00A7) ;; SECTION SIGN + (#xAB ?\u00AB) ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + (#xAC ?\u00AC) ;; NOT SIGN + (#xAD ?\u00AD) ;; SOFT HYPHEN + (#xAE ?\u00AE) ;; REGISTERED SIGN + (#xB0 ?\u00B0) ;; DEGREE SIGN + (#xB1 ?\u00B1) ;; PLUS-MINUS SIGN + (#xB2 ?\u00B2) ;; SUPERSCRIPT TWO + (#xB3 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO + (#xB5 ?\u04E2) ;; CYRILLIC CAPITAL LETTER I WITH MACRON + (#xB6 ?\u00B6) ;; PILCROW SIGN + (#xB7 ?\u00B7) ;; MIDDLE DOT + (#xB9 ?\u2116) ;; NUMERO SIGN + (#xBB ?\u00BB) ;; 
RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + (#xBF ?\u00A9) ;; COPYRIGHT SIGN + (#xC0 ?\u044E) ;; CYRILLIC SMALL LETTER YU + (#xC1 ?\u0430) ;; CYRILLIC SMALL LETTER A + (#xC2 ?\u0431) ;; CYRILLIC SMALL LETTER BE + (#xC3 ?\u0446) ;; CYRILLIC SMALL LETTER TSE + (#xC4 ?\u0434) ;; CYRILLIC SMALL LETTER DE + (#xC5 ?\u0435) ;; CYRILLIC SMALL LETTER IE + (#xC6 ?\u0444) ;; CYRILLIC SMALL LETTER EF + (#xC7 ?\u0433) ;; CYRILLIC SMALL LETTER GHE + (#xC8 ?\u0445) ;; CYRILLIC SMALL LETTER HA + (#xC9 ?\u0438) ;; CYRILLIC SMALL LETTER I + (#xCA ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I + (#xCB ?\u043A) ;; CYRILLIC SMALL LETTER KA + (#xCC ?\u043B) ;; CYRILLIC SMALL LETTER EL + (#xCD ?\u043C) ;; CYRILLIC SMALL LETTER EM + (#xCE ?\u043D) ;; CYRILLIC SMALL LETTER EN + (#xCF ?\u043E) ;; CYRILLIC SMALL LETTER O + (#xD0 ?\u043F) ;; CYRILLIC SMALL LETTER PE + (#xD1 ?\u044F) ;; CYRILLIC SMALL LETTER YA + (#xD2 ?\u0440) ;; CYRILLIC SMALL LETTER ER + (#xD3 ?\u0441) ;; CYRILLIC SMALL LETTER ES + (#xD4 ?\u0442) ;; CYRILLIC SMALL LETTER TE + (#xD5 ?\u0443) ;; CYRILLIC SMALL LETTER U + (#xD6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE + (#xD7 ?\u0432) ;; CYRILLIC SMALL LETTER VE + (#xD8 ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN + (#xD9 ?\u044B) ;; CYRILLIC SMALL LETTER YERU + (#xDA ?\u0437) ;; CYRILLIC SMALL LETTER ZE + (#xDB ?\u0448) ;; CYRILLIC SMALL LETTER SHA + (#xDC ?\u044D) ;; CYRILLIC SMALL LETTER E + (#xDD ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA + (#xDE ?\u0447) ;; CYRILLIC SMALL LETTER CHE + (#xDF ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN + (#xE0 ?\u042E) ;; CYRILLIC CAPITAL LETTER YU + (#xE1 ?\u0410) ;; CYRILLIC CAPITAL LETTER A + (#xE2 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE + (#xE3 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE + (#xE4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE + (#xE5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE + (#xE6 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF + (#xE7 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE + (#xE8 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA + (#xE9 ?\u0418) ;; CYRILLIC 
CAPITAL LETTER I + (#xEA ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I + (#xEB ?\u041A) ;; CYRILLIC CAPITAL LETTER KA + (#xEC ?\u041B) ;; CYRILLIC CAPITAL LETTER EL + (#xED ?\u041C) ;; CYRILLIC CAPITAL LETTER EM + (#xEE ?\u041D) ;; CYRILLIC CAPITAL LETTER EN + (#xEF ?\u041E) ;; CYRILLIC CAPITAL LETTER O + (#xF0 ?\u041F) ;; CYRILLIC CAPITAL LETTER PE + (#xF1 ?\u042F) ;; CYRILLIC CAPITAL LETTER YA + (#xF2 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER + (#xF3 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES + (#xF4 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE + (#xF5 ?\u0423) ;; CYRILLIC CAPITAL LETTER U + (#xF6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE + (#xF7 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE + (#xF8 ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN + (#xF9 ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU + (#xFA ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE + (#xFB ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA + (#xFC ?\u042D) ;; CYRILLIC CAPITAL LETTER E + (#xFD ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA + (#xFE ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE + (#xFF ?\u042A));; CYRILLIC CAPITAL LETTER HARD SIGN + ",L:>8(B-8 for Tajik." + '(mnemonic ",LB%GҶ%@(B" + documentation + "Compatible in the alphabetic characters with KOI-8R for Russian, +this 8-bit Cyrillic coding system makes those characters available +that are necessary for writing Tajik, (,LWPQ^]%Gӣ%@(B ,Lb^%Gҷ%@XZ%Gӣ%@(B) the main +language of Tajikistan and a close relative of Persian. ")) + +;; Case support, for the new characters. +(loop + for (upper lower) + in '((?\u04B6 ?\u04B7) ;; CHE WITH DESCENDER + (?\u0492 ?\u0493) ;; GHE WITH STROKE + (?\u04B2 ?\u04B3) ;; HA WITH DESCENDER + (?\u04E2 ?\u04E3) ;; I WITH MACRON + (?\u049A ?\u049B) ;; KA WITH DESCENDER + (?\u04EE ?\u04EF)) ;; U WITH MACRON + with case-table = (standard-case-table) + do + (put-case-table-pair upper lower case-table)) + +;; Support for the languages of the Caucasus. 
+;; Define the `koi8-c' coding system.  Only octets #x80-#xFF are listed
+;; below, each as an (OCTET CHARACTER) pair.
+;;
+;; Octet #x8C is SMALL LETTER SCHWA (U+04D9), the lower-case partner of
+;; the CAPITAL LETTER SCHWA at #x9C.  It must not repeat U+049D, which
+;; already belongs to #x83: a character listed twice ends up encoded as
+;; the later octet, so #x83 would not survive a read/write round trip.
+(make-8-bit-coding-system
+ 'koi8-c
+ '((#x80 ?\u0493) ;; CYRILLIC SMALL LETTER GHE WITH STROKE
+   (#x81 ?\u0497) ;; CYRILLIC SMALL LETTER ZHE WITH DESCENDER
+   (#x82 ?\u049B) ;; CYRILLIC SMALL LETTER KA WITH DESCENDER
+   (#x83 ?\u049D) ;; CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
+   (#x84 ?\u04A3) ;; CYRILLIC SMALL LETTER EN WITH DESCENDER
+   (#x85 ?\u04AF) ;; CYRILLIC SMALL LETTER STRAIGHT U
+   (#x86 ?\u04B1) ;; CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
+   (#x87 ?\u04B3) ;; CYRILLIC SMALL LETTER HA WITH DESCENDER
+   (#x88 ?\u04B7) ;; CYRILLIC SMALL LETTER CHE WITH DESCENDER
+   (#x89 ?\u04B9) ;; CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
+   (#x8A ?\u04BB) ;; CYRILLIC SMALL LETTER SHHA
+   (#x8B ?\u2580) ;; UPPER HALF BLOCK
+   (#x8C ?\u04D9) ;; CYRILLIC SMALL LETTER SCHWA
+   (#x8D ?\u04E3) ;; CYRILLIC SMALL LETTER I WITH MACRON
+   (#x8E ?\u04E9) ;; CYRILLIC SMALL LETTER BARRED O
+   (#x8F ?\u04EF) ;; CYRILLIC SMALL LETTER U WITH MACRON
+   (#x90 ?\u0492) ;; CYRILLIC CAPITAL LETTER GHE WITH STROKE
+   (#x91 ?\u0496) ;; CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+   (#x92 ?\u049A) ;; CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+   (#x93 ?\u049C) ;; CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+   (#x94 ?\u04A2) ;; CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+   (#x95 ?\u04AE) ;; CYRILLIC CAPITAL LETTER STRAIGHT U
+   (#x96 ?\u04B0) ;; CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+   (#x97 ?\u04B2) ;; CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+   (#x98 ?\u04B6) ;; CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+   (#x99 ?\u04B8) ;; CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+   (#x9A ?\u04BA) ;; CYRILLIC CAPITAL LETTER SHHA
+   (#x9B ?\u2321) ;; BOTTOM HALF INTEGRAL
+   (#x9C ?\u04D8) ;; CYRILLIC CAPITAL LETTER SCHWA
+   (#x9D ?\u04E2) ;; CYRILLIC CAPITAL LETTER I WITH MACRON
+   (#x9E ?\u04E8) ;; CYRILLIC CAPITAL LETTER BARRED O
+   (#x9F ?\u04EE) ;; CYRILLIC CAPITAL LETTER U WITH MACRON
+   (#xA0 ?\u00A0) ;; NO-BREAK SPACE
+   (#xA1 ?\u0452) ;; CYRILLIC SMALL LETTER DJE
+   (#xA2 ?\u0453) ;; CYRILLIC SMALL LETTER GJE
+   (#xA3 ?\u0451) ;; CYRILLIC SMALL LETTER IO
+   (#xA4 ?\u0454) ;; CYRILLIC SMALL LETTER UKRAINIAN IE
+   (#xA5 ?\u0455) ;; CYRILLIC SMALL LETTER DZE
+   (#xA6 ?\u0456) ;; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+   (#xA7 ?\u0457) ;; CYRILLIC SMALL LETTER YI
+   (#xA8 ?\u0458) ;; CYRILLIC SMALL LETTER JE
+   (#xA9 ?\u0459) ;; CYRILLIC SMALL LETTER LJE
+   (#xAA ?\u045A) ;; CYRILLIC SMALL LETTER NJE
+   (#xAB ?\u045B) ;; CYRILLIC SMALL LETTER TSHE
+   (#xAC ?\u045C) ;; CYRILLIC SMALL LETTER KJE
+   (#xAD ?\u0491) ;; CYRILLIC SMALL LETTER GHE WITH UPTURN
+   (#xAE ?\u045E) ;; CYRILLIC SMALL LETTER SHORT U
+   (#xAF ?\u045F) ;; CYRILLIC SMALL LETTER DZHE
+   (#xB0 ?\u2116) ;; NUMERO SIGN
+   (#xB1 ?\u0402) ;; CYRILLIC CAPITAL LETTER DJE
+   (#xB2 ?\u0403) ;; CYRILLIC CAPITAL LETTER GJE
+   (#xB3 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO
+   (#xB4 ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE
+   (#xB5 ?\u0405) ;; CYRILLIC CAPITAL LETTER DZE
+   (#xB6 ?\u0406) ;; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+   (#xB7 ?\u0407) ;; CYRILLIC CAPITAL LETTER YI
+   (#xB8 ?\u0408) ;; CYRILLIC CAPITAL LETTER JE
+   (#xB9 ?\u0409) ;; CYRILLIC CAPITAL LETTER LJE
+   (#xBA ?\u040A) ;; CYRILLIC CAPITAL LETTER NJE
+   (#xBB ?\u040B) ;; CYRILLIC CAPITAL LETTER TSHE
+   (#xBC ?\u040C) ;; CYRILLIC CAPITAL LETTER KJE
+   (#xBD ?\u0490) ;; CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+   (#xBE ?\u040E) ;; CYRILLIC CAPITAL LETTER SHORT U
+   (#xBF ?\u040F) ;; CYRILLIC CAPITAL LETTER DZHE
+   (#xC0 ?\u044E) ;; CYRILLIC SMALL LETTER YU
+   (#xC1 ?\u0430) ;; CYRILLIC SMALL LETTER A
+   (#xC2 ?\u0431) ;; CYRILLIC SMALL LETTER BE
+   (#xC3 ?\u0446) ;; CYRILLIC SMALL LETTER TSE
+   (#xC4 ?\u0434) ;; CYRILLIC SMALL LETTER DE
+   (#xC5 ?\u0435) ;; CYRILLIC SMALL LETTER IE
+   (#xC6 ?\u0444) ;; CYRILLIC SMALL LETTER EF
+   (#xC7 ?\u0433) ;; CYRILLIC SMALL LETTER GHE
+   (#xC8 ?\u0445) ;; CYRILLIC SMALL LETTER HA
+   (#xC9 ?\u0438) ;; CYRILLIC SMALL LETTER I
+   (#xCA ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I
+   (#xCB ?\u043A) ;; CYRILLIC SMALL LETTER KA
+   (#xCC ?\u043B) ;; CYRILLIC SMALL LETTER EL
+   (#xCD ?\u043C) ;; CYRILLIC SMALL LETTER EM
+   (#xCE ?\u043D) ;; CYRILLIC SMALL LETTER EN
+   (#xCF ?\u043E) ;; CYRILLIC SMALL LETTER O
+   (#xD0 ?\u043F) ;; CYRILLIC SMALL LETTER PE
+   (#xD1 ?\u044F) ;; CYRILLIC SMALL LETTER YA
+   (#xD2 ?\u0440) ;; CYRILLIC SMALL LETTER ER
+   (#xD3 ?\u0441) ;; CYRILLIC SMALL LETTER ES
+   (#xD4 ?\u0442) ;; CYRILLIC SMALL LETTER TE
+   (#xD5 ?\u0443) ;; CYRILLIC SMALL LETTER U
+   (#xD6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE
+   (#xD7 ?\u0432) ;; CYRILLIC SMALL LETTER VE
+   (#xD8 ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN
+   (#xD9 ?\u044B) ;; CYRILLIC SMALL LETTER YERU
+   (#xDA ?\u0437) ;; CYRILLIC SMALL LETTER ZE
+   (#xDB ?\u0448) ;; CYRILLIC SMALL LETTER SHA
+   (#xDC ?\u044D) ;; CYRILLIC SMALL LETTER E
+   (#xDD ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA
+   (#xDE ?\u0447) ;; CYRILLIC SMALL LETTER CHE
+   (#xDF ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN
+   (#xE0 ?\u042E) ;; CYRILLIC CAPITAL LETTER YU
+   (#xE1 ?\u0410) ;; CYRILLIC CAPITAL LETTER A
+   (#xE2 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE
+   (#xE3 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE
+   (#xE4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE
+   (#xE5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE
+   (#xE6 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF
+   (#xE7 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE
+   (#xE8 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA
+   (#xE9 ?\u0418) ;; CYRILLIC CAPITAL LETTER I
+   (#xEA ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I
+   (#xEB ?\u041A) ;; CYRILLIC CAPITAL LETTER KA
+   (#xEC ?\u041B) ;; CYRILLIC CAPITAL LETTER EL
+   (#xED ?\u041C) ;; CYRILLIC CAPITAL LETTER EM
+   (#xEE ?\u041D) ;; CYRILLIC CAPITAL LETTER EN
+   (#xEF ?\u041E) ;; CYRILLIC CAPITAL LETTER O
+   (#xF0 ?\u041F) ;; CYRILLIC CAPITAL LETTER PE
+   (#xF1 ?\u042F) ;; CYRILLIC CAPITAL LETTER YA
+   (#xF2 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER
+   (#xF3 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES
+   (#xF4 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE
+   (#xF5 ?\u0423) ;; CYRILLIC CAPITAL LETTER U
+   (#xF6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE
+   (#xF7 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE
+   (#xF8 ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN
+   (#xF9 ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU
+   (#xFA ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE
+   (#xFB ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA
+   (#xFC ?\u042D) ;; CYRILLIC CAPITAL LETTER E
+   (#xFD ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA
+   (#xFE ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE
+   (#xFF ?\u042A));; CYRILLIC CAPITAL LETTER HARD SIGN
+ "KOI-8 for the Caucasus."
+ '(documentation
+   "Note that this does not support old Russian orthography;
+for that, see koi8-o. "))
+
+;; Case support for the characters above.  Each pair is written
+;; (UPPER LOWER), capital letter first, because that is the order in
+;; which the loop destructures it and hands it to `put-case-table-pair'.
+(loop
+  for (upper lower)
+  in '((?\u04E8 ?\u04E9) ;; BARRED O
+       (?\u04B8 ?\u04B9) ;; CHE WITH VERTICAL STROKE
+       (?\u0402 ?\u0452) ;; DJE
+       (?\u0405 ?\u0455) ;; DZE
+       (?\u04A2 ?\u04A3) ;; EN WITH DESCENDER
+       (?\u049C ?\u049D) ;; KA WITH VERTICAL STROKE
+       (?\u04BA ?\u04BB) ;; SHHA
+       (?\u04AE ?\u04AF) ;; STRAIGHT U
+       (?\u04B0 ?\u04B1) ;; STRAIGHT U WITH STROKE
+       (?\u0496 ?\u0497)) ;; ZHE WITH DESCENDER
+  with case-table = (standard-case-table)
+  do
+  (put-case-table-pair upper lower case-table))
+
+;; Archaic Russian support.
+;; Define the `koi8-o' coding system with `make-8-bit-coding-system'.
+;; The table lists (OCTET CHARACTER) pairs for octets #x80-#xFF only;
+;; the comment on each row is the Unicode name of the character.
+;;
+;; NOTE(review): the `mnemonic' string and parts of the `documentation'
+;; string below look like ISO-2022 escape sequences whose ESC bytes have
+;; been lost (",L...(B", "%G...%@") -- confirm against the original
+;; file before relying on their text.
+(make-8-bit-coding-system
+ 'koi8-o
+ '((#x80 ?\u0402) ;; CYRILLIC CAPITAL LETTER DJE
+   (#x81 ?\u0403) ;; CYRILLIC CAPITAL LETTER GJE
+   (#x82 ?\u00B8) ;; CEDILLA
+   (#x83 ?\u0453) ;; CYRILLIC SMALL LETTER GJE
+   (#x84 ?\u201E) ;; DOUBLE LOW-9 QUOTATION MARK
+   (#x85 ?\u2026) ;; HORIZONTAL ELLIPSIS
+   (#x86 ?\u2020) ;; DAGGER
+   (#x87 ?\u00A7) ;; SECTION SIGN
+   (#x88 ?\u20AC) ;; EURO SIGN
+   (#x89 ?\u00A8) ;; DIAERESIS
+   (#x8A ?\u0409) ;; CYRILLIC CAPITAL LETTER LJE
+   (#x8B ?\u2039) ;; SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+   (#x8C ?\u040A) ;; CYRILLIC CAPITAL LETTER NJE
+   (#x8D ?\u040C) ;; CYRILLIC CAPITAL LETTER KJE
+   (#x8E ?\u040B) ;; CYRILLIC CAPITAL LETTER TSHE
+   (#x8F ?\u040F) ;; CYRILLIC CAPITAL LETTER DZHE
+   (#x90 ?\u0452) ;; CYRILLIC SMALL LETTER DJE
+   (#x91 ?\u2018) ;; LEFT SINGLE QUOTATION MARK
+   (#x92 ?\u2019) ;; RIGHT SINGLE QUOTATION MARK
+   (#x93 ?\u201C) ;; LEFT DOUBLE QUOTATION MARK
+   (#x94 ?\u201D) ;; RIGHT DOUBLE QUOTATION MARK
+   (#x95 ?\u2022) ;; BULLET
+   (#x96 ?\u2013) ;; EN DASH
+   (#x97 ?\u2014) ;; EM DASH
+   (#x98 ?\u00A3) ;; POUND SIGN
+   (#x99 ?\u00B7) ;; MIDDLE DOT
+   (#x9A ?\u0459) ;; CYRILLIC SMALL LETTER LJE
+   (#x9B ?\u203A) ;; SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+   (#x9C ?\u045A) ;; CYRILLIC SMALL LETTER NJE
+   (#x9D ?\u045C) ;; CYRILLIC SMALL LETTER KJE
+   (#x9E ?\u045B) ;; CYRILLIC SMALL LETTER TSHE
+   (#x9F ?\u045F) ;; CYRILLIC SMALL LETTER DZHE
+   (#xA0 ?\u00A0) ;; NO-BREAK SPACE
+   (#xA1 ?\u0475) ;; CYRILLIC SMALL LETTER IZHITSA
+   (#xA2 ?\u0463) ;; CYRILLIC SMALL LETTER YAT
+   (#xA3 ?\u0451) ;; CYRILLIC SMALL LETTER IO
+   (#xA4 ?\u0454) ;; CYRILLIC SMALL LETTER UKRAINIAN IE
+   (#xA5 ?\u0455) ;; CYRILLIC SMALL LETTER DZE
+   (#xA6 ?\u0456) ;; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+   (#xA7 ?\u0457) ;; CYRILLIC SMALL LETTER YI
+   (#xA8 ?\u0458) ;; CYRILLIC SMALL LETTER JE
+   (#xA9 ?\u00AE) ;; REGISTERED SIGN
+   (#xAA ?\u2122) ;; TRADE MARK SIGN
+   (#xAB ?\u00AB) ;; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+   (#xAC ?\u0473) ;; CYRILLIC SMALL LETTER FITA
+   (#xAD ?\u0491) ;; CYRILLIC SMALL LETTER GHE WITH UPTURN
+   (#xAE ?\u045E) ;; CYRILLIC SMALL LETTER SHORT U
+   (#xAF ?\u00B4) ;; ACUTE ACCENT
+   (#xB0 ?\u00B0) ;; DEGREE SIGN
+   (#xB1 ?\u0474) ;; CYRILLIC CAPITAL LETTER IZHITSA
+   (#xB2 ?\u0462) ;; CYRILLIC CAPITAL LETTER YAT
+   (#xB3 ?\u0401) ;; CYRILLIC CAPITAL LETTER IO
+   (#xB4 ?\u0404) ;; CYRILLIC CAPITAL LETTER UKRAINIAN IE
+   (#xB5 ?\u0405) ;; CYRILLIC CAPITAL LETTER DZE
+   (#xB6 ?\u0406) ;; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+   (#xB7 ?\u0407) ;; CYRILLIC CAPITAL LETTER YI
+   (#xB8 ?\u0408) ;; CYRILLIC CAPITAL LETTER JE
+   (#xB9 ?\u2116) ;; NUMERO SIGN
+   (#xBA ?\u00A2) ;; CENT SIGN
+   (#xBB ?\u00BB) ;; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+   (#xBC ?\u0472) ;; CYRILLIC CAPITAL LETTER FITA
+   (#xBD ?\u0490) ;; CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+   (#xBE ?\u040E) ;; CYRILLIC CAPITAL LETTER SHORT U
+   (#xBF ?\u00A9) ;; COPYRIGHT SIGN
+   (#xC0 ?\u044E) ;; CYRILLIC SMALL LETTER YU
+   (#xC1 ?\u0430) ;; CYRILLIC SMALL LETTER A
+   (#xC2 ?\u0431) ;; CYRILLIC SMALL LETTER BE
+   (#xC3 ?\u0446) ;; CYRILLIC SMALL LETTER TSE
+   (#xC4 ?\u0434) ;; CYRILLIC SMALL LETTER DE
+   (#xC5 ?\u0435) ;; CYRILLIC SMALL LETTER IE
+   (#xC6 ?\u0444) ;; CYRILLIC SMALL LETTER EF
+   (#xC7 ?\u0433) ;; CYRILLIC SMALL LETTER GHE
+   (#xC8 ?\u0445) ;; CYRILLIC SMALL LETTER HA
+   (#xC9 ?\u0438) ;; CYRILLIC SMALL LETTER I
+   (#xCA ?\u0439) ;; CYRILLIC SMALL LETTER SHORT I
+   (#xCB ?\u043A) ;; CYRILLIC SMALL LETTER KA
+   (#xCC ?\u043B) ;; CYRILLIC SMALL LETTER EL
+   (#xCD ?\u043C) ;; CYRILLIC SMALL LETTER EM
+   (#xCE ?\u043D) ;; CYRILLIC SMALL LETTER EN
+   (#xCF ?\u043E) ;; CYRILLIC SMALL LETTER O
+   (#xD0 ?\u043F) ;; CYRILLIC SMALL LETTER PE
+   (#xD1 ?\u044F) ;; CYRILLIC SMALL LETTER YA
+   (#xD2 ?\u0440) ;; CYRILLIC SMALL LETTER ER
+   (#xD3 ?\u0441) ;; CYRILLIC SMALL LETTER ES
+   (#xD4 ?\u0442) ;; CYRILLIC SMALL LETTER TE
+   (#xD5 ?\u0443) ;; CYRILLIC SMALL LETTER U
+   (#xD6 ?\u0436) ;; CYRILLIC SMALL LETTER ZHE
+   (#xD7 ?\u0432) ;; CYRILLIC SMALL LETTER VE
+   (#xD8 ?\u044C) ;; CYRILLIC SMALL LETTER SOFT SIGN
+   (#xD9 ?\u044B) ;; CYRILLIC SMALL LETTER YERU
+   (#xDA ?\u0437) ;; CYRILLIC SMALL LETTER ZE
+   (#xDB ?\u0448) ;; CYRILLIC SMALL LETTER SHA
+   (#xDC ?\u044D) ;; CYRILLIC SMALL LETTER E
+   (#xDD ?\u0449) ;; CYRILLIC SMALL LETTER SHCHA
+   (#xDE ?\u0447) ;; CYRILLIC SMALL LETTER CHE
+   (#xDF ?\u044A) ;; CYRILLIC SMALL LETTER HARD SIGN
+   (#xE0 ?\u042E) ;; CYRILLIC CAPITAL LETTER YU
+   (#xE1 ?\u0410) ;; CYRILLIC CAPITAL LETTER A
+   (#xE2 ?\u0411) ;; CYRILLIC CAPITAL LETTER BE
+   (#xE3 ?\u0426) ;; CYRILLIC CAPITAL LETTER TSE
+   (#xE4 ?\u0414) ;; CYRILLIC CAPITAL LETTER DE
+   (#xE5 ?\u0415) ;; CYRILLIC CAPITAL LETTER IE
+   (#xE6 ?\u0424) ;; CYRILLIC CAPITAL LETTER EF
+   (#xE7 ?\u0413) ;; CYRILLIC CAPITAL LETTER GHE
+   (#xE8 ?\u0425) ;; CYRILLIC CAPITAL LETTER HA
+   (#xE9 ?\u0418) ;; CYRILLIC CAPITAL LETTER I
+   (#xEA ?\u0419) ;; CYRILLIC CAPITAL LETTER SHORT I
+   (#xEB ?\u041A) ;; CYRILLIC CAPITAL LETTER KA
+   (#xEC ?\u041B) ;; CYRILLIC CAPITAL LETTER EL
+   (#xED ?\u041C) ;; CYRILLIC CAPITAL LETTER EM
+   (#xEE ?\u041D) ;; CYRILLIC CAPITAL LETTER EN
+   (#xEF ?\u041E) ;; CYRILLIC CAPITAL LETTER O
+   (#xF0 ?\u041F) ;; CYRILLIC CAPITAL LETTER PE
+   (#xF1 ?\u042F) ;; CYRILLIC CAPITAL LETTER YA
+   (#xF2 ?\u0420) ;; CYRILLIC CAPITAL LETTER ER
+   (#xF3 ?\u0421) ;; CYRILLIC CAPITAL LETTER ES
+   (#xF4 ?\u0422) ;; CYRILLIC CAPITAL LETTER TE
+   (#xF5 ?\u0423) ;; CYRILLIC CAPITAL LETTER U
+   (#xF6 ?\u0416) ;; CYRILLIC CAPITAL LETTER ZHE
+   (#xF7 ?\u0412) ;; CYRILLIC CAPITAL LETTER VE
+   (#xF8 ?\u042C) ;; CYRILLIC CAPITAL LETTER SOFT SIGN
+   (#xF9 ?\u042B) ;; CYRILLIC CAPITAL LETTER YERU
+   (#xFA ?\u0417) ;; CYRILLIC CAPITAL LETTER ZE
+   (#xFB ?\u0428) ;; CYRILLIC CAPITAL LETTER SHA
+   (#xFC ?\u042D) ;; CYRILLIC CAPITAL LETTER E
+   (#xFD ?\u0429) ;; CYRILLIC CAPITAL LETTER SHCHA
+   (#xFE ?\u0427) ;; CYRILLIC CAPITAL LETTER CHE
+   (#xFF ?\u042A));; CYRILLIC CAPITAL LETTER HARD SIGN
+ "KOI-8 for old-orthography Russian; also known as KOI-C."
+ '(mnemonic ",L:>(B"
+   documentation
+   "KOI-8 for old-orthography Russian; also known as KOI-C.
+
+This is mostly compatible with KOI8-R in the alphabetic characters, and
+provides ,L&v(B, %GѲѳ%@, %GѢѣ%@, and %GѴѵ%@ instead of some of the box-drawing characters. "))
+
+;; Register the letter pairs below in the standard case table.  Each
+;; pair is (UPPER LOWER), capital letter first, and is passed in that
+;; order to `put-case-table-pair'.
+(loop
+  for (upper lower)
+  in '((?\u0472 ?\u0473) ;; FITA
+       (?\u0474 ?\u0475) ;; IZHITSA
+       (?\u0408 ?\u0458) ;; JE
+       (?\u0462 ?\u0463)) ;; YAT
+  with case-table = (standard-case-table)
+  do
+  (put-case-table-pair upper lower case-table))
+
+(provide 'cyrillic)
+
+;;; cyrillic.el ends here
\ No newline at end of file
--- a/lisp/mule/english.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/english.el Sun Jul 22 22:04:14 2007 +0000 @@ -31,21 +31,6 @@ ;;; Code -;; ASCII with right-to-left direction. -(make-charset 'ascii-right-to-left - "ASCII (left half of ISO 8859-1) with right-to-left direction" - '(dimension - 1 - registries ["ISO8859-1"] - chars 94 - columns 1 - direction r2l - final ?B - graphic 0 - short-name "rev ASCII" - long-name "ASCII with right-to-left direction" - )) - (set-language-info-alist "English" '((tutorial . "TUTORIAL") (locale "en" "C")
--- a/lisp/mule/ethiopic.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/ethiopic.el Sun Jul 22 22:04:14 2007 +0000 @@ -61,15 +61,13 @@ (r1 *= 94) (r2 += r1) (if (r2 < 256) - (r1 = ?\x12) + (r1 = #x12) (if (r2 < 448) - ((r1 = ?\x13) (r2 -= 256)) - ((r1 = ?\xfd) (r2 -= 208)) + ((r1 = #x13) (r2 -= 256)) + ((r1 = #xfd) (r2 -= 208)) )))) "CCL program to encode an Ethiopic code to code point of Ethiopic font.") -;; (setq font-ccl-encoder-alist -;; (cons (cons "ethiopic" ccl-encode-ethio-font) font-ccl-encoder-alist)) (set-charset-ccl-program 'ethiopic 'ccl-encode-ethio-font) (set-language-info-alist
--- a/lisp/mule/japanese.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/japanese.el Sun Jul 22 22:04:14 2007 +0000 @@ -61,10 +61,6 @@ )) ;;; Syntax of Japanese characters. -(modify-syntax-entry 'katakana-jisx0201 "w") -(modify-syntax-entry 'japanese-jisx0212 "w") - -(modify-syntax-entry 'japanese-jisx0208 "w") (loop for row in '(33 34 40) do (modify-syntax-entry `[japanese-jisx0208 ,row] "_")) (loop for char in '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B)
--- a/lisp/mule/korean.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/korean.el Sun Jul 22 22:04:14 2007 +0000 @@ -46,12 +46,8 @@ ;; Syntax of Korean characters. (loop for row from 33 to 34 do (modify-syntax-entry `[korean-ksc5601 ,row] ".")) -(loop for row from 35 to 37 do - (modify-syntax-entry `[korean-ksc5601 ,row] "w")) (loop for row from 38 to 41 do (modify-syntax-entry `[korean-ksc5601 ,row] ".")) -(loop for row from 42 to 126 do - (modify-syntax-entry `[korean-ksc5601 ,row] "w")) ;; Setting for coding-system and quail were moved to ;; language/korean.el.
--- a/lisp/mule/latin.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/latin.el Sun Jul 22 22:04:14 2007 +0000 @@ -443,7 +443,7 @@ (make-coding-system 'iso-8859-15 'iso2022 "ISO 4873 conforming 8-bit code (ASCII + Latin 9; aka Latin-1 with Euro)" - `(mnemonic "MIME/Ltn-9" ; bletch + '(mnemonic "MIME/Ltn-9" ; bletch eol-type nil charset-g0 ascii charset-g1 latin-iso8859-15
--- a/lisp/mule/mule-ccl.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/mule-ccl.el Sun Jul 22 22:04:14 2007 +0000 @@ -2,6 +2,7 @@ ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. ;; Licensed to the Free Software Foundation. +;; Copyright (C) 2002, 2007 Free Software Foundation, Inc. ;; Keywords: CCL, mule, multilingual, character set, coding-system @@ -49,7 +50,8 @@ read read-if read-branch write call end read-multibyte-character write-multibyte-character translate-character mule-to-unicode unicode-to-mule - iterate-multiple-map map-multiple map-single] + iterate-multiple-map map-multiple map-single lookup-integer + lookup-character] "Vector of CCL commands (symbols).") ;; Put a property to each symbol of CCL commands for the compiler. @@ -106,6 +108,8 @@ iterate-multiple-map map-multiple map-single + lookup-int-const-tbl + lookup-char-const-tbl ] "Vector of CCL extended compiled codes (symbols).") @@ -181,8 +185,10 @@ ;; Embed integer DATA in `ccl-program-vector' at `ccl-current-ic' and ;; increment it. If IC is specified, embed DATA at IC. (defun ccl-embed-data (data &optional ic) - (if (characterp data) - (setq data (char-int data))) + ;; XEmacs: Embed characters as characters, since their integer values vary at + ;; runtime. + ; (if (characterp data) + ; (setq data (char-int data))) (if ic (aset ccl-program-vector ic data) (let ((len (length ccl-program-vector))) @@ -197,8 +203,8 @@ ;; Embed pair of SYMBOL and PROP where (get SYMBOL PROP) should give ;; proper index number for SYMBOL. PROP should be -;; `translation-table-id', `code-conversion-map-id', or -;; `ccl-program-idx'. +;; `translation-table-id', `translation-hash-table-id' +;; `code-conversion-map-id', or `ccl-program-idx'. 
(defun ccl-embed-symbol (symbol prop) (ccl-embed-data (cons symbol prop))) @@ -855,6 +861,46 @@ (ccl-embed-extended-command 'unicode-to-mule rrr RRR 0)) nil) +;; Compile lookup-integer +(defun ccl-compile-lookup-integer (cmd) + (if (/= (length cmd) 4) + (error "CCL: Invalid number of arguments: %s" cmd)) + (let ((Rrr (nth 1 cmd)) + (RRR (nth 2 cmd)) + (rrr (nth 3 cmd))) + (ccl-check-register RRR cmd) + (ccl-check-register rrr cmd) + (cond ((and (symbolp Rrr) (not (get Rrr 'ccl-register-number))) + (ccl-embed-extended-command 'lookup-int-const-tbl + rrr RRR 0) + (ccl-embed-symbol Rrr 'translation-hash-table-id)) + (t + (error "CCL: non-constant table: %s" cmd) + ;; not implemented: + (ccl-check-register Rrr cmd) + (ccl-embed-extended-command 'lookup-int rrr RRR 0)))) + nil) + +;; Compile lookup-character +(defun ccl-compile-lookup-character (cmd) + (if (/= (length cmd) 4) + (error "CCL: Invalid number of arguments: %s" cmd)) + (let ((Rrr (nth 1 cmd)) + (RRR (nth 2 cmd)) + (rrr (nth 3 cmd))) + (ccl-check-register RRR cmd) + (ccl-check-register rrr cmd) + (cond ((and (symbolp Rrr) (not (get Rrr 'ccl-register-number))) + (ccl-embed-extended-command 'lookup-char-const-tbl + rrr RRR 0) + (ccl-embed-symbol Rrr 'translation-hash-table-id)) + (t + (error "CCL: non-constant table: %s" cmd) + ;; not implemented: + (ccl-check-register Rrr cmd) + (ccl-embed-extended-command 'lookup-char rrr RRR 0)))) + nil) + (defun ccl-compile-iterate-multiple-map (cmd) (ccl-compile-multiple-map-function 'iterate-multiple-map cmd) nil) @@ -1194,8 +1240,8 @@ (setq i (1+ i))))) (defun ccl-dump-ex-cmd (rrr cc) - (let* ((RRR (logand cc ?\x7)) - (Rrr (logand (ash cc -3) ?\x7)) + (let* ((RRR (logand cc #x7)) + (Rrr (logand (ash cc -3) #x7)) (ex-op (aref ccl-extended-code-table (logand (ash cc -6) #x3fff)))) (insert (format "<%s> " ex-op)) (funcall (get ex-op 'ccl-dump-function) rrr RRR Rrr))) @@ -1213,6 +1259,14 @@ (let ((tbl (ccl-get-next-code))) (insert (format "translation table(%S) r%d r%d\n" 
tbl RRR rrr)))) +(defun ccl-dump-lookup-int-const-tbl (rrr RRR Rrr) + (let ((tbl (ccl-get-next-code))) + (insert (format "hash table(%S) r%d r%d\n" tbl RRR rrr)))) + +(defun ccl-dump-lookup-char-const-tbl (rrr RRR Rrr) + (let ((tbl (ccl-get-next-code))) + (insert (format "hash table(%S) r%d r%d\n" tbl RRR rrr)))) + (defun ccl-dump-mule-to-unicode (rrr RRR Rrr) (insert (format "change chars in r%d and r%d to unicode\n" RRR rrr))) @@ -1297,7 +1351,7 @@ STATEMENT := SET | IF | BRANCH | LOOP | REPEAT | BREAK | READ | WRITE | CALL - | TRANSLATE | MAP | END + | TRANSLATE | MAP | LOOKUP | END SET := (REG = EXPRESSION) | (REG ASSIGNMENT_OPERATOR EXPRESSION) @@ -1351,7 +1405,7 @@ ;; Read a character from the input text, splitting it into its ;; multibyte representation. Set REG_0 to the charset ID of the ;; character, and set REG_1 to the code point of the character. If - ;; the dimension of charset is two, set REG_1 to ((CODE0 << 8) | + ;; the dimension of charset is two, set REG_1 to ((CODE0 << 7) | ;; CODE1), where CODE0 is the first code point and CODE1 is the ;; second code point. | (read-multibyte-character REG_0 REG_1) @@ -1383,7 +1437,7 @@ ;; Write a multibyte representation of a character whose ;; charset ID is REG_0 and code point is REG_1. If the ;; dimension of the charset is two, REG_1 should be ((CODE0 << - ;; 8) | CODE1), where CODE0 is the first code point and CODE1 + ;; 7) | CODE1), where CODE0 is the first code point and CODE1 ;; is the second code point of the character. | (write-multibyte-character REG_0 REG_1) @@ -1397,6 +1451,11 @@ | (mule-to-unicode REG(charset) REG(codepoint)) | (unicode-to-mule REG(unicode,code) REG(CHARSET)) +LOOKUP := + (lookup-character SYMBOL REG(charset) REG(codepoint)) + | (lookup-integer SYMBOL REG(integer)) + ;; SYMBOL refers to a table defined by `define-hash-translation-table'. 
+ MAP := (iterate-multiple-map REG REG MAP-IDs) | (map-multiple REG REG (MAP-SET)) @@ -1501,22 +1560,6 @@ ,name) ,ccl-program))) -;;;###autoload -(defun ccl-execute-with-args (ccl-prog &rest args) - "Execute CCL-PROGRAM with registers initialized by the remaining args. -The return value is a vector of resulting CCL registers. - -See the documentation of `define-ccl-program' for the detail of CCL program." - (let ((reg (make-vector 8 0)) - (i 0)) - (while (and args (< i 8)) - (if (not (integerp (car args))) - (error "Arguments should be integer")) - (aset reg i (car args)) - (setq args (cdr args) i (1+ i))) - (ccl-execute ccl-prog reg) - reg)) - (provide 'ccl) -;; ccl.el ends here +;; ccl.el ends here \ No newline at end of file
--- a/lisp/mule/mule-charset.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/mule-charset.el Sun Jul 22 22:04:14 2007 +0000 @@ -117,11 +117,6 @@ "Useless in XEmacs, returns 1." 1) -(define-obsolete-function-alias 'charset-columns 'charset-width) ;; 19990409 -(define-obsolete-function-alias 'charset-final 'charset-iso-final-char) ;; 19990409 -(define-obsolete-function-alias 'charset-graphic 'charset-iso-graphic-plane) ;; 19990409 -(define-obsolete-function-alias 'charset-doc-string 'charset-description) ;; 19990409 - ;;;; Define setf methods for all settable Charset properties (defsetf charset-registry set-charset-registry) @@ -355,7 +350,6 @@ ;; arabic-2-column "MuleArabic-2" ;; ipa "MuleIPA" ;; ethiopic "Ethiopic-Unicode" -;; ascii-right-to-left "ISO8859-1" ;; indian-is13194 "IS13194-Devanagari" ;; indian-2-column "MuleIndian-2" ;; indian-1-column "MuleIndian-1"
--- a/lisp/mule/mule-coding.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/mule-coding.el Sun Jul 22 22:04:14 2007 +0000 @@ -28,6 +28,9 @@ ;;; split off of mule.el and mostly moved to coding.el +;; Needed for make-8-bit-coding-system. +(eval-when-compile (require 'ccl "mule-ccl")) + ;;; Code: (defun coding-system-force-on-output (coding-system register) @@ -185,5 +188,483 @@ mnemonic "ISO7/Lock" documentation "ISO-2022 coding system using Locking-Shift for 96-charset." )) + -;;; mule-coding.el ends here +;; This is used by people writing CCL programs, but is called at runtime. +(defun define-translation-hash-table (symbol table) + "Define SYMBOL as the name of the hash translation TABLE for use in CCL. + +Analogous to `define-translation-table', but updates +`translation-hash-table-vector' and the table is for use in the CCL +`lookup-integer' and `lookup-character' functions." + (unless (and (symbolp symbol) + (hash-table-p table)) + (error "Bad args to define-translation-hash-table")) + (let ((len (length translation-hash-table-vector)) + (id 0) + done) + (put symbol 'translation-hash-table table) + (while (not done) + (if (>= id len) + (setq translation-hash-table-vector + (vconcat translation-hash-table-vector [nil]))) + (let ((slot (aref translation-hash-table-vector id))) + (if (or (not slot) + (eq (car slot) symbol)) + (progn + (aset translation-hash-table-vector id (cons symbol table)) + (setq done t)) + (setq id (1+ id))))) + (put symbol 'translation-hash-table-id id) + id)) + +(defvar make-8-bit-private-use-start (decode-char 'ucs #xE000) + "Start of a 256 code private use area for make-8-bit-coding-system. + +This is used to ensure that distinct octets on disk for a given coding +system map to distinct XEmacs characters, preventing a spurious changes when +a file is read, not changed, and then written. 
") + +(defun make-8-bit-generate-helper (decode-table encode-table + encode-failure-octet) + "Helper function for `make-8-bit-generate-encode-program', which see. + +Deals with the case where ASCII and another character set provide the +can both be encoded unambiguously into the coding-system; if this is +so, returns a list corresponding to such a ccl-program. If not, it +returns nil. " + (let ((tentative-encode-program-parts + (eval-when-compile + (let* ((compiled + (append + (ccl-compile + `(1 + (loop + (read-multibyte-character r0 r1) + (if (r0 == ,(charset-id 'ascii)) + (write r1) + ((if (r0 == #xABAB) + ;; #xBFFE is a sentinel in the compiled + ;; program. + (write r1 ,(make-vector 256 #xBFFE)) + ((mule-to-unicode r0 r1) + (if (r0 == #xFFFD) + (write #xBEEF) + ((lookup-integer encode-table-sym r0 r3) + (if r7 + (write-multibyte-character r0 r3) + (write #xBEEF)))))))) + (repeat)))) nil)) + (first-part compiled) + (last-part + (member-if-not (lambda (entr) (eq #xBFFE entr)) + (member-if + (lambda (entr) (eq #xBFFE entr)) + first-part)))) + (while compiled + (if (eq #xBFFE (cadr compiled)) + (setcdr compiled nil)) + (setq compiled (cdr compiled))) + ;; Is the generated code as we expect it to be? + (assert (and (memq #xABAB first-part) + (memq #xBEEF14 last-part)) + nil + "This code assumes that the constant #xBEEF is #xBEEF14 in \ +compiled CCL code,\nand that the constant #xABAB is #xABAB. If that is +not the case, and it appears not to be--that's why you're getting this +message--it will not work. ") + (list first-part last-part)))) + (charset-lower -1) + (charset-upper -1) + worth-trying known-charsets encode-program + other-charset-vector ucs) + + (loop for char across decode-table + do (pushnew (char-charset char) known-charsets)) + (setq known-charsets (delq 'ascii known-charsets)) + + (loop for known-charset in known-charsets + do + ;; This is not possible for two dimensional charsets. 
+ (when (eq 1 (charset-dimension known-charset)) + (setq args-out-of-range t) + (if (eq 'control-1 known-charset) + (setq charset-lower 0 + charset-upper 31) + ;; There should be a nicer way to get the limits here. + (condition-case args-out-of-range + (make-char known-charset #x100) + (args-out-of-range + (setq charset-lower (third args-out-of-range) + charset-upper (fourth args-out-of-range))))) + (loop + for i from charset-lower to charset-upper + always (and (setq ucs + (encode-char (make-char known-charset i) 'ucs)) + (gethash ucs encode-table)) + finally (setq worth-trying known-charset)) + + ;; Only trying this for one charset at a time, the first find. + (when worth-trying (return)) + + ;; Okay, this charset is not worth trying, Try the next. + (setq charset-lower -1 + charset-upper -1 + worth-trying nil))) + + (when worth-trying + (setq other-charset-vector (make-vector 256 encode-failure-octet)) + (loop for i from charset-lower to charset-upper + do (aset other-charset-vector (+ #x80 i) + (gethash (encode-char (make-char worth-trying i) + 'ucs) encode-table))) + (setq encode-program + (nsublis + (list (cons #xABAB (charset-id worth-trying))) + (nconc + (copy-list (first + tentative-encode-program-parts)) + (append other-charset-vector nil) + (copy-tree (second + tentative-encode-program-parts)))))) + encode-program)) + +(defun make-8-bit-generate-encode-program (decode-table encode-table + encode-failure-octet) + "Generate a CCL program to decode a 8-bit fixed-width charset. + +DECODE-TABLE must have 256 non-cons entries, and will be regarded as +describing a map from the octet corresponding to an offset in the +table to the that entry in the table. ENCODE-TABLE is a hash table +map from unicode values to characters in the range [0,255]. +ENCODE-FAILURE-OCTET describes an integer between 0 and 255 +\(inclusive) to write in the event that a character cannot be encoded. 
" + (check-argument-type #'vectorp decode-table) + (check-argument-range (length decode-table) #x100 #x100) + (check-argument-type #'hash-table-p encode-table) + (check-argument-type #'integerp encode-failure-octet) + (check-argument-range encode-failure-octet #x00 #xFF) + (let ((encode-program nil) + (general-encode-program + (eval-when-compile + (let ((prog (append + (ccl-compile + `(1 + (loop + (read-multibyte-character r0 r1) + (mule-to-unicode r0 r1) + (if (r0 == #xFFFD) + (write #xBEEF) + ((lookup-integer encode-table-sym r0 r3) + (if r7 + (write-multibyte-character r0 r3) + (write #xBEEF)))) + (repeat)))) nil))) + (assert (memq #xBEEF14 prog) + nil + "This code assumes that the constant #xBEEF is #xBEEF14 \ +in compiled CCL code.\nIf that is not the case, and it appears not to +be--that's why you're getting this message--it will not work. ") + prog))) + (encode-program-with-ascii-optimisation + (eval-when-compile + (let ((prog (append + (ccl-compile + `(1 + (loop + (read-multibyte-character r0 r1) + (if (r0 == ,(charset-id 'ascii)) + (write r1) + ((mule-to-unicode r0 r1) + (if (r0 == #xFFFD) + (write #xBEEF) + ((lookup-integer encode-table-sym r0 r3) + (if r7 + (write-multibyte-character r0 r3) + (write #xBEEF)))))) + (repeat)))) nil))) + (assert (memq #xBEEF14 prog) + nil + "This code assumes that the constant #xBEEF is #xBEEF14 \ +in compiled CCL code.\nIf that is not the case, and it appears not to +be--that's why you're getting this message--it will not work. ") + prog))) + (ascii-encodes-as-itself nil)) + + ;; Is this coding system ASCII-compatible? If so, we can avoid the hash + ;; table lookup for those characters. + (loop + for i from #x00 to #x7f + always (eq (int-to-char i) (gethash i encode-table)) + finally (setq ascii-encodes-as-itself t)) + + ;; Note that this logic handles EBCDIC badly. 
For example, CP037, + ;; MIME name ebcdic-na, has the entire repertoire of ASCII and + ;; Latin 1, and thus a more optimal ccl encode program would check + ;; for those character sets and use tables. But for now, we do a + ;; hash table lookup for every character. + (if (null ascii-encodes-as-itself) + ;; General encode program. Pros; general and correct. Cons; + ;; slow, a hash table lookup + mule-unicode conversion is done + ;; for every character encoding. + (setq encode-program general-encode-program) + (setq encode-program + ;; Encode program with ascii-ascii mapping (based on a + ;; character's mule character set), and one other mule + ;; character set using table-based encoding, other + ;; character sets using hash table lookups. + ;; make-8-bit-non-ascii-completely-coveredp only returns + ;; such a mapping if some non-ASCII charset with + ;; characters in decode-table is entirely covered by + ;; encode-table. + (make-8-bit-generate-helper decode-table encode-table + encode-failure-octet)) + (unless encode-program + ;; If make-8-bit-non-ascii-completely-coveredp returned nil, + ;; but ASCII still encodes as itself, do one-to-one mapping + ;; for ASCII, and a hash table lookup for everything else. + (setq encode-program encode-program-with-ascii-optimisation))) + + (setq encode-program + (nsublis + (list (cons #xBEEF14 + (logior (lsh encode-failure-octet 8) + #x14))) + (copy-tree encode-program))) + encode-program)) + +(defun make-8-bit-create-decode-encode-tables (unicode-map) + "Return a list \(DECODE-TABLE ENCODE-TABLE) given UNICODE-MAP. +UNICODE-MAP should be an alist mapping from integer octet values to +characters with UCS code points; DECODE-TABLE will be a 256-element +vector, and ENCODE-TABLE will be a hash table mapping from 256 numbers +to 256 distinct characters. 
" + (check-argument-type #'listp unicode-map) + (let ((decode-table (make-vector 256 nil)) + (encode-table (make-hash-table :size 256)) + (private-use-start (encode-char make-8-bit-private-use-start 'ucs)) + desired-ucs) + + (loop for (external internal) + in unicode-map + do + (aset decode-table external internal) + (assert (not (eq (encode-char internal 'ucs) -1)) + nil + "Looks like you're calling make-8-bit-coding-system in a \ +dumped file, \nand you're either not providing a literal UNICODE-MAP +or PROPS. Don't do that; make-8-bit-coding-system relies on sensible +Unicode mappings being available, which they are at compile time for +dumped files (but this requires the mentioned literals), but not, for +most of them, at run time. ") + + (puthash (encode-char internal 'ucs) + ;; This is semantically an integer, but Dave Love's design + ;; for lookup-integer in CCL means we need to store it as a + ;; character. + (int-to-char external) + encode-table)) + + ;; Now, go through the decode table looking at the characters that + ;; remain nil. If the XEmacs character with that integer is already in + ;; the encode table, map the on-disk octet to a Unicode private use + ;; character. Otherwise map the on-disk octet to the XEmacs character + ;; with that numeric value, to make it clearer what it is. + (dotimes (i 256) + (when (null (aref decode-table i)) + ;; Find a free code point. + (setq desired-ucs i) + (while (gethash desired-ucs encode-table) + ;; In the normal case, the code point chosen will be U+E0XY, where + ;; XY is the hexadecimal octet on disk. In pathological cases + ;; it'll be something else. 
+ (setq desired-ucs (+ private-use-start desired-ucs) + private-use-start (+ private-use-start 1))) + (aset decode-table i (decode-char 'ucs desired-ucs)) + (puthash desired-ucs (int-to-char i) encode-table))) + (values decode-table encode-table))) + +(defun make-8-bit-generate-decode-program (decode-table) + "Given DECODE-TABLE, generate a CCL program to decode an 8-bit charset. +DECODE-TABLE must have 256 non-cons entries, and will be regarded as +describing a map from the octet corresponding to an offset in the +table to that entry in the table. " + (check-argument-type #'vectorp decode-table) + (check-argument-range (length decode-table) #x100 #x100) + (let ((decode-program-parts + (eval-when-compile + (let* ((compiled + (append + (ccl-compile + `(3 + ((read r0) + (loop + (write-read-repeat r0 ,(make-vector + 256 'sentinel)))))) nil)) + (first-part compiled) + (last-part + (member-if-not #'symbolp + (member-if-not #'integerp first-part)))) + ;; Chop off the sentinel sentinel sentinel [..] part. + (while compiled + (if (symbolp (cadr compiled)) + (setcdr compiled nil)) + (setq compiled (cdr compiled))) + (list first-part last-part))))) + (nconc + ;; copy-list needed, because the structure of the literal provided + ;; by our eval-when-compile hangs around. + (copy-list (first decode-program-parts)) + (append decode-table nil) + (second decode-program-parts)))) + +;;;###autoload +(defun make-8-bit-coding-system (name unicode-map &optional description props) + "Make and return a fixed-width 8-bit CCL coding system named NAME. +NAME must be a symbol, and UNICODE-MAP a list. + +UNICODE-MAP is an alist describing a map from octets in the coding +system NAME (as integers) to XEmacs characters. Those XEmacs +characters will be used explicitly on decoding, but for encoding (most +relevantly, on writing to disk) XEmacs characters that map to the same +Unicode code point will be unified. This means that the ISO-8859-?
+characters that map to the same Unicode code point will not be +distinct when written to disk, which is normally what is intended; it +also means that East Asian Han characters from different XEmacs +character sets will not be distinct when written to disk, which is +less often what is intended. + +Any octets not mapped will be decoded into the ISO 8859-1 characters with +the corresponding numeric value; unless another octet maps to that +character, in which case the Unicode private use area will be used. This +avoids spurious changes to files on disk when they contain octets that would +be otherwise remapped to the canonical values for the corresponding +characters in the coding system. + +DESCRIPTION and PROPS are as in `make-coding-system', which see. This +function also accepts two additional (optional) properties in PROPS; +`aliases', giving a list of aliases to be initialized for this +coding-system, and `encode-failure-octet', an integer between 0 and 255 to +write in place of XEmacs characters that cannot be encoded, defaulting to +the code for tilde `~'. " + (check-argument-type #'symbolp name) + (check-argument-type #'listp unicode-map) + (check-argument-type #'stringp + (or description + (setq description + (format "Coding system used for %s." name)))) + (check-valid-plist props) + (let ((encode-failure-octet (or (plist-get props 'encode-failure-octet) + (char-to-int ?~))) + (aliases (plist-get props 'aliases)) + (hash-table-sym (gentemp (format "%s-encode-table" name))) + encode-program decode-program result decode-table encode-table) + + ;; Some more sanity checking. + (check-argument-range encode-failure-octet 0 #xFF) + (check-argument-type #'listp aliases) + + ;; Don't pass on our extra data to make-coding-system.
+ (setq props (plist-remprop props 'encode-failure-octet) + props (plist-remprop props 'aliases)) + + (multiple-value-setq + (decode-table encode-table) + (make-8-bit-create-decode-encode-tables unicode-map)) + + ;; Register the encode-table. + (define-translation-hash-table hash-table-sym encode-table) + + ;; Generate the programs. + (setq decode-program (make-8-bit-generate-decode-program decode-table) + encode-program (make-8-bit-generate-encode-program + decode-table encode-table encode-failure-octet)) + (unless (vectorp encode-program) + (setq encode-program + (apply #'vector + (nsublis (list (cons 'encode-table-sym hash-table-sym)) + (copy-tree encode-program))))) + (unless (vectorp decode-program) + (setq decode-program + (apply #'vector decode-program))) + + ;; And now generate the actual coding system. + (setq result + (make-coding-system + name 'ccl + description + (plist-put (plist-put props 'decode decode-program) + 'encode encode-program))) + (coding-system-put name 'category 'iso-8-1) + (loop for alias in aliases + do (define-coding-system-alias alias name)) + result)) + +;;;###autoload +(define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map + &optional description props) + + ;; We provide the compiler macro (= macro that is expanded only on + ;; compilation, and that can punt to a runtime version of the + ;; associated function if necessary) not for reasons of speed, though + ;; it does speed up things at runtime a little, but because the + ;; Unicode mappings are available at compile time in the dumped + ;; files, but they are not available at run time for the vast + ;; majority of them. + + (if (not (and (and (consp name) (eq (car name) 'quote)) + (and (consp unicode-map) (eq (car unicode-map) 'quote)) + (and (or (and (consp props) (eq (car props) 'quote)) + (null props))))) + ;; The call does not use literals; do it at runtime.
+ form + (setq name (cadr name) + unicode-map (cadr unicode-map) + props (if props (cadr props))) + (let ((encode-failure-octet + (or (plist-get props 'encode-failure-octet) (char-to-int ?~))) + (aliases (plist-get props 'aliases)) + encode-program decode-program + decode-table encode-table res) + + ;; Some sanity checking. + (check-argument-range encode-failure-octet 0 #xFF) + (check-argument-type #'listp aliases) + + ;; Don't pass on our extra data to make-coding-system. + (setq props (plist-remprop props 'encode-failure-octet) + props (plist-remprop props 'aliases)) + + ;; Work out encode-table and decode-table. + (multiple-value-setq + (decode-table encode-table) + (make-8-bit-create-decode-encode-tables unicode-map)) + + ;; Generate the decode and encode programs. + (setq decode-program (make-8-bit-generate-decode-program decode-table) + encode-program (make-8-bit-generate-encode-program + decode-table encode-table encode-failure-octet)) + + ;; And return the generated code. + `(let ((encode-table-sym (gentemp (format "%s-encode-table" ',name))) + result) + (define-translation-hash-table encode-table-sym ,encode-table) + (setq result + (make-coding-system + ',name 'ccl ,description + (plist-put (plist-put ',props 'decode + ,(apply #'vector decode-program)) + 'encode + (apply #'vector + (nsublis + (list (cons + 'encode-table-sym + (symbol-value 'encode-table-sym))) + ',encode-program))))) + (coding-system-put ',name 'category 'iso-8-1) + ,(macroexpand `(loop for alias in ',aliases + do (define-coding-system-alias alias + ',name))) + result)))) + + \ No newline at end of file
--- a/lisp/mule/viet-util.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/viet-util.el Sun Jul 22 22:04:14 2007 +0000 @@ -41,7 +41,13 @@ ;;;###autoload (defun viet-encode-viscii-char (char) "Return VISCII character code of CHAR if appropriate." - (get-char-table char viet-viscii-to-external-code-table)) + (check-argument-type #'characterp char) + (if (eq char ?~) + char + (setq char (encode-coding-string char 'viscii)) + (if (and (= 1 (length char)) + (not (eq (aref char 0) ?~))) + (aref char 0)))) ;; VIQR is a menmonic encoding specification for Vietnamese. ;; It represents diacritical marks by ASCII characters as follows:
--- a/lisp/mule/vietnamese.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/mule/vietnamese.el Sun Jul 22 22:04:14 2007 +0000 @@ -60,271 +60,148 @@ long-name "VISCII upper-case" )) -(modify-syntax-entry 'vietnamese-viscii-lower "w") -(modify-syntax-entry 'vietnamese-viscii-upper "w") - (define-category ?v "Vietnamese character.") (modify-category-entry 'vietnamese-viscii-lower ?v) (modify-category-entry 'vietnamese-viscii-upper ?v) -(eval-and-compile - -(defvar viet-viscii-decode-table - [;; VISCII is a full 8-bit code. - 0 1 ?,2F(B 3 4 ?,2G(B ?,2g(B 7 8 9 10 11 12 13 14 15 - 16 17 18 19 ?,2V(B 21 22 23 24 ?,2[(B 26 27 28 29 ?,2\(B 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ?,2U(B ?,2!(B ?,2"(B ?,2#(B ?,2$(B ?,2%(B ?,2&(B ?,2'(B ?,2((B ?,2)(B ?,2*(B ?,2+(B ?,2,(B ?,2-(B ?,2.(B ?,2/(B - ?,20(B ?,21(B ?,22(B ?,25(B ?,2~(B ?,2>(B ?,26(B ?,27(B ?,28(B ?,2v(B ?,2w(B ?,2o(B ?,2|(B ?,2{(B ?,2x(B ?,2O(B - ?,2u(B ?,1!(B ?,1"(B ?,1#(B ?,1$(B ?,1%(B ?,1&(B ?,1'(B ?,1((B ?,1)(B ?,1*(B ?,1+(B ?,1,(B ?,1-(B ?,1.(B ?,1/(B - ?,10(B ?,11(B ?,12(B ?,2^(B ?,2=(B ?,15(B ?,16(B ?,17(B ?,18(B ?,2q(B ?,2Q(B ?,2W(B ?,2X(B ?,1=(B ?,1>(B ?,2_(B - ?,2`(B ?,2a(B ?,2b(B ?,2c(B ?,2d(B ?,2e(B ?,1F(B ?,1G(B ?,2h(B ?,2i(B ?,2j(B ?,2k(B ?,2l(B ?,2m(B ?,2n(B ?,1O(B - ?,2p(B ?,1Q(B ?,2r(B ?,2s(B ?,2t(B ?,1U(B ?,1V(B ?,1W(B ?,1X(B ?,2y(B ?,2z(B ?,1[(B ?,1\(B ?,2}(B ?,1^(B ?,1_(B - ?,1`(B ?,1a(B ?,1b(B ?,1c(B ?,1d(B ?,1e(B ?,1f(B ?,1g(B ?,1h(B ?,1i(B ?,1j(B ?,1k(B ?,1l(B ?,1m(B ?,1n(B ?,1o(B - ?,1p(B ?,1q(B ?,1r(B ?,1s(B ?,1t(B ?,1u(B ?,1v(B ?,1w(B ?,1x(B ?,1y(B ?,1z(B ?,1{(B ?,1|(B ?,1}(B ?,1~(B ?,2f(B ] - "Vietnamese VISCII decoding table.") - -(defvar viet-viscii-encode-table - (let ((table-lower (make-vector 128 0)) - 
(table-upper (make-vector 128 0)) - (i 0) - char-component) - (while (< i 256) - (setq char-component - (split-char (aref viet-viscii-decode-table i))) - (cond ((eq (car char-component) 'vietnamese-viscii-lower) - (aset table-lower (nth 1 char-component) i)) - ((eq (car char-component) 'vietnamese-viscii-upper) - (aset table-upper (nth 1 char-component) i))) - (setq i (1+ i))) - (cons table-lower table-upper)) - "Vietnamese VISCII encoding table. -Cons of tables for encoding lower-case chars and upper-case characters. -Both tables are indexed by the position code of Vietnamese characters.") - -(defvar viet-vscii-decode-table - [;; VSCII is a full 8-bit code. - 0 ?,2z(B ?,2x(B 3 ?,2W(B ?,2X(B ?,2f(B 7 8 9 10 11 12 13 14 15 - 16 ?,2Q(B ?,2_(B ?,2O(B ?,2V(B ?,2[(B ?,2}(B ?,2\(B 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ?,2`(B ?,2d(B ?,2c(B ?,2a(B ?,2U(B ?,2#(B ?,2'(B ?,2h(B ?,2k(B ?,2((B ?,2i(B ?,2)(B ?,2.(B ?,2l(B ?,2o(B ?,2n(B - ?,2m(B ?,28(B ?,2r(B ?,2v(B ?,2u(B ?,2s(B ?,2w(B ?,25(B ?,26(B ?,27(B ?,2^(B ?,2>(B ?,2~(B ?,2y(B ?,2|(B ?,2{(B - 160 ?,2e(B ?,2b(B ?,2j(B ?,2t(B ?,2=(B ?,2_(B ?,2p(B ?,1e(B ?,1b(B ?,1j(B ?,1t(B ?,1>(B ?,1y(B ?,1p(B ?,2"(B - 192 193 194 195 196 ?,1`(B ?,1d(B ?,1c(B ?,1a(B ?,1U(B ?,2F(B ?,1"(B ?,1F(B ?,1G(B ?,1!(B ?,2G(B - ?,2!(B ?,2%(B ?,2&(B ?,2g(B ?,2%(B ?,2+(B ?,1#(B ?,1%(B ?,1&(B ?,1g(B ?,1$(B ?,1'(B ?,1h(B ?,2,(B ?,1k(B ?,1((B - ?,1i(B ?,1)(B ?,1+(B ?,1,(B ?,1-(B ?,1*(B ?,1.(B ?,1l(B ?,1o(B ?,2-(B ?,2*(B ?,20(B ?,1n(B ?,1m(B ?,18(B ?,1r(B - ?,21(B ?,1v(B ?,1u(B ?,1s(B ?,1w(B ?,10(B ?,11(B ?,12(B ?,1/(B ?,15(B ?,16(B ?,17(B ?,1^(B ?,1>(B ?,1~(B ?,1y(B - ?,22(B ?,1|(B ?,1{(B ?,1z(B ?,1x(B ?,1W(B ?,1X(B ?,1f(B ?,1Q(B ?,1q(B ?,1O(B 
?,1V(B ?,1[(B ?,1}(B ?,1\(B ?,2/(B] - "Vietnamese VSCII decoding table.") - -(defvar viet-vscii-encode-table - (let ((table-lower (make-vector 128 0)) - (table-upper (make-vector 128 0)) - (i 0) - char-component) - (while (< i 256) - (setq char-component - (split-char (aref viet-vscii-decode-table i))) - (cond ((eq (car char-component) 'vietnamese-viscii-lower) - (aset table-lower (nth 1 char-component) i)) - ((eq (car char-component) 'vietnamese-viscii-upper) - (aset table-upper (nth 1 char-component) i))) - (setq i (1+ i))) - (cons table-lower table-upper)) - "Vietnamese VSCII encoding table. -Cons of tables for encoding lower-case chars and upper-case characters. -Both tables are indexed by the position code of Vietnamese characters.") - -) - -(define-ccl-program ccl-decode-viscii - `(3 - ((read r0) - (loop - (write-read-repeat r0 ,viet-viscii-decode-table)) - )) - "CCL program to decode VISCII 1.1") - -;; Multibyte form of a Vietnamese character is as follows (3-byte): -;; LEADING-CODE-PRIVATE-11 LEADING-CODE-EXTENDED-11 POSITION-CODE -;; where LEADING-CODE-EXTENDED-11 for Vietnamese is -;; `vietnamese-viscii-lower' or `vietnamese-viscii-upper'. 
- -(defvar leading-code-private-11 #x9E) - -(define-ccl-program ccl-encode-viscii - `(1 - ((read r0) - (loop - (if (r0 < 128) - ;; ASCII - (write-read-repeat r0) - ;; not ASCII - (if (r0 != ,leading-code-private-11) - ;; not Vietnamese - (write-read-repeat r0) - ((read-if (r0 == ,(charset-id 'vietnamese-viscii-lower)) - (;; Vietnamese lower - (read r0) - (r0 -= 128) - (write-read-repeat r0 ,(car viet-viscii-encode-table))) - (if (r0 == ,(charset-id 'vietnamese-viscii-upper)) - (;; Vietnamese upper - (read r0) - (r0 -= 128) - (write-read-repeat r0 ,(cdr viet-viscii-encode-table))) - ;; not Vietnamese - (write-read-repeat r0))))))))) - "CCL program to encode VISCII 1.1") - -(define-ccl-program ccl-encode-viscii-font - `(0 - ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper - ;; R1:position code - ;; Out: R1:font code point - (if (r0 == ,(charset-id 'vietnamese-viscii-lower)) - (r1 = r1 ,(car viet-viscii-encode-table)) - (r1 = r1 ,(cdr viet-viscii-encode-table))) - ) - "CCL program to encode Vietnamese chars to VISCII 1.1 font") - -(define-ccl-program ccl-decode-vscii - `(3 - ((read r0) - (loop - (write-read-repeat r0 ,viet-vscii-decode-table)) - )) - "CCL program to decode VSCII-1.") - -(define-ccl-program ccl-encode-vscii - `(1 - ((read r0) - (loop - (if (r0 < 128) - ;; ASCII - (write-read-repeat r0) - ;; not ASCII - (if (r0 != ,leading-code-private-11) - ;; not Vietnamese - (write-read-repeat r0) - (read-if (r0 == ,(charset-id 'vietnamese-viscii-lower)) - (;; Vietnamese lower - (read r0) - (r0 -= 128) - (write-read-repeat r0 ,(car viet-vscii-encode-table))) - (if (r0 == ,(charset-id 'vietnamese-viscii-upper)) - (;; Vietnamese upper - (read r0) - (r0 -= 128) - (write-read-repeat r0 ,(cdr viet-vscii-encode-table))) - ;; not Vietnamese - (write-read-repeat r0)))))))) - "CCL program to encode VSCII-1.") - -(define-ccl-program ccl-encode-vscii-font - `(0 - ;; In: R0:vietnamese-viscii-lower/vietnamese-viscii-upper - ;; R1:position code - ;; Out: R1:font code 
point - (if (r0 == ,(charset-id 'vietnamese-viscii-lower)) - (r1 = r1 ,(car viet-vscii-encode-table)) - (r1 = r1 ,(cdr viet-vscii-encode-table))) - ) - "CCL program to encode Vietnamese chars to VSCII-1 font.") - - -(make-coding-system - 'viscii 'ccl +(make-8-bit-coding-system + 'viscii + '((#x02 ?\u1EB2) ;; CAPITAL LETTER A WITH BREVE AND HOOK ABOVE + (#x05 ?\u1EB4) ;; CAPITAL LETTER A WITH BREVE AND TILDE + (#x06 ?\u1EAA) ;; CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE + (#x14 ?\u1EF6) ;; CAPITAL LETTER Y WITH HOOK ABOVE + (#x19 ?\u1EF8) ;; CAPITAL LETTER Y WITH TILDE + (#x1E ?\u1EF4) ;; CAPITAL LETTER Y WITH DOT BELOW + (#x80 ?\u1EA0) ;; CAPITAL LETTER A WITH DOT BELOW + (#x81 ?\u1EAE) ;; CAPITAL LETTER A WITH BREVE AND ACUTE + (#x82 ?\u1EB0) ;; CAPITAL LETTER A WITH BREVE AND GRAVE + (#x83 ?\u1EB6) ;; CAPITAL LETTER A WITH BREVE AND DOT BELOW + (#x84 ?\u1EA4) ;; CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE + (#x85 ?\u1EA6) ;; CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE + (#x86 ?\u1EA8) ;; CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + (#x87 ?\u1EAC) ;; CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW + (#x88 ?\u1EBC) ;; CAPITAL LETTER E WITH TILDE + (#x89 ?\u1EB8) ;; CAPITAL LETTER E WITH DOT BELOW + (#x8A ?\u1EBE) ;; CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE + (#x8B ?\u1EC0) ;; CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE + (#x8C ?\u1EC2) ;; CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + (#x8D ?\u1EC4) ;; CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE + (#x8E ?\u1EC6) ;; CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW + (#x8F ?\u1ED0) ;; CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE + (#x90 ?\u1ED2) ;; CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE + (#x91 ?\u1ED4) ;; CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + (#x92 ?\u1ED6) ;; CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE + (#x93 ?\u1ED8) ;; CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW + (#x94 ?\u1EE2) ;; CAPITAL LETTER O WITH HORN AND DOT BELOW + (#x95 ?\u1EDA) ;; CAPITAL LETTER O WITH HORN AND ACUTE + (#x96 
?\u1EDC) ;; CAPITAL LETTER O WITH HORN AND GRAVE + (#x97 ?\u1EDE) ;; CAPITAL LETTER O WITH HORN AND HOOK ABOVE + (#x98 ?\u1ECA) ;; CAPITAL LETTER I WITH DOT BELOW + (#x99 ?\u1ECE) ;; CAPITAL LETTER O WITH HOOK ABOVE + (#x9A ?\u1ECC) ;; CAPITAL LETTER O WITH DOT BELOW + (#x9B ?\u1EC8) ;; CAPITAL LETTER I WITH HOOK ABOVE + (#x9C ?\u1EE6) ;; CAPITAL LETTER U WITH HOOK ABOVE + (#x9D ?\u0168) ;; CAPITAL LETTER U WITH TILDE + (#x9E ?\u1EE4) ;; CAPITAL LETTER U WITH DOT BELOW + (#x9F ?\u1EF2) ;; CAPITAL LETTER Y WITH GRAVE + (#xA0 ?\u00D5) ;; CAPITAL LETTER O WITH TILDE + (#xA1 ?\u1EAF) ;; SMALL LETTER A WITH BREVE AND ACUTE + (#xA2 ?\u1EB1) ;; SMALL LETTER A WITH BREVE AND GRAVE + (#xA3 ?\u1EB7) ;; SMALL LETTER A WITH BREVE AND DOT BELOW + (#xA4 ?\u1EA5) ;; SMALL LETTER A WITH CIRCUMFLEX AND ACUTE + (#xA5 ?\u1EA7) ;; SMALL LETTER A WITH CIRCUMFLEX AND GRAVE + (#xA6 ?\u1EA9) ;; SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE + (#xA7 ?\u1EAD) ;; SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW + (#xA8 ?\u1EBD) ;; SMALL LETTER E WITH TILDE + (#xA9 ?\u1EB9) ;; SMALL LETTER E WITH DOT BELOW + (#xAA ?\u1EBF) ;; SMALL LETTER E WITH CIRCUMFLEX AND ACUTE + (#xAB ?\u1EC1) ;; SMALL LETTER E WITH CIRCUMFLEX AND GRAVE + (#xAC ?\u1EC3) ;; SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE + (#xAD ?\u1EC5) ;; SMALL LETTER E WITH CIRCUMFLEX AND TILDE + (#xAE ?\u1EC7) ;; SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW + (#xAF ?\u1ED1) ;; SMALL LETTER O WITH CIRCUMFLEX AND ACUTE + (#xB0 ?\u1ED3) ;; SMALL LETTER O WITH CIRCUMFLEX AND GRAVE + (#xB1 ?\u1ED5) ;; SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE + (#xB2 ?\u1ED7) ;; SMALL LETTER O WITH CIRCUMFLEX AND TILDE + (#xB3 ?\u1EE0) ;; CAPITAL LETTER O WITH HORN AND TILDE + (#xB4 ?\u01A0) ;; CAPITAL LETTER O WITH HORN + (#xB5 ?\u1ED9) ;; SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW + (#xB6 ?\u1EDD) ;; SMALL LETTER O WITH HORN AND GRAVE + (#xB7 ?\u1EDF) ;; SMALL LETTER O WITH HORN AND HOOK ABOVE + (#xB8 ?\u1ECB) ;; SMALL LETTER I WITH DOT BELOW + (#xB9
?\u1EF0) ;; CAPITAL LETTER U WITH HORN AND DOT BELOW + (#xBA ?\u1EE8) ;; CAPITAL LETTER U WITH HORN AND ACUTE + (#xBB ?\u1EEA) ;; CAPITAL LETTER U WITH HORN AND GRAVE + (#xBC ?\u1EEC) ;; CAPITAL LETTER U WITH HORN AND HOOK ABOVE + (#xBD ?\u01A1) ;; SMALL LETTER O WITH HORN + (#xBE ?\u1EDB) ;; SMALL LETTER O WITH HORN AND ACUTE + (#xBF ?\u01AF) ;; CAPITAL LETTER U WITH HORN + (#xC0 ?\u00C0) ;; CAPITAL LETTER A WITH GRAVE + (#xC1 ?\u00C1) ;; CAPITAL LETTER A WITH ACUTE + (#xC2 ?\u00C2) ;; CAPITAL LETTER A WITH CIRCUMFLEX + (#xC3 ?\u00C3) ;; CAPITAL LETTER A WITH TILDE + (#xC4 ?\u1EA2) ;; CAPITAL LETTER A WITH HOOK ABOVE + (#xC5 ?\u0102) ;; CAPITAL LETTER A WITH BREVE + (#xC6 ?\u1EB3) ;; SMALL LETTER A WITH BREVE AND HOOK ABOVE + (#xC7 ?\u1EB5) ;; SMALL LETTER A WITH BREVE AND TILDE + (#xC8 ?\u00C8) ;; CAPITAL LETTER E WITH GRAVE + (#xC9 ?\u00C9) ;; CAPITAL LETTER E WITH ACUTE + (#xCA ?\u00CA) ;; CAPITAL LETTER E WITH CIRCUMFLEX + (#xCB ?\u1EBA) ;; CAPITAL LETTER E WITH HOOK ABOVE + (#xCC ?\u00CC) ;; CAPITAL LETTER I WITH GRAVE + (#xCD ?\u00CD) ;; CAPITAL LETTER I WITH ACUTE + (#xCE ?\u0128) ;; CAPITAL LETTER I WITH TILDE + (#xCF ?\u1EF3) ;; SMALL LETTER Y WITH GRAVE + (#xD0 ?\u0110) ;; CAPITAL LETTER D WITH STROKE + (#xD1 ?\u1EE9) ;; SMALL LETTER U WITH HORN AND ACUTE + (#xD2 ?\u00D2) ;; CAPITAL LETTER O WITH GRAVE + (#xD3 ?\u00D3) ;; CAPITAL LETTER O WITH ACUTE + (#xD4 ?\u00D4) ;; CAPITAL LETTER O WITH CIRCUMFLEX + (#xD5 ?\u1EA1) ;; SMALL LETTER A WITH DOT BELOW + (#xD6 ?\u1EF7) ;; SMALL LETTER Y WITH HOOK ABOVE + (#xD7 ?\u1EEB) ;; SMALL LETTER U WITH HORN AND GRAVE + (#xD8 ?\u1EED) ;; SMALL LETTER U WITH HORN AND HOOK ABOVE + (#xD9 ?\u00D9) ;; CAPITAL LETTER U WITH GRAVE + (#xDA ?\u00DA) ;; CAPITAL LETTER U WITH ACUTE + (#xDB ?\u1EF9) ;; SMALL LETTER Y WITH TILDE + (#xDC ?\u1EF5) ;; SMALL LETTER Y WITH DOT BELOW + (#xDD ?\u00DD) ;; CAPITAL LETTER Y WITH ACUTE + (#xDE ?\u1EE1) ;; SMALL LETTER O WITH HORN AND TILDE + (#xDF ?\u01B0) ;; SMALL LETTER U WITH HORN + (#xE0 
?\u00E0) ;; SMALL LETTER A WITH GRAVE + (#xE1 ?\u00E1) ;; SMALL LETTER A WITH ACUTE + (#xE2 ?\u00E2) ;; SMALL LETTER A WITH CIRCUMFLEX + (#xE3 ?\u00E3) ;; SMALL LETTER A WITH TILDE + (#xE4 ?\u1EA3) ;; SMALL LETTER A WITH HOOK ABOVE + (#xE5 ?\u0103) ;; SMALL LETTER A WITH BREVE + (#xE6 ?\u1EEF) ;; SMALL LETTER U WITH HORN AND TILDE + (#xE7 ?\u1EAB) ;; SMALL LETTER A WITH CIRCUMFLEX AND TILDE + (#xE8 ?\u00E8) ;; SMALL LETTER E WITH GRAVE + (#xE9 ?\u00E9) ;; SMALL LETTER E WITH ACUTE + (#xEA ?\u00EA) ;; SMALL LETTER E WITH CIRCUMFLEX + (#xEB ?\u1EBB) ;; SMALL LETTER E WITH HOOK ABOVE + (#xEC ?\u00EC) ;; SMALL LETTER I WITH GRAVE + (#xED ?\u00ED) ;; SMALL LETTER I WITH ACUTE + (#xEE ?\u0129) ;; SMALL LETTER I WITH TILDE + (#xEF ?\u1EC9) ;; SMALL LETTER I WITH HOOK ABOVE + (#xF0 ?\u0111) ;; SMALL LETTER D WITH STROKE + (#xF1 ?\u1EF1) ;; SMALL LETTER U WITH HORN AND DOT BELOW + (#xF2 ?\u00F2) ;; SMALL LETTER O WITH GRAVE + (#xF3 ?\u00F3) ;; SMALL LETTER O WITH ACUTE + (#xF4 ?\u00F4) ;; SMALL LETTER O WITH CIRCUMFLEX + (#xF5 ?\u00F5) ;; SMALL LETTER O WITH TILDE + (#xF6 ?\u1ECF) ;; SMALL LETTER O WITH HOOK ABOVE + (#xF7 ?\u1ECD) ;; SMALL LETTER O WITH DOT BELOW + (#xF8 ?\u1EE5) ;; SMALL LETTER U WITH DOT BELOW + (#xF9 ?\u00F9) ;; SMALL LETTER U WITH GRAVE + (#xFA ?\u00FA) ;; SMALL LETTER U WITH ACUTE + (#xFB ?\u0169) ;; SMALL LETTER U WITH TILDE + (#xFC ?\u1EE7) ;; SMALL LETTER U WITH HOOK ABOVE + (#xFD ?\u00FD) ;; SMALL LETTER Y WITH ACUTE + (#xFE ?\u1EE3) ;; SMALL LETTER O WITH HORN AND DOT BELOW + (#xFF ?\u1EEE)) ;; CAPITAL LETTER U WITH HORN AND TILDE "VISCII 1.1 (Vietnamese)" - `(mnemonic "VISCII" - decode ccl-decode-viscii - encode ccl-encode-viscii)) - -;; it is not correct, but XEmacs doesn't have `ccl' category... -(coding-system-put 'viscii 'category 'iso-8-1) - -;; (make-coding-system -;; 'vietnamese-viscii 4 ?V -;; "8-bit encoding for Vietnamese VISCII 1.1 (MIME:VISCII)" -;; '(ccl-decode-viscii . 
ccl-encode-viscii) -;; '((safe-charsets ascii vietnamese-viscii-lower vietnamese-viscii-upper) -;; (mime-charset . viscii) -;; (valid-codes (0 . 255)))) - -;; (define-coding-system-alias 'viscii 'vietnamese-viscii) - -(make-coding-system - 'vscii 'ccl - "VSCII 1.1 (Vietnamese)" - `(mnemonic "VSCII" - decode ccl-decode-vscii - encode ccl-encode-vscii)) - -;; (make-coding-system -;; 'vietnamese-vscii 4 ?v -;; "8-bit encoding for Vietnamese VSCII-1" -;; '(ccl-decode-vscii . ccl-encode-vscii) -;; '((safe-charsets ascii vietnamese-viscii-lower vietnamese-viscii-upper) -;; (valid-codes (0 . 255)))) - -;; (define-coding-system-alias 'vscii 'vietnamese-vscii) - -(make-coding-system - 'viqr 'no-conversion - "VIQR (Vietnamese)" - '(mnemonic "VIQR" - eol-type lf - post-read-conversion viqr-post-read-conversion - pre-write-conversion viqr-pre-write-conversion)) - -;; (make-coding-system -;; 'vietnamese-viqr 0 ?q -;; "Vietnamese latin transcription (VIQR)" -;; nil -;; '((safe-charsets ascii vietnamese-viscii-lower vietnamese-viscii-upper) -;; (post-read-conversion . viqr-post-read-conversion) -;; (pre-write-conversion . 
viqr-pre-write-conversion) -;; (charset-origin-alist -;; (vietnamese-viscii-lower "VISCII" viet-encode-viscii-char) -;; (vietnamese-viscii-upper "VISCII" viet-encode-viscii-char)))) - -;; (define-coding-system-alias 'viqr 'vietnamese-viqr) - -;; For VISCII users -(set-charset-ccl-program 'vietnamese-viscii-lower - 'ccl-encode-viscii-font) -(set-charset-ccl-program 'vietnamese-viscii-upper - 'ccl-encode-viscii-font) -;; For VSCII users -(set-charset-ccl-program 'vietnamese-viscii-lower 'ccl-encode-vscii-font) -(set-charset-ccl-program 'vietnamese-viscii-upper 'ccl-encode-vscii-font) - -;; (setq font-ccl-encoder-alist -;; (cons (cons "viscii" ccl-encode-viscii-font) font-ccl-encoder-alist)) - -;; (setq font-ccl-encoder-alist -;; (cons (cons "vscii" ccl-encode-vscii-font) font-ccl-encoder-alist)) - -(defvar viet-viscii-to-external-code-table - (let ((table (make-char-table 'generic)) - (i 0) - (len (length viet-viscii-decode-table))) - (while (< i len) - (let ((ch (aref viet-viscii-decode-table i))) - (if (characterp ch) - (put-char-table ch i table))) - (incf i))) - "Table to convert from characters to their VISCII code.") + '(mnemonic "VISCII")) (set-language-info-alist "Vietnamese" '((charset vietnamese-viscii-lower vietnamese-viscii-upper)
--- a/lisp/unicode.el Sun Jul 22 21:53:08 2007 +0000 +++ b/lisp/unicode.el Sun Jul 22 22:04:14 2007 +0000 @@ -293,21 +293,24 @@ ;; (macroexpand ;; '(define-ccl-program ccl-encode-to-ucs-2 ;; `(1 - ;; ((r1 = (r1 << 8)) - ;; (r1 = (r1 | r2)) - ;; (mule-to-unicode r0 r1) - ;; (r1 = (r0 >> 8)) - ;; (r2 = (r0 & 255)))) + ;; ((r1 = (r1 << 7)) + ;; (r1 = (r1 | r2)) + ;; (mule-to-unicode r0 r1) + ;; (r1 = (r0 >> 8)) + ;; (r2 = (r0 & #xff)))) ;; "CCL program to transform Mule characters to UCS-2.")) ;; ;; and it should occasionally be confirmed that the correspondence still ;; holds. - (let ((prog [1 10 131127 8 98872 65823 147513 8 82009 255 22])) - (defconst ccl-encode-to-ucs-2 prog + (let ((prog [1 10 131127 7 98872 65823 147513 8 82009 255 22])) + (defconst ccl-encode-to-ucs-2 + prog "CCL program to transform Mule characters to UCS-2.") - (put (quote ccl-encode-to-ucs-2) (quote ccl-program-idx) - (register-ccl-program (quote ccl-encode-to-ucs-2) prog)) nil)) + (put 'ccl-encode-to-ucs-2 + 'ccl-program-idx + (register-ccl-program 'ccl-encode-to-ucs-2 prog)) + nil)) ;; #### UTF-7 is not yet implemented, and it's tricky to do. There's ;; an implementation in appendix A.1 of the Unicode Standard, Version
--- a/src/ChangeLog Sun Jul 22 21:53:08 2007 +0000 +++ b/src/ChangeLog Sun Jul 22 22:04:14 2007 +0000 @@ -1,3 +1,52 @@ +2007-07-21 Aidan Kehoe <kehoea@parhasard.net> + + * elhash.c (struct Lisp_Hash_Table): + * elhash.c (HTENTRY_CLEAR_P): + * elhash.c (find_htentry): + * elhash.h: + * elhash.h (htentry): + Move struct Lisp_Hash_Table, HTENTRY_CLEAR_P, struct htentry to + elhash.h, since mule-ccl.c needs to use them. Ditto adding a + declaration for find_htentry to elhash.h, and removing the static + from its declaration in elhash.c + + * file-coding.c: + Add a backslash at the beginning of a line starting with a + parenthesis, for the sake of our font lock engine. + + * mule-ccl.c: + * mule-ccl.c (CCL_LookupIntConstTbl): + * mule-ccl.c (CCL_WRITE_STRING): + * mule-ccl.c (POSSIBLE_LEADING_BYTE_P): + Fix a bug where the charset ID of ASCII was not recognised as + being a possible leading byte. + * mule-ccl.c (CCL_MAKE_CHAR): + Take Mule character arguments in the same format as GNU. + * mule-ccl.c (CCL_DEBUG): + * mule-ccl.c (ccl_driver): + * mule-ccl.c (resolve_symbol_ccl_program): + * mule-ccl.c (Fccl_execute): + * mule-ccl.c (vars_of_mule_ccl): + Add Dave Love's hash table lookup CCL code, which has been in GNU + for years. + + Move mule-to-unicode to accept its argument in the same format as + write-multibyte-character. + + Move to accepting characters as well as integers in lots of + places; with XEmacs (and even with GNU Emacs), where the + character-integer map is not constant from one invocation to the + next, it makes sense for CCL programs with character literals to + accept those literals as characters, not integers. + + If we're a debug build, allow access to ccl-program-table from + Lisp, which can make debugging things easier. + + Take out font-ccl-encoder-alist, since it was never implemented, + and was not ever a stellar idea. + + Provide translation-hash-table-vector, as in GNU. + 2007-06-23 Stephen J. 
Turnbull <stephen@xemacs.org> * event-unixoid.c (event_stream_unixoid_create_io_streams):
--- a/src/elhash.c Sun Jul 22 21:53:08 2007 +0000 +++ b/src/elhash.c Sun Jul 22 22:04:14 2007 +0000 @@ -94,15 +94,6 @@ static Lisp_Object Qweak, Qkey_weak, Qvalue_weak, Qkey_or_value_weak; static Lisp_Object Qnon_weak, Q_type; -typedef struct htentry -{ -#ifdef NEW_GC - struct lrecord_header lheader; -#endif /* NEW_GC */ - Lisp_Object key; - Lisp_Object value; -} htentry; - struct Lisp_Hash_Table { struct LCRECORD_HEADER header; @@ -120,7 +111,6 @@ hash tables. Don't mark through this. */ }; -#define HTENTRY_CLEAR_P(htentry) ((*(EMACS_UINT*)(&((htentry)->key))) == 0) #define CLEAR_HTENTRY(htentry) \ ((*(EMACS_UINT*)(&((htentry)->key))) = 0, \ (*(EMACS_UINT*)(&((htentry)->value))) = 0) @@ -1126,7 +1116,7 @@ resize_hash_table (ht, new_size); } -static htentry * +htentry * find_htentry (Lisp_Object key, const Lisp_Hash_Table *ht) { hash_table_test_function_t test_function = ht->test_function;
--- a/src/elhash.h Sun Jul 22 21:53:08 2007 +0000 +++ b/src/elhash.h Sun Jul 22 22:04:14 2007 +0000 @@ -33,7 +33,19 @@ #define CHECK_HASH_TABLE(x) CHECK_RECORD (x, hash_table) #define CONCHECK_HASH_TABLE(x) CONCHECK_RECORD (x, hash_table) +typedef struct htentry +{ #ifdef NEW_GC + struct lrecord_header lheader; +#endif /* NEW_GC */ + Lisp_Object key; + Lisp_Object value; +} htentry; + +#define HTENTRY_CLEAR_P(htentry) ((*(EMACS_UINT*)(&((htentry)->key))) == 0) + +#ifdef NEW_GC + typedef struct htentry Lisp_Hash_Table_Entry; DECLARE_LRECORD (hash_table_entry, Lisp_Hash_Table_Entry); @@ -112,4 +124,6 @@ void inchash_eq (Lisp_Object key, Lisp_Object table, EMACS_INT offset); +htentry *find_htentry (Lisp_Object key, const Lisp_Hash_Table *ht); + #endif /* INCLUDED_elhash_h_ */
--- a/src/file-coding.c Sun Jul 22 21:53:08 2007 +0000 +++ b/src/file-coding.c Sun Jul 22 22:04:14 2007 +0000 @@ -1357,7 +1357,7 @@ The following additional properties are recognized if TYPE is `undecided': -[[ Doesn't GNU use \"detect-*\" for the following two? ]] +\[[ Doesn't GNU use \"detect-*\" for the following two? ]] `do-eol' Do EOL detection.
--- a/src/mule-ccl.c Sun Jul 22 21:53:08 2007 +0000 +++ b/src/mule-ccl.c Sun Jul 22 22:04:14 2007 +0000 @@ -29,15 +29,13 @@ #include "charset.h" #include "mule-ccl.h" #include "file-coding.h" +#include "elhash.h" Lisp_Object Qccl_error; /* This contains all code conversion map available to CCL. */ Lisp_Object Vcode_conversion_map_vector; -/* Alist of fontname patterns vs corresponding CCL program. */ -Lisp_Object Vfont_ccl_encoder_alist; - /* This symbol is a property which associates with ccl program vector. Ex: (get 'ccl-big5-encoder 'ccl-program) returns ccl program vector. Moved to general-slots.h. */ @@ -59,6 +57,15 @@ already resolved to index numbers or not. */ Lisp_Object Vccl_program_table; +/* Vector of registered hash tables for translation. */ +Lisp_Object Vtranslation_hash_table_vector; + +/* Return a hash table of id number ID. */ +#define GET_HASH_TABLE(id) \ + (XHASH_TABLE (XCDR(XVECTOR(Vtranslation_hash_table_vector)->contents[(id)]))) +/* Copied from fns.c. */ +#define HASH_VALUE(H, IDX) AREF ((H)->key_and_value, 2 * (IDX) + 1) + /* CCL (Code Conversion Language) is a simple language which has operations on one input buffer, one output buffer, and 7 registers. The syntax of CCL is described in `ccl.el'. Emacs Lisp function @@ -651,6 +658,19 @@ set reg[RRR] to -1. */ +#define CCL_LookupIntConstTbl 0x13 /* Lookup multibyte character by + integer key. Afterwards R7 set + to 1 iff lookup succeeded. + 1:ExtendedCOMMNDRrrRRRXXXXXXXX + 2:ARGUMENT(Hash table ID) */ + +#define CCL_LookupCharConstTbl 0x14 /* Lookup integer by multibyte + character key. Afterwards R7 set + to 1 iff lookup succeeded. + 1:ExtendedCOMMNDRrrRRRrrrXXXXX + 2:ARGUMENT(Hash table ID) */ + + /* CCL arithmetic/logical operators. 
*/ #define CCL_PLUS 0x00 /* X = Y + Z */ #define CCL_MINUS 0x01 /* X = Y - Z */ @@ -773,7 +793,7 @@ { \ for (i = 0; i < (len); i++) \ { \ - ch = ((XINT (ccl_prog[ic + (i / 3)])) \ + ch = ((XCHAR_OR_INT (ccl_prog[ic + (i / 3)])) \ >> ((2 - (i % 3)) * 8)) & 0xFF; \ if (ch == '\n') \ { \ @@ -802,7 +822,7 @@ { \ for (i = 0; i < (len); i++) \ { \ - ch = ((XINT (ccl_prog[ic + (i / 3)])) \ + ch = ((XCHAR_OR_INT (ccl_prog[ic + (i / 3)])) \ >> ((2 - (i % 3)) * 8)) & 0xFF; \ if (!ichar_multibyte_p(ch)) \ { \ @@ -837,7 +857,7 @@ } while (0) #define POSSIBLE_LEADING_BYTE_P(leading_byte) \ - ((leading_byte > MIN_LEADING_BYTE) && \ + ((leading_byte >= MIN_LEADING_BYTE) && \ (leading_byte - MIN_LEADING_BYTE) < NUM_LEADING_BYTES) /* Set C to the character code made from CHARSET and CODE. This is @@ -864,7 +884,7 @@ } \ else if (!NILP(charset_by_leading_byte(charset)) \ && ((code) >= 32) \ - && ((code) < 256 || ((code >> 8) & 0x7F) >= 32)) \ + && ((code) < 256 || ((code >> 7) & 0x7F) >= 32)) \ { \ int c1, c2 = 0; \ \ @@ -875,7 +895,7 @@ } \ else \ { \ - c1 = ((code) >> 8) & 0x7F; \ + c1 = ((code) >> 7) & 0x7F; \ c2 = (code) & 0x7F; \ } \ c = make_ichar (charset_by_leading_byte(charset), \ @@ -898,7 +918,7 @@ #ifdef CCL_DEBUG #define CCL_DEBUG_BACKTRACE_LEN 256 -int ccl_backtrace_table[CCL_BACKTRACE_TABLE]; +int ccl_backtrace_table[CCL_DEBUG_BACKTRACE_LEN]; int ccl_backtrace_idx; #endif @@ -966,7 +986,7 @@ } this_ic = ic; - code = XINT (ccl_prog[ic]); ic++; + code = XCHAR_OR_INT (ccl_prog[ic]); ic++; field1 = code >> 8; field2 = (code & 0xFF) >> 5; @@ -987,7 +1007,7 @@ break; case CCL_SetConst: /* 00000000000000000000rrrXXXXX */ - reg[rrr] = XINT (ccl_prog[ic]); + reg[rrr] = XCHAR_OR_INT (ccl_prog[ic]); ic++; break; @@ -998,7 +1018,7 @@ but the left one was already there so clearly the intention was an unsigned comparison. 
--ben */ if ((unsigned int) i < (unsigned int) j) - reg[rrr] = XINT (ccl_prog[ic + i]); + reg[rrr] = XCHAR_OR_INT (ccl_prog[ic + i]); ic += j; break; @@ -1026,13 +1046,13 @@ break; case CCL_WriteConstJump: /* A--D--D--R--E--S--S-000XXXXX */ - i = XINT (ccl_prog[ic]); + i = XCHAR_OR_INT (ccl_prog[ic]); CCL_WRITE_CHAR (i); ic += ADDR; break; case CCL_WriteConstReadJump: /* A--D--D--R--E--S--S-rrrXXXXX */ - i = XINT (ccl_prog[ic]); + i = XCHAR_OR_INT (ccl_prog[ic]); CCL_WRITE_CHAR (i); ic++; CCL_READ_CHAR (reg[rrr]); @@ -1040,7 +1060,7 @@ break; case CCL_WriteStringJump: /* A--D--D--R--E--S--S-000XXXXX */ - j = XINT (ccl_prog[ic]); + j = XCHAR_OR_INT (ccl_prog[ic]); ic++; CCL_WRITE_STRING (j); ic += ADDR - 1; @@ -1048,11 +1068,11 @@ case CCL_WriteArrayReadJump: /* A--D--D--R--E--S--S-rrrXXXXX */ i = reg[rrr]; - j = XINT (ccl_prog[ic]); + j = XCHAR_OR_INT (ccl_prog[ic]); /* #### see comment at CCL_SetArray */ if ((unsigned int) i < (unsigned int) j) { - i = XINT (ccl_prog[ic + 1 + i]); + i = XCHAR_OR_INT (ccl_prog[ic + 1 + i]); CCL_WRITE_CHAR (i); } ic += j + 2; @@ -1071,9 +1091,9 @@ case CCL_Branch: /* CCCCCCCCCCCCCCCCCCCCrrrXXXXX */ /* #### see comment at CCL_SetArray */ if ((unsigned int) reg[rrr] < (unsigned int) field1) - ic += XINT (ccl_prog[ic + reg[rrr]]); + ic += XCHAR_OR_INT (ccl_prog[ic + reg[rrr]]); else - ic += XINT (ccl_prog[ic + field1]); + ic += XCHAR_OR_INT (ccl_prog[ic + field1]); break; case CCL_ReadRegister: /* CCCCCCCCCCCCCCCCCCCCrrXXXXX */ @@ -1081,7 +1101,7 @@ { CCL_READ_CHAR (reg[rrr]); if (!field1) break; - code = XINT (ccl_prog[ic]); ic++; + code = XCHAR_OR_INT (ccl_prog[ic]); ic++; field1 = code >> 8; field2 = (code & 0xFF) >> 5; } @@ -1090,7 +1110,7 @@ case CCL_WriteExprConst: /* 1:00000OPERATION000RRR000XXXXX */ rrr = 7; i = reg[RRR]; - j = XINT (ccl_prog[ic]); + j = XCHAR_OR_INT (ccl_prog[ic]); op = field1 >> 6; jump_address = ic + 1; goto ccl_set_expr; @@ -1101,7 +1121,7 @@ i = reg[rrr]; CCL_WRITE_CHAR (i); if (!field1) break; - code = 
XINT (ccl_prog[ic]); ic++; + code = XCHAR_OR_INT (ccl_prog[ic]); ic++; field1 = code >> 8; field2 = (code & 0xFF) >> 5; } @@ -1124,7 +1144,7 @@ following code. */ if (rrr) { - prog_id = XINT (ccl_prog[ic]); + prog_id = XCHAR_OR_INT (ccl_prog[ic]); ic++; } else @@ -1168,7 +1188,7 @@ /* #### see comment at CCL_SetArray */ if ((unsigned int) i < (unsigned int) field1) { - j = XINT (ccl_prog[ic + i]); + j = XCHAR_OR_INT (ccl_prog[ic + i]); CCL_WRITE_CHAR (j); } ic += field1; @@ -1190,7 +1210,7 @@ CCL_SUCCESS; case CCL_ExprSelfConst: /* 00000OPERATION000000rrrXXXXX */ - i = XINT (ccl_prog[ic]); + i = XCHAR_OR_INT (ccl_prog[ic]); ic++; op = field1 >> 6; goto ccl_expr_self; @@ -1227,7 +1247,7 @@ case CCL_SetExprConst: /* 00000OPERATION000RRRrrrXXXXX */ i = reg[RRR]; - j = XINT (ccl_prog[ic]); + j = XCHAR_OR_INT (ccl_prog[ic]); op = field1 >> 6; jump_address = ++ic; goto ccl_set_expr; @@ -1243,9 +1263,9 @@ CCL_READ_CHAR (reg[rrr]); case CCL_JumpCondExprConst: /* A--D--D--R--E--S--S-rrrXXXXX */ i = reg[rrr]; - op = XINT (ccl_prog[ic]); + op = XCHAR_OR_INT (ccl_prog[ic]); jump_address = ic++ + ADDR; - j = XINT (ccl_prog[ic]); + j = XCHAR_OR_INT (ccl_prog[ic]); ic++; rrr = 7; goto ccl_set_expr; @@ -1254,9 +1274,9 @@ CCL_READ_CHAR (reg[rrr]); case CCL_JumpCondExprReg: i = reg[rrr]; - op = XINT (ccl_prog[ic]); + op = XCHAR_OR_INT (ccl_prog[ic]); jump_address = ic++ + ADDR; - j = reg[XINT (ccl_prog[ic])]; + j = reg[XCHAR_OR_INT (ccl_prog[ic])]; ic++; rrr = 7; @@ -1358,7 +1378,7 @@ if ((src + 1) >= src_end) goto ccl_read_multibyte_character_suspend; reg[RRR] = *src++; - reg[rrr] = (*src++ & 0x7F); + reg[rrr] = (*src++ & 0xFF); } else if (i == PRE_LEADING_BYTE_PRIVATE_2) { @@ -1438,7 +1458,7 @@ #if 0 /* XEmacs does not have translate_char or an equivalent. We do nothing on this operation. 
*/ - op = XINT (ccl_prog[ic]); /* table */ + op = XCHAR_OR_INT (ccl_prog[ic]); /* table */ ic++; CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); op = translate_char (GET_TRANSLATION_TABLE (op), i, -1, 0, 0); @@ -1454,7 +1474,8 @@ { Lisp_Object ucs; - CCL_MAKE_CHAR(reg[rrr], reg[RRR], op); + CCL_MAKE_CHAR (reg[rrr], reg[RRR], op); + ucs = Fchar_to_unicode(make_char(op)); if (NILP(ucs)) @@ -1465,7 +1486,7 @@ } else { - reg[rrr] = XINT(ucs); + reg[rrr] = XCHAR_OR_INT(ucs); if (-1 == reg[rrr]) { reg[rrr] = 0xFFFD; /* REPLACEMENT CHARACTER */ @@ -1488,7 +1509,7 @@ if (j != 0) { - i = (i << 8) | j; + i = (i << 7) | j; } reg[rrr] = i; @@ -1500,12 +1521,65 @@ break; } + case CCL_LookupIntConstTbl: + op = XCHAR_OR_INT (ccl_prog[ic]); /* table */ + ic++; + { + struct Lisp_Hash_Table *h = GET_HASH_TABLE (op); + htentry *e = find_htentry(make_int (reg[RRR]), h); + Lisp_Object scratch; + + if (!HTENTRY_CLEAR_P(e)) + { + op = XCHARVAL (e->value); + if (!valid_ichar_p(op)) + { + CCL_INVALID_CMD; + } + + BREAKUP_ICHAR (op, scratch, i, j); + reg[RRR] = XCHARSET_ID(scratch); + + if (j != 0) + { + i = (i << 7) | j; + } + reg[rrr] = i; + reg[7] = 1; /* r7 true for success */ + } + else + reg[7] = 0; + } + break; + + case CCL_LookupCharConstTbl: + op = XCHAR_OR_INT (ccl_prog[ic]); /* table */ + ic++; + CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); + { + struct Lisp_Hash_Table *h = GET_HASH_TABLE (op); + htentry *e = find_htentry(make_int(i), h); + + if (!HTENTRY_CLEAR_P(e)) + { + op = e->value; + if (!INTP (op)) + CCL_INVALID_CMD; + reg[RRR] = XCHAR_OR_INT (op); + reg[7] = 1; /* r7 true for success */ + } + else + reg[7] = 0; + } + break; + + case CCL_IterateMultipleMap: { Lisp_Object map, content, attrib, value; int point, size, fin_ic; - j = XINT (ccl_prog[ic++]); /* number of maps. */ + j = XCHAR_OR_INT (ccl_prog[ic++]); /* number of maps. 
*/ fin_ic = ic + j; op = reg[rrr]; if ((j > reg[RRR]) && (j >= 0)) @@ -1523,7 +1597,7 @@ for (;i < j;i++) { size = XVECTOR (Vcode_conversion_map_vector)->size; - point = XINT (ccl_prog[ic++]); + point = XCHAR_OR_INT (ccl_prog[ic++]); if (point >= size) continue; map = XVECTOR (Vcode_conversion_map_vector)->contents[point]; @@ -1569,7 +1643,7 @@ else if (INTP (content)) { reg[RRR] = i; - reg[rrr] = XINT(content); + reg[rrr] = XCHAR_OR_INT(content); break; } else if (EQ (content, Qt) || EQ (content, Qlambda)) @@ -1620,7 +1694,7 @@ stack_idx_of_map_multiple = 0; map_set_rest_length = - XINT (ccl_prog[ic++]); /* number of maps and separators. */ + XCHAR_OR_INT (ccl_prog[ic++]); /* number of maps and separators. */ fin_ic = ic + map_set_rest_length; op = reg[rrr]; @@ -1688,7 +1762,7 @@ do { for (;map_set_rest_length > 0;i++, ic++, map_set_rest_length--) { - point = XINT(ccl_prog[ic]); + point = XCHAR_OR_INT(ccl_prog[ic]); if (point < 0) { /* +1 is for including separator. */ @@ -1749,7 +1823,7 @@ reg[RRR] = i; if (INTP (content)) { - op = XINT (content); + op = XCHAR_OR_INT (content); i += map_set_rest_length - 1; ic += map_set_rest_length - 1; POP_MAPPING_STACK (map_set_rest_length, reg[rrr]); @@ -1807,7 +1881,7 @@ { Lisp_Object map, attrib, value, content; int size, point; - j = XINT (ccl_prog[ic++]); /* map_id */ + j = XCHAR_OR_INT (ccl_prog[ic++]); /* map_id */ op = reg[rrr]; if (j >= XVECTOR (Vcode_conversion_map_vector)->size) { @@ -1840,7 +1914,7 @@ if (NILP (content)) reg[RRR] = -1; else if (INTP (content)) - reg[rrr] = XINT (content); + reg[rrr] = XCHAR_OR_INT (content); else if (EQ (content, Qt)); else if (CONSP (content)) { @@ -1943,7 +2017,9 @@ for (i = 0; i < veclen; i++) { contents = XVECTOR (result)->contents[i]; - if (INTP (contents)) + /* XEmacs change; accept characters as well as integers, on the basis + that most CCL code written doesn't make a distinction. 
*/ + if (INTP (contents) || CHARP(contents)) continue; else if (CONSP (contents) && SYMBOLP (XCAR (contents)) @@ -2107,8 +2183,8 @@ syntax_error ("Length of vector REGISTERS is not 8", Qunbound); for (i = 0; i < 8; i++) - ccl.reg[i] = (INTP (XVECTOR_DATA (reg)[i]) - ? XINT (XVECTOR_DATA (reg)[i]) + ccl.reg[i] = (INTP (XVECTOR_DATA (reg)[i]) || CHARP (XVECTOR_DATA (reg)[i]) + ? XCHAR_OR_INT (XVECTOR_DATA (reg)[i]) : 0); ccl_driver (&ccl, (const unsigned char *)0, @@ -2172,10 +2248,13 @@ XVECTOR_DATA (status)[i] = make_int (0); if (INTP (XVECTOR_DATA (status)[i])) ccl.reg[i] = XINT (XVECTOR_DATA (status)[i]); + if (CHARP (XVECTOR_DATA (status)[i])) + ccl.reg[i] = XCHAR (XVECTOR_DATA (status)[i]); } - if (INTP (XVECTOR (status)->contents[i])) + if (INTP (XVECTOR (status)->contents[i]) || + CHARP (XVECTOR (status)->contents[i])) { - i = XINT (XVECTOR_DATA (status)[8]); + i = XCHAR_OR_INT (XVECTOR_DATA (status)[8]); if (ccl.ic < i && i < ccl.size) ccl.ic = i; } @@ -2347,9 +2426,16 @@ void vars_of_mule_ccl (void) { + staticpro (&Vccl_program_table); Vccl_program_table = Fmake_vector (make_int (32), Qnil); +#ifdef DEBUG_XEMACS + DEFVAR_LISP ("ccl-program-table", + &Vccl_program_table /* +Vector containing all registered CCL programs. +*/ ); +#endif DEFSYMBOL (Qccl_program); DEFSYMBOL (Qccl_program_idx); DEFSYMBOL (Qcode_conversion_map); @@ -2360,19 +2446,15 @@ */ ); Vcode_conversion_map_vector = Fmake_vector (make_int (16), Qnil); - DEFVAR_LISP ("font-ccl-encoder-alist", &Vfont_ccl_encoder_alist /* -Alist of fontname patterns vs corresponding CCL program. -Each element looks like (REGEXP . CCL-CODE), - where CCL-CODE is a compiled CCL program. -When a font whose name matches REGEXP is used for displaying a character, - CCL-CODE is executed to calculate the code point in the font - from the charset number and position code(s) of the character which are set - in CCL registers R0, R1, and R2 before the execution. 
-The code point in the font is set in CCL registers R1 and R2 - when the execution terminated. -If the font is single-byte font, the register R2 is not used. + DEFVAR_LISP ("translation-hash-table-vector", + &Vtranslation_hash_table_vector /* +Vector containing all translation hash tables ever defined. +Comprises pairs (SYMBOL . TABLE) where SYMBOL and TABLE were set up by calls +to `define-translation-hash-table'. The vector is indexed by the table id +used by CCL. */ ); - Vfont_ccl_encoder_alist = Qnil; + Vtranslation_hash_table_vector = Qnil; + } #endif /* emacs */