Mercurial > hg > xemacs-beta
view lisp/mule/viet-util.el @ 771:943eaba38521
[xemacs-hg @ 2002-03-13 08:51:24 by ben]
The big ben-mule-21-5 check-in!
Various files were added and deleted. See CHANGES-ben-mule.
There are still some test suite failures. No crashes, though.
Many of the failures have to do with problems in the test suite itself
rather than in the actual code. I'll be addressing these in the next
day or so -- none of the test suite failures are at all critical.
Meanwhile I'll be trying to address the biggest issues -- i.e. build
or run failures, which will almost certainly happen on various platforms.
All comments should be sent to ben@xemacs.org -- use a Cc: if necessary
when sending to mailing lists. There will be pre- and post- tags,
something like
pre-ben-mule-21-5-merge-in, and
post-ben-mule-21-5-merge-in.
author | ben |
---|---|
date | Wed, 13 Mar 2002 08:54:06 +0000 |
parents | |
children | 2923009caf47 |
line wrap: on
line source
;;; viet-util.el --- utilities for Vietnamese  -*- coding: iso-2022-7bit; -*-

;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
;; Licensed to the Free Software Foundation.

;; Keywords: mule, multilingual, Vietnamese

;; This file is part of XEmacs.

;; XEmacs is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; XEmacs is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with XEmacs; see the file COPYING.  If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.

;;; Synched up with: Emacs 21.0.103 (language/viet-util.el).

;;; Commentary:

;; Vietnamese uses ASCII characters and additional 134 unique
;; characters (these are Latin alphabets with various diacritical and
;; tone marks).  As far as I know, Vietnamese now has 4 different ways
;; for representing these characters: VISCII, VSCII, VIQR, and
;; Unicode.  VISCII and VSCII are simple 1-byte code which assigns 134
;; unique characters in control-code area (0x00..0x1F) and right half
;; area (0x80..0xFF).  VIQR is a mnemonic encoding specification
;; representing diacritical marks by following ASCII characters.

;;; Code:

;;;###autoload
(defun viet-encode-viscii-char (char)
  "Return VISCII character code of CHAR if appropriate.
The value is the element indexed by CHAR in extra slot 0 of
`viet-viscii-nonascii-translation-table'."
  (let ((viscii-codes
         (char-table-extra-slot viet-viscii-nonascii-translation-table 0)))
    (aref viscii-codes char)))

;; VIQR is a mnemonic encoding specification for Vietnamese.
;; It represents diacritical marks by ASCII characters as follows:
;; ------------+----------+---------
;; mark        | mnemonic | example
;; ------------+----------+---------
;; breve       | (        | a( -> ,1e(B
;; circumflex  | ^        | a^ -> ,1b(B
;; horn        | +        | o+ -> ,1=(B
;; ------------+----------+---------
;; acute       | '        | a' -> ,1a(B
;; grave       | `        | a` -> ,1`(B
;; hook above  | ?        | a? -> ,1d(B
;; tilde       | ~        | a~ -> ,1c(B
;; dot below   | .        | a. -> ,1U(B
;; ------------+----------+---------
;; d bar       | dd       | dd -> ,1p(B
;; ------------+----------+---------

;; Each element is (CHARACTER . VIQR-STRING).  The table is searched by
;; character with `assq' when encoding and by string with `rassoc' when
;; decoding (see the region functions below).
;;
;; NOTE(review): the character literals of the form ?,1X(B / ?,2X(B
;; look like ISO-2022-7bit sequences (see the coding cookie on the
;; first line of the file) whose ESC bytes are not visible here --
;; confirm against the repository copy before editing any entry.
;; The trailing numbers are kept as written; presumably they are the
;; code point of each character within its 1-byte set -- TODO confirm.
(defvar viet-viqr-alist
  '(;; lowercase
    (?,1!(B . "a('")			; 161
    (?,1"(B . "a(`")			; 162
    (?,1#(B . "a(.")			; 163
    (?,1$(B . "a^'")			; 164
    (?,1%(B . "a^`")			; 165
    (?,1&(B . "a^?")			; 166
    (?,1'(B . "a^.")			; 167
    (?,1((B . "e~")			; 168
    (?,1)(B . "e.")			; 169
    (?,1*(B . "e^'")			; 170
    (?,1+(B . "e^`")			; 171
    (?,1,(B . "e^?")			; 172
    (?,1-(B . "e^~")			; 173
    (?,1.(B . "e^.")			; 174
    (?,1/(B . "o^'")			; 175
    (?,10(B . "o^`")			; 176
    (?,11(B . "o^?")			; 177
    (?,12(B . "o^~")			; 178
    (?,15(B . "o^.")			; 181
    (?,16(B . "o+`")			; 182
    (?,17(B . "o+?")			; 183
    (?,18(B . "i.")			; 184
    (?,1=(B . "o+")			; 189
    (?,1>(B . "o+'")			; 190
    (?,1F(B . "a(?")			; 198
    (?,1G(B . "a(~")			; 199
    (?,1O(B . "y`")			; 207
    (?,1Q(B . "u+'")			; 209
    (?,1U(B . "a.")			; 213
    (?,1V(B . "y?")			; 214
    (?,1W(B . "u+`")			; 215
    (?,1X(B . "u+?")			; 216
    (?,1[(B . "y~")			; 219
    (?,1\(B . "y.")			; 220
    (?,1^(B . "o+~")			; 222
    (?,1_(B . "u+")			; 223
    (?,1`(B . "a`")			; 224
    (?,1a(B . "a'")			; 225
    (?,1b(B . "a^")			; 226
    (?,1c(B . "a~")			; 227
    (?,1d(B . "a?")			; 228
    (?,1e(B . "a(")			; 229
    (?,1f(B . "u+~")			; 230
    (?,1g(B . "a^~")			; 231
    (?,1h(B . "e`")			; 232
    (?,1i(B . "e'")			; 233
    (?,1j(B . "e^")			; 234
    (?,1k(B . "e?")			; 235
    (?,1l(B . "i`")			; 236
    (?,1m(B . "i'")			; 237
    (?,1n(B . "i~")			; 238
    (?,1o(B . "i?")			; 239
    (?,1p(B . "dd")			; 240
    (?,1q(B . "u+.")			; 241
    (?,1r(B . "o`")			; 242
    (?,1s(B . "o'")			; 243
    (?,1t(B . "o^")			; 244
    (?,1u(B . "o~")			; 245
    (?,1v(B . "o?")			; 246
    (?,1w(B . "o.")			; 247
    (?,1x(B . "u.")			; 248
    (?,1y(B . "u`")			; 249
    (?,1z(B . "u'")			; 250
    (?,1{(B . "u~")			; 251
    (?,1|(B . "u?")			; 252
    (?,1}(B . "y'")			; 253
    (?,1~(B . "o+.")			; 254

    ;; upper case
    (?,2!(B . "A('")			; 161
    (?,2"(B . "A(`")			; 162
    (?,2#(B . "A(.")			; 163
    (?,2$(B . "A^'")			; 164
    (?,2%(B . "A^`")			; 165
    (?,2&(B . "A^?")			; 166
    (?,2'(B . "A^.")			; 167
    (?,2((B . "E~")			; 168
    (?,2)(B . "E.")			; 169
    (?,2*(B . "E^'")			; 170
    (?,2+(B . "E^`")			; 171
    (?,2,(B . "E^?")			; 172
    (?,2-(B . "E^~")			; 173
    (?,2.(B . "E^.")			; 174
    (?,2/(B . "O^'")			; 175
    (?,20(B . "O^`")			; 176
    (?,21(B . "O^?")			; 177
    (?,22(B . "O^~")			; 178
    (?,25(B . "O^.")			; 181
    (?,26(B . "O+`")			; 182
    (?,27(B . "O+?")			; 183
    (?,28(B . "I.")			; 184
    (?,2=(B . "O+")			; 189
    (?,2>(B . "O+'")			; 190
    (?,2F(B . "A(?")			; 198
    (?,2G(B . "A(~")			; 199
    (?,2O(B . "Y`")			; 207
    (?,2Q(B . "U+'")			; 209
    (?,2U(B . "A.")			; 213
    (?,2V(B . "Y?")			; 214
    (?,2W(B . "U+`")			; 215
    (?,2X(B . "U+?")			; 216
    (?,2[(B . "Y~")			; 219
    (?,2\(B . "Y.")			; 220
    (?,2^(B . "O+~")			; 222
    (?,2_(B . "U+")			; 223
    (?,2`(B . "A`")			; 224
    (?,2a(B . "A'")			; 225
    (?,2b(B . "A^")			; 226
    (?,2c(B . "A~")			; 227
    (?,2d(B . "A?")			; 228
    (?,2e(B . "A(")			; 229
    (?,2f(B . "U+~")			; 230
    (?,2g(B . "A^~")			; 231
    (?,2h(B . "E`")			; 232
    (?,2i(B . "E'")			; 233
    (?,2j(B . "E^")			; 234
    (?,2k(B . "E?")			; 235
    (?,2l(B . "I`")			; 236
    (?,2m(B . "I'")			; 237
    (?,2n(B . "I~")			; 238
    (?,2o(B . "I?")			; 239
    ;; The same character is listed under three spellings so that
    ;; decoding (which looks entries up by string) accepts each of them;
    ;; encoding (which looks up by character) finds the first, "DD".
    (?,2p(B . "DD")			; 240
    (?,2p(B . "dD")			; 240
    (?,2p(B . "Dd")			; 240
    (?,2q(B . "U+.")			; 241
    (?,2r(B . "O`")			; 242
    (?,2s(B . "O'")			; 243
    (?,2t(B . "O^")			; 244
    (?,2u(B . "O~")			; 245
    (?,2v(B . "O?")			; 246
    (?,2w(B . "O.")			; 247
    (?,2x(B . "U.")			; 248
    (?,2y(B . "U`")			; 249
    (?,2z(B . "U'")			; 250
    (?,2{(B . "U~")			; 251
    (?,2|(B . "U?")			; 252
    (?,2}(B . "Y'")			; 253
    (?,2~(B . "O+.")			; 254

    ;; escape from composition
    (?\( . "\\(")			; breve (left parenthesis)
    (?^ . "\\^")			; circumflex (caret)
    (?+ . "\\+")			; horn (plus sign)
    (?' . "\\'")			; acute (apostrophe)
    (?` . "\\`")			; grave (backquote)
    (?? . "\\?")			; hook above (question mark)
    (?~ . "\\~")			; tilde (tilde)
    (?. . "\\.")			; dot below (period)
    (?d . "\\d")			; d-bar (d)
    (?\\ . "\\\\")			; literal backslash
    )
  "Alist of Vietnamese characters vs corresponding `VIQR' string.
Each element has the form (CHARACTER . STRING).")

;; Regular expression matching single Vietnamese character represented
;; by VIQR.  It has two alternatives:
;;   - a vowel (a e i o u y, either case) followed by either an optional
;;     one of ( ^ + and then one of ' ` ? ~ . , or by one of ( ^ + alone;
;;   - two letters `d' in any combination of cases.
;; The backslash-escaped forms listed at the end of `viet-viqr-alist'
;; are not matched by this pattern.
(defconst viqr-regexp
  "[aeiouyAEIOUY]\\([(^+]?['`?~.]\\|[(^+]\\)\\|[Dd][Dd]")

;;;###autoload
(defun viet-decode-viqr-region (from to)
  "Convert `VIQR' mnemonics of the current region to Vietnamese characters.
When called from a program, expects two arguments,
positions (integers or markers) specifying the stretch of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    (while (re-search-forward viqr-regexp nil t)
      ;; Look the matched text up by its VIQR string (the cdr of each
      ;; alist element) to find the character it stands for.
      (let* ((viqr (buffer-substring (match-beginning 0) (match-end 0)))
             (ch (car (rassoc viqr viet-viqr-alist))))
        ;; A match with no table entry is left in the buffer unchanged.
        (if ch
            (progn
              (delete-region (match-beginning 0) (match-end 0))
              (insert ch)))))))

;;;###autoload
(defun viet-decode-viqr-buffer ()
  "Convert `VIQR' mnemonics of the current buffer to Vietnamese characters.
This applies `viet-decode-viqr-region' from `point-min' to `point-max'."
  (interactive)
  (viet-decode-viqr-region (point-min) (point-max)))

;;;###autoload
(defun viet-encode-viqr-region (from to)
  "Convert Vietnamese characters of the current region to `VIQR' mnemonics.
When called from a program, expects two arguments,
positions (integers or markers) specifying the stretch of the region."
  (interactive "r")
  (save-restriction
    (narrow-to-region from to)
    (goto-char (point-min))
    ;; "\\cv" matches one character belonging to category `v'
    ;; (presumably the Vietnamese category -- confirm where the
    ;; category is defined).
    (while (re-search-forward "\\cv" nil t)
      ;; Point is just after the match, so the matched character is the
      ;; one before point.
      (let* ((ch (preceding-char))
             (viqr (cdr (assq ch viet-viqr-alist))))
        ;; Characters with no table entry are left unchanged.
        (if viqr
            (progn
              (delete-char -1)
              (insert viqr)))))))

;;;###autoload
(defun viet-encode-viqr-buffer ()
  "Convert Vietnamese characters of the current buffer to `VIQR' mnemonics.
This applies `viet-encode-viqr-region' from `point-min' to `point-max'."
  (interactive)
  (viet-encode-viqr-region (point-min) (point-max)))

;;;###autoload
(defun viqr-post-read-conversion (len)
  "Decode `VIQR' mnemonics in the LEN characters following point.
The buffer's modified flag is saved before the conversion and restored
afterwards.  Return the length of the converted text."
  (save-excursion
    (save-restriction
      (narrow-to-region (point) (+ (point) len))
      (let ((buffer-modified-p (buffer-modified-p)))
        (viet-decode-viqr-region (point-min) (point-max))
        (set-buffer-modified-p buffer-modified-p)
        ;; The buffer is still narrowed to the converted text here, so
        ;; this difference is its new length.
        (- (point-max) (point-min))))))

;;;###autoload
(defun viqr-pre-write-conversion (from to)
  "Put a `VIQR'-encoded copy of the text to be written in a new buffer.
If FROM is a string, that string is the text; otherwise the text is the
part of the current buffer between FROM and TO.  The text is inserted
into a freshly generated buffer, which is made current and is still
current when this function returns, and is encoded there with
`viet-encode-viqr-region'.  Return nil."
  (let ((old-buf (current-buffer)))
    ;; No `save-excursion' here: the new buffer stays current on return.
    (set-buffer (generate-new-buffer " *temp*"))
    (if (stringp from)
        (insert from)
      (insert-buffer-substring old-buf from to))
    (viet-encode-viqr-region (point-min) (point-max))
    ;; Should return nil as annotations.
    nil))

;;;
(provide 'viet-util)

;;; viet-util.el ends here