comparison lisp/mule/china-util.el @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents
children 2923009caf47
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 ;;; china-util.el --- utilities for Chinese -*- coding: iso-2022-7bit; -*-
2
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
4 ;; Licensed to the Free Software Foundation.
5
6 ;; Keywords: mule, multilingual, Chinese
7
8 ;; This file is part of XEmacs.
9
10 ;; XEmacs is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; XEmacs is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 ;; 02111-1307, USA.
24
25 ;;; Synched up with: Emacs 21.0.103 (language/china-util.el).
26
27 ;;; Code:
28
29 ;; Hz/ZW encoding stuffs
30
31 ;; HZ is an encoding method for Chinese character set GB2312 used
32 ;; widely in Internet. It is very similar to 7-bit environment of
33 ;; ISO-2022. The difference is that HZ uses the sequence "~{" and
34 ;; "~}" for designating GB2312 and ASCII respectively, hence, it
35 ;; doesn't uses ESC (0x1B) code.
36
37 ;; ZW is another encoding method for Chinese character set GB2312. It
38 ;; encodes Chinese characters line by line by starting each line with
39 ;; the sequence "zW". It also uses only 7-bit as HZ.
40
41 ;; ISO-2022 escape sequence to designate GB2312.
42 (defvar iso2022-gb-designation "\e$A")
43 ;; HZ escape sequence to designate GB2312.
44 (defvar hz-gb-designnation "~{")
45 ;; ISO-2022 escape sequence to designate ASCII.
46 (defvar iso2022-ascii-designation "\e(B")
47 ;; HZ escape sequence to designate ASCII.
48 (defvar hz-ascii-designnation "~}")
49 ;; Regexp of ZW sequence to start GB2312.
50 (defvar zw-start-gb "^zW")
51 ;; Regexp for start of GB2312 in an encoding mixture of HZ and ZW.
52 (defvar hz/zw-start-gb
53 (concat hz-gb-designnation "\\|" zw-start-gb "\\|[^\0-\177]"))
54
55 (defvar decode-hz-line-continuation nil
56 "Flag to tell if we should care line continuation convention of Hz.")
57
58 (defconst hz-set-msb-table
59 (let ((str (make-string 127 0))
60 (i 0))
61 (while (< i 33)
62 (aset str i i)
63 (setq i (1+ i)))
64 (while (< i 127)
65 (aset str i (+ i 128))
66 (setq i (1+ i)))
67 str))
68
69 ;;;###autoload
70 (defun decode-hz-region (beg end)
71 "Decode HZ/ZW encoded text in the current region.
72 Return the length of resulting text."
73 (interactive "r")
74 (save-excursion
75 (save-restriction
76 (let (pos ch)
77 (narrow-to-region beg end)
78
79 ;; We, at first, convert HZ/ZW to `euc-china',
80 ;; then decode it.
81
82 ;; "~\n" -> "\n", "~~" -> "~"
83 (goto-char (point-min))
84 (while (search-forward "~" nil t)
85 (setq ch (following-char))
86 (if (or (= ch ?\n) (= ch ?~)) (delete-char -1)))
87
88 ;; "^zW...\n" -> Chinese GB2312
89 ;; "~{...~}" -> Chinese GB2312
90 (goto-char (point-min))
91 (setq beg nil)
92 (while (re-search-forward hz/zw-start-gb nil t)
93 (setq pos (match-beginning 0)
94 ch (char-after pos))
95 ;; Record the first position to start conversion.
96 (or beg (setq beg pos))
97 (end-of-line)
98 (setq end (point))
99 (if (>= ch 128) ; 8bit GB2312
100 nil
101 (goto-char pos)
102 (delete-char 2)
103 (setq end (- end 2))
104 (if (= ch ?z) ; ZW -> euc-china
105 (progn
106 (translate-region (point) end hz-set-msb-table)
107 (goto-char end))
108 (if (search-forward hz-ascii-designnation
109 (if decode-hz-line-continuation nil end)
110 t)
111 (delete-char -2))
112 (setq end (point))
113 (translate-region pos (point) hz-set-msb-table))))
114 (if beg
115 (decode-coding-region beg end 'euc-china)))
116 (- (point-max) (point-min)))))
117
118 ;;;###autoload
119 (defun decode-hz-buffer ()
120 "Decode HZ/ZW encoded text in the current buffer."
121 (interactive)
122 (decode-hz-region (point-min) (point-max)))
123
124 ;;;###autoload
125 (defun encode-hz-region (beg end)
126 "Encode the text in the current region to HZ.
127 Return the length of resulting text."
128 (interactive "r")
129 (save-excursion
130 (save-restriction
131 (narrow-to-region beg end)
132
133 ;; "~" -> "~~"
134 (goto-char (point-min))
135 (while (search-forward "~" nil t) (insert ?~))
136
137 ;; Chinese GB2312 -> "~{...~}"
138 (goto-char (point-min))
139 (if (re-search-forward "\\cc" nil t)
140 (let (pos)
141 (goto-char (setq pos (match-beginning 0)))
142 (encode-coding-region pos (point-max) 'iso-2022-7bit)
143 (goto-char pos)
144 (while (search-forward iso2022-gb-designation nil t)
145 (delete-char -3)
146 (insert hz-gb-designnation))
147 (goto-char pos)
148 (while (search-forward iso2022-ascii-designation nil t)
149 (delete-char -3)
150 (insert hz-ascii-designnation))))
151 (- (point-max) (point-min)))))
152
153 ;;;###autoload
154 (defun encode-hz-buffer ()
155 "Encode the text in the current buffer to HZ."
156 (interactive)
157 (encode-hz-region (point-min) (point-max)))
158
159 ;;
160 (provide 'china-util)
161
162 ;;; china-util.el ends here