Mercurial > hg > xemacs-beta
comparison lisp/mule/mule-category.el @ 428:3ecd8885ac67 r21-2-22
Import from CVS: tag r21-2-22
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:28:15 +0200 |
parents | |
children | 9d177e8d4150 |
comparison
equal
deleted
inserted
replaced
427:0a0253eac470 | 428:3ecd8885ac67 |
---|---|
1 ;;; mule-category.el --- category functions for XEmacs/Mule. | |
2 | |
3 ;; Copyright (C) 1992,93,94,95 Free Software Foundation, Inc. | |
4 ;; Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN. | |
5 ;; Licensed to the Free Software Foundation. | |
6 ;; Copyright (C) 1995 Amdahl Corporation. | |
7 ;; Copyright (C) 1995 Sun Microsystems. | |
8 | |
9 ;; This file is part of XEmacs. | |
10 | |
11 ;; XEmacs is free software; you can redistribute it and/or modify it | |
12 ;; under the terms of the GNU General Public License as published by | |
13 ;; the Free Software Foundation; either version 2, or (at your option) | |
14 ;; any later version. | |
15 | |
16 ;; XEmacs is distributed in the hope that it will be useful, but | |
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 ;; General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with XEmacs; see the file COPYING. If not, write to the | |
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 ;; Boston, MA 02111-1307, USA. | |
25 | |
26 ;;; Commentary: | |
27 | |
28 ;; Functions for working with category tables, which are a particular | |
29 ;; type of char table. Some function names / arguments should be | |
30 ;; parallel with syntax tables. | |
31 | |
32 ;; Written by Ben Wing <ben@xemacs.org>. The initialization code | |
33 ;; at the end of this file comes from Mule. | |
34 ;; Some bugfixes by Jareth Hein <jhod@po.iijnet.or.jp> | |
35 | |
36 ;;; Code: | |
37 | |
(defvar defined-category-hashtable (make-hashtable 50)
  "Hash table of the defined categories.
Each key is a category designator; its value is the doc string that
was supplied to `define-category'.")
39 | |
(defun define-category (designator doc-string)
  "Define a new category denoted by DESIGNATOR.
DESIGNATOR should be a visible letter of ' ' thru '~'.
DOC-STRING is a string documenting the category.
Letters of 'a' thru 'z' are already used or kept for the system."
  ;; Both arguments are type-checked before the table is touched.
  (check-argument-type 'category-designator-p designator)
  (check-argument-type 'stringp doc-string)
  ;; Record (or overwrite) the doc string under the designator.
  (puthash designator doc-string defined-category-hashtable))
48 | |
(defun undefine-category (designator)
  "Remove the category definition denoted by DESIGNATOR.
DESIGNATOR must satisfy `category-designator-p'."
  (check-argument-type 'category-designator-p designator)
  ;; Dropping the hash entry is all that "undefining" amounts to here.
  (remhash designator defined-category-hashtable))
53 | |
(defun defined-category-p (designator)
  "Return non-nil if DESIGNATOR denotes a category that has been defined.
The non-nil value is the entry stored for DESIGNATOR in
`defined-category-hashtable'."
  ;; Anything that is not a valid designator is rejected without a lookup.
  (if (category-designator-p designator)
      (gethash designator defined-category-hashtable)))
58 | |
(defun defined-category-list ()
  "Return a list of the designators of all currently defined categories."
  (let (designators)
    ;; Only the keys are collected; the stored doc strings are ignored.
    (maphash #'(lambda (designator doc)
                 (push designator designators))
             defined-category-hashtable)
    (nreverse designators)))
67 | |
(defun undefined-category-designator ()
  "Return an undefined category designator, or nil if there are none.
The candidates are the characters with codes 32 through 126, tried in
increasing order; the first one that has no entry in
`defined-category-hashtable' is returned."
  (let ((code 32) found)
    (while (and (< code 127) (not found))
      ;; Categories are stored under character designators (see
      ;; `define-category'), so probe the table with the character,
      ;; not with the bare integer code.
      (let ((candidate (int-to-char code)))
        ;; The search is for a designator that is NOT yet in the table.
        (if (not (gethash candidate defined-category-hashtable))
            (setq found candidate)))
      (setq code (1+ code)))
    found))
76 | |
(defun category-doc-string (designator)
  "Return the doc string recorded for the category DESIGNATOR denotes.
DESIGNATOR is first checked with `defined-category-p'."
  (check-argument-type 'defined-category-p designator)
  ;; The doc string is the value stored by `define-category'.
  (gethash designator defined-category-hashtable))
81 | |
(defun modify-category-entry (char-range designator &optional table reset)
  "Put the characters of CHAR-RANGE into the category DESIGNATOR.
CHAR-RANGE is a single character or a range of characters,
as per `put-char-table'.
DESIGNATOR is the designator character of a defined category.
The change is made in TABLE, which defaults to the current buffer's
category table.
If optional fourth argument RESET is non-nil, the categories previously
associated with CHAR-RANGE are removed before DESIGNATOR is added."
  (unless table
    (setq table (category-table)))
  (check-argument-type 'category-table-p table)
  (check-argument-type 'defined-category-p designator)
  (when reset
    ;; Discard everything stored for the range so far.
    (put-char-table char-range nil table))
  (map-char-table
   #'(lambda (key value)
       ;; Work on a private bit vector: a copy of the stored one, or a
       ;; fresh all-zero vector of 95 bits when none is stored yet.
       (let ((bits (if (bit-vector-p value)
                       (copy-sequence value)
                     (make-bit-vector 95 0))))
         ;; Bit N stands for the designator whose code is N + 32.
         (aset bits (- designator 32) 1)
         ;; Store the vector back so this range gets its own setting.
         (put-char-table key bits table)))
   table char-range))
108 | |
(defun char-category-list (char &optional table)
  "Return a list of the categories that CHAR belongs to.
TABLE defaults to the current buffer's category table.
Each category is given by the code of its designator (32 through 126),
in increasing order.  The result is nil when TABLE holds nothing for CHAR."
  (unless table
    (setq table (category-table)))
  (check-argument-type 'category-table-p table)
  (let ((bits (get-char-table char table))
        (offset 0)
        designators)
    (when bits
      ;; Bit OFFSET of the stored vector stands for designator OFFSET + 32.
      (while (< offset 95)
        (when (= 1 (aref bits offset))
          (push (+ offset 32) designators))
        (setq offset (1+ offset)))
      (nreverse designators))))
123 | |
124 ;; implemented in C, file chartab.c (97/3/14 jhod@po.iijnet.or.jp) | |
125 ;(defun char-in-category-p (char category &optional table) | |
126 ; "Return non-nil if CHAR is in CATEGORY. | |
127 ;TABLE defaults to the current buffer's category table. | |
128 ;Categories are specified by their designators." | |
129 ; (or table (setq table (category-table))) | |
130 ; (check-argument-type 'category-table-p table) | |
131 ; (check-argument-type 'category-designator-p category) | |
132 ; (let ((vec (get-char-table char table))) | |
133 ; (if (null vec) nil | |
134 ; (= 1 (aref vec (- category 32)))))) | |
135 | |
(defun describe-category ()
  "Describe the category specifications of the current category table.
The description is written to the \"*Help*\" buffer, which is then displayed."
  (interactive)
  ;; `standard-output' is the stream set up by `with-output-to-temp-buffer'.
  (with-output-to-temp-buffer "*Help*"
    (describe-category-table (category-table)
                             standard-output)))
142 | |
(defun describe-category-table (table stream)
  "Print a description of the category table TABLE to STREAM.
TABLE is walked with `map-char-table'.  Consecutive entries that carry
equal values -- adjacent characters of one charset, or adjacent rows of
one charset -- are reported together as a single range.  Entries whose
value is not a bit vector with at least one bit set are not printed."
  (let (first-char                      ; first entry of the pending run
        last-char                       ; last entry of the pending run
        prev-val                        ; value shared by the pending run
        (describe-one
         (lambda (first last value stream)
           ;; Only entries that are in at least one category are shown.
           (if (and (bit-vector-p value)
                    (> (reduce '+ value) 0))
               (progn
                 (if (equal first last)
                     ;; A single entry: charset row, whole charset, or char.
                     (cond ((vectorp first)
                            (princ (format "%s, row %d"
                                           (charset-name
                                            (aref first 0))
                                           (aref first 1))
                                   stream))
                           ((charsetp first)
                            (princ (charset-name first) stream))
                           (t (princ first stream)))
                   ;; A run of several entries.
                   (cond ((vectorp first)
                          (princ (format "%s, rows %d .. %d"
                                         (charset-name
                                          (aref first 0))
                                         (aref first 1)
                                         (aref last 1))
                                 stream))
                         (t
                          (princ (format "%s .. %s" first last)
                                 stream))))
                 (describe-category-code value stream))))))
    (map-char-table
     (lambda (range value)
       (if (and first-char
                ;; A run is only extended by an entry with the same value ...
                (equal value prev-val)
                ;; ... that directly follows the run's current end.
                (or
                 (and (characterp range)
                      (characterp first-char)
                      (eq (char-charset range) (char-charset first-char))
                      (= (char-to-int last-char) (1- (char-to-int range))))
                 (and (vectorp range)
                      (vectorp first-char)
                      (eq (aref range 0) (aref first-char 0))
                      (= (aref last-char 1) (1- (aref range 1))))))
           (setq last-char range)
         ;; RANGE does not continue the pending run: report that run (if
         ;; there is one) and start a new run at RANGE.
         (if first-char
             (funcall describe-one first-char last-char prev-val stream))
         (setq first-char range
               last-char range
               prev-val value))
       ;; Return nil so that `map-char-table' keeps going.
       nil)
     table)
    ;; Report the run that was still pending when the walk ended.
    (if first-char
        (funcall describe-one first-char last-char prev-val stream))))
195 | |
(defun describe-category-code (code stream)
  "Print the categories recorded in CODE, followed by a newline.
Output goes to STREAM, or to `standard-output' when STREAM is nil.
CODE is normally a bit vector in which bit N stands for the designator
with code N + 32.  The designators that are set are listed first,
separated by spaces, then one \"meaning:\" line is printed per designator
with its `category-doc-string'.  \"(none)\" is printed when CODE is not a
bit vector or has no bit set."
  (let ((standard-output (or stream standard-output)))
    (princ "\tin categories: ")
    (if (bit-vector-p code)
        (progn
          ;; First pass: the designator characters themselves.
          (let ((idx 0)
                printed-one)
            (while (< idx 95)
              (when (= 1 (aref code idx))
                ;; A separating space goes before every designator but
                ;; the first.
                (if printed-one
                    (princ " ")
                  (setq printed-one t))
                (princ (int-to-char (+ 32 idx))))
              (setq idx (1+ idx)))
            (unless printed-one
              (princ "(none)")))
          ;; Second pass: the doc string of each designator that is set.
          (let ((idx 0))
            (while (< idx 95)
              (when (= 1 (aref code idx))
                (princ (format "\n\t\tmeaning: %s"
                               (category-doc-string
                                (int-to-char (+ 32 idx))))))
              (setq idx (1+ idx)))))
      (princ "(none)"))
    (terpri)))
220 | |
(defconst predefined-category-list
  '(;; Latin
    (latin-iso8859-1        ?l "Latin-1 through Latin-5 character set")
    (latin-iso8859-2        ?l)
    (latin-iso8859-3        ?l)
    (latin-iso8859-4        ?l)
    (latin-iso8859-9        ?l)
    ;; Other ISO 8859 charsets
    (cyrillic-iso8859-5     ?y "Cyrillic character set")
    (arabic-iso8859-6       ?b "Arabic character set")
    (greek-iso8859-7        ?g "Greek character set")
    (hebrew-iso8859-8       ?w "Hebrew character set")
    ;; Japanese
    (katakana-jisx0201      ?k "Japanese 1-byte Katakana character set")
    (latin-jisx0201         ?r "Japanese 1-byte Roman character set")
    (japanese-jisx0208-1978 ?j "Japanese 2-byte character set (old)")
    (japanese-jisx0208      ?j "Japanese 2-byte character set")
    (japanese-jisx0212      ?j)
    ;; Chinese
    (chinese-gb2312         ?c "Chinese GB (China, PRC) 2-byte character set")
    (chinese-cns11643-1     ?t "Chinese Taiwan (CNS or Big5) 2-byte character set")
    (chinese-cns11643-2     ?t)
    (chinese-big5-1         ?t)
    (chinese-big5-2         ?t)
    ;; Korean
    (korean-ksc5601         ?h "Hangul (Korean) 2-byte character set")
    )
  "List of the predefined categories.
Each element has the form (CHARSET DESIGNATOR [DOC-STRING]): the name of
a charset, the designator of the category its characters are put in,
and optionally a doc string for that category.")
245 | |
;; Load-time setup: define the ASCII and Latin categories, put the codes
;; 32 through 126 into both, then process `predefined-category-list'.
(let ((code 32)
      (entries predefined-category-list)
      entry)
  (define-category ?a "ASCII character set.")
  (define-category ?l "Latin-1 through Latin-5 character set")
  (while (< code 127)
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)
    (setq code (1+ code)))
  (while entries
    ;; ENTRY is (CHARSET DESIGNATOR [DOC-STRING]).
    (setq entry (car entries))
    ;; NOTE(review): `defined-category-p' is applied to the doc string
    ;; (element 2), not to the designator (element 1), so the category is
    ;; (re)defined for every entry that carries a doc string.  Testing the
    ;; designator instead would keep the first doc string seen for a
    ;; designator (e.g. the "(old)" text for ?j) -- confirm which is wanted.
    (when (and (nth 2 entry)
               (not (defined-category-p (nth 2 entry))))
      (define-category (nth 1 entry) (nth 2 entry)))
    (modify-category-entry (car entry) (nth 1 entry))
    (setq entries (cdr entries))))
261 | |
262 ;;; Setting word boundary. | |
263 | |
;; Both word-boundary tables are lists of (DESIGNATOR . DESIGNATOR) pairs.
(setq word-combining-categories
      '((?l . ?l))

      word-separating-categories        ; (2-byte character sets)
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
278 | |
;;; As far as is known at present, Japanese and Chinese text may be
;;; broken into lines at any point, subject to the 'kinsoku' restrictions.
(defvar word-across-newline "\\(\\cj\\|\\cc\\|\\ct\\)"
  "Regular expression matching characters that can be in a word across newline.
It matches any character in category j, c or t.")
283 | |
;; Regexp character classes over ASCII; the ranges are written with
;; octal character escapes.
(defvar ascii-char "[\40-\176]"
  "Regexp matching one character in the range octal 40 (space) thru 176 (~).")
(defvar ascii-space "[ \t]"
  "Regexp matching one space or tab.")
(defvar ascii-symbols "[\40-\57\72-\100\133-\140\173-\176]"
  "Regexp matching one character in the octal ranges 40-57, 72-100,
133-140 or 173-176, i.e. the range of `ascii-char' minus the digits
and letters.")
(defvar ascii-numeric "[\60-\71]"
  "Regexp matching one character in the range 0 thru 9.")
(defvar ascii-English-Upper "[\101-\132]"
  "Regexp matching one character in the range A thru Z.")
(defvar ascii-English-Lower "[\141-\172]"
  "Regexp matching one character in the range a thru z.")
(defvar ascii-alphanumeric "[\60-\71\101-\132\141-\172]"
  "Regexp matching one character in the ranges 0-9, A-Z or a-z.")
291 | |
;; Regexps for Japanese text.  Those written as "\\cX" match by category
;; designator X; the others are literal characters or character ranges.
;; NOTE(review): the "$B...(B" literals look like ISO-2022 encoded
;; two-byte text whose escape bytes may have been lost in this copy --
;; confirm against the original file before relying on them.
(defvar kanji-char "\\cj"
  "Regexp matching one character in category j.")
(defvar kanji-space "$B!!(B")
(defvar kanji-symbols "\\cS"
  "Regexp matching one character in category S.")
(defvar kanji-numeric "[$B#0(B-$B#9(B]")
(defvar kanji-English-Upper "[$B#A(B-$B#Z(B]")
(defvar kanji-English-Lower "[$B#a(B-$B#z(B]")
(defvar kanji-hiragana "\\cH"
  "Regexp matching one character in category H.")
(defvar kanji-katakana "\\cK"
  "Regexp matching one character in category K.")
(defvar kanji-Greek-Upper "[$B&!(B-$B&8(B]")
(defvar kanji-Greek-Lower "[$B&A(B-$B&X(B]")
(defvar kanji-Russian-Upper "[$B'!(B-$B'A(B]")
(defvar kanji-Russian-Lower "[$B'Q(B-$B'q(B]")
(defvar kanji-Kanji-1st-Level "[$B0!(B-$BOS(B]")
(defvar kanji-Kanji-2nd-Level "[$BP!(B-$Bt$(B]")

(defvar kanji-kanji-char "\\(\\cH\\|\\cK\\|\\cC\\)"
  "Regexp matching one character in category H, K or C.")