502
|
1 ;;; mule-category.el --- category functions for XEmacs/Mule. -*- coding: iso-2022-7bit; -*-
|
428
|
2
|
|
3 ;; Copyright (C) 1992,93,94,95 Free Software Foundation, Inc.
|
|
4 ;; Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN.
|
|
5 ;; Licensed to the Free Software Foundation.
|
|
6 ;; Copyright (C) 1995 Amdahl Corporation.
|
|
7 ;; Copyright (C) 1995 Sun Microsystems.
|
|
8
|
|
9 ;; This file is part of XEmacs.
|
|
10
|
|
11 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
12 ;; under the terms of the GNU General Public License as published by
|
|
13 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
14 ;; any later version.
|
|
15
|
|
16 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
19 ;; General Public License for more details.
|
|
20
|
|
21 ;; You should have received a copy of the GNU General Public License
|
444
|
22 ;; along with XEmacs; see the file COPYING. If not, write to the
|
428
|
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
24 ;; Boston, MA 02111-1307, USA.
|
|
25
|
|
26 ;;; Commentary:
|
|
27
|
|
28 ;; Functions for working with category tables, which are a particular
|
|
29 ;; type of char table. Some function names / arguments should be
|
|
30 ;; parallel with syntax tables.
|
|
31
|
|
32 ;; Written by Ben Wing <ben@xemacs.org>. The initialization code
|
|
33 ;; at the end of this file comes from Mule.
|
|
34 ;; Some bugfixes by Jareth Hein <jhod@po.iijnet.or.jp>
|
|
35
|
|
36 ;;; Code:
|
|
37
|
442
|
(defvar defined-category-hashtable (make-hash-table :size 50)
  "Hash table recording the defined categories.
Keys are category designators and values are the corresponding doc strings.
`define-category' adds entries and `undefine-category' removes them.")
|
428
|
39
|
3970
|
(defun define-category (designator doc-string &optional table)
  "Define a new category designated by the character DESIGNATOR.
DESIGNATOR must be a visible character in the range ' ' through '~'.
DOC-STRING is the documentation string describing the category.
The letters 'a' through 'z' are already used or reserved for the system.
TABLE is meant to limit the definition to that category table (it defaults
to the current buffer's category table), but that limitation is not
implemented: the definition is recorded in `defined-category-hashtable'."
  ;; #### Implement the limiting of the definition.
  (check-argument-type 'category-designator-p designator)
  (check-argument-type 'stringp doc-string)
  (unless table
    (setq table (category-table)))
  ;; TABLE is validated even though it is not otherwise used yet.
  (check-argument-type 'category-table-p table)
  (puthash designator doc-string defined-category-hashtable))
|
|
53
|
|
(defun undefine-category (designator)
  "Remove the category definition whose designator is DESIGNATOR.
DESIGNATOR must be a valid category designator character."
  (check-argument-type 'category-designator-p designator)
  (remhash designator defined-category-hashtable))
|
|
58
|
|
(defun defined-category-p (designator)
  "Return non-nil if DESIGNATOR designates a category that has been defined.
The non-nil value is the doc string stored for the category.  Objects that
are not category designators yield nil."
  (if (category-designator-p designator)
      (gethash designator defined-category-hashtable)))
|
|
63
|
|
(defun defined-category-list ()
  "Return the designators of all currently defined categories, as a list."
  (let ((designators nil))
    ;; Accumulate the keys of the definition table; the doc strings
    ;; stored as values are not needed here.
    (maphash #'(lambda (designator doc)
                 (setq designators (cons designator designators)))
             defined-category-hashtable)
    (nreverse designators)))
|
|
72
|
|
(defun undefined-category-designator ()
  "Return an undefined category designator, or nil if there are none.
The candidates are the characters with codes 32 through 126, tried in
increasing order; the first one with no entry in
`defined-category-hashtable' is returned."
  (let ((code 32)
        candidate
        found)
    (while (and (< code 127) (not found))
      ;; The hash table is keyed by designator *characters* (see
      ;; `define-category'), so the lookup must use the character, not the
      ;; integer code: a character is not `eql' to its integer code, and
      ;; probing with the integer would never find an existing definition.
      (setq candidate (make-char 'ascii code))
      (unless (gethash candidate defined-category-hashtable)
        (setq found candidate))
      (setq code (1+ code)))
    found))
|
|
81
|
|
(defun category-doc-string (designator)
  "Return the doc string of the category designated by DESIGNATOR.
DESIGNATOR must designate a category that is currently defined."
  (check-argument-type 'defined-category-p designator)
  (gethash designator defined-category-hashtable))
|
|
86
|
444
|
(defun modify-category-entry (char-range designator &optional category-table reset)
  "Add the category DESIGNATOR to the categories of CHAR-RANGE.
CHAR-RANGE is a single character or a range of characters, as accepted by
`put-char-table'.
DESIGNATOR is the designator character of a defined category.
The change is made in CATEGORY-TABLE, which defaults to the current
buffer's category table.
If the optional fourth argument RESET is non-nil, all categories previously
associated with CHAR-RANGE are removed before DESIGNATOR is added."
  (unless category-table
    (setq category-table (category-table)))
  (check-argument-type 'category-table-p category-table)
  (check-argument-type 'defined-category-p designator)
  ;; With RESET, wipe whatever was stored for the range first.
  (if reset
      (put-char-table char-range nil category-table))
  ;; Categories are kept as 95-element bit vectors; the bit for a
  ;; designator is its character code minus 32.
  (let ((bit-index (- designator 32)))
    (map-char-table
     #'(lambda (range bits)
         ;; Work on a private vector: a copy of the existing one, or a
         ;; fresh all-zero vector when the range had none.
         (setq bits (if (bit-vector-p bits)
                        (copy-sequence bits)
                      (make-bit-vector 95 0)))
         (aset bits bit-index 1)
         ;; Store it back so this range gets its own, unshared setting.
         (put-char-table range bits category-table))
     category-table char-range)))
|
428
|
113
|
444
|
(defun char-category-list (character &optional category-table)
  "Return the list of categories that CHARACTER belongs to.
Each category is given by its designator character; designators appear in
increasing character-code order.
CATEGORY-TABLE defaults to the current buffer's category table.
The result is nil when CATEGORY-TABLE holds no value for CHARACTER."
  (unless category-table
    (setq category-table (category-table)))
  (check-argument-type 'category-table-p category-table)
  (let ((bits (get-char-table character category-table))
        (code 126)
        designators)
    ;; Walk the designator codes from high to low and cons onto the front,
    ;; so the finished list is already in ascending order.
    (if bits
        (while (>= code 32)
          (if (= 1 (aref bits (- code 32)))
              (setq designators (cons (make-char 'ascii code) designators)))
          (setq code (1- code))))
    designators))
|
|
128
|
444
|
129 ;; implemented in C, file chartab.c (97/3/14 jhod@po.iijnet.or.jp)
|
428
|
130 ;(defun char-in-category-p (char category &optional table)
|
|
131 ; "Return non-nil if CHAR is in CATEGORY.
|
|
132 ;TABLE defaults to the current buffer's category table.
|
|
133 ;Categories are specified by their designators."
|
|
134 ; (or table (setq table (category-table)))
|
|
135 ; (check-argument-type 'category-table-p table)
|
|
136 ; (check-argument-type 'category-designator-p category)
|
|
137 ; (let ((vec (get-char-table char table)))
|
|
138 ; (if (null vec) nil
|
|
139 ; (= 1 (aref vec (- category 32))))))
|
|
140
|
788
|
(put 'with-category-table 'lisp-indent-function 1)

(defmacro with-category-table (category-table &rest body)
  "Evaluate BODY with CATEGORY-TABLE installed as the current category table.
The category table that was current beforehand is saved first and is
installed again when BODY exits, whether normally or through a non-local
exit.  The value is the value of the last form in BODY."
  ;; Keep the saved table in an uninterned symbol so that neither the
  ;; CATEGORY-TABLE form nor BODY can see, shadow or clobber the macro's
  ;; temporary variable.
  (let ((saved-table (make-symbol "saved-category-table")))
    `(let ((,saved-table (category-table)))
       (set-category-table ,category-table)
       (unwind-protect
           (progn ,@body)
         (set-category-table ,saved-table)))))
|
|
149
|
3970
|
(defun make-category-table ()
  "Create a new, empty category table and return it.
The table is a char table of type `category'."
  (make-char-table 'category))
|
|
153
|
428
|
(defun describe-category ()
  "Display a description of the current category table.
The description is produced by `describe-category-table' and written to
`standard-output' inside `with-displaying-help-buffer'."
  (interactive)
  (with-displaying-help-buffer
   (lambda ()
     (describe-category-table (category-table) standard-output))))
|
428
|
161
|
|
(defun describe-category-table (table stream)
  "Print a description of the category table TABLE to STREAM.
Entries whose value is not a bit vector, or whose bit vector has no bit
set, are skipped.  Consecutive entries with equal values are reported as
one range: characters must belong to the same charset and have adjacent
character codes, and charset rows must belong to the same charset and be
adjacent.  Each reported range is followed by its categories, printed by
`describe-category-code'."
  (let (first-char                      ; start of the pending run, or nil
        last-char                       ; end of the pending run
        prev-val                        ; value shared by the pending run
        (describe-one
         (lambda (first last value stream)
           ;; Only describe ranges that carry at least one category.
           (if (and (bit-vector-p value)
                    (> (reduce '+ value) 0))
               (progn
                 (if (equal first last)
                     ;; A single entry.
                     (cond ((vectorp first)
                            (princ (format "%s, row %d"
                                           (charset-name
                                            (aref first 0))
                                           (aref first 1))
                                   stream))
                           ((charsetp first)
                            (princ (charset-name first) stream))
                           (t (princ first stream)))
                   ;; A run of several entries.
                   (cond ((vectorp first)
                          (princ (format "%s, rows %d .. %d"
                                         (charset-name
                                          (aref first 0))
                                         (aref first 1)
                                         (aref last 1))
                                 stream))
                         (t
                          (princ (format "%s .. %s" first last)
                                 stream))))
                 (describe-category-code value stream))))))
    (map-char-table
     (lambda (range value)
       (cond
        ;; No run is pending: start one at this entry.
        ((not first-char)
         (setq first-char range
               last-char range
               prev-val value))
        ;; Same value and directly adjacent to the pending run: extend it.
        ((and (equal value prev-val)
              (or
               (and (characterp range)
                    (characterp first-char)
                    (eq (char-charset range) (char-charset first-char))
                    (= (char-to-int last-char) (1- (char-to-int range))))
               (and (vectorp range)
                    (vectorp first-char)
                    (eq (aref range 0) (aref first-char 0))
                    (= (aref last-char 1) (1- (aref range 1))))))
         (setq last-char range))
        ;; Otherwise report the pending run and start a new one here.
        (t
         (funcall describe-one first-char last-char prev-val stream)
         (setq first-char range
               last-char range
               prev-val value)))
       ;; Return nil so that the mapping continues over the whole table.
       nil)
     table)
    ;; Report the run that was still pending when the mapping finished.
    (if first-char
        (funcall describe-one first-char last-char prev-val stream))))
|
|
214
|
|
(defun describe-category-code (code stream)
  "Print the categories encoded in CODE.
Output goes to STREAM, or to `standard-output' when STREAM is nil.
CODE is normally a bit vector in which bit N stands for the category whose
designator has character code N + 32.  The designators of all set bits are
printed on one line, separated by spaces, followed by one \"meaning\" line
per category giving its doc string.  When CODE is not a bit vector, or has
no bit set, \"(none)\" is printed in place of the designators."
  (let ((standard-output (or stream standard-output))
        (index 0)
        members)
    (princ "\tin categories: ")
    (if (bit-vector-p code)
        (progn
          ;; First pass: print the designators, remembering each one.
          (while (< index 95)
            (if (= 1 (aref code index))
                (progn
                  (if members
                      (princ " "))
                  (princ (int-to-char (+ 32 index)))
                  (setq members (cons (int-to-char (+ 32 index)) members))))
            (setq index (1+ index)))
          (if (null members)
              (princ "(none)"))
          ;; Second pass: print the doc string of every category found,
          ;; in the same order as the designators above.
          (setq members (nreverse members))
          (while members
            (princ (format "\n\t\tmeaning: %s"
                           (category-doc-string (car members))))
            (setq members (cdr members))))
      (princ "(none)"))
    (terpri)))
|
|
239
|
|
(defconst predefined-category-list
  '((latin-iso8859-1        ?l "Latin-1 through Latin-5 character set")
    (latin-iso8859-2        ?l)
    (latin-iso8859-3        ?l)
    (latin-iso8859-4        ?l)
    (latin-iso8859-9        ?l)
    (cyrillic-iso8859-5     ?y "Cyrillic character set")
    (greek-iso8859-7        ?g "Greek character set")
    (hebrew-iso8859-8       ?w "Hebrew character set")
    (katakana-jisx0201      ?k "Japanese 1-byte Katakana character set")
    (latin-jisx0201         ?r "Japanese 1-byte Roman character set")
    (japanese-jisx0208-1978 ?j "Japanese 2-byte character set (old)")
    (japanese-jisx0208      ?j "Japanese 2-byte character set")
    (japanese-jisx0212      ?j)
    (chinese-gb2312         ?c "Chinese GB (China, PRC) 2-byte character set")
    (chinese-cns11643-1     ?t "Chinese Taiwan (CNS or Big5) 2-byte character set")
    (chinese-cns11643-2     ?t)
    (chinese-big5-1         ?t)
    (chinese-big5-2         ?t)
    (korean-ksc5601         ?h "Hangul (Korean) 2-byte character set"))
  "List of predefined categories.
Each element has the form (CHARSET DESIGNATOR [DOC-STRING]): a charset, the
designator of the category assigned to it, and optionally the doc string of
that category.")
|
|
263
|
|
;; Load-time setup of the predefined categories.
(let ((code 32)
      (entries predefined-category-list)
      entry designator doc)
  (define-category ?a "ASCII character set.")
  (define-category ?l "Latin-1 through Latin-5 character set")
  ;; Put the characters with codes 32 through 126 into both the ASCII
  ;; category `a' and the Latin category `l'.
  (while (< code 127)
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)
    (setq code (1+ code)))
  ;; Then process `predefined-category-list' in order.
  (while entries
    (setq entry (car entries)
          designator (nth 1 entry)
          doc (nth 2 entry))
    ;; Every entry that carries a doc string (re)defines its category, so
    ;; when several entries share a designator the last doc string wins.
    ;; (This used to be guarded by `defined-category-p' applied to the doc
    ;; string instead of the designator; that test could never succeed, so
    ;; the guard was always true.  It is dropped here without changing the
    ;; resulting definitions.)
    (if doc
        (define-category designator doc))
    ;; RESET is t: the charset ends up with exactly this one category.
    (modify-category-entry (car entry) designator nil t)
    (setq entries (cdr entries))))
|
|
279
|
|
280 ;;; Setting word boundary.
|
|
281
|
|
(setq word-combining-categories
      '((?l . ?l))

      word-separating-categories        ; (2-byte character sets)
      '((?A . ?K)                       ; Alpha numeric - Katakana
        (?A . ?C)                       ; Alpha numeric - Chinese
        (?H . ?A)                       ; Hiragana - Alpha numeric
        (?H . ?K)                       ; Hiragana - Katakana
        (?H . ?C)                       ; Hiragana - Chinese
        (?K . ?A)                       ; Katakana - Alpha numeric
        (?K . ?C)                       ; Katakana - Chinese
        (?C . ?A)                       ; Chinese - Alpha numeric
        (?C . ?K)                       ; Chinese - Katakana
        ))
|
|
296
|
|
;;; At present, as far as I know, Japanese and Chinese text can be broken
;;; across lines at any point, subject to the restrictions of `kinsoku'.
|
450
|
299 ;;; #### SJT this needs to be set by language environments and probably should
|
|
300 ;;; be buffer-local---strategy for dealing with this: check all $language.el
|
|
301 ;;; files and also mule-base/$language-utils.el files for variables set;
|
|
302 ;;; these should be made buffer local and some kind of a- or p-list of vars
|
|
303 ;;; to be set for a language environment created.
|
428
|
;; The alternatives are the category regexps for `j', `c' and `t', the
;; designators that `predefined-category-list' assigns to the Japanese
;; 2-byte, Chinese GB and Chinese Taiwan charsets.
(defvar word-across-newline "\\(\\cj\\|\\cc\\|\\ct\\)"
  "Regular expression matching characters that can form a word across a newline.")
|
|
306
|
|
;; Regexps for classes of ASCII characters.  The ranges are written with
;; octal character escapes.

;; Codes 040-0176 (32-126): space through `~'.
(defvar ascii-char "[\40-\176]")
;; Space and tab.
(defvar ascii-space "[ \t]")
;; Codes 040-057, 072-0100, 0133-0140 and 0173-0176: everything in
;; 040-0176 that is not a digit or a letter (this includes the space).
(defvar ascii-symbols "[\40-\57\72-\100\133-\140\173-\176]")
;; Codes 060-071: the digits `0' through `9'.
(defvar ascii-numeric "[\60-\71]")
;; Codes 0101-0132: `A' through `Z'.
(defvar ascii-English-Upper "[\101-\132]")
;; Codes 0141-0172: `a' through `z'.
(defvar ascii-English-Lower "[\141-\172]")
;; Digits, upper-case and lower-case letters together.
(defvar ascii-alphanumeric "[\60-\71\101-\132\141-\172]")
|
|
314
|
|
;; Regexps for classes of Japanese characters, parallel to the `ascii-'
;; variables.
;; NOTE(review): the bracketed string literals below look like ISO-2022
;; encoded 2-byte characters (the file declares coding iso-2022-7bit) and
;; may contain escape bytes that are not visible here -- do not retype
;; them; confirm against the original file.
;; NOTE(review): the categories `S', `H', `K' and `C' used below are not
;; defined in the part of the file visible here; presumably they are
;; defined elsewhere -- TODO confirm.

;; Any character in category `j'.
(defvar kanji-char "\\cj")
(defvar kanji-space "$B!!(B")
;; Any character in category `S'.
(defvar kanji-symbols "\\cS")
(defvar kanji-numeric "[$B#0(B-$B#9(B]")
(defvar kanji-English-Upper "[$B#A(B-$B#Z(B]")
(defvar kanji-English-Lower "[$B#a(B-$B#z(B]")
;; Any character in category `H'.
(defvar kanji-hiragana "\\cH")
;; Any character in category `K'.
(defvar kanji-katakana "\\cK")
(defvar kanji-Greek-Upper "[$B&!(B-$B&8(B]")
(defvar kanji-Greek-Lower "[$B&A(B-$B&X(B]")
(defvar kanji-Russian-Upper "[$B'!(B-$B'A(B]")
(defvar kanji-Russian-Lower "[$B'Q(B-$B'q(B]")
(defvar kanji-Kanji-1st-Level "[$B0!(B-$BOS(B]")
(defvar kanji-Kanji-2nd-Level "[$BP!(B-$Bt$(B]")

;; Any character in category `H', `K' or `C'.
(defvar kanji-kanji-char "\\(\\cH\\|\\cK\\|\\cC\\)")
|