Mercurial > hg > xemacs-beta
annotate lisp/mule/mule-category.el @ 5842:9e5f3a0d4e66
Add `noerror' optional argument to `line-move'.
2014-12-31 Michael Sperber <mike@xemacs.org>
* simple.el (line-move): Add `noerror' optional argument, as in
GNU Emacs.
| author | Mike Sperber <sperber@deinprogramm.de> |
|---|---|
| date | Sat, 03 Jan 2015 16:24:56 +0100 |
| parents | bed39edf91ba |
| children |
| rev | line source |
|---|---|
| 502 | 1 ;;; mule-category.el --- category functions for XEmacs/Mule. -*- coding: iso-2022-7bit; -*- |
| 428 | 2 |
| 3 ;; Copyright (C) 1992,93,94,95 Free Software Foundation, Inc. | |
| 4 ;; Copyright (C) 1995, 1997, 1999 Electrotechnical Laboratory, JAPAN. | |
| 5 ;; Licensed to the Free Software Foundation. | |
| 6 ;; Copyright (C) 1995 Amdahl Corporation. | |
| 7 ;; Copyright (C) 1995 Sun Microsystems. | |
| 8 | |
| 9 ;; This file is part of XEmacs. | |
| 10 | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
11 ;; XEmacs is free software: you can redistribute it and/or modify it |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
12 ;; under the terms of the GNU General Public License as published by the |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
13 ;; Free Software Foundation, either version 3 of the License, or (at your |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
14 ;; option) any later version. |
| 428 | 15 |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
16 ;; XEmacs is distributed in the hope that it will be useful, but WITHOUT |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
17 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
18 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
19 ;; for more details. |
| 428 | 20 |
| 21 ;; You should have received a copy of the GNU General Public License | |
|
5402
308d34e9f07d
Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents:
4805
diff
changeset
|
22 ;; along with XEmacs. If not, see <http://www.gnu.org/licenses/>. |
| 428 | 23 |
| 24 ;;; Commentary: | |
| 25 | |
| 26 ;; Functions for working with category tables, which are a particular | |
| 27 ;; type of char table. Some function names / arguments should be | |
| 28 ;; parallel with syntax tables. | |
| 29 | |
| 30 ;; Written by Ben Wing <ben@xemacs.org>. The initialization code | |
| 31 ;; at the end of this file comes from Mule. | |
| 32 ;; Some bugfixes by Jareth Hein <jhod@po.iijnet.or.jp> | |
| 33 | |
| 34 ;;; Code: | |
| 35 | |
(defvar defined-category-hashtable (make-hash-table :size 50)
  "Hash table mapping each defined category designator to its doc string.")
| 428 | 37 |
(defun define-category (designator doc-string &optional table)
  "Define a new category named by the character DESIGNATOR.
DESIGNATOR must be a printable character in the range ' ' through '~'.
The letters 'a' through 'z' are already used by, or reserved for, the system.
DOC-STRING is a string describing the category.
TABLE is the category table the definition is meant to be limited to; it
defaults to the current buffer's category table.  That limiting is not
implemented: TABLE is only type-checked and the definition is global."
  (check-argument-type 'category-designator-p designator)
  (check-argument-type 'stringp doc-string)
  ;; #### Limiting the definition to TABLE is still to be implemented.
  (unless table
    (setq table (category-table)))
  (check-argument-type 'category-table-p table)
  (puthash designator doc-string defined-category-hashtable))
| 51 | |
(defun undefine-category (designator)
  "Remove the category definition whose designator is DESIGNATOR."
  (check-argument-type 'category-designator-p designator)
  (remhash designator defined-category-hashtable))
| 56 | |
(defun defined-category-p (designator)
  "Return non-nil if DESIGNATOR designates a currently defined category.
The non-nil value is the category's doc string."
  (when (category-designator-p designator)
    (gethash designator defined-category-hashtable)))
| 61 | |
(defun defined-category-list ()
  "Return the designators of all currently defined categories as a list."
  (hash-table-key-list defined-category-hashtable))
| 428 | 66 |
(defun undefined-category-designator ()
  "Return a category designator that is not yet defined, or nil if none is free.
Candidates are tried in order from ' ' (code 32) through '~' (code 126)."
  (let ((code 32)
        candidate free)
    (while (and (< code 127) (not free))
      ;; Categories are stored under character keys (see `define-category'),
      ;; and characters are not `eql' to integers in XEmacs, so probe the
      ;; table with the character rather than with the integer code.
      (setq candidate (make-char 'ascii code))
      (unless (gethash candidate defined-category-hashtable)
        (setq free candidate))
      (setq code (1+ code)))
    free))
| 75 | |
(defun category-doc-string (designator)
  "Return the doc string of the category designated by DESIGNATOR.
DESIGNATOR must satisfy `defined-category-p'."
  (check-argument-type 'defined-category-p designator)
  (gethash designator defined-category-hashtable))
| 80 | |
(defun modify-category-entry (char-range designator &optional category-table reset)
  "Put the characters in CHAR-RANGE into the category DESIGNATOR.
CHAR-RANGE is a single character or a range of characters, in any form
accepted by `put-char-table'.
DESIGNATOR is the designator character of a defined category.
CATEGORY-TABLE is the table to change; it defaults to the current buffer's
category table.
If the optional fourth argument RESET is non-nil, all categories previously
associated with CHAR-RANGE are dropped before DESIGNATOR is added."
  (unless category-table
    (setq category-table (category-table)))
  (check-argument-type 'category-table-p category-table)
  (check-argument-type 'defined-category-p designator)
  (when reset
    ;; Start from a clean slate for this range.
    (put-char-table char-range nil category-table))
  (map-char-table
   #'(lambda (key value)
       ;; Always work on a private bit vector: a fresh all-zero one if the
       ;; range has none yet, otherwise a copy of the existing one.
       (let ((bits (if (bit-vector-p value)
                       (copy-sequence value)
                     (make-bit-vector 95 0))))
         ;; Bit N stands for the designator with character code N + 32.
         (aset bits (- designator 32) 1)
         ;; Store it back so that KEY ends up with its own, unshared setting.
         (put-char-table key bits category-table)))
   category-table char-range))
| 428 | 107 |
(defun char-category-list (character &optional category-table)
  "Return the designators of all categories that CHARACTER belongs to.
CATEGORY-TABLE defaults to the current buffer's category table.
The result is a list of designator characters in ascending order, or nil
if CHARACTER has no category information."
  (unless category-table
    (setq category-table (category-table)))
  (check-argument-type 'category-table-p category-table)
  (let ((bits (get-char-table character category-table))
        (index 94)
        designators)
    (when bits
      ;; Walk the 95 bits from the top down so that consing onto the front
      ;; leaves the list in ascending order.
      (while (>= index 0)
        (if (= 1 (aref bits index))
            (setq designators (cons (make-char 'ascii (+ index 32))
                                    designators)))
        (setq index (1- index)))
      designators)))
| 122 | |
| 444 | 123 ;; implemented in C, file chartab.c (97/3/14 jhod@po.iijnet.or.jp) |
| 428 | 124 ;(defun char-in-category-p (char category &optional table) |
| 125 ; "Return non-nil if CHAR is in CATEGORY. | |
| 126 ;TABLE defaults to the current buffer's category table. | |
| 127 ;Categories are specified by their designators." | |
| 128 ; (or table (setq table (category-table))) | |
| 129 ; (check-argument-type 'category-table-p table) | |
| 130 ; (check-argument-type 'category-designator-p category) | |
| 131 ; (let ((vec (get-char-table char table))) | |
| 132 ; (if (null vec) nil | |
| 133 ; (= 1 (aref vec (- category 32)))))) | |
| 134 | |
(put 'with-category-table 'lisp-indent-function 1)

(defmacro with-category-table (category-table &rest body)
  "Evaluate BODY with CATEGORY-TABLE installed via `set-category-table'.
The category table that was in effect beforehand is saved and is
reinstalled with `set-category-table' when BODY exits, whether normally
or through a non-local exit.  The value is that of the last form in BODY."
  ;; Hold the saved table in an uninterned symbol so that BODY can neither
  ;; see nor clobber the macro's bookkeeping variable.
  (let ((saved (make-symbol "saved-category-table")))
    `(let ((,saved (category-table)))
       (set-category-table ,category-table)
       (unwind-protect
           (progn ,@body)
         (set-category-table ,saved)))))
| 143 | |
(defun make-category-table ()
  "Create and return a fresh, empty category table."
  (make-char-table 'category))
| 147 | |
(defun describe-category ()
  "Display a description of the current category table in a help buffer.
The description is generated by `describe-category-table'."
  (interactive)
  (with-displaying-help-buffer
   #'(lambda ()
       (describe-category-table (category-table) standard-output))))
| 428 | 155 |
(defun describe-category-table (table stream)
  "Print a description of category table TABLE to STREAM.
Every range of TABLE whose value is a bit vector with at least one bit set
is printed, followed by its categories as formatted by
`describe-category-code'."
  ;; An earlier version carried `first-char', `last-char' and `prev-val'
  ;; variables, apparently meant to coalesce adjacent ranges with equal
  ;; values.  `first-char' and `prev-val' were never assigned a non-nil
  ;; value, so each range was always reported on its own.  That machinery
  ;; is dropped here; the output is unchanged.
  (map-char-table
   (lambda (range value)
     (when (and (bit-vector-p value)
                (> (reduce '+ value) 0))
       (cond ((vectorp range)
              ;; A vector range holds a charset in slot 0 and a row in slot 1.
              (princ (format "%s, row %d"
                             (charset-name (aref range 0))
                             (aref range 1))
                     stream))
             ((charsetp range)
              (princ (charset-name range) stream))
             (t
              (princ range stream)))
       (describe-category-code value stream))
     ;; Return nil, as before; a non-nil value would presumably end the
     ;; mapping early -- confirm against the `map-char-table' documentation.
     nil)
   table))
| 208 | |
(defun describe-category-code (code stream)
  "Print the categories recorded in bit vector CODE to STREAM.
If STREAM is nil, `standard-output' is used.  The designators of all set
bits are listed on one line, followed by one \"meaning\" line per category
giving its doc string.  If CODE is not a bit vector, or has no bit set,
\"(none)\" is printed instead."
  (let ((standard-output (or stream standard-output)))
    (princ "\tin categories: ")
    (cond ((not (bit-vector-p code))
           (princ "(none)"))
          (t
           ;; First pass: the designators themselves, separated by spaces.
           (let ((bit 0)
                 printed-one)
             (while (< bit 95)
               (when (= 1 (aref code bit))
                 (if printed-one
                     (princ " ")
                   (setq printed-one t))
                 (princ (int-to-char (+ 32 bit))))
               (setq bit (1+ bit)))
             (unless printed-one
               (princ "(none)")))
           ;; Second pass: the doc string of every category that is set.
           (let ((bit 0))
             (while (< bit 95)
               (when (= 1 (aref code bit))
                 (princ (format "\n\t\tmeaning: %s"
                                (category-doc-string
                                 (int-to-char (+ 32 bit))))))
               (setq bit (1+ bit))))))
    (terpri)))
| 233 | |
(defconst predefined-category-list
  '(;; ISO 8859 charsets.
    (latin-iso8859-1 ?l "Latin-1 through Latin-5 character set")
    (latin-iso8859-2 ?l)
    (latin-iso8859-3 ?l)
    (latin-iso8859-4 ?l)
    (latin-iso8859-9 ?l)
    (cyrillic-iso8859-5 ?y "Cyrillic character set")
    (arabic-iso8859-6 ?b "Arabic character set")
    (greek-iso8859-7 ?g "Greek character set")
    (hebrew-iso8859-8 ?w "Hebrew character set")
    ;; Japanese charsets.
    (katakana-jisx0201 ?k "Japanese 1-byte Katakana character set")
    (latin-jisx0201 ?r "Japanese 1-byte Roman character set")
    (japanese-jisx0208-1978 ?j "Japanese 2-byte character set (old)")
    (japanese-jisx0208 ?j "Japanese 2-byte character set")
    (japanese-jisx0212 ?j)
    ;; Chinese charsets.
    (chinese-gb2312 ?c "Chinese GB (China, PRC) 2-byte character set")
    (chinese-cns11643-1 ?t "Chinese Taiwan (CNS or Big5) 2-byte character set")
    (chinese-cns11643-2 ?t)
    (chinese-big5-1 ?t)
    (chinese-big5-2 ?t)
    ;; Korean.
    (korean-ksc5601 ?h "Hangul (Korean) 2-byte character set")
    ;; Unicode characters allocated just in time.
    (jit-ucs-charset-0 ?J "Just-in-time-allocated Unicode character"))
  "List of predefined categories.
Each element has the form (CHARSET DESIGNATOR [DOC-STRING]): a charset, the
designator of the category its characters belong to, and optionally a doc
string for that category.")
| 259 | |
;; Define the predefined categories and enter their characters into the
;; current category table.
(let ((code 32)
      (entries predefined-category-list)
      entry)
  (define-category ?a "ASCII character set.")
  (define-category ?l "Latin-1 through Latin-5 character set")
  ;; Character codes 32 through 126 belong to both ?a and ?l.
  (while (< code 127)
    (modify-category-entry code ?a)
    (modify-category-entry code ?l)
    (setq code (1+ code)))
  (while entries
    (setq entry (car entries))
    ;; Every entry that carries a doc string (re)defines its category, so
    ;; when several entries share a designator the last doc string wins.
    ;; This used to be guarded by `defined-category-p' applied to the doc
    ;; string; a string is never a category designator, so that guard was
    ;; always true and is omitted without changing behavior.
    (if (nth 2 entry)
        (define-category (nth 1 entry) (nth 2 entry)))
    ;; RESET is t: the charset's previous categories are cleared first.
    (modify-category-entry (car entry) (nth 1 entry) nil t)
    (setq entries (cdr entries))))
| 275 | |
| 276 ;;; Setting word boundary. | |
| 277 | |
(setq word-combining-categories
      ;; XEmacs; we should change to defining scripts, as does GNU, once
      ;; unicode-internal is the default, and placing word boundaries
      ;; between different scripts, not different charsets, by default.
      ;; Then we can remove the jit-ucs-charset-0 entry above and all the
      ;; entries containing ?J in this list.
      ;;
      ;; These entries are a bit heuristic, working on the assumption that
      ;; characters that will be just-in-time-allocated will not be East
      ;; Asian in XEmacs, and there's also no mechanism to apply the ?J
      ;; category to further newly-created JIT categories.
      ;;
      ;; Each ?J pairing is listed in both orders, so that the result does
      ;; not depend on which of the two characters comes first.
      '((?l . ?l) (?J . ?l) (?l . ?J) (?J . ?y) (?y . ?J) (?J . ?b) (?b . ?J)
        (?J . ?g) (?g . ?J) (?J . ?w) (?w . ?J)))
| 428 | 291 |
;; Category pairs for the 2-byte character sets.  Each pair names the
;; categories of two adjacent characters.
(setq word-separating-categories
      '(;; Alpha numeric followed by ...
        (?A . ?K)                       ; ... Katakana
        (?A . ?C)                       ; ... Chinese
        ;; Hiragana followed by ...
        (?H . ?A)                       ; ... Alpha numeric
        (?H . ?K)                       ; ... Katakana
        (?H . ?C)                       ; ... Chinese
        ;; Katakana followed by ...
        (?K . ?A)                       ; ... Alpha numeric
        (?K . ?C)                       ; ... Chinese
        ;; Chinese followed by ...
        (?C . ?A)                       ; ... Alpha numeric
        (?C . ?K)))                     ; ... Katakana
| 303 | |
| 304 ;;; At the present, I know Japanese and Chinese text can | |
| 305 ;;; break line at any point under a restriction of 'kinsoku'. | |
| 450 | 306 ;;; #### SJT this needs to be set by language environments and probably should |
| 307 ;;; be buffer-local---strategy for dealing with this: check all $language.el | |
| 308 ;;; files and also mule-base/$language-utils.el files for variables set; | |
| 309 ;;; these should be made buffer local and some kind of a- or p-list of vars | |
| 310 ;;; to be set for a language environment created. | |
(defvar word-across-newline "\\(\\cj\\|\\cc\\|\\ct\\)"
  "Regular expression matching characters that can form a word across a newline.
As set here it matches any character in category ?j, ?c or ?t.")
| 313 | |
(defvar ascii-char "[\40-\176]"
  "Regexp matching one ASCII character from space (octal 40) through `~'.")
(defvar ascii-space "[ \t]"
  "Regexp matching one ASCII space or tab.")
(defvar ascii-symbols "[\40-\57\72-\100\133-\140\173-\176]"
  "Regexp matching one ASCII character in `ascii-char' that is not alphanumeric.")
(defvar ascii-numeric "[\60-\71]"
  "Regexp matching one ASCII digit, 0 through 9.")
(defvar ascii-English-Upper "[\101-\132]"
  "Regexp matching one ASCII upper-case letter, A through Z.")
(defvar ascii-English-Lower "[\141-\172]"
  "Regexp matching one ASCII lower-case letter, a through z.")
(defvar ascii-alphanumeric "[\60-\71\101-\132\141-\172]"
  "Regexp matching one ASCII digit or letter.")
| 321 | |
| 322 (defvar kanji-char "\\cj") | |
| 323 (defvar kanji-space "$B!!(B") | |
| 324 (defvar kanji-symbols "\\cS") | |
| 325 (defvar kanji-numeric "[$B#0(B-$B#9(B]") | |
| 326 (defvar kanji-English-Upper "[$B#A(B-$B#Z(B]") | |
| 327 (defvar kanji-English-Lower "[$B#a(B-$B#z(B]") | |
| 328 (defvar kanji-hiragana "\\cH") | |
| 329 (defvar kanji-katakana "\\cK") | |
| 330 (defvar kanji-Greek-Upper "[$B&!(B-$B&8(B]") | |
| 331 (defvar kanji-Greek-Lower "[$B&A(B-$B&X(B]") | |
| 332 (defvar kanji-Russian-Upper "[$B'!(B-$B'A(B]") | |
| 333 (defvar kanji-Russian-Lower "[$B'Q(B-$B'q(B]") | |
| 334 (defvar kanji-Kanji-1st-Level "[$B0!(B-$BOS(B]") | |
| 335 (defvar kanji-Kanji-2nd-Level "[$BP!(B-$Bt$(B]") | |
| 336 | |
| 337 (defvar kanji-kanji-char "\\(\\cH\\|\\cK\\|\\cC\\)") |
