Mercurial > hg > xemacs-beta
comparison lisp/mule/mule-util.el @ 165:5a88923fcbfe r20-3b9
Import from CVS: tag r20-3b9
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:44:42 +0200 |
parents | |
children | 85ec50267440 |
comparison
equal
deleted
inserted
replaced
164:4e0740e5aab2 | 165:5a88923fcbfe |
---|---|
1 ;;; mule-util.el --- Utility functions for multilingual environment (mule) | |
2 | |
3 ;; Copyright (C) 1995 Free Software Foundation, Inc. | |
4 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
5 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
6 | |
7 ;; Keywords: mule, multilingual | |
8 | |
9 ;; This file is part of XEmacs. | |
10 | |
11 ;; XEmacs is free software; you can redistribute it and/or modify it | |
12 ;; under the terms of the GNU General Public License as published by | |
13 ;; the Free Software Foundation; either version 2, or (at your option) | |
14 ;; any later version. | |
15 | |
16 ;; XEmacs is distributed in the hope that it will be useful, but | |
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 ;; General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free | |
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
24 ;; 02111-1307, USA. | |
25 | |
26 ;;; Code: | |
27 | |
28 ;;; String manipulations while paying attention to multibyte | |
29 ;;; characters. | |
30 | |
31 ;;;###autoload | |
(defsubst string-to-sequence (string type)
  "Return the characters of STRING as a sequence of TYPE.
TYPE should be `list' or `vector'.
Multibyte characters are taken into account: each element of the
result is one character of STRING."
  (map type #'identity string))
37 | |
38 ;;;###autoload | |
(defsubst string-to-list (string)
  "Return the characters of STRING, in order, as a freshly made list."
  ;; Appending nil to a sequence copies its elements into a new list.
  (append string nil))
42 | |
43 ;;;###autoload | |
(defsubst string-to-vector (string)
  "Return the characters of STRING, in order, as a vector.
This is `string-to-sequence' with a TYPE of `vector'."
  (string-to-sequence string 'vector))
47 | |
48 ;;;###autoload | |
(defun store-substring (string idx obj)
  "Embed OBJ (string or character) at index IDX of STRING.
STRING is modified in place and returned.  Elements of OBJ that would
land at or beyond the end of STRING are not stored.
Signal an error if OBJ is neither a string nor a character."
  (let* ((source (if (stringp obj)
                     obj
                   (if (characterp obj)
                       (char-to-string obj)
                     (error
                      "Invalid argument (should be string or character): %s"
                      obj))))
         (limit (length string))
         (count (length source))
         (pos 0))
    ;; Copy element by element until either SOURCE is exhausted or the
    ;; write position reaches the end of STRING.
    (while (and (< pos count) (< idx limit))
      (aset string idx (aref source pos))
      (setq pos (1+ pos)
            idx (1+ idx)))
    string))
63 | |
64 ;;;###autoload | |
(defun truncate-string-to-width (str width &optional start-column padding)
  "Truncate string STR to fit in WIDTH columns.
Optional 1st arg START-COLUMN if non-nil specifies the starting column;
it defaults to 0.
Optional 2nd arg PADDING if non-nil is a padding character to be padded at
the head and tail of the resulting string to fit in WIDTH if necessary.
If PADDING is nil, the resulting string may be narrower than WIDTH.
If STR ends before START-COLUMN is reached, return the empty string, or
a string of WIDTH copies of PADDING when PADDING is non-nil."
  (or start-column
      (setq start-column 0))
  (let ((len (length str))
        (idx 0)
        (column 0)
        (head-padding "") (tail-padding "")
        ch last-column last-idx from-idx)
    ;; Skip the characters lying in front of START-COLUMN.  Reading past
    ;; the end of STR signals `args-out-of-range', which ends the scan.
    (condition-case nil
        (while (< column start-column)
          (setq ch (sref str idx)
                column (+ column (char-width ch))
                idx (+ idx (char-bytes ch))))
      (args-out-of-range (setq idx len)))
    (if (< column start-column)
        ;; STR is exhausted before START-COLUMN.
        (if padding (make-string width padding) "")
      ;; The last skipped character overshot START-COLUMN; fill the
      ;; overshoot with PADDING, as the docstring promises (this used to
      ;; be a hard-coded space).
      (if (and padding (> column start-column))
          (setq head-padding (make-string (- column start-column) padding)))
      (setq from-idx idx)
      ;; Collect characters until WIDTH is reached, remembering the state
      ;; before each character so the last one can be dropped again.
      (condition-case nil
          (while (< column width)
            (setq last-column column
                  last-idx idx
                  ch (sref str idx)
                  column (+ column (char-width ch))
                  idx (+ idx (char-bytes ch))))
        (args-out-of-range (setq idx len)))
      ;; Drop the last character if it crossed WIDTH.  LAST-IDX is nil
      ;; when the loop above never ran (COLUMN was already at or past
      ;; WIDTH); in that case there is nothing to back up to, and
      ;; resetting IDX to nil would make `substring' return the whole
      ;; tail of STR.
      (if (and last-idx (> column width))
          (setq column last-column idx last-idx))
      (if (and padding (< column width))
          (setq tail-padding (make-string (- width column) padding)))
      (setq str (substring str from-idx idx))
      (if padding
          (concat head-padding str tail-padding)
        str))))
105 | |
106 ;;; For backward compatibility ... | |
107 ;;;###autoload | |
;; `truncate-string' is the old name; keep it callable, but mark it
;; obsolete in favour of `truncate-string-to-width'.
(defalias 'truncate-string #'truncate-string-to-width)
(make-obsolete 'truncate-string 'truncate-string-to-width)
110 | |
111 ;;; Nested alist handler. Nested alist is alist whose elements are | |
112 ;;; also nested alist. | |
113 | |
114 ;;;###autoload | |
(defsubst nested-alist-p (obj)
  "Return t if OBJ is a nested alist.

Nested alist is a list of the form (ENTRY . BRANCHES), where ENTRY is
any Lisp object, and BRANCHES is a list of cons cells of the form
\(KEY-ELEMENT . NESTED-ALIST).

You can use a nested alist to store any Lisp object (ENTRY) for a key
sequence KEYSEQ, where KEYSEQ is a sequence of KEY-ELEMENT.  KEYSEQ
can be a string, a vector, or a list.

Only the outermost cons is inspected: OBJ must be a cons cell whose
cdr is a list."
  (and (consp obj)
       (listp (cdr obj))))
126 | |
127 ;;;###autoload | |
(defun set-nested-alist (keyseq entry alist &optional len branches)
  "Set ENTRY for KEYSEQ in a nested alist ALIST.
Optional 4th arg LEN non-nil means only the first LEN elements of
KEYSEQ are considered.
Optional argument BRANCHES if non-nil is branches for a keyseq
longer than KEYSEQ.
ALIST is modified destructively.  Signal an error if ALIST is not a
nested alist, if KEYSEQ runs into a node that is not a nested alist,
or if BRANCHES is given but the node for KEYSEQ already has branches.
See the documentation of `nested-alist-p' for more detail."
  (or (nested-alist-p alist)
      (error "Invalid argument %s" alist))
  (let ((islist (listp keyseq))
        (len (or len (length keyseq)))
        (i 0)
        key-elt slot)
    ;; Descend one level per key element, creating missing branches.
    (while (< i len)
      (if (null (nested-alist-p alist))
          (error "Keyseq %s is too long for this nested alist" keyseq))
      (setq key-elt (if islist (nth i keyseq) (aref keyseq i)))
      (setq slot (assoc key-elt (cdr alist)))
      (if (null slot)
          (progn
            ;; No branch for KEY-ELT yet: add (KEY-ELT t), i.e. a new
            ;; node whose entry is t and which has no branches.
            (setq slot (cons key-elt (list t)))
            (setcdr alist (cons slot (cdr alist)))))
      (setq alist (cdr slot))
      (setq i (1+ i)))
    ;; ALIST is now the node reached by KEYSEQ.
    (setcar alist entry)
    (if branches
        (if (cdr alist)
            (error "Can't set branches for keyseq %s" keyseq)
          (setcdr alist branches)))))
157 | |
158 ;;;###autoload | |
(defun lookup-nested-alist (keyseq alist &optional len start nil-for-too-long)
  "Look up key sequence KEYSEQ in nested alist ALIST.  Return the definition.
Optional 1st argument LEN specifies the length of KEYSEQ.
Optional 2nd argument START specifies index of the starting key.
The returned value is normally a nested alist of which
car part is the entry for KEYSEQ.
If ALIST is not deep enough for KEYSEQ, return number which is
how many key elements at the front of KEYSEQ it takes
to reach a leaf in ALIST.
Optional 3rd argument NIL-FOR-TOO-LONG non-nil means return nil
even if ALIST is not deep enough.
Signal an error if ALIST is not a nested alist."
  (or (nested-alist-p alist)
      (error "Invalid argument %s" alist))
  (or len
      (setq len (length keyseq)))
  (let ((i (or start 0))
        (islist (listp keyseq))
        too-long)
    ;; Follow one branch per key element.  A missing branch (or a branch
    ;; with nothing below it) leaves ALIST nil and stops the walk with I
    ;; still pointing at the key element that could not be followed.
    (while (and (not too-long) (< i len))
      (setq alist (cdr (assoc (if islist (nth i keyseq) (aref keyseq i))
                              (cdr alist))))
      (if alist
          (setq i (1+ i))
        (setq too-long t)))
    (if too-long
        ;; KEYSEQ is too long.
        (if nil-for-too-long nil i)
      alist)))
188 | |
189 ;; Coding system related functions. | |
190 | |
191 ;;;###autoload | |
(defun set-coding-system-alist (target-type regexp coding-system
                                            &optional operation)
  "Update `coding-system-alist' according to the arguments.
TARGET-TYPE specifies a type of the target: `file', `process', or `network'.
TARGET-TYPE tells which slots of coding-system-alist should be affected.
 If `file', it affects slots for insert-file-contents and write-region.
 If `process', it affects slots for call-process, call-process-region, and
    start-process.
 If `network', it affects a slot for open-network-stream.
REGEXP is a regular expression matching a target of I/O operation.
CODING-SYSTEM is a coding system to perform code conversion
 on the I/O operation, or a cons of coding systems for decoding and
 encoding respectively, or a function symbol which returns the cons.
Optional arg OPERATION if non-nil specifies directly one of slots above.
 The valid value is: insert-file-contents, write-region,
 call-process, call-process-region, start-process, or open-network-stream.
If OPERATION is specified, TARGET-TYPE is ignored.
See the documentation of `coding-system-alist' for more detail."
  (or (stringp regexp)
      (error "Invalid regular expression: %s" regexp))
  ;; TARGET-TYPE is only consulted when OPERATION is nil, so only
  ;; validate it in that case.
  (or operation
      (memq target-type '(file process network))
      (error "Invalid target type: %s" target-type))
  (if (symbolp coding-system)
      ;; A symbol with a function definition is stored as is; any other
      ;; symbol must name a coding system and is used for both
      ;; decoding and encoding.
      (if (not (fboundp coding-system))
          (progn
            (check-coding-system coding-system)
            (setq coding-system (cons coding-system coding-system))))
    (check-coding-system (car coding-system))
    (check-coding-system (cdr coding-system)))
  (let ((op-list (if operation (list operation)
                   (cond ((eq target-type 'file)
                          '(insert-file-contents write-region))
                         ((eq target-type 'process)
                          '(call-process call-process-region start-process))
                         (t             ; i.e. (eq target-type network)
                          '(open-network-stream)))))
        slot)
    (while op-list
      (setq slot (assq (car op-list) coding-system-alist))
      (if slot
          (let ((chain (cdr slot)))
            ;; The `catch' yields nil when an existing entry for REGEXP
            ;; was updated in place, and t when none was found.
            (if (catch 'tag
                  (while chain
                    (if (string= regexp (car (car chain)))
                        (progn
                          (setcdr (car chain) coding-system)
                          (throw 'tag nil)))
                    (setq chain (cdr chain)))
                  t)
                (setcdr slot (cons (cons regexp coding-system) (cdr slot)))))
        ;; No slot for this operation yet: create one.
        (setq coding-system-alist
              (cons (cons (car op-list) (list (cons regexp coding-system)))
                    coding-system-alist)))
      (setq op-list (cdr op-list)))))
246 | |
247 | |
248 ;;; Composite character manipulations. | |
249 | |
250 ;;;###autoload | |
(defun compose-region (start end &optional buffer)
  "Compose characters in the current region into one composite character.
From a Lisp program, pass two arguments, START to END.
The composite character replaces the composed characters.
BUFFER defaults to the current buffer if omitted."
  (interactive "r")
  (let* ((text (buffer-substring start end buffer))
         (composite (make-composite-char text)))
    ;; Build the composite character first, then replace the text with it.
    (delete-region start end buffer)
    (insert-char composite nil nil buffer)))
260 | |
261 ;;;###autoload | |
(defun decompose-region (start end &optional buffer)
  "Decompose any composite characters in the current region.
From a Lisp program, pass two arguments, START to END.
This converts each composite character into one or more characters,
the individual characters out of which the composite character was formed.
Non-composite characters are left as-is.  BUFFER defaults to the current
buffer if omitted."
  (interactive "r")
  (save-excursion
    ;; Only switch buffers when one was given; a nil BUFFER means the
    ;; current buffer, as documented.
    (if buffer (set-buffer buffer))
    (save-restriction
      (narrow-to-region start end)
      (goto-char (point-min))
      (let ((compcharset (get-charset 'composite)))
        (while (< (point) (point-max))
          (let ((ch (char-after (point))))
            (if (eq compcharset (char-charset ch))
                (progn
                  ;; Replace the composite character by its components;
                  ;; `insert' leaves point after them.
                  (delete-char 1)
                  (insert (composite-char-string ch)))
              ;; Step over a non-composite character; without this the
              ;; loop would never terminate.
              (forward-char 1))))))))
282 | |
283 ;;;###autoload | |
(defconst reference-point-alist
  '((tl . 0) (tc . 1) (tr . 2)
    (ml . 3) (mc . 4) (mr . 5)
    (bl . 6) (bc . 7) (br . 8)
    (top-left . 0) (top-center . 1) (top-right . 2)
    (mid-left . 3) (mid-center . 4) (mid-right . 5)
    (bottom-left . 6) (bottom-center . 7) (bottom-right . 8)
    (0 . 0) (1 . 1) (2 . 2)
    (3 . 3) (4 . 4) (5 . 5)
    (6 . 6) (7 . 7) (8 . 8))
  "Alist of reference point symbols vs reference point codes.
Meanings of reference point codes are as follows:

    0----1----2 <-- ascent    0:tl or top-left
    |         |               1:tc or top-center
    |         |               2:tr or top-right
    |         |               3:ml or mid-left
    |    4 <--+---- center    4:mc or mid-center
    |         |               5:mr or mid-right
--- 3         5 <-- baseline  6:bl or bottom-left
    |         |               7:bc or bottom-center
    6----7----8 <-- descent   8:br or bottom-right

Reference point symbols are to be used to specify composition rule of
the form \(GLOBAL-REF-POINT . NEW-REF-POINT), where GLOBAL-REF-POINT
is a reference point in the overall glyphs already composed, and
NEW-REF-POINT is a reference point in the new glyph to be added.

For instance, if GLOBAL-REF-POINT is 8 and NEW-REF-POINT is 1, the
overall glyph is updated as follows:

        +-------+--+ <--- new ascent
        |       |  |
        | global|  |
        | glyph |  |
---     |       |  | <--- baseline (doesn't change)
        +----+--+--+
        |    | new |
        |    |glyph|
        +----+-----+ <--- new descent
")
325 | |
;; Return a string for char CH to be embedded in multibyte form of
;; composite character.
(defun compose-chars-component (ch)
  "Return the string that stands for CH inside a composite character.
For CH below 128 this is the byte \\240 followed by CH + 128.
Otherwise it is derived from the string form of CH: for a composite
character (per `cmpcharp') whose second element is ?\\xFF, everything
after the first two elements is returned, and any other composite
character signals an error; for every other character the first
element of the string is raised by ?\\x20."
  (cond ((< ch 128)
         (format "\240%c" (+ ch 128)))
        (t
         (let ((encoded (char-to-string ch)))
           (cond ((not (cmpcharp ch))
                  (aset encoded 0 (+ (aref encoded 0) ?\x20))
                  encoded)
                 ((/= (aref encoded 1) ?\xFF)
                  (error "Char %c can't be composed" ch))
                 (t
                  (substring encoded 2)))))))
338 | |
;; Return a string for composition rule RULE to be embedded in
;; multibyte form of composite character.
(defsubst compose-chars-rule (rule)
  "Return the one-character string that encodes composition rule RULE.
RULE is a cons (GLOBAL-REF-POINT . NEW-REF-POINT) whose two parts are
looked up in `reference-point-alist'.  The character is
?\\xA0 + 9 * GLOBAL-CODE + NEW-CODE."
  (let ((global-code (cdr (assq (car rule) reference-point-alist)))
        (new-code (cdr (assq (cdr rule) reference-point-alist))))
    (char-to-string (+ ?\xA0 (* global-code 9) new-code))))
345 | |
346 ;;;###autoload | |
(defun compose-chars (first-component &rest args)
  "Return one char string composed from the arguments.
Each argument is a character (including a composite character)
or a composition rule.
A composition rule has the form \(GLOBAL-REF-POINT . NEW-REF-POINT).
With no arguments after FIRST-COMPONENT, the result is simply
FIRST-COMPONENT as a string.  Whether rules are expected is decided by
the first of ARGS: if it is a cons, ARGS must alternate rule and
character, and a non-cons in a rule position signals an error.
See the documentation of `reference-point-alist' for more detail."
  (if (null args)
      (char-to-string first-component)
    (let* ((with-rule (consp (car args)))
           ;; The composition leading code comes first, followed by
           ;; ?\xFF when rules are embedded.
           (result (concat (if with-rule
                               (concat (vector leading-code-composition ?\xFF))
                             (char-to-string leading-code-composition))
                           (compose-chars-component first-component))))
      (while args
        (cond (with-rule
               (or (consp (car args))
                   (error "Invalid composition rule: %s" (car args)))
               ;; Consume a rule and the character it applies to.
               (setq result (concat result
                                    (compose-chars-rule (car args))
                                    (compose-chars-component (nth 1 args)))
                     args (nthcdr 2 args)))
              (t
               (setq result (concat result
                                    (compose-chars-component (car args)))
                     args (cdr args)))))
      result)))
370 | |
371 ;;;###autoload | |
(defun decompose-composite-char (char &optional type with-composition-rule)
  "Convert composite character CHAR to a string containing components of CHAR.
Optional 1st arg TYPE specifies the type of sequence returned.
It should be `string' (default), `list', or `vector'.
Optional 2nd arg WITH-COMPOSITION-RULE non-nil means the returned
sequence contains embedded composition rules if any.  In this case, the
order of elements in the sequence is the same as arguments for
`compose-chars' to create CHAR.
If TYPE is omitted or is `string', composition rules are omitted
even if WITH-COMPOSITION-RULE is t."
  (let* ((seq-type (or type 'string))
         (idx (1- (composite-char-component-count char)))
         ;; Rules are emitted only on request, never for a string
         ;; result, and only if CHAR carries composition rules.
         (rules-p (and with-composition-rule
                       (not (eq seq-type 'string))
                       (composite-char-composition-rule-p char)))
         (parts nil))
    ;; Walk the components from last to second, consing onto the front
    ;; so that PARTS ends up in forward order; each rule lands just in
    ;; front of the component it was fetched with.
    (while (> idx 0)
      (setq parts (cons (composite-char-component char idx) parts))
      (if rules-p
          (let ((code (- (composite-char-composition-rule char idx) ?\xA0)))
            (setq parts (cons (cons (/ code 9) (% code 9)) parts))))
      (setq idx (1- idx)))
    ;; The first component has no rule in front of it.
    (setq parts (cons (composite-char-component char 0) parts))
    (if (eq seq-type 'string)
        (apply 'concat-chars parts)
      (if (eq seq-type 'list)
          parts
        ;; Any other TYPE is treated as `vector'.
        (vconcat parts)))))
403 | |
404 ;;; mule-util.el ends here |