165
|
1 ;;; mule-util.el --- Utility functions for multilingual environment (mule)
|
|
2
|
|
3 ;; Copyright (C) 1995 Free Software Foundation, Inc.
|
|
4 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
|
|
5 ;; Copyright (C) 1997 MORIOKA Tomohiko
|
|
6
|
|
7 ;; Keywords: mule, multilingual
|
|
8
|
|
9 ;; This file is part of XEmacs.
|
|
10
|
|
11 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
12 ;; under the terms of the GNU General Public License as published by
|
|
13 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
14 ;; any later version.
|
|
15
|
|
16 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
19 ;; General Public License for more details.
|
|
20
|
|
21 ;; You should have received a copy of the GNU General Public License
|
|
22 ;; along with XEmacs; see the file COPYING. If not, write to the Free
|
|
23 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
24 ;; 02111-1307, USA.
|
|
25
|
|
26 ;;; Code:
|
|
27
|
|
28 ;;; String manipulations while paying attention to multibyte
|
|
29 ;;; characters.
|
|
30
|
167
|
31 ;; [Was defsubst]
|
165
|
32 ;;;###autoload
|
167
|
(defun string-to-sequence (string type)
  "Return a new sequence of type TYPE holding the characters of STRING.
TYPE should be `list' or `vector'.
Each element of the result is one character of STRING; a multibyte
character counts as a single element."
  (map type #'identity string))
|
|
38
|
167
|
39 ;; [Was defsubst]
|
165
|
40 ;;;###autoload
|
167
|
(defun string-to-list (string)
  "Return the characters of STRING as a freshly made list."
  ;; Appending nil copies the elements of STRING into a new list.
  (append string nil))
|
|
44
|
167
|
45 ;; [Was defsubst]
|
165
|
46 ;;;###autoload
|
167
|
(defun string-to-vector (string)
  "Return the characters of STRING as a freshly made vector."
  (vconcat string))
|
|
50
|
|
51 ;;;###autoload
|
|
(defun store-substring (string idx obj)
  "Overwrite part of STRING, starting at index IDX, with OBJ.
OBJ is either a string or a single character.  STRING is modified in
place and returned.  Copying stops at the end of STRING, so whatever
part of OBJ does not fit is dropped.  Signal an error if OBJ is neither
a string nor a character."
  (let ((src (if (stringp obj)
                 obj
               (if (characterp obj)
                   (char-to-string obj)
                 (error
                  "Invalid argument (should be string or character): %s"
                  obj))))
        (limit (length string))
        (pos 0))
    (let ((src-len (length src)))
      ;; Copy element by element until either SRC or STRING runs out.
      (while (and (< pos src-len) (< idx limit))
        (aset string idx (aref src pos))
        (setq pos (1+ pos)
              idx (1+ idx))))
    string))
|
|
66
|
|
67 ;;;###autoload
|
|
(defun truncate-string-to-width (str width &optional start-column padding)
  "Truncate string STR to fit in WIDTH columns.
WIDTH is the display column, counted from the beginning of STR, at
which the result ends; the text returned covers the columns from
START-COLUMN up to WIDTH.
Optional 1st arg START-COLUMN if non-nil specifies the starting column;
it defaults to 0.
Optional 2nd arg PADDING if non-nil is a padding character to be padded at
the head and tail of the resulting string to fit in WIDTH if necessary.
Head padding fills the columns left over when a wide character
straddles START-COLUMN; tail padding fills the columns left when STR is
too short or a wide character would straddle WIDTH.
If PADDING is nil, the resulting string may be narrower than WIDTH.
If STR ends before START-COLUMN, return WIDTH copies of PADDING, or the
empty string when PADDING is nil.
If START-COLUMN is at or beyond WIDTH, no text of STR is returned."
  (or start-column
      (setq start-column 0))
  (let ((len (length str))
        (idx 0)
        (column 0)
        (head-padding "") (tail-padding "")
        ch last-column last-idx from-idx)
    ;; Skip the characters lying before START-COLUMN.  Running off the
    ;; end of STR signals `args-out-of-range', which simply ends the scan.
    (condition-case nil
        (while (< column start-column)
          (setq ch (sref str idx)
                column (+ column (char-width ch))
                idx (+ idx (char-bytes ch))))
      (args-out-of-range (setq idx len)))
    (if (< column start-column)
        ;; STR is exhausted before START-COLUMN is reached.
        (if padding (make-string width padding) "")
      ;; A wide character straddled START-COLUMN: fill the columns it
      ;; left over with PADDING (not with a hard-coded space).
      (if (and padding (> column start-column))
          (setq head-padding (make-string (- column start-column) padding)))
      (setq from-idx idx)
      ;; Take characters until WIDTH is reached, remembering the state
      ;; before each one so that an overshooting character can be undone.
      (condition-case nil
          (while (< column width)
            (setq last-column column
                  last-idx idx
                  ch (sref str idx)
                  column (+ column (char-width ch))
                  idx (+ idx (char-bytes ch))))
        (args-out-of-range (setq idx len)))
      ;; Undo the last character if it crossed WIDTH.  LAST-IDX is nil
      ;; when the loop above never ran (START-COLUMN already at or past
      ;; WIDTH); there is nothing to undo then and IDX still equals
      ;; FROM-IDX, so the substring below is empty.
      (if (and last-idx (> column width))
          (setq column last-column idx last-idx))
      (if (and padding (< column width))
          (setq tail-padding (make-string (- width column) padding)))
      (setq str (substring str from-idx idx))
      (if padding
          (concat head-padding str tail-padding)
        str))))
|
|
108
|
|
109 ;;; For backward compatibility ...
|
|
110 ;;;###autoload
|
|
(defalias 'truncate-string 'truncate-string-to-width)
;; `truncate-string' is only an alias kept for old callers; declare it
;; obsolete in favor of `truncate-string-to-width'.
(make-obsolete 'truncate-string 'truncate-string-to-width)
|
|
113
|
|
114 ;;; Nested alist handler. Nested alist is alist whose elements are
|
|
115 ;;; also nested alist.
|
|
116
|
167
|
117 ;; [Was defsubst]
|
165
|
118 ;;;###autoload
|
167
|
(defun nested-alist-p (obj)
  "Return t if OBJ is a nested alist.

A nested alist is a list of the form (ENTRY . BRANCHES), where ENTRY is
any Lisp object and BRANCHES is a list of cons cells, each of the form
\(KEY-ELEMENT . NESTED-ALIST).

A nested alist can store any Lisp object (ENTRY) under a key sequence
KEYSEQ, where KEYSEQ is a sequence of KEY-ELEMENTs.  KEYSEQ can be a
string, a vector, or a list."
  ;; Only the outermost cell is inspected: OBJ must be a cons whose cdr
  ;; is a list.
  (if (consp obj)
      (listp (cdr obj))))
|
|
130
|
|
131 ;;;###autoload
|
|
(defun set-nested-alist (keyseq entry alist &optional len branches)
  "Store ENTRY under key sequence KEYSEQ in the nested alist ALIST.
ALIST is modified in place; levels missing along KEYSEQ are created.
Optional 4th arg LEN, if non-nil, means only the first LEN elements of
KEYSEQ are used.
Optional argument BRANCHES, if non-nil, is installed as the branches
for key sequences longer than KEYSEQ; an error is signaled if KEYSEQ
already has branches.
See the documentation of `nested-alist-p' for more detail."
  (if (not (nested-alist-p alist))
      (error "Invalid arguement %s" alist))
  (let ((from-list (listp keyseq))
        (count (or len (length keyseq)))
        (pos 0)
        (node alist)
        key branch)
    ;; Walk down one level per key element, creating branches on demand.
    (while (< pos count)
      (or (nested-alist-p node)
          (error "Keyseq %s is too long for this nested alist" keyseq))
      (setq key (if from-list (nth pos keyseq) (aref keyseq pos))
            branch (assoc key (cdr node)))
      (if branch
          nil
        ;; No branch for KEY yet: add one whose entry is the placeholder t.
        (setq branch (cons key (list t)))
        (setcdr node (cons branch (cdr node))))
      (setq node (cdr branch)
            pos (1+ pos)))
    (setcar node entry)
    (if branches
        (if (cdr node)
            (error "Can't set branches for keyseq %s" keyseq)
          (setcdr node branches)))))
|
|
161
|
|
162 ;;;###autoload
|
|
(defun lookup-nested-alist (keyseq alist &optional len start nil-for-too-long)
  "Look up key sequence KEYSEQ in nested alist ALIST.  Return the definition.
Optional 1st argument LEN specifies the length of KEYSEQ; it defaults
to the actual length of KEYSEQ.
Optional 2nd argument START specifies the index of the first key
element to use; it defaults to 0.
The returned value is normally a nested alist whose car is the entry
for KEYSEQ.
If ALIST is not deep enough for KEYSEQ, return instead the index in
KEYSEQ of the first key element that could not be followed.
Optional 3rd argument NIL-FOR-TOO-LONG non-nil means return nil
in that case."
  (if (not (nested-alist-p alist))
      (error "invalid arguement %s" alist))
  (let ((count (or len (length keyseq)))
        (pos (or start 0))
        (from-list (listp keyseq))
        (node alist)
        (too-long nil))
    ;; Descend one level per key element; stop early at the first key
    ;; element that has no (non-nil) sub-alist.
    (while (and (not too-long) (< pos count))
      (setq node (cdr (assoc (if from-list (nth pos keyseq) (aref keyseq pos))
                             (cdr node))))
      (if node
          (setq pos (1+ pos))
        (setq too-long t)))
    (if too-long
        (if nil-for-too-long nil pos)
      node)))
|
|
192
|
|
193 ;; Coding system related functions.
|
|
194
|
|
195 ;;;###autoload
|
|
(defun set-coding-system-alist (target-type regexp coding-system
                                            &optional operation)
  "Update `coding-system-alist' according to the arguments.
TARGET-TYPE specifies a type of the target: `file', `process', or `network'.
TARGET-TYPE tells which slots of coding-system-alist should be affected.
  If `file', it affects slots for insert-file-contents and write-region.
  If `process', it affects slots for call-process, call-process-region, and
    start-process.
  If `network', it affects a slot for open-network-stream.
REGEXP is a regular expression matching a target of I/O operation.
CODING-SYSTEM is a coding system to perform code conversion
  on the I/O operation, or a cons of coding systems for decoding and
  encoding respectively, or a function symbol which returns the cons.
Optional arg OPERATION if non-nil specifies directly one of slots above.
  The valid value is: insert-file-contents, write-region,
  call-process, call-process-region, start-process, or open-network-stream.
If OPERATION is specified, TARGET-TYPE is ignored.
If an entry for REGEXP already exists in a slot, its coding system is
replaced; otherwise a new entry is added at the front of the slot.
See the documentation of `coding-system-alist' for more detail."
  (or (stringp regexp)
      (error "Invalid regular expression: %s" regexp))
  ;; TARGET-TYPE only matters when OPERATION does not name the slot
  ;; directly, so it is only validated in that case.
  (or operation
      (memq target-type '(file process network))
      (error "Invalid target type: %s" target-type))
  (if (symbolp coding-system)
      ;; A symbol with a function definition is stored as is (it is a
      ;; function returning the cons); any other symbol must name a
      ;; coding system, used for both decoding and encoding.
      (if (not (fboundp coding-system))
          (progn
            (check-coding-system coding-system)
            (setq coding-system (cons coding-system coding-system))))
    (check-coding-system (car coding-system))
    (check-coding-system (cdr coding-system)))
  (let ((op-list (if operation (list operation)
                   (cond ((eq target-type 'file)
                          '(insert-file-contents write-region))
                         ((eq target-type 'process)
                          '(call-process call-process-region start-process))
                         (t             ; i.e. (eq target-type network)
                          '(open-network-stream)))))
        slot)
    (while op-list
      (setq slot (assq (car op-list) coding-system-alist))
      (if slot
          (let ((chain (cdr slot))
                (found nil))
            ;; Replace the coding system of an existing entry for REGEXP.
            (while (and chain (not found))
              (if (string= regexp (car (car chain)))
                  (progn
                    (setcdr (car chain) coding-system)
                    (setq found t))
                (setq chain (cdr chain))))
            (or found
                (setcdr slot (cons (cons regexp coding-system) (cdr slot)))))
        ;; No slot for this operation yet: create one.
        (setq coding-system-alist
              (cons (cons (car op-list) (list (cons regexp coding-system)))
                    coding-system-alist)))
      (setq op-list (cdr op-list)))))
|
|
250
|
|
251
|
|
252 ;;; Composite character manipulations.
|
|
253
|
|
254 ;;;###autoload
|
|
(defun compose-region (start end &optional buffer)
  "Compose characters in the current region into one composite character.
From a Lisp program, pass two arguments, START to END.
The composite character replaces the composed characters, i.e. it is
inserted at the position where the composed text began, and point in
BUFFER is left just after it.
BUFFER defaults to the current buffer if omitted."
  (interactive "r")
  (let ((ch (make-composite-char (buffer-substring start end buffer))))
    (delete-region start end buffer)
    ;; Insertion happens at point, which need not be inside the region
    ;; when called from Lisp, so move to where the region was first.
    (goto-char (min start end) buffer)
    (insert-char ch nil nil buffer)))
|
|
264
|
|
265 ;;;###autoload
|
|
(defun decompose-region (start end &optional buffer)
  "Decompose any composite characters in the current region.
From a Lisp program, pass two arguments, START to END.
This converts each composite character into one or more characters,
the individual characters out of which the composite character was formed.
Non-composite characters are left as-is.  BUFFER defaults to the current
buffer if omitted."
  (interactive "r")
  (save-excursion
    ;; BUFFER is nil for interactive calls and for Lisp callers relying
    ;; on the default; fall back to the current buffer.
    (set-buffer (or buffer (current-buffer)))
    (save-restriction
      (narrow-to-region start end)
      (goto-char (point-min))
      (let ((compcharset (get-charset 'composite)))
        (while (< (point) (point-max))
          (let ((ch (char-after (point))))
            (if (eq compcharset (char-charset ch))
                ;; Replace the composite character by its components;
                ;; the insertion leaves point after them.
                (progn
                  (delete-char 1)
                  (insert (composite-char-string ch)))
              ;; Step over an ordinary character; without this the loop
              ;; would never reach the end of the region.
              (forward-char 1))))))))
|
|
286
|
|
287 ;;;###autoload
|
|
(defconst reference-point-alist
  '((tl . 0) (tc . 1) (tr . 2)
    (ml . 3) (mc . 4) (mr . 5)
    (bl . 6) (bc . 7) (br . 8)
    (top-left . 0) (top-center . 1) (top-right . 2)
    (mid-left . 3) (mid-center . 4) (mid-right . 5)
    (bottom-left . 6) (bottom-center . 7) (bottom-right . 8)
    (0 . 0) (1 . 1) (2 . 2)
    (3 . 3) (4 . 4) (5 . 5)
    (6 . 6) (7 . 7) (8 . 8))
  "Alist of reference point symbols vs reference point codes.
Each reference point may be named by a short symbol, a long symbol, or
the code itself.  Meanings of reference point codes are as follows:

    0----1----2 <-- ascent      0:tl or top-left
    |         |                 1:tc or top-center
    |         |                 2:tr or top-right
    |         |                 3:ml or mid-left
    |    4 <--+---- center      4:mc or mid-center
    |         |                 5:mr or mid-right
--- 3         5 <-- baseline    6:bl or bottom-left
    |         |                 7:bc or bottom-center
    6----7----8 <-- descent     8:br or bottom-right

Reference point symbols are to be used to specify composition rule of
the form \(GLOBAL-REF-POINT . NEW-REF-POINT), where GLOBAL-REF-POINT
is a reference point in the overall glyphs already composed, and
NEW-REF-POINT is a reference point in the new glyph to be added.

For instance, if GLOBAL-REF-POINT is 8 and NEW-REF-POINT is 1, the
overall glyph is updated as follows:

        +-------+--+ <--- new ascent
        |       |  |
        | global|  |
        | glyph |  |
    --- |       |  | <--- baseline (doesn't change)
        +----+--+--+
        |    | new |
        |    |glyph|
        +----+-----+ <--- new descent
")
|
|
329
|
|
;; Return the string that represents character CH inside the multibyte
;; form of a composite character.
;;  - A character below 128 becomes "\240" followed by the character
;;    whose code is CH plus 128.
;;  - A composite character contributes its own string form from index 2
;;    on; it can only be used when element 1 of that form is ?\xFF.
;;  - Any other character contributes its string form with the first
;;    element raised by ?\x20.
(defun compose-chars-component (ch)
  (cond ((< ch 128)
         (format "\240%c" (+ ch 128)))
        ((cmpcharp ch)
         (let ((form (char-to-string ch)))
           (if (= (aref form 1) ?\xFF)
               (substring form 2)
             (error "Char %c can't be composed" ch))))
        (t
         (let ((form (char-to-string ch)))
           (aset form 0 (+ (aref form 0) ?\x20))
           form))))
|
|
342
|
|
;; Return the one-character string that represents composition rule RULE
;; inside the multibyte form of a composite character.  RULE is a cons
;; (GLOBAL-REF-POINT . NEW-REF-POINT); both parts are translated to
;; their codes through `reference-point-alist' and packed into a single
;; character as ?\xA0 + GLOBAL-CODE * 9 + NEW-CODE.
(defsubst compose-chars-rule (rule)
  (let ((global-code (cdr (assq (car rule) reference-point-alist)))
        (new-code (cdr (assq (cdr rule) reference-point-alist))))
    (char-to-string (+ ?\xA0 (* global-code 9) new-code))))
|
|
349
|
|
350 ;;;###autoload
|
|
(defun compose-chars (first-component &rest args)
  "Return a one-character string composed from the arguments.
Each argument is a character (possibly itself a composite character)
or a composition rule.  If the argument following FIRST-COMPONENT is a
composition rule, rules and characters must alternate in ARGS;
otherwise every argument is a character.
A composition rule has the form \(GLOBAL-REF-POINT . NEW-REF-POINT).
See the documentation of `reference-point-alist' for more detail.
With FIRST-COMPONENT alone, return it as a string."
  (if (null args)
      (char-to-string first-component)
    (let* ((with-rule (consp (car args)))
           (remaining args)
           ;; The leading code, followed by ?\xFF when rules are used,
           ;; then the first component.
           (str (concat (if with-rule
                            (concat (vector leading-code-composition ?\xFF))
                          (char-to-string leading-code-composition))
                        (compose-chars-component first-component))))
      (while remaining
        (if (not with-rule)
            (setq str (concat str (compose-chars-component (car remaining)))
                  remaining (cdr remaining))
          ;; Consume one (RULE CHARACTER) pair per iteration.
          (or (consp (car remaining))
              (error "Invalid composition rule: %s" (car remaining)))
          (setq str (concat str
                            (compose-chars-rule (car remaining))
                            (compose-chars-component (nth 1 remaining)))
                remaining (nthcdr 2 remaining))))
      str)))
|
|
374
|
|
375 ;;;###autoload
|
|
(defun decompose-composite-char (char &optional type with-composition-rule)
  "Return the components of composite character CHAR as a sequence.
Optional 1st arg TYPE specifies the type of sequence returned.
It should be `string' (default), `list', or `vector'.
Optional 2nd arg WITH-COMPOSITION-RULE non-nil means the returned
sequence also contains the composition rules embedded in CHAR, if any,
each as a cons of two reference point codes.  In this case, the order
of elements in the sequence is the same as the arguments given to
`compose-chars' to create CHAR.
If TYPE is omitted or is `string', composition rules are omitted
even if WITH-COMPOSITION-RULE is t."
  (let* ((kind (or type 'string))
         (count (composite-char-component-count char))
         (want-rules (and with-composition-rule
                          (not (eq kind 'string))
                          (composite-char-composition-rule-p char)))
         (idx (1- count))
         (parts nil))
    ;; Collect from the last component backwards so that PARTS ends up
    ;; in forward order; each rule lands just before its component.
    (while (> idx 0)
      (setq parts (cons (composite-char-component char idx) parts))
      (if want-rules
          (let ((code (- (composite-char-composition-rule char idx) ?\xA0)))
            (setq parts (cons (cons (/ code 9) (% code 9)) parts))))
      (setq idx (1- idx)))
    ;; The first component never has a rule of its own.
    (setq parts (cons (composite-char-component char 0) parts))
    (if (eq kind 'string)
        (apply 'concat-chars parts)
      (if (eq kind 'list)
          parts
        ;; Any other TYPE is treated as `vector'.
        (vconcat parts)))))
|
|
407
|
|
408 ;;; mule-util.el ends here
|