Mercurial > hg > xemacs-beta
annotate lisp/mule/mule-coding.el @ 4558:d9fcb5442c95
Automated merge with file:/Sources/xemacs-21.5-checked-out
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Wed, 21 May 2008 21:47:42 +0200 |
parents | 6812571bfcb9 04ec3340612e |
children | bd1a68c34d44 |
rev | line source |
---|---|
502 | 1 ;;; mule-coding.el --- Coding-system functions for Mule. -*- coding: iso-2022-7bit; -*- |
333 | 2 |
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
4 ;; Licensed to the Free Software Foundation. | |
5 ;; Copyright (C) 1995 Amdahl Corporation. | |
6 ;; Copyright (C) 1995 Sun Microsystems. | |
7 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
771 | 8 ;; Copyright (C) 2001 Ben Wing. |
333 | 9 |
10 ;; This file is part of XEmacs. | |
11 | |
12 ;; XEmacs is free software; you can redistribute it and/or modify it | |
13 ;; under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; XEmacs is distributed in the hope that it will be useful, but | |
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 ;; General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
444 | 23 ;; along with XEmacs; see the file COPYING. If not, write to the |
333 | 24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
25 ;; Boston, MA 02111-1307, USA. | |
26 | |
27 ;;; Commentary: | |
28 | |
29 ;;; split off of mule.el and mostly moved to coding.el | |
30 | |
4072 | 31 ;; Needed for make-8-bit-coding-system. |
4080 | 32 (eval-when-compile (require 'ccl)) |
4072 | 33 |
333 | 34 ;;; Code: |
35 | |
36 (defun coding-system-force-on-output (coding-system register) | |
37 "Return the 'force-on-output property of CODING-SYSTEM for the specified REGISTER." | |
444 | 38 (check-type register integer) |
333 | 39 (coding-system-property |
40 coding-system | |
41 (case register | |
42 (0 'force-g0-on-output) | |
43 (1 'force-g1-on-output) | |
44 (2 'force-g2-on-output) | |
45 (3 'force-g3-on-output) | |
46 (t (signal 'args-out-of-range (list register 0 3)))))) | |
47 | |
48 (defun coding-system-short (coding-system) | |
49 "Return the 'short property of CODING-SYSTEM." | |
50 (coding-system-property coding-system 'short)) | |
51 | |
52 (defun coding-system-no-ascii-eol (coding-system) | |
53 "Return the 'no-ascii-eol property of CODING-SYSTEM." | |
54 (coding-system-property coding-system 'no-ascii-eol)) | |
55 | |
56 (defun coding-system-no-ascii-cntl (coding-system) | |
57 "Return the 'no-ascii-cntl property of CODING-SYSTEM." | |
58 (coding-system-property coding-system 'no-ascii-cntl)) | |
59 | |
60 (defun coding-system-seven (coding-system) | |
61 "Return the 'seven property of CODING-SYSTEM." | |
62 (coding-system-property coding-system 'seven)) | |
63 | |
64 (defun coding-system-lock-shift (coding-system) | |
65 "Return the 'lock-shift property of CODING-SYSTEM." | |
66 (coding-system-property coding-system 'lock-shift)) | |
67 | |
68 ;;(defun coding-system-use-japanese-jisx0201-roman (coding-system) | |
69 ;; "Return the 'use-japanese-jisx0201-roman property of CODING-SYSTEM." | |
70 ;; (coding-system-property coding-system 'use-japanese-jisx0201-roman)) | |
71 | |
72 ;;(defun coding-system-use-japanese-jisx0208-1978 (coding-system) | |
73 ;; "Return the 'use-japanese-jisx0208-1978 property of CODING-SYSTEM." | |
74 ;; (coding-system-property coding-system 'use-japanese-jisx0208-2978)) | |
75 | |
76 (defun coding-system-no-iso6429 (coding-system) | |
77 "Return the 'no-iso6429 property of CODING-SYSTEM." | |
78 (coding-system-property coding-system 'no-iso6429)) | |
79 | |
80 (defun coding-system-ccl-encode (coding-system) | |
81 "Return the CCL 'encode property of CODING-SYSTEM." | |
82 (coding-system-property coding-system 'encode)) | |
83 | |
84 (defun coding-system-ccl-decode (coding-system) | |
85 "Return the CCL 'decode property of CODING-SYSTEM." | |
86 (coding-system-property coding-system 'decode)) | |
87 | |
771 | 88 (defun coding-system-iso2022-charset (coding-system register) |
89 "Return the charset initially designated to REGISTER in CODING-SYSTEM. | |
90 The allowable range of REGISTER is 0 through 3." | |
91 (if (or (< register 0) (> register 3)) | |
92 (error 'args-out-of-range "coding-system-charset REGISTER" register 0 3)) | |
93 (coding-system-property coding-system (nth register '(charset-g0 | |
94 charset-g1 | |
95 charset-g2 | |
96 charset-g3)))) | |
97 | |
333 | 98 |
99 ;;;; Definitions of predefined coding systems | |
100 | |
101 (make-coding-system | |
102 'ctext 'iso2022 | |
771 | 103 "Compound Text" |
333 | 104 '(charset-g0 ascii |
105 charset-g1 latin-iso8859-1 | |
106 eol-type nil | |
107 mnemonic "CText")) | |
108 | |
109 (make-coding-system | |
110 'iso-2022-8bit-ss2 'iso2022 | |
771 | 111 "ISO-2022 8-bit w/SS2" |
333 | 112 '(charset-g0 ascii |
113 charset-g1 latin-iso8859-1 | |
114 charset-g2 t ;; unspecified but can be used later. | |
115 short t | |
116 mnemonic "ISO8/SS" | |
771 | 117 documentation "ISO 2022 based 8-bit encoding using SS2 for 96-charset" |
333 | 118 )) |
119 | |
120 (make-coding-system | |
121 'iso-2022-7bit-ss2 'iso2022 | |
771 | 122 "ISO-2022 7-bit w/SS2" |
333 | 123 '(charset-g0 ascii |
124 charset-g2 t ;; unspecified but can be used later. | |
125 seven t | |
126 short t | |
127 mnemonic "ISO7/SS" | |
771 | 128 documentation "ISO 2022 based 7-bit encoding using SS2 for 96-charset" |
333 | 129 eol-type nil)) |
130 | |
131 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2) | |
132 (make-coding-system | |
133 'iso-2022-jp-2 'iso2022 | |
771 | 134 "ISO-2022-JP-2" |
333 | 135 '(charset-g0 ascii |
136 charset-g2 t ;; unspecified but can be used later. | |
137 seven t | |
138 short t | |
139 mnemonic "ISO7/SS" | |
140 eol-type nil)) | |
141 | |
142 (make-coding-system | |
143 'iso-2022-7bit 'iso2022 | |
771 | 144 "ISO 2022 7-bit" |
333 | 145 '(charset-g0 ascii |
146 seven t | |
147 short t | |
771 | 148 mnemonic "ISO7" |
149 documentation "ISO-2022-based 7-bit encoding using only G0" | |
150 )) | |
333 | 151 |
152 ;; compatibility for old XEmacsen | |
771 | 153 (define-coding-system-alias 'iso-2022-7 'iso-2022-7bit) |
333 | 154 |
155 (make-coding-system | |
156 'iso-2022-8 'iso2022 | |
771 | 157 "ISO-2022 8-bit" |
333 | 158 '(charset-g0 ascii |
159 charset-g1 latin-iso8859-1 | |
160 short t | |
161 mnemonic "ISO8" | |
771 | 162 documentation "ISO-2022 eight-bit coding system. No single-shift or locking-shift." |
333 | 163 )) |
164 | |
165 (make-coding-system | |
166 'escape-quoted 'iso2022 | |
771 | 167 "Escape-Quoted (for .ELC files)" |
333 | 168 '(charset-g0 ascii |
169 charset-g1 latin-iso8859-1 | |
170 eol-type lf | |
171 escape-quoted t | |
172 mnemonic "ESC/Quot" | |
771 | 173 documentation "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files." |
333 | 174 )) |
175 | |
176 (make-coding-system | |
177 'iso-2022-lock 'iso2022 | |
771 | 178 "ISO-2022 w/locking-shift" |
333 | 179 '(charset-g0 ascii |
180 charset-g1 t ;; unspecified but can be used later. | |
181 seven t | |
182 lock-shift t | |
183 mnemonic "ISO7/Lock" | |
771 | 184 documentation "ISO-2022 coding system using Locking-Shift for 96-charset." |
333 | 185 )) |
4072 | 186 |
333 | 187 |
4072 | 188 ;; This is used by people writing CCL programs, but is called at runtime. |
189 (defun define-translation-hash-table (symbol table) | |
190 "Define SYMBOL as the name of the hash translation TABLE for use in CCL. | |
191 | |
192 Analogous to `define-translation-table', but updates | |
193 `translation-hash-table-vector' and the table is for use in the CCL | |
194 `lookup-integer' and `lookup-character' functions." | |
4145 | 195 (check-argument-type #'symbolp symbol) |
196 (check-argument-type #'hash-table-p table) | |
4072 | 197 (let ((len (length translation-hash-table-vector)) |
198 (id 0) | |
199 done) | |
200 (put symbol 'translation-hash-table table) | |
201 (while (not done) | |
202 (if (>= id len) | |
203 (setq translation-hash-table-vector | |
204 (vconcat translation-hash-table-vector [nil]))) | |
205 (let ((slot (aref translation-hash-table-vector id))) | |
206 (if (or (not slot) | |
207 (eq (car slot) symbol)) | |
208 (progn | |
209 (aset translation-hash-table-vector id (cons symbol table)) | |
210 (setq done t)) | |
211 (setq id (1+ id))))) | |
212 (put symbol 'translation-hash-table-id id) | |
213 id)) | |
214 | |
215 (defvar make-8-bit-private-use-start (decode-char 'ucs #xE000) | |
216 "Start of a 256 code private use area for make-8-bit-coding-system. | |
217 | |
218 This is used to ensure that distinct octets on disk for a given coding | |
219 system map to distinct XEmacs characters, preventing a spurious changes when | |
220 a file is read, not changed, and then written. ") | |
221 | |
222 (defun make-8-bit-generate-helper (decode-table encode-table | |
223 encode-failure-octet) | |
224 "Helper function for `make-8-bit-generate-encode-program', which see. | |
225 | |
4145 | 226 Deals with the case where ASCII and another character set can both be |
227 encoded unambiguously and completely into the coding-system; if this is so, | |
228 returns a list corresponding to such a ccl-program. If not, it returns nil. " | |
4072 | 229 (let ((tentative-encode-program-parts |
230 (eval-when-compile | |
4295 | 231 (let* ((vec-len 128) |
232 (compiled | |
4072 | 233 (append |
234 (ccl-compile | |
235 `(1 | |
236 (loop | |
237 (read-multibyte-character r0 r1) | |
238 (if (r0 == ,(charset-id 'ascii)) | |
239 (write r1) | |
240 ((if (r0 == #xABAB) | |
241 ;; #xBFFE is a sentinel in the compiled | |
242 ;; program. | |
4295 | 243 ((r0 = r1 & #x7F) |
244 (write r0 ,(make-vector vec-len #xBFFE))) | |
4072 | 245 ((mule-to-unicode r0 r1) |
246 (if (r0 == #xFFFD) | |
247 (write #xBEEF) | |
248 ((lookup-integer encode-table-sym r0 r3) | |
249 (if r7 | |
250 (write-multibyte-character r0 r3) | |
251 (write #xBEEF)))))))) | |
252 (repeat)))) nil)) | |
253 (first-part compiled) | |
254 (last-part | |
255 (member-if-not (lambda (entr) (eq #xBFFE entr)) | |
256 (member-if | |
257 (lambda (entr) (eq #xBFFE entr)) | |
258 first-part)))) | |
259 (while compiled | |
4295 | 260 (when (eq #xBFFE (cadr compiled)) |
261 (assert (= vec-len (search '(#xBFFE) (cdr compiled) | |
262 :test #'/=)) nil | |
263 "Strange ccl vector length") | |
264 (setcdr compiled nil)) | |
4072 | 265 (setq compiled (cdr compiled))) |
266 ;; Is the generated code as we expect it to be? | |
267 (assert (and (memq #xABAB first-part) | |
268 (memq #xBEEF14 last-part)) | |
269 nil | |
270 "This code assumes that the constant #xBEEF is #xBEEF14 in \ | |
271 compiled CCL code,\nand that the constant #xABAB is #xABAB. If that is | |
272 not the case, and it appears not to be--that's why you're getting this | |
273 message--it will not work. ") | |
4295 | 274 (list first-part last-part vec-len)))) |
4072 | 275 (charset-lower -1) |
276 (charset-upper -1) | |
277 worth-trying known-charsets encode-program | |
4295 | 278 other-charset-vector ucs args-out-of-range) |
4072 | 279 |
280 (loop for char across decode-table | |
281 do (pushnew (char-charset char) known-charsets)) | |
282 (setq known-charsets (delq 'ascii known-charsets)) | |
283 | |
284 (loop for known-charset in known-charsets | |
285 do | |
286 ;; This is not possible for two dimensional charsets. | |
287 (when (eq 1 (charset-dimension known-charset)) | |
288 (setq args-out-of-range t) | |
289 (if (eq 'control-1 known-charset) | |
290 (setq charset-lower 0 | |
291 charset-upper 31) | |
292 ;; There should be a nicer way to get the limits here. | |
293 (condition-case args-out-of-range | |
294 (make-char known-charset #x100) | |
295 (args-out-of-range | |
296 (setq charset-lower (third args-out-of-range) | |
297 charset-upper (fourth args-out-of-range))))) | |
298 (loop | |
299 for i from charset-lower to charset-upper | |
300 always (and (setq ucs | |
301 (encode-char (make-char known-charset i) 'ucs)) | |
302 (gethash ucs encode-table)) | |
303 finally (setq worth-trying known-charset)) | |
304 | |
305 ;; Only trying this for one charset at a time, the first find. | |
306 (when worth-trying (return)) | |
307 | |
308 ;; Okay, this charset is not worth trying, Try the next. | |
309 (setq charset-lower -1 | |
310 charset-upper -1 | |
311 worth-trying nil))) | |
312 | |
313 (when worth-trying | |
4295 | 314 (setq other-charset-vector |
315 (make-vector (third tentative-encode-program-parts) | |
316 encode-failure-octet)) | |
4072 | 317 (loop for i from charset-lower to charset-upper |
4090 | 318 do (aset other-charset-vector i |
4072 | 319 (gethash (encode-char (make-char worth-trying i) |
320 'ucs) encode-table))) | |
321 (setq encode-program | |
322 (nsublis | |
323 (list (cons #xABAB (charset-id worth-trying))) | |
324 (nconc | |
325 (copy-list (first | |
326 tentative-encode-program-parts)) | |
327 (append other-charset-vector nil) | |
328 (copy-tree (second | |
329 tentative-encode-program-parts)))))) | |
330 encode-program)) | |
331 | |
332 (defun make-8-bit-generate-encode-program (decode-table encode-table | |
333 encode-failure-octet) | |
334 "Generate a CCL program to decode a 8-bit fixed-width charset. | |
335 | |
336 DECODE-TABLE must have 256 non-cons entries, and will be regarded as | |
337 describing a map from the octet corresponding to an offset in the | |
338 table to the that entry in the table. ENCODE-TABLE is a hash table | |
339 map from unicode values to characters in the range [0,255]. | |
340 ENCODE-FAILURE-OCTET describes an integer between 0 and 255 | |
341 \(inclusive) to write in the event that a character cannot be encoded. " | |
342 (check-argument-type #'vectorp decode-table) | |
343 (check-argument-range (length decode-table) #x100 #x100) | |
344 (check-argument-type #'hash-table-p encode-table) | |
345 (check-argument-type #'integerp encode-failure-octet) | |
346 (check-argument-range encode-failure-octet #x00 #xFF) | |
347 (let ((encode-program nil) | |
348 (general-encode-program | |
349 (eval-when-compile | |
350 (let ((prog (append | |
351 (ccl-compile | |
352 `(1 | |
353 (loop | |
354 (read-multibyte-character r0 r1) | |
355 (mule-to-unicode r0 r1) | |
356 (if (r0 == #xFFFD) | |
357 (write #xBEEF) | |
358 ((lookup-integer encode-table-sym r0 r3) | |
359 (if r7 | |
360 (write-multibyte-character r0 r3) | |
361 (write #xBEEF)))) | |
362 (repeat)))) nil))) | |
363 (assert (memq #xBEEF14 prog) | |
364 nil | |
365 "This code assumes that the constant #xBEEF is #xBEEF14 \ | |
366 in compiled CCL code.\nIf that is not the case, and it appears not to | |
367 be--that's why you're getting this message--it will not work. ") | |
368 prog))) | |
369 (encode-program-with-ascii-optimisation | |
370 (eval-when-compile | |
371 (let ((prog (append | |
372 (ccl-compile | |
373 `(1 | |
374 (loop | |
375 (read-multibyte-character r0 r1) | |
376 (if (r0 == ,(charset-id 'ascii)) | |
377 (write r1) | |
378 ((mule-to-unicode r0 r1) | |
379 (if (r0 == #xFFFD) | |
380 (write #xBEEF) | |
381 ((lookup-integer encode-table-sym r0 r3) | |
382 (if r7 | |
383 (write-multibyte-character r0 r3) | |
384 (write #xBEEF)))))) | |
385 (repeat)))) nil))) | |
386 (assert (memq #xBEEF14 prog) | |
387 nil | |
388 "This code assumes that the constant #xBEEF is #xBEEF14 \ | |
389 in compiled CCL code.\nIf that is not the case, and it appears not to | |
390 be--that's why you're getting this message--it will not work. ") | |
391 prog))) | |
392 (ascii-encodes-as-itself nil)) | |
393 | |
394 ;; Is this coding system ASCII-compatible? If so, we can avoid the hash | |
395 ;; table lookup for those characters. | |
396 (loop | |
397 for i from #x00 to #x7f | |
398 always (eq (int-to-char i) (gethash i encode-table)) | |
399 finally (setq ascii-encodes-as-itself t)) | |
400 | |
401 ;; Note that this logic handles EBCDIC badly. For example, CP037, | |
402 ;; MIME name ebcdic-na, has the entire repertoire of ASCII and | |
403 ;; Latin 1, and thus a more optimal ccl encode program would check | |
404 ;; for those character sets and use tables. But for now, we do a | |
405 ;; hash table lookup for every character. | |
406 (if (null ascii-encodes-as-itself) | |
407 ;; General encode program. Pros; general and correct. Cons; | |
408 ;; slow, a hash table lookup + mule-unicode conversion is done | |
409 ;; for every character encoding. | |
410 (setq encode-program general-encode-program) | |
411 (setq encode-program | |
412 ;; Encode program with ascii-ascii mapping (based on a | |
413 ;; character's mule character set), and one other mule | |
414 ;; character set using table-based encoding, other | |
415 ;; character sets using hash table lookups. | |
416 ;; make-8-bit-non-ascii-completely-coveredp only returns | |
417 ;; such a mapping if some non-ASCII charset with | |
418 ;; characters in decode-table is entirely covered by | |
419 ;; encode-table. | |
420 (make-8-bit-generate-helper decode-table encode-table | |
421 encode-failure-octet)) | |
422 (unless encode-program | |
423 ;; If make-8-bit-non-ascii-completely-coveredp returned nil, | |
424 ;; but ASCII still encodes as itself, do one-to-one mapping | |
425 ;; for ASCII, and a hash table lookup for everything else. | |
426 (setq encode-program encode-program-with-ascii-optimisation))) | |
427 | |
428 (setq encode-program | |
429 (nsublis | |
430 (list (cons #xBEEF14 | |
431 (logior (lsh encode-failure-octet 8) | |
432 #x14))) | |
433 (copy-tree encode-program))) | |
434 encode-program)) | |
435 | |
436 (defun make-8-bit-create-decode-encode-tables (unicode-map) | |
437 "Return a list \(DECODE-TABLE ENCODE-TABLE) given UNICODE-MAP. | |
438 UNICODE-MAP should be an alist mapping from integer octet values to | |
439 characters with UCS code points; DECODE-TABLE will be a 256-element | |
440 vector, and ENCODE-TABLE will be a hash table mapping from 256 numbers | |
441 to 256 distinct characters. " | |
442 (check-argument-type #'listp unicode-map) | |
443 (let ((decode-table (make-vector 256 nil)) | |
444 (encode-table (make-hash-table :size 256)) | |
445 (private-use-start (encode-char make-8-bit-private-use-start 'ucs)) | |
446 desired-ucs) | |
447 | |
448 (loop for (external internal) | |
449 in unicode-map | |
450 do | |
451 (aset decode-table external internal) | |
452 (assert (not (eq (encode-char internal 'ucs) -1)) | |
453 nil | |
454 "Looks like you're calling make-8-bit-coding-system in a \ | |
455 dumped file, \nand you're either not providing a literal UNICODE-MAP | |
456 or PROPS. Don't do that; make-8-bit-coding-system relies on sensible | |
457 Unicode mappings being available, which they are at compile time for | |
458 dumped files (but this requires the mentioned literals), but not, for | |
459 most of them, at run time. ") | |
460 | |
461 (puthash (encode-char internal 'ucs) | |
462 ;; This is semantically an integer, but Dave Love's design | |
463 ;; for lookup-integer in CCL means we need to store it as a | |
464 ;; character. | |
465 (int-to-char external) | |
466 encode-table)) | |
467 | |
468 ;; Now, go through the decode table looking at the characters that | |
469 ;; remain nil. If the XEmacs character with that integer is already in | |
470 ;; the encode table, map the on-disk octet to a Unicode private use | |
471 ;; character. Otherwise map the on-disk octet to the XEmacs character | |
472 ;; with that numeric value, to make it clearer what it is. | |
473 (dotimes (i 256) | |
474 (when (null (aref decode-table i)) | |
475 ;; Find a free code point. | |
476 (setq desired-ucs i) | |
477 (while (gethash desired-ucs encode-table) | |
478 ;; In the normal case, the code point chosen will be U+E0XY, where | |
479 ;; XY is the hexadecimal octet on disk. In pathological cases | |
480 ;; it'll be something else. | |
481 (setq desired-ucs (+ private-use-start desired-ucs) | |
482 private-use-start (+ private-use-start 1))) | |
4085 | 483 (puthash desired-ucs (int-to-char i) encode-table) |
484 (setq desired-ucs (if (> desired-ucs #xFF) | |
485 (decode-char 'ucs desired-ucs) | |
486 ;; So we get Latin-1 when run at dump time, | |
487 ;; instead of JIT-allocated characters. | |
488 (int-to-char desired-ucs))) | |
489 (aset decode-table i desired-ucs))) | |
4072 | 490 (values decode-table encode-table))) |
491 | |
492 (defun make-8-bit-generate-decode-program (decode-table) | |
493 "Given DECODE-TABLE, generate a CCL program to decode an 8-bit charset. | |
494 DECODE-TABLE must have 256 non-cons entries, and will be regarded as | |
495 describing a map from the octet corresponding to an offset in the | |
496 table to the that entry in the table. " | |
497 (check-argument-type #'vectorp decode-table) | |
498 (check-argument-range (length decode-table) #x100 #x100) | |
499 (let ((decode-program-parts | |
500 (eval-when-compile | |
501 (let* ((compiled | |
502 (append | |
503 (ccl-compile | |
504 `(3 | |
505 ((read r0) | |
506 (loop | |
507 (write-read-repeat r0 ,(make-vector | |
508 256 'sentinel)))))) nil)) | |
509 (first-part compiled) | |
510 (last-part | |
511 (member-if-not #'symbolp | |
512 (member-if-not #'integerp first-part)))) | |
513 ;; Chop off the sentinel sentinel sentinel [..] part. | |
514 (while compiled | |
515 (if (symbolp (cadr compiled)) | |
516 (setcdr compiled nil)) | |
517 (setq compiled (cdr compiled))) | |
518 (list first-part last-part))))) | |
519 (nconc | |
520 ;; copy-list needed, because the structure of the literal provided | |
521 ;; by our eval-when-compile hangs around. | |
522 (copy-list (first decode-program-parts)) | |
523 (append decode-table nil) | |
524 (second decode-program-parts)))) | |
525 | |
4145 | 526 (defun make-8-bit-choose-category (decode-table) |
527 "Given DECODE-TABLE, return an appropriate coding category. | |
528 DECODE-TABLE is a 256-entry vector describing the mapping from octets on | |
529 disk to XEmacs characters for some fixed-width 8-bit coding system. " | |
530 (check-argument-type #'vectorp decode-table) | |
531 (check-argument-range (length decode-table) #x100 #x100) | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
532 (loop |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
533 named category |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
534 for i from #x80 to #xBF |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
535 do (unless (= i (aref decode-table i)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
536 (return-from category 'no-conversion)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
537 finally return 'iso-8-1)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
538 |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
539 (defun 8-bit-fixed-query-coding-region (begin end coding-system |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
540 &optional buffer errorp highlightp) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
541 "The `query-coding-region' implementation for 8-bit-fixed coding systems. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
542 |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
543 Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars' |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
544 coding system properties. The former is a hash table mapping from valid |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
545 Unicode code points to on-disk octets in the coding system; the latter a set |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
546 of characters as used by `skip-chars-forward'. Both of these properties are |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
547 generated automatically by `make-8-bit-coding-system'. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
548 |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
549 See that the documentation of `query-coding-region'; see also |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
550 `make-8-bit-coding-system'. " |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
551 (check-argument-type #'coding-system-p |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
552 (setq coding-system (find-coding-system coding-system))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
553 (check-argument-type #'integer-or-marker-p begin) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
554 (check-argument-type #'integer-or-marker-p end) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
555 (let ((from-unicode |
4551 | 556 (or (coding-system-get coding-system '8-bit-fixed-query-from-unicode) |
557 (coding-system-get (coding-system-base coding-system) | |
558 '8-bit-fixed-query-from-unicode))) | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
559 (skip-chars-arg |
4551 | 560 (or (coding-system-get coding-system '8-bit-fixed-query-skip-chars) |
561 (coding-system-get (coding-system-base coding-system) | |
562 '8-bit-fixed-query-skip-chars))) | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
563 (ranges (make-range-table)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
564 char-after fail-range-start fail-range-end previous-fail extent |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
565 failed) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
566 (check-type from-unicode hash-table) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
567 (check-type skip-chars-arg string) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
568 (save-excursion |
4551 | 569 (when highlightp |
570 (map-extents #'(lambda (extent ignored-arg) | |
571 (when (eq 'query-coding-warning-face | |
572 (extent-face extent)) | |
573 (delete-extent extent))) buffer begin end)) | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
574 (goto-char begin buffer) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
575 (skip-chars-forward skip-chars-arg end buffer) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
576 (while (< (point buffer) end) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
577 (message |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
578 "fail-range-start is %S, previous-fail %S, point is %S, end is %S" |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
579 fail-range-start previous-fail (point buffer) end) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
580 (setq char-after (char-after (point buffer) buffer) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
581 fail-range-start (point buffer)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
582 (message "arguments are %S %S" |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
583 (< (point buffer) end) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
584 (not (gethash (encode-char char-after 'ucs) from-unicode))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
585 (while (and |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
586 (< (point buffer) end) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
587 (not (gethash (encode-char char-after 'ucs) from-unicode))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
588 (forward-char 1 buffer) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
589 (setq char-after (char-after (point buffer) buffer) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
590 failed t)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
591 (if (= fail-range-start (point buffer)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
592 ;; The character can actually be encoded by the coding |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
593 ;; system; check the characters past it. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
594 (forward-char 1 buffer) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
595 ;; The character actually failed. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
596 (message "past the move through, point now %S" (point buffer)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
597 (when errorp |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
598 (error 'text-conversion-error |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
599 (format "Cannot encode %s using coding system" |
4551 | 600 (buffer-substring fail-range-start (point buffer) |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
601 buffer)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
602 (coding-system-name coding-system))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
603 (put-range-table fail-range-start |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
604 ;; If char-after is non-nil, we're not at |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
605 ;; the end of the buffer. |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
606 (setq fail-range-end (if char-after |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
607 (point buffer) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
608 (point-max buffer))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
609 t ranges) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
610 (when highlightp |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
611 (message "highlighting") |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
612 (setq extent (make-extent fail-range-start fail-range-end buffer)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
613 (set-extent-priority extent (+ mouse-highlight-priority 2)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
614 (set-extent-face extent 'query-coding-warning-face)) |
4551 | 615 (skip-chars-forward skip-chars-arg end buffer))) |
616 (message "about to give the result, ranges %S" ranges) | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
617 (if failed |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
618 (values nil ranges) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
619 (values t nil))))) |
4145 | 620 |
4072 | 621 ;;;###autoload |
622 (defun make-8-bit-coding-system (name unicode-map &optional description props) | |
623 "Make and return a fixed-width 8-bit CCL coding system named NAME. | |
624 NAME must be a symbol, and UNICODE-MAP a list. | |
625 | |
626 UNICODE-MAP is a plist describing a map from octets in the coding | |
627 system NAME (as integers) to XEmacs characters. Those XEmacs | |
628 characters will be used explicitly on decoding, but for encoding (most | |
629 relevantly, on writing to disk) XEmacs characters that map to the same | |
630 Unicode code point will be unified. This means that the ISO-8859-? | |
631 characters that map to the same Unicode code point will not be | |
632 distinct when written to disk, which is normally what is intended; it | |
633 also means that East Asian Han characters from different XEmacs | |
634 character sets will not be distinct when written to disk, which is | |
635 less often what is intended. | |
636 | |
637 Any octets not mapped will be decoded into the ISO 8859-1 characters with | |
638 the corresponding numeric value; unless another octet maps to that | |
639 character, in which case the Unicode private use area will be used. This | |
640 avoids spurious changes to files on disk when they contain octets that would | |
641 be otherwise remapped to the canonical values for the corresponding | |
642 characters in the coding system. | |
643 | |
644 DESCRIPTION and PROPS are as in `make-coding-system', which see. This | |
645 function also accepts two additional (optional) properties in PROPS; | |
646 `aliases', giving a list of aliases to be initialized for this | |
647 coding-system, and `encode-failure-octet', an integer between 0 and 256 to | |
648 write in place of XEmacs characters that cannot be encoded, defaulting to | |
649 the code for tilde `~'. " | |
650 (check-argument-type #'symbolp name) | |
651 (check-argument-type #'listp unicode-map) | |
652 (check-argument-type #'stringp | |
653 (or description | |
654 (setq description | |
655 (format "Coding system used for %s." name)))) | |
656 (check-valid-plist props) | |
657 (let ((encode-failure-octet (or (plist-get props 'encode-failure-octet) | |
658 (char-to-int ?~))) | |
659 (aliases (plist-get props 'aliases)) | |
660 (hash-table-sym (gentemp (format "%s-encode-table" name))) | |
661 encode-program decode-program result decode-table encode-table) | |
662 | |
663 ;; Some more sanity checking. | |
664 (check-argument-range encode-failure-octet 0 #xFF) | |
665 (check-argument-type #'listp aliases) | |
666 | |
667 ;; Don't pass on our extra data to make-coding-system. | |
668 (setq props (plist-remprop props 'encode-failure-octet) | |
669 props (plist-remprop props 'aliases)) | |
670 | |
671 (multiple-value-setq | |
672 (decode-table encode-table) | |
673 (make-8-bit-create-decode-encode-tables unicode-map)) | |
674 | |
675 ;; Register the decode-table. | |
676 (define-translation-hash-table hash-table-sym encode-table) | |
677 | |
678 ;; Generate the programs. | |
679 (setq decode-program (make-8-bit-generate-decode-program decode-table) | |
680 encode-program (make-8-bit-generate-encode-program | |
681 decode-table encode-table encode-failure-octet)) | |
682 (unless (vectorp encode-program) | |
683 (setq encode-program | |
684 (apply #'vector | |
685 (nsublis (list (cons 'encode-table-sym hash-table-sym)) | |
686 (copy-tree encode-program))))) | |
687 (unless (vectorp decode-program) | |
688 (setq decode-program | |
689 (apply #'vector decode-program))) | |
690 | |
691 ;; And now generate the actual coding system. | |
692 (setq result | |
693 (make-coding-system | |
694 name 'ccl | |
695 description | |
696 (plist-put (plist-put props 'decode decode-program) | |
697 'encode encode-program))) | |
4295 | 698 (coding-system-put name '8-bit-fixed t) |
4145 | 699 (coding-system-put name 'category |
700 (make-8-bit-choose-category decode-table)) | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
701 (coding-system-put name '8-bit-fixed-query-skip-chars |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
702 (apply #'string (append decode-table nil))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
703 (coding-system-put name '8-bit-fixed-query-from-unicode encode-table) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
704 |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
705 (coding-system-put name 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
706 #'8-bit-fixed-query-coding-region) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
707 (coding-system-put (intern (format "%s-unix" name)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
708 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
709 #'8-bit-fixed-query-coding-region) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
710 (coding-system-put (intern (format "%s-dos" name)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
711 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
712 #'8-bit-fixed-query-coding-region) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
713 (coding-system-put (intern (format "%s-mac" name)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
714 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
715 #'8-bit-fixed-query-coding-region) |
4072 | 716 (loop for alias in aliases |
717 do (define-coding-system-alias alias name)) | |
718 result)) | |
719 | |
720 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map | |
721 &optional description props) | |
722 ;; We provide the compiler macro (= macro that is expanded only on | |
723 ;; compilation, and that can punt to a runtime version of the | |
724 ;; associate function if necessary) not for reasons of speed, though | |
725 ;; it does speed up things at runtime a little, but because the | |
726 ;; Unicode mappings are available at compile time in the dumped | |
727 ;; files, but they are not available at run time for the vast | |
728 ;; majority of them. | |
729 | |
730 (if (not (and (and (consp name) (eq (car name) 'quote)) | |
731 (and (consp unicode-map) (eq (car unicode-map) 'quote)) | |
732 (and (or (and (consp props) (eq (car props) 'quote)) | |
733 (null props))))) | |
734 ;; The call does not use literals; do it at runtime. | |
735 form | |
736 (setq name (cadr name) | |
737 unicode-map (cadr unicode-map) | |
738 props (if props (cadr props))) | |
739 (let ((encode-failure-octet | |
740 (or (plist-get props 'encode-failure-octet) (char-to-int ?~))) | |
741 (aliases (plist-get props 'aliases)) | |
742 encode-program decode-program | |
4103 | 743 decode-table encode-table) |
4072 | 744 |
745 ;; Some sanity checking. | |
746 (check-argument-range encode-failure-octet 0 #xFF) | |
747 (check-argument-type #'listp aliases) | |
748 | |
749 ;; Don't pass on our extra data to make-coding-system. | |
750 (setq props (plist-remprop props 'encode-failure-octet) | |
751 props (plist-remprop props 'aliases)) | |
752 | |
753 ;; Work out encode-table and decode-table. | |
754 (multiple-value-setq | |
755 (decode-table encode-table) | |
756 (make-8-bit-create-decode-encode-tables unicode-map)) | |
757 | |
758 ;; Generate the decode and encode programs. | |
759 (setq decode-program (make-8-bit-generate-decode-program decode-table) | |
760 encode-program (make-8-bit-generate-encode-program | |
761 decode-table encode-table encode-failure-octet)) | |
762 | |
763 ;; And return the generated code. | |
764 `(let ((encode-table-sym (gentemp (format "%s-encode-table" ',name))) | |
4103 | 765 ;; The case-fold-search bind shouldn't be necessary. If I take |
766 ;; it, out, though, I get: | |
767 ;; | |
768 ;; (invalid-read-syntax "Multiply defined symbol label" 1) | |
769 ;; | |
770 ;; when the file is byte compiled. | |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
771 (case-fold-search t) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
772 (encode-table ,encode-table)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
773 (define-translation-hash-table encode-table-sym encode-table) |
4103 | 774 (make-coding-system |
775 ',name 'ccl ,description | |
776 (plist-put (plist-put ',props 'decode | |
777 ,(apply #'vector decode-program)) | |
778 'encode | |
779 (apply #'vector | |
780 (nsublis | |
781 (list (cons | |
782 'encode-table-sym | |
783 (symbol-value 'encode-table-sym))) | |
784 ',encode-program)))) | |
4295 | 785 (coding-system-put ',name '8-bit-fixed t) |
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
786 (coding-system-put ',name 'category |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
787 ',(make-8-bit-choose-category decode-table)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
788 (coding-system-put ',name '8-bit-fixed-query-skip-chars |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
789 ',(apply #'string (append decode-table nil))) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
790 (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
791 (coding-system-put ',name 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
792 #'8-bit-fixed-query-coding-region) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
793 (coding-system-put ',(intern (format "%s-unix" name)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
794 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
795 #'8-bit-fixed-query-coding-region) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
796 (coding-system-put ',(intern (format "%s-dos" name)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
797 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
798 #'8-bit-fixed-query-coding-region) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
799 (coding-system-put ',(intern (format "%s-mac" name)) |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
800 'query-coding-function |
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
801 #'8-bit-fixed-query-coding-region) |
4072 | 802 ,(macroexpand `(loop for alias in ',aliases |
803 do (define-coding-system-alias alias | |
804 ',name))) | |
4103 | 805 (find-coding-system ',name))))) |
4299 | 806 |
807 ;; Ideally this would be in latin.el, but code-init.el uses it. | |
808 (make-8-bit-coding-system | |
809 'iso-8859-1 | |
810 '() ;; No differences from Latin 1. | |
811 "ISO-8859-1 (Latin-1)" | |
812 '(mnemonic "Latin 1" | |
813 documentation "The most used encoding of Western Europe and the Americas." | |
814 aliases (iso-latin-1 latin-1))) |