Mercurial > hg > xemacs-beta
annotate lisp/mule/mule-coding.el @ 4569:80e0588fb42f
Merge.
| author | Aidan Kehoe <kehoea@parhasard.net> |
|---|---|
| date | Sun, 28 Dec 2008 14:55:02 +0000 |
| parents | 1d74a1d115ee |
| children | e0a8715fdb1f |
| rev | line source |
|---|---|
| 502 | 1 ;;; mule-coding.el --- Coding-system functions for Mule. -*- coding: iso-2022-7bit; -*- |
| 333 | 2 |
| 3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
| 4 ;; Licensed to the Free Software Foundation. | |
| 5 ;; Copyright (C) 1995 Amdahl Corporation. | |
| 6 ;; Copyright (C) 1995 Sun Microsystems. | |
| 7 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
| 771 | 8 ;; Copyright (C) 2001 Ben Wing. |
| 333 | 9 |
| 10 ;; This file is part of XEmacs. | |
| 11 | |
| 12 ;; XEmacs is free software; you can redistribute it and/or modify it | |
| 13 ;; under the terms of the GNU General Public License as published by | |
| 14 ;; the Free Software Foundation; either version 2, or (at your option) | |
| 15 ;; any later version. | |
| 16 | |
| 17 ;; XEmacs is distributed in the hope that it will be useful, but | |
| 18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 20 ;; General Public License for more details. | |
| 21 | |
| 22 ;; You should have received a copy of the GNU General Public License | |
| 444 | 23 ;; along with XEmacs; see the file COPYING. If not, write to the |
| 333 | 24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 25 ;; Boston, MA 02111-1307, USA. | |
| 26 | |
| 27 ;;; Commentary: | |
| 28 | |
| 29 ;;; split off of mule.el and mostly moved to coding.el | |
| 30 | |
| 4072 | 31 ;; Needed for make-8-bit-coding-system. |
| 4080 | 32 (eval-when-compile (require 'ccl)) |
| 4072 | 33 |
| 333 | 34 ;;; Code: |
| 35 | |
| 36 (defun coding-system-force-on-output (coding-system register) | |
| 37 "Return the 'force-on-output property of CODING-SYSTEM for the specified REGISTER." | |
| 444 | 38 (check-type register integer) |
| 333 | 39 (coding-system-property |
| 40 coding-system | |
| 41 (case register | |
| 42 (0 'force-g0-on-output) | |
| 43 (1 'force-g1-on-output) | |
| 44 (2 'force-g2-on-output) | |
| 45 (3 'force-g3-on-output) | |
| 46 (t (signal 'args-out-of-range (list register 0 3)))))) | |
| 47 | |
| 48 (defun coding-system-short (coding-system) | |
| 49 "Return the 'short property of CODING-SYSTEM." | |
| 50 (coding-system-property coding-system 'short)) | |
| 51 | |
| 52 (defun coding-system-no-ascii-eol (coding-system) | |
| 53 "Return the 'no-ascii-eol property of CODING-SYSTEM." | |
| 54 (coding-system-property coding-system 'no-ascii-eol)) | |
| 55 | |
| 56 (defun coding-system-no-ascii-cntl (coding-system) | |
| 57 "Return the 'no-ascii-cntl property of CODING-SYSTEM." | |
| 58 (coding-system-property coding-system 'no-ascii-cntl)) | |
| 59 | |
| 60 (defun coding-system-seven (coding-system) | |
| 61 "Return the 'seven property of CODING-SYSTEM." | |
| 62 (coding-system-property coding-system 'seven)) | |
| 63 | |
| 64 (defun coding-system-lock-shift (coding-system) | |
| 65 "Return the 'lock-shift property of CODING-SYSTEM." | |
| 66 (coding-system-property coding-system 'lock-shift)) | |
| 67 | |
| 68 ;;(defun coding-system-use-japanese-jisx0201-roman (coding-system) | |
| 69 ;; "Return the 'use-japanese-jisx0201-roman property of CODING-SYSTEM." | |
| 70 ;; (coding-system-property coding-system 'use-japanese-jisx0201-roman)) | |
| 71 | |
| 72 ;;(defun coding-system-use-japanese-jisx0208-1978 (coding-system) | |
| 73 ;; "Return the 'use-japanese-jisx0208-1978 property of CODING-SYSTEM." | |
| 74 ;; (coding-system-property coding-system 'use-japanese-jisx0208-2978)) | |
| 75 | |
| 76 (defun coding-system-no-iso6429 (coding-system) | |
| 77 "Return the 'no-iso6429 property of CODING-SYSTEM." | |
| 78 (coding-system-property coding-system 'no-iso6429)) | |
| 79 | |
| 80 (defun coding-system-ccl-encode (coding-system) | |
| 81 "Return the CCL 'encode property of CODING-SYSTEM." | |
| 82 (coding-system-property coding-system 'encode)) | |
| 83 | |
| 84 (defun coding-system-ccl-decode (coding-system) | |
| 85 "Return the CCL 'decode property of CODING-SYSTEM." | |
| 86 (coding-system-property coding-system 'decode)) | |
| 87 | |
| 771 | 88 (defun coding-system-iso2022-charset (coding-system register) |
| 89 "Return the charset initially designated to REGISTER in CODING-SYSTEM. | |
| 90 The allowable range of REGISTER is 0 through 3." | |
| 91 (if (or (< register 0) (> register 3)) | |
| 92 (error 'args-out-of-range "coding-system-charset REGISTER" register 0 3)) | |
| 93 (coding-system-property coding-system (nth register '(charset-g0 | |
| 94 charset-g1 | |
| 95 charset-g2 | |
| 96 charset-g3)))) | |
| 97 | |
| 333 | 98 |
| 99 ;;;; Definitions of predefined coding systems | |
| 100 | |
| 101 (make-coding-system | |
| 102 'ctext 'iso2022 | |
| 771 | 103 "Compound Text" |
| 333 | 104 '(charset-g0 ascii |
| 105 charset-g1 latin-iso8859-1 | |
| 106 eol-type nil | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
107 safe-charsets t ;; Reasonable |
| 333 | 108 mnemonic "CText")) |
| 109 | |
| 110 (make-coding-system | |
| 111 'iso-2022-8bit-ss2 'iso2022 | |
| 771 | 112 "ISO-2022 8-bit w/SS2" |
| 333 | 113 '(charset-g0 ascii |
| 114 charset-g1 latin-iso8859-1 | |
| 115 charset-g2 t ;; unspecified but can be used later. | |
| 116 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
117 safe-charsets (ascii katakana-jisx0201 japanese-jisx0208-1978 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
118 japanese-jisx0208 japanese-jisx0212 japanese-jisx0213-1 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
119 japanese-jisx0213-2) |
| 333 | 120 mnemonic "ISO8/SS" |
| 771 | 121 documentation "ISO 2022 based 8-bit encoding using SS2 for 96-charset" |
| 333 | 122 )) |
| 123 | |
| 124 (make-coding-system | |
| 125 'iso-2022-7bit-ss2 'iso2022 | |
| 771 | 126 "ISO-2022 7-bit w/SS2" |
| 333 | 127 '(charset-g0 ascii |
| 128 charset-g2 t ;; unspecified but can be used later. | |
| 129 seven t | |
| 130 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
131 safe-charsets t |
| 333 | 132 mnemonic "ISO7/SS" |
| 771 | 133 documentation "ISO 2022 based 7-bit encoding using SS2 for 96-charset" |
| 333 | 134 eol-type nil)) |
| 135 | |
| 136 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2) | |
| 137 (make-coding-system | |
| 138 'iso-2022-jp-2 'iso2022 | |
| 771 | 139 "ISO-2022-JP-2" |
| 333 | 140 '(charset-g0 ascii |
| 141 charset-g2 t ;; unspecified but can be used later. | |
| 142 seven t | |
| 143 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
144 safe-charsets t |
| 333 | 145 mnemonic "ISO7/SS" |
| 146 eol-type nil)) | |
| 147 | |
| 148 (make-coding-system | |
| 149 'iso-2022-7bit 'iso2022 | |
| 771 | 150 "ISO 2022 7-bit" |
| 333 | 151 '(charset-g0 ascii |
| 152 seven t | |
| 153 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
154 safe-charsets t |
| 771 | 155 mnemonic "ISO7" |
| 156 documentation "ISO-2022-based 7-bit encoding using only G0" | |
| 157 )) | |
| 333 | 158 |
| 159 ;; compatibility for old XEmacsen | |
| 771 | 160 (define-coding-system-alias 'iso-2022-7 'iso-2022-7bit) |
| 333 | 161 |
| 162 (make-coding-system | |
| 163 'iso-2022-8 'iso2022 | |
| 771 | 164 "ISO-2022 8-bit" |
| 333 | 165 '(charset-g0 ascii |
| 166 charset-g1 latin-iso8859-1 | |
| 167 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
168 safe-charsets t |
| 333 | 169 mnemonic "ISO8" |
| 771 | 170 documentation "ISO-2022 eight-bit coding system. No single-shift or locking-shift." |
| 333 | 171 )) |
| 172 | |
| 173 (make-coding-system | |
| 174 'escape-quoted 'iso2022 | |
| 771 | 175 "Escape-Quoted (for .ELC files)" |
| 333 | 176 '(charset-g0 ascii |
| 177 charset-g1 latin-iso8859-1 | |
| 178 eol-type lf | |
| 179 escape-quoted t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
180 safe-charsets t |
| 333 | 181 mnemonic "ESC/Quot" |
| 771 | 182 documentation "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files." |
| 333 | 183 )) |
| 184 | |
| 185 (make-coding-system | |
| 186 'iso-2022-lock 'iso2022 | |
| 771 | 187 "ISO-2022 w/locking-shift" |
| 333 | 188 '(charset-g0 ascii |
| 189 charset-g1 t ;; unspecified but can be used later. | |
| 190 seven t | |
| 191 lock-shift t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
192 safe-charsets t |
| 333 | 193 mnemonic "ISO7/Lock" |
| 771 | 194 documentation "ISO-2022 coding system using Locking-Shift for 96-charset." |
| 333 | 195 )) |
| 4072 | 196 |
| 333 | 197 |
| 4072 | 198 ;; This is used by people writing CCL programs, but is called at runtime. |
| 199 (defun define-translation-hash-table (symbol table) | |
| 200 "Define SYMBOL as the name of the hash translation TABLE for use in CCL. | |
| 201 | |
| 202 Analogous to `define-translation-table', but updates | |
| 203 `translation-hash-table-vector' and the table is for use in the CCL | |
| 204 `lookup-integer' and `lookup-character' functions." | |
| 4145 | 205 (check-argument-type #'symbolp symbol) |
| 206 (check-argument-type #'hash-table-p table) | |
| 4072 | 207 (let ((len (length translation-hash-table-vector)) |
| 208 (id 0) | |
| 209 done) | |
| 210 (put symbol 'translation-hash-table table) | |
| 211 (while (not done) | |
| 212 (if (>= id len) | |
| 213 (setq translation-hash-table-vector | |
| 214 (vconcat translation-hash-table-vector [nil]))) | |
| 215 (let ((slot (aref translation-hash-table-vector id))) | |
| 216 (if (or (not slot) | |
| 217 (eq (car slot) symbol)) | |
| 218 (progn | |
| 219 (aset translation-hash-table-vector id (cons symbol table)) | |
| 220 (setq done t)) | |
| 221 (setq id (1+ id))))) | |
| 222 (put symbol 'translation-hash-table-id id) | |
| 223 id)) | |
| 224 | |
| 225 (defvar make-8-bit-private-use-start (decode-char 'ucs #xE000) | |
| 226 "Start of a 256 code private use area for make-8-bit-coding-system. | |
| 227 | |
| 228 This is used to ensure that distinct octets on disk for a given coding | |
| 229 system map to distinct XEmacs characters, preventing a spurious changes when | |
| 230 a file is read, not changed, and then written. ") | |
| 231 | |
| 232 (defun make-8-bit-generate-helper (decode-table encode-table | |
| 233 encode-failure-octet) | |
| 234 "Helper function for `make-8-bit-generate-encode-program', which see. | |
| 235 | |
| 4145 | 236 Deals with the case where ASCII and another character set can both be |
| 237 encoded unambiguously and completely into the coding-system; if this is so, | |
| 238 returns a list corresponding to such a ccl-program. If not, it returns nil. " | |
| 4072 | 239 (let ((tentative-encode-program-parts |
| 240 (eval-when-compile | |
| 4295 | 241 (let* ((vec-len 128) |
| 242 (compiled | |
| 4072 | 243 (append |
| 244 (ccl-compile | |
| 245 `(1 | |
| 246 (loop | |
| 247 (read-multibyte-character r0 r1) | |
| 248 (if (r0 == ,(charset-id 'ascii)) | |
| 249 (write r1) | |
| 250 ((if (r0 == #xABAB) | |
| 251 ;; #xBFFE is a sentinel in the compiled | |
| 252 ;; program. | |
| 4295 | 253 ((r0 = r1 & #x7F) |
| 254 (write r0 ,(make-vector vec-len #xBFFE))) | |
| 4072 | 255 ((mule-to-unicode r0 r1) |
| 256 (if (r0 == #xFFFD) | |
| 257 (write #xBEEF) | |
| 258 ((lookup-integer encode-table-sym r0 r3) | |
| 259 (if r7 | |
| 260 (write-multibyte-character r0 r3) | |
| 261 (write #xBEEF)))))))) | |
| 262 (repeat)))) nil)) | |
| 263 (first-part compiled) | |
| 264 (last-part | |
| 265 (member-if-not (lambda (entr) (eq #xBFFE entr)) | |
| 266 (member-if | |
| 267 (lambda (entr) (eq #xBFFE entr)) | |
| 268 first-part)))) | |
| 269 (while compiled | |
| 4295 | 270 (when (eq #xBFFE (cadr compiled)) |
| 271 (assert (= vec-len (search '(#xBFFE) (cdr compiled) | |
| 272 :test #'/=)) nil | |
| 273 "Strange ccl vector length") | |
| 274 (setcdr compiled nil)) | |
| 4072 | 275 (setq compiled (cdr compiled))) |
| 276 ;; Is the generated code as we expect it to be? | |
| 277 (assert (and (memq #xABAB first-part) | |
| 278 (memq #xBEEF14 last-part)) | |
| 279 nil | |
| 280 "This code assumes that the constant #xBEEF is #xBEEF14 in \ | |
| 281 compiled CCL code,\nand that the constant #xABAB is #xABAB. If that is | |
| 282 not the case, and it appears not to be--that's why you're getting this | |
| 283 message--it will not work. ") | |
| 4295 | 284 (list first-part last-part vec-len)))) |
| 4072 | 285 (charset-lower -1) |
| 286 (charset-upper -1) | |
| 287 worth-trying known-charsets encode-program | |
| 4295 | 288 other-charset-vector ucs args-out-of-range) |
| 4072 | 289 |
| 290 (loop for char across decode-table | |
| 291 do (pushnew (char-charset char) known-charsets)) | |
| 292 (setq known-charsets (delq 'ascii known-charsets)) | |
| 293 | |
| 294 (loop for known-charset in known-charsets | |
| 295 do | |
| 296 ;; This is not possible for two dimensional charsets. | |
| 297 (when (eq 1 (charset-dimension known-charset)) | |
| 298 (setq args-out-of-range t) | |
| 299 (if (eq 'control-1 known-charset) | |
| 300 (setq charset-lower 0 | |
| 301 charset-upper 31) | |
| 302 ;; There should be a nicer way to get the limits here. | |
| 303 (condition-case args-out-of-range | |
| 304 (make-char known-charset #x100) | |
| 305 (args-out-of-range | |
| 306 (setq charset-lower (third args-out-of-range) | |
| 307 charset-upper (fourth args-out-of-range))))) | |
| 308 (loop | |
| 309 for i from charset-lower to charset-upper | |
| 310 always (and (setq ucs | |
| 311 (encode-char (make-char known-charset i) 'ucs)) | |
| 312 (gethash ucs encode-table)) | |
| 313 finally (setq worth-trying known-charset)) | |
| 314 | |
| 315 ;; Only trying this for one charset at a time, the first find. | |
| 316 (when worth-trying (return)) | |
| 317 | |
| 318 ;; Okay, this charset is not worth trying, Try the next. | |
| 319 (setq charset-lower -1 | |
| 320 charset-upper -1 | |
| 321 worth-trying nil))) | |
| 322 | |
| 323 (when worth-trying | |
| 4295 | 324 (setq other-charset-vector |
| 325 (make-vector (third tentative-encode-program-parts) | |
| 326 encode-failure-octet)) | |
| 4072 | 327 (loop for i from charset-lower to charset-upper |
| 4090 | 328 do (aset other-charset-vector i |
| 4072 | 329 (gethash (encode-char (make-char worth-trying i) |
| 330 'ucs) encode-table))) | |
| 331 (setq encode-program | |
| 332 (nsublis | |
| 333 (list (cons #xABAB (charset-id worth-trying))) | |
| 334 (nconc | |
| 335 (copy-list (first | |
| 336 tentative-encode-program-parts)) | |
| 337 (append other-charset-vector nil) | |
| 338 (copy-tree (second | |
| 339 tentative-encode-program-parts)))))) | |
| 340 encode-program)) | |
| 341 | |
| 342 (defun make-8-bit-generate-encode-program (decode-table encode-table | |
| 343 encode-failure-octet) | |
| 344 "Generate a CCL program to decode a 8-bit fixed-width charset. | |
| 345 | |
| 346 DECODE-TABLE must have 256 non-cons entries, and will be regarded as | |
| 347 describing a map from the octet corresponding to an offset in the | |
| 348 table to the that entry in the table. ENCODE-TABLE is a hash table | |
| 349 map from unicode values to characters in the range [0,255]. | |
| 350 ENCODE-FAILURE-OCTET describes an integer between 0 and 255 | |
| 351 \(inclusive) to write in the event that a character cannot be encoded. " | |
| 352 (check-argument-type #'vectorp decode-table) | |
| 353 (check-argument-range (length decode-table) #x100 #x100) | |
| 354 (check-argument-type #'hash-table-p encode-table) | |
| 355 (check-argument-type #'integerp encode-failure-octet) | |
| 356 (check-argument-range encode-failure-octet #x00 #xFF) | |
| 357 (let ((encode-program nil) | |
| 358 (general-encode-program | |
| 359 (eval-when-compile | |
| 360 (let ((prog (append | |
| 361 (ccl-compile | |
| 362 `(1 | |
| 363 (loop | |
| 364 (read-multibyte-character r0 r1) | |
| 365 (mule-to-unicode r0 r1) | |
| 366 (if (r0 == #xFFFD) | |
| 367 (write #xBEEF) | |
| 368 ((lookup-integer encode-table-sym r0 r3) | |
| 369 (if r7 | |
| 370 (write-multibyte-character r0 r3) | |
| 371 (write #xBEEF)))) | |
| 372 (repeat)))) nil))) | |
| 373 (assert (memq #xBEEF14 prog) | |
| 374 nil | |
| 375 "This code assumes that the constant #xBEEF is #xBEEF14 \ | |
| 376 in compiled CCL code.\nIf that is not the case, and it appears not to | |
| 377 be--that's why you're getting this message--it will not work. ") | |
| 378 prog))) | |
| 379 (encode-program-with-ascii-optimisation | |
| 380 (eval-when-compile | |
| 381 (let ((prog (append | |
| 382 (ccl-compile | |
| 383 `(1 | |
| 384 (loop | |
| 385 (read-multibyte-character r0 r1) | |
| 386 (if (r0 == ,(charset-id 'ascii)) | |
| 387 (write r1) | |
| 388 ((mule-to-unicode r0 r1) | |
| 389 (if (r0 == #xFFFD) | |
| 390 (write #xBEEF) | |
| 391 ((lookup-integer encode-table-sym r0 r3) | |
| 392 (if r7 | |
| 393 (write-multibyte-character r0 r3) | |
| 394 (write #xBEEF)))))) | |
| 395 (repeat)))) nil))) | |
| 396 (assert (memq #xBEEF14 prog) | |
| 397 nil | |
| 398 "This code assumes that the constant #xBEEF is #xBEEF14 \ | |
| 399 in compiled CCL code.\nIf that is not the case, and it appears not to | |
| 400 be--that's why you're getting this message--it will not work. ") | |
| 401 prog))) | |
| 402 (ascii-encodes-as-itself nil)) | |
| 403 | |
| 404 ;; Is this coding system ASCII-compatible? If so, we can avoid the hash | |
| 405 ;; table lookup for those characters. | |
| 406 (loop | |
| 407 for i from #x00 to #x7f | |
| 408 always (eq (int-to-char i) (gethash i encode-table)) | |
| 409 finally (setq ascii-encodes-as-itself t)) | |
| 410 | |
| 411 ;; Note that this logic handles EBCDIC badly. For example, CP037, | |
| 412 ;; MIME name ebcdic-na, has the entire repertoire of ASCII and | |
| 413 ;; Latin 1, and thus a more optimal ccl encode program would check | |
| 414 ;; for those character sets and use tables. But for now, we do a | |
| 415 ;; hash table lookup for every character. | |
| 416 (if (null ascii-encodes-as-itself) | |
| 417 ;; General encode program. Pros; general and correct. Cons; | |
| 418 ;; slow, a hash table lookup + mule-unicode conversion is done | |
| 419 ;; for every character encoding. | |
| 420 (setq encode-program general-encode-program) | |
| 421 (setq encode-program | |
| 422 ;; Encode program with ascii-ascii mapping (based on a | |
| 423 ;; character's mule character set), and one other mule | |
| 424 ;; character set using table-based encoding, other | |
| 425 ;; character sets using hash table lookups. | |
| 426 ;; make-8-bit-non-ascii-completely-coveredp only returns | |
| 427 ;; such a mapping if some non-ASCII charset with | |
| 428 ;; characters in decode-table is entirely covered by | |
| 429 ;; encode-table. | |
| 430 (make-8-bit-generate-helper decode-table encode-table | |
| 431 encode-failure-octet)) | |
| 432 (unless encode-program | |
| 433 ;; If make-8-bit-non-ascii-completely-coveredp returned nil, | |
| 434 ;; but ASCII still encodes as itself, do one-to-one mapping | |
| 435 ;; for ASCII, and a hash table lookup for everything else. | |
| 436 (setq encode-program encode-program-with-ascii-optimisation))) | |
| 437 | |
| 438 (setq encode-program | |
| 439 (nsublis | |
| 440 (list (cons #xBEEF14 | |
| 441 (logior (lsh encode-failure-octet 8) | |
| 442 #x14))) | |
| 443 (copy-tree encode-program))) | |
| 444 encode-program)) | |
| 445 | |
| 446 (defun make-8-bit-create-decode-encode-tables (unicode-map) | |
| 447 "Return a list \(DECODE-TABLE ENCODE-TABLE) given UNICODE-MAP. | |
| 448 UNICODE-MAP should be an alist mapping from integer octet values to | |
| 449 characters with UCS code points; DECODE-TABLE will be a 256-element | |
| 450 vector, and ENCODE-TABLE will be a hash table mapping from 256 numbers | |
| 451 to 256 distinct characters. " | |
| 452 (check-argument-type #'listp unicode-map) | |
| 453 (let ((decode-table (make-vector 256 nil)) | |
| 454 (encode-table (make-hash-table :size 256)) | |
| 455 (private-use-start (encode-char make-8-bit-private-use-start 'ucs)) | |
| 456 desired-ucs) | |
| 457 | |
| 458 (loop for (external internal) | |
| 459 in unicode-map | |
| 460 do | |
| 461 (aset decode-table external internal) | |
| 462 (assert (not (eq (encode-char internal 'ucs) -1)) | |
| 463 nil | |
| 464 "Looks like you're calling make-8-bit-coding-system in a \ | |
| 465 dumped file, \nand you're either not providing a literal UNICODE-MAP | |
| 466 or PROPS. Don't do that; make-8-bit-coding-system relies on sensible | |
| 467 Unicode mappings being available, which they are at compile time for | |
| 468 dumped files (but this requires the mentioned literals), but not, for | |
| 469 most of them, at run time. ") | |
| 470 | |
| 471 (puthash (encode-char internal 'ucs) | |
| 472 ;; This is semantically an integer, but Dave Love's design | |
| 473 ;; for lookup-integer in CCL means we need to store it as a | |
| 474 ;; character. | |
| 475 (int-to-char external) | |
| 476 encode-table)) | |
| 477 | |
| 478 ;; Now, go through the decode table looking at the characters that | |
| 479 ;; remain nil. If the XEmacs character with that integer is already in | |
| 480 ;; the encode table, map the on-disk octet to a Unicode private use | |
| 481 ;; character. Otherwise map the on-disk octet to the XEmacs character | |
| 482 ;; with that numeric value, to make it clearer what it is. | |
| 483 (dotimes (i 256) | |
| 484 (when (null (aref decode-table i)) | |
| 485 ;; Find a free code point. | |
| 486 (setq desired-ucs i) | |
| 487 (while (gethash desired-ucs encode-table) | |
| 488 ;; In the normal case, the code point chosen will be U+E0XY, where | |
| 489 ;; XY is the hexadecimal octet on disk. In pathological cases | |
| 490 ;; it'll be something else. | |
| 491 (setq desired-ucs (+ private-use-start desired-ucs) | |
| 492 private-use-start (+ private-use-start 1))) | |
| 4085 | 493 (puthash desired-ucs (int-to-char i) encode-table) |
| 494 (setq desired-ucs (if (> desired-ucs #xFF) | |
| 495 (decode-char 'ucs desired-ucs) | |
| 496 ;; So we get Latin-1 when run at dump time, | |
| 497 ;; instead of JIT-allocated characters. | |
| 498 (int-to-char desired-ucs))) | |
| 499 (aset decode-table i desired-ucs))) | |
| 4072 | 500 (values decode-table encode-table))) |
| 501 | |
| 502 (defun make-8-bit-generate-decode-program (decode-table) | |
| 503 "Given DECODE-TABLE, generate a CCL program to decode an 8-bit charset. | |
| 504 DECODE-TABLE must have 256 non-cons entries, and will be regarded as | |
| 505 describing a map from the octet corresponding to an offset in the | |
| 506 table to the that entry in the table. " | |
| 507 (check-argument-type #'vectorp decode-table) | |
| 508 (check-argument-range (length decode-table) #x100 #x100) | |
| 509 (let ((decode-program-parts | |
| 510 (eval-when-compile | |
| 511 (let* ((compiled | |
| 512 (append | |
| 513 (ccl-compile | |
| 514 `(3 | |
| 515 ((read r0) | |
| 516 (loop | |
| 517 (write-read-repeat r0 ,(make-vector | |
| 518 256 'sentinel)))))) nil)) | |
| 519 (first-part compiled) | |
| 520 (last-part | |
| 521 (member-if-not #'symbolp | |
| 522 (member-if-not #'integerp first-part)))) | |
| 523 ;; Chop off the sentinel sentinel sentinel [..] part. | |
| 524 (while compiled | |
| 525 (if (symbolp (cadr compiled)) | |
| 526 (setcdr compiled nil)) | |
| 527 (setq compiled (cdr compiled))) | |
| 528 (list first-part last-part))))) | |
| 529 (nconc | |
| 530 ;; copy-list needed, because the structure of the literal provided | |
| 531 ;; by our eval-when-compile hangs around. | |
| 532 (copy-list (first decode-program-parts)) | |
| 533 (append decode-table nil) | |
| 534 (second decode-program-parts)))) | |
| 535 | |
| 4145 | 536 (defun make-8-bit-choose-category (decode-table) |
| 537 "Given DECODE-TABLE, return an appropriate coding category. | |
| 538 DECODE-TABLE is a 256-entry vector describing the mapping from octets on | |
| 539 disk to XEmacs characters for some fixed-width 8-bit coding system. " | |
| 540 (check-argument-type #'vectorp decode-table) | |
| 541 (check-argument-range (length decode-table) #x100 #x100) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
542 (loop |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
543 named category |
|
4559
bd1a68c34d44
Merge my change of 2008-05-14 to the query-coding-region code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4558
diff
changeset
|
544 for i from #x80 to #x9F |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
545 do (unless (= i (aref decode-table i)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
546 (return-from category 'no-conversion)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
547 finally return 'iso-8-1)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
548 |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
549 (defun 8-bit-fixed-query-coding-region (begin end coding-system |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
550 &optional buffer errorp highlightp) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
551 "The `query-coding-region' implementation for 8-bit-fixed coding systems. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
552 |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
553 Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars' |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
554 coding system properties. The former is a hash table mapping from valid |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
555 Unicode code points to on-disk octets in the coding system; the latter a set |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
556 of characters as used by `skip-chars-forward'. Both of these properties are |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
557 generated automatically by `make-8-bit-coding-system'. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
558 |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
559 See that the documentation of `query-coding-region'; see also |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
560 `make-8-bit-coding-system'. " |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
561 (check-argument-type #'coding-system-p |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
562 (setq coding-system (find-coding-system coding-system))) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
563 (check-argument-type #'integer-or-marker-p begin) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
564 (check-argument-type #'integer-or-marker-p end) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
565 (let ((from-unicode |
| 4551 | 566 (or (coding-system-get coding-system '8-bit-fixed-query-from-unicode) |
| 567 (coding-system-get (coding-system-base coding-system) | |
| 568 '8-bit-fixed-query-from-unicode))) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
569 (skip-chars-arg |
| 4551 | 570 (or (coding-system-get coding-system '8-bit-fixed-query-skip-chars) |
| 571 (coding-system-get (coding-system-base coding-system) | |
| 572 '8-bit-fixed-query-skip-chars))) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
573 (ranges (make-range-table)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
574 char-after fail-range-start fail-range-end previous-fail extent |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
575 failed) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
576 (check-type from-unicode hash-table) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
577 (check-type skip-chars-arg string) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
578 (save-excursion |
| 4551 | 579 (when highlightp |
| 580 (map-extents #'(lambda (extent ignored-arg) | |
| 581 (when (eq 'query-coding-warning-face | |
| 582 (extent-face extent)) | |
| 583 (delete-extent extent))) buffer begin end)) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
584 (goto-char begin buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
585 (skip-chars-forward skip-chars-arg end buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
586 (while (< (point buffer) end) |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
587 ; (message |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
588 ; "fail-range-start is %S, previous-fail %S, point is %S, end is %S" |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
589 ; fail-range-start previous-fail (point buffer) end) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
590 (setq char-after (char-after (point buffer) buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
591 fail-range-start (point buffer)) |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
592 ; (message "arguments are %S %S" |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
593 ; (< (point buffer) end) |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
594 ; (not (gethash (encode-char char-after 'ucs) from-unicode))) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
595 (while (and |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
596 (< (point buffer) end) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
597 (not (gethash (encode-char char-after 'ucs) from-unicode))) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
598 (forward-char 1 buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
599 (setq char-after (char-after (point buffer) buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
600 failed t)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
601 (if (= fail-range-start (point buffer)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
602 ;; The character can actually be encoded by the coding |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
603 ;; system; check the characters past it. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
604 (forward-char 1 buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
605 ;; The character actually failed. |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
606 ; (message "past the move through, point now %S" (point buffer)) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
607 (when errorp |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
608 (error 'text-conversion-error |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
609 (format "Cannot encode %s using coding system" |
| 4551 | 610 (buffer-substring fail-range-start (point buffer) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
611 buffer)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
612 (coding-system-name coding-system))) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
613 (put-range-table fail-range-start |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
614 ;; If char-after is non-nil, we're not at |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
615 ;; the end of the buffer. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
616 (setq fail-range-end (if char-after |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
617 (point buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
618 (point-max buffer))) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
619 t ranges) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
620 (when highlightp |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
621 ; (message "highlighting") |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
622 (setq extent (make-extent fail-range-start fail-range-end buffer)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
623 (set-extent-priority extent (+ mouse-highlight-priority 2)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
624 (set-extent-face extent 'query-coding-warning-face)) |
| 4551 | 625 (skip-chars-forward skip-chars-arg end buffer))) |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
626 ; (message "about to give the result, ranges %S" ranges) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
627 (if failed |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
628 (values nil ranges) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
629 (values t nil))))) |
| 4145 | 630 |
| 4072 | 631 ;;;###autoload |
| 632 (defun make-8-bit-coding-system (name unicode-map &optional description props) | |
| 633 "Make and return a fixed-width 8-bit CCL coding system named NAME. | |
| 634 NAME must be a symbol, and UNICODE-MAP a list. | |
| 635 | |
| 636 UNICODE-MAP is a plist describing a map from octets in the coding | |
| 637 system NAME (as integers) to XEmacs characters. Those XEmacs | |
| 638 characters will be used explicitly on decoding, but for encoding (most | |
| 639 relevantly, on writing to disk) XEmacs characters that map to the same | |
| 640 Unicode code point will be unified. This means that the ISO-8859-? | |
| 641 characters that map to the same Unicode code point will not be | |
| 642 distinct when written to disk, which is normally what is intended; it | |
| 643 also means that East Asian Han characters from different XEmacs | |
| 644 character sets will not be distinct when written to disk, which is | |
| 645 less often what is intended. | |
| 646 | |
| 647 Any octets not mapped will be decoded into the ISO 8859-1 characters with | |
| 648 the corresponding numeric value; unless another octet maps to that | |
| 649 character, in which case the Unicode private use area will be used. This | |
| 650 avoids spurious changes to files on disk when they contain octets that would | |
| 651 be otherwise remapped to the canonical values for the corresponding | |
| 652 characters in the coding system. | |
| 653 | |
| 654 DESCRIPTION and PROPS are as in `make-coding-system', which see. This | |
| 655 function also accepts two additional (optional) properties in PROPS; | |
| 656 `aliases', giving a list of aliases to be initialized for this | |
| 657 coding-system, and `encode-failure-octet', an integer between 0 and 256 to | |
| 658 write in place of XEmacs characters that cannot be encoded, defaulting to | |
| 659 the code for tilde `~'. " | |
| 660 (check-argument-type #'symbolp name) | |
| 661 (check-argument-type #'listp unicode-map) | |
| 662 (check-argument-type #'stringp | |
| 663 (or description | |
| 664 (setq description | |
| 665 (format "Coding system used for %s." name)))) | |
| 666 (check-valid-plist props) | |
| 667 (let ((encode-failure-octet (or (plist-get props 'encode-failure-octet) | |
| 668 (char-to-int ?~))) | |
| 669 (aliases (plist-get props 'aliases)) | |
| 670 (hash-table-sym (gentemp (format "%s-encode-table" name))) | |
| 671 encode-program decode-program result decode-table encode-table) | |
| 672 | |
| 673 ;; Some more sanity checking. | |
| 674 (check-argument-range encode-failure-octet 0 #xFF) | |
| 675 (check-argument-type #'listp aliases) | |
| 676 | |
| 677 ;; Don't pass on our extra data to make-coding-system. | |
| 678 (setq props (plist-remprop props 'encode-failure-octet) | |
| 679 props (plist-remprop props 'aliases)) | |
| 680 | |
| 681 (multiple-value-setq | |
| 682 (decode-table encode-table) | |
| 683 (make-8-bit-create-decode-encode-tables unicode-map)) | |
| 684 | |
| 685 ;; Register the decode-table. | |
| 686 (define-translation-hash-table hash-table-sym encode-table) | |
| 687 | |
| 688 ;; Generate the programs. | |
| 689 (setq decode-program (make-8-bit-generate-decode-program decode-table) | |
| 690 encode-program (make-8-bit-generate-encode-program | |
| 691 decode-table encode-table encode-failure-octet)) | |
| 692 (unless (vectorp encode-program) | |
| 693 (setq encode-program | |
| 694 (apply #'vector | |
| 695 (nsublis (list (cons 'encode-table-sym hash-table-sym)) | |
| 696 (copy-tree encode-program))))) | |
| 697 (unless (vectorp decode-program) | |
| 698 (setq decode-program | |
| 699 (apply #'vector decode-program))) | |
| 700 | |
| 701 ;; And now generate the actual coding system. | |
| 702 (setq result | |
| 703 (make-coding-system | |
| 704 name 'ccl | |
| 705 description | |
| 706 (plist-put (plist-put props 'decode decode-program) | |
| 707 'encode encode-program))) | |
| 4295 | 708 (coding-system-put name '8-bit-fixed t) |
| 4145 | 709 (coding-system-put name 'category |
| 710 (make-8-bit-choose-category decode-table)) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
711 (coding-system-put name '8-bit-fixed-query-skip-chars |
|
4567
84d618b355f5
2008-08-09 Aidan Kehoe <kehoea@parhasard.net>
Aidan Kehoe <kehoea@parhasard.net>
parents:
4559
diff
changeset
|
712 (skip-chars-quote |
|
84d618b355f5
2008-08-09 Aidan Kehoe <kehoea@parhasard.net>
Aidan Kehoe <kehoea@parhasard.net>
parents:
4559
diff
changeset
|
713 (apply #'string (append decode-table nil)))) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
714 (coding-system-put name '8-bit-fixed-query-from-unicode encode-table) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
715 |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
716 (coding-system-put name 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
717 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
718 (coding-system-put (intern (format "%s-unix" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
719 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
720 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
721 (coding-system-put (intern (format "%s-dos" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
722 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
723 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
724 (coding-system-put (intern (format "%s-mac" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
725 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
726 #'8-bit-fixed-query-coding-region) |
| 4072 | 727 (loop for alias in aliases |
| 728 do (define-coding-system-alias alias name)) | |
| 729 result)) | |
| 730 | |
| 731 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map | |
| 732 &optional description props) | |
| 733 ;; We provide the compiler macro (= macro that is expanded only on | |
| 734 ;; compilation, and that can punt to a runtime version of the | |
| 735 ;; associate function if necessary) not for reasons of speed, though | |
| 736 ;; it does speed up things at runtime a little, but because the | |
| 737 ;; Unicode mappings are available at compile time in the dumped | |
| 738 ;; files, but they are not available at run time for the vast | |
| 739 ;; majority of them. | |
| 740 | |
| 741 (if (not (and (and (consp name) (eq (car name) 'quote)) | |
| 742 (and (consp unicode-map) (eq (car unicode-map) 'quote)) | |
| 743 (and (or (and (consp props) (eq (car props) 'quote)) | |
| 744 (null props))))) | |
| 745 ;; The call does not use literals; do it at runtime. | |
| 746 form | |
| 747 (setq name (cadr name) | |
| 748 unicode-map (cadr unicode-map) | |
| 749 props (if props (cadr props))) | |
| 750 (let ((encode-failure-octet | |
| 751 (or (plist-get props 'encode-failure-octet) (char-to-int ?~))) | |
| 752 (aliases (plist-get props 'aliases)) | |
| 753 encode-program decode-program | |
| 4103 | 754 decode-table encode-table) |
| 4072 | 755 |
| 756 ;; Some sanity checking. | |
| 757 (check-argument-range encode-failure-octet 0 #xFF) | |
| 758 (check-argument-type #'listp aliases) | |
| 759 | |
| 760 ;; Don't pass on our extra data to make-coding-system. | |
| 761 (setq props (plist-remprop props 'encode-failure-octet) | |
| 762 props (plist-remprop props 'aliases)) | |
| 763 | |
| 764 ;; Work out encode-table and decode-table. | |
| 765 (multiple-value-setq | |
| 766 (decode-table encode-table) | |
| 767 (make-8-bit-create-decode-encode-tables unicode-map)) | |
| 768 | |
| 769 ;; Generate the decode and encode programs. | |
| 770 (setq decode-program (make-8-bit-generate-decode-program decode-table) | |
| 771 encode-program (make-8-bit-generate-encode-program | |
| 772 decode-table encode-table encode-failure-octet)) | |
| 773 | |
| 774 ;; And return the generated code. | |
| 775 `(let ((encode-table-sym (gentemp (format "%s-encode-table" ',name))) | |
| 4103 | 776 ;; The case-fold-search bind shouldn't be necessary. If I take |
| 777 ;; it, out, though, I get: | |
| 778 ;; | |
| 779 ;; (invalid-read-syntax "Multiply defined symbol label" 1) | |
| 780 ;; | |
| 781 ;; when the file is byte compiled. | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
782 (case-fold-search t) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
783 (encode-table ,encode-table)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
784 (define-translation-hash-table encode-table-sym encode-table) |
| 4103 | 785 (make-coding-system |
| 786 ',name 'ccl ,description | |
| 787 (plist-put (plist-put ',props 'decode | |
| 788 ,(apply #'vector decode-program)) | |
| 789 'encode | |
| 790 (apply #'vector | |
| 791 (nsublis | |
| 792 (list (cons | |
| 793 'encode-table-sym | |
| 794 (symbol-value 'encode-table-sym))) | |
| 795 ',encode-program)))) | |
| 4295 | 796 (coding-system-put ',name '8-bit-fixed t) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
797 (coding-system-put ',name 'category |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
798 ',(make-8-bit-choose-category decode-table)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
799 (coding-system-put ',name '8-bit-fixed-query-skip-chars |
|
4567
84d618b355f5
2008-08-09 Aidan Kehoe <kehoea@parhasard.net>
Aidan Kehoe <kehoea@parhasard.net>
parents:
4559
diff
changeset
|
800 ',(skip-chars-quote |
|
84d618b355f5
2008-08-09 Aidan Kehoe <kehoea@parhasard.net>
Aidan Kehoe <kehoea@parhasard.net>
parents:
4559
diff
changeset
|
801 (apply #'string (append decode-table nil)))) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
802 (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
803 (coding-system-put ',name 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
804 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
805 (coding-system-put ',(intern (format "%s-unix" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
806 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
807 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
808 (coding-system-put ',(intern (format "%s-dos" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
809 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
810 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
811 (coding-system-put ',(intern (format "%s-mac" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
812 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
813 #'8-bit-fixed-query-coding-region) |
| 4072 | 814 ,(macroexpand `(loop for alias in ',aliases |
| 815 do (define-coding-system-alias alias | |
| 816 ',name))) | |
| 4103 | 817 (find-coding-system ',name))))) |
| 4299 | 818 |
| 819 ;; Ideally this would be in latin.el, but code-init.el uses it. | |
| 820 (make-8-bit-coding-system | |
| 821 'iso-8859-1 | |
| 822 '() ;; No differences from Latin 1. | |
| 823 "ISO-8859-1 (Latin-1)" | |
| 824 '(mnemonic "Latin 1" | |
| 825 documentation "The most used encoding of Western Europe and the Americas." | |
| 826 aliases (iso-latin-1 latin-1))) |
