Mercurial > hg > xemacs-beta
annotate lisp/mule/mule-coding.el @ 4614:afbfad080ddd
The URLs in our current config.guess and config.sub files are obsolete.
Update to the latest upstream release to get correct URLs, as well as fixes
and enhancements to those scripts.
| author | Jerry James <james@xemacs.org> |
|---|---|
| date | Wed, 11 Feb 2009 11:09:35 -0700 |
| parents | c786c3fd0740 |
| children | 257b468bf2ca |
| rev | line source |
|---|---|
| 502 | 1 ;;; mule-coding.el --- Coding-system functions for Mule. -*- coding: iso-2022-7bit; -*- |
| 333 | 2 |
| 3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. | |
| 4 ;; Licensed to the Free Software Foundation. | |
| 5 ;; Copyright (C) 1995 Amdahl Corporation. | |
| 6 ;; Copyright (C) 1995 Sun Microsystems. | |
| 7 ;; Copyright (C) 1997 MORIOKA Tomohiko | |
| 771 | 8 ;; Copyright (C) 2001 Ben Wing. |
| 333 | 9 |
| 10 ;; This file is part of XEmacs. | |
| 11 | |
| 12 ;; XEmacs is free software; you can redistribute it and/or modify it | |
| 13 ;; under the terms of the GNU General Public License as published by | |
| 14 ;; the Free Software Foundation; either version 2, or (at your option) | |
| 15 ;; any later version. | |
| 16 | |
| 17 ;; XEmacs is distributed in the hope that it will be useful, but | |
| 18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
| 20 ;; General Public License for more details. | |
| 21 | |
| 22 ;; You should have received a copy of the GNU General Public License | |
| 444 | 23 ;; along with XEmacs; see the file COPYING. If not, write to the |
| 333 | 24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 25 ;; Boston, MA 02111-1307, USA. | |
| 26 | |
| 27 ;;; Commentary: | |
| 28 | |
| 29 ;;; split off of mule.el and mostly moved to coding.el | |
| 30 | |
| 4072 | 31 ;; Needed for make-8-bit-coding-system. |
| 4080 | 32 (eval-when-compile (require 'ccl)) |
| 4072 | 33 |
| 333 | 34 ;;; Code: |
| 35 | |
| 36 (defun coding-system-force-on-output (coding-system register) | |
| 37 "Return the 'force-on-output property of CODING-SYSTEM for the specified REGISTER." | |
| 444 | 38 (check-type register integer) |
| 333 | 39 (coding-system-property |
| 40 coding-system | |
| 41 (case register | |
| 42 (0 'force-g0-on-output) | |
| 43 (1 'force-g1-on-output) | |
| 44 (2 'force-g2-on-output) | |
| 45 (3 'force-g3-on-output) | |
| 46 (t (signal 'args-out-of-range (list register 0 3)))))) | |
| 47 | |
| 48 (defun coding-system-short (coding-system) | |
| 49 "Return the 'short property of CODING-SYSTEM." | |
| 50 (coding-system-property coding-system 'short)) | |
| 51 | |
| 52 (defun coding-system-no-ascii-eol (coding-system) | |
| 53 "Return the 'no-ascii-eol property of CODING-SYSTEM." | |
| 54 (coding-system-property coding-system 'no-ascii-eol)) | |
| 55 | |
| 56 (defun coding-system-no-ascii-cntl (coding-system) | |
| 57 "Return the 'no-ascii-cntl property of CODING-SYSTEM." | |
| 58 (coding-system-property coding-system 'no-ascii-cntl)) | |
| 59 | |
| 60 (defun coding-system-seven (coding-system) | |
| 61 "Return the 'seven property of CODING-SYSTEM." | |
| 62 (coding-system-property coding-system 'seven)) | |
| 63 | |
| 64 (defun coding-system-lock-shift (coding-system) | |
| 65 "Return the 'lock-shift property of CODING-SYSTEM." | |
| 66 (coding-system-property coding-system 'lock-shift)) | |
| 67 | |
| 68 ;;(defun coding-system-use-japanese-jisx0201-roman (coding-system) | |
| 69 ;; "Return the 'use-japanese-jisx0201-roman property of CODING-SYSTEM." | |
| 70 ;; (coding-system-property coding-system 'use-japanese-jisx0201-roman)) | |
| 71 | |
| 72 ;;(defun coding-system-use-japanese-jisx0208-1978 (coding-system) | |
| 73 ;; "Return the 'use-japanese-jisx0208-1978 property of CODING-SYSTEM." | |
| 74 ;; (coding-system-property coding-system 'use-japanese-jisx0208-2978)) | |
| 75 | |
| 76 (defun coding-system-no-iso6429 (coding-system) | |
| 77 "Return the 'no-iso6429 property of CODING-SYSTEM." | |
| 78 (coding-system-property coding-system 'no-iso6429)) | |
| 79 | |
| 80 (defun coding-system-ccl-encode (coding-system) | |
| 81 "Return the CCL 'encode property of CODING-SYSTEM." | |
| 82 (coding-system-property coding-system 'encode)) | |
| 83 | |
| 84 (defun coding-system-ccl-decode (coding-system) | |
| 85 "Return the CCL 'decode property of CODING-SYSTEM." | |
| 86 (coding-system-property coding-system 'decode)) | |
| 87 | |
| 771 | 88 (defun coding-system-iso2022-charset (coding-system register) |
| 89 "Return the charset initially designated to REGISTER in CODING-SYSTEM. | |
| 90 The allowable range of REGISTER is 0 through 3." | |
| 91 (if (or (< register 0) (> register 3)) | |
| 92 (error 'args-out-of-range "coding-system-charset REGISTER" register 0 3)) | |
| 93 (coding-system-property coding-system (nth register '(charset-g0 | |
| 94 charset-g1 | |
| 95 charset-g2 | |
| 96 charset-g3)))) | |
| 97 | |
| 333 | 98 |
| 99 ;;;; Definitions of predefined coding systems | |
| 100 | |
| 101 (make-coding-system | |
| 102 'ctext 'iso2022 | |
| 771 | 103 "Compound Text" |
| 333 | 104 '(charset-g0 ascii |
| 105 charset-g1 latin-iso8859-1 | |
| 106 eol-type nil | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
107 safe-charsets t ;; Reasonable |
| 333 | 108 mnemonic "CText")) |
| 109 | |
| 110 (make-coding-system | |
| 111 'iso-2022-8bit-ss2 'iso2022 | |
| 771 | 112 "ISO-2022 8-bit w/SS2" |
| 333 | 113 '(charset-g0 ascii |
| 114 charset-g1 latin-iso8859-1 | |
| 115 charset-g2 t ;; unspecified but can be used later. | |
| 116 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
117 safe-charsets (ascii katakana-jisx0201 japanese-jisx0208-1978 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
118 japanese-jisx0208 japanese-jisx0212 japanese-jisx0213-1 |
|
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
119 japanese-jisx0213-2) |
| 333 | 120 mnemonic "ISO8/SS" |
| 771 | 121 documentation "ISO 2022 based 8-bit encoding using SS2 for 96-charset" |
| 333 | 122 )) |
| 123 | |
| 124 (make-coding-system | |
| 125 'iso-2022-7bit-ss2 'iso2022 | |
| 771 | 126 "ISO-2022 7-bit w/SS2" |
| 333 | 127 '(charset-g0 ascii |
| 128 charset-g2 t ;; unspecified but can be used later. | |
| 129 seven t | |
| 130 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
131 safe-charsets t |
| 333 | 132 mnemonic "ISO7/SS" |
| 771 | 133 documentation "ISO 2022 based 7-bit encoding using SS2 for 96-charset" |
| 333 | 134 eol-type nil)) |
| 135 | |
| 136 ;; (copy-coding-system 'iso-2022-7bit-ss2 'iso-2022-jp-2) | |
| 137 (make-coding-system | |
| 138 'iso-2022-jp-2 'iso2022 | |
| 771 | 139 "ISO-2022-JP-2" |
| 333 | 140 '(charset-g0 ascii |
| 141 charset-g2 t ;; unspecified but can be used later. | |
| 142 seven t | |
| 143 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
144 safe-charsets t |
| 333 | 145 mnemonic "ISO7/SS" |
| 146 eol-type nil)) | |
| 147 | |
| 148 (make-coding-system | |
| 149 'iso-2022-7bit 'iso2022 | |
| 771 | 150 "ISO 2022 7-bit" |
| 333 | 151 '(charset-g0 ascii |
| 152 seven t | |
| 153 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
154 safe-charsets t |
| 771 | 155 mnemonic "ISO7" |
| 156 documentation "ISO-2022-based 7-bit encoding using only G0" | |
| 157 )) | |
| 333 | 158 |
| 159 ;; compatibility for old XEmacsen | |
| 771 | 160 (define-coding-system-alias 'iso-2022-7 'iso-2022-7bit) |
| 333 | 161 |
| 162 (make-coding-system | |
| 163 'iso-2022-8 'iso2022 | |
| 771 | 164 "ISO-2022 8-bit" |
| 333 | 165 '(charset-g0 ascii |
| 166 charset-g1 latin-iso8859-1 | |
| 167 short t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
168 safe-charsets t |
| 333 | 169 mnemonic "ISO8" |
| 771 | 170 documentation "ISO-2022 eight-bit coding system. No single-shift or locking-shift." |
| 333 | 171 )) |
| 172 | |
| 173 (make-coding-system | |
| 174 'escape-quoted 'iso2022 | |
| 771 | 175 "Escape-Quoted (for .ELC files)" |
| 333 | 176 '(charset-g0 ascii |
| 177 charset-g1 latin-iso8859-1 | |
| 178 eol-type lf | |
| 179 escape-quoted t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
180 safe-charsets t |
| 333 | 181 mnemonic "ESC/Quot" |
| 771 | 182 documentation "ISO-2022 eight-bit coding system with escape quoting; used for .ELC files." |
| 333 | 183 )) |
| 184 | |
| 185 (make-coding-system | |
| 186 'iso-2022-lock 'iso2022 | |
| 771 | 187 "ISO-2022 w/locking-shift" |
| 333 | 188 '(charset-g0 ascii |
| 189 charset-g1 t ;; unspecified but can be used later. | |
| 190 seven t | |
| 191 lock-shift t | |
|
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4567
diff
changeset
|
192 safe-charsets t |
| 333 | 193 mnemonic "ISO7/Lock" |
| 771 | 194 documentation "ISO-2022 coding system using Locking-Shift for 96-charset." |
| 333 | 195 )) |
| 4072 | 196 |
| 333 | 197 |
| 4072 | 198 ;; This is used by people writing CCL programs, but is called at runtime. |
| 199 (defun define-translation-hash-table (symbol table) | |
| 200 "Define SYMBOL as the name of the hash translation TABLE for use in CCL. | |
| 201 | |
| 202 Analogous to `define-translation-table', but updates | |
| 203 `translation-hash-table-vector' and the table is for use in the CCL | |
| 204 `lookup-integer' and `lookup-character' functions." | |
| 4145 | 205 (check-argument-type #'symbolp symbol) |
| 206 (check-argument-type #'hash-table-p table) | |
| 4072 | 207 (let ((len (length translation-hash-table-vector)) |
| 208 (id 0) | |
| 209 done) | |
| 210 (put symbol 'translation-hash-table table) | |
| 211 (while (not done) | |
| 212 (if (>= id len) | |
| 213 (setq translation-hash-table-vector | |
| 214 (vconcat translation-hash-table-vector [nil]))) | |
| 215 (let ((slot (aref translation-hash-table-vector id))) | |
| 216 (if (or (not slot) | |
| 217 (eq (car slot) symbol)) | |
| 218 (progn | |
| 219 (aset translation-hash-table-vector id (cons symbol table)) | |
| 220 (setq done t)) | |
| 221 (setq id (1+ id))))) | |
| 222 (put symbol 'translation-hash-table-id id) | |
| 223 id)) | |
| 224 | |
| 225 (defvar make-8-bit-private-use-start (decode-char 'ucs #xE000) | |
| 226 "Start of a 256 code private use area for make-8-bit-coding-system. | |
| 227 | |
| 228 This is used to ensure that distinct octets on disk for a given coding | |
| 229 system map to distinct XEmacs characters, preventing a spurious changes when | |
| 230 a file is read, not changed, and then written. ") | |
| 231 | |
| 232 (defun make-8-bit-generate-helper (decode-table encode-table | |
| 233 encode-failure-octet) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
234 "Helper function, `make-8-bit-generate-encode-program-and-skip-chars-strings', |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
235 which see. |
| 4072 | 236 |
| 4145 | 237 Deals with the case where ASCII and another character set can both be |
| 238 encoded unambiguously and completely into the coding-system; if this is so, | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
239 returns a list comprised of such a ccl-program and the character set in |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
240 question. If not, it returns a list with both entries nil." |
| 4072 | 241 (let ((tentative-encode-program-parts |
| 242 (eval-when-compile | |
| 4295 | 243 (let* ((vec-len 128) |
| 244 (compiled | |
| 4072 | 245 (append |
| 246 (ccl-compile | |
| 247 `(1 | |
| 248 (loop | |
| 249 (read-multibyte-character r0 r1) | |
| 250 (if (r0 == ,(charset-id 'ascii)) | |
| 251 (write r1) | |
| 252 ((if (r0 == #xABAB) | |
| 253 ;; #xBFFE is a sentinel in the compiled | |
| 254 ;; program. | |
| 4295 | 255 ((r0 = r1 & #x7F) |
| 256 (write r0 ,(make-vector vec-len #xBFFE))) | |
| 4072 | 257 ((mule-to-unicode r0 r1) |
| 258 (if (r0 == #xFFFD) | |
| 259 (write #xBEEF) | |
| 260 ((lookup-integer encode-table-sym r0 r3) | |
| 261 (if r7 | |
| 262 (write-multibyte-character r0 r3) | |
| 263 (write #xBEEF)))))))) | |
| 264 (repeat)))) nil)) | |
| 265 (first-part compiled) | |
| 266 (last-part | |
| 267 (member-if-not (lambda (entr) (eq #xBFFE entr)) | |
| 268 (member-if | |
| 269 (lambda (entr) (eq #xBFFE entr)) | |
| 270 first-part)))) | |
| 271 (while compiled | |
| 4295 | 272 (when (eq #xBFFE (cadr compiled)) |
| 273 (assert (= vec-len (search '(#xBFFE) (cdr compiled) | |
| 274 :test #'/=)) nil | |
| 275 "Strange ccl vector length") | |
| 276 (setcdr compiled nil)) | |
| 4072 | 277 (setq compiled (cdr compiled))) |
| 278 ;; Is the generated code as we expect it to be? | |
| 279 (assert (and (memq #xABAB first-part) | |
| 280 (memq #xBEEF14 last-part)) | |
| 281 nil | |
| 282 "This code assumes that the constant #xBEEF is #xBEEF14 in \ | |
| 283 compiled CCL code,\nand that the constant #xABAB is #xABAB. If that is | |
| 284 not the case, and it appears not to be--that's why you're getting this | |
| 285 message--it will not work. ") | |
| 4295 | 286 (list first-part last-part vec-len)))) |
| 4072 | 287 (charset-lower -1) |
| 288 (charset-upper -1) | |
| 289 worth-trying known-charsets encode-program | |
|
4605
c786c3fd0740
Listen to the byte-compiler, core Lisp.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4604
diff
changeset
|
290 other-charset-vector ucs) |
| 4072 | 291 |
| 292 (loop for char across decode-table | |
| 293 do (pushnew (char-charset char) known-charsets)) | |
| 294 (setq known-charsets (delq 'ascii known-charsets)) | |
| 295 | |
| 296 (loop for known-charset in known-charsets | |
| 297 do | |
| 298 ;; This is not possible for two dimensional charsets. | |
| 299 (when (eq 1 (charset-dimension known-charset)) | |
| 300 (if (eq 'control-1 known-charset) | |
| 301 (setq charset-lower 0 | |
| 302 charset-upper 31) | |
| 303 ;; There should be a nicer way to get the limits here. | |
| 304 (condition-case args-out-of-range | |
| 305 (make-char known-charset #x100) | |
| 306 (args-out-of-range | |
| 307 (setq charset-lower (third args-out-of-range) | |
| 308 charset-upper (fourth args-out-of-range))))) | |
| 309 (loop | |
| 310 for i from charset-lower to charset-upper | |
| 311 always (and (setq ucs | |
| 312 (encode-char (make-char known-charset i) 'ucs)) | |
| 313 (gethash ucs encode-table)) | |
| 314 finally (setq worth-trying known-charset)) | |
| 315 | |
| 316 ;; Only trying this for one charset at a time, the first find. | |
| 317 (when worth-trying (return)) | |
| 318 | |
| 319 ;; Okay, this charset is not worth trying, Try the next. | |
| 320 (setq charset-lower -1 | |
| 321 charset-upper -1 | |
| 322 worth-trying nil))) | |
| 323 | |
| 324 (when worth-trying | |
| 4295 | 325 (setq other-charset-vector |
| 326 (make-vector (third tentative-encode-program-parts) | |
| 327 encode-failure-octet)) | |
| 4072 | 328 (loop for i from charset-lower to charset-upper |
| 4090 | 329 do (aset other-charset-vector i |
| 4072 | 330 (gethash (encode-char (make-char worth-trying i) |
| 331 'ucs) encode-table))) | |
| 332 (setq encode-program | |
| 333 (nsublis | |
| 334 (list (cons #xABAB (charset-id worth-trying))) | |
| 335 (nconc | |
| 336 (copy-list (first | |
| 337 tentative-encode-program-parts)) | |
| 338 (append other-charset-vector nil) | |
| 339 (copy-tree (second | |
| 340 tentative-encode-program-parts)))))) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
341 (values encode-program worth-trying))) |
| 4072 | 342 |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
343 (defun make-8-bit-generate-encode-program-and-skip-chars-strings |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
344 (decode-table encode-table encode-failure-octet) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
345 "Generate a CCL program to encode a 8-bit fixed-width charset. |
| 4072 | 346 |
| 347 DECODE-TABLE must have 256 non-cons entries, and will be regarded as | |
| 348 describing a map from the octet corresponding to an offset in the | |
| 349 table to the that entry in the table. ENCODE-TABLE is a hash table | |
| 350 map from unicode values to characters in the range [0,255]. | |
| 351 ENCODE-FAILURE-OCTET describes an integer between 0 and 255 | |
| 352 \(inclusive) to write in the event that a character cannot be encoded. " | |
| 353 (check-argument-type #'vectorp decode-table) | |
| 354 (check-argument-range (length decode-table) #x100 #x100) | |
| 355 (check-argument-type #'hash-table-p encode-table) | |
| 356 (check-argument-type #'integerp encode-failure-octet) | |
| 357 (check-argument-range encode-failure-octet #x00 #xFF) | |
| 358 (let ((encode-program nil) | |
| 359 (general-encode-program | |
| 360 (eval-when-compile | |
| 361 (let ((prog (append | |
| 362 (ccl-compile | |
| 363 `(1 | |
| 364 (loop | |
| 365 (read-multibyte-character r0 r1) | |
| 366 (mule-to-unicode r0 r1) | |
| 367 (if (r0 == #xFFFD) | |
| 368 (write #xBEEF) | |
| 369 ((lookup-integer encode-table-sym r0 r3) | |
| 370 (if r7 | |
| 371 (write-multibyte-character r0 r3) | |
| 372 (write #xBEEF)))) | |
| 373 (repeat)))) nil))) | |
| 374 (assert (memq #xBEEF14 prog) | |
| 375 nil | |
| 376 "This code assumes that the constant #xBEEF is #xBEEF14 \ | |
| 377 in compiled CCL code.\nIf that is not the case, and it appears not to | |
| 378 be--that's why you're getting this message--it will not work. ") | |
| 379 prog))) | |
| 380 (encode-program-with-ascii-optimisation | |
| 381 (eval-when-compile | |
| 382 (let ((prog (append | |
| 383 (ccl-compile | |
| 384 `(1 | |
| 385 (loop | |
| 386 (read-multibyte-character r0 r1) | |
| 387 (if (r0 == ,(charset-id 'ascii)) | |
| 388 (write r1) | |
| 389 ((mule-to-unicode r0 r1) | |
| 390 (if (r0 == #xFFFD) | |
| 391 (write #xBEEF) | |
| 392 ((lookup-integer encode-table-sym r0 r3) | |
| 393 (if r7 | |
| 394 (write-multibyte-character r0 r3) | |
| 395 (write #xBEEF)))))) | |
| 396 (repeat)))) nil))) | |
| 397 (assert (memq #xBEEF14 prog) | |
| 398 nil | |
| 399 "This code assumes that the constant #xBEEF is #xBEEF14 \ | |
| 400 in compiled CCL code.\nIf that is not the case, and it appears not to | |
| 401 be--that's why you're getting this message--it will not work. ") | |
| 402 prog))) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
403 (ascii-encodes-as-itself nil) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
404 (control-1-encodes-as-itself t) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
405 (invalid-sequence-code-point-start |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
406 (eval-when-compile |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
407 (char-to-unicode |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
408 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
409 further-char-set skip-chars invalid-sequences-skip-chars) |
| 4072 | 410 |
| 411 ;; Is this coding system ASCII-compatible? If so, we can avoid the hash | |
| 412 ;; table lookup for those characters. | |
| 413 (loop | |
| 414 for i from #x00 to #x7f | |
| 415 always (eq (int-to-char i) (gethash i encode-table)) | |
| 416 finally (setq ascii-encodes-as-itself t)) | |
| 417 | |
| 418 ;; Note that this logic handles EBCDIC badly. For example, CP037, | |
| 419 ;; MIME name ebcdic-na, has the entire repertoire of ASCII and | |
| 420 ;; Latin 1, and thus a more optimal ccl encode program would check | |
| 421 ;; for those character sets and use tables. But for now, we do a | |
| 422 ;; hash table lookup for every character. | |
| 423 (if (null ascii-encodes-as-itself) | |
| 424 ;; General encode program. Pros; general and correct. Cons; | |
| 425 ;; slow, a hash table lookup + mule-unicode conversion is done | |
| 426 ;; for every character encoding. | |
| 427 (setq encode-program general-encode-program) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
428 (multiple-value-setq |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
429 (encode-program further-char-set) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
430 ;; Encode program with ascii-ascii mapping (based on a |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
431 ;; character's mule character set), and one other mule |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
432 ;; character set using table-based encoding, other |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
433 ;; character sets using hash table lookups. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
434 ;; make-8-bit-non-ascii-completely-coveredp only returns |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
435 ;; such a mapping if some non-ASCII charset with |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
436 ;; characters in decode-table is entirely covered by |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
437 ;; encode-table. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
438 (make-8-bit-generate-helper decode-table encode-table |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
439 encode-failure-octet)) |
| 4072 | 440 (unless encode-program |
| 441 ;; If make-8-bit-non-ascii-completely-coveredp returned nil, | |
| 442 ;; but ASCII still encodes as itself, do one-to-one mapping | |
| 443 ;; for ASCII, and a hash table lookup for everything else. | |
| 444 (setq encode-program encode-program-with-ascii-optimisation))) | |
| 445 | |
| 446 (setq encode-program | |
| 447 (nsublis | |
| 448 (list (cons #xBEEF14 | |
| 449 (logior (lsh encode-failure-octet 8) | |
| 450 #x14))) | |
| 451 (copy-tree encode-program))) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
452 (loop |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
453 for i from #x80 to #x9f |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
454 do (unless (= i (aref decode-table i)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
455 (setq control-1-encodes-as-itself nil) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
456 (return))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
457 (loop |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
458 for i from #x00 to #xFF |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
459 initially (setq skip-chars |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
460 (cond |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
461 ((and ascii-encodes-as-itself |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
462 control-1-encodes-as-itself further-char-set) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
463 (concat "\x00-\x9f" (charset-skip-chars-string |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
464 further-char-set))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
465 ((and ascii-encodes-as-itself |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
466 control-1-encodes-as-itself) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
467 "\x00-\x9f") |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
468 ((null ascii-encodes-as-itself) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
469 (skip-chars-quote (apply #'string |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
470 (append decode-table nil)))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
471 (further-char-set |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
472 (concat (charset-skip-chars-string 'ascii) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
473 (charset-skip-chars-string further-char-set))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
474 (t |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
475 (charset-skip-chars-string 'ascii))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
476 invalid-sequences-skip-chars "") |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
477 with decoded-ucs = nil |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
478 with decoded = nil |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
479 with no-ascii-transparency-skip-chars-list = |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
480 (unless ascii-encodes-as-itself (append decode-table nil)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
481 ;; Can't use #'match-string here, see: |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
482 ;; http://mid.gmane.org/18829.34118.709782.704574@parhasard.net |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
483 with skip-chars-test = |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
484 #'(lambda (skip-chars-string testing) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
485 (with-temp-buffer |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
486 (insert testing) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
487 (goto-char (point-min)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
488 (skip-chars-forward skip-chars-string) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
489 (= (point) (point-max)))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
490 do |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
491 (setq decoded (aref decode-table i) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
492 decoded-ucs (char-to-unicode decoded)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
493 (cond |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
494 ((<= invalid-sequence-code-point-start decoded-ucs |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
495 (+ invalid-sequence-code-point-start #xFF)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
496 (setq invalid-sequences-skip-chars |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
497 (concat (string decoded) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
498 invalid-sequences-skip-chars)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
499 (assert (not (funcall skip-chars-test skip-chars decoded)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
500 "This char should only be skipped with \ |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
501 `invalid-sequences-skip-chars', not by `skip-chars'")) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
502 ((not (funcall skip-chars-test skip-chars decoded)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
503 (if ascii-encodes-as-itself |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
504 (setq skip-chars (concat skip-chars (string decoded))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
505 (push decoded no-ascii-transparency-skip-chars-list)))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
506 finally (unless ascii-encodes-as-itself |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
507 (setq skip-chars |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
508 (skip-chars-quote |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
509 (apply #'string |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
510 no-ascii-transparency-skip-chars-list))))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
511 (values encode-program skip-chars invalid-sequences-skip-chars))) |
| 4072 | 512 |
| 513 (defun make-8-bit-create-decode-encode-tables (unicode-map) | |
| 514 "Return a list \(DECODE-TABLE ENCODE-TABLE) given UNICODE-MAP. | |
| 515 UNICODE-MAP should be an alist mapping from integer octet values to | |
| 516 characters with UCS code points; DECODE-TABLE will be a 256-element | |
| 517 vector, and ENCODE-TABLE will be a hash table mapping from 256 numbers | |
| 518 to 256 distinct characters. " | |
| 519 (check-argument-type #'listp unicode-map) | |
| 520 (let ((decode-table (make-vector 256 nil)) | |
| 521 (encode-table (make-hash-table :size 256)) | |
| 522 (private-use-start (encode-char make-8-bit-private-use-start 'ucs)) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
523 (invalid-sequence-code-point-start |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
524 (eval-when-compile |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
525 (char-to-unicode |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
526 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
527 desired-ucs decode-table-entry) |
| 4072 | 528 |
| 529 (loop for (external internal) | |
| 530 in unicode-map | |
| 531 do | |
| 532 (aset decode-table external internal) | |
| 533 (assert (not (eq (encode-char internal 'ucs) -1)) | |
| 534 nil | |
| 535 "Looks like you're calling make-8-bit-coding-system in a \ | |
| 536 dumped file, \nand you're either not providing a literal UNICODE-MAP | |
| 537 or PROPS. Don't do that; make-8-bit-coding-system relies on sensible | |
| 538 Unicode mappings being available, which they are at compile time for | |
| 539 dumped files (but this requires the mentioned literals), but not, for | |
| 540 most of them, at run time. ") | |
| 541 | |
| 542 (puthash (encode-char internal 'ucs) | |
| 543 ;; This is semantically an integer, but Dave Love's design | |
| 544 ;; for lookup-integer in CCL means we need to store it as a | |
| 545 ;; character. | |
| 546 (int-to-char external) | |
| 547 encode-table)) | |
| 548 | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
549 ;; Now, go through the decode table. For octet values above #x7f, if the |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
550 ;; decode table entry is nil, this means that they have an undefined |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
551 ;; mapping (= they map to XEmacs characters with keys in |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
552 ;; unicode-error-default-translation-table); for octet values below or |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
553 ;; equal to #x7f, it means that they map to ASCII. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
554 |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
555 ;; If any entry (whether below or above #x7f) in the decode-table |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
556 ;; already maps to some character with a key in |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
557 ;; unicode-error-default-translation-table, it is treated as an |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
558 ;; undefined octet by `query-coding-region'. That is, it is not |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
559 ;; necessary for an octet value to be above #x7f for this to happen. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
560 |
| 4072 | 561 (dotimes (i 256) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
562 (setq decode-table-entry (aref decode-table i)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
563 (if decode-table-entry |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
564 (when (get-char-table |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
565 decode-table-entry |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
566 unicode-error-default-translation-table) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
567 ;; The caller is explicitly specifying that this octet |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
568 ;; corresponds to an invalid sequence on disk: |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
569 (assert (= (get-char-table |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
570 decode-table-entry |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
571 unicode-error-default-translation-table) i) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
572 "Bad argument to `make-8-bit-coding-system'. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
573 If you're going to designate an octet with value below #x80 as invalid |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
574 for this coding system, make sure to map it to the invalid sequence |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
575 character corresponding to its octet value on disk. ")) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
576 |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
577 ;; decode-table-entry is nil; either the octet is to be treated as |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
578 ;; contributing to an error sequence (when (> #x7f i)), or it should |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
579 ;; be attempted to treat it as ASCII-equivalent. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
580 (setq desired-ucs (or (and (< i #x80) i) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
581 (+ invalid-sequence-code-point-start i))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
582 (while (gethash desired-ucs encode-table) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
583 (assert (not (< i #x80)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
584 "UCS code point should not already be in encode-table!" |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
585 ;; There is one invalid sequence char per octet value; |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
586 ;; with eight-bit-fixed coding systems, it makes no sense |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
587 ;; for us to be multiply allocating them. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
588 (gethash desired-ucs encode-table)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
589 (setq desired-ucs (+ private-use-start desired-ucs) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
590 private-use-start (+ private-use-start 1))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
591 (puthash desired-ucs (int-to-char i) encode-table) |
| 4085 | 592 (setq desired-ucs (if (> desired-ucs #xFF) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
593 (unicode-to-char desired-ucs) |
| 4085 | 594 ;; So we get Latin-1 when run at dump time, |
| 595 ;; instead of JIT-allocated characters. | |
| 596 (int-to-char desired-ucs))) | |
| 597 (aset decode-table i desired-ucs))) | |
| 4072 | 598 (values decode-table encode-table))) |
| 599 | |
| 600 (defun make-8-bit-generate-decode-program (decode-table) | |
| 601 "Given DECODE-TABLE, generate a CCL program to decode an 8-bit charset. | |
| 602 DECODE-TABLE must have 256 non-cons entries, and will be regarded as | |
| 603 describing a map from the octet corresponding to an offset in the | |
| 604 table to the that entry in the table. " | |
| 605 (check-argument-type #'vectorp decode-table) | |
| 606 (check-argument-range (length decode-table) #x100 #x100) | |
| 607 (let ((decode-program-parts | |
| 608 (eval-when-compile | |
| 609 (let* ((compiled | |
| 610 (append | |
| 611 (ccl-compile | |
| 612 `(3 | |
| 613 ((read r0) | |
| 614 (loop | |
| 615 (write-read-repeat r0 ,(make-vector | |
| 616 256 'sentinel)))))) nil)) | |
| 617 (first-part compiled) | |
| 618 (last-part | |
| 619 (member-if-not #'symbolp | |
| 620 (member-if-not #'integerp first-part)))) | |
| 621 ;; Chop off the sentinel sentinel sentinel [..] part. | |
| 622 (while compiled | |
| 623 (if (symbolp (cadr compiled)) | |
| 624 (setcdr compiled nil)) | |
| 625 (setq compiled (cdr compiled))) | |
| 626 (list first-part last-part))))) | |
| 627 (nconc | |
| 628 ;; copy-list needed, because the structure of the literal provided | |
| 629 ;; by our eval-when-compile hangs around. | |
| 630 (copy-list (first decode-program-parts)) | |
| 631 (append decode-table nil) | |
| 632 (second decode-program-parts)))) | |
| 633 | |
| 4145 | 634 (defun make-8-bit-choose-category (decode-table) |
| 635 "Given DECODE-TABLE, return an appropriate coding category. | |
| 636 DECODE-TABLE is a 256-entry vector describing the mapping from octets on | |
| 637 disk to XEmacs characters for some fixed-width 8-bit coding system. " | |
| 638 (check-argument-type #'vectorp decode-table) | |
| 639 (check-argument-range (length decode-table) #x100 #x100) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
640 (loop |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
641 named category |
|
4559
bd1a68c34d44
Merge my change of 2008-05-14 to the query-coding-region code.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4558
diff
changeset
|
642 for i from #x80 to #x9F |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
643 do (unless (= i (aref decode-table i)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
644 (return-from category 'no-conversion)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
645 finally return 'iso-8-1)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
646 |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
647 (defun 8-bit-fixed-query-coding-region (begin end coding-system &optional |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
648 buffer ignore-invalid-sequencesp |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
649 errorp highlightp) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
650 "The `query-coding-region' implementation for 8-bit-fixed coding systems. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
651 |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
652 Uses the `8-bit-fixed-query-from-unicode' and `8-bit-fixed-query-skip-chars' |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
653 coding system properties. The former is a hash table mapping from valid |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
654 Unicode code points to on-disk octets in the coding system; the latter a set |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
655 of characters as used by `skip-chars-forward'. Both of these properties are |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
656 generated automatically by `make-8-bit-coding-system'. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
657 |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
658 See that the documentation of `query-coding-region'; see also |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
659 `make-8-bit-coding-system'. " |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
660 (check-argument-type #'coding-system-p |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
661 (setq coding-system (find-coding-system coding-system))) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
662 (check-argument-type #'integer-or-marker-p begin) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
663 (check-argument-type #'integer-or-marker-p end) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
664 (let ((from-unicode |
| 4551 | 665 (or (coding-system-get coding-system '8-bit-fixed-query-from-unicode) |
| 666 (coding-system-get (coding-system-base coding-system) | |
| 667 '8-bit-fixed-query-from-unicode))) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
668 (skip-chars-arg |
| 4551 | 669 (or (coding-system-get coding-system '8-bit-fixed-query-skip-chars) |
| 670 (coding-system-get (coding-system-base coding-system) | |
| 671 '8-bit-fixed-query-skip-chars))) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
672 (invalid-sequences-skip-chars |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
673 (or (coding-system-get coding-system |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
674 '8-bit-fixed-invalid-sequences-skip-chars) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
675 (coding-system-get (coding-system-base coding-system) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
676 '8-bit-fixed-invalid-sequences-skip-chars))) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
677 (ranges (make-range-table)) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
678 (case-fold-search nil) |
|
4605
c786c3fd0740
Listen to the byte-compiler, core Lisp.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4604
diff
changeset
|
679 char-after fail-range-start fail-range-end extent |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
680 failed invalid-sequences-looking-at failed-reason |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
681 previous-failed-reason) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
682 (check-type from-unicode hash-table) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
683 (check-type skip-chars-arg string) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
684 (check-type invalid-sequences-skip-chars string) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
685 (setq invalid-sequences-looking-at |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
686 (if (equal "" invalid-sequences-skip-chars) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
687 ;; Regexp that will never match. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
688 #r".\{0,0\}" |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
689 (concat "[" invalid-sequences-skip-chars "]"))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
690 (when ignore-invalid-sequencesp |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
691 (setq skip-chars-arg |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
692 (concat skip-chars-arg invalid-sequences-skip-chars))) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
693 (save-excursion |
| 4551 | 694 (when highlightp |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
695 (query-coding-clear-highlights begin end buffer)) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
696 (goto-char begin buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
697 (skip-chars-forward skip-chars-arg end buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
698 (while (< (point buffer) end) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
699 (setq char-after (char-after (point buffer) buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
700 fail-range-start (point buffer)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
701 (while (and |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
702 (< (point buffer) end) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
703 (or (and |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
704 (not (gethash (encode-char char-after 'ucs) from-unicode)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
705 (setq failed-reason 'unencodable)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
706 (and (not ignore-invalid-sequencesp) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
707 (looking-at invalid-sequences-looking-at buffer) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
708 (setq failed-reason 'invalid-sequence))) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
709 (or (null previous-failed-reason) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
710 (eq previous-failed-reason failed-reason))) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
711 (forward-char 1 buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
712 (setq char-after (char-after (point buffer) buffer) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
713 failed t |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
714 previous-failed-reason failed-reason)) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
715 (if (= fail-range-start (point buffer)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
716 ;; The character can actually be encoded by the coding |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
717 ;; system; check the characters past it. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
718 (forward-char 1 buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
719 ;; The character actually failed. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
720 (when errorp |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
721 (error 'text-conversion-error |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
722 (format "Cannot encode %s using coding system" |
| 4551 | 723 (buffer-substring fail-range-start (point buffer) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
724 buffer)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
725 (coding-system-name coding-system))) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
726 (assert (not (null previous-failed-reason)) t |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
727 "previous-failed-reason should always be non-nil here") |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
728 (put-range-table fail-range-start |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
729 ;; If char-after is non-nil, we're not at |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
730 ;; the end of the buffer. |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
731 (setq fail-range-end (if char-after |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
732 (point buffer) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
733 (point-max buffer))) |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
734 previous-failed-reason ranges) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
735 (setq previous-failed-reason nil) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
736 (when highlightp |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
737 (setq extent (make-extent fail-range-start fail-range-end buffer)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
738 (set-extent-priority extent (+ mouse-highlight-priority 2)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
739 (set-extent-face extent 'query-coding-warning-face)) |
| 4551 | 740 (skip-chars-forward skip-chars-arg end buffer))) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
741 (if failed |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
742 (values nil ranges) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
743 (values t nil))))) |
| 4145 | 744 |
| 4072 | 745 (defun make-8-bit-coding-system (name unicode-map &optional description props) |
| 746 "Make and return a fixed-width 8-bit CCL coding system named NAME. | |
| 747 NAME must be a symbol, and UNICODE-MAP a list. | |
| 748 | |
| 749 UNICODE-MAP is a plist describing a map from octets in the coding | |
| 750 system NAME (as integers) to XEmacs characters. Those XEmacs | |
| 751 characters will be used explicitly on decoding, but for encoding (most | |
| 752 relevantly, on writing to disk) XEmacs characters that map to the same | |
| 753 Unicode code point will be unified. This means that the ISO-8859-? | |
| 754 characters that map to the same Unicode code point will not be | |
| 755 distinct when written to disk, which is normally what is intended; it | |
| 756 also means that East Asian Han characters from different XEmacs | |
| 757 character sets will not be distinct when written to disk, which is | |
| 758 less often what is intended. | |
| 759 | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
760 Any octets not mapped, and with values above #x7f, will be decoded into |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
761 XEmacs characters that reflect that their values are undefined. These |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
762 characters will be displayed in a language-environment-specific way. See |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
763 `unicode-error-default-translation-table' and the |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
764 `invalid-sequence-coding-system' argument to `set-language-info'. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
765 |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
766 These characters will normally be treated as invalid when checking whether |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
767 text can be encoded with `query-coding-region'--see the |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
768 IGNORE-INVALID-SEQUENCESP argument to that function to avoid this. It is |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
769 possible to specify that octets with values less than #x80 (or indeed |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
770 greater than it) be treated in this way, by specifying explicitly that they |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
771 correspond to the character mapping to that octet in |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
772 `unicode-error-default-translation-table'. Far fewer coding systems |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
773 override the ASCII mapping, though, so this is not the default. |
| 4072 | 774 |
| 775 DESCRIPTION and PROPS are as in `make-coding-system', which see. This | |
| 776 function also accepts two additional (optional) properties in PROPS; | |
| 777 `aliases', giving a list of aliases to be initialized for this | |
| 778 coding-system, and `encode-failure-octet', an integer between 0 and 256 to | |
| 779 write in place of XEmacs characters that cannot be encoded, defaulting to | |
| 780 the code for tilde `~'. " | |
| 781 (check-argument-type #'symbolp name) | |
| 782 (check-argument-type #'listp unicode-map) | |
| 783 (check-argument-type #'stringp | |
| 784 (or description | |
| 785 (setq description | |
| 786 (format "Coding system used for %s." name)))) | |
| 787 (check-valid-plist props) | |
| 788 (let ((encode-failure-octet (or (plist-get props 'encode-failure-octet) | |
| 789 (char-to-int ?~))) | |
| 790 (aliases (plist-get props 'aliases)) | |
| 791 (hash-table-sym (gentemp (format "%s-encode-table" name))) | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
792 encode-program decode-program result decode-table encode-table |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
793 skip-chars invalid-sequences-skip-chars) |
| 4072 | 794 |
| 795 ;; Some more sanity checking. | |
| 796 (check-argument-range encode-failure-octet 0 #xFF) | |
| 797 (check-argument-type #'listp aliases) | |
| 798 | |
| 799 ;; Don't pass on our extra data to make-coding-system. | |
| 800 (setq props (plist-remprop props 'encode-failure-octet) | |
| 801 props (plist-remprop props 'aliases)) | |
| 802 | |
| 803 (multiple-value-setq | |
| 804 (decode-table encode-table) | |
| 805 (make-8-bit-create-decode-encode-tables unicode-map)) | |
| 806 | |
| 807 ;; Register the decode-table. | |
| 808 (define-translation-hash-table hash-table-sym encode-table) | |
| 809 | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
810 ;; Generate the programs and skip-chars strings. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
811 (setq decode-program (make-8-bit-generate-decode-program decode-table)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
812 (multiple-value-setq |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
813 (encode-program skip-chars invalid-sequences-skip-chars) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
814 (make-8-bit-generate-encode-program-and-skip-chars-strings |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
815 decode-table encode-table encode-failure-octet)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
816 |
| 4072 | 817 (unless (vectorp encode-program) |
| 818 (setq encode-program | |
| 819 (apply #'vector | |
| 820 (nsublis (list (cons 'encode-table-sym hash-table-sym)) | |
| 821 (copy-tree encode-program))))) | |
| 822 (unless (vectorp decode-program) | |
| 823 (setq decode-program | |
| 824 (apply #'vector decode-program))) | |
| 825 | |
| 826 ;; And now generate the actual coding system. | |
| 827 (setq result | |
| 828 (make-coding-system | |
| 829 name 'ccl | |
| 830 description | |
| 831 (plist-put (plist-put props 'decode decode-program) | |
| 832 'encode encode-program))) | |
| 4295 | 833 (coding-system-put name '8-bit-fixed t) |
| 4145 | 834 (coding-system-put name 'category |
| 835 (make-8-bit-choose-category decode-table)) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
836 (coding-system-put name '8-bit-fixed-query-skip-chars |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
837 skip-chars) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
838 (coding-system-put name '8-bit-fixed-invalid-sequences-skip-chars |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
839 invalid-sequences-skip-chars) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
840 (coding-system-put name '8-bit-fixed-query-from-unicode encode-table) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
841 (coding-system-put name 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
842 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
843 (coding-system-put (intern (format "%s-unix" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
844 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
845 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
846 (coding-system-put (intern (format "%s-dos" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
847 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
848 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
849 (coding-system-put (intern (format "%s-mac" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
850 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
851 #'8-bit-fixed-query-coding-region) |
| 4072 | 852 (loop for alias in aliases |
| 853 do (define-coding-system-alias alias name)) | |
| 854 result)) | |
| 855 | |
| 856 (define-compiler-macro make-8-bit-coding-system (&whole form name unicode-map | |
| 857 &optional description props) | |
| 858 ;; We provide the compiler macro (= macro that is expanded only on | |
| 859 ;; compilation, and that can punt to a runtime version of the | |
| 860 ;; associate function if necessary) not for reasons of speed, though | |
| 861 ;; it does speed up things at runtime a little, but because the | |
| 862 ;; Unicode mappings are available at compile time in the dumped | |
| 863 ;; files, but they are not available at run time for the vast | |
| 864 ;; majority of them. | |
| 865 | |
| 866 (if (not (and (and (consp name) (eq (car name) 'quote)) | |
| 867 (and (consp unicode-map) (eq (car unicode-map) 'quote)) | |
| 868 (and (or (and (consp props) (eq (car props) 'quote)) | |
| 869 (null props))))) | |
| 870 ;; The call does not use literals; do it at runtime. | |
| 871 form | |
| 872 (setq name (cadr name) | |
| 873 unicode-map (cadr unicode-map) | |
| 874 props (if props (cadr props))) | |
| 875 (let ((encode-failure-octet | |
| 876 (or (plist-get props 'encode-failure-octet) (char-to-int ?~))) | |
| 877 (aliases (plist-get props 'aliases)) | |
| 878 encode-program decode-program | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
879 decode-table encode-table |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
880 skip-chars invalid-sequences-skip-chars) |
| 4072 | 881 |
| 882 ;; Some sanity checking. | |
| 883 (check-argument-range encode-failure-octet 0 #xFF) | |
| 884 (check-argument-type #'listp aliases) | |
| 885 | |
| 886 ;; Don't pass on our extra data to make-coding-system. | |
| 887 (setq props (plist-remprop props 'encode-failure-octet) | |
| 888 props (plist-remprop props 'aliases)) | |
| 889 | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
890 ;; Work out encode-table and decode-table |
| 4072 | 891 (multiple-value-setq |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
892 (decode-table encode-table) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
893 (make-8-bit-create-decode-encode-tables unicode-map)) |
| 4072 | 894 |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
895 ;; Generate the decode and encode programs, and the skip-chars |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
896 ;; arguments. |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
897 (setq decode-program (make-8-bit-generate-decode-program decode-table)) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
898 (multiple-value-setq |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
899 (encode-program skip-chars invalid-sequences-skip-chars) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
900 (make-8-bit-generate-encode-program-and-skip-chars-strings |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
901 decode-table encode-table encode-failure-octet)) |
| 4072 | 902 |
| 903 ;; And return the generated code. | |
| 904 `(let ((encode-table-sym (gentemp (format "%s-encode-table" ',name))) | |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
905 (encode-table ,encode-table)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
906 (define-translation-hash-table encode-table-sym encode-table) |
| 4103 | 907 (make-coding-system |
| 908 ',name 'ccl ,description | |
| 909 (plist-put (plist-put ',props 'decode | |
| 910 ,(apply #'vector decode-program)) | |
| 911 'encode | |
| 912 (apply #'vector | |
| 913 (nsublis | |
| 914 (list (cons | |
| 915 'encode-table-sym | |
| 916 (symbol-value 'encode-table-sym))) | |
| 917 ',encode-program)))) | |
| 4295 | 918 (coding-system-put ',name '8-bit-fixed t) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
919 (coding-system-put ',name 'category |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
920 ',(make-8-bit-choose-category decode-table)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
921 (coding-system-put ',name '8-bit-fixed-query-skip-chars |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
922 ,skip-chars) |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
923 (coding-system-put ',name '8-bit-fixed-invalid-sequences-skip-chars |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
924 ,invalid-sequences-skip-chars) |
|
4549
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
925 (coding-system-put ',name '8-bit-fixed-query-from-unicode encode-table) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
926 (coding-system-put ',name 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
927 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
928 (coding-system-put ',(intern (format "%s-unix" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
929 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
930 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
931 (coding-system-put ',(intern (format "%s-dos" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
932 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
933 #'8-bit-fixed-query-coding-region) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
934 (coding-system-put ',(intern (format "%s-mac" name)) |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
935 'query-coding-function |
|
68d1ca56cffa
First part of interactive checks that coding systems encode regions.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4299
diff
changeset
|
936 #'8-bit-fixed-query-coding-region) |
| 4072 | 937 ,(macroexpand `(loop for alias in ',aliases |
| 938 do (define-coding-system-alias alias | |
| 939 ',name))) | |
| 4103 | 940 (find-coding-system ',name))))) |
| 4299 | 941 |
| 942 ;; Ideally this would be in latin.el, but code-init.el uses it. | |
| 943 (make-8-bit-coding-system | |
| 944 'iso-8859-1 | |
|
4604
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
945 (loop |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
946 for i from #x80 to #xff |
|
e0a8715fdb1f
Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4568
diff
changeset
|
947 collect (list i (int-char i))) ;; Identical to Latin-1. |
| 4299 | 948 "ISO-8859-1 (Latin-1)" |
| 949 '(mnemonic "Latin 1" | |
| 950 documentation "The most used encoding of Western Europe and the Americas." | |
| 951 aliases (iso-latin-1 latin-1))) |
