annotate lisp/mule/make-coding-system.el @ 5934:e2fae7783046 cygwin

lots of use of EMACS_INT, a few others, to eliminate all pointer truncation warnings
author Henry Thompson <ht@markup.co.uk>
date Sat, 12 Dec 2015 19:08:46 +0000
parents cc6f0266bc36
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
1 ;;; make-coding-system.el; Provides the #'make-coding-system function and
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
2 ;;; much of the implementation of the fixed-width coding system type.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
3
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
4 ;; Copyright (C) 2009 Free Software Foundation
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
5
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
6 ;; Author: Aidan Kehoe
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
7
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
8 ;; This file is part of XEmacs.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
9
5402
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
10 ;; XEmacs is free software: you can redistribute it and/or modify it
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
11 ;; under the terms of the GNU General Public License as published by the
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
12 ;; Free Software Foundation, either version 3 of the License, or (at your
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
13 ;; option) any later version.
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
14
5402
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
15 ;; XEmacs is distributed in the hope that it will be useful, but WITHOUT
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
16 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
17 ;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
18 ;; for more details.
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
19
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
20 ;; You should have received a copy of the GNU General Public License
5402
308d34e9f07d Changed bulk of GPLv2 or later files identified by script
Mats Lidell <matsl@xemacs.org>
parents: 5083
diff changeset
21 ;; along with XEmacs. If not, see <http://www.gnu.org/licenses/>.
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
22
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
23 ;;; Commentary:
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
24
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
25 ;;; Code:
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
26
5083
88f955fa5a7f Back out revision c673987f5f3d, undump mule/make-coding-system.el.
Aidan Kehoe <kehoea@parhasard.net>
parents: 5081
diff changeset
27 (defvar fixed-width-private-use-start ?\uE000
5081
baffa6ca776a Backed out changeset c673987f5f3d
Aidan Kehoe <kehoea@parhasard.net>
parents: 5068
diff changeset
28 "Start of a 256 code private use area for fixed-width coding systems.
baffa6ca776a Backed out changeset c673987f5f3d
Aidan Kehoe <kehoea@parhasard.net>
parents: 5068
diff changeset
29
baffa6ca776a Backed out changeset c673987f5f3d
Aidan Kehoe <kehoea@parhasard.net>
parents: 5068
diff changeset
30 This is used to ensure that distinct octets on disk for a given coding
baffa6ca776a Backed out changeset c673987f5f3d
Aidan Kehoe <kehoea@parhasard.net>
parents: 5068
diff changeset
31 system map to distinct XEmacs characters, preventing spurious changes when
baffa6ca776a Backed out changeset c673987f5f3d
Aidan Kehoe <kehoea@parhasard.net>
parents: 5068
diff changeset
32 a file is read, not changed, and then written. ")
baffa6ca776a Backed out changeset c673987f5f3d
Aidan Kehoe <kehoea@parhasard.net>
parents: 5068
diff changeset
33
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
34 (defun fixed-width-generate-helper (decode-table encode-table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
35 encode-failure-octet)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
36 "Helper func, `fixed-width-generate-encode-program-and-skip-chars-strings',
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
37 which see.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
38
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
39 Deals with the case where ASCII and another character set can both be
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
40 encoded unambiguously and completely into the coding-system; if this is so,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
41 returns multiple values comprisig of such a ccl-program and the character
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
42 set in question. If not, it returns nil."
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
43 (let ((tentative-encode-program-parts
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
44 (eval-when-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
45 (let* ((vec-len 128)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
46 (compiled
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
47 (append
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
48 (ccl-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
49 `(1
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
50 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
51 (read-multibyte-character r0 r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
52 (if (r0 == ,(charset-id 'ascii))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
53 (write r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
54 ((if (r0 == #xABAB)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
55 ;; #xBFFE is a sentinel in the compiled
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
56 ;; program.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
57 ((r0 = r1 & #x7F)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
58 (write r0 ,(make-vector vec-len #xBFFE)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
59 ((mule-to-unicode r0 r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
60 (if (r0 == #xFFFD)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
61 (write #xBEEF)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
62 ((lookup-integer encode-table-sym r0 r3)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
63 (if r7
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
64 (write-multibyte-character r0 r3)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
65 (write #xBEEF))))))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
66 (repeat)))) nil))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
67 (first-part compiled)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
68 (last-part
5363
311f6817efc2 Remove various redundant wrapper lambdas, core lisp.
Aidan Kehoe <kehoea@parhasard.net>
parents: 5083
diff changeset
69 (member* #xBFFE (member* #xBFFE first-part) :test-not 'eql)))
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
70 (while compiled
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
71 (when (eq #xBFFE (cadr compiled))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
72 (assert (= vec-len (search '(#xBFFE) (cdr compiled)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
73 :test #'/=)) nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
74 "Strange ccl vector length")
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
75 (setcdr compiled nil))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
76 (setq compiled (cdr compiled)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
77 ;; Is the generated code as we expect it to be?
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
78 (assert (and (memq #xABAB first-part)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
79 (memq #xBEEF14 last-part))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
80 nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
81 "This code assumes that the constant #xBEEF is #xBEEF14 in \
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
82 compiled CCL code,\nand that the constant #xABAB is #xABAB. If that is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
83 not the case, and it appears not to be--that's why you're getting this
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
84 message--it will not work. ")
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
85 (list first-part last-part vec-len))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
86 (charset-lower -1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
87 (charset-upper -1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
88 worth-trying known-charsets encode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
89 other-charset-vector ucs)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
90
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
91 (loop for char across decode-table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
92 do (pushnew (char-charset char) known-charsets))
5652
cc6f0266bc36 Avoid #'delq in core Lisp, for the sake of style, a very slightly smaller binary
Aidan Kehoe <kehoea@parhasard.net>
parents: 5473
diff changeset
93 (setq known-charsets (delete* 'ascii known-charsets))
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
94
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
95 (loop for known-charset in known-charsets
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
96 do
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
97 ;; This is not possible for two dimensional charsets.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
98 (when (eq 1 (charset-dimension known-charset))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
99 (if (eq 'control-1 known-charset)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
100 (setq charset-lower 0
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
101 charset-upper 31)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
102 ;; There should be a nicer way to get the limits here.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
103 (condition-case args-out-of-range
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
104 (make-char known-charset #x100)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
105 (args-out-of-range
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
106 (setq charset-lower (third args-out-of-range)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
107 charset-upper (fourth args-out-of-range)))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
108 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
109 for i from charset-lower to charset-upper
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
110 always (and (setq ucs
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
111 (encode-char (make-char known-charset i) 'ucs))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
112 (gethash ucs encode-table))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
113 finally (setq worth-trying known-charset))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
114
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
115 ;; Only trying this for one charset at a time, the first find.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
116 (when worth-trying (return))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
117
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
118 ;; Okay, this charset is not worth trying, Try the next.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
119 (setq charset-lower -1
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
120 charset-upper -1
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
121 worth-trying nil)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
122
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
123 (when worth-trying
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
124 (setq other-charset-vector
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
125 (make-vector (third tentative-encode-program-parts)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
126 encode-failure-octet))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
127 (loop for i from charset-lower to charset-upper
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
128 do (aset other-charset-vector i
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
129 (gethash (encode-char (make-char worth-trying i)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
130 'ucs) encode-table)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
131 (setq encode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
132 (nsublis
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
133 (list (cons #xABAB (charset-id worth-trying)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
134 (nconc
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
135 (copy-list (first
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
136 tentative-encode-program-parts))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
137 (append other-charset-vector nil)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
138 (copy-tree (second
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
139 tentative-encode-program-parts))))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
140 (and encode-program (values encode-program worth-trying))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
141
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
142 (defun fixed-width-generate-encode-program-and-skip-chars-strings
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
143 (decode-table encode-table encode-failure-octet)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
144 "Generate a CCL program to encode a 8-bit fixed-width charset.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
145
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
146 DECODE-TABLE must have 256 non-cons entries, and will be regarded as
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
147 describing a map from the octet corresponding to an offset in the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
148 table to the that entry in the table. ENCODE-TABLE is a hash table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
149 map from unicode values to characters in the range [0,255].
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
150 ENCODE-FAILURE-OCTET describes an integer between 0 and 255
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
151 \(inclusive) to write in the event that a character cannot be encoded."
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
152 (check-argument-type #'vectorp decode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
153 (check-argument-range (length decode-table) #x100 #x100)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
154 (check-argument-type #'hash-table-p encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
155 (check-argument-type #'integerp encode-failure-octet)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
156 (check-argument-range encode-failure-octet #x00 #xFF)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
157 (let ((encode-program nil)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
158 (general-encode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
159 (eval-when-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
160 (let ((prog (append
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
161 (ccl-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
162 `(1
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
163 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
164 (read-multibyte-character r0 r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
165 (mule-to-unicode r0 r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
166 (if (r0 == #xFFFD)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
167 (write #xBEEF)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
168 ((lookup-integer encode-table-sym r0 r3)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
169 (if r7
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
170 (write-multibyte-character r0 r3)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
171 (write #xBEEF))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
172 (repeat)))) nil)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
173 (assert (memq #xBEEF14 prog)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
174 nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
175 "This code assumes that the constant #xBEEF is #xBEEF14 \
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
176 in compiled CCL code.\nIf that is not the case, and it appears not to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
177 be--that's why you're getting this message--it will not work. ")
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
178 prog)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
179 (encode-program-with-ascii-optimisation
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
180 (eval-when-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
181 (let ((prog (append
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
182 (ccl-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
183 `(1
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
184 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
185 (read-multibyte-character r0 r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
186 (if (r0 == ,(charset-id 'ascii))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
187 (write r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
188 ((mule-to-unicode r0 r1)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
189 (if (r0 == #xFFFD)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
190 (write #xBEEF)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
191 ((lookup-integer encode-table-sym r0 r3)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
192 (if r7
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
193 (write-multibyte-character r0 r3)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
194 (write #xBEEF))))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
195 (repeat)))) nil)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
196 (assert (memq #xBEEF14 prog)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
197 nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
198 "This code assumes that the constant #xBEEF is #xBEEF14 \
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
199 in compiled CCL code.\nIf that is not the case, and it appears not to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
200 be--that's why you're getting this message--it will not work. ")
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
201 prog)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
202 (ascii-encodes-as-itself nil)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
203 (control-1-encodes-as-itself t)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
204 (invalid-sequence-code-point-start
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
205 (eval-when-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
206 (char-to-unicode
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
207 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
208 further-char-set skip-chars invalid-sequences-skip-chars)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
209
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
210 ;; Is this coding system ASCII-compatible? If so, we can avoid the hash
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
211 ;; table lookup for those characters.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
212 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
213 for i from #x00 to #x7f
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
214 always (eq (int-to-char i) (gethash i encode-table))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
215 finally (setq ascii-encodes-as-itself t))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
216
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
217 ;; Note that this logic handles EBCDIC badly. For example, CP037,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
218 ;; MIME name ebcdic-na, has the entire repertoire of ASCII and
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
219 ;; Latin 1, and thus a more optimal ccl encode program would check
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
220 ;; for those character sets and use tables. But for now, we do a
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
221 ;; hash table lookup for every character.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
222 (if (null ascii-encodes-as-itself)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
223 ;; General encode program. Pros; general and correct. Cons;
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
224 ;; slow, a hash table lookup + mule-unicode conversion is done
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
225 ;; for every character encoding.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
226 (setq encode-program general-encode-program)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
227 (multiple-value-setq
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
228 (encode-program further-char-set)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
229 ;; Encode program with ascii-ascii mapping (based on a
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
230 ;; character's mule character set), and one other mule
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
231 ;; character set using table-based encoding, other
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
232 ;; character sets using hash table lookups.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
233 ;; fixed-width-non-ascii-completely-coveredp only returns
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
234 ;; such a mapping if some non-ASCII charset with
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
235 ;; characters in decode-table is entirely covered by
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
236 ;; encode-table.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
237 (fixed-width-generate-helper decode-table encode-table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
238 encode-failure-octet))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
239 (unless encode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
240 ;; If fixed-width-non-ascii-completely-coveredp returned nil,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
241 ;; but ASCII still encodes as itself, do one-to-one mapping
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
242 ;; for ASCII, and a hash table lookup for everything else.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
243 (setq encode-program encode-program-with-ascii-optimisation)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
244
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
245 (setq encode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
246 (nsublis
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
247 (list (cons #xBEEF14
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
248 (logior (lsh encode-failure-octet 8)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
249 #x14)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
250 (copy-tree encode-program)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
251 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
252 for i from #x80 to #x9f
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
253 do (unless (= i (aref decode-table i))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
254 (setq control-1-encodes-as-itself nil)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
255 (return)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
256 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
257 for i from #x00 to #xFF
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
258 initially (setq skip-chars
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
259 (cond
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
260 ((and ascii-encodes-as-itself
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
261 control-1-encodes-as-itself further-char-set)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
262 (concat "\x00-\x9f" (charset-skip-chars-string
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
263 further-char-set)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
264 ((and ascii-encodes-as-itself
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
265 control-1-encodes-as-itself)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
266 "\x00-\x9f")
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
267 ((null ascii-encodes-as-itself)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
268 (skip-chars-quote (apply #'string
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
269 (append decode-table nil))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
270 (further-char-set
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
271 (concat (charset-skip-chars-string 'ascii)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
272 (charset-skip-chars-string further-char-set)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
273 (t
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
274 (charset-skip-chars-string 'ascii)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
275 invalid-sequences-skip-chars "")
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
276 with decoded-ucs = nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
277 with decoded = nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
278 with no-ascii-transparency-skip-chars-list =
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
279 (unless ascii-encodes-as-itself (append decode-table nil))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
280 ;; Can't use #'match-string here, see:
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
281 ;; http://mid.gmane.org/18829.34118.709782.704574@parhasard.net
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
282 with skip-chars-test =
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
283 #'(lambda (skip-chars-string testing)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
284 (with-temp-buffer
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
285 (insert testing)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
286 (goto-char (point-min))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
287 (skip-chars-forward skip-chars-string)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
288 (= (point) (point-max))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
289 do
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
290 (setq decoded (aref decode-table i)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
291 decoded-ucs (char-to-unicode decoded))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
292 (cond
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
293 ((<= invalid-sequence-code-point-start decoded-ucs
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
294 (+ invalid-sequence-code-point-start #xFF))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
295 (setq invalid-sequences-skip-chars
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
296 (concat (string decoded)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
297 invalid-sequences-skip-chars))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
298 (assert (not (funcall skip-chars-test skip-chars decoded))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
299 "This char should only be skipped with \
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
300 `invalid-sequences-skip-chars', not by `skip-chars'"))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
301 ((not (funcall skip-chars-test skip-chars decoded))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
302 (if ascii-encodes-as-itself
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
303 (setq skip-chars (concat skip-chars (string decoded)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
304 (push decoded no-ascii-transparency-skip-chars-list))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
305 finally (unless ascii-encodes-as-itself
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
306 (setq skip-chars
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
307 (skip-chars-quote
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
308 (apply #'string
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
309 no-ascii-transparency-skip-chars-list)))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
310 (values encode-program skip-chars invalid-sequences-skip-chars)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
311
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
312 (defun fixed-width-create-decode-encode-tables (unicode-map)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
313 "Return multiple values \(DECODE-TABLE ENCODE-TABLE) given UNICODE-MAP.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
314 UNICODE-MAP should be an alist mapping from integer octet values to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
315 characters with UCS code points; DECODE-TABLE will be a 256-element
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
316 vector, and ENCODE-TABLE will be a hash table mapping from 256 numbers
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
317 to 256 distinct characters."
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
318 (check-argument-type #'listp unicode-map)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
319 (let ((decode-table (make-vector 256 nil))
4777
c69aeb86b2a3 Serialise non-default hash table rehash thresholds correctly; use this.
Aidan Kehoe <kehoea@parhasard.net>
parents: 4692
diff changeset
320 (encode-table (make-hash-table :size 256 :rehash-threshold 0.999))
5081
baffa6ca776a Backed out changeset c673987f5f3d
Aidan Kehoe <kehoea@parhasard.net>
parents: 5068
diff changeset
321 (private-use-start (encode-char fixed-width-private-use-start 'ucs))
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
322 (invalid-sequence-code-point-start
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
323 (eval-when-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
324 (char-to-unicode
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
325 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
326 desired-ucs decode-table-entry)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
327
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
328 (loop for (external internal)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
329 in unicode-map
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
330 do
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
331 (aset decode-table external internal)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
332 (assert (not (eq (encode-char internal 'ucs) -1))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
333 nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
334 "Looks like you're creating a fixed-width coding system \
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
335 in a dumped file, \nand you're either not providing a literal unicode map
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
336 or PROPS. Don't do that; fixed-width coding systems rely on sensible
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
337 Unicode mappings being available, which they are at compile time for
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
338 dumped files (but this requires the mentioned literals), but not, for
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
339 most of them, at run time. ")
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
340
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
341 (puthash (encode-char internal 'ucs)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
342 ;; This is semantically an integer, but Dave Love's design
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
343 ;; for lookup-integer in CCL means we need to store it as a
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
344 ;; character.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
345 (int-to-char external)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
346 encode-table))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
347
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
348 ;; Now, go through the decode table. For octet values above #x7f, if the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
349 ;; decode table entry is nil, this means that they have an undefined
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
350 ;; mapping (= they map to XEmacs characters with keys in
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
351 ;; unicode-error-default-translation-table); for octet values below or
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
352 ;; equal to #x7f, it means that they map to ASCII.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
353
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
354 ;; If any entry (whether below or above #x7f) in the decode-table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
355 ;; already maps to some character with a key in
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
356 ;; unicode-error-default-translation-table, it is treated as an
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
357 ;; undefined octet by `query-coding-region'. That is, it is not
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
358 ;; necessary for an octet value to be above #x7f for this to happen.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
359
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
360 (dotimes (i 256)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
361 (setq decode-table-entry (aref decode-table i))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
362 (if decode-table-entry
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
363 (when (get-char-table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
364 decode-table-entry
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
365 unicode-error-default-translation-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
366 ;; The caller is explicitly specifying that this octet
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
367 ;; corresponds to an invalid sequence on disk:
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
368 (assert (= (get-char-table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
369 decode-table-entry
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
370 unicode-error-default-translation-table) i)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
371 "Bad argument for a fixed-width coding system.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
372 If you're going to designate an octet with value below #x80 as invalid
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
373 for this coding system, make sure to map it to the invalid sequence
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
374 character corresponding to its octet value on disk. "))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
375
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
376 ;; decode-table-entry is nil; either the octet is to be treated as
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
377 ;; contributing to an error sequence (when (> #x7f i)), or it should
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
378 ;; be attempted to treat it as ASCII-equivalent.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
379 (setq desired-ucs (or (and (< i #x80) i)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
380 (+ invalid-sequence-code-point-start i)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
381 (while (gethash desired-ucs encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
382 (assert (not (< i #x80))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
383 "UCS code point should not already be in encode-table!"
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
384 ;; There is one invalid sequence char per octet value;
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
385 ;; with fixed-width coding systems, it makes no sense
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
386 ;; for us to be multiply allocating them.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
387 (gethash desired-ucs encode-table))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
388 (setq desired-ucs (+ private-use-start desired-ucs)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
389 private-use-start (+ private-use-start 1)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
390 (puthash desired-ucs (int-to-char i) encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
391 (setq desired-ucs (if (> desired-ucs #xFF)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
392 (unicode-to-char desired-ucs)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
393 ;; So we get Latin-1 when run at dump time,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
394 ;; instead of JIT-allocated characters.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
395 (int-to-char desired-ucs)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
396 (aset decode-table i desired-ucs)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
397 (values decode-table encode-table)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
398
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
399 (defun fixed-width-generate-decode-program (decode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
400 "Given DECODE-TABLE, generate a CCL program to decode an 8-bit charset.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
401 DECODE-TABLE must have 256 non-cons entries, and will be regarded as
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
402 describing a map from the octet corresponding to an offset in the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
403 table to the that entry in the table. "
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
404 (check-argument-type #'vectorp decode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
405 (check-argument-range (length decode-table) #x100 #x100)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
406 (let ((decode-program-parts
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
407 (eval-when-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
408 (let* ((compiled
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
409 (append
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
410 (ccl-compile
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
411 `(3
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
412 ((read r0)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
413 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
414 (write-read-repeat r0 ,(make-vector
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
415 256 'sentinel)))))) nil))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
416 (first-part compiled)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
417 (last-part
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
418 (member-if-not #'symbolp
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
419 (member-if-not #'integerp first-part))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
420 ;; Chop off the sentinel sentinel sentinel [..] part.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
421 (while compiled
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
422 (if (symbolp (cadr compiled))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
423 (setcdr compiled nil))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
424 (setq compiled (cdr compiled)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
425 (list first-part last-part)))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
426 (nconc
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
427 ;; copy-list needed, because the structure of the literal provided
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
428 ;; by our eval-when-compile hangs around.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
429 (copy-list (first decode-program-parts))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
430 (append decode-table nil)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
431 (second decode-program-parts))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
432
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
433 (defun fixed-width-choose-category (decode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
434 "Given DECODE-TABLE, return an appropriate coding category.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
435 DECODE-TABLE is a 256-entry vector describing the mapping from octets on
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
436 disk to XEmacs characters for some fixed-width 8-bit coding system."
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
437 (check-argument-type #'vectorp decode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
438 (check-argument-range (length decode-table) #x100 #x100)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
439 (loop
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
440 named category
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
441 for i from #x80 to #x9F
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
442 do (unless (= i (aref decode-table i))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
443 (return-from category 'no-conversion))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
444 finally return 'iso-8-1))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
445
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
446 (defun fixed-width-rework-props-runtime (name props)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
447 "Rework PROPS to a form understood by `make-coding-system-internal'.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
448
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
449 NAME must be a symbol, describing a fixed-width coding system that is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
450 about to be created. Much of the implementation of the fixed-width
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
451 coding system is in Lisp, and this function allows us to rework the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
452 arguments that `make-coding-system-internal' sees accordingly.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
453
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
454 If you are calling this function from anywhere but
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
455 `make-coding-system', you're probably doing something wrong."
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
456 (check-argument-type #'symbolp name)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
457 (check-valid-plist props)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
458 (let ((encode-failure-octet (or (plist-get props 'encode-failure-octet)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
459 (char-to-int ?~)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
460 (unicode-map (plist-get props 'unicode-map))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
461 (hash-table-sym (gensym (format "%s-encode-table" name)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
462 encode-program decode-program decode-table encode-table skip-chars
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
463 invalid-sequences-skip-chars category)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
464
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
465 (check-argument-range encode-failure-octet 0 #xFF)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
466 ;; unicode-map must be a true list, and must be non-nil.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
467 (check-argument-type #'true-list-p unicode-map)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
468 (check-argument-type #'consp unicode-map)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
469
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
470 ;; Don't pass on our extra data to make-coding-system-internal.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
471 (setq props (plist-remprop props 'encode-failure-octet)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
472 props (plist-remprop props 'unicode-map))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
473
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
474 (multiple-value-setq
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
475 (decode-table encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
476 (fixed-width-create-decode-encode-tables unicode-map))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
477
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
478 ;; Register the decode-table.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
479 (define-translation-hash-table hash-table-sym encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
480
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
481 ;; Generate the programs and skip-chars strings.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
482 (setq decode-program (fixed-width-generate-decode-program decode-table))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
483 (multiple-value-setq
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
484 (encode-program skip-chars invalid-sequences-skip-chars)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
485 (fixed-width-generate-encode-program-and-skip-chars-strings
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
486 decode-table encode-table encode-failure-octet))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
487
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
488 (setq category (fixed-width-choose-category decode-table))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
489
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
490 (unless (vectorp encode-program)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
491 (setq encode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
492 (apply #'vector
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
493 (nsublis (list (cons 'encode-table-sym hash-table-sym))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
494 (copy-tree encode-program)))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
495 (unless (vectorp decode-program)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
496 (setq decode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
497 (apply #'vector decode-program)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
498
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
499 (loop for (symbol . value)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
500 in `((decode . ,decode-program)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
501 (encode . ,encode-program)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
502 (from-unicode . ,encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
503 (query-skip-chars . ,skip-chars)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
504 (invalid-sequences-skip-chars . ,invalid-sequences-skip-chars)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
505 (category . ,category))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
506 with default = (gensym)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
507 do
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
508 (unless (eq default (plist-get props symbol default))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
509 (error
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
510 'invalid-argument
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
511 "Explicit property not allowed for fixed-width coding systems"
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
512 symbol))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
513 (setq props (nconc (list symbol value) props)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
514 props))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
515
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
516 ;;;###autoload
4692
dc3c2f298857 Support last two arguments to #'make-coding-system being optional, again.
Aidan Kehoe <kehoea@parhasard.net>
parents: 4690
diff changeset
517 (defun make-coding-system (name type &optional description props)
4690
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
518 "Register symbol NAME as a coding system.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
519
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
520 TYPE describes the conversion method used and should be one of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
521
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
522 nil or `undecided'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
523 Automatic conversion. XEmacs attempts to detect the coding system
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
524 used in the file.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
525 `chain'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
526 Chain two or more coding systems together to make a combination coding
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
527 system.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
528 `no-conversion'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
529 No conversion. Use this for binary files and such. On output,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
530 graphic characters that are not in ASCII or Latin-1 will be
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
531 replaced by a ?. (For a no-conversion-encoded buffer, these
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
532 characters will only be present if you explicitly insert them.)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
533 `convert-eol'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
534 Convert CRLF sequences or CR to LF.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
535 `shift-jis'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
536 Shift-JIS (a Japanese encoding commonly used in PC operating systems).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
537 `unicode'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
538 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
539 `mswindows-unicode-to-multibyte'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
540 (MS Windows only) Converts from Windows Unicode to Windows Multibyte
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
541 (any code page encoding) upon encoding, and the other way upon decoding.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
542 `mswindows-multibyte'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
543 Converts to or from Windows Multibyte (any code page encoding).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
544 This is resolved into a chain of `mswindows-unicode' and
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
545 `mswindows-unicode-to-multibyte'.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
546 `iso2022'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
547 Any ISO2022-compliant encoding. Among other things, this includes
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
548 JIS (the Japanese encoding commonly used for e-mail), EUC (the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
549 standard Unix encoding for Japanese and other languages), and
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
550 Compound Text (the encoding used in X11). You can specify more
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
551 specific information about the conversion with the PROPS argument.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
552 `fixed-width'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
553 A fixed-width eight bit encoding that is not necessarily compliant with
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
554 ISO 2022. This coding system assumes Unicode equivalency, that is, if
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
555 two given XEmacs characters have the same Unicode mapping, they will
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
556 always map to the same octet on disk.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
557 `big5'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
558 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
559 `ccl'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
560 The conversion is performed using a user-written pseudo-code
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
561 program. CCL (Code Conversion Language) is the name of this
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
562 pseudo-code.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
563 `gzip'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
564 GZIP compression format.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
565 `internal'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
566 Write out or read in the raw contents of the memory representing
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
567 the buffer's text. This is primarily useful for debugging
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
568 purposes, and is only enabled when XEmacs has been compiled with
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
569 DEBUG_XEMACS defined (via the --debug configure option).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
570 WARNING: Reading in a file using `internal' conversion can result
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
571 in an internal inconsistency in the memory representing a
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
572 buffer's text, which will produce unpredictable results and may
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
573 cause XEmacs to crash. Under normal circumstances you should
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
574 never use `internal' conversion.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
575
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
576 DESCRIPTION is a short English phrase describing the coding system,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
577 suitable for use as a menu item. (See also the `documentation' property
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
578 below.)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
579
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
580 PROPS is a property list, describing the specific nature of the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
581 character set. Recognized properties are:
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
582
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
583 `mnemonic'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
584 String to be displayed in the modeline when this coding system is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
585 active.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
586
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
587 `documentation'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
588 Detailed documentation on the coding system.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
589
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
590 `aliases'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
591 A list of aliases for the coding system. See
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
592 `define-coding-system-alias'.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
593
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
594 `eol-type'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
595 End-of-line conversion to be used. It should be one of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
596
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
597 nil
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
598 Automatically detect the end-of-line type (LF, CRLF,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
599 or CR). Also generate subsidiary coding systems named
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
600 `NAME-unix', `NAME-dos', and `NAME-mac', that are
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
601 identical to this coding system but have an EOL-TYPE
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
602 value of `lf', `crlf', and `cr', respectively.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
603 `lf'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
604 The end of a line is marked externally using ASCII LF.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
605 Since this is also the way that XEmacs represents an
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
606 end-of-line internally, specifying this option results
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
607 in no end-of-line conversion. This is the standard
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
608 format for Unix text files.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
609 `crlf'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
610 The end of a line is marked externally using ASCII
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
611 CRLF. This is the standard format for MS-DOS text
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
612 files.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
613 `cr'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
614 The end of a line is marked externally using ASCII CR.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
615 This is the standard format for Macintosh text files.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
616 t
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
617 Automatically detect the end-of-line type but do not
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
618 generate subsidiary coding systems. (This value is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
619 converted to nil when stored internally, and
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
620 `coding-system-property' will return nil.)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
621
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
622 `post-read-conversion'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
623 The value is a function to call after some text is inserted and
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
624 decoded by the coding system itself and before any functions in
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
625 `after-change-functions' are called. (#### Not actually true in
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
626 XEmacs. `after-change-functions' will be called twice if
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
627 `post-read-conversion' changes something.) The argument of this
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
628 function is the same as for a function in
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
629 `after-insert-file-functions', i.e. LENGTH of the text inserted,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
630 with point at the head of the text to be decoded.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
631
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
632 `pre-write-conversion'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
633 The value is a function to call after all functions in
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
634 `write-region-annotate-functions' and `buffer-file-format' are
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
635 called, and before the text is encoded by the coding system itself.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
636 The arguments to this function are the same as those of a function
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
637 in `write-region-annotate-functions', i.e. FROM and TO, specifying
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
638 a region of text.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
639
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
640 The following properties are used by `default-query-coding-region',
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
641 the default implementation of `query-coding-region'. This
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
642 implementation and these properties are not used by the Unicode coding
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
643 systems, nor by fixed-width coding systems.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
644
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
645 `safe-chars'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
646 The value is a char table. If a character has non-nil value in it,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
647 the character is safely supported by the coding system.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
648 This overrides the `safe-charsets' property.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
649
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
650 `safe-charsets'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
651 The value is a list of charsets safely supported by the coding
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
652 system. For coding systems based on ISO 2022, XEmacs may try to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
653 encode characters outside these character sets, but outside of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
654 East Asia and East Asian coding systems, it is unlikely that
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
655 consumers of the data will understand XEmacs' encoding.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
656 The value t means that all XEmacs character sets handles are supported.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
657
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
658 The following properties are allowed for FSF compatibility but currently
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
659 ignored:
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
660
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
661 `translation-table-for-decode'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
662 The value is a translation table to be applied on decoding. See
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
663 the function `make-translation-table' for the format of translation
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
664 table. This is not applicable to CCL-based coding systems.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
665
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
666 `translation-table-for-encode'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
667 The value is a translation table to be applied on encoding. This is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
668 not applicable to CCL-based coding systems.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
669
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
670 `mime-charset'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
671 The value is a symbol of which name is `MIME-charset' parameter of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
672 the coding system.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
673
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
674 `valid-codes' (meaningful only for a coding system based on CCL)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
675 The value is a list to indicate valid byte ranges of the encoded
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
676 file. Each element of the list is an integer or a cons of integer.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
677 In the former case, the integer value is a valid byte code. In the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
678 latter case, the integers specifies the range of valid byte codes.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
679
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
680 The following additional property is recognized if TYPE is `convert-eol':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
681
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
682 `subtype'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
683 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
684 the corresponding sequence will be converted to LF. When encoding,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
685 the opposite happens. This coding system converts characters to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
686 characters.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
687
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
688
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
689
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
690 The following additional properties are recognized if TYPE is `iso2022':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
691
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
692 `charset-g0'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
693 `charset-g1'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
694 `charset-g2'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
695 `charset-g3'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
696 The character set initially designated to the G0 - G3 registers.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
697 The value should be one of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
698
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
699 -- A charset object (designate that character set)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
700 -- nil (do not ever use this register)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
701 -- t (no character set is initially designated to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
702 the register, but may be later on; this automatically
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
703 sets the corresponding `force-g*-on-output' property)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
704
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
705 `force-g0-on-output'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
706 `force-g1-on-output'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
707 `force-g2-on-output'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
708 `force-g2-on-output'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
709 If non-nil, send an explicit designation sequence on output before
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
710 using the specified register.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
711
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
712 `short'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
713 If non-nil, use the short forms \"ESC $ @\", \"ESC $ A\", and
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
714 \"ESC $ B\" on output in place of the full designation sequences
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
715 \"ESC $ ( @\", \"ESC $ ( A\", and \"ESC $ ( B\".
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
716
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
717 `no-ascii-eol'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
718 If non-nil, don't designate ASCII to G0 at each end of line on output.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
719 Setting this to non-nil also suppresses other state-resetting that
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
720 normally happens at the end of a line.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
721
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
722 `no-ascii-cntl'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
723 If non-nil, don't designate ASCII to G0 before control chars on output.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
724
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
725 `seven'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
726 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
727 environment.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
728
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
729 `lock-shift'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
730 If non-nil, use locking-shift (SO/SI) instead of single-shift
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
731 or designation by escape sequence.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
732
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
733 `no-iso6429'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
734 If non-nil, don't use ISO6429's direction specification.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
735
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
736 `escape-quoted'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
737 If non-nil, literal control characters that are the same as
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
738 the beginning of a recognized ISO2022 or ISO6429 escape sequence
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
739 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
740 SS3 (0x8F), and CSI (0x9B)) are \"quoted\" with an escape character
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
741 so that they can be properly distinguished from an escape sequence.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
742 (Note that doing this results in a non-portable encoding.) This
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
743 encoding flag is used for byte-compiled files. Note that ESC
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
744 is a good choice for a quoting character because there are no
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
745 escape sequences whose second byte is a character from the Control-0
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
746 or Control-1 character sets; this is explicitly disallowed by the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
747 ISO2022 standard.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
748
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
749 `input-charset-conversion'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
750 A list of conversion specifications, specifying conversion of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
751 characters in one charset to another when decoding is performed.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
752 Each specification is a list of two elements: the source charset,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
753 and the destination charset.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
754
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
755 `output-charset-conversion'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
756 A list of conversion specifications, specifying conversion of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
757 characters in one charset to another when encoding is performed.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
758 The form of each specification is the same as for
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
759 `input-charset-conversion'.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
760
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
761 The following additional properties are recognized if TYPE is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
762 `fixed-width':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
763
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
764 `unicode-map'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
765 Required. A plist describing a map from octets in the coding system
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
766 NAME (as integers) to XEmacs characters. Those XEmacs characters will
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
767 be used explicitly on decoding, but for encoding (most relevantly, on
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
768 writing to disk) XEmacs characters that map to the same Unicode code
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
769 point will be unified. This means that the ISO-8859-? characters that
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
770 map to the same Unicode code point will not be distinct when written to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
771 disk, which is normally what is intended; it also means that East Asian
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
772 Han characters from different XEmacs character sets will not be
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
773 distinct when written to disk, which is less often what is intended.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
774
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
775 Any octets not mapped, and with values above #x7f, will be decoded into
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
776 XEmacs characters that reflect that their values are undefined. These
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
777 characters will be displayed in a language-environment-specific
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
778 way. See `unicode-error-default-translation-table' and the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
779 `invalid-sequence-coding-system' argument to `set-language-info'.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
780
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
781 These characters will normally be treated as invalid when checking
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
782 whether text can be encoded with `query-coding-region'--see the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
783 IGNORE-INVALID-SEQUENCESP argument to that function to avoid this. It
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
784 is possible to specify that octets with values less than #x80 (or
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
785 indeed greater than it) be treated in this way, by specifying
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
786 explicitly that they correspond to the character mapping to that octet
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
787 in `unicode-error-default-translation-table'. Far fewer coding systems
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
788 override the ASCII mapping, though, so this is not the default.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
789
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
790 `encode-failure-octet'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
791 An integer between 0 and 255 to write in place of XEmacs characters
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
792 that cannot be encoded, defaulting to the code for tilde `~'.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
793
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
794 The following additional properties are recognized (and required)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
795 if TYPE is `ccl':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
796
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
797 `decode'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
798 CCL program used for decoding (converting to internal format).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
799
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
800 `encode'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
801 CCL program used for encoding (converting to external format).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
802
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
803
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
804 The following additional properties are recognized if TYPE is `chain':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
805
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
806 `chain'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
807 List of coding systems to be chained together, in decoding order.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
808
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
809 `canonicalize-after-coding'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
810 Coding system to be returned by the detector routines in place of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
811 this coding system.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
812
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
813
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
814
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
815 The following additional properties are recognized if TYPE is `unicode':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
816
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
817 `unicode-type'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
818 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
819 yet implemented). `utf-16' is the basic two-byte encoding;
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
820 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
821 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
822 only characters that will safely pass through all mail gateways.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
823 [[ This should be \"transformation format\". There should also be
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
824 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]]
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
825
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
826 `little-endian'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
827 If non-nil, `utf-16' and `ucs-4' will write out the groups of two
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
828 or four bytes little-endian instead of big-endian. This is required,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
829 for example, under Windows.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
830
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
831 `need-bom'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
832 If non-nil, a byte order mark (BOM, or Unicode FFFE) should be
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
833 written out at the beginning of the data. This serves both to
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
834 identify the endianness of the following data and to mark the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
835 data as Unicode (at least, this is how Windows uses it).
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
836 [[ The correct term is \"signature\", since this technique may also
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
837 be used with UTF-8. That is the term used in the standard. ]]
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
838
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
839
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
840 The following additional properties are recognized if TYPE is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
841 `mswindows-multibyte':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
842
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
843 `code-page'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
844 Either a number (specifying a particular code page) or one of the
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
845 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI,
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
846 OEM, Macintosh, or EBCDIC code page associated with a particular
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
847 locale (given by the `locale' property). NOTE: EBCDIC code pages
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
848 only exist in Windows 2000 and later.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
849
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
850 `locale'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
851 If `code-page' is a symbol, this specifies the locale whose code
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
852 page of the corresponding type should be used. This should be
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
853 one of the following: A cons of two strings, (LANGUAGE
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
854 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
855 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
856 used); or one of the symbols `current', `user-default', or
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
857 `system-default', corresponding to the values of
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
858 `mswindows-current-locale', `mswindows-user-default-locale', or
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
859 `mswindows-system-default-locale', respectively.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
860
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
861
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
862 The following additional properties are recognized if TYPE is `undecided':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
863 \[[ Doesn't GNU use \"detect-*\" for the following two? ]]
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
864
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
865 `do-eol'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
866 Do EOL detection.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
867
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
868 `do-coding'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
869 Do encoding detection.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
870
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
871 `coding-system'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
872 If encoding detection is not done, use the specified coding system
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
873 to do decoding. This is used internally when implementing coding
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
874 systems with an EOL type that specifies autodetection (the default),
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
875 so that the detector routines return the proper subsidiary.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
876
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
877
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
878
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
879 The following additional property is recognized if TYPE is `gzip':
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
880
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
881 `level'
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
882 Compression level: 0 through 9, or `default' (currently 6)."
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
883 (when (eq 'fixed-width type)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
884 (setq props (fixed-width-rework-props-runtime name props)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
885 (make-coding-system-internal name type description props))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
886
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
887 (define-compiler-macro make-coding-system (&whole form name type
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
888 &optional description props)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
889 (if (equal '(quote fixed-width) type)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
890 (if (memq (car-safe props) '(quote eval-when-compile))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
891 (let* ((props (if (eq 'eval-when-compile (car props))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
892 (eval (cadr props))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
893 (cadr props)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
894 (encode-failure-octet
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
895 (or (plist-get props 'encode-failure-octet)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
896 (char-to-int ?~)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
897 (unicode-map (plist-get props 'unicode-map))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
898 (default-plist-entry (gensym))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
899 (encode-table-sym (gensym
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
900 (if (eq 'quote (car name))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
901 (format "%s-enc-" (second name)))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
902 encode-program decode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
903 decode-table encode-table
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
904 skip-chars invalid-sequences-skip-chars category)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
905
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
906 (check-argument-range encode-failure-octet 0 #xFF)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
907 ;; unicode-map must be a true list, and must be non-nil.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
908 (check-argument-type #'true-list-p unicode-map)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
909 (check-argument-type #'consp unicode-map)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
910
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
911 ;; Don't pass on our extra data to make-coding-system-internal.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
912 (setq props (plist-remprop props 'encode-failure-octet)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
913 props (plist-remprop props 'unicode-map))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
914
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
915 (multiple-value-setq
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
916 (decode-table encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
917 (fixed-width-create-decode-encode-tables unicode-map))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
918
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
919 ;; Generate the decode and encode programs, and the skip-chars
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
920 ;; arguments.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
921 (setq decode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
922 (fixed-width-generate-decode-program decode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
923 category (fixed-width-choose-category decode-table))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
924
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
925 (multiple-value-setq
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
926 (encode-program skip-chars invalid-sequences-skip-chars)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
927 (fixed-width-generate-encode-program-and-skip-chars-strings
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
928 decode-table encode-table encode-failure-octet))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
929
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
930 (unless (vectorp decode-program)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
931 (setq decode-program
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
932 (apply #'vector decode-program)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
933
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
934 (unless (eq default-plist-entry (plist-get props 'encode
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
935 default-plist-entry))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
936 (error
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
937 'invalid-argument
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
938 "Explicit property not allowed for fixed-width coding system"
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
939 'encode))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
940 (loop for (symbol . value)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
941 in `((decode . ,decode-program)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
942 (from-unicode . ,encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
943 (query-skip-chars . ,skip-chars)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
944 (invalid-sequences-skip-chars .
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
945 ,invalid-sequences-skip-chars)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
946 (category . ,category))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
947 do
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
948 (unless (eq default-plist-entry (plist-get props symbol
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
949 default-plist-entry))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
950 (error
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
951 'invalid-argument
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
952 "Explicit property not allowed for \
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
953 fixed-width coding systems"
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
954 symbol))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
955 (setq props (nconc (list symbol value) props)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
956 `(progn
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
957 (define-translation-hash-table ',encode-table-sym ,encode-table)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
958 (make-coding-system-internal
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
959 ,name ,type ,description
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
960 ',(nconc (list 'encode
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
961 (apply #'vector
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
962 (nsublis
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
963 (list (cons 'encode-table-sym
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
964 encode-table-sym))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
965 encode-program)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
966 props))))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
967 ;; The form does not use literals; call make-coding-system at
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
968 ;; run time.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
969 form)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
970 (if (byte-compile-constp type)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
971 ;; This is not a fixed-width call; compile it to a form that 21.4
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
972 ;; can also understand.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
973 `(funcall (or (and (fboundp 'make-coding-system-internal)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
974 'make-coding-system-internal)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
975 'make-coding-system)
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
976 ,@(cdr form))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
977 ;; TYPE is not literal; work things out at runtime.
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
978 form)))
257b468bf2ca Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
diff changeset
979