Mercurial > hg > xemacs-beta
annotate src/file-coding.h @ 5366:f00192e1cd49
Examining the result of #'length: `eql', not `=', it's better style & cheaper
2011-03-08 Aidan Kehoe <kehoea@parhasard.net>
* buff-menu.el (list-buffers-noselect):
* byte-optimize.el (byte-optimize-identity):
* byte-optimize.el (byte-optimize-if):
* byte-optimize.el (byte-optimize-nth):
* byte-optimize.el (byte-optimize-nthcdr):
* bytecomp.el (byte-compile-warn-wrong-args):
* bytecomp.el (byte-compile-two-args-19->20):
* bytecomp.el (byte-compile-list):
* bytecomp.el (byte-compile-beginning-of-line):
* bytecomp.el (byte-compile-set):
* bytecomp.el (byte-compile-set-default):
* bytecomp.el (byte-compile-values):
* bytecomp.el (byte-compile-values-list):
* bytecomp.el (byte-compile-integerp):
* bytecomp.el (byte-compile-multiple-value-list-internal):
* bytecomp.el (byte-compile-throw):
* cl-macs.el (cl-do-arglist):
* cl-macs.el (cl-parse-loop-clause):
* cl-macs.el (multiple-value-bind):
* cl-macs.el (multiple-value-setq):
* cl-macs.el (get-setf-method):
* cmdloop.el (command-error):
* cmdloop.el (y-or-n-p-minibuf):
* cmdloop.el (yes-or-no-p-minibuf):
* coding.el (unencodable-char-position):
* cus-edit.el (custom-face-prompt):
* cus-edit.el (custom-buffer-create-internal):
* cus-edit.el (widget-face-action):
* cus-edit.el (custom-group-value-create):
* descr-text.el (describe-char-unicode-data):
* dialog-gtk.el (popup-builtin-question-dialog):
* dragdrop.el (experimental-dragdrop-drop-log-function):
* dragdrop.el (experimental-dragdrop-drop-mime-default):
* easymenu.el (easy-menu-add):
* easymenu.el (easy-menu-remove):
* faces.el (read-face-name):
* faces.el (set-face-stipple):
* files.el (file-name-non-special):
* font.el (font-combine-fonts):
* font.el (font-set-face-font):
* font.el (font-parse-rgb-components):
* font.el (font-rgb-color-p):
* font.el (font-color-rgb-components):
* gnuserv.el (gnuserv-edit-files):
* help.el (key-or-menu-binding):
* help.el (function-documentation-1):
* help.el (function-documentation):
* info.el (info):
* isearch-mode.el (isearch-exit):
* isearch-mode.el (isearch-edit-string):
* isearch-mode.el (isearch-*-char):
* isearch-mode.el (isearch-complete1):
* ldap.el (ldap-encode-country-string):
* ldap.el (ldap-decode-string):
* minibuf.el (read-file-name-internal-1):
* minibuf.el (read-non-nil-coding-system):
* minibuf.el (get-user-response):
* mouse.el (drag-window-divider):
* mule/ccl.el:
* mule/ccl.el (ccl-compile-if):
* mule/ccl.el (ccl-compile-break):
* mule/ccl.el (ccl-compile-repeat):
* mule/ccl.el (ccl-compile-write-repeat):
* mule/ccl.el (ccl-compile-call):
* mule/ccl.el (ccl-compile-end):
* mule/ccl.el (ccl-compile-read-multibyte-character):
* mule/ccl.el (ccl-compile-write-multibyte-character):
* mule/ccl.el (ccl-compile-translate-character):
* mule/ccl.el (ccl-compile-mule-to-unicode):
* mule/ccl.el (ccl-compile-unicode-to-mule):
* mule/ccl.el (ccl-compile-lookup-integer):
* mule/ccl.el (ccl-compile-lookup-character):
* mule/ccl.el (ccl-compile-map-multiple):
* mule/ccl.el (ccl-compile-map-single):
* mule/devan-util.el (devanagari-compose-to-one-glyph):
* mule/devan-util.el (devanagari-composition-component):
* mule/mule-cmds.el (finish-set-language-environment):
* mule/viet-util.el:
* mule/viet-util.el (viet-encode-viscii-char):
* multicast.el (open-multicast-group):
* newcomment.el (comment-quote-nested):
* newcomment.el (comment-region):
* newcomment.el (comment-dwim):
* regexp-opt.el (regexp-opt-group):
* replace.el (map-query-replace-regexp):
* specifier.el (derive-device-type-from-tag-set):
* subr.el (skip-chars-quote):
* test-harness.el (test-harness-from-buffer):
* test-harness.el (batch-test-emacs):
* wid-edit.el (widget-choice-action):
* wid-edit.el (widget-symbol-prompt-internal):
* wid-edit.el (widget-color-action):
* window-xemacs.el (push-window-configuration):
* window-xemacs.el (pop-window-configuration):
* window.el (quit-window):
* x-compose.el (electric-diacritic):
It's better style, and cheaper (often one assembler instruction
vs. a C funcall in the byte code), to use `eql' instead of `='
when it's clear what numerical type a given result will be. Change
much of our code to do this, with the help of a byte-compiler
change (not committed) that looked for calls to #'length (which
always returns an integer) in its args.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Tue, 08 Mar 2011 23:41:52 +0000 |
parents | a9c41067dd88 |
children | 308d34e9f07d |
rev | line source |
---|---|
771 | 1 /* Header for encoding conversion functions; coding-system object. |
2 #### rename me to coding-system.h | |
428 | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
4 Copyright (C) 1995 Sun Microsystems, Inc. | |
793 | 5 Copyright (C) 2000, 2001, 2002 Ben Wing. |
428 | 6 |
7 This file is part of XEmacs. | |
8 | |
9 XEmacs is free software; you can redistribute it and/or modify it | |
10 under the terms of the GNU General Public License as published by the | |
11 Free Software Foundation; either version 2, or (at your option) any | |
12 later version. | |
13 | |
14 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with XEmacs; see the file COPYING. If not, write to | |
21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
22 Boston, MA 02111-1307, USA. */ | |
23 | |
24 /* Synched up with: Mule 2.3. Not in FSF. */ | |
25 | |
771 | 26 /* Authorship: |
27 | |
28 Current primary author: Ben Wing <ben@xemacs.org> | |
29 | |
30 Written by Ben Wing <ben@xemacs.org> for XEmacs, 1995, loosely based | |
31 on code written 91.10.09 by K.Handa <handa@etl.go.jp>. | |
32 Rewritten again 2000-2001 by Ben Wing to support properly | |
33 abstracted coding systems. | |
34 September 2001: Finished last part of abstraction, the detection | |
35 mechanism. | |
36 */ | |
428 | 37 |
440 | 38 #ifndef INCLUDED_file_coding_h_ |
39 #define INCLUDED_file_coding_h_ | |
428 | 40 |
771 | 41 /* Capsule description of the different structures, what their purpose is, |
42 how they fit together, and where various bits of data are stored. | |
43 | |
2297 | 44 A "coding system" is an algorithm for converting stream data in one format |
45 into stream data in another format. Currently most of the coding systems | |
46 we have created concern internationalized text, and convert between the | |
47 XEmacs internal format for multilingual text, and various external | |
771 | 48 representations of such text. However, any such conversion is possible, |
49 for example, compressing or uncompressing text using the gzip algorithm. | |
50 All coding systems provide both encode and decode routines, so that the | |
2297 | 51 conversion can go both ways. Unfortunately encoding and decoding may not |
52 be exact inverses, even for a specific instance of a coding system. Care | |
53 must be taken when this is not the case. | |
771 | 54 |
55 The way we handle this is by dividing the various potential coding | |
56 systems into types, analogous to classes in C++. Each coding system | |
57 type encompasses a series of related coding systems that it can | |
58 implement, and it has properties which control how exactly the encoding | |
59 works. A particular set of values for each of the properties makes up a | |
60 "coding system", and specifies one particular encoding. A `struct | |
61 Lisp_Coding_System' object encapsulates those settings -- its type, the | |
62 values chosen for all properties of that type, a name for the coding | |
63 system, some documentation. | |
64 | |
65 In addition, there are of course methods associated with a coding system | |
66 type, implementing the encoding, decoding, etc. These are stored in a | |
67 `struct coding_system_methods' object, one per coding-system type, which | |
68 contains mostly function pointers. This is retrievable from the | |
69 coding-system object (i.e. the struct Lisp_Coding_System), which has a | |
70 pointer to it. | |
71 | |
72 In order to actually use a coding system to do an encoding or decoding | |
73 operation, you need to use a coding Lstream. | |
74 | |
75 Now let's look more at attached data. All coding systems have certain | |
76 common data fields -- name, type, documentation, etc. -- as well as a | |
77 bunch more that are defined by the coding system type. To handle this | |
78 cleanly, each coding system type defines a structure that holds just the | |
79 fields of data particular to it, and calls it e.g. `struct | |
80 iso2022_coding_system' for coding system type `iso2022'. When the | |
81 memory block holding the coding system object is created, it is sized | |
82 such that it can hold both the struct Lisp_Coding_System and the struct | |
83 iso2022_coding_system (or whatever) directly following it. (This is a | |
84 common trick; another possibility is to have a void * pointer in the | |
85 struct Lisp_Coding_System, which points to another memory block holding | |
86 the struct iso2022_coding_system.) A macro is provided | |
87 (CODING_SYSTEM_TYPE_DATA) to retrieve a pointer of the right type to the | |
88 type-specific data contained within the overall `struct | |
89 Lisp_Coding_System' block. | |
90 | |
91 Lstreams, similarly, are objects of type `struct lstream' holding data | |
92 about the stream operation (how much data has been read or written, any | |
93 buffered data, any error conditions, etc.), and like coding systems have | |
94 different types. They have a structure called `Lstream_implementation', | |
95 one per lstream type, exactly analogous to `struct | |
96 coding_system_methods'. In addition, they have type-specific data | |
97 (specifying, e.g., the file number, FILE *, memory location, other | |
98 lstream, etc. to read the data from or write it to, and for conversion | |
99 processes, the current state of the process -- are we decoding ASCII or | |
100 Kanji characters? are we in the middle of processing an escape | |
101 sequence? etc.). This type-specific data is stored in a structure | |
102 named `struct coding_stream'. Just like for coding systems, the | |
103 type-independent data in the `struct lstream' and the type-dependent | |
104 data in the `struct coding_stream' are stored together in the same | |
105 memory block. | |
428 | 106 |
771 | 107 Now things get a bit tricky. The `struct coding_stream' is |
108 type-specific from the point of view of an lstream, but not from the | |
109 point of view of a coding system. It contains only general data about | |
110 the conversion process, e.g. the name of the coding system used for | |
111 conversion, the lstream that we take data from or write it to (depending | |
112 on whether this was created as a read stream or a write stream), a | |
113 buffer to hold extra data we retrieved but can't send on yet, some | |
114 flags, etc. It also needs some data specific to the particular coding | |
115 system and thus to the particular operation going on. This data is held | |
116 in a structure named (e.g.) `struct iso2022_coding_stream', and it's | |
117 held in a separate memory block and pointed to by the generic `struct | |
118 coding_stream'. It's not glommed into a single memory block both | |
119 because that would require making changes to the generic lstream code | |
120 and more importantly because the coding system used in a particular | |
121 coding lstream can be changed at any point during the lifetime of the | |
122 lstream, and possibly multiple times. (For example, it can be set using | |
123 the Lisp primitives `set-process-input-coding-system' and | |
124 `set-console-tty-input-coding-system', as well as getting set when a | |
125 conversion operation was started with coding system `undecided' and the | |
2297 | 126 correct coding system was then detected.) #### This suggests implementing |
127 compound text extended segments by saving the state of the ctext stream, | |
128 and installing an appropriate coding system for the duration of the segment. | |
428 | 129 |
771 | 130 IMPORTANT NOTE: There are at least two ancillary data structures |
131 associated with a coding system type. (There may also be detection data; | |
132 see elsewhere.) It's important, when writing a coding system type, to | |
133 keep straight which type of data goes where. In particular, `struct | |
134 foo_coding_system' is attached to the coding system object itself. This | |
135 is a permanent object and there's only one per coding system. It's | |
136 created once, usually at init time, and never destroyed. So, `struct | |
137 foo_coding_system' should in general not contain dynamic data! (Just | |
138 data describing the properties of the coding system.) In particular, | |
139 *NO* data about any conversion in progress. There may be many | |
140 conversions going on simultaneously using a particular coding system, | |
141 and by storing conversion data in the coding system, these conversions | |
142 will overwrite each other's data. | |
143 | |
144 Instead, use the lstream object, whose purpose is to encapsulate a | |
145 particular conversion and all associated data. From the lstream object, | |
146 you can get the struct coding_stream using something like | |
147 | |
148 struct coding_stream *str = LSTREAM_TYPE_DATA (lstr, coding); | |
149 | |
150 But usually this structure is already passed to you as one of the | |
151 parameters of the method being invoked. | |
152 | |
153 From the struct coding_stream, you can retrieve the | |
154 coding-system-type-specific data using something like | |
155 | |
156 struct foo_coding_stream *data = CODING_STREAM_TYPE_DATA (str, foo); | |
157 | |
158 Then, use this structure to hold all data relevant to the particular | |
159 conversion being done. | |
160 | |
161 Initialize this structure whenever init_coding_stream_method is called | |
162 (this may happen more than once), and finalize it (free resources, etc.) | |
163 when finalize_coding_stream_method is called. | |
164 */ | |
165 | |
/* Forward declarations.  struct coding_stream and struct
   coding_system_methods are referred to through pointers by the
   declarations that follow, before their full definitions are seen.
   NOTE(review): struct detection_state is not referenced in the part of
   the header shown here -- presumably used by the detection code. */
166 struct coding_stream; | |
167 struct detection_state; | |
168 | |
1204 | 169 extern const struct sized_memory_description coding_system_methods_description; |
771 | 170 |
171 struct coding_system_methods; | |
172 | |
/* Kinds of data (bytes or characters) at the two ends of a conversion.
   This is the return type of the conversion_end_type method in
   struct coding_system_methods.  Each name is written in terms of the
   decoding direction (DECODES_<source>_TO_<sink>).
   NOTE(review): the conversion_end_type comment phrases the same thing in
   terms of *encoding* -- confirm which enumerator is the default. */
173 enum source_sink_type | |
428 | 174 { |
771 | 175 DECODES_CHARACTER_TO_BYTE, |
176 DECODES_BYTE_TO_BYTE, | |
177 DECODES_BYTE_TO_CHARACTER, | |
178 DECODES_CHARACTER_TO_CHARACTER | |
428 | 179 }; |
180 | |
/* End-of-line conventions.  A value of this type is stored in the
   eol_type field of struct Lisp_Coding_System ("Eol type requested by
   user").
   NOTE(review): presumably EOL_LF, EOL_CRLF and EOL_CR name the line
   terminator used, and EOL_AUTODETECT asks for it to be detected from
   the data -- confirm against coding-system-slots.h. */
181 enum eol_type | |
182 { | |
183 EOL_LF, | |
184 EOL_CRLF, | |
771 | 185 EOL_CR, |
1429 | 186 EOL_AUTODETECT |
428 | 187 }; |
188 | |
/* The coding-system object itself: a Lisp object header, a pointer to the
   method table of its coding-system type, the common Lisp_Object slots,
   a couple of plain C fields, and finally the type-specific data, which
   lives in the same memory block directly after the common part. */
189 struct Lisp_Coding_System | |
190 { | |
5127
a9c41067dd88
more cleanups, terminology clarification, lots of doc work
Ben Wing <ben@xemacs.org>
parents:
5124
diff
changeset
|
191 NORMAL_LISP_OBJECT_HEADER header; |
/* Method table (function pointers, sizes) of this coding system's type;
   the CODESYSMETH family of macros dispatches through this pointer. */
771 | 192 struct coding_system_methods *methods; |
428 | 193 |
/* The common Lisp_Object fields are produced by including
   coding-system-slots.h with MARKED_SLOT(x) expanding to a field
   declaration `Lisp_Object x;'.
   NOTE(review): CODING_SYSTEM_SLOT_DECLARATION presumably selects the
   "declare fields" mode of that header -- confirm there. */
1204 | 194 #define CODING_SYSTEM_SLOT_DECLARATION |
195 #define MARKED_SLOT(x) Lisp_Object x; | |
196 #include "coding-system-slots.h" | |
771 | 197 |
1204 | 198 /* Eol type requested by user. See comment about EOL junk in |
199 coding-system-slots.h. */ | |
771 | 200 enum eol_type eol_type; |
428 | 201 |
2132 | 202 /* If true, this is an internal coding system, which will not show up in |
203 coding-system-list unless a special parameter is given to it. */ | |
204 int internal_p; | |
205 | |
771 | 206 /* type-specific extra data attached to a coding_system */ |
/* Declared with length 1 only as a placeholder: the object's memory block
   is sized so that the type's `struct foo_coding_system' fits here (see
   the capsule description at the top of this file).  Must remain the last
   member. */
207 char data[1]; | |
428 | 208 }; |
209 typedef struct Lisp_Coding_System Lisp_Coding_System; | |
210 | |
5118
e0db3c197671
merge up to latest default branch, doesn't compile yet
Ben Wing <ben@xemacs.org>
parents:
4690
diff
changeset
|
/* Lisp-object glue for coding systems.  Each macro below specializes one
   of the generic record macros (XRECORD, wrap_record, RECORDP,
   CHECK_RECORD, CONCHECK_RECORD) to the `coding_system' object type.
   XCODING_SYSTEM (x) yields something that is dereferenced as a
   Lisp_Coding_System (see XCODESYSMETH, which applies `->methods' to it).
   NOTE(review): presumably wrap_coding_system is the inverse conversion,
   CODING_SYSTEMP is the type predicate, and the CHECK/CONCHECK forms
   signal an error on a type mismatch -- confirm against the definitions
   of the generic macros. */
211 DECLARE_LISP_OBJECT (coding_system, Lisp_Coding_System); |
440 | 212 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, Lisp_Coding_System) |
617 | 213 #define wrap_coding_system(p) wrap_record (p, coding_system) |
428 | 214 #define CODING_SYSTEMP(x) RECORDP (x, coding_system) |
215 #define CHECK_CODING_SYSTEM(x) CHECK_RECORD (x, coding_system) | |
216 #define CONCHECK_CODING_SYSTEM(x) CONCHECK_RECORD (x, coding_system) | |
217 | |
/* The coding-system type expressed as an enum.  A value of this type is
   stored in the `enumtype' field of struct coding_system_methods, where
   it is documented as being needed for KKCC marking of the type-specific
   lstream data.  The enumerators are named <type>_coding_system. */
1204 | 218 enum coding_system_variant |
219 { | |
220 no_conversion_coding_system, | |
221 convert_eol_coding_system, | |
222 undecided_coding_system, | |
223 chain_coding_system, | |
224 text_file_wrapper_coding_system, | |
225 internal_coding_system, | |
226 gzip_coding_system, | |
227 mswindows_multibyte_to_unicode_coding_system, | |
228 mswindows_multibyte_coding_system, | |
229 iso2022_coding_system, | |
230 ccl_coding_system, | |
231 shift_jis_coding_system, | |
232 big5_coding_system, | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
233 unicode_coding_system, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
234 fixed_width_coding_system |
1204 | 235 }; |
236 | |
/* Method table for one coding-system type; there is one such object per
   type, and each struct Lisp_Coding_System points to the table of its
   type.  It consists mostly of function pointers.  Methods documented as
   "Optional" may be left null; callers test for that with
   HAS_CODESYSMETH_P or go through MAYBE_CODESYSMETH /
   CODESYSMETH_OR_GIVEN rather than calling the pointer directly. */
771 | 237 struct coding_system_methods |
238 { | |
/* NOTE(review): presumably the Lisp symbol naming this coding-system type
   and the symbol of the corresponding type predicate -- confirm where
   these two fields are initialized. */
239 Lisp_Object type; | |
240 Lisp_Object predicate_symbol; | |
241 | |
1204 | 242 /* Type expressed as an enum, needed for KKCC marking of the |
243 type-specific lstream data; copied into the struct coding_stream. */ | |
244 | |
245 enum coding_system_variant enumtype; | |
246 | |
771 | 247 /* Implementation specific methods: */ |
248 | |
249 /* Init method: Initialize coding-system data. Optional. */ | |
250 void (*init_method) (Lisp_Object coding_system); | |
251 | |
252 /* Mark method: Mark any Lisp objects in the type-specific data | |
253 attached to the coding-system object. Optional. */ | |
254 void (*mark_method) (Lisp_Object coding_system); | |
255 | |
256 /* Print method: Print the type-specific properties of this coding | |
257 system, as part of `print'-ing the object. If this method is defined | |
258 and prints anything, it should print a space as the first thing it | |
259 does. Optional. */ | |
260 void (*print_method) (Lisp_Object cs, Lisp_Object printcharfun, | |
261 int escapeflag); | |
262 | |
263 /* Canonicalize method: Convert this coding system to another one; called | |
264 once, at creation time, after all properties have been parsed. The | |
265 returned value should be a coding system created with | |
266 make_internal_coding_system() (passing the existing coding system as the | |
267 first argument), and will become the coding system returned by | |
268 `make-coding-system'. Optional. | |
269 | |
270 NOTE: There are *three* different uses of "canonical" or "canonicalize" | |
271 w.r.t. coding systems, and it's important to keep them straight. | |
272 | |
273 1. The canonicalize method. Used to specify a different coding | |
274 system, used when doing conversions, in place of the actual coding | |
275 system itself. Stored in the CANONICAL field of a coding system. | |
276 | |
277 2. The canonicalize-after-coding method. Used to return the encoding | |
278 that was "actually" used to decode some text, such that this | |
279 particular encoding can be used to encode the text again with the | |
280 expectation that the result will be the same as the original encoding. | |
281 Particularly important with auto-detecting coding systems. | |
282 | |
283 3. From the perspective of aliases, a "canonical" coding system is one | |
284 that's not an alias to some other coding system, and "canonicalization" | |
285 is the process of traversing the alias pointers to find the canonical | |
286 coding system that's equivalent to the alias. | |
287 */ | |
288 Lisp_Object (*canonicalize_method) (Lisp_Object coding_system); | |
289 | |
290 /* Canonicalize after coding method: Convert this coding system to | |
291 another one, after coding (usually decoding) has finished. This is | |
292 meant to be used by auto-detecting coding systems, which should return | |
293 the actually detected coding system. Optional. */ | |
294 Lisp_Object (*canonicalize_after_coding_method) | |
295 (struct coding_stream *str); | |
296 | |
297 /* Convert method: Decode or encode the data in SRC of size N, writing | |
298 the results into the Dynarr DST. If the conversion_end_type method | |
299 indicates that the source is characters (as opposed to bytes), you are | |
300 guaranteed to get only whole characters in the data in SRC/N. STR, a | |
301 struct coding_stream, stores all necessary state and other info about | |
302 the conversion. Coding-specific state (struct TYPE_coding_stream) can | |
303 be retrieved from STR using CODING_STREAM_TYPE_DATA(). Return value | |
304 indicates the number of bytes of the *INPUT* that were converted (not | |
305 the number of bytes written to the Dynarr!). This can be less than | |
306 the total amount of input passed in; if so, the remainder is | |
307 considered "rejected" and will appear again at the beginning of the | |
308 data passed in the next time the convert method is called. When EOF | |
309 is returned on the other end and there's no more data, the convert | |
310 method will be called one last time, with STR->eof set and the passed-in | |
311 data will consist only of any rejected data from the previous | |
312 call. (At this point, file handles and similar resources can be | |
313 closed, but do NOT arbitrarily free data structures in the | |
314 type-specific data, because there are operations that can be done on | |
315 closed streams to query the results of the processing -- specifically, | |
316 for coding streams, there's the canonicalize_after_coding() method.) | |
317 Required. */ | |
318 Bytecount (*convert_method) (struct coding_stream *str, | |
319 const unsigned char *src, | |
320 unsigned_char_dynarr *dst, Bytecount n); | |
321 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
322 /* Query method: Check whether the buffer text between point and END |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
323 can be encoded by this coding system. Returns |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
324 either nil (meaning the text can be encoded by the coding system) or a |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
325 range table object describing the stretches that the coding system |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
326 cannot encode. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
327 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
328 Possible values for flags are below, search for |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
329 QUERY_METHOD_IGNORE_INVALID_SEQUENCES. |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
330 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
331 Coding systems are expected to be able to behave sensibly with all |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
332 possible octets on decoding, which is why this method is only available |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
333 for encoding. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
334 Lisp_Object (*query_method) (Lisp_Object coding_system, struct buffer *buf, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
335 Charbpos end, int flags); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
336 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
337 /* Same as the previous method, but this works in the context of |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
338 lstreams. (Where the data do need to be copied, unfortunately.) The |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
339 intention is to implement the query method for the mswindows-multibyte |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
340 coding systems in terms of a query_lstream method. */ |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
341 Lisp_Object (*query_lstream_method) (struct coding_stream *str, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
342 const Ibyte *start, Bytecount n); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
343 |
771 | 344 /* Coding mark method: Mark any Lisp objects in the type-specific data |
345 attached to `struct coding_stream'. Optional. */ | |
346 void (*mark_coding_stream_method) (struct coding_stream *str); | |
347 | |
348 /* Init coding stream method: Initialize the type-specific data attached | |
349 to the coding stream (i.e. in struct TYPE_coding_stream), when the | |
350 coding stream is opened. The type-specific data will be zeroed out. | |
351 Optional. */ | |
352 void (*init_coding_stream_method) (struct coding_stream *str); | |
353 | |
354 /* Rewind coding stream method: Reset any necessary type-specific data as | |
355 a result of the stream being rewound. Optional. */ | |
356 void (*rewind_coding_stream_method) (struct coding_stream *str); | |
357 | |
358 /* Finalize coding stream method: Clean up the type-specific data | |
359 attached to the coding stream (i.e. in struct TYPE_coding_stream). | |
360 Happens when the Lstream is deleted using Lstream_delete() or is | |
361 garbage-collected. Most streams are deleted after they've been used, | |
362 so it's less likely (but still possible) that allocated data will | |
363 stick around until GC time. (File handles can also be closed when EOF | |
364 is signalled; but some data must stick around after this point, for | |
365 the benefit of canonicalize_after_coding. See the convert method.) | |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
366 Called only once. Optional. */ |
771 | 367 void (*finalize_coding_stream_method) (struct coding_stream *str); |
368 | |
369 /* Finalize method: Clean up type-specific data (e.g. free allocated | |
370 data) attached to the coding system (i.e. in struct | |
371 TYPE_coding_system), when the coding system is about to be garbage | |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
372 collected. (Currently not called.) Called only once. Optional. */ |
771 | 373 void (*finalize_method) (Lisp_Object codesys); |
374 | |
375 /* Conversion end type method: Does this coding system encode bytes -> | |
376 characters, characters -> characters, bytes -> bytes, or | |
377 characters -> bytes? Default is characters -> bytes. Optional. */ | |
378 enum source_sink_type (*conversion_end_type_method) (Lisp_Object codesys); | |
379 | |
380 /* Putprop method: Set the value of a type-specific property. If | |
381 the property name is unrecognized, return 0. If the value is disallowed | |
382 or erroneous, signal an error. Currently called only at creation time. | |
383 Optional. */ | |
384 int (*putprop_method) (Lisp_Object codesys, | |
385 Lisp_Object key, | |
386 Lisp_Object value); | |
387 | |
388 /* Getprop method: Return the value of a type-specific property. If | |
389 the property name is unrecognized, return Qunbound. Optional. | |
390 */ | |
391 Lisp_Object (*getprop_method) (Lisp_Object coding_system, | |
392 Lisp_Object prop); | |
393 | |
394 /* These next three are set as part of the call to | |
395 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */ | |
396 | |
397 /* Description of the extra data (struct foo_coding_system) attached to a | |
1204 | 398 coding system, for pdump purposes. */ |
399 const struct sized_memory_description *extra_description; | |
771 | 400 /* size of struct foo_coding_system -- extra data associated with |
401 the coding system */ | |
402 int extra_data_size; | |
403 /* size of struct foo_coding_stream -- extra data associated with the | |
404 struct coding_stream, needed for each active coding process | |
405 using this coding system. note that we can have more than one | |
406 process active at once (simply by creating more than one coding | |
407 lstream using this coding system), so we can't store this data in | |
408 the coding system object. */ | |
409 int coding_data_size; | |
410 }; | |
411 | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
412 /* Values for flags, as passed to query_method. */ |
/* Each value occupies a distinct bit, so several can be combined with
   bitwise OR in the single `int flags' argument.
   NOTE(review): judging by the names only, IGNORE_INVALID_SEQUENCES
   suppresses reporting of invalid sequences, ERRORP requests that an
   error be signalled, and HIGHLIGHT requests highlighting of the
   offending text -- confirm against the query_method implementations. */
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
413 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
414 #define QUERY_METHOD_IGNORE_INVALID_SEQUENCES 0x0001 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
415 #define QUERY_METHOD_ERRORP 0x0002 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
416 #define QUERY_METHOD_HIGHLIGHT 0x0004 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
417 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
/* Outcome codes for a coding query; query_coding_succeeded (0) means no
   failure, the other two name the kind of failure.
   NOTE(review): where these values are stored or returned is not visible
   in this part of the header -- confirm against the query_method
   implementations. */
418 enum query_coding_failure_reasons |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
419 { |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
420 query_coding_succeeded = 0, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
421 query_coding_unencodable = 1, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
422 query_coding_invalid_sequence = 2 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
423 }; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
424 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
425 extern Lisp_Object Qquery_coding_warning_face; |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
426 |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
427 Lisp_Object default_query_method (Lisp_Object, struct buffer *, Charbpos, |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
428 int); |
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
429 |
771 | 430 /***** Calling a coding-system method *****/ |
431 | |
/* Expands to the M##_method member of the methods table that CS points
   to (CS must be a pointer with a `methods' member). */
432 #define RAW_CODESYSMETH(cs, m) ((cs)->methods->m##_method) | |
/* Nonzero (normalized to 0 or 1 by !!) when that member is non-null. */
433 #define HAS_CODESYSMETH_P(cs, m) (!!RAW_CODESYSMETH (cs, m)) | |
/* Call method M of CS, passing the parenthesized argument list ARGS.
   No check is made here that the method is set; MAYBE_CODESYSMETH and
   CODESYSMETH_OR_GIVEN are the checked forms. */
434 #define CODESYSMETH(cs, m, args) (((cs)->methods->m##_method) args) | |
435 | |
436 /* Call a void-returning coding-system method, if it exists. */ | |
437 #define MAYBE_CODESYSMETH(cs, m, args) do { \ | |
438 Lisp_Coding_System *maybe_codesysmeth_cs = (cs); \ | |
439 if (HAS_CODESYSMETH_P (maybe_codesysmeth_cs, m)) \ | |
440 CODESYSMETH (maybe_codesysmeth_cs, m, args); \ | |
441 } while (0) | |
442 | |
443 /* Call a coding-system method, if it exists, or return GIVEN. | |
444 NOTE: Multiply-evaluates CS. */ | |
445 #define CODESYSMETH_OR_GIVEN(cs, m, args, given) \ | |
446 (HAS_CODESYSMETH_P (cs, m) ? \ | |
447 CODESYSMETH (cs, m, args) : (given)) | |
448 | |
/* The same three operations for a CS that is first converted with
   XCODING_SYSTEM() (elsewhere in this file XCODING_SYSTEM is applied to
   a Lisp_Object to obtain a Lisp_Coding_System *).
   NOTE: XCODESYSMETH_OR_GIVEN hands the converted CS to
   CODESYSMETH_OR_GIVEN, which evaluates it more than once. */
449 #define XCODESYSMETH(cs, m, args) \ | |
450 CODESYSMETH (XCODING_SYSTEM (cs), m, args) | |
451 #define MAYBE_XCODESYSMETH(cs, m, args) \ | |
452 MAYBE_CODESYSMETH (XCODING_SYSTEM (cs), m, args) | |
453 #define XCODESYSMETH_OR_GIVEN(cs, m, args, given) \ | |
454 CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (cs), m, args, given) | |
455 | |
456 /***** Defining new coding-system types *****/ | |
457 | |
1204 | 458 extern const struct sized_memory_description coding_system_empty_extra_description; |
771 | 459 |
800 | 460 #ifdef ERROR_CHECK_TYPES |
771 | 461 #define DECLARE_CODING_SYSTEM_TYPE(type) \ |
462 \ | |
463 extern struct coding_system_methods * type##_coding_system_methods; \ | |
826 | 464 DECLARE_INLINE_HEADER ( \ |
465 struct type##_coding_system * \ | |
771 | 466 error_check_##type##_coding_system_data (Lisp_Coding_System *cs) \ |
826 | 467 ) \ |
771 | 468 { \ |
469 assert (CODING_SYSTEM_TYPE_P (cs, type)); \ | |
470 /* Catch accidental use of INITIALIZE_CODING_SYSTEM_TYPE in place \ | |
471 of INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */ \ | |
472 assert (cs->methods->extra_data_size > 0); \ | |
473 return (struct type##_coding_system *) cs->data; \ | |
474 } \ | |
475 \ | |
826 | 476 DECLARE_INLINE_HEADER ( \ |
477 struct type##_coding_stream * \ | |
771 | 478 error_check_##type##_coding_stream_data (struct coding_stream *s) \ |
826 | 479 ) \ |
771 | 480 { \ |
481 assert (XCODING_SYSTEM_TYPE_P (s->codesys, type)); \ | |
482 return (struct type##_coding_stream *) s->data; \ | |
483 } \ | |
484 \ | |
826 | 485 DECLARE_INLINE_HEADER ( \ |
486 Lisp_Coding_System * \ | |
771 | 487 error_check_##type##_coding_system_type (Lisp_Object obj) \ |
826 | 488 ) \ |
771 | 489 { \ |
490 Lisp_Coding_System *cs = XCODING_SYSTEM (obj); \ | |
491 assert (CODING_SYSTEM_TYPE_P (cs, type)); \ | |
492 return cs; \ | |
493 } \ | |
494 \ | |
495 DECLARE_NOTHING | |
496 #else | |
497 #define DECLARE_CODING_SYSTEM_TYPE(type) \ | |
498 extern struct coding_system_methods * type##_coding_system_methods | |
800 | 499 #endif /* ERROR_CHECK_TYPES */ |
771 | 500 |
/* Define the methods-table pointer TYPE_coding_system_methods for
   coding-system type TYPE.  This is the definition matching the extern
   declaration emitted by DECLARE_CODING_SYSTEM_TYPE. */
501 #define DEFINE_CODING_SYSTEM_TYPE(type) \ | |
502 struct coding_system_methods * type##_coding_system_methods | |
503 | |
/* As above, and additionally define the static
   TYPE_coding_system_description_0, which pairs
   sizeof (struct TYPE_coding_system) with TYPE_coding_system_description.
   Both struct TYPE_coding_system and TYPE_coding_system_description must
   already be visible where this macro is used. */
1204 | 504 #define DEFINE_CODING_SYSTEM_TYPE_WITH_DATA(type) \ |
505 struct coding_system_methods * type##_coding_system_methods; \ | |
506 static const struct sized_memory_description \ | |
507 type##_coding_system_description_0 = { \ | |
508 sizeof (struct type##_coding_system), \ | |
509 type##_coding_system_description \ | |
510 } | |
511 | |
771 | 512 #define INITIALIZE_CODING_SYSTEM_TYPE(ty, pred_sym) do { \ |
513 ty##_coding_system_methods = \ | |
514 xnew_and_zero (struct coding_system_methods); \ | |
515 ty##_coding_system_methods->type = Q##ty; \ | |
516 ty##_coding_system_methods->extra_description = \ | |
1204 | 517 &coding_system_empty_extra_description; \ |
518 ty##_coding_system_methods->enumtype = ty##_coding_system; \ | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
519 ty##_coding_system_methods->query_method = default_query_method; \ |
771 | 520 defsymbol_nodump (&ty##_coding_system_methods->predicate_symbol, \ |
521 pred_sym); \ | |
522 add_entry_to_coding_system_type_list (ty##_coding_system_methods); \ | |
2367 | 523 dump_add_root_block_ptr (&ty##_coding_system_methods, \ |
771 | 524 &coding_system_methods_description); \ |
525 } while (0) | |
526 | |
527 #define REINITIALIZE_CODING_SYSTEM_TYPE(type) do { \ | |
528 staticpro_nodump (&type##_coding_system_methods->predicate_symbol); \ | |
529 } while (0) | |
530 | |
531 /* This assumes the existence of two structures and one description array: | |
532 | |
533 struct foo_coding_system (attached to the coding system) | |
534 struct foo_coding_stream (per coding process, attached to the | |
535 struct coding_stream) | |
1204 | 536 const struct memory_description foo_coding_system_description[] |
537 (data description of struct foo_coding_system) | |
771 | 538 |
1204 | 539 For an example of how to do the description, see |
771 | 540 chain_coding_system_description. |
541 */ | |
542 #define INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA(type, pred_sym) \ | |
543 do { \ | |
544 INITIALIZE_CODING_SYSTEM_TYPE (type, pred_sym); \ | |
545 type##_coding_system_methods->extra_data_size = \ | |
546 sizeof (struct type##_coding_system); \ | |
547 type##_coding_system_methods->extra_description = \ | |
1204 | 548 &type##_coding_system_description_0; \ |
771 | 549 type##_coding_system_methods->coding_data_size = \ |
550 sizeof (struct type##_coding_stream); \ | |
551 } while (0) | |
552 | |
553 /* Declare that coding-system-type TYPE has method METH; used in | |
554 initialization routines */ | |
555 #define CODING_SYSTEM_HAS_METHOD(type, meth) \ | |
556 (type##_coding_system_methods->meth##_method = type##_##meth) | |
557 | |
558 /***** Macros for accessing coding-system types *****/ | |
559 | |
/* Nonzero if CS (a pointer with a `methods' member) is of coding-system
   type TYPE.  The test is pointer identity between CS's methods table
   and TYPE_coding_system_methods. */
560 #define CODING_SYSTEM_TYPE_P(cs, type) \ | |
561 ((cs)->methods == type##_coding_system_methods) | |
/* The same test for a CS that is first converted with XCODING_SYSTEM(). */
562 #define XCODING_SYSTEM_TYPE_P(cs, type) \ | |
563 CODING_SYSTEM_TYPE_P (XCODING_SYSTEM (cs), type) | |
564 | |
800 | 565 #ifdef ERROR_CHECK_TYPES |
771 | 566 # define CODING_SYSTEM_TYPE_DATA(cs, type) \ |
567 error_check_##type##_coding_system_data (cs) | |
568 #else | |
569 # define CODING_SYSTEM_TYPE_DATA(cs, type) \ | |
570 ((struct type##_coding_system *) \ | |
571 (cs)->data) | |
572 #endif | |
573 | |
574 #define XCODING_SYSTEM_TYPE_DATA(cs, type) \ | |
575 CODING_SYSTEM_TYPE_DATA (XCODING_SYSTEM_OF_TYPE (cs, type), type) | |
576 | |
/* XCODING_SYSTEM_OF_TYPE: convert X with XCODING_SYSTEM(), where the
   caller claims X is a coding system of type TYPE.
   XSETCODING_SYSTEM_OF_TYPE: store wrap_coding_system (P) into the
   lvalue X, where the caller claims P is of type TYPE.
   When ERROR_CHECK_TYPES is defined, the claimed type is checked. */
#ifdef ERROR_CHECK_TYPES
# define XCODING_SYSTEM_OF_TYPE(x, type)			\
   error_check_##type##_coding_system_type (x)
# define XSETCODING_SYSTEM_OF_TYPE(x, p, type) do		\
{								\
  x = wrap_coding_system (p);					\
  /* CODING_SYSTEM_TYPEP applies CODING_SYSTEMP and		\
     XCODING_SYSTEM to its argument itself, so it must be	\
     given the wrapped X, not XCODING_SYSTEM (x). */		\
  assert (CODING_SYSTEM_TYPEP (x, type));			\
} while (0)
#else
# define XCODING_SYSTEM_OF_TYPE(x, type) XCODING_SYSTEM (x)
# define XSETCODING_SYSTEM_OF_TYPE(x, p, type) do		\
{								\
  x = wrap_coding_system (p);					\
} while (0)
#endif /* ERROR_CHECK_TYPES */
592 | |
/* Nonzero if X satisfies CODING_SYSTEMP and, once converted with
   XCODING_SYSTEM(), is of coding-system type TYPE.
   NOTE: Evaluates X twice. */
593 #define CODING_SYSTEM_TYPEP(x, type) \ | |
594 (CODING_SYSTEMP (x) && CODING_SYSTEM_TYPE_P (XCODING_SYSTEM (x), type)) | |
/* Run CHECK_CODING_SYSTEM on X; then, if X is not of type TYPE, call
   dead_wrong_type_argument with TYPE's predicate symbol and X. */
595 #define CHECK_CODING_SYSTEM_OF_TYPE(x, type) do { \ | |
596 CHECK_CODING_SYSTEM (x); \ | |
597 if (!CODING_SYSTEM_TYPE_P (XCODING_SYSTEM (x), type)) \ | |
598 dead_wrong_type_argument \ | |
599 (type##_coding_system_methods->predicate_symbol, x); \ | |
600 } while (0) | |
/* Like the above, but using CONCHECK_CODING_SYSTEM and
   wrong_type_argument.  The value returned by wrong_type_argument is
   stored back into X, so X must be an lvalue. */
601 #define CONCHECK_CODING_SYSTEM_OF_TYPE(x, type) do { \ | |
602 CONCHECK_CODING_SYSTEM (x); \ | |
603 if (!(CODING_SYSTEM_TYPEP (x, type))) \ | |
604 x = wrong_type_argument \ | |
605 (type##_coding_system_methods->predicate_symbol, x); \ | |
606 } while (0) | |
607 | |
608 #define CODING_SYSTEM_METHODS(codesys) ((codesys)->methods) | |
428 | 609 #define CODING_SYSTEM_NAME(codesys) ((codesys)->name) |
771 | 610 #define CODING_SYSTEM_DESCRIPTION(codesys) ((codesys)->description) |
611 #define CODING_SYSTEM_TYPE(codesys) ((codesys)->methods->type) | |
428 | 612 #define CODING_SYSTEM_MNEMONIC(codesys) ((codesys)->mnemonic) |
771 | 613 #define CODING_SYSTEM_DOCUMENTATION(codesys) ((codesys)->documentation) |
428 | 614 #define CODING_SYSTEM_POST_READ_CONVERSION(codesys) \ |
615 ((codesys)->post_read_conversion) | |
616 #define CODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \ | |
617 ((codesys)->pre_write_conversion) | |
618 #define CODING_SYSTEM_EOL_TYPE(codesys) ((codesys)->eol_type) | |
771 | 619 #define CODING_SYSTEM_EOL_LF(codesys) ((codesys)->eol[EOL_LF]) |
620 #define CODING_SYSTEM_EOL_CRLF(codesys) ((codesys)->eol[EOL_CRLF]) | |
621 #define CODING_SYSTEM_EOL_CR(codesys) ((codesys)->eol[EOL_CR]) | |
622 #define CODING_SYSTEM_TEXT_FILE_WRAPPER(codesys) ((codesys)->text_file_wrapper) | |
623 #define CODING_SYSTEM_AUTO_EOL_WRAPPER(codesys) ((codesys)->auto_eol_wrapper) | |
624 #define CODING_SYSTEM_SUBSIDIARY_PARENT(codesys) ((codesys)->subsidiary_parent) | |
625 #define CODING_SYSTEM_CANONICAL(codesys) ((codesys)->canonical) | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
3017
diff
changeset
|
626 #define CODING_SYSTEM_SAFE_CHARSETS(codesys) ((codesys)->safe_charsets) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
3017
diff
changeset
|
627 #define CODING_SYSTEM_SAFE_CHARS(codesys) ((codesys)->safe_chars) |
428 | 628 |
771 | 629 #define CODING_SYSTEM_CHAIN_CHAIN(codesys) \ |
630 (CODING_SYSTEM_TYPE_DATA (codesys, chain)->chain) | |
631 #define CODING_SYSTEM_CHAIN_COUNT(codesys) \ | |
632 (CODING_SYSTEM_TYPE_DATA (codesys, chain)->count) | |
633 #define CODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING(codesys) \ | |
634 (CODING_SYSTEM_TYPE_DATA (codesys, chain)->canonicalize_after_coding) | |
428 | 635 |
771 | 636 #define XCODING_SYSTEM_METHODS(codesys) \ |
637 CODING_SYSTEM_METHODS (XCODING_SYSTEM (codesys)) | |
428 | 638 #define XCODING_SYSTEM_NAME(codesys) \ |
639 CODING_SYSTEM_NAME (XCODING_SYSTEM (codesys)) | |
771 | 640 #define XCODING_SYSTEM_DESCRIPTION(codesys) \ |
641 CODING_SYSTEM_DESCRIPTION (XCODING_SYSTEM (codesys)) | |
428 | 642 #define XCODING_SYSTEM_TYPE(codesys) \ |
643 CODING_SYSTEM_TYPE (XCODING_SYSTEM (codesys)) | |
644 #define XCODING_SYSTEM_MNEMONIC(codesys) \ | |
645 CODING_SYSTEM_MNEMONIC (XCODING_SYSTEM (codesys)) | |
771 | 646 #define XCODING_SYSTEM_DOCUMENTATION(codesys) \ |
647 CODING_SYSTEM_DOCUMENTATION (XCODING_SYSTEM (codesys)) | |
428 | 648 #define XCODING_SYSTEM_POST_READ_CONVERSION(codesys) \ |
649 CODING_SYSTEM_POST_READ_CONVERSION (XCODING_SYSTEM (codesys)) | |
650 #define XCODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \ | |
651 CODING_SYSTEM_PRE_WRITE_CONVERSION (XCODING_SYSTEM (codesys)) | |
652 #define XCODING_SYSTEM_EOL_TYPE(codesys) \ | |
653 CODING_SYSTEM_EOL_TYPE (XCODING_SYSTEM (codesys)) | |
654 #define XCODING_SYSTEM_EOL_LF(codesys) \ | |
655 CODING_SYSTEM_EOL_LF (XCODING_SYSTEM (codesys)) | |
656 #define XCODING_SYSTEM_EOL_CRLF(codesys) \ | |
657 CODING_SYSTEM_EOL_CRLF (XCODING_SYSTEM (codesys)) | |
658 #define XCODING_SYSTEM_EOL_CR(codesys) \ | |
659 CODING_SYSTEM_EOL_CR (XCODING_SYSTEM (codesys)) | |
771 | 660 #define XCODING_SYSTEM_TEXT_FILE_WRAPPER(codesys) \ |
661 CODING_SYSTEM_TEXT_FILE_WRAPPER (XCODING_SYSTEM (codesys)) | |
662 #define XCODING_SYSTEM_AUTO_EOL_WRAPPER(codesys) \ | |
663 CODING_SYSTEM_AUTO_EOL_WRAPPER (XCODING_SYSTEM (codesys)) | |
664 #define XCODING_SYSTEM_SUBSIDIARY_PARENT(codesys) \ | |
665 CODING_SYSTEM_SUBSIDIARY_PARENT (XCODING_SYSTEM (codesys)) | |
666 #define XCODING_SYSTEM_CANONICAL(codesys) \ | |
667 CODING_SYSTEM_CANONICAL (XCODING_SYSTEM (codesys)) | |
4568
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
3017
diff
changeset
|
668 #define XCODING_SYSTEM_SAFE_CHARSETS(codesys) \ |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
3017
diff
changeset
|
669 CODING_SYSTEM_SAFE_CHARSETS (XCODING_SYSTEM (codesys)) |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
3017
diff
changeset
|
670 #define XCODING_SYSTEM_SAFE_CHARS(codesys) \ |
1d74a1d115ee
Add #'query-coding-region tests; do the work necessary to get them running.
Aidan Kehoe <kehoea@parhasard.net>
parents:
3017
diff
changeset
|
671 CODING_SYSTEM_SAFE_CHARS (XCODING_SYSTEM (codesys)) |
428 | 672 |
771 | 673 #define XCODING_SYSTEM_CHAIN_CHAIN(codesys) \ |
674 CODING_SYSTEM_CHAIN_CHAIN (XCODING_SYSTEM (codesys)) | |
675 #define XCODING_SYSTEM_CHAIN_COUNT(codesys) \ | |
676 CODING_SYSTEM_CHAIN_COUNT (XCODING_SYSTEM (codesys)) | |
677 #define XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING(codesys) \ | |
678 CODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (XCODING_SYSTEM (codesys)) | |
428 | 679 |
771 | 680 /**************************************************/ |
681 /* Detection */ | |
682 /**************************************************/ | |
428 | 683 |
771 | 684 #define MAX_DETECTOR_CATEGORIES 256 |
685 #define MAX_DETECTORS 64 | |
428 | 686 |
771 | 687 #define MAX_BYTES_PROCESSED_FOR_DETECTION 65536 |
428 | 688 |
771 | 689 struct detection_state |
428 | 690 { |
771 | 691 int seen_non_ascii; |
692 Bytecount bytes_seen; | |
428 | 693 |
771 | 694 char categories[MAX_DETECTOR_CATEGORIES]; |
695 Bytecount data_offset[MAX_DETECTORS]; | |
696 /* ... more data follows; data_offset[detector_##TYPE] points to | |
697 the data for that type */ | |
428 | 698 }; |
699 | |
/* Pointer to detector TYPE's private data belonging to detection state
   ST: the address data_offset[detector_TYPE] bytes past the start of
   *ST, cast to struct TYPE_detector *. */
771 | 700 #define DETECTION_STATE_DATA(st, type) \ |
701 ((struct type##_detector *) \ | |
702 ((char *) (st) + (st)->data_offset[detector_##type])) | |
428 | 703 |
448 | 704 /* Distinguishable categories of encodings. |
705 | |
706 This list determines the initial priority of the categories. | |
707 | |
708 For better or worse, currently Mule files are encoded in 7-bit ISO 2022. | |
709 For this reason, under Mule ISO_7 gets highest priority. | |
710 | |
711 Putting NO_CONVERSION second prevents "binary corruption" in the | |
712 default case in all but the (presumably) extremely rare case of a | |
713 binary file which contains redundant escape sequences but no 8-bit | |
714 characters. | |
715 | |
716 The remaining priorities are based on perceived "internationalization | |
717 political correctness." An exception is UCS-4 at the bottom, since | |
718 basically everything is compatible with UCS-4, but it is likely to | |
719 be very rare as an external encoding. */ | |
720 | |
771 | 721 /* Macros to define code of control characters for ISO2022's functions. */ |
722 /* Used by the detection routines of other coding system types as well. */ | |
723 /* code */ /* function */ | |
724 #define ISO_CODE_LF 0x0A /* line-feed */ | |
725 #define ISO_CODE_CR 0x0D /* carriage-return */ | |
726 #define ISO_CODE_SO 0x0E /* shift-out */ | |
727 #define ISO_CODE_SI 0x0F /* shift-in */ | |
728 #define ISO_CODE_ESC 0x1B /* escape */ | |
729 #define ISO_CODE_DEL 0x7F /* delete */ | |
730 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ | |
731 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ | |
732 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ | |
733 | |
734 enum detection_result | |
735 { | |
736 /* Basically means a magic cookie was seen indicating this type, or | |
737 something similar. */ | |
738 DET_NEAR_CERTAINTY = 4, | |
739 DET_HIGHEST = 4, | |
740 /* Characteristics seen that are unlikely to be other coding system types | |
741 -- e.g. ISO-2022 escape sequences, or perhaps a consistent pattern of | |
742 alternating zero bytes in UTF-16, along with Unicode LF or CRLF | |
743 sequences at regular intervals. (Zero bytes are unlikely or impossible | |
744 in most text encodings.) */ | |
745 DET_QUITE_PROBABLE = 3, | |
746 /* Strong or medium statistical likelihood. At least some | |
747 characteristics seen that match what's normally found in this encoding | |
748 -- e.g. in Shift-JIS, a number of two-byte Japanese character | |
749 sequences in the right range, and nothing out of range; or in Unicode, | |
750 much higher statistical variance in the odd bytes than in the even | |
751 bytes, or vice-versa (perhaps the presence of regular EOL sequences | |
752 would bump this too to DET_QUITE_PROBABLE). This is quite often a | |
753 statistical test. */ | |
754 DET_SOMEWHAT_LIKELY = 2, | |
755 /* Weak statistical likelihood. Pretty much any features at all that | |
756 characterize this encoding, and nothing that rules against it. */ | |
757 DET_SLIGHTLY_LIKELY = 1, | |
758 /* Default state. Perhaps it indicates pure ASCII or something similarly | |
759 vague seen in Shift-JIS, or, exactly as the level says, it might mean | |
760 in a statistical-based detector that the pros and cons are balanced | |
761 out. This is also the lowest level that will be accepted by the | |
762 auto-detector without asking the user: If all available detectors | |
763 report lower levels for all categories with attached coding systems, | |
764 the user will be shown the results and explicitly prompted for action. | |
765 The user will also be prompted if this is the highest available level | |
766 and more than one detector reports the level. (See below about the | |
767 consequent necessity of an "ASCII" detector, which will return level 1 | |
768 or higher for most plain text files.) */ | |
769 DET_AS_LIKELY_AS_UNLIKELY = 0, | |
770 /* Some characteristics seen that are unusual for this encoding -- | |
771 e.g. unusual control characters in a plain-text encoding, lots of | |
772 8-bit characters, or little statistical variance in the odd and even | |
773 bytes in UTF-16. */ | |
774 DET_SOMEWHAT_UNLIKELY = -1, | |
775 /* This indicates that there is very little chance the data is in the | |
776 right format; this is probably the lowest level you can get when | |
777 presenting random binary data to a text file, because there are no | |
778 "specific sequences" you can see that would totally rule out | |
779 recognition. */ | |
780 DET_QUITE_IMPROBABLE = -2, | |
781 /* An erroneous sequence was seen. */ | |
782 DET_NEARLY_IMPOSSIBLE = -3, | |
1429 | 783 DET_LOWEST = -3 |
771 | 784 }; |
785 | |
786 extern int coding_detector_count; | |
787 extern int coding_detector_category_count; | |
788 | |
789 struct detector_category | |
428 | 790 { |
771 | 791 int id; |
792 Lisp_Object sym; | |
793 }; | |
794 | |
795 typedef struct | |
796 { | |
797 Dynarr_declare (struct detector_category); | |
798 } detector_category_dynarr; | |
799 | |
800 struct detector | |
801 { | |
802 int id; | |
803 detector_category_dynarr *cats; | |
804 Bytecount data_size; | |
805 /* Detect method: Required. */ | |
806 void (*detect_method) (struct detection_state *st, | |
807 const unsigned char *src, Bytecount n); | |
808 /* Finalize detection state method: Clean up any allocated data in the | |
5124
623d57b7fbe8
separate regular and disksave finalization, print method fixes.
Ben Wing <ben@xemacs.org>
parents:
5120
diff
changeset
|
809 detection state. Called only once. Optional. */ |
771 | 810 void (*finalize_detection_state_method) (struct detection_state *st); |
428 | 811 }; |
812 | |
771 | 813 /* Lvalue for a particular detection result -- detection state ST, |
814 category CAT */ | |
815 #define DET_RESULT(st, cat) ((st)->categories[detector_category_##cat]) | |
816 /* In state ST, set all detection results associated with detector DET to | |
817 RESULT. */ | |
818 #define SET_DET_RESULTS(st, det, result) \ | |
819 set_detection_results (st, detector_##det, result) | |
820 | |
821 typedef struct | |
822 { | |
823 Dynarr_declare (struct detector); | |
824 } detector_dynarr; | |
825 | |
826 extern detector_dynarr *all_coding_detectors; | |
827 | |
/* Define the int variable detector_category_CAT, which holds the id of
   category CAT.  The DETECTOR argument is not used by this macro. */
828 #define DEFINE_DETECTOR_CATEGORY(detector, cat) \ | |
829 int detector_category_##cat | |
/* Matching extern declaration; DETECTOR is likewise unused. */
830 #define DECLARE_DETECTOR_CATEGORY(detector, cat) \ | |
831 extern int detector_category_##cat | |
/* Register category CAT under detector DETECTOR: assign it the next id
   (post-incrementing coding_detector_category_count), pass the id
   variable to dump_add_opaque_int, and append an entry holding that id
   and the symbol QCAT to the `cats' array of DETECTOR's entry in
   all_coding_detectors.  This indexes all_coding_detectors with
   detector_DETECTOR, so that variable must already be set --
   presumably by INITIALIZE_DETECTOR; confirm the call order at the
   use sites. */
832 #define INITIALIZE_DETECTOR_CATEGORY(detector, cat) \ | |
833 do { \ | |
834 struct detector_category dog; \ | |
835 xzero (dog); \ | |
836 detector_category_##cat = coding_detector_category_count++; \ | |
837 dump_add_opaque_int (&detector_category_##cat); \ | |
838 dog.id = detector_category_##cat; \ | |
839 dog.sym = Q##cat; \ | |
840 Dynarr_add (Dynarr_at (all_coding_detectors, detector_##detector).cats, \ | |
841 dog); \ | |
842 } while (0) | |
843 | |
/* Define the int variable detector_DETECTOR, which holds the id of
   detector DETECTOR. */
844 #define DEFINE_DETECTOR(Detector) \ | |
845 int detector_##Detector | |
/* Matching extern declaration. */
846 #define DECLARE_DETECTOR(Detector) \ | |
847 extern int detector_##Detector | |
/* Register detector DETECTOR: assign it the next id (post-incrementing
   coding_detector_count), pass the id variable to dump_add_opaque_int,
   create an empty category array for it, record
   sizeof (struct DETECTOR_detector) as its data_size, and append the
   resulting struct detector to all_coding_detectors.  The method
   pointers are left as xzero() set them; install them afterwards with
   DETECTOR_HAS_METHOD. */
848 #define INITIALIZE_DETECTOR(Detector) \ | |
849 do { \ | |
850 struct detector det; \ | |
851 xzero (det); \ | |
852 detector_##Detector = coding_detector_count++; \ | |
853 dump_add_opaque_int (&detector_##Detector); \ | |
854 det.id = detector_##Detector; \ | |
855 det.cats = Dynarr_new2 (detector_category_dynarr, \ | |
856 struct detector_category); \ | |
857 det.data_size = sizeof (struct Detector##_detector); \ | |
858 Dynarr_add (all_coding_detectors, det); \ | |
859 } while (0) | |
/* Install the function DETECTOR_METH as the METH_method member of
   DETECTOR's entry in all_coding_detectors.  Expands to a bare
   assignment expression. */
860 #define DETECTOR_HAS_METHOD(Detector, Meth) \ | |
861 Dynarr_at (all_coding_detectors, detector_##Detector).Meth##_method = \ | |
802 | 862 Detector##_##Meth |
771 | 863 |
864 | |
865 /**************************************************/ | |
866 /* Decoding/Encoding */ | |
867 /**************************************************/ | |
868 | |
869 /* Is the source (SOURCEP == 1) or sink (SOURCEP == 0) when encoding specified | |
870 in characters? */ | |
871 | |
872 enum source_or_sink | |
873 { | |
874 CODING_SOURCE, | |
875 CODING_SINK | |
876 }; | |
877 | |
878 enum encode_decode | |
879 { | |
880 CODING_ENCODE, | |
881 CODING_DECODE | |
882 }; | |
883 | |
884 /* Data structure attached to an lstream of type `coding', | |
885 containing values specific to the coding process. Additional | |
886 data is stored in the DATA field below; the exact form of that data | |
887 is controlled by the type of the coding system that governs the | |
888 conversion (field CODESYS). CODESYS may be set at any time | |
889 throughout the lifetime of the lstream and possibly more than once. | |
890 See long comment above for more info. */ | |
891 | |
892 struct coding_stream | |
893 { | |
1204 | 894 /* Enumerated constant identifying the type of the governing coding |
895 system. This duplicates the information available through | |
896 XCODING_SYSTEM (str->codesys)->methods->type, which formerly was the | |
897 only way to determine the coding system type. We need this constant | |
898 now for KKCC, so that it can be used in an XD_UNION clause to | |
899 determine the Lisp objects in the type-specific data. */ | |
900 enum coding_system_variant type; | |
901 | |
771 | 902 /* Coding system that governs the conversion. */ |
903 Lisp_Object codesys; | |
904 /* Original coding system, pre-canonicalization. */ | |
905 Lisp_Object orig_codesys; | |
906 | |
907 /* Back pointer to current stream. */ | |
908 Lstream *us; | |
909 | |
910 /* Stream that we read the unprocessed data from or write the processed | |
911 data to. */ | |
912 Lstream *other_end; | |
913 | |
914 /* In order to handle both reading to and writing from a coding stream, | |
915 we phrase the conversion methods like write methods -- we can | |
916 implement reading in terms of a write method but not vice-versa, | |
917 because the write method is forced to take only what it's given but | |
918 the read method can read more data from the other end if necessary. | |
919 On the other hand, the write method is free to generate all the data | |
2297 | 920 it wants (and just write it to the other end), but the read method |
771 | 921 can return only as much as was asked for, so we need to implement our |
922 own buffering. */ | |
923 | |
924 /* If we are reading, then we can return only a fixed amount of data, but | |
925 the converter is free to return as much as it wants, so we direct it | |
926 to store the data here and lop off chunks as we need them. If we are | |
927 writing, we use this because the converter takes a Dynarr but we are | |
928 supposed to write into a fixed buffer. (NOTE: This introduces an extra | |
929 memory copy.) */ | |
930 unsigned_char_dynarr *convert_to; | |
931 | |
932 /* The conversion method might reject some of the data -- this typically | |
933 includes partial characters, partial escape sequences, etc. When | |
934 writing, we just pass the rejection up to the Lstream module, and it | |
935 will buffer the data. When reading, however, we need to do the | |
936 buffering ourselves, and we put it here, combined with newly read | |
937 data. */ | |
938 unsigned_char_dynarr *convert_from; | |
939 | |
940 /* If set, this is the last chunk of data being processed. When this is | |
941 finished, output any necessary terminating control characters, escape | |
942 sequences, etc. */ | |
943 unsigned int eof:1; | |
944 | |
945 /* CH holds a partially built-up character. This is really part of the | |
946 state-dependent data and should be moved there. */ | |
947 unsigned int ch; | |
948 | |
949 /* Coding-system-specific data holding extra state about the | |
950 conversion. Logically a struct TYPE_coding_stream; a pointer | |
800 | 951 to such a struct, with (when ERROR_CHECK_TYPES is defined) |
771 | 952 error-checking that this is really a structure of that type |
953 (checking the corresponding coding system type) can be retrieved using | |
954 CODING_STREAM_TYPE_DATA(). Allocated at the same time that | |
955 CODESYS is set (which may occur at any time, even multiple times, | |
956 during the lifetime of the stream). The size comes from | |
957 methods->coding_data_size. */ | |
958 void *data; | |
959 | |
960 enum encode_decode direction; | |
961 | |
800 | 962 /* If set, don't close the stream at the other end when being closed. */ |
963 unsigned int no_close_other:1; | |
802 | 964 /* If set, read only one byte at a time from other end to avoid any |
965 possible blocking. */ | |
966 unsigned int one_byte_at_a_time:1; | |
814 | 967 /* If set, and we're a read stream, we init char mode on ourselves as |
968 necessary to prevent the caller from getting partial characters. (the | |
969 default) */ | |
970 unsigned int set_char_mode_on_us_when_reading:1; | |
800 | 971 |
771 | 972 /* #### Temporary test */ |
973 unsigned int finalized:1; | |
974 }; | |
975 | |
/* The `coding'-type data of lstream STREAM, obtained through
   LSTREAM_TYPE_DATA. */
976 #define CODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, coding) | |
977 | |
/* The `data' member of coding stream S, viewed as a
   struct TYPE_coding_stream *.  With ERROR_CHECK_TYPES this goes through
   error_check_TYPE_coding_stream_data(), which
   DECLARE_CODING_SYSTEM_TYPE defines to assert that S->codesys is of
   type TYPE; otherwise it is a plain cast. */
800 | 978 #ifdef ERROR_CHECK_TYPES |
771 | 979 # define CODING_STREAM_TYPE_DATA(s, type) \ |
980 error_check_##type##_coding_stream_data (s) | |
981 #else | |
982 # define CODING_STREAM_TYPE_DATA(s, type) \ | |
983 ((struct type##_coding_stream *) (s)->data) | |
984 #endif | |
985 | |
/* C should be a binary character in the range 0 - 255; convert
   to internal format and add to Dynarr DST.
   NOTE: Under MULE, C is evaluated more than once, so it must not
   have side effects. */

#ifdef MULE

#define DECODE_ADD_BINARY_CHAR(c, dst)				\
do {								\
  if (byte_ascii_p (c))						\
    Dynarr_add (dst, c);					\
  else if (byte_c1_p (c))					\
    {								\
      Dynarr_add (dst, LEADING_BYTE_CONTROL_1);			\
      /* C is parenthesized so that an argument containing an	\
         operator of lower precedence than `+' is offset as a	\
         whole. */						\
      Dynarr_add (dst, (c) + 0x20);				\
    }								\
  else								\
    {								\
      Dynarr_add (dst, LEADING_BYTE_LATIN_ISO8859_1);		\
      Dynarr_add (dst, c);					\
    }								\
} while (0)

#else /* not MULE */

#define DECODE_ADD_BINARY_CHAR(c, dst)				\
do {								\
  Dynarr_add (dst, c);						\
} while (0)

#endif /* MULE */
1015 | |
/* If CH is nonzero, add it to Dynarr DST with DECODE_ADD_BINARY_CHAR
   and then reset CH to 0.  CH is both tested and assigned to, so it
   must be a side-effect-free lvalue. */
1016 #define DECODE_OUTPUT_PARTIAL_CHAR(ch, dst) \ | |
1017 do { \ | |
1018 if (ch) \ | |
1019 { \ | |
1020 DECODE_ADD_BINARY_CHAR (ch, dst); \ | |
1021 ch = 0; \ | |
1022 } \ | |
1023 } while (0) | |
428 | 1024 |
1025 #ifdef MULE | |
1026 /* Convert shift-JIS code (sj1, sj2) into internal string | |
1027 representation (c1, c2). (The leading byte is assumed.) */ | |
1028 | |
771 | 1029 #define DECODE_SHIFT_JIS(sj1, sj2, c1, c2) \ |
428 | 1030 do { \ |
1031 int I1 = sj1, I2 = sj2; \ | |
1032 if (I2 >= 0x9f) \ | |
1033 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe0 : 0x60), \ | |
1034 c2 = I2 + 2; \ | |
1035 else \ | |
1036 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe1 : 0x61), \ | |
1037 c2 = I2 + ((I2 >= 0x7f) ? 0x60 : 0x61); \ | |
1038 } while (0) | |
1039 | |
1040 /* Convert the internal string representation of a Shift-JIS character | |
1041 (c1, c2) into Shift-JIS code (sj1, sj2). The leading byte is | |
1042 assumed. */ | |
1043 | |
771 | 1044 #define ENCODE_SHIFT_JIS(c1, c2, sj1, sj2) \ |
428 | 1045 do { \ |
1046 int I1 = c1, I2 = c2; \ | |
1047 if (I1 & 1) \ | |
1048 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x31 : 0x71), \ | |
1049 sj2 = I2 - ((I2 >= 0xe0) ? 0x60 : 0x61); \ | |
1050 else \ | |
1051 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x30 : 0x70), \ | |
1052 sj2 = I2 - 2; \ | |
1053 } while (0) | |
1054 #endif /* MULE */ | |
1055 | |
771 | 1056 DECLARE_CODING_SYSTEM_TYPE (no_conversion); |
1057 DECLARE_CODING_SYSTEM_TYPE (convert_eol); | |
1058 #if 0 | |
1059 DECLARE_CODING_SYSTEM_TYPE (text_file_wrapper); | |
1060 #endif /* 0 */ | |
1061 DECLARE_CODING_SYSTEM_TYPE (undecided); | |
1062 DECLARE_CODING_SYSTEM_TYPE (chain); | |
1063 | |
1064 #ifdef DEBUG_XEMACS | |
1065 DECLARE_CODING_SYSTEM_TYPE (internal); | |
1066 #endif | |
1067 | |
1068 #ifdef MULE | |
1069 DECLARE_CODING_SYSTEM_TYPE (iso2022); | |
1070 DECLARE_CODING_SYSTEM_TYPE (ccl); | |
4690
257b468bf2ca
Move the #'query-coding-region implementation to C.
Aidan Kehoe <kehoea@parhasard.net>
parents:
4569
diff
changeset
|
1071 DECLARE_CODING_SYSTEM_TYPE (fixed_width); |
771 | 1072 DECLARE_CODING_SYSTEM_TYPE (shift_jis); |
1073 DECLARE_CODING_SYSTEM_TYPE (big5); | |
1074 #endif | |
1075 | |
1076 #ifdef HAVE_ZLIB | |
1077 DECLARE_CODING_SYSTEM_TYPE (gzip); | |
1078 #endif | |
428 | 1079 |
771 | 1080 DECLARE_CODING_SYSTEM_TYPE (unicode); |
428 | 1081 |
1315 | 1082 #ifdef WIN32_ANY |
771 | 1083 DECLARE_CODING_SYSTEM_TYPE (mswindows_multibyte_to_unicode); |
1084 DECLARE_CODING_SYSTEM_TYPE (mswindows_multibyte); | |
428 | 1085 #endif |
771 | 1086 |
1087 Lisp_Object coding_stream_detected_coding_system (Lstream *stream); | |
1088 Lisp_Object coding_stream_coding_system (Lstream *stream); | |
1089 void set_coding_stream_coding_system (Lstream *stream, | |
1090 Lisp_Object codesys); | |
1091 Lisp_Object detect_coding_stream (Lisp_Object stream); | |
867 | 1092 Ichar decode_big5_char (int o1, int o2); |
771 | 1093 void add_entry_to_coding_system_type_list (struct coding_system_methods *m); |
1094 Lisp_Object make_internal_coding_system (Lisp_Object existing, | |
4528
726060ee587c
First draft of g++ 4.3 warning removal patch. Builds. *Needs ChangeLogs.*
Stephen J. Turnbull <stephen@xemacs.org>
parents:
4522
diff
changeset
|
1095 const Ascbyte *prefix, |
771 | 1096 Lisp_Object type, |
1097 Lisp_Object description, | |
1098 Lisp_Object props); | |
802 | 1099 |
814 | 1100 #define LSTREAM_FL_NO_CLOSE_OTHER (1 << 16) |
1101 #define LSTREAM_FL_READ_ONE_BYTE_AT_A_TIME (1 << 17) | |
1102 #define LSTREAM_FL_NO_INIT_CHAR_MODE_WHEN_READING (1 << 18) | |
1103 | |
771 | 1104 Lisp_Object make_coding_input_stream (Lstream *stream, Lisp_Object codesys, |
800 | 1105 enum encode_decode direction, |
802 | 1106 int flags); |
771 | 1107 Lisp_Object make_coding_output_stream (Lstream *stream, Lisp_Object codesys, |
800 | 1108 enum encode_decode direction, |
802 | 1109 int flags); |
771 | 1110 void set_detection_results (struct detection_state *st, int detector, |
1111 int given); | |
428 | 1112 |
440 | 1113 #endif /* INCLUDED_file_coding_h_ */ |
1114 |