comparison src/file-coding.h @ 771:943eaba38521

[xemacs-hg @ 2002-03-13 08:51:24 by ben] The big ben-mule-21-5 check-in! Various files were added and deleted. See CHANGES-ben-mule. There are still some test suite failures. No crashes, though. Many of the failures have to do with problems in the test suite itself rather than in the actual code. I'll be addressing these in the next day or so -- none of the test suite failures are at all critical. Meanwhile I'll be trying to address the biggest issues -- i.e. build or run failures, which will almost certainly happen on various platforms. All comments should be sent to ben@xemacs.org -- use a Cc: if necessary when sending to mailing lists. There will be pre- and post- tags, something like pre-ben-mule-21-5-merge-in, and post-ben-mule-21-5-merge-in.
author ben
date Wed, 13 Mar 2002 08:54:06 +0000
parents fdefd0186b75
children e38acbeb1cae
comparison
equal deleted inserted replaced
770:336a418893b5 771:943eaba38521
1 /* Header for code conversion stuff 1 /* Header for encoding conversion functions; coding-system object.
2 #### rename me to coding-system.h
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc. 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
3 Copyright (C) 1995 Sun Microsystems, Inc. 4 Copyright (C) 1995 Sun Microsystems, Inc.
5 Copyright (C) 2000, 2001 Ben Wing.
4 6
5 This file is part of XEmacs. 7 This file is part of XEmacs.
6 8
7 XEmacs is free software; you can redistribute it and/or modify it 9 XEmacs is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the 10 under the terms of the GNU General Public License as published by the
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 21 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */ 22 Boston, MA 02111-1307, USA. */
21 23
22 /* Synched up with: Mule 2.3. Not in FSF. */ 24 /* Synched up with: Mule 2.3. Not in FSF. */
23 25
24 /* 91.10.09 written by K.Handa <handa@etl.go.jp> */ 26 /* Authorship:
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */ 27
28 Current primary author: Ben Wing <ben@xemacs.org>
29
30 Written by Ben Wing <ben@xemacs.org> for XEmacs, 1995, loosely based
31 on code written 91.10.09 by K.Handa <handa@etl.go.jp>.
32 Rewritten again 2000-2001 by Ben Wing to support properly
33 abstracted coding systems.
34 September 2001: Finished last part of abstraction, the detection
35 mechanism.
36 */
26 37
27 #ifndef INCLUDED_file_coding_h_ 38 #ifndef INCLUDED_file_coding_h_
28 #define INCLUDED_file_coding_h_ 39 #define INCLUDED_file_coding_h_
29 40
30 struct decoding_stream; 41 /* Capsule description of the different structures, what their purpose is,
31 struct encoding_stream; 42 how they fit together, and where various bits of data are stored.
32 43
33 /* Coding system types. These go into the TYPE field of a 44 A "coding system" is an algorithm for converting data in one format into
34 Lisp_Coding_System. */ 45 data in another format. Currently most of the coding systems we have
35 46 created concern internationalized text, and convert between the XEmacs
36 enum coding_system_type 47 internal format for multilingual text, and various external
37 { 48 representations of such text. However, any such conversion is possible,
38 CODESYS_AUTODETECT, /* Automatic conversion. */ 49 for example, compressing or uncompressing text using the gzip algorithm.
39 #ifdef MULE 50 All coding systems provide both encode and decode routines, so that the
40 CODESYS_SHIFT_JIS, /* Shift-JIS; Hankaku (half-width) KANA 51 conversion can go both ways.
41 is also supported. */ 52
42 CODESYS_ISO2022, /* Any ISO2022-compliant coding system. 53 The way we handle this is by dividing the various potential coding
43 Includes JIS, EUC, CTEXT */ 54 systems into types, analogous to classes in C++. Each coding system
44 CODESYS_BIG5, /* BIG5 (used for Taiwanese). */ 55 type encompasses a series of related coding systems that it can
45 CODESYS_UCS4, /* ISO 10646 UCS-4 */ 56 implement, and it has properties which control how exactly the encoding
46 CODESYS_UTF8, /* ISO 10646 UTF-8 */ 57 works. A particular set of values for each of the properties makes up a
47 CODESYS_CCL, /* Converter written in CCL. */ 58 "coding system", and specifies one particular encoding. A `struct
48 #endif 59 Lisp_Coding_System' object encapsulates those settings -- its type, the
49 CODESYS_NO_CONVERSION /* "No conversion"; used for binary files. 60 values chosen for all properties of that type, a name for the coding
50 We use quotes because there really 61 system, some documentation.
51 is some conversion being applied, 62
52 but it appears to the user as if 63 In addition, there are of course methods associated with a coding system
53 the text is read in without conversion. */ 64 type, implementing the encoding, decoding, etc. These are stored in a
54 #ifdef DEBUG_XEMACS 65 `struct coding_system_methods' object, one per coding-system type, which
55 ,CODESYS_INTERNAL /* Raw (internally-formatted) data. */ 66 contains mostly function pointers. This is retrievable from the
56 #endif 67 coding-system object (i.e. the struct Lisp_Coding_System), which has a
68 pointer to it.
69
70 In order to actually use a coding system to do an encoding or decoding
71 operation, you need to use a coding Lstream.
72
73 Now let's look more at attached data. All coding systems have certain
74 common data fields -- name, type, documentation, etc. -- as well as a
75 bunch more that are defined by the coding system type. To handle this
76 cleanly, each coding system type defines a structure that holds just the
77 fields of data particular to it, and calls it e.g. `struct
78 iso2022_coding_system' for coding system type `iso2022'. When the
79 memory block holding the coding system object is created, it is sized
80 such that it can hold both the struct Lisp_Coding_System and the struct
81 iso2022_coding_system (or whatever) directly following it. (This is a
82 common trick; another possibility is to have a void * pointer in the
83 struct Lisp_Coding_System, which points to another memory block holding
84 the struct iso2022_coding_system.) A macro is provided
85 (CODING_SYSTEM_TYPE_DATA) to retrieve a pointer of the right type to the
86 type-specific data contained within the overall `struct
87 Lisp_Coding_System' block.
88
89 Lstreams, similarly, are objects of type `struct lstream' holding data
90 about the stream operation (how much data has been read or written, any
91 buffered data, any error conditions, etc.), and like coding systems have
92 different types. They have a structure called `Lstream_implementation',
93 one per lstream type, exactly analogous to `struct
94 coding_system_methods'. In addition, they have type-specific data
95 (specifying, e.g., the file number, FILE *, memory location, other
96 lstream, etc. to read the data from or write it to, and for conversion
97 processes, the current state of the process -- are we decoding ASCII or
98 Kanji characters? are we in the middle of processing an escape
99 sequence? etc.). This type-specific data is stored in a structure
100 named `struct coding_stream'. Just like for coding systems, the
101 type-independent data in the `struct lstream' and the type-dependent
102 data in the `struct coding_stream' are stored together in the same
103 memory block.
104
105 Now things get a bit tricky. The `struct coding_stream' is
106 type-specific from the point of view of an lstream, but not from the
107 point of view of a coding system. It contains only general data about
108 the conversion process, e.g. the name of the coding system used for
109 conversion, the lstream that we take data from or write it to (depending
110 on whether this was created as a read stream or a write stream), a
111 buffer to hold extra data we retrieved but can't send on yet, some
112 flags, etc. It also needs some data specific to the particular coding
113 system and thus to the particular operation going on. This data is held
114 in a structure named (e.g.) `struct iso2022_coding_stream', and it's
115 held in a separate memory block and pointed to by the generic `struct
116 coding_stream'. It's not glommed into a single memory block both
117 because that would require making changes to the generic lstream code
118 and more importantly because the coding system used in a particular
119 coding lstream can be changed at any point during the lifetime of the
120 lstream, and possibly multiple times. (For example, it can be set using
121 the Lisp primitives `set-process-input-coding-system' and
122 `set-console-tty-input-coding-system', as well as getting set when a
123 conversion operation was started with coding system `undecided' and the
124 correct coding system was then detected.)
125
126 IMPORTANT NOTE: There are at least two ancillary data structures
127 associated with a coding system type. (There may also be detection data;
128 see elsewhere.) It's important, when writing a coding system type, to
129 keep straight which type of data goes where. In particular, `struct
130 foo_coding_system' is attached to the coding system object itself. This
131 is a permanent object and there's only one per coding system. It's
132 created once, usually at init time, and never destroyed. So, `struct
133 foo_coding_system' should in general not contain dynamic data! (Just
134 data describing the properties of the coding system.) In particular,
135 *NO* data about any conversion in progress. There may be many
136 conversions going on simultaneously using a particular coding system,
137 and by storing conversion data in the coding system, these conversions
138 will overwrite each other's data.
139
140 Instead, use the lstream object, whose purpose is to encapsulate a
141 particular conversion and all associated data. From the lstream object,
142 you can get the struct coding_stream using something like
143
144 struct coding_stream *str = LSTREAM_TYPE_DATA (lstr, coding);
145
146 But usually this structure is already passed to you as one of the
147 parameters of the method being invoked.
148
149 From the struct coding_stream, you can retrieve the
150 coding-system-type-specific data using something like
151
152 struct foo_coding_stream *data = CODING_STREAM_TYPE_DATA (str, foo);
153
154 Then, use this structure to hold all data relevant to the particular
155 conversion being done.
156
157 Initialize this structure whenever init_coding_stream_method is called
158 (this may happen more than once), and finalize it (free resources, etc.)
159 when finalize_coding_stream_method is called.
160 */
161
162 struct coding_stream;
163 struct detection_state;
164
165 extern const struct struct_description coding_system_methods_description;
166
167 struct coding_system_methods;
168
169 enum source_sink_type
170 {
171 DECODES_CHARACTER_TO_BYTE,
172 DECODES_BYTE_TO_BYTE,
173 DECODES_BYTE_TO_CHARACTER,
174 DECODES_CHARACTER_TO_CHARACTER
57 }; 175 };
58 176
59 enum eol_type 177 enum eol_type
60 { 178 {
61 EOL_AUTODETECT,
62 EOL_LF, 179 EOL_LF,
63 EOL_CRLF, 180 EOL_CRLF,
64 EOL_CR 181 EOL_CR,
182 EOL_AUTODETECT,
65 }; 183 };
66 typedef enum eol_type eol_type_t;
67
68 #ifdef MULE
69 typedef struct charset_conversion_spec charset_conversion_spec;
70 struct charset_conversion_spec
71 {
72 Lisp_Object from_charset;
73 Lisp_Object to_charset;
74 };
75
76 typedef struct
77 {
78 Dynarr_declare (charset_conversion_spec);
79 } charset_conversion_spec_dynarr;
80 #endif
81 184
82 struct Lisp_Coding_System 185 struct Lisp_Coding_System
83 { 186 {
84 struct lcrecord_header header; 187 struct lcrecord_header header;
85 188 struct coding_system_methods *methods;
86 /* Name and doc string of this coding system. */ 189
190 /* Name and description of this coding system. The description
191 should be suitable for a menu entry. */
87 Lisp_Object name; 192 Lisp_Object name;
88 Lisp_Object doc_string; 193 Lisp_Object description;
89
90 /* This is the major type of the coding system -- one of Big5, ISO2022,
91 Shift-JIS, etc. See the constants above. */
92 enum coding_system_type type;
93 194
94 /* Mnemonic string displayed in the modeline when this coding 195 /* Mnemonic string displayed in the modeline when this coding
95 system is active for a particular buffer. */ 196 system is active for a particular buffer. */
96 Lisp_Object mnemonic; 197 Lisp_Object mnemonic;
97 198
199 /* Long documentation on the coding system. */
200 Lisp_Object documentation;
201 /* Functions to handle additional conversion after reading or before
202 writing. #### This mechanism should be replaced by the ability to
203 simply create new coding system types. */
98 Lisp_Object post_read_conversion; 204 Lisp_Object post_read_conversion;
99 Lisp_Object pre_write_conversion; 205 Lisp_Object pre_write_conversion;
100 206
101 eol_type_t eol_type; 207 /* If this coding system is not of the correct type for text file
208 conversion (i.e. decodes byte->char), we wrap it with appropriate
209 char<->byte converters. This is created dynamically, when it's
210 needed, and cached here. */
211 Lisp_Object text_file_wrapper;
212
213 /* If true, this is an internal coding system, which will not show up in
214 coding-system-list unless a special parameter is given to it. */
215 int internal_p;
216
217 /* ------------------------ junk to handle EOL -------------------------
218 I had hoped that we could handle this without lots of special-case
219 code, but it appears not to be the case if we want to maintain
220 compatibility with the existing way. However, at least with the way
221 we do things now, we avoid EOL junk in most of the coding system
222 methods themselves, or in the decode/encode functions. The EOL
223 special-case code is limited to coding-system creation and to the
224 convert-eol and undecided coding system types. */
225
226 /* If this coding system wants autodetection of the EOL type, then at the
227 appropriate time we wrap this coding system with
228 convert-eol-autodetect. (We do NOT do this at creation time because
229 then we end up with multiple convert-eols wrapped into the final
230 result -- esp. with autodetection using `undecided' -- leading to a
231 big mess.) We cache the wrapped coding system here. */
232 Lisp_Object auto_eol_wrapper;
233
234 /* Eol type requested by user. */
235 enum eol_type eol_type;
102 236
103 /* Subsidiary coding systems that specify a particular type of EOL 237 /* Subsidiary coding systems that specify a particular type of EOL
104 marking, rather than autodetecting it. These will only be non-nil 238 marking, rather than autodetecting it. These will only be non-nil
105 if (eol_type == EOL_AUTODETECT). */ 239 if (eol_type == EOL_AUTODETECT). These are chains. */
106 Lisp_Object eol_lf; 240 Lisp_Object eol[3];
107 Lisp_Object eol_crlf; 241 /* If this coding system is a subsidiary, this element points back to its
108 Lisp_Object eol_cr; 242 parent. */
109 #ifdef MULE 243 Lisp_Object subsidiary_parent;
110 struct 244
111 { 245 /* At decoding or encoding time, we use the following coding system, if
112 /* What are the charsets to be initially designated to G0, G1, 246 it exists, in place of the coding system object. This is how we
113 G2, G3? If t, no charset is initially designated. If nil, 247 handle coding systems with EOL types of CRLF or CR. Formerly, we did
114 no charset is initially designated and no charset is allowed 248 the canonicalization at creation time, returning a chain in place of
115 to be designated. */ 249 the original coding system; but that interferes with
116 Lisp_Object initial_charset[4]; 250 `coding-system-property' and causes other complications. CANONICAL is
117 251 used when determining the end types of a coding system.
118 /* If true, a designation escape sequence needs to be sent on output 252 canonicalize-after-coding also consults CANONICAL (it has to, because
119 for the charset in G[0-3] before that charset is used. */ 253 the data in the lstream is based on CANONICAL, not on the original
120 unsigned char force_charset_on_output[4]; 254 coding system). */
121 255 Lisp_Object canonical;
122 charset_conversion_spec_dynarr *input_conv; 256
123 charset_conversion_spec_dynarr *output_conv; 257 /* type-specific extra data attached to a coding_system */
124 258 char data[1];
125 unsigned int shoort :1; /* C makes you speak Dutch */
126 unsigned int no_ascii_eol :1;
127 unsigned int no_ascii_cntl :1;
128 unsigned int seven :1;
129 unsigned int lock_shift :1;
130 unsigned int no_iso6429 :1;
131 unsigned int escape_quoted :1;
132 } iso2022;
133 struct
134 {
135 /* For a CCL coding system, these specify the CCL programs used for
136 decoding (input) and encoding (output). */
137 Lisp_Object decode;
138 Lisp_Object encode;
139 } ccl;
140 #endif
141 }; 259 };
142 typedef struct Lisp_Coding_System Lisp_Coding_System; 260 typedef struct Lisp_Coding_System Lisp_Coding_System;
143 261
144 DECLARE_LRECORD (coding_system, Lisp_Coding_System); 262 DECLARE_LRECORD (coding_system, Lisp_Coding_System);
145 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, Lisp_Coding_System) 263 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, Lisp_Coding_System)
147 #define wrap_coding_system(p) wrap_record (p, coding_system) 265 #define wrap_coding_system(p) wrap_record (p, coding_system)
148 #define CODING_SYSTEMP(x) RECORDP (x, coding_system) 266 #define CODING_SYSTEMP(x) RECORDP (x, coding_system)
149 #define CHECK_CODING_SYSTEM(x) CHECK_RECORD (x, coding_system) 267 #define CHECK_CODING_SYSTEM(x) CHECK_RECORD (x, coding_system)
150 #define CONCHECK_CODING_SYSTEM(x) CONCHECK_RECORD (x, coding_system) 268 #define CONCHECK_CODING_SYSTEM(x) CONCHECK_RECORD (x, coding_system)
151 269
270 struct coding_system_methods
271 {
272 Lisp_Object type;
273 Lisp_Object predicate_symbol;
274
275 /* Implementation specific methods: */
276
277 /* Init method: Initialize coding-system data. Optional. */
278 void (*init_method) (Lisp_Object coding_system);
279
280 /* Mark method: Mark any Lisp objects in the type-specific data
281 attached to the coding-system object. Optional. */
282 void (*mark_method) (Lisp_Object coding_system);
283
284 /* Print method: Print the type-specific properties of this coding
285 system, as part of `print'-ing the object. If this method is defined
286 and prints anything, it should print a space as the first thing it
287 does. Optional. */
288 void (*print_method) (Lisp_Object cs, Lisp_Object printcharfun,
289 int escapeflag);
290
291 /* Canonicalize method: Convert this coding system to another one; called
292 once, at creation time, after all properties have been parsed. The
293 returned value should be a coding system created with
294 make_internal_coding_system() (passing the existing coding system as the
295 first argument), and will become the coding system returned by
296 `make-coding-system'. Optional.
297
298 NOTE: There are *three* different uses of "canonical" or "canonicalize"
299 w.r.t. coding systems, and it's important to keep them straight.
300
301 1. The canonicalize method. Used to specify a different coding
302 system, used when doing conversions, in place of the actual coding
303 system itself. Stored in the CANONICAL field of a coding system.
304
305 2. The canonicalize-after-coding method. Used to return the encoding
306 that was "actually" used to decode some text, such that this
307 particular encoding can be used to encode the text again with the
308 expectation that the result will be the same as the original encoding.
309 Particularly important with auto-detecting coding systems.
310
311 3. From the perspective of aliases, a "canonical" coding system is one
312 that's not an alias to some other coding system, and "canonicalization"
313 is the process of traversing the alias pointers to find the canonical
314 coding system that's equivalent to the alias.
315 */
316 Lisp_Object (*canonicalize_method) (Lisp_Object coding_system);
317
318 /* Canonicalize after coding method: Convert this coding system to
319 another one, after coding (usually decoding) has finished. This is
320 meant to be used by auto-detecting coding systems, which should return
321 the actually detected coding system. Optional. */
322 Lisp_Object (*canonicalize_after_coding_method)
323 (struct coding_stream *str);
324
325 /* Convert method: Decode or encode the data in SRC of size N, writing
326 the results into the Dynarr DST. If the conversion_end_type method
327 indicates that the source is characters (as opposed to bytes), you are
328 guaranteed to get only whole characters in the data in SRC/N. STR, a
329 struct coding_stream, stores all necessary state and other info about
330 the conversion. Coding-specific state (struct TYPE_coding_stream) can
331 be retrieved from STR using CODING_STREAM_TYPE_DATA(). Return value
332 indicates the number of bytes of the *INPUT* that were converted (not
333 the number of bytes written to the Dynarr!). This can be less than
334 the total amount of input passed in; if so, the remainder is
335 considered "rejected" and will appear again at the beginning of the
336 data passed in the next time the convert method is called. When EOF
337 is returned on the other end and there's no more data, the convert
338 method will be called one last time, with STR->eof set, and the passed-in
339 data will consist only of any rejected data from the previous
340 call. (At this point, file handles and similar resources can be
341 closed, but do NOT arbitrarily free data structures in the
342 type-specific data, because there are operations that can be done on
343 closed streams to query the results of the processing -- specifically,
344 for coding streams, there's the canonicalize_after_coding() method.)
345 Required. */
346 Bytecount (*convert_method) (struct coding_stream *str,
347 const unsigned char *src,
348 unsigned_char_dynarr *dst, Bytecount n);
349
350 /* Coding mark method: Mark any Lisp objects in the type-specific data
351 attached to `struct coding_stream'. Optional. */
352 void (*mark_coding_stream_method) (struct coding_stream *str);
353
354 /* Init coding stream method: Initialize the type-specific data attached
355 to the coding stream (i.e. in struct TYPE_coding_stream), when the
356 coding stream is opened. The type-specific data will be zeroed out.
357 Optional. */
358 void (*init_coding_stream_method) (struct coding_stream *str);
359
360 /* Rewind coding stream method: Reset any necessary type-specific data as
361 a result of the stream being rewound. Optional. */
362 void (*rewind_coding_stream_method) (struct coding_stream *str);
363
364 /* Finalize coding stream method: Clean up the type-specific data
365 attached to the coding stream (i.e. in struct TYPE_coding_stream).
366 Happens when the Lstream is deleted using Lstream_delete() or is
367 garbage-collected. Most streams are deleted after they've been used,
368 so it's less likely (but still possible) that allocated data will
369 stick around until GC time. (File handles can also be closed when EOF
370 is signalled; but some data must stick around after this point, for
371 the benefit of canonicalize_after_coding. See the convert method.)
372 Called only once (NOT called at disksave time). Optional. */
373 void (*finalize_coding_stream_method) (struct coding_stream *str);
374
375 /* Finalize method: Clean up type-specific data (e.g. free allocated
376 data) attached to the coding system (i.e. in struct
377 TYPE_coding_system), when the coding system is about to be garbage
378 collected. (Currently not called.) Called only once (NOT called at
379 disksave time). Optional. */
380 void (*finalize_method) (Lisp_Object codesys);
381
382 /* Conversion end type method: Does this coding system encode bytes ->
383 characters, characters -> characters, bytes -> bytes, or
384 characters -> bytes? Default is characters -> bytes. Optional. */
385 enum source_sink_type (*conversion_end_type_method) (Lisp_Object codesys);
386
387 /* Putprop method: Set the value of a type-specific property. If
388 the property name is unrecognized, return 0. If the value is disallowed
389 or erroneous, signal an error. Currently called only at creation time.
390 Optional. */
391 int (*putprop_method) (Lisp_Object codesys,
392 Lisp_Object key,
393 Lisp_Object value);
394
395 /* Getprop method: Return the value of a type-specific property. If
396 the property name is unrecognized, return Qunbound. Optional.
397 */
398 Lisp_Object (*getprop_method) (Lisp_Object coding_system,
399 Lisp_Object prop);
400
401 /* These next three are set as part of the call to
402 INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */
403
404 /* Description of the extra data (struct foo_coding_system) attached to a
405 coding system, for pdump purposes. NOTE: All offsets must have
406 coding_system_data_offset added to them! */
407 const struct lrecord_description *extra_description;
408 /* size of struct foo_coding_system -- extra data associated with
409 the coding system */
410 int extra_data_size;
411 /* size of struct foo_coding_stream -- extra data associated with the
412 struct coding_stream, needed for each active coding process
413 using this coding system. note that we can have more than one
414 process active at once (simply by creating more than one coding
415 lstream using this coding system), so we can't store this data in
416 the coding system object. */
417 int coding_data_size;
418 };
419
420 /***** Calling a coding-system method *****/
421
422 #define RAW_CODESYSMETH(cs, m) ((cs)->methods->m##_method)
423 #define HAS_CODESYSMETH_P(cs, m) (!!RAW_CODESYSMETH (cs, m))
424 #define CODESYSMETH(cs, m, args) (((cs)->methods->m##_method) args)
425
426 /* Call a void-returning coding-system method, if it exists. */
427 #define MAYBE_CODESYSMETH(cs, m, args) do { \
428 Lisp_Coding_System *maybe_codesysmeth_cs = (cs); \
429 if (HAS_CODESYSMETH_P (maybe_codesysmeth_cs, m)) \
430 CODESYSMETH (maybe_codesysmeth_cs, m, args); \
431 } while (0)
432
433 /* Call a coding-system method, if it exists, or return GIVEN.
434 NOTE: Multiply-evaluates CS. */
435 #define CODESYSMETH_OR_GIVEN(cs, m, args, given) \
436 (HAS_CODESYSMETH_P (cs, m) ? \
437 CODESYSMETH (cs, m, args) : (given))
438
439 #define XCODESYSMETH(cs, m, args) \
440 CODESYSMETH (XCODING_SYSTEM (cs), m, args)
441 #define MAYBE_XCODESYSMETH(cs, m, args) \
442 MAYBE_CODESYSMETH (XCODING_SYSTEM (cs), m, args)
443 #define XCODESYSMETH_OR_GIVEN(cs, m, args, given) \
444 CODESYSMETH_OR_GIVEN (XCODING_SYSTEM (cs), m, args, given)
445
446
447 /***** Defining new coding-system types *****/
448
449 #define coding_system_data_offset (offsetof (Lisp_Coding_System, data))
450 extern const struct lrecord_description coding_system_empty_extra_description[];
451
452 #ifdef ERROR_CHECK_TYPECHECK
453 #define DECLARE_CODING_SYSTEM_TYPE(type) \
454 \
455 extern struct coding_system_methods * type##_coding_system_methods; \
456 INLINE_HEADER struct type##_coding_system * \
457 error_check_##type##_coding_system_data (Lisp_Coding_System *cs); \
458 INLINE_HEADER struct type##_coding_system * \
459 error_check_##type##_coding_system_data (Lisp_Coding_System *cs) \
460 { \
461 assert (CODING_SYSTEM_TYPE_P (cs, type)); \
462 /* Catch accidental use of INITIALIZE_CODING_SYSTEM_TYPE in place \
463 of INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA. */ \
464 assert (cs->methods->extra_data_size > 0); \
465 return (struct type##_coding_system *) cs->data; \
466 } \
467 \
468 INLINE_HEADER struct type##_coding_stream * \
469 error_check_##type##_coding_stream_data (struct coding_stream *s); \
470 INLINE_HEADER struct type##_coding_stream * \
471 error_check_##type##_coding_stream_data (struct coding_stream *s) \
472 { \
473 assert (XCODING_SYSTEM_TYPE_P (s->codesys, type)); \
474 return (struct type##_coding_stream *) s->data; \
475 } \
476 \
477 INLINE_HEADER Lisp_Coding_System * \
478 error_check_##type##_coding_system_type (Lisp_Object obj); \
479 INLINE_HEADER Lisp_Coding_System * \
480 error_check_##type##_coding_system_type (Lisp_Object obj) \
481 { \
482 Lisp_Coding_System *cs = XCODING_SYSTEM (obj); \
483 assert (CODING_SYSTEM_TYPE_P (cs, type)); \
484 return cs; \
485 } \
486 \
487 DECLARE_NOTHING
488 #else
489 #define DECLARE_CODING_SYSTEM_TYPE(type) \
490 extern struct coding_system_methods * type##_coding_system_methods
491 #endif /* ERROR_CHECK_TYPECHECK */
492
493 #define DEFINE_CODING_SYSTEM_TYPE(type) \
494 struct coding_system_methods * type##_coding_system_methods
495
496 #define INITIALIZE_CODING_SYSTEM_TYPE(ty, pred_sym) do { \
497 ty##_coding_system_methods = \
498 xnew_and_zero (struct coding_system_methods); \
499 ty##_coding_system_methods->type = Q##ty; \
500 ty##_coding_system_methods->extra_description = \
501 coding_system_empty_extra_description; \
502 defsymbol_nodump (&ty##_coding_system_methods->predicate_symbol, \
503 pred_sym); \
504 add_entry_to_coding_system_type_list (ty##_coding_system_methods); \
505 dump_add_root_struct_ptr (&ty##_coding_system_methods, \
506 &coding_system_methods_description); \
507 } while (0)
508
509 #define REINITIALIZE_CODING_SYSTEM_TYPE(type) do { \
510 staticpro_nodump (&type##_coding_system_methods->predicate_symbol); \
511 } while (0)
512
513 /* This assumes the existence of two structures and a pdump description:
514
515 struct foo_coding_system (attached to the coding system)
516 struct foo_coding_stream (per coding process, attached to the
517 struct coding_stream)
518 const struct lrecord_description foo_coding_system_description[]
519 (pdump description of struct foo_coding_system)
520
521 NOTE: The description must have coding_system_data_offset added to
522 all offsets in it! For an example of how to do things, see
523 chain_coding_system_description.
524 */
525 #define INITIALIZE_CODING_SYSTEM_TYPE_WITH_DATA(type, pred_sym) \
526 do { \
527 INITIALIZE_CODING_SYSTEM_TYPE (type, pred_sym); \
528 type##_coding_system_methods->extra_data_size = \
529 sizeof (struct type##_coding_system); \
530 type##_coding_system_methods->extra_description = \
531 type##_coding_system_description; \
532 type##_coding_system_methods->coding_data_size = \
533 sizeof (struct type##_coding_stream); \
534 } while (0)
535
536 /* Declare that coding-system-type TYPE has method METH; used in
537 initialization routines */
538 #define CODING_SYSTEM_HAS_METHOD(type, meth) \
539 (type##_coding_system_methods->meth##_method = type##_##meth)
540
541 /***** Macros for accessing coding-system types *****/
542
543 #define CODING_SYSTEM_TYPE_P(cs, type) \
544 ((cs)->methods == type##_coding_system_methods)
545 #define XCODING_SYSTEM_TYPE_P(cs, type) \
546 CODING_SYSTEM_TYPE_P (XCODING_SYSTEM (cs), type)
547
548 #ifdef ERROR_CHECK_TYPECHECK
549 # define CODING_SYSTEM_TYPE_DATA(cs, type) \
550 error_check_##type##_coding_system_data (cs)
551 #else
552 # define CODING_SYSTEM_TYPE_DATA(cs, type) \
553 ((struct type##_coding_system *) \
554 (cs)->data)
555 #endif
556
557 #define XCODING_SYSTEM_TYPE_DATA(cs, type) \
558 CODING_SYSTEM_TYPE_DATA (XCODING_SYSTEM_OF_TYPE (cs, type), type)
559
560 #ifdef ERROR_CHECK_TYPECHECK
561 # define XCODING_SYSTEM_OF_TYPE(x, type) \
562 error_check_##type##_coding_system_type (x)
563 # define XSETCODING_SYSTEM_OF_TYPE(x, p, type) do \
564 { \
565 XSETCODING_SYSTEM (x, p); \
566 assert (CODING_SYSTEM_TYPEP (XCODING_SYSTEM(x), type)); \
567 } while (0)
568 #else
569 # define XCODING_SYSTEM_OF_TYPE(x, type) XCODING_SYSTEM (x)
570 # define XSETCODING_SYSTEM_OF_TYPE(x, p, type) XSETCODING_SYSTEM (x, p)
571 #endif /* ERROR_CHECK_TYPECHECK */
572
573 #define CODING_SYSTEM_TYPEP(x, type) \
574 (CODING_SYSTEMP (x) && CODING_SYSTEM_TYPE_P (XCODING_SYSTEM (x), type))
575 #define CHECK_CODING_SYSTEM_OF_TYPE(x, type) do { \
576 CHECK_CODING_SYSTEM (x); \
577 if (!CODING_SYSTEM_TYPE_P (XCODING_SYSTEM (x), type)) \
578 dead_wrong_type_argument \
579 (type##_coding_system_methods->predicate_symbol, x); \
580 } while (0)
581 #define CONCHECK_CODING_SYSTEM_OF_TYPE(x, type) do { \
582 CONCHECK_CODING_SYSTEM (x); \
583 if (!(CODING_SYSTEM_TYPEP (x, type))) \
584 x = wrong_type_argument \
585 (type##_coding_system_methods->predicate_symbol, x); \
586 } while (0)
587
588 #define CODING_SYSTEM_METHODS(codesys) ((codesys)->methods)
152 #define CODING_SYSTEM_NAME(codesys) ((codesys)->name) 589 #define CODING_SYSTEM_NAME(codesys) ((codesys)->name)
153 #define CODING_SYSTEM_DOC_STRING(codesys) ((codesys)->doc_string) 590 #define CODING_SYSTEM_DESCRIPTION(codesys) ((codesys)->description)
154 #define CODING_SYSTEM_TYPE(codesys) ((codesys)->type) 591 #define CODING_SYSTEM_TYPE(codesys) ((codesys)->methods->type)
155 #define CODING_SYSTEM_MNEMONIC(codesys) ((codesys)->mnemonic) 592 #define CODING_SYSTEM_MNEMONIC(codesys) ((codesys)->mnemonic)
593 #define CODING_SYSTEM_DOCUMENTATION(codesys) ((codesys)->documentation)
156 #define CODING_SYSTEM_POST_READ_CONVERSION(codesys) \ 594 #define CODING_SYSTEM_POST_READ_CONVERSION(codesys) \
157 ((codesys)->post_read_conversion) 595 ((codesys)->post_read_conversion)
158 #define CODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \ 596 #define CODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \
159 ((codesys)->pre_write_conversion) 597 ((codesys)->pre_write_conversion)
160 #define CODING_SYSTEM_EOL_TYPE(codesys) ((codesys)->eol_type) 598 #define CODING_SYSTEM_EOL_TYPE(codesys) ((codesys)->eol_type)
161 #define CODING_SYSTEM_EOL_LF(codesys) ((codesys)->eol_lf) 599 #define CODING_SYSTEM_EOL_LF(codesys) ((codesys)->eol[EOL_LF])
162 #define CODING_SYSTEM_EOL_CRLF(codesys) ((codesys)->eol_crlf) 600 #define CODING_SYSTEM_EOL_CRLF(codesys) ((codesys)->eol[EOL_CRLF])
163 #define CODING_SYSTEM_EOL_CR(codesys) ((codesys)->eol_cr) 601 #define CODING_SYSTEM_EOL_CR(codesys) ((codesys)->eol[EOL_CR])
164 602 #define CODING_SYSTEM_TEXT_FILE_WRAPPER(codesys) ((codesys)->text_file_wrapper)
165 #ifdef MULE 603 #define CODING_SYSTEM_AUTO_EOL_WRAPPER(codesys) ((codesys)->auto_eol_wrapper)
166 #define CODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ 604 #define CODING_SYSTEM_SUBSIDIARY_PARENT(codesys) ((codesys)->subsidiary_parent)
167 ((codesys)->iso2022.initial_charset[g]) 605 #define CODING_SYSTEM_CANONICAL(codesys) ((codesys)->canonical)
168 #define CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ 606
169 ((codesys)->iso2022.force_charset_on_output[g]) 607 #define CODING_SYSTEM_CHAIN_CHAIN(codesys) \
170 #define CODING_SYSTEM_ISO2022_SHORT(codesys) ((codesys)->iso2022.shoort) 608 (CODING_SYSTEM_TYPE_DATA (codesys, chain)->chain)
171 #define CODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ 609 #define CODING_SYSTEM_CHAIN_COUNT(codesys) \
172 ((codesys)->iso2022.no_ascii_eol) 610 (CODING_SYSTEM_TYPE_DATA (codesys, chain)->count)
173 #define CODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ 611 #define CODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING(codesys) \
174 ((codesys)->iso2022.no_ascii_cntl) 612 (CODING_SYSTEM_TYPE_DATA (codesys, chain)->canonicalize_after_coding)
175 #define CODING_SYSTEM_ISO2022_SEVEN(codesys) ((codesys)->iso2022.seven) 613
176 #define CODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ 614 #define XCODING_SYSTEM_METHODS(codesys) \
177 ((codesys)->iso2022.lock_shift) 615 CODING_SYSTEM_METHODS (XCODING_SYSTEM (codesys))
178 #define CODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \
179 ((codesys)->iso2022.no_iso6429)
180 #define CODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \
181 ((codesys)->iso2022.escape_quoted)
182 #define CODING_SYSTEM_CCL_DECODE(codesys) ((codesys)->ccl.decode)
183 #define CODING_SYSTEM_CCL_ENCODE(codesys) ((codesys)->ccl.encode)
184 #endif /* MULE */
185
186 #define XCODING_SYSTEM_NAME(codesys) \ 616 #define XCODING_SYSTEM_NAME(codesys) \
187 CODING_SYSTEM_NAME (XCODING_SYSTEM (codesys)) 617 CODING_SYSTEM_NAME (XCODING_SYSTEM (codesys))
188 #define XCODING_SYSTEM_DOC_STRING(codesys) \ 618 #define XCODING_SYSTEM_DESCRIPTION(codesys) \
189 CODING_SYSTEM_DOC_STRING (XCODING_SYSTEM (codesys)) 619 CODING_SYSTEM_DESCRIPTION (XCODING_SYSTEM (codesys))
190 #define XCODING_SYSTEM_TYPE(codesys) \ 620 #define XCODING_SYSTEM_TYPE(codesys) \
191 CODING_SYSTEM_TYPE (XCODING_SYSTEM (codesys)) 621 CODING_SYSTEM_TYPE (XCODING_SYSTEM (codesys))
192 #define XCODING_SYSTEM_MNEMONIC(codesys) \ 622 #define XCODING_SYSTEM_MNEMONIC(codesys) \
193 CODING_SYSTEM_MNEMONIC (XCODING_SYSTEM (codesys)) 623 CODING_SYSTEM_MNEMONIC (XCODING_SYSTEM (codesys))
624 #define XCODING_SYSTEM_DOCUMENTATION(codesys) \
625 CODING_SYSTEM_DOCUMENTATION (XCODING_SYSTEM (codesys))
194 #define XCODING_SYSTEM_POST_READ_CONVERSION(codesys) \ 626 #define XCODING_SYSTEM_POST_READ_CONVERSION(codesys) \
195 CODING_SYSTEM_POST_READ_CONVERSION (XCODING_SYSTEM (codesys)) 627 CODING_SYSTEM_POST_READ_CONVERSION (XCODING_SYSTEM (codesys))
196 #define XCODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \ 628 #define XCODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \
197 CODING_SYSTEM_PRE_WRITE_CONVERSION (XCODING_SYSTEM (codesys)) 629 CODING_SYSTEM_PRE_WRITE_CONVERSION (XCODING_SYSTEM (codesys))
198 #define XCODING_SYSTEM_EOL_TYPE(codesys) \ 630 #define XCODING_SYSTEM_EOL_TYPE(codesys) \
201 CODING_SYSTEM_EOL_LF (XCODING_SYSTEM (codesys)) 633 CODING_SYSTEM_EOL_LF (XCODING_SYSTEM (codesys))
202 #define XCODING_SYSTEM_EOL_CRLF(codesys) \ 634 #define XCODING_SYSTEM_EOL_CRLF(codesys) \
203 CODING_SYSTEM_EOL_CRLF (XCODING_SYSTEM (codesys)) 635 CODING_SYSTEM_EOL_CRLF (XCODING_SYSTEM (codesys))
204 #define XCODING_SYSTEM_EOL_CR(codesys) \ 636 #define XCODING_SYSTEM_EOL_CR(codesys) \
205 CODING_SYSTEM_EOL_CR (XCODING_SYSTEM (codesys)) 637 CODING_SYSTEM_EOL_CR (XCODING_SYSTEM (codesys))
206 638 #define XCODING_SYSTEM_TEXT_FILE_WRAPPER(codesys) \
207 #ifdef MULE 639 CODING_SYSTEM_TEXT_FILE_WRAPPER (XCODING_SYSTEM (codesys))
208 #define XCODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ 640 #define XCODING_SYSTEM_AUTO_EOL_WRAPPER(codesys) \
209 CODING_SYSTEM_ISO2022_INITIAL_CHARSET (XCODING_SYSTEM (codesys), g) 641 CODING_SYSTEM_AUTO_EOL_WRAPPER (XCODING_SYSTEM (codesys))
210 #define XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ 642 #define XCODING_SYSTEM_SUBSIDIARY_PARENT(codesys) \
211 CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (XCODING_SYSTEM (codesys), g) 643 CODING_SYSTEM_SUBSIDIARY_PARENT (XCODING_SYSTEM (codesys))
212 #define XCODING_SYSTEM_ISO2022_SHORT(codesys) \ 644 #define XCODING_SYSTEM_CANONICAL(codesys) \
213 CODING_SYSTEM_ISO2022_SHORT (XCODING_SYSTEM (codesys)) 645 CODING_SYSTEM_CANONICAL (XCODING_SYSTEM (codesys))
214 #define XCODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ 646
215 CODING_SYSTEM_ISO2022_NO_ASCII_EOL (XCODING_SYSTEM (codesys)) 647 #define XCODING_SYSTEM_CHAIN_CHAIN(codesys) \
216 #define XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ 648 CODING_SYSTEM_CHAIN_CHAIN (XCODING_SYSTEM (codesys))
217 CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (XCODING_SYSTEM (codesys)) 649 #define XCODING_SYSTEM_CHAIN_COUNT(codesys) \
218 #define XCODING_SYSTEM_ISO2022_SEVEN(codesys) \ 650 CODING_SYSTEM_CHAIN_COUNT (XCODING_SYSTEM (codesys))
219 CODING_SYSTEM_ISO2022_SEVEN (XCODING_SYSTEM (codesys)) 651 #define XCODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING(codesys) \
220 #define XCODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ 652 CODING_SYSTEM_CHAIN_CANONICALIZE_AFTER_CODING (XCODING_SYSTEM (codesys))
221 CODING_SYSTEM_ISO2022_LOCK_SHIFT (XCODING_SYSTEM (codesys)) 653
222 #define XCODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \ 654 /**************************************************/
223 CODING_SYSTEM_ISO2022_NO_ISO6429 (XCODING_SYSTEM (codesys)) 655 /* Detection */
224 #define XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \ 656 /**************************************************/
225 CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (XCODING_SYSTEM (codesys)) 657
226 #define XCODING_SYSTEM_CCL_DECODE(codesys) \ 658 #define MAX_DETECTOR_CATEGORIES 256
227 CODING_SYSTEM_CCL_DECODE (XCODING_SYSTEM (codesys)) 659 #define MAX_DETECTORS 64
228 #define XCODING_SYSTEM_CCL_ENCODE(codesys) \ 660
229 CODING_SYSTEM_CCL_ENCODE (XCODING_SYSTEM (codesys)) 661 #define MAX_BYTES_PROCESSED_FOR_DETECTION 65536
230 #endif /* MULE */ 662
231 663 struct detection_state
232 EXFUN (Fcoding_category_list, 0); 664 {
233 EXFUN (Fcoding_category_system, 1); 665 int seen_non_ascii;
234 EXFUN (Fcoding_priority_list, 0); 666 Bytecount bytes_seen;
235 EXFUN (Fcoding_system_doc_string, 1); 667
236 EXFUN (Fcoding_system_list, 0); 668 char categories[MAX_DETECTOR_CATEGORIES];
237 EXFUN (Fcoding_system_name, 1); 669 Bytecount data_offset[MAX_DETECTORS];
238 EXFUN (Fcoding_system_p, 1); 670 /* ... more data follows; data_offset[detector_##TYPE] points to
239 EXFUN (Fcoding_system_property, 2); 671 the data for that type */
240 EXFUN (Fcoding_system_type, 1);
241 EXFUN (Fcopy_coding_system, 2);
242 EXFUN (Fdecode_big5_char, 1);
243 EXFUN (Fdecode_coding_region, 4);
244 EXFUN (Fdecode_shift_jis_char, 1);
245 EXFUN (Fdetect_coding_region, 3);
246 EXFUN (Fencode_big5_char, 1);
247 EXFUN (Fencode_coding_region, 4);
248 EXFUN (Fencode_shift_jis_char, 1);
249 EXFUN (Ffind_coding_system, 1);
250 EXFUN (Fget_coding_system, 1);
251 EXFUN (Fmake_coding_system, 4);
252 EXFUN (Fset_coding_category_system, 2);
253 EXFUN (Fset_coding_priority_list, 1);
254 EXFUN (Fsubsidiary_coding_system, 2);
255
256 extern Lisp_Object Qucs4, Qutf8;
257 extern Lisp_Object Qbig5, Qccl, Qcharset_g0;
258 extern Lisp_Object Qcharset_g1, Qcharset_g2, Qcharset_g3, Qcoding_system_error;
259 extern Lisp_Object Qcoding_systemp, Qcr, Qcrlf, Qdecode, Qencode;
260 extern Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf, Qeol_type, Qescape_quoted;
261 extern Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
262 extern Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
263 extern Lisp_Object Qinput_charset_conversion, Qiso2022, Qlf, Qlock_shift;
264 extern Lisp_Object Qmnemonic, Qno_ascii_cntl, Qno_ascii_eol, Qno_conversion;
265 extern Lisp_Object Qraw_text;
266 extern Lisp_Object Qno_iso6429, Qoutput_charset_conversion;
267 extern Lisp_Object Qpost_read_conversion, Qpre_write_conversion, Qseven;
268 extern Lisp_Object Qshift_jis, Qshort, Vcoding_system_for_read;
269 extern Lisp_Object Vcoding_system_for_write, Vcoding_system_hash_table;
270 extern Lisp_Object Vfile_name_coding_system, Vkeyboard_coding_system;
271 extern Lisp_Object Vterminal_coding_system;
272
273 /* Flags indicating current state while converting code. */
274
275 /* Used by everyone. */
276
277 #define CODING_STATE_END (1 << 0) /* If set, this is the last chunk of
278 data being processed. When this
279 is finished, output any necessary
280 terminating control characters,
281 escape sequences, etc. */
282 #define CODING_STATE_CR (1 << 1) /* If set, we just saw a CR. */
283
284
285 /* Used by Big 5 on output. */
286 #ifdef MULE
287 #define CODING_STATE_BIG5_1 (1 << 2) /* If set, we just encountered
288 LEADING_BYTE_BIG5_1. */
289 #define CODING_STATE_BIG5_2 (1 << 3) /* If set, we just encountered
290 LEADING_BYTE_BIG5_2. */
291
292
293 /* Used by ISO2022 on input and output. */
294
295 #define CODING_STATE_R2L (1 << 4) /* If set, the current
296 directionality is right-to-left.
297 Otherwise, it's left-to-right. */
298
299
300 /* Used by ISO2022 on input. */
301
302 #define CODING_STATE_ESCAPE (1 << 5) /* If set, we're currently parsing
303 an escape sequence and the upper
304 16 bits should be looked at to
305 indicate what partial escape
306 sequence we've seen so far.
307 Otherwise, we're running
308 through actual text. */
309 #define CODING_STATE_SS2 (1 << 6) /* If set, G2 is invoked into GL, but
310 only for the next character. */
311 #define CODING_STATE_SS3 (1 << 7) /* If set, G3 is invoked into GL,
312 but only for the next character.
313 If both CODING_STATE_SS2 and
314 CODING_STATE_SS3 are set,
315 CODING_STATE_SS2 overrides; but
316 this probably indicates an error
317 in the text encoding. */
318 #ifdef ENABLE_COMPOSITE_CHARS
319 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing
320 a composite character (i.e. a
321 character constructed by
322 overstriking two or more
323 characters). */
324 #endif /* ENABLE_COMPOSITE_CHARS */
325
326
327 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until
328 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are
329 turned off at the end of processing each character or escape sequence. */
330 #ifdef ENABLE_COMPOSITE_CHARS
331 # define CODING_STATE_ISO2022_LOCK \
332 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L)
333 #else
334 # define CODING_STATE_ISO2022_LOCK (CODING_STATE_END | CODING_STATE_R2L)
335 #endif
336
337 #define CODING_STATE_BIG5_LOCK CODING_STATE_END
338
339 /* Flags indicating what we've seen so far when parsing an
340 ISO2022 escape sequence. */
341 enum iso_esc_flag
342 {
343 /* Partial sequences */
344 ISO_ESC_NOTHING, /* Nothing has been seen. */
345 ISO_ESC, /* We've seen ESC. */
346 ISO_ESC_2_4, /* We've seen ESC $. This indicates
347 that we're designating a multi-byte, rather
348 than a single-byte, character set. */
349 ISO_ESC_2_8, /* We've seen ESC 0x28, i.e. ESC (.
350 This means designate a 94-character
351 character set into G0. */
352 ISO_ESC_2_9, /* We've seen ESC 0x29 -- designate a
353 94-character character set into G1. */
354 ISO_ESC_2_10, /* We've seen ESC 0x2A. */
355 ISO_ESC_2_11, /* We've seen ESC 0x2B. */
356 ISO_ESC_2_12, /* We've seen ESC 0x2C -- designate a
357 96-character character set into G0.
358 (This is not ISO2022-standard.
359 The following 96-character
360 control sequences are standard,
361 though.) */
362 ISO_ESC_2_13, /* We've seen ESC 0x2D -- designate a
363 96-character character set into G1.
364 */
365 ISO_ESC_2_14, /* We've seen ESC 0x2E. */
366 ISO_ESC_2_15, /* We've seen ESC 0x2F. */
367 ISO_ESC_2_4_8, /* We've seen ESC $ 0x28 -- designate
368 a 94^N character set into G0. */
369 ISO_ESC_2_4_9, /* We've seen ESC $ 0x29. */
370 ISO_ESC_2_4_10, /* We've seen ESC $ 0x2A. */
371 ISO_ESC_2_4_11, /* We've seen ESC $ 0x2B. */
372 ISO_ESC_2_4_12, /* We've seen ESC $ 0x2C. */
373 ISO_ESC_2_4_13, /* We've seen ESC $ 0x2D. */
374 ISO_ESC_2_4_14, /* We've seen ESC $ 0x2E. */
375 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */
376 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This
377 starts a directionality-control
378 sequence. The next character
379 must be 0, 1, 2, or ]. */
380 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next character must be ]. */
381 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next character must be ]. */
382 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next character must be ]. */
383
384 /* Full sequences. */
385 #ifdef ENABLE_COMPOSITE_CHARS
386 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */
387 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */
388 #endif /* ENABLE_COMPOSITE_CHARS */
389 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */
390 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */
391 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */
392 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality
393 sequence. */
394 ISO_ESC_LITERAL /* We've seen a literal character ala
395 escape-quoting. */
396 }; 672 };
397 673
674 #define DETECTION_STATE_DATA(st, type) \
675 ((struct type##_detector *) \
676 ((char *) (st) + (st)->data_offset[detector_##type]))
677
678 /* Distinguishable categories of encodings.
679
680 This list determines the initial priority of the categories.
681
682 For better or worse, currently Mule files are encoded in 7-bit ISO 2022.
683 For this reason, under Mule ISO_7 gets highest priority.
684
685 Putting NO_CONVERSION second prevents "binary corruption" in the
686 default case in all but the (presumably) extremely rare case of a
687 binary file which contains redundant escape sequences but no 8-bit
688 characters.
689
690 The remaining priorities are based on perceived "internationalization
691 political correctness." An exception is UCS-4 at the bottom, since
692 basically everything is compatible with UCS-4, but it is likely to
693 be very rare as an external encoding. */
694
398 /* Macros to define code of control characters for ISO2022's functions. */ 695 /* Macros to define code of control characters for ISO2022's functions. */
696 /* Used by the detection routines of other coding system types as well. */
399 /* code */ /* function */ 697 /* code */ /* function */
400 #define ISO_CODE_LF 0x0A /* line-feed */ 698 #define ISO_CODE_LF 0x0A /* line-feed */
401 #define ISO_CODE_CR 0x0D /* carriage-return */ 699 #define ISO_CODE_CR 0x0D /* carriage-return */
402 #define ISO_CODE_SO 0x0E /* shift-out */ 700 #define ISO_CODE_SO 0x0E /* shift-out */
403 #define ISO_CODE_SI 0x0F /* shift-in */ 701 #define ISO_CODE_SI 0x0F /* shift-in */
404 #define ISO_CODE_ESC 0x1B /* escape */ 702 #define ISO_CODE_ESC 0x1B /* escape */
405 #define ISO_CODE_DEL 0x7F /* delete */ 703 #define ISO_CODE_DEL 0x7F /* delete */
406 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ 704 #define ISO_CODE_SS2 0x8E /* single-shift-2 */
407 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ 705 #define ISO_CODE_SS3 0x8F /* single-shift-3 */
408 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ 706 #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */
707
708 enum detection_result
709 {
710 /* Basically means a magic cookie was seen indicating this type, or
711 something similar. */
712 DET_NEAR_CERTAINTY = 4,
713 DET_HIGHEST = 4,
714 /* Characteristics seen that are unlikely to be other coding system types
715 -- e.g. ISO-2022 escape sequences, or perhaps a consistent pattern of
716 alternating zero bytes in UTF-16, along with Unicode LF or CRLF
717 sequences at regular intervals. (Zero bytes are unlikely or impossible
718 in most text encodings.) */
719 DET_QUITE_PROBABLE = 3,
720 /* Strong or medium statistical likelihood. At least some
721 characteristics seen that match what's normally found in this encoding
722 -- e.g. in Shift-JIS, a number of two-byte Japanese character
723 sequences in the right range, and nothing out of range; or in Unicode,
724 much higher statistical variance in the odd bytes than in the even
725 bytes, or vice-versa (perhaps the presence of regular EOL sequences
726 would bump this too to DET_QUITE_PROBABLE). This is quite often a
727 statistical test. */
728 DET_SOMEWHAT_LIKELY = 2,
729 /* Weak statistical likelihood. Pretty much any features at all that
730 characterize this encoding, and nothing that rules against it. */
731 DET_SLIGHTLY_LIKELY = 1,
732 /* Default state. Perhaps it indicates pure ASCII or something similarly
733 vague seen in Shift-JIS, or, exactly as the level says, it might mean
734 in a statistical-based detector that the pros and cons are balanced
735 out. This is also the lowest level that will be accepted by the
736 auto-detector without asking the user: If all available detectors
737 report lower levels for all categories with attached coding systems,
738 the user will be shown the results and explicitly prompted for action.
739 The user will also be prompted if this is the highest available level
740 and more than one detector reports the level. (See below about the
741 consequent necessity of an "ASCII" detector, which will return level 1
742 or higher for most plain text files.) */
743 DET_AS_LIKELY_AS_UNLIKELY = 0,
744 /* Some characteristics seen that are unusual for this encoding --
745 e.g. unusual control characters in a plain-text encoding, lots of
746 8-bit characters, or little statistical variance in the odd and even
747 bytes in UTF-16. */
748 DET_SOMEWHAT_UNLIKELY = -1,
749 /* This indicates that there is very little chance the data is in the
750 right format; this is probably the lowest level you can get when
751 presenting random binary data to a text file, because there are no
752 "specific sequences" you can see that would totally rule out
753 recognition. */
754 DET_QUITE_IMPROBABLE = -2,
755 /* An erroneous sequence was seen. */
756 DET_NEARLY_IMPOSSIBLE = -3,
757 DET_LOWEST = -3 /* bottom of the scale; same value as DET_NEARLY_IMPOSSIBLE */
758 };
759
760 extern int coding_detector_count;
761 extern int coding_detector_category_count;
762
763 struct detector_category
764 {
765 int id;
766 Lisp_Object sym;
767 };
768
769 typedef struct
770 {
771 Dynarr_declare (struct detector_category);
772 } detector_category_dynarr;
773
774 struct detector
775 {
776 int id;
777 detector_category_dynarr *cats;
778 Bytecount data_size;
779 /* Detect method: Required. */
780 void (*detect_method) (struct detection_state *st,
781 const unsigned char *src, Bytecount n);
782 /* Finalize detection state method: Clean up any allocated data in the
783 detection state. Called only once (NOT called at disksave time).
784 Optional. */
785 void (*finalize_detection_state_method) (struct detection_state *st);
786 };
787
788 /* Lvalue for a particular detection result -- detection state ST,
789 category CAT */
790 #define DET_RESULT(st, cat) ((st)->categories[detector_category_##cat])
791 /* In state ST, set all detection results associated with detector DET to
792 RESULT. */
793 #define SET_DET_RESULTS(st, det, result) \
794 set_detection_results (st, detector_##det, result)
795
796 typedef struct
797 {
798 Dynarr_declare (struct detector);
799 } detector_dynarr;
800
801 extern detector_dynarr *all_coding_detectors;
802
803 #define DEFINE_DETECTOR_CATEGORY(detector, cat) \
804 int detector_category_##cat
805 #define DECLARE_DETECTOR_CATEGORY(detector, cat) \
806 extern int detector_category_##cat
807 #define INITIALIZE_DETECTOR_CATEGORY(detector, cat) \
808 do { \
809 struct detector_category dog; \
810 xzero (dog); \
811 detector_category_##cat = coding_detector_category_count++; \
812 dump_add_opaque_int (&detector_category_##cat); \
813 dog.id = detector_category_##cat; \
814 dog.sym = Q##cat; \
815 Dynarr_add (Dynarr_at (all_coding_detectors, detector_##detector).cats, \
816 dog); \
817 } while (0)
818
819 #define DEFINE_DETECTOR(Detector) \
820 int detector_##Detector
821 #define DECLARE_DETECTOR(Detector) \
822 extern int detector_##Detector
823 #define INITIALIZE_DETECTOR(Detector) \
824 do { \
825 struct detector det; \
826 xzero (det); \
827 detector_##Detector = coding_detector_count++; \
828 dump_add_opaque_int (&detector_##Detector); \
829 det.id = detector_##Detector; \
830 det.cats = Dynarr_new2 (detector_category_dynarr, \
831 struct detector_category); \
832 det.data_size = sizeof (struct Detector##_detector); \
833 Dynarr_add (all_coding_detectors, det); \
834 } while (0)
835 #define DETECTOR_HAS_METHOD(Detector, Meth) \
836 Dynarr_at (all_coding_detectors, detector_##Detector).Meth##_method = \
837 Detector##_##Meth
838
839
840 /**************************************************/
841 /* Decoding/Encoding */
842 /**************************************************/
843
844 /* Is the source (SOURCEP == 1) or sink (SOURCEP == 0) when encoding specified
845 in characters? */
846
847 enum source_or_sink
848 {
849 CODING_SOURCE,
850 CODING_SINK
851 };
852
853 enum encode_decode
854 {
855 CODING_ENCODE,
856 CODING_DECODE
857 };
858
859 /* Data structure attached to an lstream of type `coding',
860 containing values specific to the coding process. Additional
861 data is stored in the DATA field below; the exact form of that data
862 is controlled by the type of the coding system that governs the
863 conversion (field CODESYS). CODESYS may be set at any time
864 throughout the lifetime of the lstream and possibly more than once.
865 See long comment above for more info. */
866
867 struct coding_stream
868 {
869 /* Coding system that governs the conversion. */
870 Lisp_Object codesys;
871 /* Original coding system, pre-canonicalization. */
872 Lisp_Object orig_codesys;
873
874 /* Back pointer to current stream. */
875 Lstream *us;
876
877 /* Stream that we read the unprocessed data from or write the processed
878 data to. */
879 Lstream *other_end;
880
881 /* In order to handle both reading to and writing from a coding stream,
882 we phrase the conversion methods like write methods -- we can
883 implement reading in terms of a write method but not vice-versa,
884 because the write method is forced to take only what it's given but
885 the read method can read more data from the other end if necessary.
886 On the other hand, the write method is free to generate all the data
887 it wants (and just write it to the other end), but the read method
888 can return only as much as was asked for, so we need to implement our
889 own buffering. */
890
891 /* If we are reading, then we can return only a fixed amount of data, but
892 the converter is free to return as much as it wants, so we direct it
893 to store the data here and lop off chunks as we need them. If we are
894 writing, we use this because the converter takes a Dynarr but we are
895 supposed to write into a fixed buffer. (NOTE: This introduces an extra
896 memory copy.) */
897 unsigned_char_dynarr *convert_to;
898
899 /* The conversion method might reject some of the data -- this typically
900 includes partial characters, partial escape sequences, etc. When
901 writing, we just pass the rejection up to the Lstream module, and it
902 will buffer the data. When reading, however, we need to do the
903 buffering ourselves, and we put it here, combined with newly read
904 data. */
905 unsigned_char_dynarr *convert_from;
906
907 /* If set, this is the last chunk of data being processed. When this is
908 finished, output any necessary terminating control characters, escape
909 sequences, etc. */
910 unsigned int eof:1;
911
912 /* CH holds a partially built-up character. This is really part of the
913 state-dependent data and should be moved there. */
914 unsigned int ch;
915
916 /* Coding-system-specific data holding extra state about the
917 conversion. Logically a struct TYPE_coding_stream; a pointer
918 to such a struct, with (when ERROR_CHECK_TYPECHECK is defined)
919 error-checking that this is really a structure of that type
920 (checking the corresponding coding system type) can be retrieved using
921 CODING_STREAM_TYPE_DATA(). Allocated at the same time that
922 CODESYS is set (which may occur at any time, even multiple times,
923 during the lifetime of the stream). The size comes from
924 methods->coding_data_size. */
925 void *data;
926
927 enum encode_decode direction;
928
929 /* #### Temporary test */
930 unsigned int finalized:1;
931 };
932
933 #define CODING_STREAM_DATA(stream) LSTREAM_TYPE_DATA (stream, coding)
934
935 #ifdef ERROR_CHECK_TYPECHECK
936 # define CODING_STREAM_TYPE_DATA(s, type) \
937 error_check_##type##_coding_stream_data (s)
938 #else
939 # define CODING_STREAM_TYPE_DATA(s, type) \
940 ((struct type##_coding_stream *) (s)->data)
941 #endif
942
943 /* C should be a binary character in the range 0 - 255; convert
944 to internal format and add to Dynarr DST. */
945
946 #ifdef MULE
947
948 #define DECODE_ADD_BINARY_CHAR(c, dst) \
949 do { \
950 if (BYTE_ASCII_P (c)) \
951 Dynarr_add (dst, c); \
952 else if (BYTE_C1_P (c)) \
953 { \
954 Dynarr_add (dst, LEADING_BYTE_CONTROL_1); \
955 Dynarr_add (dst, c + 0x20); \
956 } \
957 else \
958 { \
959 Dynarr_add (dst, LEADING_BYTE_LATIN_ISO8859_1); \
960 Dynarr_add (dst, c); \
961 } \
962 } while (0)
963
964 #else /* not MULE */
965
966 #define DECODE_ADD_BINARY_CHAR(c, dst) \
967 do { \
968 Dynarr_add (dst, c); \
969 } while (0)
970
409 #endif /* MULE */ 971 #endif /* MULE */
410 972
411 /* Distinguishable categories of encodings. 973 #define DECODE_OUTPUT_PARTIAL_CHAR(ch, dst) \
412 974 do { \
413 This list determines the initial priority of the categories. 975 if (ch) \
414 976 { \
415 For better or worse, currently Mule files are encoded in 7-bit ISO 2022. 977 DECODE_ADD_BINARY_CHAR (ch, dst); \
416 For this reason, under Mule ISO_7 gets highest priority. 978 ch = 0; \
417 979 } \
418 Putting NO_CONVERSION second prevents "binary corruption" in the 980 } while (0)
419 default case in all but the (presumably) extremely rare case of a
420 binary file which contains redundant escape sequences but no 8-bit
421 characters.
422
423 The remaining priorities are based on perceived "internationalization
424 political correctness." An exception is UCS-4 at the bottom, since
425 basically everything is compatible with UCS-4, but it is likely to
426 be very rare as an external encoding. */
427
428 enum coding_category_type
429 {
430 /* must be a contiguous range of values 0 -- CODING_CATEGORY_LAST - 1 */
431 #ifdef MULE
432 CODING_CATEGORY_ISO_7, /* ISO2022 system using only seven-bit bytes,
433 no locking shift */
434 CODING_CATEGORY_NO_CONVERSION,
435 CODING_CATEGORY_UTF8,
436 CODING_CATEGORY_ISO_8_1, /* ISO2022 system using eight-bit bytes,
437 no locking shift, no designation sequences,
438 one-dimension characters in the upper half. */
439 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes,
440 no locking shift, no designation sequences,
441 two-dimension characters in the upper half. */
442 CODING_CATEGORY_ISO_8_DESIGNATE, /* ISO2022 system using eight-bit bytes,
443 no locking shift, no single shift,
444 using designation to switch charsets */
445 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */
446 CODING_CATEGORY_SHIFT_JIS,
447 CODING_CATEGORY_BIG5,
448 CODING_CATEGORY_UCS4,
449 #else /* not MULE */
450 CODING_CATEGORY_NO_CONVERSION,
451 #endif /* MULE */
452 CODING_CATEGORY_LAST /* not a real coding category */
453 };
454
455 #ifdef MULE
456 #define CODING_CATEGORY_SHIFT_JIS_MASK \
457 (1 << CODING_CATEGORY_SHIFT_JIS)
458 #define CODING_CATEGORY_ISO_7_MASK \
459 (1 << CODING_CATEGORY_ISO_7)
460 #define CODING_CATEGORY_ISO_8_DESIGNATE_MASK \
461 (1 << CODING_CATEGORY_ISO_8_DESIGNATE)
462 #define CODING_CATEGORY_ISO_8_1_MASK \
463 (1 << CODING_CATEGORY_ISO_8_1)
464 #define CODING_CATEGORY_ISO_8_2_MASK \
465 (1 << CODING_CATEGORY_ISO_8_2)
466 #define CODING_CATEGORY_ISO_LOCK_SHIFT_MASK \
467 (1 << CODING_CATEGORY_ISO_LOCK_SHIFT)
468 #define CODING_CATEGORY_BIG5_MASK \
469 (1 << CODING_CATEGORY_BIG5)
470 #define CODING_CATEGORY_UCS4_MASK \
471 (1 << CODING_CATEGORY_UCS4)
472 #define CODING_CATEGORY_UTF8_MASK \
473 (1 << CODING_CATEGORY_UTF8)
474 #endif
475 #define CODING_CATEGORY_NO_CONVERSION_MASK \
476 (1 << CODING_CATEGORY_NO_CONVERSION)
477 #define CODING_CATEGORY_NOT_FINISHED_MASK \
478 (1 << 30)
479 981
480 #ifdef MULE 982 #ifdef MULE
481 /* Convert shift-JIS code (sj1, sj2) into internal string 983 /* Convert shift-JIS code (sj1, sj2) into internal string
482 representation (c1, c2). (The leading byte is assumed.) */ 984 representation (c1, c2). (The leading byte is assumed.) */
483 985
484 #define DECODE_SJIS(sj1, sj2, c1, c2) \ 986 #define DECODE_SHIFT_JIS(sj1, sj2, c1, c2) \
485 do { \ 987 do { \
486 int I1 = sj1, I2 = sj2; \ 988 int I1 = sj1, I2 = sj2; \
487 if (I2 >= 0x9f) \ 989 if (I2 >= 0x9f) \
488 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe0 : 0x60), \ 990 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe0 : 0x60), \
489 c2 = I2 + 2; \ 991 c2 = I2 + 2; \
494 996
495 /* Convert the internal string representation of a Shift-JIS character 997 /* Convert the internal string representation of a Shift-JIS character
496 (c1, c2) into Shift-JIS code (sj1, sj2). The leading byte is 998 (c1, c2) into Shift-JIS code (sj1, sj2). The leading byte is
497 assumed. */ 999 assumed. */
498 1000
499 #define ENCODE_SJIS(c1, c2, sj1, sj2) \ 1001 #define ENCODE_SHIFT_JIS(c1, c2, sj1, sj2) \
500 do { \ 1002 do { \
501 int I1 = c1, I2 = c2; \ 1003 int I1 = c1, I2 = c2; \
502 if (I1 & 1) \ 1004 if (I1 & 1) \
503 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x31 : 0x71), \ 1005 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x31 : 0x71), \
504 sj2 = I2 - ((I2 >= 0xe0) ? 0x60 : 0x61); \ 1006 sj2 = I2 - ((I2 >= 0xe0) ? 0x60 : 0x61); \
506 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x30 : 0x70), \ 1008 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x30 : 0x70), \
507 sj2 = I2 - 2; \ 1009 sj2 = I2 - 2; \
508 } while (0) 1010 } while (0)
509 #endif /* MULE */ 1011 #endif /* MULE */
510 1012
511 Lisp_Object make_decoding_input_stream (Lstream *stream, 1013 DECLARE_CODING_SYSTEM_TYPE (no_conversion);
512 Lisp_Object codesys); 1014 DECLARE_CODING_SYSTEM_TYPE (convert_eol);
513 Lisp_Object make_encoding_input_stream (Lstream *stream, 1015 #if 0
514 Lisp_Object codesys); 1016 DECLARE_CODING_SYSTEM_TYPE (text_file_wrapper);
515 Lisp_Object make_decoding_output_stream (Lstream *stream, 1017 #endif /* 0 */
516 Lisp_Object codesys); 1018 DECLARE_CODING_SYSTEM_TYPE (undecided);
517 Lisp_Object make_encoding_output_stream (Lstream *stream, 1019 DECLARE_CODING_SYSTEM_TYPE (chain);
518 Lisp_Object codesys); 1020
519 Lisp_Object decoding_stream_coding_system (Lstream *stream); 1021 #ifdef DEBUG_XEMACS
520 Lisp_Object encoding_stream_coding_system (Lstream *stream); 1022 DECLARE_CODING_SYSTEM_TYPE (internal);
521 void set_decoding_stream_coding_system (Lstream *stream,
522 Lisp_Object codesys);
523 void set_encoding_stream_coding_system (Lstream *stream,
524 Lisp_Object codesys);
525 void determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
526 eol_type_t *eol_type_in_out);
527
528
529 #ifndef MULE
530 #define MIN_LEADING_BYTE 0x80
531 /* These need special treatment in a string and/or character */
532 #ifdef ENABLE_COMPOSITE_CHARS
533 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */
534 #endif 1023 #endif
535 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ 1024
536 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ 1025 #ifdef MULE
537 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20) 1026 DECLARE_CODING_SYSTEM_TYPE (iso2022);
538 #define INTBYTE_FIRST_BYTE_P(c) ((c) < 0xA0) 1027 DECLARE_CODING_SYSTEM_TYPE (ccl);
539 #define INTBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c) 1028 DECLARE_CODING_SYSTEM_TYPE (shift_jis);
540 #endif /* not MULE */ 1029 DECLARE_CODING_SYSTEM_TYPE (big5);
1030 #endif
1031
1032 #ifdef HAVE_ZLIB
1033 DECLARE_CODING_SYSTEM_TYPE (gzip);
1034 #endif
1035
1036 DECLARE_CODING_SYSTEM_TYPE (unicode);
1037
1038 #ifdef HAVE_WIN32_CODING_SYSTEMS
1039 DECLARE_CODING_SYSTEM_TYPE (mswindows_multibyte_to_unicode);
1040 DECLARE_CODING_SYSTEM_TYPE (mswindows_multibyte);
1041 #endif
1042
1043 Lisp_Object coding_stream_detected_coding_system (Lstream *stream);
1044 Lisp_Object coding_stream_coding_system (Lstream *stream);
1045 void set_coding_stream_coding_system (Lstream *stream,
1046 Lisp_Object codesys);
1047 Lisp_Object detect_coding_stream (Lisp_Object stream);
1048 Emchar decode_big5_char (int o1, int o2);
1049 void add_entry_to_coding_system_type_list (struct coding_system_methods *m);
1050 Lisp_Object make_internal_coding_system (Lisp_Object existing,
1051 Char_ASCII *prefix,
1052 Lisp_Object type,
1053 Lisp_Object description,
1054 Lisp_Object props);
1055 Lisp_Object make_coding_input_stream (Lstream *stream, Lisp_Object codesys,
1056 enum encode_decode direction);
1057 Lisp_Object make_coding_output_stream (Lstream *stream, Lisp_Object codesys,
1058 enum encode_decode direction);
1059 void set_detection_results (struct detection_state *st, int detector,
1060 int given);
541 1061
542 #endif /* INCLUDED_file_coding_h_ */ 1062 #endif /* INCLUDED_file_coding_h_ */
543 1063