428
|
1 /* Header for code conversion stuff
|
|
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
|
|
3 Copyright (C) 1995 Sun Microsystems, Inc.
|
|
4
|
|
5 This file is part of XEmacs.
|
|
6
|
|
7 XEmacs is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by the
|
|
9 Free Software Foundation; either version 2, or (at your option) any
|
|
10 later version.
|
|
11
|
|
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with XEmacs; see the file COPYING. If not, write to
|
|
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
20 Boston, MA 02111-1307, USA. */
|
|
21
|
|
22 /* Synched up with: Mule 2.3. Not in FSF. */
|
|
23
|
|
24 /* 91.10.09 written by K.Handa <handa@etl.go.jp> */
|
|
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */
|
|
26
|
440
|
27 #ifndef INCLUDED_file_coding_h_
|
|
28 #define INCLUDED_file_coding_h_
|
428
|
29
|
|
30 struct decoding_stream;
|
|
31 struct encoding_stream;
|
|
32
|
|
/* The kinds of coding system.  One of these values is stored in the
   `type' field of a struct Lisp_Coding_System. */

enum coding_system_type
{
  /* Automatic conversion. */
  CODESYS_AUTODETECT,
#ifdef MULE
  /* Shift-JIS; Hankaku (half-width) KANA is also supported. */
  CODESYS_SHIFT_JIS,
  /* Any ISO2022-compliant coding system.  Includes JIS, EUC, CTEXT. */
  CODESYS_ISO2022,
  /* BIG5 (used for Taiwanese). */
  CODESYS_BIG5,
  /* ISO 10646 UCS-4. */
  CODESYS_UCS4,
  /* ISO 10646 UTF-8. */
  CODESYS_UTF8,
  /* Converter written in CCL. */
  CODESYS_CCL,
#endif
  /* "No conversion"; used for binary files.  The name is quoted
     because some conversion really is being applied, but to the user
     it looks as if the text is read in without conversion. */
  CODESYS_NO_CONVERSION
#ifdef DEBUG_XEMACS
  /* Raw (internally-formatted) data.  Only exists in debug builds;
     the leading comma keeps the enumerator list valid either way. */
  , CODESYS_INTERNAL
#endif
};
|
|
58
|
|
/* End-of-line marking used by a coding system.  EOL_AUTODETECT means
   the marking is not fixed in advance; the other three values name one
   particular marking each. */
typedef enum eol_type
{
  EOL_AUTODETECT = 0,
  EOL_LF         = 1,
  EOL_CRLF       = 2,
  EOL_CR         = 3
} eol_type_t;
|
|
67
|
|
#ifdef MULE
/* One charset conversion rule: a source charset paired with a target
   charset.  NOTE(review): how the pair is applied is decided by the
   users of input_conv / output_conv -- confirm against the
   implementation. */
typedef struct charset_conversion_spec
{
  Lisp_Object from_charset;
  Lisp_Object to_charset;
} charset_conversion_spec;

/* Dynamic array (Dynarr) whose elements are charset_conversion_spec. */
typedef struct
{
  Dynarr_declare (charset_conversion_spec);
} charset_conversion_spec_dynarr;
#endif /* MULE */
|
|
81
|
|
82 struct Lisp_Coding_System
|
|
83 {
|
|
84 struct lcrecord_header header;
|
|
85
|
|
86 /* Name and doc string of this coding system. */
|
440
|
87 Lisp_Object name;
|
|
88 Lisp_Object doc_string;
|
428
|
89
|
|
90 /* This is the major type of the coding system -- one of Big5, ISO2022,
|
|
91 Shift-JIS, etc. See the constants above. */
|
|
92 enum coding_system_type type;
|
|
93
|
|
94 /* Mnemonic string displayed in the modeline when this coding
|
|
95 system is active for a particular buffer. */
|
|
96 Lisp_Object mnemonic;
|
|
97
|
440
|
98 Lisp_Object post_read_conversion;
|
|
99 Lisp_Object pre_write_conversion;
|
428
|
100
|
438
|
101 eol_type_t eol_type;
|
428
|
102
|
|
103 /* Subsidiary coding systems that specify a particular type of EOL
|
|
104 marking, rather than autodetecting it. These will only be non-nil
|
|
105 if (eol_type == EOL_AUTODETECT). */
|
440
|
106 Lisp_Object eol_lf;
|
|
107 Lisp_Object eol_crlf;
|
|
108 Lisp_Object eol_cr;
|
428
|
109 #ifdef MULE
|
|
110 struct
|
|
111 {
|
|
112 /* What are the charsets to be initially designated to G0, G1,
|
|
113 G2, G3? If t, no charset is initially designated. If nil,
|
|
114 no charset is initially designated and no charset is allowed
|
|
115 to be designated. */
|
|
116 Lisp_Object initial_charset[4];
|
|
117
|
|
118 /* If true, a designation escape sequence needs to be sent on output
|
|
119 for the charset in G[0-3] before that charset is used. */
|
|
120 unsigned char force_charset_on_output[4];
|
|
121
|
|
122 charset_conversion_spec_dynarr *input_conv;
|
|
123 charset_conversion_spec_dynarr *output_conv;
|
|
124
|
|
125 unsigned int shoort :1; /* C makes you speak Dutch */
|
|
126 unsigned int no_ascii_eol :1;
|
|
127 unsigned int no_ascii_cntl :1;
|
|
128 unsigned int seven :1;
|
|
129 unsigned int lock_shift :1;
|
|
130 unsigned int no_iso6429 :1;
|
|
131 unsigned int escape_quoted :1;
|
|
132 } iso2022;
|
|
133 struct
|
|
134 {
|
|
135 /* For a CCL coding system, these specify the CCL programs used for
|
|
136 decoding (input) and encoding (output). */
|
440
|
137 Lisp_Object decode;
|
|
138 Lisp_Object encode;
|
428
|
139 } ccl;
|
|
140 #endif
|
|
141 };
|
|
142 typedef struct Lisp_Coding_System Lisp_Coding_System;
|
|
143
|
440
|
/* Register coding_system as an lrecord type whose C structure is
   Lisp_Coding_System, and define the usual family of conversion,
   predicate and checking macros in terms of the generic record
   macros. */
DECLARE_LRECORD (coding_system, Lisp_Coding_System);

#define XCODING_SYSTEM(obj) \
  XRECORD (obj, coding_system, Lisp_Coding_System)
#define XSETCODING_SYSTEM(obj, ptr) \
  XSETRECORD (obj, ptr, coding_system)
#define wrap_coding_system(ptr) \
  wrap_record (ptr, coding_system)
#define CODING_SYSTEMP(obj) \
  RECORDP (obj, coding_system)
#define CHECK_CODING_SYSTEM(obj) \
  CHECK_RECORD (obj, coding_system)
#define CONCHECK_CODING_SYSTEM(obj) \
  CONCHECK_RECORD (obj, coding_system)
|
|
151
|
|
/* Field accessors taking a pointer to a Lisp_Coding_System.  Each one
   expands to the field itself, so it can be read or assigned. */
#define CODING_SYSTEM_NAME(cs)                  ((cs)->name)
#define CODING_SYSTEM_DOC_STRING(cs)            ((cs)->doc_string)
#define CODING_SYSTEM_TYPE(cs)                  ((cs)->type)
#define CODING_SYSTEM_MNEMONIC(cs)              ((cs)->mnemonic)
#define CODING_SYSTEM_POST_READ_CONVERSION(cs)  ((cs)->post_read_conversion)
#define CODING_SYSTEM_PRE_WRITE_CONVERSION(cs)  ((cs)->pre_write_conversion)
#define CODING_SYSTEM_EOL_TYPE(cs)              ((cs)->eol_type)
#define CODING_SYSTEM_EOL_LF(cs)                ((cs)->eol_lf)
#define CODING_SYSTEM_EOL_CRLF(cs)              ((cs)->eol_crlf)
#define CODING_SYSTEM_EOL_CR(cs)                ((cs)->eol_cr)

#ifdef MULE
/* Accessors for the ISO2022-specific fields; G selects one of the
   four entries G0..G3. */
#define CODING_SYSTEM_ISO2022_INITIAL_CHARSET(cs, g) \
  ((cs)->iso2022.initial_charset[g])
#define CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(cs, g) \
  ((cs)->iso2022.force_charset_on_output[g])
#define CODING_SYSTEM_ISO2022_SHORT(cs)         ((cs)->iso2022.shoort)
#define CODING_SYSTEM_ISO2022_NO_ASCII_EOL(cs)  ((cs)->iso2022.no_ascii_eol)
#define CODING_SYSTEM_ISO2022_NO_ASCII_CNTL(cs) ((cs)->iso2022.no_ascii_cntl)
#define CODING_SYSTEM_ISO2022_SEVEN(cs)         ((cs)->iso2022.seven)
#define CODING_SYSTEM_ISO2022_LOCK_SHIFT(cs)    ((cs)->iso2022.lock_shift)
#define CODING_SYSTEM_ISO2022_NO_ISO6429(cs)    ((cs)->iso2022.no_iso6429)
#define CODING_SYSTEM_ISO2022_ESCAPE_QUOTED(cs) ((cs)->iso2022.escape_quoted)

/* Accessors for the CCL-specific fields. */
#define CODING_SYSTEM_CCL_DECODE(cs)            ((cs)->ccl.decode)
#define CODING_SYSTEM_CCL_ENCODE(cs)            ((cs)->ccl.encode)
#endif /* MULE */
|
|
185
|
|
/* Same accessors as the CODING_SYSTEM_* family, but taking the object
   itself: the argument is first passed through XCODING_SYSTEM. */
#define XCODING_SYSTEM_NAME(obj) \
  CODING_SYSTEM_NAME (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_DOC_STRING(obj) \
  CODING_SYSTEM_DOC_STRING (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_TYPE(obj) \
  CODING_SYSTEM_TYPE (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_MNEMONIC(obj) \
  CODING_SYSTEM_MNEMONIC (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_POST_READ_CONVERSION(obj) \
  CODING_SYSTEM_POST_READ_CONVERSION (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_PRE_WRITE_CONVERSION(obj) \
  CODING_SYSTEM_PRE_WRITE_CONVERSION (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_EOL_TYPE(obj) \
  CODING_SYSTEM_EOL_TYPE (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_EOL_LF(obj) \
  CODING_SYSTEM_EOL_LF (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_EOL_CRLF(obj) \
  CODING_SYSTEM_EOL_CRLF (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_EOL_CR(obj) \
  CODING_SYSTEM_EOL_CR (XCODING_SYSTEM (obj))

#ifdef MULE
/* ISO2022-specific and CCL-specific fields, object-taking variants. */
#define XCODING_SYSTEM_ISO2022_INITIAL_CHARSET(obj, g) \
  CODING_SYSTEM_ISO2022_INITIAL_CHARSET (XCODING_SYSTEM (obj), g)
#define XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(obj, g) \
  CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (XCODING_SYSTEM (obj), g)
#define XCODING_SYSTEM_ISO2022_SHORT(obj) \
  CODING_SYSTEM_ISO2022_SHORT (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_NO_ASCII_EOL(obj) \
  CODING_SYSTEM_ISO2022_NO_ASCII_EOL (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL(obj) \
  CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_SEVEN(obj) \
  CODING_SYSTEM_ISO2022_SEVEN (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_LOCK_SHIFT(obj) \
  CODING_SYSTEM_ISO2022_LOCK_SHIFT (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_NO_ISO6429(obj) \
  CODING_SYSTEM_ISO2022_NO_ISO6429 (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED(obj) \
  CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_CCL_DECODE(obj) \
  CODING_SYSTEM_CCL_DECODE (XCODING_SYSTEM (obj))
#define XCODING_SYSTEM_CCL_ENCODE(obj) \
  CODING_SYSTEM_CCL_ENCODE (XCODING_SYSTEM (obj))
#endif /* MULE */
|
|
231
|
|
232 EXFUN (Fcoding_category_list, 0);
|
|
233 EXFUN (Fcoding_category_system, 1);
|
|
234 EXFUN (Fcoding_priority_list, 0);
|
|
235 EXFUN (Fcoding_system_doc_string, 1);
|
|
236 EXFUN (Fcoding_system_list, 0);
|
|
237 EXFUN (Fcoding_system_name, 1);
|
|
238 EXFUN (Fcoding_system_p, 1);
|
|
239 EXFUN (Fcoding_system_property, 2);
|
|
240 EXFUN (Fcoding_system_type, 1);
|
|
241 EXFUN (Fcopy_coding_system, 2);
|
|
242 EXFUN (Fdecode_big5_char, 1);
|
|
243 EXFUN (Fdecode_coding_region, 4);
|
|
244 EXFUN (Fdecode_shift_jis_char, 1);
|
|
245 EXFUN (Fdetect_coding_region, 3);
|
|
246 EXFUN (Fencode_big5_char, 1);
|
|
247 EXFUN (Fencode_coding_region, 4);
|
|
248 EXFUN (Fencode_shift_jis_char, 1);
|
|
249 EXFUN (Ffind_coding_system, 1);
|
|
250 EXFUN (Fget_coding_system, 1);
|
|
251 EXFUN (Fmake_coding_system, 4);
|
|
252 EXFUN (Fset_coding_category_system, 2);
|
|
253 EXFUN (Fset_coding_priority_list, 1);
|
|
254 EXFUN (Fsubsidiary_coding_system, 2);
|
|
255
|
|
256 extern Lisp_Object Qucs4, Qutf8;
|
|
257 extern Lisp_Object Qbig5, Qccl, Qcharset_g0;
|
|
258 extern Lisp_Object Qcharset_g1, Qcharset_g2, Qcharset_g3, Qcoding_system_error;
|
440
|
259 extern Lisp_Object Qcoding_systemp, Qcr, Qcrlf, Qdecode, Qencode;
|
428
|
260 extern Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf, Qeol_type, Qescape_quoted;
|
|
261 extern Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output;
|
|
262 extern Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output;
|
|
263 extern Lisp_Object Qinput_charset_conversion, Qiso2022, Qlf, Qlock_shift;
|
|
264 extern Lisp_Object Qmnemonic, Qno_ascii_cntl, Qno_ascii_eol, Qno_conversion;
|
|
265 extern Lisp_Object Qraw_text;
|
|
266 extern Lisp_Object Qno_iso6429, Qoutput_charset_conversion;
|
|
267 extern Lisp_Object Qpost_read_conversion, Qpre_write_conversion, Qseven;
|
|
268 extern Lisp_Object Qshift_jis, Qshort, Vcoding_system_for_read;
|
|
269 extern Lisp_Object Vcoding_system_for_write, Vcoding_system_hash_table;
|
|
270 extern Lisp_Object Vfile_name_coding_system, Vkeyboard_coding_system;
|
|
271 extern Lisp_Object Vterminal_coding_system;
|
|
272
|
|
/* Flags describing the current state while code is being converted. */

/* Flags used by every converter. */

/* Set when the chunk being processed is the last one.  Once it has
   been handled, any necessary terminating control characters, escape
   sequences, etc. must be output. */
#define CODING_STATE_END (1 << 0)

/* Set when a CR has just been seen. */
#define CODING_STATE_CR (1 << 1)
|
|
283
|
|
284
|
|
285 /* Used by Big 5 on output. */
|
|
286 #ifdef MULE
|
|
/* Set when LEADING_BYTE_BIG5_1 has just been encountered. */
#define CODING_STATE_BIG5_1 (1 << 2)

/* Set when LEADING_BYTE_BIG5_2 has just been encountered. */
#define CODING_STATE_BIG5_2 (1 << 3)


/* Used by ISO2022 on input and output. */

/* Set when the current directionality is right-to-left; clear when it
   is left-to-right. */
#define CODING_STATE_R2L (1 << 4)


/* Used by ISO2022 on input. */

/* Set while an escape sequence is being parsed; the upper 16 bits
   should then be looked at to find out which partial escape sequence
   has been seen so far.  Clear while running through actual text. */
#define CODING_STATE_ESCAPE (1 << 5)

/* Set when G2 is invoked into GL, but only for the next character. */
#define CODING_STATE_SS2 (1 << 6)

/* Set when G3 is invoked into GL, but only for the next character.
   If CODING_STATE_SS2 and CODING_STATE_SS3 are both set,
   CODING_STATE_SS2 overrides; but that probably indicates an error in
   the text encoding. */
#define CODING_STATE_SS3 (1 << 7)

#ifdef ENABLE_COMPOSITE_CHARS
/* Set while a composite character (i.e. a character constructed by
   overstriking two or more characters) is being processed. */
#define CODING_STATE_COMPOSITE (1 << 8)
#endif /* ENABLE_COMPOSITE_CHARS */


/* CODING_STATE_ISO2022_LOCK is the mask of flags that stay on until
   explicitly turned off in the ISO2022 encoder/decoder.  All other
   flags are turned off at the end of processing each character or
   escape sequence. */
#ifndef ENABLE_COMPOSITE_CHARS
# define CODING_STATE_ISO2022_LOCK (CODING_STATE_END | CODING_STATE_R2L)
#else /* ENABLE_COMPOSITE_CHARS */
# define CODING_STATE_ISO2022_LOCK \
  (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L)
#endif /* ENABLE_COMPOSITE_CHARS */

/* The corresponding mask for Big5: only CODING_STATE_END. */
#define CODING_STATE_BIG5_LOCK CODING_STATE_END
|
|
338
|
|
/* Progress markers for parsing an ISO2022 escape sequence: what has
   been seen so far.  The numeric suffixes are the column/row form of
   the byte, e.g. 2_8 is 0x28 and 5_11 is 0x5B. */
enum iso_esc_flag
{
  /* ---- Partial sequences ---- */

  /* Nothing has been seen. */
  ISO_ESC_NOTHING,
  /* ESC has been seen. */
  ISO_ESC,
  /* ESC $ has been seen: a multi-byte, rather than a single-byte,
     character set is being designated. */
  ISO_ESC_2_4,
  /* ESC 0x28, i.e. ESC ( -- designate a 94-character character set
     into G0. */
  ISO_ESC_2_8,
  /* ESC 0x29 -- designate a 94-character character set into G1. */
  ISO_ESC_2_9,
  /* ESC 0x2A. */
  ISO_ESC_2_10,
  /* ESC 0x2B. */
  ISO_ESC_2_11,
  /* ESC 0x2C -- designate a 96-character character set into G0.
     (This one is not ISO2022-standard; the 96-character control
     sequences that follow are standard, though.) */
  ISO_ESC_2_12,
  /* ESC 0x2D -- designate a 96-character character set into G1. */
  ISO_ESC_2_13,
  /* ESC 0x2E. */
  ISO_ESC_2_14,
  /* ESC 0x2F. */
  ISO_ESC_2_15,
  /* ESC $ 0x28 -- designate a 94^N character set into G0. */
  ISO_ESC_2_4_8,
  /* ESC $ 0x29. */
  ISO_ESC_2_4_9,
  /* ESC $ 0x2A. */
  ISO_ESC_2_4_10,
  /* ESC $ 0x2B. */
  ISO_ESC_2_4_11,
  /* ESC $ 0x2C. */
  ISO_ESC_2_4_12,
  /* ESC $ 0x2D. */
  ISO_ESC_2_4_13,
  /* ESC $ 0x2E. */
  ISO_ESC_2_4_14,
  /* ESC $ 0x2F. */
  ISO_ESC_2_4_15,
  /* ESC [ or 0x9B: start of a directionality-control sequence.  The
     next character must be 0, 1, 2, or ]. */
  ISO_ESC_5_11,
  /* 0x9B 0 has been seen.  The next character must be ]. */
  ISO_ESC_5_11_0,
  /* 0x9B 1 has been seen.  The next character must be ]. */
  ISO_ESC_5_11_1,
  /* 0x9B 2 has been seen.  The next character must be ]. */
  ISO_ESC_5_11_2,

  /* ---- Full sequences ---- */

#ifdef ENABLE_COMPOSITE_CHARS
  /* Private usage for START COMPOSING. */
  ISO_ESC_START_COMPOSITE,
  /* Private usage for END COMPOSING. */
  ISO_ESC_END_COMPOSITE,
#endif /* ENABLE_COMPOSITE_CHARS */
  /* A complete single-shift sequence has been seen. */
  ISO_ESC_SINGLE_SHIFT,
  /* A complete locking-shift sequence has been seen. */
  ISO_ESC_LOCKING_SHIFT,
  /* A complete designation sequence has been seen. */
  ISO_ESC_DESIGNATE,
  /* A complete ISO6429 directionality sequence has been seen. */
  ISO_ESC_DIRECTIONALITY,
  /* A literal character, a la escape-quoting, has been seen. */
  ISO_ESC_LITERAL
};
|
|
397
|
|
/* Byte values of the control characters that carry ISO2022 functions. */
#define ISO_CODE_LF  0x0A  /* LF:  line-feed */
#define ISO_CODE_CR  0x0D  /* CR:  carriage-return */
#define ISO_CODE_SO  0x0E  /* SO:  shift-out */
#define ISO_CODE_SI  0x0F  /* SI:  shift-in */
#define ISO_CODE_ESC 0x1B  /* ESC: escape */
#define ISO_CODE_DEL 0x7F  /* DEL: delete */
#define ISO_CODE_SS2 0x8E  /* SS2: single-shift-2 */
#define ISO_CODE_SS3 0x8F  /* SS3: single-shift-3 */
#define ISO_CODE_CSI 0x9B  /* CSI: control-sequence-introduce */
|
|
409 #endif /* MULE */
|
|
410
|
448
|
/* Distinguishable categories of encodings.

   The order of this list is the initial priority of the categories.

   - For better or worse, Mule files are currently encoded in 7-bit
     ISO 2022, so under Mule ISO_7 gets the highest priority.

   - NO_CONVERSION comes second.  That prevents "binary corruption" in
     the default case, except in the (presumably) extremely rare case
     of a binary file which contains redundant escape sequences but no
     8-bit characters.

   - The remaining priorities are based on perceived
     "internationalization political correctness."  The exception is
     UCS-4 at the bottom: basically everything is compatible with
     UCS-4, but it is likely to be very rare as an external
     encoding. */

enum coding_category_type
{
  /* The values must form a contiguous range
     0 .. CODING_CATEGORY_LAST - 1. */
#ifndef MULE
  CODING_CATEGORY_NO_CONVERSION,
#else /* MULE */
  /* ISO2022 system using only seven-bit bytes, no locking shift. */
  CODING_CATEGORY_ISO_7,
  CODING_CATEGORY_NO_CONVERSION,
  CODING_CATEGORY_UTF8,
  /* ISO2022 system using eight-bit bytes, no locking shift, no
     designation sequences, one-dimension characters in the upper
     half. */
  CODING_CATEGORY_ISO_8_1,
  /* ISO2022 system using eight-bit bytes, no locking shift, no
     designation sequences, two-dimension characters in the upper
     half. */
  CODING_CATEGORY_ISO_8_2,
  /* ISO2022 system using eight-bit bytes, no locking shift, no single
     shift, using designation to switch charsets. */
  CODING_CATEGORY_ISO_8_DESIGNATE,
  /* ISO2022 system using locking shift. */
  CODING_CATEGORY_ISO_LOCK_SHIFT,
  CODING_CATEGORY_SHIFT_JIS,
  CODING_CATEGORY_BIG5,
  CODING_CATEGORY_UCS4,
#endif /* MULE */
  /* Not a real coding category; marks the end of the range. */
  CODING_CATEGORY_LAST
};
|
|
454
|
|
/* One-bit masks, one per coding category: bit N corresponds to the
   category whose enum coding_category_type value is N. */
#ifdef MULE
#define CODING_CATEGORY_ISO_7_MASK           (1 << CODING_CATEGORY_ISO_7)
#define CODING_CATEGORY_UTF8_MASK            (1 << CODING_CATEGORY_UTF8)
#define CODING_CATEGORY_ISO_8_1_MASK         (1 << CODING_CATEGORY_ISO_8_1)
#define CODING_CATEGORY_ISO_8_2_MASK         (1 << CODING_CATEGORY_ISO_8_2)
#define CODING_CATEGORY_ISO_8_DESIGNATE_MASK (1 << CODING_CATEGORY_ISO_8_DESIGNATE)
#define CODING_CATEGORY_ISO_LOCK_SHIFT_MASK  (1 << CODING_CATEGORY_ISO_LOCK_SHIFT)
#define CODING_CATEGORY_SHIFT_JIS_MASK       (1 << CODING_CATEGORY_SHIFT_JIS)
#define CODING_CATEGORY_BIG5_MASK            (1 << CODING_CATEGORY_BIG5)
#define CODING_CATEGORY_UCS4_MASK            (1 << CODING_CATEGORY_UCS4)
#endif /* MULE */
#define CODING_CATEGORY_NO_CONVERSION_MASK   (1 << CODING_CATEGORY_NO_CONVERSION)

/* Extra flag in bit 30, which is not tied to any category value.
   NOTE(review): presumably reports that detection has not finished --
   confirm against the detection code. */
#define CODING_CATEGORY_NOT_FINISHED_MASK    (1 << 30)
|
|
479
|
|
480 #ifdef MULE
|
|
/* Convert the Shift-JIS code (sj1, sj2) into the internal string
   representation (c1, c2).  (The leading byte is assumed.)

   SJ1 and SJ2 are each evaluated exactly once and converted to int.
   C1 and C2 must be assignable expressions; they receive the two
   result bytes.

   The temporaries carry a macro-specific name so that they cannot
   capture the caller's own variables: with generic names such as
   I1/I2, passing a variable called I1 would expand to `int I1 = I1'
   and read an uninitialized object.  Every argument is parenthesized
   and both branches are braced statements. */

#define DECODE_SJIS(sj1, sj2, c1, c2)                                   \
do {                                                                    \
  int decode_sjis_b1_ = (sj1);                                          \
  int decode_sjis_b2_ = (sj2);                                          \
  if (decode_sjis_b2_ >= 0x9f)                                          \
    {                                                                   \
      (c1) = (decode_sjis_b1_ << 1)                                     \
             - ((decode_sjis_b1_ >= 0xe0) ? 0xe0 : 0x60);               \
      (c2) = decode_sjis_b2_ + 2;                                       \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      (c1) = (decode_sjis_b1_ << 1)                                     \
             - ((decode_sjis_b1_ >= 0xe0) ? 0xe1 : 0x61);               \
      (c2) = decode_sjis_b2_                                            \
             + ((decode_sjis_b2_ >= 0x7f) ? 0x60 : 0x61);               \
    }                                                                   \
} while (0)
|
|
494
|
|
/* Convert the internal string representation of a Shift-JIS character
   (c1, c2) into the Shift-JIS code (sj1, sj2).  The leading byte is
   assumed.

   C1 and C2 are each evaluated exactly once and converted to int.
   SJ1 and SJ2 must be assignable expressions; they receive the two
   result bytes.

   The temporaries carry a macro-specific name so that they cannot
   capture the caller's own variables: with generic names such as
   I1/I2, passing a variable called I1 would expand to `int I1 = I1'
   and read an uninitialized object.  Every argument is parenthesized
   and both branches are braced statements. */

#define ENCODE_SJIS(c1, c2, sj1, sj2)                                   \
do {                                                                    \
  int encode_sjis_b1_ = (c1);                                           \
  int encode_sjis_b2_ = (c2);                                           \
  if (encode_sjis_b1_ & 1)                                              \
    {                                                                   \
      (sj1) = (encode_sjis_b1_ >> 1)                                    \
              + ((encode_sjis_b1_ < 0xdf) ? 0x31 : 0x71);               \
      (sj2) = encode_sjis_b2_                                           \
              - ((encode_sjis_b2_ >= 0xe0) ? 0x60 : 0x61);              \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      (sj1) = (encode_sjis_b1_ >> 1)                                    \
              + ((encode_sjis_b1_ < 0xdf) ? 0x30 : 0x70);               \
      (sj2) = encode_sjis_b2_ - 2;                                      \
    }                                                                   \
} while (0)
|
|
509 #endif /* MULE */
|
|
510
|
|
511 Lisp_Object make_decoding_input_stream (Lstream *stream,
|
|
512 Lisp_Object codesys);
|
|
513 Lisp_Object make_encoding_input_stream (Lstream *stream,
|
|
514 Lisp_Object codesys);
|
|
515 Lisp_Object make_decoding_output_stream (Lstream *stream,
|
|
516 Lisp_Object codesys);
|
|
517 Lisp_Object make_encoding_output_stream (Lstream *stream,
|
|
518 Lisp_Object codesys);
|
|
519 Lisp_Object decoding_stream_coding_system (Lstream *stream);
|
|
520 Lisp_Object encoding_stream_coding_system (Lstream *stream);
|
|
521 void set_decoding_stream_coding_system (Lstream *stream,
|
|
522 Lisp_Object codesys);
|
|
523 void set_encoding_stream_coding_system (Lstream *stream,
|
|
524 Lisp_Object codesys);
|
|
525 void determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out,
|
438
|
526 eol_type_t *eol_type_in_out);
|
428
|
527
|
|
528
|
|
#ifndef MULE
/* Leading-byte definitions for builds without MULE. */
#define MIN_LEADING_BYTE 0x80

/* These need special treatment in a string and/or character. */
#ifdef ENABLE_COMPOSITE_CHARS
#define LEADING_BYTE_COMPOSITE 0x80       /* for a composite character */
#endif
#define LEADING_BYTE_CONTROL_1 0x8F       /* represent normal 80-9F */
#define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */

/* True if BYTE lies in 0x80..0x9F.  The unsigned subtraction wraps
   for values below 0x80, so one comparison covers both bounds. */
#define BYTE_C1_P(byte) \
  ((unsigned int) ((unsigned int) (byte) - 0x80) < 0x20)

/* True if BYTE is below 0xA0. */
#define BUFBYTE_FIRST_BYTE_P(byte) ((byte) < 0xA0)

/* A byte is treated as a leading byte exactly when BYTE_C1_P holds. */
#define BUFBYTE_LEADING_BYTE_P(byte) BYTE_C1_P (byte)
#endif /* not MULE */
|
|
541
|
440
|
542 #endif /* INCLUDED_file_coding_h_ */
|
|
543
|