Mercurial > hg > xemacs-beta
comparison src/file-coding.h @ 428:3ecd8885ac67 r21-2-22
Import from CVS: tag r21-2-22
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:28:15 +0200 |
parents | |
children | 84b14dcb0985 |
comparison
equal
deleted
inserted
replaced
427:0a0253eac470 | 428:3ecd8885ac67 |
---|---|
1 /* Header for code conversion stuff | |
2 Copyright (C) 1991, 1995 Free Software Foundation, Inc. | |
3 Copyright (C) 1995 Sun Microsystems, Inc. | |
4 | |
5 This file is part of XEmacs. | |
6 | |
7 XEmacs is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 2, or (at your option) any | |
10 later version. | |
11 | |
12 XEmacs is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with XEmacs; see the file COPYING. If not, write to | |
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 Boston, MA 02111-1307, USA. */ | |
21 | |
22 /* Synched up with: Mule 2.3. Not in FSF. */ | |
23 | |
24 /* 91.10.09 written by K.Handa <handa@etl.go.jp> */ | |
25 /* Rewritten by Ben Wing <ben@xemacs.org>. */ | |
26 | |
27 #ifndef _XEMACS_MULE_CODING_H_ | |
28 #define _XEMACS_MULE_CODING_H_ | |
29 | |
30 struct decoding_stream; | |
31 struct encoding_stream; | |
32 | |
33 /* Coding system types. These go into the TYPE field of a | |
34 struct Lisp_Coding_System. */ | |
35 | |
36 enum coding_system_type | |
37 { | |
38 CODESYS_AUTODETECT, /* Automatic conversion. */ | |
39 #ifdef MULE | |
40 CODESYS_SHIFT_JIS, /* Shift-JIS; Hankaku (half-width) KANA | |
41 is also supported. */ | |
42 CODESYS_ISO2022, /* Any ISO2022-compliant coding system. | |
43 Includes JIS, EUC, CTEXT */ | |
44 CODESYS_BIG5, /* BIG5 (used for Taiwanese). */ | |
45 CODESYS_UCS4, /* ISO 10646 UCS-4 */ | |
46 CODESYS_UTF8, /* ISO 10646 UTF-8 */ | |
47 CODESYS_CCL, /* Converter written in CCL. */ | |
48 #endif | |
49 CODESYS_NO_CONVERSION /* "No conversion"; used for binary files. | |
50 We use quotes because there really | |
51 is some conversion being applied, | |
52 but it appears to the user as if | |
53 the text is read in without conversion. */ | |
54 #ifdef DEBUG_XEMACS | |
55 ,CODESYS_INTERNAL /* Raw (internally-formatted) data. */ | |
56 #endif | |
57 }; | |
58 | |
59 enum eol_type /* End-of-line marking of a coding system; stored in the eol_type field of struct Lisp_Coding_System. */ | |
60 { | |
61 EOL_AUTODETECT, /* The EOL marking is autodetected rather than fixed. */ | |
62 EOL_LF, /* Line feed only. */ | |
63 EOL_CRLF, /* Carriage return followed by line feed. */ | |
64 EOL_CR /* Carriage return only. */ | |
65 }; | |
66 typedef enum eol_type eol_type_t; /* Shorthand for enum eol_type. */ | |
67 | |
68 #ifdef MULE | |
69 typedef struct charset_conversion_spec charset_conversion_spec; /* One charset conversion entry: a (from, to) pair of charsets. */ | |
70 struct charset_conversion_spec | |
71 { | |
72 Lisp_Object from_charset; /* NOTE(review): presumably the charset to be converted -- confirm in the implementation. */ | |
73 Lisp_Object to_charset; /* NOTE(review): presumably the charset it is converted to -- confirm in the implementation. */ | |
74 }; | |
75 | |
76 typedef struct /* Dynamic array of charset_conversion_spec; the iso2022 input_conv and output_conv fields of struct Lisp_Coding_System point to this type. */ | |
77 { | |
78 Dynarr_declare (charset_conversion_spec); /* Dynarr_declare is defined outside this header. */ | |
79 } charset_conversion_spec_dynarr; | |
80 #endif | |
81 | |
82 struct Lisp_Coding_System /* Lisp record describing one coding system. */ | |
83 { | |
84 struct lcrecord_header header; | |
85 | |
86 /* Name and doc string of this coding system. */ | |
87 Lisp_Object name, doc_string; | |
88 | |
89 /* This is the major type of the coding system -- one of Big5, ISO2022, | |
90 Shift-JIS, etc. See the constants above. */ | |
91 enum coding_system_type type; | |
92 | |
93 /* Mnemonic string displayed in the modeline when this coding | |
94 system is active for a particular buffer. */ | |
95 Lisp_Object mnemonic; | |
96 | |
97 Lisp_Object post_read_conversion, pre_write_conversion; /* NOTE(review): by name, conversions applied after reading and before writing; this header does not show what values they hold or how they are invoked. */ | |
98 | |
99 enum eol_type eol_type; /* EOL marking of this coding system (one of enum eol_type). */ | |
100 | |
101 /* Subsidiary coding systems that specify a particular type of EOL | |
102 marking, rather than autodetecting it. These will only be non-nil | |
103 if (eol_type == EOL_AUTODETECT). */ | |
104 Lisp_Object eol_lf, eol_crlf, eol_cr; | |
105 #ifdef MULE | |
106 struct /* ISO2022 parameters; this member exists only when MULE is defined. */ | |
107 { | |
108 /* What are the charsets to be initially designated to G0, G1, | |
109 G2, G3? If t, no charset is initially designated. If nil, | |
110 no charset is initially designated and no charset is allowed | |
111 to be designated. */ | |
112 Lisp_Object initial_charset[4]; | |
113 | |
114 /* If true, a designation escape sequence needs to be sent on output | |
115 for the charset in G[0-3] before that charset is used. */ | |
116 unsigned char force_charset_on_output[4]; | |
117 | |
118 charset_conversion_spec_dynarr *input_conv; /* Dynamic array of charset conversion specs for input. */ | |
119 charset_conversion_spec_dynarr *output_conv; /* Dynamic array of charset conversion specs for output. */ | |
120 | |
121 unsigned int shoort :1; /* Spelled "shoort" because "short" is a C keyword (C makes you speak Dutch). */ | |
122 unsigned int no_ascii_eol :1; /* This and the flags below are one-bit fields, each read through a CODING_SYSTEM_ISO2022_* accessor macro. */ | |
123 unsigned int no_ascii_cntl :1; | |
124 unsigned int seven :1; | |
125 unsigned int lock_shift :1; | |
126 unsigned int no_iso6429 :1; | |
127 unsigned int escape_quoted :1; | |
128 } iso2022; | |
129 struct /* CCL parameters; this member exists only when MULE is defined. */ | |
130 { | |
131 /* For a CCL coding system, these specify the CCL programs used for | |
132 decoding (input) and encoding (output). */ | |
133 Lisp_Object decode, encode; | |
134 } ccl; | |
135 #endif /* MULE */ | |
136 }; | |
137 typedef struct Lisp_Coding_System Lisp_Coding_System; | |
138 | |
139 DECLARE_LRECORD (coding_system, struct Lisp_Coding_System); | |
140 #define XCODING_SYSTEM(x) XRECORD (x, coding_system, struct Lisp_Coding_System) | |
141 #define XSETCODING_SYSTEM(x, p) XSETRECORD (x, p, coding_system) | |
142 #define CODING_SYSTEMP(x) RECORDP (x, coding_system) | |
143 #define CHECK_CODING_SYSTEM(x) CHECK_RECORD (x, coding_system) | |
144 #define CONCHECK_CODING_SYSTEM(x) CONCHECK_RECORD (x, coding_system) | |
145 /* Field accessors. CODESYS must be a pointer to a struct Lisp_Coding_System; each macro expands to the parenthesized member itself, so it can be read or assigned. */ | |
146 #define CODING_SYSTEM_NAME(codesys) ((codesys)->name) | |
147 #define CODING_SYSTEM_DOC_STRING(codesys) ((codesys)->doc_string) | |
148 #define CODING_SYSTEM_TYPE(codesys) ((codesys)->type) | |
149 #define CODING_SYSTEM_MNEMONIC(codesys) ((codesys)->mnemonic) | |
150 #define CODING_SYSTEM_POST_READ_CONVERSION(codesys) \ | |
151 ((codesys)->post_read_conversion) | |
152 #define CODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \ | |
153 ((codesys)->pre_write_conversion) | |
154 #define CODING_SYSTEM_EOL_TYPE(codesys) ((codesys)->eol_type) | |
155 #define CODING_SYSTEM_EOL_LF(codesys) ((codesys)->eol_lf) | |
156 #define CODING_SYSTEM_EOL_CRLF(codesys) ((codesys)->eol_crlf) | |
157 #define CODING_SYSTEM_EOL_CR(codesys) ((codesys)->eol_cr) | |
158 | |
159 #ifdef MULE /* Accessors for the iso2022 and ccl members, which the struct only has under MULE. G indexes the four-element arrays. */ | |
160 #define CODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ | |
161 ((codesys)->iso2022.initial_charset[g]) | |
162 #define CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ | |
163 ((codesys)->iso2022.force_charset_on_output[g]) | |
164 #define CODING_SYSTEM_ISO2022_SHORT(codesys) ((codesys)->iso2022.shoort) /* The member is spelled "shoort". */ | |
165 #define CODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ | |
166 ((codesys)->iso2022.no_ascii_eol) | |
167 #define CODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ | |
168 ((codesys)->iso2022.no_ascii_cntl) | |
169 #define CODING_SYSTEM_ISO2022_SEVEN(codesys) ((codesys)->iso2022.seven) | |
170 #define CODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ | |
171 ((codesys)->iso2022.lock_shift) | |
172 #define CODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \ | |
173 ((codesys)->iso2022.no_iso6429) | |
174 #define CODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \ | |
175 ((codesys)->iso2022.escape_quoted) | |
176 #define CODING_SYSTEM_CCL_DECODE(codesys) ((codesys)->ccl.decode) | |
177 #define CODING_SYSTEM_CCL_ENCODE(codesys) ((codesys)->ccl.encode) | |
178 #endif /* MULE */ | |
179 /* The same accessors with an extra step: the argument is first passed through XCODING_SYSTEM and the result is handed to the CODING_SYSTEM_* macro of the same name. */ | |
180 #define XCODING_SYSTEM_NAME(codesys) \ | |
181 CODING_SYSTEM_NAME (XCODING_SYSTEM (codesys)) | |
182 #define XCODING_SYSTEM_DOC_STRING(codesys) \ | |
183 CODING_SYSTEM_DOC_STRING (XCODING_SYSTEM (codesys)) | |
184 #define XCODING_SYSTEM_TYPE(codesys) \ | |
185 CODING_SYSTEM_TYPE (XCODING_SYSTEM (codesys)) | |
186 #define XCODING_SYSTEM_MNEMONIC(codesys) \ | |
187 CODING_SYSTEM_MNEMONIC (XCODING_SYSTEM (codesys)) | |
188 #define XCODING_SYSTEM_POST_READ_CONVERSION(codesys) \ | |
189 CODING_SYSTEM_POST_READ_CONVERSION (XCODING_SYSTEM (codesys)) | |
190 #define XCODING_SYSTEM_PRE_WRITE_CONVERSION(codesys) \ | |
191 CODING_SYSTEM_PRE_WRITE_CONVERSION (XCODING_SYSTEM (codesys)) | |
192 #define XCODING_SYSTEM_EOL_TYPE(codesys) \ | |
193 CODING_SYSTEM_EOL_TYPE (XCODING_SYSTEM (codesys)) | |
194 #define XCODING_SYSTEM_EOL_LF(codesys) \ | |
195 CODING_SYSTEM_EOL_LF (XCODING_SYSTEM (codesys)) | |
196 #define XCODING_SYSTEM_EOL_CRLF(codesys) \ | |
197 CODING_SYSTEM_EOL_CRLF (XCODING_SYSTEM (codesys)) | |
198 #define XCODING_SYSTEM_EOL_CR(codesys) \ | |
199 CODING_SYSTEM_EOL_CR (XCODING_SYSTEM (codesys)) | |
200 | |
201 #ifdef MULE /* XCODING_SYSTEM wrappers around the MULE-only iso2022 and ccl accessors. */ | |
202 #define XCODING_SYSTEM_ISO2022_INITIAL_CHARSET(codesys, g) \ | |
203 CODING_SYSTEM_ISO2022_INITIAL_CHARSET (XCODING_SYSTEM (codesys), g) | |
204 #define XCODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT(codesys, g) \ | |
205 CODING_SYSTEM_ISO2022_FORCE_CHARSET_ON_OUTPUT (XCODING_SYSTEM (codesys), g) | |
206 #define XCODING_SYSTEM_ISO2022_SHORT(codesys) \ | |
207 CODING_SYSTEM_ISO2022_SHORT (XCODING_SYSTEM (codesys)) | |
208 #define XCODING_SYSTEM_ISO2022_NO_ASCII_EOL(codesys) \ | |
209 CODING_SYSTEM_ISO2022_NO_ASCII_EOL (XCODING_SYSTEM (codesys)) | |
210 #define XCODING_SYSTEM_ISO2022_NO_ASCII_CNTL(codesys) \ | |
211 CODING_SYSTEM_ISO2022_NO_ASCII_CNTL (XCODING_SYSTEM (codesys)) | |
212 #define XCODING_SYSTEM_ISO2022_SEVEN(codesys) \ | |
213 CODING_SYSTEM_ISO2022_SEVEN (XCODING_SYSTEM (codesys)) | |
214 #define XCODING_SYSTEM_ISO2022_LOCK_SHIFT(codesys) \ | |
215 CODING_SYSTEM_ISO2022_LOCK_SHIFT (XCODING_SYSTEM (codesys)) | |
216 #define XCODING_SYSTEM_ISO2022_NO_ISO6429(codesys) \ | |
217 CODING_SYSTEM_ISO2022_NO_ISO6429 (XCODING_SYSTEM (codesys)) | |
218 #define XCODING_SYSTEM_ISO2022_ESCAPE_QUOTED(codesys) \ | |
219 CODING_SYSTEM_ISO2022_ESCAPE_QUOTED (XCODING_SYSTEM (codesys)) | |
220 #define XCODING_SYSTEM_CCL_DECODE(codesys) \ | |
221 CODING_SYSTEM_CCL_DECODE (XCODING_SYSTEM (codesys)) | |
222 #define XCODING_SYSTEM_CCL_ENCODE(codesys) \ | |
223 CODING_SYSTEM_CCL_ENCODE (XCODING_SYSTEM (codesys)) | |
224 #endif /* MULE */ | |
225 | |
226 EXFUN (Fcoding_category_list, 0); | |
227 EXFUN (Fcoding_category_system, 1); | |
228 EXFUN (Fcoding_priority_list, 0); | |
229 EXFUN (Fcoding_system_charset, 2); | |
230 EXFUN (Fcoding_system_doc_string, 1); | |
231 EXFUN (Fcoding_system_list, 0); | |
232 EXFUN (Fcoding_system_name, 1); | |
233 EXFUN (Fcoding_system_p, 1); | |
234 EXFUN (Fcoding_system_property, 2); | |
235 EXFUN (Fcoding_system_type, 1); | |
236 EXFUN (Fcopy_coding_system, 2); | |
237 EXFUN (Fdecode_big5_char, 1); | |
238 EXFUN (Fdecode_coding_region, 4); | |
239 EXFUN (Fdecode_shift_jis_char, 1); | |
240 EXFUN (Fdetect_coding_region, 3); | |
241 EXFUN (Fencode_big5_char, 1); | |
242 EXFUN (Fencode_coding_region, 4); | |
243 EXFUN (Fencode_shift_jis_char, 1); | |
244 EXFUN (Ffind_coding_system, 1); | |
245 EXFUN (Fget_coding_system, 1); | |
246 EXFUN (Fmake_coding_system, 4); | |
247 EXFUN (Fset_coding_category_system, 2); | |
248 EXFUN (Fset_coding_priority_list, 1); | |
249 EXFUN (Fsubsidiary_coding_system, 2); | |
250 | |
251 extern Lisp_Object Qucs4, Qutf8; | |
252 extern Lisp_Object Qbig5, Qccl, Qcharset_g0; | |
253 extern Lisp_Object Qcharset_g1, Qcharset_g2, Qcharset_g3, Qcoding_system_error; | |
254 extern Lisp_Object Qcoding_systemp, Qcr, Qcrlf, Qctext, Qdecode, Qencode; | |
255 extern Lisp_Object Qeol_cr, Qeol_crlf, Qeol_lf, Qeol_type, Qescape_quoted; | |
256 extern Lisp_Object Qforce_g0_on_output, Qforce_g1_on_output; | |
257 extern Lisp_Object Qforce_g2_on_output, Qforce_g3_on_output; | |
258 extern Lisp_Object Qinput_charset_conversion, Qiso2022, Qlf, Qlock_shift; | |
259 extern Lisp_Object Qmnemonic, Qno_ascii_cntl, Qno_ascii_eol, Qno_conversion; | |
260 extern Lisp_Object Qraw_text; | |
261 extern Lisp_Object Qno_iso6429, Qoutput_charset_conversion; | |
262 extern Lisp_Object Qpost_read_conversion, Qpre_write_conversion, Qseven; | |
263 extern Lisp_Object Qshift_jis, Qshort, Vcoding_system_for_read; | |
264 extern Lisp_Object Vcoding_system_for_write, Vcoding_system_hash_table; | |
265 extern Lisp_Object Vfile_name_coding_system, Vkeyboard_coding_system; | |
266 extern Lisp_Object Vterminal_coding_system; | |
267 | |
268 /* Flags indicating current state while converting code. */ | |
269 | |
270 /* Used by everyone. */ | |
271 | |
272 #define CODING_STATE_END (1 << 0) /* If set, this is the last chunk of | |
273 data being processed. When this | |
274 is finished, output any necessary | |
275 terminating control characters, | |
276 escape sequences, etc. */ | |
277 #define CODING_STATE_CR (1 << 1) /* If set, we just saw a CR. */ | |
278 | |
279 | |
280 /* Used by Big 5 on output. */ | |
281 #ifdef MULE | |
282 #define CODING_STATE_BIG5_1 (1 << 2) /* If set, we just encountered | |
283 LEADING_BYTE_BIG5_1. */ | |
284 #define CODING_STATE_BIG5_2 (1 << 3) /* If set, we just encountered | |
285 LEADING_BYTE_BIG5_2. */ | |
286 | |
287 | |
288 /* Used by ISO2022 on input and output. */ | |
289 | |
290 #define CODING_STATE_R2L (1 << 4) /* If set, the current | |
291 directionality is right-to-left. | |
292 Otherwise, it's left-to-right. */ | |
293 | |
294 | |
295 /* Used by ISO2022 on input. */ | |
296 | |
297 #define CODING_STATE_ESCAPE (1 << 5) /* If set, we're currently parsing | |
298 an escape sequence and the upper | |
299 16 bits should be looked at to | |
300 indicate what partial escape | |
301 sequence we've seen so far. | |
302 Otherwise, we're running | |
303 through actual text. */ | |
304 #define CODING_STATE_SS2 (1 << 6) /* If set, G2 is invoked into GL, but | |
305 only for the next character. */ | |
306 #define CODING_STATE_SS3 (1 << 7) /* If set, G3 is invoked into GL, | |
307 but only for the next character. | |
308 If both CODING_STATE_SS2 and | |
309 CODING_STATE_SS3 are set, | |
310 CODING_STATE_SS2 overrides; but | |
311 this probably indicates an error | |
312 in the text encoding. */ | |
313 #ifdef ENABLE_COMPOSITE_CHARS | |
314 #define CODING_STATE_COMPOSITE (1 << 8) /* If set, we're currently processing | |
315 a composite character (i.e. a | |
316 character constructed by | |
317 overstriking two or more | |
318 characters). */ | |
319 #endif /* ENABLE_COMPOSITE_CHARS */ | |
320 | |
321 | |
322 /* CODING_STATE_ISO2022_LOCK is the mask of flags that remain on until | |
323 explicitly turned off when in the ISO2022 encoder/decoder. Other flags are | |
324 turned off at the end of processing each character or escape sequence. */ | |
325 #ifdef ENABLE_COMPOSITE_CHARS | |
326 # define CODING_STATE_ISO2022_LOCK \ | |
327 (CODING_STATE_END | CODING_STATE_COMPOSITE | CODING_STATE_R2L) | |
328 #else | |
329 # define CODING_STATE_ISO2022_LOCK (CODING_STATE_END | CODING_STATE_R2L) | |
330 #endif | |
331 | |
332 #define CODING_STATE_BIG5_LOCK CODING_STATE_END | |
333 | |
334 /* Flags indicating what we've seen so far when parsing an | |
335 ISO2022 escape sequence. */ | |
336 enum iso_esc_flag | |
337 { | |
338 /* Partial sequences */ | |
339 ISO_ESC_NOTHING, /* Nothing has been seen. */ | |
340 ISO_ESC, /* We've seen ESC. */ | |
341 ISO_ESC_2_4, /* We've seen ESC $. This indicates | |
342 that we're designating a multi-byte, rather | |
343 than a single-byte, character set. */ | |
344 ISO_ESC_2_8, /* We've seen ESC 0x28, i.e. ESC (. | |
345 This means designate a 94-character | |
346 character set into G0. */ | |
347 ISO_ESC_2_9, /* We've seen ESC 0x29 -- designate a | |
348 94-character character set into G1. */ | |
349 ISO_ESC_2_10, /* We've seen ESC 0x2A. */ | |
350 ISO_ESC_2_11, /* We've seen ESC 0x2B. */ | |
351 ISO_ESC_2_12, /* We've seen ESC 0x2C -- designate a | |
352 96-character character set into G0. | |
353 (This is not ISO2022-standard. | |
354 The following 96-character | |
355 control sequences are standard, | |
356 though.) */ | |
357 ISO_ESC_2_13, /* We've seen ESC 0x2D -- designate a | |
358 96-character character set into G1. | |
359 */ | |
360 ISO_ESC_2_14, /* We've seen ESC 0x2E. */ | |
361 ISO_ESC_2_15, /* We've seen ESC 0x2F. */ | |
362 ISO_ESC_2_4_8, /* We've seen ESC $ 0x28 -- designate | |
363 a 94^N character set into G0. */ | |
364 ISO_ESC_2_4_9, /* We've seen ESC $ 0x29. */ | |
365 ISO_ESC_2_4_10, /* We've seen ESC $ 0x2A. */ | |
366 ISO_ESC_2_4_11, /* We've seen ESC $ 0x2B. */ | |
367 ISO_ESC_2_4_12, /* We've seen ESC $ 0x2C. */ | |
368 ISO_ESC_2_4_13, /* We've seen ESC $ 0x2D. */ | |
369 ISO_ESC_2_4_14, /* We've seen ESC $ 0x2E. */ | |
370 ISO_ESC_2_4_15, /* We've seen ESC $ 0x2F. */ | |
371 ISO_ESC_5_11, /* We've seen ESC [ or 0x9B. This | |
372 starts a directionality-control | |
373 sequence. The next character | |
374 must be 0, 1, 2, or ]. */ | |
375 ISO_ESC_5_11_0, /* We've seen 0x9B 0. The next character must be ]. */ | |
376 ISO_ESC_5_11_1, /* We've seen 0x9B 1. The next character must be ]. */ | |
377 ISO_ESC_5_11_2, /* We've seen 0x9B 2. The next character must be ]. */ | |
378 | |
379 /* Full sequences. */ | |
380 #ifdef ENABLE_COMPOSITE_CHARS | |
381 ISO_ESC_START_COMPOSITE, /* Private usage for START COMPOSING */ | |
382 ISO_ESC_END_COMPOSITE, /* Private usage for END COMPOSING */ | |
383 #endif /* ENABLE_COMPOSITE_CHARS */ | |
384 ISO_ESC_SINGLE_SHIFT, /* We've seen a complete single-shift sequence. */ | |
385 ISO_ESC_LOCKING_SHIFT,/* We've seen a complete locking-shift sequence. */ | |
386 ISO_ESC_DESIGNATE, /* We've seen a complete designation sequence. */ | |
387 ISO_ESC_DIRECTIONALITY,/* We've seen a complete ISO6429 directionality | |
388 sequence. */ | |
389 ISO_ESC_LITERAL /* We've seen a literal character ala | |
390 escape-quoting. */ | |
391 }; | |
392 | |
393 /* Byte values of the control characters that carry out ISO2022 functions. */ | |
394 /* code */ /* function */ | |
395 #define ISO_CODE_LF 0x0A /* line-feed */ | |
396 #define ISO_CODE_CR 0x0D /* carriage-return */ | |
397 #define ISO_CODE_SO 0x0E /* shift-out */ | |
398 #define ISO_CODE_SI 0x0F /* shift-in */ | |
399 #define ISO_CODE_ESC 0x1B /* escape */ | |
400 #define ISO_CODE_DEL 0x7F /* delete */ | |
401 #define ISO_CODE_SS2 0x8E /* single-shift-2 */ | |
402 #define ISO_CODE_SS3 0x8F /* single-shift-3 */ | |
403 #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */ | |
404 #endif /* MULE */ | |
405 | |
406 /* For detecting the encoding of text */ | |
407 enum coding_category_type | |
408 { | |
409 #ifdef MULE | |
410 CODING_CATEGORY_SHIFT_JIS, | |
411 CODING_CATEGORY_ISO_7, /* ISO2022 system using only seven-bit bytes, | |
412 no locking shift */ | |
413 CODING_CATEGORY_ISO_8_DESIGNATE, /* ISO2022 system using eight-bit bytes, | |
414 no locking shift, no single shift, | |
415 using designation to switch charsets */ | |
416 CODING_CATEGORY_ISO_8_1, /* ISO2022 system using eight-bit bytes, | |
417 no locking shift, no designation sequences, | |
418 one-dimension characters in the upper half. */ | |
419 CODING_CATEGORY_ISO_8_2, /* ISO2022 system using eight-bit bytes, | |
420 no locking shift, no designation sequences, | |
421 two-dimension characters in the upper half. */ | |
422 CODING_CATEGORY_ISO_LOCK_SHIFT, /* ISO2022 system using locking shift */ | |
423 CODING_CATEGORY_BIG5, | |
424 CODING_CATEGORY_UCS4, | |
425 CODING_CATEGORY_UTF8, | |
426 #endif /* MULE */ | |
427 CODING_CATEGORY_NO_CONVERSION | |
428 }; | |
429 | |
430 #define CODING_CATEGORY_LAST CODING_CATEGORY_NO_CONVERSION | |
431 | |
432 #ifdef MULE /* Bit masks for the detection categories: each mask is 1 shifted left by the matching enum coding_category_type value. */ | |
433 #define CODING_CATEGORY_SHIFT_JIS_MASK \ | |
434 (1 << CODING_CATEGORY_SHIFT_JIS) | |
435 #define CODING_CATEGORY_ISO_7_MASK \ | |
436 (1 << CODING_CATEGORY_ISO_7) | |
437 #define CODING_CATEGORY_ISO_8_DESIGNATE_MASK \ | |
438 (1 << CODING_CATEGORY_ISO_8_DESIGNATE) | |
439 #define CODING_CATEGORY_ISO_8_1_MASK \ | |
440 (1 << CODING_CATEGORY_ISO_8_1) | |
441 #define CODING_CATEGORY_ISO_8_2_MASK \ | |
442 (1 << CODING_CATEGORY_ISO_8_2) | |
443 #define CODING_CATEGORY_ISO_LOCK_SHIFT_MASK \ | |
444 (1 << CODING_CATEGORY_ISO_LOCK_SHIFT) | |
445 #define CODING_CATEGORY_BIG5_MASK \ | |
446 (1 << CODING_CATEGORY_BIG5) | |
447 #define CODING_CATEGORY_UCS4_MASK \ | |
448 (1 << CODING_CATEGORY_UCS4) | |
449 #define CODING_CATEGORY_UTF8_MASK \ | |
450 (1 << CODING_CATEGORY_UTF8) | |
451 #endif /* MULE */ | |
452 #define CODING_CATEGORY_NO_CONVERSION_MASK \ | |
453 (1 << CODING_CATEGORY_NO_CONVERSION) | |
454 #define CODING_CATEGORY_NOT_FINISHED_MASK \ | |
455 (1 << 30) /* Fixed bit 30, not derived from an enum value. NOTE(review): by name, marks detection as not yet finished -- confirm against the detection code. */ | |
456 | |
457 #ifdef MULE | |
458 /* Convert shift-JIS code (sj1, sj2) into internal string | |
459 representation (c1, c2). (The leading byte is assumed.) | |
460 sj1 and sj2 are each evaluated once; c1 and c2 are assigned to and must be lvalues. The macro's locals are named I1 and I2, so do not pass variables with those names. */ | |
461 #define DECODE_SJIS(sj1, sj2, c1, c2) \ | |
462 do { \ | |
463 int I1 = sj1, I2 = sj2; /* copy the inputs so each is evaluated once */ \ | |
464 if (I2 >= 0x9f) /* this branch always yields an even c1 */ \ | |
465 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe0 : 0x60), \ | |
466 c2 = I2 + 2; \ | |
467 else /* this branch always yields an odd c1 */ \ | |
468 c1 = (I1 << 1) - ((I1 >= 0xe0) ? 0xe1 : 0x61), \ | |
469 c2 = I2 + ((I2 >= 0x7f) ? 0x60 : 0x61); \ | |
470 } while (0) | |
471 | |
472 /* Convert the internal string representation of a Shift-JIS character | |
473 (c1, c2) into Shift-JIS code (sj1, sj2). The leading byte is | |
474 assumed. | |
475 c1 and c2 are each evaluated once; sj1 and sj2 are assigned to and must be lvalues. The macro's locals are named I1 and I2, so do not pass variables with those names. */ | |
476 #define ENCODE_SJIS(c1, c2, sj1, sj2) \ | |
477 do { \ | |
478 int I1 = c1, I2 = c2; /* copy the inputs so each is evaluated once */ \ | |
479 if (I1 & 1) /* odd c1: reverses the else branch of DECODE_SJIS */ \ | |
480 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x31 : 0x71), \ | |
481 sj2 = I2 - ((I2 >= 0xe0) ? 0x60 : 0x61); \ | |
482 else /* even c1: reverses the (I2 >= 0x9f) branch of DECODE_SJIS */ \ | |
483 sj1 = (I1 >> 1) + ((I1 < 0xdf) ? 0x30 : 0x70), \ | |
484 sj2 = I2 - 2; \ | |
485 } while (0) | |
486 #endif /* MULE */ | |
487 /* Stream functions; only their declarations appear in this header. NOTE(review): by their names, the make_* functions build a decoding or encoding stream on top of STREAM for CODESYS, and the remaining ones query or replace the coding system of such a stream -- confirm against the definitions. */ | |
488 Lisp_Object make_decoding_input_stream (Lstream *stream, | |
489 Lisp_Object codesys); | |
490 Lisp_Object make_encoding_input_stream (Lstream *stream, | |
491 Lisp_Object codesys); | |
492 Lisp_Object make_decoding_output_stream (Lstream *stream, | |
493 Lisp_Object codesys); | |
494 Lisp_Object make_encoding_output_stream (Lstream *stream, | |
495 Lisp_Object codesys); | |
496 Lisp_Object decoding_stream_coding_system (Lstream *stream); | |
497 Lisp_Object encoding_stream_coding_system (Lstream *stream); | |
498 void set_decoding_stream_coding_system (Lstream *stream, | |
499 Lisp_Object codesys); | |
500 void set_encoding_stream_coding_system (Lstream *stream, | |
501 Lisp_Object codesys); | |
502 void determine_real_coding_system (Lstream *stream, Lisp_Object *codesys_in_out, /* both trailing parameters are pointers, so the callee can update the caller's values */ | |
503 enum eol_type *eol_type_in_out); | |
504 | |
505 | |
506 #ifndef MULE /* These definitions are supplied here only when MULE is not defined. */ | |
507 #define MIN_LEADING_BYTE 0x80 | |
508 /* These need special treatment in a string and/or character */ | |
509 #ifdef ENABLE_COMPOSITE_CHARS | |
510 #define LEADING_BYTE_COMPOSITE 0x80 /* for a composite character */ | |
511 #endif | |
512 #define LEADING_BYTE_CONTROL_1 0x8F /* represent normal 80-9F */ | |
513 #define LEADING_BYTE_LATIN_ISO8859_1 0x81 /* Right half of ISO 8859-1 */ | |
514 #define BYTE_C1_P(c) ((unsigned int) ((unsigned int) (c) - 0x80) < 0x20) /* True when c, converted to unsigned int, is in 0x80..0x9F; the unsigned subtraction wraps for smaller values, so one comparison checks both bounds. */ | |
515 #define BUFBYTE_FIRST_BYTE_P(c) ((c) < 0xA0) /* True when c is below 0xA0. */ | |
516 #define BUFBYTE_LEADING_BYTE_P(c) BYTE_C1_P (c) /* Same test as BYTE_C1_P. */ | |
517 #endif /* not MULE */ | |
518 #endif /* _XEMACS_MULE_CODING_H_ */ | |
519 |