Mercurial > hg > xemacs-beta
comparison src/file-coding.c @ 424:11054d720c21 r21-2-20
Import from CVS: tag r21-2-20
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:26:11 +0200 |
parents | 95016f13131a |
children |
comparison
equal
deleted
inserted
replaced
423:28d9c139be4c | 424:11054d720c21 |
---|---|
34 #include "mule-ccl.h" | 34 #include "mule-ccl.h" |
35 #include "chartab.h" | 35 #include "chartab.h" |
36 #endif | 36 #endif |
37 #include "file-coding.h" | 37 #include "file-coding.h" |
38 | 38 |
39 Lisp_Object Qbuffer_file_coding_system, Qcoding_system_error; | 39 Lisp_Object Qcoding_system_error; |
40 | 40 |
41 Lisp_Object Vkeyboard_coding_system; | 41 Lisp_Object Vkeyboard_coding_system; |
42 Lisp_Object Vterminal_coding_system; | 42 Lisp_Object Vterminal_coding_system; |
43 Lisp_Object Vcoding_system_for_read; | 43 Lisp_Object Vcoding_system_for_read; |
44 Lisp_Object Vcoding_system_for_write; | 44 Lisp_Object Vcoding_system_for_write; |
45 Lisp_Object Vfile_name_coding_system; | 45 Lisp_Object Vfile_name_coding_system; |
46 | 46 |
47 /* Table of symbols identifying each coding category. */ | 47 /* Table of symbols identifying each coding category. */ |
48 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1]; | 48 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1]; |
49 | 49 |
50 /* Coding system currently associated with each coding category. */ | 50 |
51 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1]; | 51 |
52 | 52 struct file_coding_dump { |
53 /* Table of all coding categories in decreasing order of priority. | 53 /* Coding system currently associated with each coding category. */ |
54 This describes a permutation of the possible coding categories. */ | 54 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1]; |
55 int coding_category_by_priority[CODING_CATEGORY_LAST + 1]; | 55 |
56 | 56 /* Table of all coding categories in decreasing order of priority. |
57 Lisp_Object Qcoding_system_p; | 57 This describes a permutation of the possible coding categories. */ |
| | 58 int coding_category_by_priority[CODING_CATEGORY_LAST + 1]; |
| | 59 |
| | 60 Lisp_Object ucs_to_mule_table[65536]; |
| | 61 } *fcd; |
| | 62 |
| | 63 static const struct lrecord_description fcd_description_1[] = { |
| | 64 { XD_LISP_OBJECT, offsetof(struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 }, |
| | 65 { XD_LISP_OBJECT, offsetof(struct file_coding_dump, ucs_to_mule_table), 65536 }, |
| | 66 { XD_END } |
| | 67 }; |
| | 68 |
| | 69 static const struct struct_description fcd_description = { |
| | 70 sizeof(struct file_coding_dump), |
| | 71 fcd_description_1 |
| | 72 }; |
| | 73 |
| | 74 Lisp_Object mule_to_ucs_table; |
| | 75 |
| | 76 Lisp_Object Qcoding_systemp; |
58 | 77 |
59 Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022; | 78 Lisp_Object Qraw_text, Qno_conversion, Qccl, Qiso2022; |
60 /* Qinternal in general.c */ | 79 /* Qinternal in general.c */ |
61 | 80 |
62 Lisp_Object Qmnemonic, Qeol_type; | 81 Lisp_Object Qmnemonic, Qeol_type; |
225 typedef struct | 244 typedef struct |
226 { | 245 { |
227 Dynarr_declare (codesys_prop); | 246 Dynarr_declare (codesys_prop); |
228 } codesys_prop_dynarr; | 247 } codesys_prop_dynarr; |
229 | 248 |
| | 249 static const struct lrecord_description codesys_prop_description_1[] = { |
| | 250 { XD_LISP_OBJECT, offsetof(codesys_prop, sym), 1 }, |
| | 251 { XD_END } |
| | 252 }; |
| | 253 |
| | 254 static const struct struct_description codesys_prop_description = { |
| | 255 sizeof(codesys_prop), |
| | 256 codesys_prop_description_1 |
| | 257 }; |
| | 258 |
| | 259 static const struct lrecord_description codesys_prop_dynarr_description_1[] = { |
| | 260 XD_DYNARR_DESC(codesys_prop_dynarr, &codesys_prop_description), |
| | 261 { XD_END } |
| | 262 }; |
| | 263 |
| | 264 static const struct struct_description codesys_prop_dynarr_description = { |
| | 265 sizeof(codesys_prop_dynarr), |
| | 266 codesys_prop_dynarr_description_1 |
| | 267 }; |
268 | |
230 codesys_prop_dynarr *the_codesys_prop_dynarr; | 269 codesys_prop_dynarr *the_codesys_prop_dynarr; |
231 | 270 |
232 enum codesys_prop_enum | 271 enum codesys_prop_enum |
233 { | 272 { |
234 CODESYS_PROP_ALL_OK, | 273 CODESYS_PROP_ALL_OK, |
239 | 278 |
240 /************************************************************************/ | 279 /************************************************************************/ |
241 /* Coding system functions */ | 280 /* Coding system functions */ |
242 /************************************************************************/ | 281 /************************************************************************/ |
243 | 282 |
244 static Lisp_Object mark_coding_system (Lisp_Object, void (*) (Lisp_Object)); | 283 static Lisp_Object mark_coding_system (Lisp_Object); |
245 static void print_coding_system (Lisp_Object, Lisp_Object, int); | 284 static void print_coding_system (Lisp_Object, Lisp_Object, int); |
246 static void finalize_coding_system (void *header, int for_disksave); | 285 static void finalize_coding_system (void *header, int for_disksave); |
247 | 286 |
248 #ifdef MULE | 287 #ifdef MULE |
249 static const struct lrecord_description ccs_description_1[] = { | 288 static const struct lrecord_description ccs_description_1[] = { |
285 finalize_coding_system, | 324 finalize_coding_system, |
286 0, 0, coding_system_description, | 325 0, 0, coding_system_description, |
287 struct Lisp_Coding_System); | 326 struct Lisp_Coding_System); |
288 | 327 |
289 static Lisp_Object | 328 static Lisp_Object |
290 mark_coding_system (Lisp_Object obj, void (*markobj) (Lisp_Object)) | 329 mark_coding_system (Lisp_Object obj) |
291 { | 330 { |
292 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); | 331 Lisp_Coding_System *codesys = XCODING_SYSTEM (obj); |
293 | 332 |
294 markobj (CODING_SYSTEM_NAME (codesys)); | 333 mark_object (CODING_SYSTEM_NAME (codesys)); |
295 markobj (CODING_SYSTEM_DOC_STRING (codesys)); | 334 mark_object (CODING_SYSTEM_DOC_STRING (codesys)); |
296 markobj (CODING_SYSTEM_MNEMONIC (codesys)); | 335 mark_object (CODING_SYSTEM_MNEMONIC (codesys)); |
297 markobj (CODING_SYSTEM_EOL_LF (codesys)); | 336 mark_object (CODING_SYSTEM_EOL_LF (codesys)); |
298 markobj (CODING_SYSTEM_EOL_CRLF (codesys)); | 337 mark_object (CODING_SYSTEM_EOL_CRLF (codesys)); |
299 markobj (CODING_SYSTEM_EOL_CR (codesys)); | 338 mark_object (CODING_SYSTEM_EOL_CR (codesys)); |
300 | 339 |
301 switch (CODING_SYSTEM_TYPE (codesys)) | 340 switch (CODING_SYSTEM_TYPE (codesys)) |
302 { | 341 { |
303 #ifdef MULE | 342 #ifdef MULE |
304 int i; | 343 int i; |
305 case CODESYS_ISO2022: | 344 case CODESYS_ISO2022: |
306 for (i = 0; i < 4; i++) | 345 for (i = 0; i < 4; i++) |
307 markobj (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); | 346 mark_object (CODING_SYSTEM_ISO2022_INITIAL_CHARSET (codesys, i)); |
308 if (codesys->iso2022.input_conv) | 347 if (codesys->iso2022.input_conv) |
309 { | 348 { |
310 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++) | 349 for (i = 0; i < Dynarr_length (codesys->iso2022.input_conv); i++) |
311 { | 350 { |
312 struct charset_conversion_spec *ccs = | 351 struct charset_conversion_spec *ccs = |
313 Dynarr_atp (codesys->iso2022.input_conv, i); | 352 Dynarr_atp (codesys->iso2022.input_conv, i); |
314 markobj (ccs->from_charset); | 353 mark_object (ccs->from_charset); |
315 markobj (ccs->to_charset); | 354 mark_object (ccs->to_charset); |
316 } | 355 } |
317 } | 356 } |
318 if (codesys->iso2022.output_conv) | 357 if (codesys->iso2022.output_conv) |
319 { | 358 { |
320 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++) | 359 for (i = 0; i < Dynarr_length (codesys->iso2022.output_conv); i++) |
321 { | 360 { |
322 struct charset_conversion_spec *ccs = | 361 struct charset_conversion_spec *ccs = |
323 Dynarr_atp (codesys->iso2022.output_conv, i); | 362 Dynarr_atp (codesys->iso2022.output_conv, i); |
324 markobj (ccs->from_charset); | 363 mark_object (ccs->from_charset); |
325 markobj (ccs->to_charset); | 364 mark_object (ccs->to_charset); |
326 } | 365 } |
327 } | 366 } |
328 break; | 367 break; |
329 | 368 |
330 case CODESYS_CCL: | 369 case CODESYS_CCL: |
331 markobj (CODING_SYSTEM_CCL_DECODE (codesys)); | 370 mark_object (CODING_SYSTEM_CCL_DECODE (codesys)); |
332 markobj (CODING_SYSTEM_CCL_ENCODE (codesys)); | 371 mark_object (CODING_SYSTEM_CCL_ENCODE (codesys)); |
333 break; | 372 break; |
334 #endif /* MULE */ | 373 #endif /* MULE */ |
335 default: | 374 default: |
336 break; | 375 break; |
337 } | 376 } |
338 | 377 |
339 markobj (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys)); | 378 mark_object (CODING_SYSTEM_PRE_WRITE_CONVERSION (codesys)); |
340 return CODING_SYSTEM_POST_READ_CONVERSION (codesys); | 379 return CODING_SYSTEM_POST_READ_CONVERSION (codesys); |
341 } | 380 } |
342 | 381 |
343 static void | 382 static void |
344 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, | 383 print_coding_system (Lisp_Object obj, Lisp_Object printcharfun, |
1299 /* Now go through the existing categories by priority to retrieve | 1338 /* Now go through the existing categories by priority to retrieve |
1300 the categories not yet specified and preserve their priority | 1339 the categories not yet specified and preserve their priority |
1301 order. */ | 1340 order. */ |
1302 for (j = 0; j <= CODING_CATEGORY_LAST; j++) | 1341 for (j = 0; j <= CODING_CATEGORY_LAST; j++) |
1303 { | 1342 { |
1304 int cat = coding_category_by_priority[j]; | 1343 int cat = fcd->coding_category_by_priority[j]; |
1305 if (category_to_priority[cat] < 0) | 1344 if (category_to_priority[cat] < 0) |
1306 category_to_priority[cat] = i++; | 1345 category_to_priority[cat] = i++; |
1307 } | 1346 } |
1308 | 1347 |
1309 /* Now we need to construct the inverse of the mapping we just | 1348 /* Now we need to construct the inverse of the mapping we just |
1310 constructed. */ | 1349 constructed. */ |
1311 | 1350 |
1312 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 1351 for (i = 0; i <= CODING_CATEGORY_LAST; i++) |
1313 coding_category_by_priority[category_to_priority[i]] = i; | 1352 fcd->coding_category_by_priority[category_to_priority[i]] = i; |
1314 | 1353 |
1315 /* Phew! That was confusing. */ | 1354 /* Phew! That was confusing. */ |
1316 return Qnil; | 1355 return Qnil; |
1317 } | 1356 } |
1318 | 1357 |
1323 { | 1362 { |
1324 int i; | 1363 int i; |
1325 Lisp_Object list = Qnil; | 1364 Lisp_Object list = Qnil; |
1326 | 1365 |
1327 for (i = CODING_CATEGORY_LAST; i >= 0; i--) | 1366 for (i = CODING_CATEGORY_LAST; i >= 0; i--) |
1328 list = Fcons (coding_category_symbol[coding_category_by_priority[i]], | 1367 list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]], |
1329 list); | 1368 list); |
1330 return list; | 1369 return list; |
1331 } | 1370 } |
1332 | 1371 |
1333 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* | 1372 DEFUN ("set-coding-category-system", Fset_coding_category_system, 2, 2, 0, /* |
1336 (coding_category, coding_system)) | 1375 (coding_category, coding_system)) |
1337 { | 1376 { |
1338 int cat = decode_coding_category (coding_category); | 1377 int cat = decode_coding_category (coding_category); |
1339 | 1378 |
1340 coding_system = Fget_coding_system (coding_system); | 1379 coding_system = Fget_coding_system (coding_system); |
1341 coding_category_system[cat] = coding_system; | 1380 fcd->coding_category_system[cat] = coding_system; |
1342 return Qnil; | 1381 return Qnil; |
1343 } | 1382 } |
1344 | 1383 |
1345 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* | 1384 DEFUN ("coding-category-system", Fcoding_category_system, 1, 1, 0, /* |
1346 Return the coding system associated with a coding category. | 1385 Return the coding system associated with a coding category. |
1347 */ | 1386 */ |
1348 (coding_category)) | 1387 (coding_category)) |
1349 { | 1388 { |
1350 int cat = decode_coding_category (coding_category); | 1389 int cat = decode_coding_category (coding_category); |
1351 Lisp_Object sys = coding_category_system[cat]; | 1390 Lisp_Object sys = fcd->coding_category_system[cat]; |
1352 | 1391 |
1353 if (!NILP (sys)) | 1392 if (!NILP (sys)) |
1354 return XCODING_SYSTEM_NAME (sys); | 1393 return XCODING_SYSTEM_NAME (sys); |
1355 return Qnil; | 1394 return Qnil; |
1356 } | 1395 } |
1580 #endif | 1619 #endif |
1581 /* Look through the coding categories by priority and find | 1620 /* Look through the coding categories by priority and find |
1582 the first one that is allowed. */ | 1621 the first one that is allowed. */ |
1583 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 1622 for (i = 0; i <= CODING_CATEGORY_LAST; i++) |
1584 { | 1623 { |
1585 cat = coding_category_by_priority[i]; | 1624 cat = fcd->coding_category_by_priority[i]; |
1586 if ((mask & (1 << cat)) && | 1625 if ((mask & (1 << cat)) && |
1587 !NILP (coding_category_system[cat])) | 1626 !NILP (fcd->coding_category_system[cat])) |
1588 break; | 1627 break; |
1589 } | 1628 } |
1590 if (cat >= 0) | 1629 if (cat >= 0) |
1591 return coding_category_system[cat]; | 1630 return fcd->coding_category_system[cat]; |
1592 else | 1631 else |
1593 return Fget_coding_system (Qraw_text); | 1632 return Fget_coding_system (Qraw_text); |
1594 } | 1633 } |
1595 } | 1634 } |
1596 | 1635 |
1618 | 1657 |
1619 /* If autodetection is called for, do it now. */ | 1658 /* If autodetection is called for, do it now. */ |
1620 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT || | 1659 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT || |
1621 *eol_type_in_out == EOL_AUTODETECT) | 1660 *eol_type_in_out == EOL_AUTODETECT) |
1622 { | 1661 { |
1623 | 1662 unsigned char random_buffer[4096]; |
1624 while (1) | 1663 int nread; |
1625 { | 1664 Lisp_Object coding_system = Qnil; |
1626 unsigned char random_buffer[4096]; | 1665 |
1627 int nread; | 1666 nread = Lstream_read (stream, random_buffer, sizeof (random_buffer)); |
1628 | 1667 if (nread) |
1629 nread = Lstream_read (stream, random_buffer, sizeof (random_buffer)); | 1668 { |
1630 if (!nread) | 1669 unsigned char *cp = random_buffer; |
1631 break; | 1670 |
1632 if (detect_coding_type (&decst, random_buffer, nread, | 1671 while (cp < random_buffer + nread) |
1633 XCODING_SYSTEM_TYPE (*codesys_in_out) != | 1672 { |
1634 CODESYS_AUTODETECT)) | 1673 if ((*cp++ == 'c') && (cp < random_buffer + nread) && |
1635 break; | 1674 (*cp++ == 'o') && (cp < random_buffer + nread) && |
1636 } | 1675 (*cp++ == 'd') && (cp < random_buffer + nread) && |
1637 | 1676 (*cp++ == 'i') && (cp < random_buffer + nread) && |
| | 1677 (*cp++ == 'n') && (cp < random_buffer + nread) && |
| | 1678 (*cp++ == 'g') && (cp < random_buffer + nread) && |
| | 1679 (*cp++ == ':') && (cp < random_buffer + nread)) |
| | 1680 { |
| | 1681 unsigned char coding_system_name[4096 - 6]; |
| | 1682 unsigned char *np = coding_system_name; |
| | 1683 |
| | 1684 while ( (cp < random_buffer + nread) |
| | 1685 && ((*cp == ' ') || (*cp == '\t')) ) |
| | 1686 { |
| | 1687 cp++; |
| | 1688 } |
| | 1689 while ( (cp < random_buffer + nread) && |
| | 1690 (*cp != ' ') && (*cp != '\t') && (*cp != ';') ) |
| | 1691 { |
| | 1692 *np++ = *cp++; |
| | 1693 } |
| | 1694 *np = 0; |
| | 1695 coding_system |
| | 1696 = Ffind_coding_system (intern ((char *) coding_system_name)); |
| | 1697 break; |
| | 1698 } |
| | 1699 } |
| | 1700 if (EQ(coding_system, Qnil)) |
| | 1701 do{ |
| | 1702 if (detect_coding_type (&decst, random_buffer, nread, |
| | 1703 XCODING_SYSTEM_TYPE (*codesys_in_out) |
| | 1704 != CODESYS_AUTODETECT)) |
| | 1705 break; |
| | 1706 nread = Lstream_read (stream, |
| | 1707 random_buffer, sizeof (random_buffer)); |
| | 1708 if (!nread) |
| | 1709 break; |
| | 1710 } while(1); |
| | 1711 } |
1638 *eol_type_in_out = decst.eol_type; | 1712 *eol_type_in_out = decst.eol_type; |
1639 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT) | 1713 if (XCODING_SYSTEM_TYPE (*codesys_in_out) == CODESYS_AUTODETECT) |
1640 *codesys_in_out = coding_system_from_mask (decst.mask); | 1714 { |
1641 } | 1715 if (EQ(coding_system, Qnil)) |
1642 | 1716 *codesys_in_out = coding_system_from_mask (decst.mask); |
| | 1717 else |
| | 1718 *codesys_in_out = coding_system; |
| | 1719 } |
| | 1720 } |
1643 /* If we absolutely can't determine the EOL type, just assume LF. */ | 1721 /* If we absolutely can't determine the EOL type, just assume LF. */ |
1644 if (*eol_type_in_out == EOL_AUTODETECT) | 1722 if (*eol_type_in_out == EOL_AUTODETECT) |
1645 *eol_type_in_out = EOL_LF; | 1723 *eol_type_in_out = EOL_LF; |
1646 | 1724 |
1647 Lstream_rewind (stream); | 1725 Lstream_rewind (stream); |
1695 #ifdef MULE | 1773 #ifdef MULE |
1696 decst.mask = postprocess_iso2022_mask (decst.mask); | 1774 decst.mask = postprocess_iso2022_mask (decst.mask); |
1697 #endif | 1775 #endif |
1698 for (i = CODING_CATEGORY_LAST; i >= 0; i--) | 1776 for (i = CODING_CATEGORY_LAST; i >= 0; i--) |
1699 { | 1777 { |
1700 int sys = coding_category_by_priority[i]; | 1778 int sys = fcd->coding_category_by_priority[i]; |
1701 if (decst.mask & (1 << sys)) | 1779 if (decst.mask & (1 << sys)) |
1702 { | 1780 { |
1703 Lisp_Object codesys = coding_category_system[sys]; | 1781 Lisp_Object codesys = fcd->coding_category_system[sys]; |
1704 if (!NILP (codesys)) | 1782 if (!NILP (codesys)) |
1705 codesys = subsidiary_coding_system (codesys, decst.eol_type); | 1783 codesys = subsidiary_coding_system (codesys, decst.eol_type); |
1706 val = Fcons (codesys, val); | 1784 val = Fcons (codesys, val); |
1707 } | 1785 } |
1708 } | 1786 } |
1834 struct iso2022_decoder iso2022; | 1912 struct iso2022_decoder iso2022; |
1835 | 1913 |
1836 /* Additional information (the state of the running CCL program) | 1914 /* Additional information (the state of the running CCL program) |
1837 used by the CCL decoder. */ | 1915 used by the CCL decoder. */ |
1838 struct ccl_program ccl; | 1916 struct ccl_program ccl; |
| | 1917 |
| | 1918 /* counter for UTF-8 or UCS-4 */ |
| | 1919 unsigned char counter; |
1839 #endif | 1920 #endif |
1840 struct detection_state decst; | 1921 struct detection_state decst; |
1841 }; | 1922 }; |
1842 | 1923 |
1843 static int decoding_reader (Lstream *stream, unsigned char *data, size_t size); | 1924 static int decoding_reader (Lstream *stream, unsigned char *data, size_t size); |
1845 static int decoding_rewinder (Lstream *stream); | 1926 static int decoding_rewinder (Lstream *stream); |
1846 static int decoding_seekable_p (Lstream *stream); | 1927 static int decoding_seekable_p (Lstream *stream); |
1847 static int decoding_flusher (Lstream *stream); | 1928 static int decoding_flusher (Lstream *stream); |
1848 static int decoding_closer (Lstream *stream); | 1929 static int decoding_closer (Lstream *stream); |
1849 | 1930 |
1850 static Lisp_Object decoding_marker (Lisp_Object stream, | 1931 static Lisp_Object decoding_marker (Lisp_Object stream); |
1851 void (*markobj) (Lisp_Object)); | |
1852 | 1932 |
1853 DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding, | 1933 DEFINE_LSTREAM_IMPLEMENTATION ("decoding", lstream_decoding, |
1854 sizeof (struct decoding_stream)); | 1934 sizeof (struct decoding_stream)); |
1855 | 1935 |
1856 static Lisp_Object | 1936 static Lisp_Object |
1857 decoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object)) | 1937 decoding_marker (Lisp_Object stream) |
1858 { | 1938 { |
1859 Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end; | 1939 Lstream *str = DECODING_STREAM_DATA (XLSTREAM (stream))->other_end; |
1860 Lisp_Object str_obj; | 1940 Lisp_Object str_obj; |
1861 | 1941 |
1862 /* We do not need to mark the coding systems or charsets stored | 1942 /* We do not need to mark the coding systems or charsets stored |
1863 within the stream because they are stored in a global list | 1943 within the stream because they are stored in a global list |
1864 and automatically marked. */ | 1944 and automatically marked. */ |
1865 | 1945 |
1866 XSETLSTREAM (str_obj, str); | 1946 XSETLSTREAM (str_obj, str); |
1867 markobj (str_obj); | 1947 mark_object (str_obj); |
1868 if (str->imp->marker) | 1948 if (str->imp->marker) |
1869 return (str->imp->marker) (str_obj, markobj); | 1949 return (str->imp->marker) (str_obj); |
1870 else | 1950 else |
1871 return Qnil; | 1951 return Qnil; |
1872 } | 1952 } |
1873 | 1953 |
1874 /* Read SIZE bytes of data and store it into DATA. We are a decoding stream | 1954 /* Read SIZE bytes of data and store it into DATA. We are a decoding stream |
1968 } | 2048 } |
1969 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL) | 2049 else if (CODING_SYSTEM_TYPE (str->codesys) == CODESYS_CCL) |
1970 { | 2050 { |
1971 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys)); | 2051 setup_ccl_program (&str->ccl, CODING_SYSTEM_CCL_DECODE (str->codesys)); |
1972 } | 2052 } |
| | 2053 str->counter = 0; |
1973 #endif /* MULE */ | 2054 #endif /* MULE */ |
1974 str->flags = str->ch = 0; | 2055 str->flags = str->ch = 0; |
1975 } | 2056 } |
1976 | 2057 |
1977 static int | 2058 static int |
2300 static int encoding_rewinder (Lstream *stream); | 2381 static int encoding_rewinder (Lstream *stream); |
2301 static int encoding_seekable_p (Lstream *stream); | 2382 static int encoding_seekable_p (Lstream *stream); |
2302 static int encoding_flusher (Lstream *stream); | 2383 static int encoding_flusher (Lstream *stream); |
2303 static int encoding_closer (Lstream *stream); | 2384 static int encoding_closer (Lstream *stream); |
2304 | 2385 |
2305 static Lisp_Object encoding_marker (Lisp_Object stream, | 2386 static Lisp_Object encoding_marker (Lisp_Object stream); |
2306 void (*markobj) (Lisp_Object)); | |
2307 | 2387 |
2308 DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding, | 2388 DEFINE_LSTREAM_IMPLEMENTATION ("encoding", lstream_encoding, |
2309 sizeof (struct encoding_stream)); | 2389 sizeof (struct encoding_stream)); |
2310 | 2390 |
2311 static Lisp_Object | 2391 static Lisp_Object |
2312 encoding_marker (Lisp_Object stream, void (*markobj) (Lisp_Object)) | 2392 encoding_marker (Lisp_Object stream) |
2313 { | 2393 { |
2314 Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end; | 2394 Lstream *str = ENCODING_STREAM_DATA (XLSTREAM (stream))->other_end; |
2315 Lisp_Object str_obj; | 2395 Lisp_Object str_obj; |
2316 | 2396 |
2317 /* We do not need to mark the coding systems or charsets stored | 2397 /* We do not need to mark the coding systems or charsets stored |
2318 within the stream because they are stored in a global list | 2398 within the stream because they are stored in a global list |
2319 and automatically marked. */ | 2399 and automatically marked. */ |
2320 | 2400 |
2321 XSETLSTREAM (str_obj, str); | 2401 XSETLSTREAM (str_obj, str); |
2322 markobj (str_obj); | 2402 mark_object (str_obj); |
2323 if (str->imp->marker) | 2403 if (str->imp->marker) |
2324 return (str->imp->marker) (str_obj, markobj); | 2404 return (str->imp->marker) (str_obj); |
2325 else | 2405 else |
2326 return Qnil; | 2406 return Qnil; |
2327 } | 2407 } |
2328 | 2408 |
2329 /* Read SIZE bytes of data and store it into DATA. We are a encoding stream | 2409 /* Read SIZE bytes of data and store it into DATA. We are a encoding stream |
3158 /* */ | 3238 /* */ |
3159 /* UCS-4 character codes are implemented as nonnegative integers. */ | 3239 /* UCS-4 character codes are implemented as nonnegative integers. */ |
3160 /* */ | 3240 /* */ |
3161 /************************************************************************/ | 3241 /************************************************************************/ |
3162 | 3242 |
3163 Lisp_Object ucs_to_mule_table[65536]; | |
3164 Lisp_Object mule_to_ucs_table; | |
3165 | 3243 |
3166 DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /* | 3244 DEFUN ("set-ucs-char", Fset_ucs_char, 2, 2, 0, /* |
3167 Map UCS-4 code CODE to Mule character CHARACTER. | 3245 Map UCS-4 code CODE to Mule character CHARACTER. |
3168 | 3246 |
3169 Return T on success, NIL on failure. | 3247 Return T on success, NIL on failure. |
3174 | 3252 |
3175 CHECK_CHAR (character); | 3253 CHECK_CHAR (character); |
3176 CHECK_INT (code); | 3254 CHECK_INT (code); |
3177 c = XINT (code); | 3255 c = XINT (code); |
3178 | 3256 |
3179 if (c < sizeof (ucs_to_mule_table)) | 3257 if (c < sizeof (fcd->ucs_to_mule_table)) |
3180 { | 3258 { |
3181 ucs_to_mule_table[c] = character; | 3259 fcd->ucs_to_mule_table[c] = character; |
3182 return Qt; | 3260 return Qt; |
3183 } | 3261 } |
3184 else | 3262 else |
3185 return Qnil; | 3263 return Qnil; |
3186 } | 3264 } |
3187 | 3265 |
3188 static Lisp_Object | 3266 static Lisp_Object |
3189 ucs_to_char (unsigned long code) | 3267 ucs_to_char (unsigned long code) |
3190 { | 3268 { |
3191 if (code < sizeof (ucs_to_mule_table)) | 3269 if (code < sizeof (fcd->ucs_to_mule_table)) |
3192 { | 3270 { |
3193 return ucs_to_mule_table[code]; | 3271 return fcd->ucs_to_mule_table[code]; |
3194 } | 3272 } |
3195 else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14)) | 3273 else if ((0xe00000 <= code) && (code <= 0xe00000 + 94 * 94 * 14)) |
3196 { | 3274 { |
3197 unsigned int c; | 3275 unsigned int c; |
3198 | 3276 |
3341 unsigned_char_dynarr *dst, unsigned int n) | 3419 unsigned_char_dynarr *dst, unsigned int n) |
3342 { | 3420 { |
3343 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 3421 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
3344 unsigned int flags = str->flags; | 3422 unsigned int flags = str->flags; |
3345 unsigned int ch = str->ch; | 3423 unsigned int ch = str->ch; |
| | 3424 unsigned char counter = str->counter; |
3346 | 3425 |
3347 while (n--) | 3426 while (n--) |
3348 { | 3427 { |
3349 unsigned char c = *src++; | 3428 unsigned char c = *src++; |
3350 switch (flags) | 3429 switch (counter) |
3351 { | 3430 { |
3352 case 0: | 3431 case 0: |
3353 ch = c; | 3432 ch = c; |
3354 flags = 3; | 3433 counter = 3; |
3355 break; | 3434 break; |
3356 case 1: | 3435 case 1: |
3357 decode_ucs4 ( ( ch << 8 ) | c, dst); | 3436 decode_ucs4 ( ( ch << 8 ) | c, dst); |
3358 ch = 0; | 3437 ch = 0; |
3359 flags = 0; | 3438 counter = 0; |
3360 break; | 3439 break; |
3361 default: | 3440 default: |
3362 ch = ( ch << 8 ) | c; | 3441 ch = ( ch << 8 ) | c; |
3363 flags--; | 3442 counter--; |
3364 } | 3443 } |
3365 } | 3444 } |
3366 if (flags & CODING_STATE_END) | 3445 if (counter & CODING_STATE_END) |
3367 DECODE_OUTPUT_PARTIAL_CHAR (ch); | 3446 DECODE_OUTPUT_PARTIAL_CHAR (ch); |
3368 | 3447 |
3369 str->flags = flags; | 3448 str->flags = flags; |
3370 str->ch = ch; | 3449 str->ch = ch; |
| | 3450 str->counter = counter; |
3371 } | 3451 } |
3372 | 3452 |
3373 static void | 3453 static void |
3374 encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src, | 3454 encode_coding_ucs4 (Lstream *encoding, CONST unsigned char *src, |
3375 unsigned_char_dynarr *dst, unsigned int n) | 3455 unsigned_char_dynarr *dst, unsigned int n) |
3550 { | 3630 { |
3551 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); | 3631 struct decoding_stream *str = DECODING_STREAM_DATA (decoding); |
3552 unsigned int flags = str->flags; | 3632 unsigned int flags = str->flags; |
3553 unsigned int ch = str->ch; | 3633 unsigned int ch = str->ch; |
3554 eol_type_t eol_type = str->eol_type; | 3634 eol_type_t eol_type = str->eol_type; |
| | 3635 unsigned char counter = str->counter; |
3555 | 3636 |
3556 while (n--) | 3637 while (n--) |
3557 { | 3638 { |
3558 unsigned char c = *src++; | 3639 unsigned char c = *src++; |
3559 switch (flags) | 3640 switch (counter) |
3560 { | 3641 { |
3561 case 0: | 3642 case 0: |
3562 if ( c >= 0xfc ) | 3643 if ( c >= 0xfc ) |
3563 { | 3644 { |
3564 ch = c & 0x01; | 3645 ch = c & 0x01; |
3565 flags = 5; | 3646 counter = 5; |
3566 } | 3647 } |
3567 else if ( c >= 0xf8 ) | 3648 else if ( c >= 0xf8 ) |
3568 { | 3649 { |
3569 ch = c & 0x03; | 3650 ch = c & 0x03; |
3570 flags = 4; | 3651 counter = 4; |
3571 } | 3652 } |
3572 else if ( c >= 0xf0 ) | 3653 else if ( c >= 0xf0 ) |
3573 { | 3654 { |
3574 ch = c & 0x07; | 3655 ch = c & 0x07; |
3575 flags = 3; | 3656 counter = 3; |
3576 } | 3657 } |
3577 else if ( c >= 0xe0 ) | 3658 else if ( c >= 0xe0 ) |
3578 { | 3659 { |
3579 ch = c & 0x0f; | 3660 ch = c & 0x0f; |
3580 flags = 2; | 3661 counter = 2; |
3581 } | 3662 } |
3582 else if ( c >= 0xc0 ) | 3663 else if ( c >= 0xc0 ) |
3583 { | 3664 { |
3584 ch = c & 0x1f; | 3665 ch = c & 0x1f; |
3585 flags = 1; | 3666 counter = 1; |
3586 } | 3667 } |
3587 else | 3668 else |
3588 { | 3669 { |
3589 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); | 3670 DECODE_HANDLE_EOL_TYPE (eol_type, c, flags, dst); |
3590 decode_ucs4 (c, dst); | 3671 decode_ucs4 (c, dst); |
3592 break; | 3673 break; |
3593 case 1: | 3674 case 1: |
3594 ch = ( ch << 6 ) | ( c & 0x3f ); | 3675 ch = ( ch << 6 ) | ( c & 0x3f ); |
3595 decode_ucs4 (ch, dst); | 3676 decode_ucs4 (ch, dst); |
3596 ch = 0; | 3677 ch = 0; |
3597 flags = 0; | 3678 counter = 0; |
3598 break; | 3679 break; |
3599 default: | 3680 default: |
3600 ch = ( ch << 6 ) | ( c & 0x3f ); | 3681 ch = ( ch << 6 ) | ( c & 0x3f ); |
3601 flags--; | 3682 counter--; |
3602 } | 3683 } |
3603 label_continue_loop:; | 3684 label_continue_loop:; |
3604 } | 3685 } |
3605 | 3686 |
3606 if (flags & CODING_STATE_END) | 3687 if (flags & CODING_STATE_END) |
3607 DECODE_OUTPUT_PARTIAL_CHAR (ch); | 3688 DECODE_OUTPUT_PARTIAL_CHAR (ch); |
3608 | 3689 |
3609 str->flags = flags; | 3690 str->flags = flags; |
3610 str->ch = ch; | 3691 str->ch = ch; |
| | 3692 str->counter = counter; |
3611 } | 3693 } |
3612 | 3694 |
3613 static void | 3695 static void |
3614 encode_utf8 (Lisp_Object charset, | 3696 encode_utf8 (Lisp_Object charset, |
3615 unsigned char h, unsigned char l, unsigned_char_dynarr *dst) | 3697 unsigned char h, unsigned char l, unsigned_char_dynarr *dst) |
5442 /************************************************************************/ | 5524 /************************************************************************/ |
5443 | 5525 |
5444 void | 5526 void |
5445 syms_of_file_coding (void) | 5527 syms_of_file_coding (void) |
5446 { | 5528 { |
5447 defsymbol (&Qbuffer_file_coding_system, "buffer-file-coding-system"); | |
5448 deferror (&Qcoding_system_error, "coding-system-error", | 5529 deferror (&Qcoding_system_error, "coding-system-error", |
5449 "Coding-system error", Qio_error); | 5530 "Coding-system error", Qio_error); |
5450 | 5531 |
5451 DEFSUBR (Fcoding_system_p); | 5532 DEFSUBR (Fcoding_system_p); |
5452 DEFSUBR (Ffind_coding_system); | 5533 DEFSUBR (Ffind_coding_system); |
5482 DEFSUBR (Fset_ucs_char); | 5563 DEFSUBR (Fset_ucs_char); |
5483 DEFSUBR (Fucs_char); | 5564 DEFSUBR (Fucs_char); |
5484 DEFSUBR (Fset_char_ucs); | 5565 DEFSUBR (Fset_char_ucs); |
5485 DEFSUBR (Fchar_ucs); | 5566 DEFSUBR (Fchar_ucs); |
5486 #endif /* MULE */ | 5567 #endif /* MULE */ |
5487 defsymbol (&Qcoding_system_p, "coding-system-p"); | 5568 defsymbol (&Qcoding_systemp, "coding-system-p"); |
5488 defsymbol (&Qno_conversion, "no-conversion"); | 5569 defsymbol (&Qno_conversion, "no-conversion"); |
5489 defsymbol (&Qraw_text, "raw-text"); | 5570 defsymbol (&Qraw_text, "raw-text"); |
5490 #ifdef MULE | 5571 #ifdef MULE |
5491 defsymbol (&Qbig5, "big5"); | 5572 defsymbol (&Qbig5, "big5"); |
5492 defsymbol (&Qshift_jis, "shift-jis"); | 5573 defsymbol (&Qshift_jis, "shift-jis"); |
5577 void | 5658 void |
5578 vars_of_file_coding (void) | 5659 vars_of_file_coding (void) |
5579 { | 5660 { |
5580 int i; | 5661 int i; |
5581 | 5662 |
| | 5663 fcd = xnew (struct file_coding_dump); |
| | 5664 dumpstruct (&fcd, &fcd_description); |
| | 5665 |
5582 /* Initialize to something reasonable ... */ | 5666 /* Initialize to something reasonable ... */ |
5583 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 5667 for (i = 0; i <= CODING_CATEGORY_LAST; i++) |
5584 { | 5668 { |
5585 coding_category_system[i] = Qnil; | 5669 fcd->coding_category_system[i] = Qnil; |
5586 coding_category_by_priority[i] = i; | 5670 fcd->coding_category_by_priority[i] = i; |
5587 } | 5671 } |
5588 | 5672 |
5589 Fprovide (intern ("file-coding")); | 5673 Fprovide (intern ("file-coding")); |
5590 | 5674 |
5591 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* | 5675 DEFVAR_LISP ("keyboard-coding-system", &Vkeyboard_coding_system /* |
5642 staticpro (&Vcoding_system_hash_table); | 5726 staticpro (&Vcoding_system_hash_table); |
5643 Vcoding_system_hash_table = | 5727 Vcoding_system_hash_table = |
5644 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); | 5728 make_lisp_hash_table (50, HASH_TABLE_NON_WEAK, HASH_TABLE_EQ); |
5645 | 5729 |
5646 the_codesys_prop_dynarr = Dynarr_new (codesys_prop); | 5730 the_codesys_prop_dynarr = Dynarr_new (codesys_prop); |
| | 5731 dumpstruct (&the_codesys_prop_dynarr, &codesys_prop_dynarr_description); |
5647 | 5732 |
5648 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \ | 5733 #define DEFINE_CODESYS_PROP(Prop_Type, Sym) do \ |
5649 { \ | 5734 { \ |
5650 struct codesys_prop csp; \ | 5735 struct codesys_prop csp; \ |
5651 csp.sym = (Sym); \ | 5736 csp.sym = (Sym); \ |
5695 Qmnemonic, build_string ("Binary"))); | 5780 Qmnemonic, build_string ("Binary"))); |
5696 | 5781 |
5697 Fdefine_coding_system_alias (Qno_conversion, Qraw_text); | 5782 Fdefine_coding_system_alias (Qno_conversion, Qraw_text); |
5698 | 5783 |
5699 /* Need this for bootstrapping */ | 5784 /* Need this for bootstrapping */ |
5700 coding_category_system[CODING_CATEGORY_NO_CONVERSION] = | 5785 fcd->coding_category_system[CODING_CATEGORY_NO_CONVERSION] = |
5701 Fget_coding_system (Qraw_text); | 5786 Fget_coding_system (Qraw_text); |
5702 | 5787 |
5703 #ifdef MULE | 5788 #ifdef MULE |
5704 { | 5789 { |
5705 unsigned int i; | 5790 unsigned int i; |
5706 | 5791 |
5707 for (i = 0; i < 65536; i++) | 5792 for (i = 0; i < 65536; i++) |
5708 ucs_to_mule_table[i] = Qnil; | 5793 fcd->ucs_to_mule_table[i] = Qnil; |
5709 } | 5794 } |
5710 staticpro (&mule_to_ucs_table); | 5795 staticpro (&mule_to_ucs_table); |
5711 mule_to_ucs_table = Fmake_char_table(Qgeneric); | 5796 mule_to_ucs_table = Fmake_char_table(Qgeneric); |
5712 #endif /* MULE */ | 5797 #endif /* MULE */ |
5713 } | 5798 } |