Mercurial > hg > xemacs-beta
comparison src/file-coding.c @ 448:3078fd1074e8 r21-2-39
Import from CVS: tag r21-2-39
author | cvs |
---|---|
date | Mon, 13 Aug 2007 11:38:25 +0200 |
parents | 576fb035e263 |
children | 3d3049ae1304 |
comparison
legend: equal, deleted, inserted, replaced
447:4fc5f13f3bd3 | 448:3078fd1074e8 |
---|---|
44 Lisp_Object Vcoding_system_for_read; | 44 Lisp_Object Vcoding_system_for_read; |
45 Lisp_Object Vcoding_system_for_write; | 45 Lisp_Object Vcoding_system_for_write; |
46 Lisp_Object Vfile_name_coding_system; | 46 Lisp_Object Vfile_name_coding_system; |
47 | 47 |
48 /* Table of symbols identifying each coding category. */ | 48 /* Table of symbols identifying each coding category. */ |
49 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1]; | 49 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST]; |
50 | 50 |
51 | 51 |
52 | 52 |
53 struct file_coding_dump { | 53 struct file_coding_dump { |
54 /* Coding system currently associated with each coding category. */ | 54 /* Coding system currently associated with each coding category. */ |
55 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1]; | 55 Lisp_Object coding_category_system[CODING_CATEGORY_LAST]; |
56 | 56 |
57 /* Table of all coding categories in decreasing order of priority. | 57 /* Table of all coding categories in decreasing order of priority. |
58 This describes a permutation of the possible coding categories. */ | 58 This describes a permutation of the possible coding categories. */ |
59 int coding_category_by_priority[CODING_CATEGORY_LAST + 1]; | 59 int coding_category_by_priority[CODING_CATEGORY_LAST]; |
60 | 60 |
61 #ifdef MULE | 61 #ifdef MULE |
62 Lisp_Object ucs_to_mule_table[65536]; | 62 Lisp_Object ucs_to_mule_table[65536]; |
63 #endif | 63 #endif |
64 } *fcd; | 64 } *fcd; |
65 | 65 |
66 static const struct lrecord_description fcd_description_1[] = { | 66 static const struct lrecord_description fcd_description_1[] = { |
67 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 }, | 67 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST }, |
68 #ifdef MULE | 68 #ifdef MULE |
69 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), countof (fcd->ucs_to_mule_table) }, | 69 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), countof (fcd->ucs_to_mule_table) }, |
70 #endif | 70 #endif |
71 { XD_END } | 71 { XD_END } |
72 }; | 72 }; |
1431 decode_coding_category (Lisp_Object symbol) | 1431 decode_coding_category (Lisp_Object symbol) |
1432 { | 1432 { |
1433 int i; | 1433 int i; |
1434 | 1434 |
1435 CHECK_SYMBOL (symbol); | 1435 CHECK_SYMBOL (symbol); |
1436 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 1436 for (i = 0; i < CODING_CATEGORY_LAST; i++) |
1437 if (EQ (coding_category_symbol[i], symbol)) | 1437 if (EQ (coding_category_symbol[i], symbol)) |
1438 return i; | 1438 return i; |
1439 | 1439 |
1440 signal_simple_error ("Unrecognized coding category", symbol); | 1440 signal_simple_error ("Unrecognized coding category", symbol); |
1441 return 0; /* not reached */ | 1441 return 0; /* not reached */ |
1447 ()) | 1447 ()) |
1448 { | 1448 { |
1449 int i; | 1449 int i; |
1450 Lisp_Object list = Qnil; | 1450 Lisp_Object list = Qnil; |
1451 | 1451 |
1452 for (i = CODING_CATEGORY_LAST; i >= 0; i--) | 1452 for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--) |
1453 list = Fcons (coding_category_symbol[i], list); | 1453 list = Fcons (coding_category_symbol[i], list); |
1454 return list; | 1454 return list; |
1455 } | 1455 } |
1456 | 1456 |
1457 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* | 1457 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* |
1461 than all specified ones, in the same relative order they were in | 1461 than all specified ones, in the same relative order they were in |
1462 previously. | 1462 previously. |
1463 */ | 1463 */ |
1464 (list)) | 1464 (list)) |
1465 { | 1465 { |
1466 int category_to_priority[CODING_CATEGORY_LAST + 1]; | 1466 int category_to_priority[CODING_CATEGORY_LAST]; |
1467 int i, j; | 1467 int i, j; |
1468 Lisp_Object rest; | 1468 Lisp_Object rest; |
1469 | 1469 |
1470 /* First generate a list that maps coding categories to priorities. */ | 1470 /* First generate a list that maps coding categories to priorities. */ |
1471 | 1471 |
1472 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 1472 for (i = 0; i < CODING_CATEGORY_LAST; i++) |
1473 category_to_priority[i] = -1; | 1473 category_to_priority[i] = -1; |
1474 | 1474 |
1475 /* Highest priority comes from the specified list. */ | 1475 /* Highest priority comes from the specified list. */ |
1476 i = 0; | 1476 i = 0; |
1477 EXTERNAL_LIST_LOOP (rest, list) | 1477 EXTERNAL_LIST_LOOP (rest, list) |
1484 } | 1484 } |
1485 | 1485 |
1486 /* Now go through the existing categories by priority to retrieve | 1486 /* Now go through the existing categories by priority to retrieve |
1487 the categories not yet specified and preserve their priority | 1487 the categories not yet specified and preserve their priority |
1488 order. */ | 1488 order. */ |
1489 for (j = 0; j <= CODING_CATEGORY_LAST; j++) | 1489 for (j = 0; j < CODING_CATEGORY_LAST; j++) |
1490 { | 1490 { |
1491 int cat = fcd->coding_category_by_priority[j]; | 1491 int cat = fcd->coding_category_by_priority[j]; |
1492 if (category_to_priority[cat] < 0) | 1492 if (category_to_priority[cat] < 0) |
1493 category_to_priority[cat] = i++; | 1493 category_to_priority[cat] = i++; |
1494 } | 1494 } |
1495 | 1495 |
1496 /* Now we need to construct the inverse of the mapping we just | 1496 /* Now we need to construct the inverse of the mapping we just |
1497 constructed. */ | 1497 constructed. */ |
1498 | 1498 |
1499 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 1499 for (i = 0; i < CODING_CATEGORY_LAST; i++) |
1500 fcd->coding_category_by_priority[category_to_priority[i]] = i; | 1500 fcd->coding_category_by_priority[category_to_priority[i]] = i; |
1501 | 1501 |
1502 /* Phew! That was confusing. */ | 1502 /* Phew! That was confusing. */ |
1503 return Qnil; | 1503 return Qnil; |
1504 } | 1504 } |
1509 ()) | 1509 ()) |
1510 { | 1510 { |
1511 int i; | 1511 int i; |
1512 Lisp_Object list = Qnil; | 1512 Lisp_Object list = Qnil; |
1513 | 1513 |
1514 for (i = CODING_CATEGORY_LAST; i >= 0; i--) | 1514 for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--) |
1515 list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]], | 1515 list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]], |
1516 list); | 1516 list); |
1517 return list; | 1517 return list; |
1518 } | 1518 } |
1519 | 1519 |
1759 #ifdef MULE | 1759 #ifdef MULE |
1760 mask = postprocess_iso2022_mask (mask); | 1760 mask = postprocess_iso2022_mask (mask); |
1761 #endif | 1761 #endif |
1762 /* Look through the coding categories by priority and find | 1762 /* Look through the coding categories by priority and find |
1763 the first one that is allowed. */ | 1763 the first one that is allowed. */ |
1764 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 1764 for (i = 0; i < CODING_CATEGORY_LAST; i++) |
1765 { | 1765 { |
1766 cat = fcd->coding_category_by_priority[i]; | 1766 cat = fcd->coding_category_by_priority[i]; |
1767 if ((mask & (1 << cat)) && | 1767 if ((mask & (1 << cat)) && |
1768 !NILP (fcd->coding_category_system[cat])) | 1768 !NILP (fcd->coding_category_system[cat])) |
1769 break; | 1769 break; |
1957 | 1957 |
1958 val = Qnil; | 1958 val = Qnil; |
1959 #ifdef MULE | 1959 #ifdef MULE |
1960 decst.mask = postprocess_iso2022_mask (decst.mask); | 1960 decst.mask = postprocess_iso2022_mask (decst.mask); |
1961 #endif | 1961 #endif |
1962 for (i = CODING_CATEGORY_LAST; i >= 0; i--) | 1962 for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--) |
1963 { | 1963 { |
1964 int sys = fcd->coding_category_by_priority[i]; | 1964 int sys = fcd->coding_category_by_priority[i]; |
1965 if (decst.mask & (1 << sys)) | 1965 if (decst.mask & (1 << sys)) |
1966 { | 1966 { |
1967 Lisp_Object codesys = fcd->coding_category_system[sys]; | 1967 Lisp_Object codesys = fcd->coding_category_system[sys]; |
4280 all the state variables (but not ISO2022.ESC_BYTES) and | 4280 all the state variables (but not ISO2022.ESC_BYTES) and |
4281 return 1. | 4281 return 1. |
4282 | 4282 |
4283 If CHECK_INVALID_CHARSETS is non-zero, check for designation | 4283 If CHECK_INVALID_CHARSETS is non-zero, check for designation |
4284 or invocation of an invalid character set and treat that as | 4284 or invocation of an invalid character set and treat that as |
4285 an unrecognized escape sequence. */ | 4285 an unrecognized escape sequence. |
 | 4286 |
 | 4287 ******************************************************************** |
 | 4288 |
 | 4289 #### Strategies for error annotation and coding orthogonalization |
 | 4290 |
 | 4291 We really want to separate out a number of things. Conceptually, |
 | 4292 there is a nested syntax. |
 | 4293 |
 | 4294 At the top level is the ISO 2022 extension syntax, including charset |
 | 4295 designation and invocation, and certain auxiliary controls such as the |
 | 4296 ISO 6429 direction specification. These are octet-oriented, with the |
 | 4297 single exception (AFAIK) of the "exit Unicode" sequence which uses the |
 | 4298 UTF's natural width (1 byte for UTF-7 and UTF-8, 2 bytes for UCS-2 and |
 | 4299 UTF-16, and 4 bytes for UCS-4 and UTF-32). This will be treated as a |
 | 4300 (deprecated) special case in Unicode processing. |
 | 4301 |
 | 4302 The middle layer is ISO 2022 character interpretation. This will depend |
 | 4303 on the current state of the ISO 2022 registers, and assembles octets |
 | 4304 into the character's internal representation. |
 | 4305 |
 | 4306 The lowest level is translating system control conventions. At present |
 | 4307 this is restricted to newline translation, but one could imagine doing |
 | 4308 tab conversion or line wrapping here. "Escape from Unicode" processing |
 | 4309 would be done at this level. |
 | 4310 |
 | 4311 At each level the parser will verify the syntax. In the case of a |
 | 4312 syntax error or warning (such as a redundant escape sequence that affects |
 | 4313 no characters), the parser will take some action, typically inserting the |
 | 4314 erroneous octets directly into the output and creating an annotation |
 | 4315 which can be used by higher level I/O to mark the affected region. |
 | 4316 |
 | 4317 This should make it possible to do something sensible about separating |
 | 4318 newline convention processing from character construction, and about |
 | 4319 preventing ISO 2022 escape sequences from being recognized |
 | 4320 inappropriately. |
 | 4321 |
 | 4322 The basic strategy will be to have octet classification tables, and |
 | 4323 switch processing according to the table entry. |
 | 4324 |
 | 4325 It's possible that, by doing the processing with tables of functions or |
 | 4326 the like, the parser can be used for both detection and translation. */ |
4286 | 4327 |
4287 static int | 4328 static int |
4288 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso, | 4329 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso, |
4289 unsigned char c, unsigned int *flags, | 4330 unsigned char c, unsigned int *flags, |
4290 int check_invalid_charsets) | 4331 int check_invalid_charsets) |
5698 | 5739 |
5699 fcd = xnew (struct file_coding_dump); | 5740 fcd = xnew (struct file_coding_dump); |
5700 dumpstruct (&fcd, &fcd_description); | 5741 dumpstruct (&fcd, &fcd_description); |
5701 | 5742 |
5702 /* Initialize to something reasonable ... */ | 5743 /* Initialize to something reasonable ... */ |
5703 for (i = 0; i <= CODING_CATEGORY_LAST; i++) | 5744 for (i = 0; i < CODING_CATEGORY_LAST; i++) |
5704 { | 5745 { |
5705 fcd->coding_category_system[i] = Qnil; | 5746 fcd->coding_category_system[i] = Qnil; |
5706 fcd->coding_category_by_priority[i] = i; | 5747 fcd->coding_category_by_priority[i] = i; |
5707 } | 5748 } |
5708 | 5749 |