comparison src/file-coding.c @ 448:3078fd1074e8 r21-2-39

Import from CVS: tag r21-2-39
author cvs
date Mon, 13 Aug 2007 11:38:25 +0200
parents 576fb035e263
children 3d3049ae1304
comparison
equal deleted inserted replaced
447:4fc5f13f3bd3 448:3078fd1074e8
44 Lisp_Object Vcoding_system_for_read; 44 Lisp_Object Vcoding_system_for_read;
45 Lisp_Object Vcoding_system_for_write; 45 Lisp_Object Vcoding_system_for_write;
46 Lisp_Object Vfile_name_coding_system; 46 Lisp_Object Vfile_name_coding_system;
47 47
48 /* Table of symbols identifying each coding category. */ 48 /* Table of symbols identifying each coding category. */
49 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST + 1]; 49 Lisp_Object coding_category_symbol[CODING_CATEGORY_LAST];
50 50
51 51
52 52
53 struct file_coding_dump { 53 struct file_coding_dump {
54 /* Coding system currently associated with each coding category. */ 54 /* Coding system currently associated with each coding category. */
55 Lisp_Object coding_category_system[CODING_CATEGORY_LAST + 1]; 55 Lisp_Object coding_category_system[CODING_CATEGORY_LAST];
56 56
57 /* Table of all coding categories in decreasing order of priority. 57 /* Table of all coding categories in decreasing order of priority.
58 This describes a permutation of the possible coding categories. */ 58 This describes a permutation of the possible coding categories. */
59 int coding_category_by_priority[CODING_CATEGORY_LAST + 1]; 59 int coding_category_by_priority[CODING_CATEGORY_LAST];
60 60
61 #ifdef MULE 61 #ifdef MULE
62 Lisp_Object ucs_to_mule_table[65536]; 62 Lisp_Object ucs_to_mule_table[65536];
63 #endif 63 #endif
64 } *fcd; 64 } *fcd;
65 65
66 static const struct lrecord_description fcd_description_1[] = { 66 static const struct lrecord_description fcd_description_1[] = {
67 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST + 1 }, 67 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, coding_category_system), CODING_CATEGORY_LAST },
68 #ifdef MULE 68 #ifdef MULE
69 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), countof (fcd->ucs_to_mule_table) }, 69 { XD_LISP_OBJECT_ARRAY, offsetof (struct file_coding_dump, ucs_to_mule_table), countof (fcd->ucs_to_mule_table) },
70 #endif 70 #endif
71 { XD_END } 71 { XD_END }
72 }; 72 };
1431 decode_coding_category (Lisp_Object symbol) 1431 decode_coding_category (Lisp_Object symbol)
1432 { 1432 {
1433 int i; 1433 int i;
1434 1434
1435 CHECK_SYMBOL (symbol); 1435 CHECK_SYMBOL (symbol);
1436 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1436 for (i = 0; i < CODING_CATEGORY_LAST; i++)
1437 if (EQ (coding_category_symbol[i], symbol)) 1437 if (EQ (coding_category_symbol[i], symbol))
1438 return i; 1438 return i;
1439 1439
1440 signal_simple_error ("Unrecognized coding category", symbol); 1440 signal_simple_error ("Unrecognized coding category", symbol);
1441 return 0; /* not reached */ 1441 return 0; /* not reached */
1447 ()) 1447 ())
1448 { 1448 {
1449 int i; 1449 int i;
1450 Lisp_Object list = Qnil; 1450 Lisp_Object list = Qnil;
1451 1451
1452 for (i = CODING_CATEGORY_LAST; i >= 0; i--) 1452 for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--)
1453 list = Fcons (coding_category_symbol[i], list); 1453 list = Fcons (coding_category_symbol[i], list);
1454 return list; 1454 return list;
1455 } 1455 }
1456 1456
1457 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /* 1457 DEFUN ("set-coding-priority-list", Fset_coding_priority_list, 1, 1, 0, /*
1461 than all specified ones, in the same relative order they were in 1461 than all specified ones, in the same relative order they were in
1462 previously. 1462 previously.
1463 */ 1463 */
1464 (list)) 1464 (list))
1465 { 1465 {
1466 int category_to_priority[CODING_CATEGORY_LAST + 1]; 1466 int category_to_priority[CODING_CATEGORY_LAST];
1467 int i, j; 1467 int i, j;
1468 Lisp_Object rest; 1468 Lisp_Object rest;
1469 1469
1470 /* First generate a list that maps coding categories to priorities. */ 1470 /* First generate a list that maps coding categories to priorities. */
1471 1471
1472 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1472 for (i = 0; i < CODING_CATEGORY_LAST; i++)
1473 category_to_priority[i] = -1; 1473 category_to_priority[i] = -1;
1474 1474
1475 /* Highest priority comes from the specified list. */ 1475 /* Highest priority comes from the specified list. */
1476 i = 0; 1476 i = 0;
1477 EXTERNAL_LIST_LOOP (rest, list) 1477 EXTERNAL_LIST_LOOP (rest, list)
1484 } 1484 }
1485 1485
1486 /* Now go through the existing categories by priority to retrieve 1486 /* Now go through the existing categories by priority to retrieve
1487 the categories not yet specified and preserve their priority 1487 the categories not yet specified and preserve their priority
1488 order. */ 1488 order. */
1489 for (j = 0; j <= CODING_CATEGORY_LAST; j++) 1489 for (j = 0; j < CODING_CATEGORY_LAST; j++)
1490 { 1490 {
1491 int cat = fcd->coding_category_by_priority[j]; 1491 int cat = fcd->coding_category_by_priority[j];
1492 if (category_to_priority[cat] < 0) 1492 if (category_to_priority[cat] < 0)
1493 category_to_priority[cat] = i++; 1493 category_to_priority[cat] = i++;
1494 } 1494 }
1495 1495
1496 /* Now we need to construct the inverse of the mapping we just 1496 /* Now we need to construct the inverse of the mapping we just
1497 constructed. */ 1497 constructed. */
1498 1498
1499 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1499 for (i = 0; i < CODING_CATEGORY_LAST; i++)
1500 fcd->coding_category_by_priority[category_to_priority[i]] = i; 1500 fcd->coding_category_by_priority[category_to_priority[i]] = i;
1501 1501
1502 /* Phew! That was confusing. */ 1502 /* Phew! That was confusing. */
1503 return Qnil; 1503 return Qnil;
1504 } 1504 }
1509 ()) 1509 ())
1510 { 1510 {
1511 int i; 1511 int i;
1512 Lisp_Object list = Qnil; 1512 Lisp_Object list = Qnil;
1513 1513
1514 for (i = CODING_CATEGORY_LAST; i >= 0; i--) 1514 for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--)
1515 list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]], 1515 list = Fcons (coding_category_symbol[fcd->coding_category_by_priority[i]],
1516 list); 1516 list);
1517 return list; 1517 return list;
1518 } 1518 }
1519 1519
1759 #ifdef MULE 1759 #ifdef MULE
1760 mask = postprocess_iso2022_mask (mask); 1760 mask = postprocess_iso2022_mask (mask);
1761 #endif 1761 #endif
1762 /* Look through the coding categories by priority and find 1762 /* Look through the coding categories by priority and find
1763 the first one that is allowed. */ 1763 the first one that is allowed. */
1764 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 1764 for (i = 0; i < CODING_CATEGORY_LAST; i++)
1765 { 1765 {
1766 cat = fcd->coding_category_by_priority[i]; 1766 cat = fcd->coding_category_by_priority[i];
1767 if ((mask & (1 << cat)) && 1767 if ((mask & (1 << cat)) &&
1768 !NILP (fcd->coding_category_system[cat])) 1768 !NILP (fcd->coding_category_system[cat]))
1769 break; 1769 break;
1957 1957
1958 val = Qnil; 1958 val = Qnil;
1959 #ifdef MULE 1959 #ifdef MULE
1960 decst.mask = postprocess_iso2022_mask (decst.mask); 1960 decst.mask = postprocess_iso2022_mask (decst.mask);
1961 #endif 1961 #endif
1962 for (i = CODING_CATEGORY_LAST; i >= 0; i--) 1962 for (i = CODING_CATEGORY_LAST - 1; i >= 0; i--)
1963 { 1963 {
1964 int sys = fcd->coding_category_by_priority[i]; 1964 int sys = fcd->coding_category_by_priority[i];
1965 if (decst.mask & (1 << sys)) 1965 if (decst.mask & (1 << sys))
1966 { 1966 {
1967 Lisp_Object codesys = fcd->coding_category_system[sys]; 1967 Lisp_Object codesys = fcd->coding_category_system[sys];
4280 all the state variables (but not ISO2022.ESC_BYTES) and 4280 all the state variables (but not ISO2022.ESC_BYTES) and
4281 return 1. 4281 return 1.
4282 4282
4283 If CHECK_INVALID_CHARSETS is non-zero, check for designation 4283 If CHECK_INVALID_CHARSETS is non-zero, check for designation
4284 or invocation of an invalid character set and treat that as 4284 or invocation of an invalid character set and treat that as
4285 an unrecognized escape sequence. */ 4285 an unrecognized escape sequence.
4286
4287 ********************************************************************
4288
4289 #### Strategies for error annotation and coding orthogonalization
4290
4291 We really want to separate out a number of things. Conceptually,
4292 there is a nested syntax.
4293
4294 At the top level is the ISO 2022 extension syntax, including charset
4295 designation and invocation, and certain auxiliary controls such as the
4296 ISO 6429 direction specification. These are octet-oriented, with the
4297 single exception (AFAIK) of the "exit Unicode" sequence, which uses the
4298 UTF's natural width (1 byte for UTF-7 and UTF-8, 2 bytes for UCS-2 and
4299 UTF-16, and 4 bytes for UCS-4 and UTF-32). This will be treated as a
4300 (deprecated) special case in Unicode processing.
4301
4302 The middle layer is ISO 2022 character interpretation. This will depend
4303 on the current state of the ISO 2022 registers, and assembles octets
4304 into the character's internal representation.
4305
4306 The lowest level is translating system control conventions. At present
4307 this is restricted to newline translation, but one could imagine doing
4308 tab conversion or line wrapping here. "Escape from Unicode" processing
4309 would be done at this level.
4310
4311 At each level the parser will verify the syntax. In the case of a
4312 syntax error or warning (such as a redundant escape sequence that affects
4313 no characters), the parser will take some action, typically inserting the
4314 erroneous octets directly into the output and creating an annotation
4315 which can be used by higher-level I/O to mark the affected region.
4316
4317 This should make it possible to do something sensible about separating
4318 newline convention processing from character construction, and about
4319 preventing ISO 2022 escape sequences from being recognized
4320 inappropriately.
4321
4322 The basic strategy will be to have octet classification tables, and
4323 switch processing according to the table entry.
4324
4325 It's possible that, by doing the processing with tables of functions or
4326 the like, the parser can be used for both detection and translation. */
4286 4327
4287 static int 4328 static int
4288 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso, 4329 parse_iso2022_esc (Lisp_Object codesys, struct iso2022_decoder *iso,
4289 unsigned char c, unsigned int *flags, 4330 unsigned char c, unsigned int *flags,
4290 int check_invalid_charsets) 4331 int check_invalid_charsets)
5698 5739
5699 fcd = xnew (struct file_coding_dump); 5740 fcd = xnew (struct file_coding_dump);
5700 dumpstruct (&fcd, &fcd_description); 5741 dumpstruct (&fcd, &fcd_description);
5701 5742
5702 /* Initialize to something reasonable ... */ 5743 /* Initialize to something reasonable ... */
5703 for (i = 0; i <= CODING_CATEGORY_LAST; i++) 5744 for (i = 0; i < CODING_CATEGORY_LAST; i++)
5704 { 5745 {
5705 fcd->coding_category_system[i] = Qnil; 5746 fcd->coding_category_system[i] = Qnil;
5706 fcd->coding_category_by_priority[i] = i; 5747 fcd->coding_category_by_priority[i] = i;
5707 } 5748 }
5708 5749