Mercurial > hg > xemacs-beta
comparison lisp/unicode.el @ 4268:75d0292c1bff
[xemacs-hg @ 2007-11-14 19:41:04 by aidan]
Correct the dumped information for the Unicode JIT infrastructure.
author | aidan |
---|---|
date | Wed, 14 Nov 2007 19:41:09 +0000 |
parents | 38ef5a6da799 |
children | 15d36164ebd7 |
comparison
equal
deleted
inserted
replaced
4267:66e2714696bd | 4268:75d0292c1bff |
---|---|
492 (modify-syntax-entry decoded | 492 (modify-syntax-entry decoded |
493 (string | 493 (string |
494 (char-syntax ascii-or-latin-1)) | 494 (char-syntax ascii-or-latin-1)) |
495 syntax-table)) | 495 syntax-table)) |
496 | 496 |
497 ;; Create all the Unicode error sequences, normally as jit-ucs-charset-0 | 497 ;; *Sigh*, declarations needs to be at the start of the line to be picked up |
498 ;; characters starting at U+200000 (which isn't a valid Unicode code | 498 ;; by make-docfile. Not so much an issue with ccl-encode-to-ucs-2, which we |
499 ;; point). Make them available to user code. | 499 ;; don't necessarily want to advertise, but the following are important. |
500 (defvar unicode-error-default-translation-table | 500 |
501 (loop | 501 ;; Create all the Unicode error sequences, normally as jit-ucs-charset-0 |
502 with char-table = (make-char-table 'char) | 502 ;; characters starting at U+200000 (which isn't a valid Unicode code |
503 for i from ?\x00 to ?\xFF | 503 ;; point). Make them available to user code. |
504 do | 504 (defvar unicode-error-default-translation-table |
505 (put-char-table (aref | 505 (loop |
506 ;; #xd800 is the first leading surrogate; | 506 with char-table = (make-char-table 'char) |
507 ;; trailing surrogates must be in the range | 507 for i from ?\x00 to ?\xFF |
508 ;; #xdc00-#xdfff. These examples are not, so we | 508 do |
509 ;; intentionally provoke an error sequence. | 509 (put-char-table (aref |
510 (decode-coding-string (format "\xd8\x00\x00%c" i) | 510 ;; #xd800 is the first leading surrogate; |
511 'utf-16-be) | 511 ;; trailing surrogates must be in the range |
512 3) | 512 ;; #xdc00-#xdfff. These examples are not, so we |
513 i | 513 ;; intentionally provoke an error sequence. |
514 char-table) | 514 (decode-coding-string (format "\xd8\x00\x00%c" i) |
515 finally return char-table) | 515 'utf-16-be) |
516 "Translation table mapping Unicode error sequences to Latin-1 chars. | 516 3) |
| | 517 i |
| | 518 char-table) |
| | 519 finally return char-table) |
| | 520 "Translation table mapping Unicode error sequences to Latin-1 chars. |
517 | 521 |
518 To transform XEmacs Unicode error sequences to the Latin-1 characters that | 522 To transform XEmacs Unicode error sequences to the Latin-1 characters that |
519 correspond to the octets on disk, you can use this variable. ") | 523 correspond to the octets on disk, you can use this variable. ") |
520 | 524 |
521 (defvar unicode-error-sequence-regexp-range | 525 (defvar unicode-error-sequence-regexp-range |
522 (format "%c%c-%c" | 526 (format "%c%c-%c" |
523 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0) | 527 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0) |
524 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3) | 528 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3) |
525 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)) | 529 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)) |
526 "Regular expression range to match Unicode error sequences in XEmacs. | 530 "Regular expression range to match Unicode error sequences in XEmacs. |
527 | 531 |
528 Invalid Unicode sequences on input are represented as XEmacs | 532 Invalid Unicode sequences on input are represented as XEmacs |
529 characters with values stored as the keys in | 533 characters with values stored as the keys in |
530 `unicode-error-default-translation-table', one character for each | 534 `unicode-error-default-translation-table', one character for each |
531 invalid octet. You can use this variable (with `re-search-forward' or | 535 invalid octet. You can use this variable (with `re-search-forward' or |
557 ; unicode-error-sequence-regexp-range | 561 ; unicode-error-sequence-regexp-range |
558 ; "]")) | 562 ; "]")) |
559 nil | 563 nil |
560 (format "Could not find char ?\\x%x in buffer" i)))) | 564 (format "Could not find char ?\\x%x in buffer" i)))) |
561 | 565 |
562 (defun frob-unicode-errors-region (frob-function begin end &optional buffer) | 566 (defun frob-unicode-errors-region (frob-function begin end &optional buffer) |
563 "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END. | 567 "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END. |
564 | 568 |
565 Optional argument BUFFER specifies the buffer that should be examined for | 569 Optional argument BUFFER specifies the buffer that should be examined for |
566 such sequences. " | 570 such sequences. " |
567 (check-argument-type #'functionp frob-function) | 571 (check-argument-type #'functionp frob-function) |
568 (check-argument-range begin (point-min buffer) (point-max buffer)) | 572 (check-argument-range begin (point-min buffer) (point-max buffer)) |
569 (check-argument-range end (point-min buffer) (point-max buffer)) | 573 (check-argument-range end (point-min buffer) (point-max buffer)) |
570 (save-excursion | 574 (save-excursion |
571 (save-restriction | 575 (save-restriction |
572 (if buffer (set-buffer buffer)) | 576 (if buffer (set-buffer buffer)) |
573 (narrow-to-region begin end) | 577 (narrow-to-region begin end) |
574 (goto-char (point-min)) | 578 (goto-char (point-min)) |