comparison lisp/unicode.el @ 4268:75d0292c1bff

[xemacs-hg @ 2007-11-14 19:41:04 by aidan] Correct the dumped information for the Unicode JIT infrastructure.
author aidan
date Wed, 14 Nov 2007 19:41:09 +0000
parents 38ef5a6da799
children 15d36164ebd7
comparison
equal deleted inserted replaced
4267:66e2714696bd 4268:75d0292c1bff
492 (modify-syntax-entry decoded 492 (modify-syntax-entry decoded
493 (string 493 (string
494 (char-syntax ascii-or-latin-1)) 494 (char-syntax ascii-or-latin-1))
495 syntax-table)) 495 syntax-table))
496 496
497 ;; Create all the Unicode error sequences, normally as jit-ucs-charset-0 497 ;; *Sigh*, declarations need to be at the start of the line to be picked up
498 ;; characters starting at U+200000 (which isn't a valid Unicode code 498 ;; by make-docfile. Not so much an issue with ccl-encode-to-ucs-2, which we
499 ;; point). Make them available to user code. 499 ;; don't necessarily want to advertise, but the following are important.
500 (defvar unicode-error-default-translation-table 500
501 (loop 501 ;; Create all the Unicode error sequences, normally as jit-ucs-charset-0
502 with char-table = (make-char-table 'char) 502 ;; characters starting at U+200000 (which isn't a valid Unicode code
503 for i from ?\x00 to ?\xFF 503 ;; point). Make them available to user code.
504 do 504 (defvar unicode-error-default-translation-table
505 (put-char-table (aref 505 (loop
506 ;; #xd800 is the first leading surrogate; 506 with char-table = (make-char-table 'char)
507 ;; trailing surrogates must be in the range 507 for i from ?\x00 to ?\xFF
508 ;; #xdc00-#xdfff. These examples are not, so we 508 do
509 ;; intentionally provoke an error sequence. 509 (put-char-table (aref
510 (decode-coding-string (format "\xd8\x00\x00%c" i) 510 ;; #xd800 is the first leading surrogate;
511 'utf-16-be) 511 ;; trailing surrogates must be in the range
512 3) 512 ;; #xdc00-#xdfff. These examples are not, so we
513 i 513 ;; intentionally provoke an error sequence.
514 char-table) 514 (decode-coding-string (format "\xd8\x00\x00%c" i)
515 finally return char-table) 515 'utf-16-be)
516 "Translation table mapping Unicode error sequences to Latin-1 chars. 516 3)
517 i
518 char-table)
519 finally return char-table)
520 "Translation table mapping Unicode error sequences to Latin-1 chars.
517 521
518 To transform XEmacs Unicode error sequences to the Latin-1 characters that 522 To transform XEmacs Unicode error sequences to the Latin-1 characters that
519 correspond to the octets on disk, you can use this variable. ") 523 correspond to the octets on disk, you can use this variable. ")
520 524
521 (defvar unicode-error-sequence-regexp-range 525 (defvar unicode-error-sequence-regexp-range
522 (format "%c%c-%c" 526 (format "%c%c-%c"
523 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0) 527 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 0)
524 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3) 528 (aref (decode-coding-string "\xd8\x00\x00\x00" 'utf-16-be) 3)
525 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3)) 529 (aref (decode-coding-string "\xd8\x00\x00\xFF" 'utf-16-be) 3))
526 "Regular expression range to match Unicode error sequences in XEmacs. 530 "Regular expression range to match Unicode error sequences in XEmacs.
527 531
528 Invalid Unicode sequences on input are represented as XEmacs 532 Invalid Unicode sequences on input are represented as XEmacs
529 characters with values stored as the keys in 533 characters with values stored as the keys in
530 `unicode-error-default-translation-table', one character for each 534 `unicode-error-default-translation-table', one character for each
531 invalid octet. You can use this variable (with `re-search-forward' or 535 invalid octet. You can use this variable (with `re-search-forward' or
557 ; unicode-error-sequence-regexp-range 561 ; unicode-error-sequence-regexp-range
558 ; "]")) 562 ; "]"))
559 nil 563 nil
560 (format "Could not find char ?\\x%x in buffer" i)))) 564 (format "Could not find char ?\\x%x in buffer" i))))
561 565
562 (defun frob-unicode-errors-region (frob-function begin end &optional buffer) 566 (defun frob-unicode-errors-region (frob-function begin end &optional buffer)
563 "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END. 567 "Call FROB-FUNCTION on the Unicode error sequences between BEGIN and END.
564 568
565 Optional argument BUFFER specifies the buffer that should be examined for 569 Optional argument BUFFER specifies the buffer that should be examined for
566 such sequences. " 570 such sequences. "
567 (check-argument-type #'functionp frob-function) 571 (check-argument-type #'functionp frob-function)
568 (check-argument-range begin (point-min buffer) (point-max buffer)) 572 (check-argument-range begin (point-min buffer) (point-max buffer))
569 (check-argument-range end (point-min buffer) (point-max buffer)) 573 (check-argument-range end (point-min buffer) (point-max buffer))
570 (save-excursion 574 (save-excursion
571 (save-restriction 575 (save-restriction
572 (if buffer (set-buffer buffer)) 576 (if buffer (set-buffer buffer))
573 (narrow-to-region begin end) 577 (narrow-to-region begin end)
574 (goto-char (point-min)) 578 (goto-char (point-min))