view lisp/mule/china-util.el @ 4967:0d4c9d0f6a8d

rewrite dynarr code -------------------- ChangeLog entries follow: -------------------- src/ChangeLog addition: 2010-02-03 Ben Wing <ben@xemacs.org> * device-x.c (x_get_resource_prefix): * device-x.c (Fx_get_resource): * device-x.c (Fx_get_resource_prefix): * device-x.c (Fx_put_resource): * dialog-msw.c: * dialog-msw.c (handle_question_dialog_box): * dired-msw.c (mswindows_sort_files): * dired-msw.c (mswindows_get_files): * extents.c (extent_fragment_sort_by_priority): * extents.c (Fset_extent_parent): * file-coding.c (coding_reader): * file-coding.c (coding_writer): * file-coding.c (gzip_convert): * frame.c (generate_title_string): * gutter.c (calculate_gutter_size_from_display_lines): * indent.c (vmotion_1): * lread.c (read_bit_vector): * mule-coding.c (iso2022_decode): * rangetab.c: * rangetab.c (Fcopy_range_table): * rangetab.c (Fget_range_table): * rangetab.c (unified_range_table_copy_data): * redisplay-msw.c (mswindows_output_string): * redisplay-output.c (output_display_line): * redisplay-output.c (redisplay_move_cursor): * redisplay-output.c (redisplay_clear_bottom_of_window): * redisplay-tty.c (tty_output_ichar_dynarr): * redisplay-tty.c (set_foreground_to): * redisplay-tty.c (set_background_to): * redisplay-xlike-inc.c (XLIKE_output_string): * redisplay.c (redisplay_window_text_width_string): * redisplay.c (redisplay_text_width_string): * redisplay.c (create_text_block): * redisplay.c (SET_CURRENT_MODE_CHARS_PIXSIZE): * redisplay.c (generate_fstring_runes): * redisplay.c (regenerate_modeline): * redisplay.c (ensure_modeline_generated): * redisplay.c (real_current_modeline_height): * redisplay.c (create_string_text_block): * redisplay.c (regenerate_window): * redisplay.c (REGEN_INC_FIND_START_END): * redisplay.c (point_visible): * redisplay.c (redisplay_window): * redisplay.c (mark_glyph_block_dynarr): * redisplay.c (line_start_cache_start): * redisplay.c (start_with_line_at_pixpos): * redisplay.c (update_line_start_cache): * redisplay.c 
(glyph_to_pixel_translation): * redisplay.c (pixel_to_glyph_translation): * sysdep.c (qxe_readdir): * text.c (dfc_convert_to_external_format): * text.c (dfc_convert_to_internal_format): * toolbar-common.c (common_output_toolbar_button): * window.c (window_modeline_height): * window.c (Fwindow_last_line_visible_height): * window.c (window_displayed_height): * window.c (window_scroll): * window.c (get_current_pixel_pos): Use Dynarr_begin() in place of Dynarr_atp (foo, 0). * dynarr.c (Dynarr_realloc): * dynarr.c (Dynarr_lisp_realloc): * dynarr.c (Dynarr_resize): * dynarr.c (Dynarr_insert_many): * dynarr.c (Dynarr_delete_many): * dynarr.c (Dynarr_memory_usage): * dynarr.c (stack_like_malloc): * dynarr.c (stack_like_free): * lisp.h: * lisp.h (DECLARE_DYNARR_LISP_IMP): * lisp.h (XD_DYNARR_DESC): * lisp.h (Dynarr_pop): * gutter.c (output_gutter): * redisplay-output.c (sync_rune_structs): * redisplay-output.c (redisplay_output_window): Redo the dynarr code, add greater checks. Rename the `len', `largest' and `max' members to `len_', `largest_' and `max_' to try and catch existing places that might directly modify these values. Make new accessors Dynarr_largest() and Dynarr_max() and make them and existing Dynarr_length() be non-lvalues by adding '+ 0' to them; fix a couple of places in the redisplay code that tried to modify the length directly by setting Dynarr_length(). Use the accessors whenever possible even in the dynarr code itself. The accessors also verify that 0 <= len <= largest <= max. Rename settor function Dynarr_set_size() to Dynarr_set_length() and use it more consistently; also create lower-level Dynarr_set_length_1(). This latter function should be the only function that directly modifies the `len_' member of a Dynarr, and in the process makes sure that the `largest' value is kept correct. Consistently use ERROR_CHECK_STRUCTURES instead of ERROR_CHECK_TYPES for error-checking code. 
Reintroduce the temporarily disabled verification code on the positions of Dynarr_at(), Dynarr_atp() and Dynarr_atp_past_end(). Also create Dynarr_resize_if() in place of a repeated code fragment. Clean up all the functions that modify Dynarrs to use the new macros and functions and verify the correctness of the Dynarr both before and after the change. Note that there are two kinds of verification -- one for accessing and one for modifying. The difference is that the modify verification additionally checks to make sure that the Dynarr isn't locked. (This is used in redisplay to check for problems with reentrancy.) * lrecord.h: Move XD_DYNARR_DESC to lisp.h, grouping with the dynarr code.
author Ben Wing <ben@xemacs.org>
date Wed, 03 Feb 2010 20:51:18 -0600
parents 2923009caf47
children 308d34e9f07d
line wrap: on
line source

;;; china-util.el --- utilities for Chinese -*- coding: iso-2022-7bit; -*-

;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
;; Licensed to the Free Software Foundation.

;; Keywords: mule, multilingual, Chinese

;; This file is part of XEmacs.

;; XEmacs is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; XEmacs is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with XEmacs; see the file COPYING.  If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.

;;; Synched up with: Emacs 21.1 (language/china-util.el).

;;; Commentary:

;;; Code:

;; HZ/ZW encoding support

;; HZ is an encoding method for the Chinese character set GB2312 that
;; is widely used on the Internet.  It is very similar to the 7-bit
;; environment of ISO-2022.  The difference is that HZ uses the
;; sequences "~{" and "~}" to designate GB2312 and ASCII respectively;
;; hence, it doesn't use the ESC (0x1B) code.

;; ZW is another encoding method for the Chinese character set GB2312.
;; It encodes Chinese characters line by line, starting each such line
;; with the sequence "zW".  Like HZ, it uses only 7-bit codes.

;; NOTE: the spelling "designnation" in two of the names below is
;; historical; the names are kept as they are because they are
;; referenced elsewhere in this file.

(defvar iso2022-gb-designation "\e$A"
  "ISO-2022 escape sequence to designate GB2312.")

(defvar hz-gb-designnation "~{"
  "HZ escape sequence to designate GB2312.")

(defvar iso2022-ascii-designation "\e(B"
  "ISO-2022 escape sequence to designate ASCII.")

(defvar hz-ascii-designnation "~}"
  "HZ escape sequence to designate ASCII.")

(defvar zw-start-gb "^zW"
  "Regexp of ZW sequence to start GB2312.")

(defvar hz/zw-start-gb
  ;; Three alternatives joined with the regexp "or" operator: the HZ
  ;; opener, the ZW line opener, or any character outside 0..127.
  (mapconcat 'identity
             (list hz-gb-designnation zw-start-gb "[^\0-\177]")
             "\\|")
  "Regexp for start of GB2312 in an encoding mixture of HZ and ZW.")

(defvar decode-hz-line-continuation nil
  "Non-nil means a GB2312 run may continue past the end of its line.
When nil, `decode-hz-region' looks for the closing \"~}\" only up to
the end of the line on which the opening \"~{\" was found.")

(defconst hz-set-msb-table
  ;; Build a 127-element table indexed by character code: codes 0..32
  ;; map to themselves, codes 33..126 map to the same code plus 128
  ;; (i.e. with the most significant bit of the byte set).
  (let ((table (make-string 127 0))
        (code 0))
    (while (< code 127)
      (aset table code (if (< code 33)
                           code
                         (+ code 128)))
      (setq code (1+ code)))
    table)
  "Translation table used by `decode-hz-region' with `translate-region'.
Codes 0 through 32 are left unchanged; codes 33 through 126 are
mapped to the code plus 128.")

;;;###autoload
(defun decode-hz-region (beg end)
  "Decode HZ/ZW encoded text in the current region.
BEG and END delimit the region.  The HZ/ZW text is first rewritten
in place into `euc-china' form and then decoded with that coding
system.
Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (let (pos ch)
        (narrow-to-region beg end)

        ;; We, at first, convert HZ/ZW to `euc-china',
        ;; then decode it.

        ;; "~\n" -> "\n", "~~" -> "~"
        (goto-char (point-min))
        (while (search-forward "~" nil t)
          (setq ch (following-char))
          (cond ((= ch ?\n)
                 (delete-char -1))
                ((= ch ?~)
                 (delete-char -1)
                 ;; Point is now in front of the tilde that was kept.
                 ;; Step over it; otherwise the next search would find
                 ;; it again and treat it as the start of another
                 ;; escape, so that "~~~~" would collapse to "~"
                 ;; instead of "~~".
                 (forward-char 1))))
        ;; NOTE: a literal tilde produced above that is directly
        ;; followed by "{" is still matched as "~{" by the pass below.

        ;; "^zW...\n" -> Chinese GB2312
        ;; "~{...~}"  -> Chinese GB2312
        (goto-char (point-min))
        ;; From here on BEG and END are reused: BEG becomes the first
        ;; position that needs decoding (nil if there is none) and END
        ;; the end of the most recently processed run.
        (setq beg nil)
        (while (re-search-forward hz/zw-start-gb nil t)
          (setq pos (match-beginning 0)
                ch (char-after pos))
          ;; Record the first position to start conversion.
          (or beg (setq beg pos))
          (end-of-line)
          (setq end (point))
          (if (>= ch 128)               ; 8bit GB2312
              ;; Already 8-bit; nothing to rewrite on this line.
              nil
            ;; Remove the two-character opener ("~{" or "zW") and
            ;; account for it in the end-of-line position.
            (goto-char pos)
            (delete-char 2)
            (setq end (- end 2))
            (if (= ch ?z)               ; ZW -> euc-china
                (progn
                  ;; A ZW run always extends to the end of the line.
                  (translate-region (point) end hz-set-msb-table)
                  (goto-char end))
              ;; HZ: look for the closing "~}", bounded by the end of
              ;; the line unless line continuation is enabled, and
              ;; remove it when found.
              (if (search-forward hz-ascii-designnation
                                  (if decode-hz-line-continuation nil end)
                                  t)
                  (delete-char -2))
              (setq end (point))
              (translate-region pos (point) hz-set-msb-table))))
        (if beg
            (decode-coding-region beg end 'euc-china)))
      (- (point-max) (point-min)))))

;;;###autoload
(defun decode-hz-buffer ()
  "Decode the HZ/ZW encoded text of the whole accessible buffer.
This calls `decode-hz-region' on everything between `point-min' and
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (decode-hz-region start finish)))

;;;###autoload
(defun encode-hz-region (beg end)
  "Encode the text between BEG and END to HZ.
Return the length of resulting text."
  (interactive "r")
  (save-excursion
    (save-restriction
      (narrow-to-region beg end)

      ;; Escape every literal tilde: "~" -> "~~"
      (goto-char (point-min))
      (while (search-forward "~" nil t)
        (insert ?~))

      ;; Chinese GB2312 -> "~{...~}"
      ;; Everything from the first Chinese character onward is encoded
      ;; as `iso-2022-7bit'; the ISO-2022 designation sequences are
      ;; then replaced by their HZ counterparts.
      (goto-char (point-min))
      (when (re-search-forward "\\cc" nil t)
        (let ((first-chinese (match-beginning 0)))
          (goto-char first-chinese)
          (encode-coding-region first-chinese (point-max) 'iso-2022-7bit)
          ;; Pass 1: GB2312 designation -> HZ opener.
          (goto-char first-chinese)
          (while (search-forward iso2022-gb-designation nil t)
            (delete-region (- (point) 3) (point))
            (insert hz-gb-designnation))
          ;; Pass 2: ASCII designation -> HZ closer.
          (goto-char first-chinese)
          (while (search-forward iso2022-ascii-designation nil t)
            (delete-region (- (point) 3) (point))
            (insert hz-ascii-designnation))))
      (- (point-max) (point-min)))))

;;;###autoload
(defun encode-hz-buffer ()
  "Encode the text of the whole accessible buffer to HZ.
This calls `encode-hz-region' on everything between `point-min' and
`point-max' and returns its value."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (encode-hz-region start finish)))

;; Announce the `china-util' feature so that `require' can find it.
(provide 'china-util)

;;; china-util.el ends here