Mercurial > hg > xemacs-beta
comparison lisp/packages/man.el @ 76:c0c698873ce1 r20-0b33
Import from CVS: tag r20-0b33
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:05:10 +0200 |
parents | 54cc21c15cbb |
children | 1ce6082ce73f |
comparison
equal
deleted
inserted
replaced
75:a4e0195b387b | 76:c0c698873ce1 |
---|---|
167 | 167 |
168 (defvar Manual-use-subdirectory-list (eq system-type 'irix) "\ | 168 (defvar Manual-use-subdirectory-list (eq system-type 'irix) "\ |
169 This makes manual-entry work correctly on SGI machines but it | 169 This makes manual-entry work correctly on SGI machines but it |
170 imposes a large startup cost which is why it is not simply on by | 170 imposes a large startup cost which is why it is not simply on by |
171 default on all systems.") | 171 default on all systems.") |
172 | |
173 (defvar Manual-use-rosetta-man (not (null (locate-file "rman" exec-path))) "\ | |
174 If non-nil, use RosettaMan (rman) to filter man pages. | |
175 This makes man-page cleanup virtually instantaneous, instead of | |
176 potentially taking a long time. | |
177 | |
178 Here is information on RosettaMan, from Neal.Becker@comsat.com (Neal Becker): | |
179 | |
180 RosettaMan is a filter for UNIX manual pages. It takes as input man | |
181 pages formatted for a variety of UNIX flavors (not [tn]roff source) | |
182 and produces as output a variety of file formats. Currently | |
183 RosettaMan accepts man pages as formatted by the following flavors of | |
184 UNIX: Hewlett-Packard HP-UX, AT&T System V, SunOS, Sun Solaris, OSF/1, | |
185 DEC Ultrix, SGI IRIX, Linux, SCO; and produces output for the following | |
186 formats: printable ASCII only (stripping page headers and footers), | |
187 section and subsection headers only, TkMan, [tn]roff, Ensemble, RTF, | |
188 SGML (soon--I finally found a DTD), HTML, MIME, LaTeX, LaTeX 2e, Perl 5's pod. | |
189 | |
190 RosettaMan improves on other man page filters in several ways: (1) its | |
191 analysis recognizes the structural pieces of man pages, enabling high | |
192 quality output, (2) its modular structure permits easy augmentation of | |
193 output formats, (3) it accepts man pages formatted with the variant | |
194 macros of many different flavors of UNIX, and (4) it doesn't require | |
195 modification or cooperation with any other program. | |
196 | |
197 RosettaMan is a rewrite of TkMan's man page filter, called bs2tk. (If | |
198 you haven't heard about TkMan, a hypertext man page browser, you | |
199 should grab it via anonymous ftp from ftp.cs.berkeley.edu: | |
200 /ucb/people/phelps/tkman.tar.Z.) Whereas bs2tk generated output only for | |
201 TkMan, RosettaMan generalizes the process so that the analysis can be | |
202 leveraged to new output formats. A single analysis engine recognizes | |
203 section heads, subsection heads, body text, lists, references to other | |
204 man pages, boldface, italics, bold italics, special characters (like | |
205 bullets), tables (to a degree) and strips out page headers and | |
206 footers. The engine sends signals to the selected output functions so | |
207 that an enhancement in the engine improves the quality of output of | |
208 all of them. Output format functions are easy to add, and thus far | |
209 average about 75 lines of C code each. | |
210 | |
211 | |
212 | |
213 *** NOTES ON CURRENT VERSION *** | |
214 | |
215 Help! I'm looking for people to help with the following projects. | |
216 \(1) Better RTF output format. The current one works, but could be | |
217 made better. (2) Roff macros that produce text that is easily | |
218 parsable. RosettaMan handles a great variety, but some things, like | |
219 H-P's tables, are intractable. If you write an output format or | |
220 otherwise improve RosettaMan, please send in your code so that I may | |
221 share the wealth in future releases. | |
222 | |
223 This version can try to identify tables (turn this on with the -T | |
224 switch) by looking for lines with a large amount of interword spacing, | |
225 reasoning that this is space between columns of a table. This | |
226 heuristic doesn't always work and sometimes misidentifies ordinary | |
227 text as tables. In general I think it is impossible to perfectly | |
228 identify tables from nroff formatted text. However, I do think the | |
229 heuristics can be tuned, so if you have a collection of manual pages | |
230 with unrecognized tables, send me the lot, in formatted form (i.e., | |
231 after formatting with nroff -man), and uuencode them to preserve the | |
232 control characters. Better, if you can think of heuristics that | |
233 distinguish tables from ordinary text, I'd like to hear them. | |
234 | |
235 | |
236 Notes for HTML consumers: This filter does real (heuristic) | |
237 parsing--no <PRE>! Man page references are turned into hypertext links.") | |
172 | 238 |
173 (make-face 'man-italic) | 239 (make-face 'man-italic) |
174 (or (face-differs-from-default-p 'man-italic) | 240 (or (face-differs-from-default-p 'man-italic) |
175 (copy-face 'italic 'man-italic)) | 241 (copy-face 'italic 'man-italic)) |
176 ;; XEmacs (from Darrell Kindred): underlining is annoying due to | 242 ;; XEmacs (from Darrell Kindred): underlining is annoying due to |
778 (list 'delete-region '(point) (list '+ '(point) n))) | 844 (list 'delete-region '(point) (list '+ '(point) n))) |
779 | 845 |
780 ;; Hint: BS stands for more things than "back space" | 846 ;; Hint: BS stands for more things than "back space" |
781 (defun Manual-nuke-nroff-bs (&optional apropos-mode) | 847 (defun Manual-nuke-nroff-bs (&optional apropos-mode) |
782 (interactive "*") | 848 (interactive "*") |
783 ;; | 849 (if Manual-use-rosetta-man |
784 ;; turn underlining into italics | 850 (call-process-region (point-min) (point-max) "rman" t t nil) |
785 ;; | 851 ;; |
786 (goto-char (point-min)) | 852 ;; turn underlining into italics |
787 (while (search-forward "_\b" nil t) | 853 ;; |
788 ;; searching for underscore-backspace and then comparing the following | 854 (goto-char (point-min)) |
789 ;; chars until the sequence ends turns out to be much faster than searching | 855 (while (search-forward "_\b" nil t) |
790 ;; for a regexp which matches the whole sequence. | 856 ;; searching for underscore-backspace and then comparing the following |
791 (let ((s (match-beginning 0))) | 857 ;; chars until the sequence ends turns out to be much faster than searching |
792 (goto-char s) | 858 ;; for a regexp which matches the whole sequence. |
793 (while (and (= (following-char) ?_) | 859 (let ((s (match-beginning 0))) |
794 (= (char-after (1+ (point))) ?\b)) | 860 (goto-char s) |
795 (Manual-delete-char 2) | 861 (while (and (= (following-char) ?_) |
796 (forward-char 1)) | 862 (= (char-after (1+ (point))) ?\b)) |
797 (set-extent-face (make-extent s (point)) 'man-italic))) | 863 (Manual-delete-char 2) |
798 ;; | 864 (forward-char 1)) |
799 ;; turn overstriking into bold | 865 (set-extent-face (make-extent s (point)) 'man-italic))) |
800 ;; | 866 ;; |
801 (goto-char (point-min)) | 867 ;; turn overstriking into bold |
802 (while (re-search-forward "\\([^\n]\\)\\(\b\\1\\)" nil t) | 868 ;; |
803 ;; Surprisingly, searching for the above regexp is faster than searching | 869 (goto-char (point-min)) |
804 ;; for a backspace and then comparing the preceding and following chars, | 870 (while (re-search-forward "\\([^\n]\\)\\(\b\\1\\)" nil t) |
805 ;; I presume because there are many false matches, meaning more funcalls | 871 ;; Surprisingly, searching for the above regexp is faster than searching |
806 ;; to re-search-forward. | 872 ;; for a backspace and then comparing the preceding and following chars, |
807 (let ((s (match-beginning 0))) | 873 ;; I presume because there are many false matches, meaning more funcalls |
808 (goto-char s) | 874 ;; to re-search-forward. |
809 ;; Some systems (SGI) overstrike multiple times, eg, "M\bM\bM\bM". | 875 (let ((s (match-beginning 0))) |
810 (while (looking-at "\\([^\n]\\)\\(\b\\1\\)+") | 876 (goto-char s) |
811 (delete-region (+ (point) 1) (match-end 0)) | 877 ;; Some systems (SGI) overstrike multiple times, eg, "M\bM\bM\bM". |
812 (forward-char 1)) | 878 (while (looking-at "\\([^\n]\\)\\(\b\\1\\)+") |
813 (set-extent-face (make-extent s (point)) 'man-bold))) | 879 (delete-region (+ (point) 1) (match-end 0)) |
814 ;; | 880 (forward-char 1)) |
815 ;; hack bullets: o^H+ --> + | 881 (set-extent-face (make-extent s (point)) 'man-bold))) |
816 (goto-char (point-min)) | 882 ;; |
817 (while (search-forward "\b" nil t) | 883 ;; hack bullets: o^H+ --> + |
818 (Manual-delete-char -2)) | 884 (goto-char (point-min)) |
819 | 885 (while (search-forward "\b" nil t) |
820 (if (> (buffer-size) 100) ; minor kludge | 886 (Manual-delete-char -2)) |
821 (Manual-nuke-nroff-bs-footers)) | 887 |
888 (if (> (buffer-size) 100) ; minor kludge | |
889 (Manual-nuke-nroff-bs-footers)) | |
890 ) ;; not Manual-use-rosetta-man | |
822 ;; | 891 ;; |
823 ;; turn subsection header lines into bold | 892 ;; turn subsection header lines into bold |
824 ;; | 893 ;; |
825 (goto-char (point-min)) | 894 (goto-char (point-min)) |
826 (if apropos-mode | 895 (if apropos-mode |
848 (set-extent-face (make-extent (match-beginning 1) (match-end 1)) | 917 (set-extent-face (make-extent (match-beginning 1) (match-end 1)) |
849 'man-heading) | 918 'man-heading) |
850 (forward-line 1)) | 919 (forward-line 1)) |
851 ) | 920 ) |
852 | 921 |
853 ;; Zap ESC7, ESC8, and ESC9 | 922 (if Manual-use-rosetta-man |
854 ;; This is for Sun man pages like "man 1 csh" | 923 nil |
855 (goto-char (point-min)) | 924 ;; Zap ESC7, ESC8, and ESC9 |
856 (while (re-search-forward "\e[789]" nil t) | 925 ;; This is for Sun man pages like "man 1 csh" |
857 (replace-match "")) | 926 (goto-char (point-min)) |
858 | 927 (while (re-search-forward "\e[789]" nil t) |
928 (replace-match ""))) | |
929 | |
859 ;; Nuke blank lines at start. | 930 ;; Nuke blank lines at start. |
860 ;; (goto-char (point-min)) | 931 ;; (goto-char (point-min)) |
861 ;; (skip-chars-forward "\n") | 932 ;; (skip-chars-forward "\n") |
862 ;; (delete-region (point-min) (point)) | 933 ;; (delete-region (point-min) (point)) |
863 | 934 |