comparison lisp/packages/man.el @ 76:c0c698873ce1 r20-0b33

Import from CVS: tag r20-0b33
author cvs
date Mon, 13 Aug 2007 09:05:10 +0200
parents 54cc21c15cbb
children 1ce6082ce73f
comparison
equal deleted inserted replaced
75:a4e0195b387b 76:c0c698873ce1
167 167
168 (defvar Manual-use-subdirectory-list (eq system-type 'irix) "\ 168 (defvar Manual-use-subdirectory-list (eq system-type 'irix) "\
169 This makes manual-entry work correctly on SGI machines but it 169 This makes manual-entry work correctly on SGI machines but it
170 imposes a large startup cost which is why it is not simply on by 170 imposes a large startup cost which is why it is not simply on by
171 default on all systems.") 171 default on all systems.")
172
173 (defvar Manual-use-rosetta-man (not (null (locate-file "rman" exec-path))) "\
174 If non-nil, use RosettaMan (rman) to filter man pages.
175 This makes man-page cleanup virtually instantaneous, instead of
176 potentially taking a long time.
177
178 Here is information on RosettaMan, from Neal.Becker@comsat.com (Neal Becker):
179
180 RosettaMan is a filter for UNIX manual pages. It takes as input man
181 pages formatted for a variety of UNIX flavors (not [tn]roff source)
182 and produces as output a variety of file formats. Currently
183 RosettaMan accepts man pages as formatted by the following flavors of
184 UNIX: Hewlett-Packard HP-UX, AT&T System V, SunOS, Sun Solaris, OSF/1,
185 DEC Ultrix, SGI IRIX, Linux, SCO; and produces output for the following
186 formats: printable ASCII only (stripping page headers and footers),
187 section and subsection headers only, TkMan, [tn]roff, Ensemble, RTF,
188 SGML (soon--I finally found a DTD), HTML, MIME, LaTeX, LaTeX 2e, Perl 5's pod.
189
190 RosettaMan improves on other man page filters in several ways: (1) its
191 analysis recognizes the structural pieces of man pages, enabling high
192 quality output, (2) its modular structure permits easy augmentation of
193 output formats, (3) it accepts man pages formatted with the variant
194 macros of many different flavors of UNIX, and (4) it doesn't require
195 modification or cooperation with any other program.
196
197 RosettaMan is a rewrite of TkMan's man page filter, called bs2tk. (If
198 you haven't heard about TkMan, a hypertext man page browser, you
199 should grab it via anonymous ftp from ftp.cs.berkeley.edu:
200 /ucb/people/phelps/tkman.tar.Z.) Whereas bs2tk generated output only for
201 TkMan, RosettaMan generalizes the process so that the analysis can be
202 leveraged to new output formats. A single analysis engine recognizes
203 section heads, subsection heads, body text, lists, references to other
204 man pages, boldface, italics, bold italics, special characters (like
205 bullets), tables (to a degree) and strips out page headers and
206 footers. The engine sends signals to the selected output functions so
207 that an enhancement in the engine improves the quality of output of
208 all of them. Output format functions are easy to add, and thus far
209 average about 75 lines of C code each.
210
211
212
213 *** NOTES ON CURRENT VERSION ***
214
215 Help! I'm looking for people to help with the following projects.
216 \(1) Better RTF output format. The current one works, but could be
217 made better. (2) Roff macros that produce text that is easily
218 parsable. RosettaMan handles a great variety, but some things, like
219 H-P's tables, are intractable. If you write an output format or
220 otherwise improve RosettaMan, please send in your code so that I may
221 share the wealth in future releases.
222
223 This version can try to identify tables (turn this on with the -T
224 switch) by looking for lines with a large amount of interword spacing,
225 reasoning that this is space between columns of a table. This
226 heuristic doesn't always work and sometimes misidentifies ordinary
227 text as tables. In general I think it is impossible to perfectly
228 identify tables from nroff formatted text. However, I do think the
229 heuristics can be tuned, so if you have a collection of manual pages
230 with unrecognized tables, send me the lot, in formatted form (i.e.,
231 after formatting with nroff -man), and uuencode them to preserve the
232 control characters. Better, if you can think of heuristics that
233 distinguish tables from ordinary text, I'd like to hear them.
234
235
236 Notes for HTML consumers: This filter does real (heuristic)
237 parsing--no <PRE>! Man page references are turned into hypertext links.")
172 238
173 (make-face 'man-italic) 239 (make-face 'man-italic)
174 (or (face-differs-from-default-p 'man-italic) 240 (or (face-differs-from-default-p 'man-italic)
175 (copy-face 'italic 'man-italic)) 241 (copy-face 'italic 'man-italic))
176 ;; XEmacs (from Darrell Kindred): underlining is annoying due to 242 ;; XEmacs (from Darrell Kindred): underlining is annoying due to
778 (list 'delete-region '(point) (list '+ '(point) n))) 844 (list 'delete-region '(point) (list '+ '(point) n)))
779 845
780 ;; Hint: BS stands for more things than "back space" 846 ;; Hint: BS stands for more things than "back space"
781 (defun Manual-nuke-nroff-bs (&optional apropos-mode) 847 (defun Manual-nuke-nroff-bs (&optional apropos-mode)
782 (interactive "*") 848 (interactive "*")
783 ;; 849 (if Manual-use-rosetta-man
784 ;; turn underlining into italics 850 (call-process-region (point-min) (point-max) "rman" t t nil)
785 ;; 851 ;;
786 (goto-char (point-min)) 852 ;; turn underlining into italics
787 (while (search-forward "_\b" nil t) 853 ;;
788 ;; searching for underscore-backspace and then comparing the following 854 (goto-char (point-min))
789 ;; chars until the sequence ends turns out to be much faster than searching 855 (while (search-forward "_\b" nil t)
790 ;; for a regexp which matches the whole sequence. 856 ;; searching for underscore-backspace and then comparing the following
791 (let ((s (match-beginning 0))) 857 ;; chars until the sequence ends turns out to be much faster than searching
792 (goto-char s) 858 ;; for a regexp which matches the whole sequence.
793 (while (and (= (following-char) ?_) 859 (let ((s (match-beginning 0)))
794 (= (char-after (1+ (point))) ?\b)) 860 (goto-char s)
795 (Manual-delete-char 2) 861 (while (and (= (following-char) ?_)
796 (forward-char 1)) 862 (= (char-after (1+ (point))) ?\b))
797 (set-extent-face (make-extent s (point)) 'man-italic))) 863 (Manual-delete-char 2)
798 ;; 864 (forward-char 1))
799 ;; turn overstriking into bold 865 (set-extent-face (make-extent s (point)) 'man-italic)))
800 ;; 866 ;;
801 (goto-char (point-min)) 867 ;; turn overstriking into bold
802 (while (re-search-forward "\\([^\n]\\)\\(\b\\1\\)" nil t) 868 ;;
803 ;; Surprisingly, searching for the above regexp is faster than searching 869 (goto-char (point-min))
804 ;; for a backspace and then comparing the preceding and following chars, 870 (while (re-search-forward "\\([^\n]\\)\\(\b\\1\\)" nil t)
805 ;; I presume because there are many false matches, meaning more funcalls 871 ;; Surprisingly, searching for the above regexp is faster than searching
806 ;; to re-search-forward. 872 ;; for a backspace and then comparing the preceding and following chars,
807 (let ((s (match-beginning 0))) 873 ;; I presume because there are many false matches, meaning more funcalls
808 (goto-char s) 874 ;; to re-search-forward.
809 ;; Some systems (SGI) overstrike multiple times, eg, "M\bM\bM\bM". 875 (let ((s (match-beginning 0)))
810 (while (looking-at "\\([^\n]\\)\\(\b\\1\\)+") 876 (goto-char s)
811 (delete-region (+ (point) 1) (match-end 0)) 877 ;; Some systems (SGI) overstrike multiple times, eg, "M\bM\bM\bM".
812 (forward-char 1)) 878 (while (looking-at "\\([^\n]\\)\\(\b\\1\\)+")
813 (set-extent-face (make-extent s (point)) 'man-bold))) 879 (delete-region (+ (point) 1) (match-end 0))
814 ;; 880 (forward-char 1))
815 ;; hack bullets: o^H+ --> + 881 (set-extent-face (make-extent s (point)) 'man-bold)))
816 (goto-char (point-min)) 882 ;;
817 (while (search-forward "\b" nil t) 883 ;; hack bullets: o^H+ --> +
818 (Manual-delete-char -2)) 884 (goto-char (point-min))
819 885 (while (search-forward "\b" nil t)
820 (if (> (buffer-size) 100) ; minor kludge 886 (Manual-delete-char -2))
821 (Manual-nuke-nroff-bs-footers)) 887
888 (if (> (buffer-size) 100) ; minor kludge
889 (Manual-nuke-nroff-bs-footers))
890 ) ;; not Manual-use-rosetta-man
822 ;; 891 ;;
823 ;; turn subsection header lines into bold 892 ;; turn subsection header lines into bold
824 ;; 893 ;;
825 (goto-char (point-min)) 894 (goto-char (point-min))
826 (if apropos-mode 895 (if apropos-mode
848 (set-extent-face (make-extent (match-beginning 1) (match-end 1)) 917 (set-extent-face (make-extent (match-beginning 1) (match-end 1))
849 'man-heading) 918 'man-heading)
850 (forward-line 1)) 919 (forward-line 1))
851 ) 920 )
852 921
853 ;; Zap ESC7, ESC8, and ESC9 922 (if Manual-use-rosetta-man
854 ;; This is for Sun man pages like "man 1 csh" 923 nil
855 (goto-char (point-min)) 924 ;; Zap ESC7, ESC8, and ESC9
856 (while (re-search-forward "\e[789]" nil t) 925 ;; This is for Sun man pages like "man 1 csh"
857 (replace-match "")) 926 (goto-char (point-min))
858 927 (while (re-search-forward "\e[789]" nil t)
928 (replace-match "")))
929
859 ;; Nuke blank lines at start. 930 ;; Nuke blank lines at start.
860 ;; (goto-char (point-min)) 931 ;; (goto-char (point-min))
861 ;; (skip-chars-forward "\n") 932 ;; (skip-chars-forward "\n")
862 ;; (delete-region (point-min) (point)) 933 ;; (delete-region (point-min) (point))
863 934