diff lisp/utils/rfc822.el @ 0:376386a54a3c r19-14

Import from CVS: tag r19-14
author cvs
date Mon, 13 Aug 2007 08:45:50 +0200
parents
children 0293115a14e9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/utils/rfc822.el	Mon Aug 13 08:45:50 2007 +0200
@@ -0,0 +1,314 @@
+;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike
+;; Keywords: mail
+
+;; Copyright (C) 1986, 1987, 1993 Free Software Foundation, Inc.
+;; Author Richard Mlynarik.
+
+;; This file is part of XEmacs.
+
+;; XEmacs is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; XEmacs is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs; see the file COPYING.  If not, write to the Free
+;; Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+
+;;; Synched up with: Not synched with FSF but very close.
+
+;; This code should probably be replaced with mail-extr.el once it's a
+;; little more stable.
+
+
+(provide 'rfc822)
+
+;; uses address-start free, throws to address
+(defun rfc822-bad-address (reason)
+  (save-restriction
+    (insert "_^_")
+    (narrow-to-region address-start
+		      (if (re-search-forward "[,;]" nil t)
+			  (max (point-min) (1- (point)))
+			(point-max)))
+    ;; make the error string be suitable for inclusion in (...)
+    (let ((losers '("\\" "(" ")" "\n")))
+      (while losers
+	(goto-char (point-min))
+	(while (search-forward (car losers) nil t)
+	  (backward-char 1)
+	  (insert ?\\)
+	  (forward-char 1))
+	(setq losers (cdr losers))))
+    (goto-char (point-min)) (insert "(Unparsable address -- "
+				    reason
+				    ":\n\t  \"")
+    (goto-char (point-max)) (insert "\")"))
+  (rfc822-nuke-whitespace)
+  (throw 'address (buffer-substring address-start (point))))
+
+(defun rfc822-nuke-whitespace (&optional leave-space)
+  (let (ch)
+    (while (cond ((eobp)
+		  nil)
+		 ((= (setq ch (following-char)) ?\()
+		  (forward-char 1)
+		  (while (if (eobp)
+			     (rfc822-bad-address "Unbalanced comment (...)")
+			   (/= (setq ch (following-char)) ?\)))
+		    (cond ((looking-at "[^()\\]+")
+			   (replace-match ""))
+			  ((= ch ?\()
+			   (rfc822-nuke-whitespace))
+			  ((< (point) (1- (point-max)))
+			   (delete-char 2))
+			  (t
+			   (rfc822-bad-address "orphaned backslash"))))
+		  ;; delete remaining "()"
+		  (forward-char -1)
+		  (delete-char 2)
+		  t)
+		 ((memq ch '(?\ ?\t ?\n))
+		  (delete-region (point)
+				 (progn (skip-chars-forward " \t\n") (point)))
+		  t)
+		 (t
+		  nil)))
+    (or (not leave-space)
+	(eobp)
+	(bobp)
+	(= (preceding-char) ?\ )
+	(insert ?\ ))))
+
+(defun rfc822-looking-at (regex &optional leave-space)
+  (if (cond ((stringp regex)
+	     (if (looking-at regex)
+		 (progn (goto-char (match-end 0))
+			t)))
+	    (t
+	     (if (and (not (eobp))
+		      (= (following-char) regex))
+		 (progn (forward-char 1)
+			t))))
+      (let ((tem (match-data)))
+	(rfc822-nuke-whitespace leave-space)
+	(store-match-data tem)
+	t)))
+
+(defun rfc822-snarf-word ()
+  ;; word is atom | quoted-string
+  (cond ((= (following-char) ?\")
+	 ;; quoted-string
+	 (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
+	     (rfc822-bad-address "Unterminated quoted string")))
+	((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
+	 ;; atom
+	 )
+	(t
+	 (rfc822-bad-address "Rubbish in address"))))
+
+(defun rfc822-snarf-words ()
+  (rfc822-snarf-word)
+  (while (rfc822-looking-at ?.)
+    (rfc822-snarf-word)))
+
+(defun rfc822-snarf-subdomain ()
+  ;; sub-domain is domain-ref | domain-literal
+  (cond ((= (following-char) ?\[)
+	 ;; domain-ref
+	 (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
+	     (rfc822-bad-address "Unterminated domain literal [...]")))
+	((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
+	 ;; domain-literal = atom
+	 )
+	(t
+	 (rfc822-bad-address "Rubbish in host/domain specification"))))
+
+(defun rfc822-snarf-domain ()
+  (rfc822-snarf-subdomain)
+  (while (rfc822-looking-at ?.)
+    (rfc822-snarf-subdomain)))
+
+(defun rfc822-snarf-frob-list (name separator terminator snarfer
+				    &optional return)
+  (let ((first t)
+	(list ())
+	tem)
+    (while (cond ((eobp)
+		  (rfc822-bad-address
+		    (format "End of addresses in middle of %s" name)))
+		 ((rfc822-looking-at terminator)
+		  nil)
+		 ((rfc822-looking-at separator)
+		  ;; multiple separators are allowed and do nothing.
+		  (while (rfc822-looking-at separator))
+		  t)
+		 (first
+		  t)
+		 (t
+		  (rfc822-bad-address
+		    (format "Gubbish in middle of %s" name))))
+      (setq tem (funcall snarfer)
+	    first nil)
+      (and return tem
+	   (setq list (if (listp tem)
+			  (nconc (reverse tem) list)
+			  (cons tem list)))))
+    (nreverse list)))
+
+;; return either an address (a string) or a list of addresses
+(defun rfc822-addresses-1 (&optional allow-groups)
+  ;; Looking for an rfc822 `address'
+  ;; Either a group (1*word ":" [#mailbox] ";")
+  ;; or a mailbox (addr-spec | 1*word route-addr)
+  ;;  addr-spec is (local-part "@" domain)
+  ;;  route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
+  ;;  local-part is (word *("." word))
+  ;;  word is (atom | quoted-string)
+  ;;  quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
+  ;;  atom is [^\000-\037\177 ()<>@,;:\".[]]+
+  ;;  domain is sub-domain *("." sub-domain)
+  ;;  sub-domain is domain-ref | domain-literal
+  ;;  domain-literal is  "[" *(dtext | quoted-pair) "]"
+  ;;  dtext is "[^][\\n"
+  ;;  domain-ref is atom
+  (let ((address-start (point))
+	(n 0))
+    (catch 'address
+      ;; optimize common cases:
+      ;;  foo
+      ;;  foo.bar@bar.zap
+      ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
+      ;; other common cases are:
+      ;;  foo bar <foo.bar@baz.zap>
+      ;;  "foo bar" <foo.bar@baz.zap>
+      ;;  those aren't hacked yet.
+      (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t)
+	       (progn (or (eobp)
+			  (rfc822-looking-at ?,))))
+	  (progn
+	    ;; rfc822-looking-at may have inserted a space
+	    (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
+	    ;; relying on the fact that rfc822-looking-at <char>
+	    ;;  doesn't mung match-data
+	    (throw 'address (buffer-substring address-start (match-end 0)))))
+      (goto-char address-start)
+      (while t
+	(cond ((and (= n 1) (rfc822-looking-at ?@))
+	       ;; local-part@domain
+	       (rfc822-snarf-domain)
+	       (throw 'address
+		 (buffer-substring address-start (point))))
+	      ((rfc822-looking-at ?:)
+	       (cond ((not allow-groups)
+		      (rfc822-bad-address "A group name may not appear here"))
+		     ((= n 0)
+		      (rfc822-bad-address "No name for :...; group")))
+	       ;; group
+	       (throw 'address
+		 ;; return a list of addresses
+		 (rfc822-snarf-frob-list ":...; group" ?\, ?\;
+					 'rfc822-addresses-1 t)))
+	      ((rfc822-looking-at ?<)
+	       (let ((start (point))
+		     (strip t))
+		 (cond ((rfc822-looking-at ?>)
+			;; empty path
+			())
+		       ((and (not (eobp)) (= (following-char) ?\@))
+			;; <@foo.bar,@baz:quux@abcd.efg>
+			(rfc822-snarf-frob-list "<...> address" ?\, ?\:
+			  (function (lambda ()
+				      (if (rfc822-looking-at ?\@)
+					  (rfc822-snarf-domain)
+					(rfc822-bad-address
+					  "Gubbish in route-addr")))))
+			(rfc822-snarf-words)
+			(or (rfc822-looking-at ?@)
+			    (rfc822-bad-address "Malformed <..@..> address"))
+			(rfc822-snarf-domain)
+			(setq strip nil))
+		       ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
+			; allow <foo> (losing unix seems to do this)
+			(rfc822-snarf-domain)))
+		 (let ((end (point)))
+		   (if (rfc822-looking-at ?\>)
+		       (throw 'address
+			 (buffer-substring (if strip start (1- start))
+					   (if strip end (1+ end))))
+		     (rfc822-bad-address "Unterminated <...> address")))))
+	      ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
+	       ;; this allows "." to be part of the words preceding
+	       ;; an addr-spec, since many broken mailers output
+	       ;; "Hern K. Herklemeyer III
+	       ;;   <yank@megadeath.dod.gods-own-country>"
+	       (or (= n 0)
+		   (= (preceding-char) ?\ )
+		   (insert ?\ ))
+	       (rfc822-snarf-words)
+	       (setq n (1+ n)))
+	      ((= n 0)
+	       (throw 'address nil))
+	      ((= n 1) ; allow "foo" (losing unix seems to do this)
+	       (throw 'address
+		 (buffer-substring address-start (point))))
+	      ((or (eobp) (looking-at ","))
+	       (rfc822-bad-address "Missing comma or route-spec"))
+	      (t
+	       (rfc822-bad-address "Strange character or missing comma")))))))
+
+(defun rfc822-addresses (header-text)
+  (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
+                    header-text)
+      ;; Make very simple case moderately fast.
+      (list (substring header-text (match-beginning 1) (match-end 1)))
+    (let ((buf (generate-new-buffer " rfc822")))
+      (unwind-protect
+	(save-excursion
+	  (set-buffer buf)
+	  (make-local-variable 'case-fold-search)
+	  (setq case-fold-search nil)	;For speed(?)
+	  (insert header-text)
+	  ;; unfold continuation lines
+	  (goto-char (point-min))
+
+	  (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
+	    (replace-match "\\1 " t))
+
+	  (goto-char (point-min))
+	  (rfc822-nuke-whitespace)
+	  (let ((list ())
+		tem
+		(p -1)
+		address-start); this is for rfc822-bad-address
+	    (while (not (eobp))
+	      (setq address-start (point))
+	      (setq tem
+		    (catch 'address ; this is for rfc822-bad-address
+		      (cond ((rfc822-looking-at ?\,)
+			     nil)
+			    ((looking-at "[][\000-\037\177-\377@;:\\.>]")
+			     (forward-char)
+			     (rfc822-bad-address
+			       (format "Strange character \\%c found"
+				       (preceding-char))))
+			    (t
+			     (rfc822-addresses-1 t)))))
+	      (cond ((null tem))
+		    ((stringp tem)
+		     (setq list (cons tem list)))
+		    (t
+		     (setq list (nconc (nreverse tem) list))))
+	      (if (= (point) p)
+		  ;; Punt on losing, misformatted address / infinite loop.
+		  ;; For example: "lcm36651@uxa.cso.uiuc.edu (Hi!  ;) )"
+		  (goto-char (point-max)))
+	      (setq p (point))
+	      )
+	    (nreverse list)))
+      (and buf (kill-buffer buf))))))