0
|
1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike
|
|
2 ;; Keywords: mail
|
|
3
|
|
4 ;; Copyright (C) 1986, 1987, 1993 Free Software Foundation, Inc.
|
|
5 ;; Author: Richard Mlynarik
|
|
6
|
|
7 ;; This file is part of XEmacs.
|
|
8
|
|
9 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
10 ;; under the terms of the GNU General Public License as published by
|
|
11 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
12 ;; any later version.
|
|
13
|
|
14 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
17 ;; General Public License for more details.
|
|
18
|
|
19 ;; You should have received a copy of the GNU General Public License
|
16
|
20 ;; along with XEmacs; see the file COPYING. If not, write to the
|
|
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
22 ;; Boston, MA 02111-1307, USA.
|
0
|
23
|
|
24 ;;; Synched up with: Not synched with FSF but very close.
|
|
25
|
|
26 ;; This code should probably be replaced with mail-extr.el once it's a
|
|
27 ;; little more stable.
|
|
28
|
|
29
|
|
30 (provide 'rfc822)
|
|
31
|
|
32 ;; uses address-start free, throws to address
|
|
(defun rfc822-bad-address (reason)
  "Turn the text of the address being parsed into an error comment.
REASON is a string describing what went wrong.

The free variable `address-start' must hold the buffer position where
the current address begins.  The text from there up to just before the
next comma or semicolon (or to the end of the buffer) is rewritten in
place as a parenthesized \"Unparsable address\" note, with the marker
_^_ inserted where point was.  The rewritten text is then thrown to
the `address' catch tag, so this function never returns normally."
  (save-restriction
    ;; Leave a visible marker at the spot where parsing gave up.
    (insert "_^_")
    ;; Confine the edits below to the offending address only.
    (let ((limit (if (re-search-forward "[,;]" nil t)
                     (max (point-min) (1- (point)))
                   (point-max))))
      (narrow-to-region address-start limit))
    ;; Make the text suitable for inclusion in (...): put a backslash
    ;; in front of every backslash, parenthesis and newline.
    ;; Backslash comes first so the escapes added for the other
    ;; characters are not escaped a second time.
    (let ((specials '("\\" "(" ")" "\n"))
          special)
      (while specials
        (setq special (car specials)
              specials (cdr specials))
        (goto-char (point-min))
        (while (search-forward special nil t)
          (forward-char -1)
          (insert ?\\)
          ;; Step over the character just escaped so that it is not
          ;; found again.
          (forward-char 1))))
    ;; Wrap the escaped text in the explanatory comment.
    (goto-char (point-min))
    (insert "(Unparsable address -- " reason ":\n\t \"")
    (goto-char (point-max))
    (insert "\")"))
  ;; Point is now just after the generated comment; drop whatever
  ;; whitespace or comments follow it before handing the text back.
  (rfc822-nuke-whitespace)
  (throw 'address (buffer-substring address-start (point))))
|
|
55
|
|
(defun rfc822-nuke-whitespace (&optional leave-space)
  "Delete whitespace and parenthesized comments following point.
Runs of spaces, tabs and newlines and (possibly nested) comments
are removed repeatedly until point is at the end of the buffer or
before some other character.

If LEAVE-SPACE is non-nil and point ends up neither at the start nor
at the end of the buffer and is not already preceded by a space, one
space is inserted.

An unterminated comment, or a backslash that is the last character
of the buffer inside a comment, is reported via `rfc822-bad-address'."
  (let ((again t)
        ch)
    (while again
      (setq again
            (if (eobp)
                nil
              (setq ch (following-char))
              (cond ((= ch ?\()
                     ;; A comment: empty it out piece by piece.
                     (forward-char 1)
                     (while (if (eobp)
                                (rfc822-bad-address "Unbalanced comment (...)")
                              (/= (setq ch (following-char)) ?\)))
                       (cond ((looking-at "[^()\\]+")
                              ;; ordinary comment text
                              (replace-match ""))
                             ((= ch ?\()
                              ;; nested comment
                              (rfc822-nuke-whitespace))
                             ((< (point) (1- (point-max)))
                              ;; a backslash and the character it quotes
                              (delete-char 2))
                             (t
                              (rfc822-bad-address "orphaned backslash"))))
                     ;; Only "()" is left; point is between the two.
                     (backward-char 1)
                     (delete-char 2)
                     t)
                    ((memq ch '(?\  ?\t ?\n))
                     (let ((from (point)))
                       (skip-chars-forward " \t\n")
                       (delete-region from (point)))
                     t)))))
    (if (and leave-space
             (not (eobp))
             (not (bobp))
             (/= (preceding-char) ?\ ))
        (insert ?\ )
      t)))
|
|
88
|
|
(defun rfc822-looking-at (regex &optional leave-space)
  "Try to match REGEX at point and, on success, skip past the match.
REGEX is either a regular expression string, matched with
`looking-at', or a single character compared with the character
after point.

On success point is moved past the matched text, whitespace and
comments that follow are removed with `rfc822-nuke-whitespace'
(which receives LEAVE-SPACE), the match data from before that
cleanup is put back, and t is returned.  On failure nothing is
moved and nil is returned."
  (let ((matched
         (if (stringp regex)
             (and (looking-at regex)
                  (progn (goto-char (match-end 0))
                         t))
           (and (not (eobp))
                (= (following-char) regex)
                (progn (forward-char 1)
                       t)))))
    (if matched
        ;; The cleanup may clobber the match data; callers rely on it
        ;; being preserved.
        (let ((saved (match-data)))
          (rfc822-nuke-whitespace leave-space)
          (store-match-data saved)
          t))))
|
|
103
|
|
(defun rfc822-snarf-word ()
  "Move point over one word, where a word is an atom or a quoted-string.
If the character after point is a double quote, a complete
quoted string must follow; otherwise an atom must follow.
Anything else is reported via `rfc822-bad-address'."
  (if (= (following-char) ?\")
      ;; quoted-string
      (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
          (rfc822-bad-address "Unterminated quoted string"))
    ;; atom
    (or (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in address"))))
|
|
115
|
|
(defun rfc822-snarf-words ()
  "Move point over one or more words separated by periods.
Each word is consumed with `rfc822-snarf-word'."
  (let ((another t))
    (while another
      (rfc822-snarf-word)
      ;; A period means one more word has to follow.
      (setq another (rfc822-looking-at ?.)))))
|
|
120
|
|
(defun rfc822-snarf-subdomain ()
  "Move point over one sub-domain.
A sub-domain is either a domain-literal, which is bracketed text
such as [10.0.0.1], or a domain-ref, which is an atom.  Anything
else is reported via `rfc822-bad-address'."
  (if (= (following-char) ?\[)
      ;; domain-literal
      (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
          (rfc822-bad-address "Unterminated domain literal [...]"))
    ;; domain-ref = atom
    (or (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in host/domain specification"))))
|
|
132
|
|
(defun rfc822-snarf-domain ()
  "Move point over a domain: one or more sub-domains separated by periods.
Each sub-domain is consumed with `rfc822-snarf-subdomain'."
  (let ((another t))
    (while another
      (rfc822-snarf-subdomain)
      ;; A period means one more sub-domain has to follow.
      (setq another (rfc822-looking-at ?.)))))
|
|
137
|
|
(defun rfc822-snarf-frob-list (name separator terminator snarfer
                                    &optional return)
  "Parse a SEPARATOR-delimited list of items that ends with TERMINATOR.
NAME is a description of the construct, used only in error text.
SEPARATOR and TERMINATOR are passed to `rfc822-looking-at'.
SNARFER is called with no arguments to consume each item.

If RETURN is non-nil, the non-nil values returned by SNARFER are
collected, in order, into the list that is returned; a value that
is itself a list is spliced in.  Otherwise nil is returned.

Reaching the end of the buffer before TERMINATOR, or finding two
items with no SEPARATOR between them, is reported via
`rfc822-bad-address'."
  (let ((at-start t)
        (acc nil)
        item)
    (while (cond ((eobp)
                  (rfc822-bad-address
                   (format "End of addresses in middle of %s" name)))
                 ((rfc822-looking-at terminator)
                  ;; end of the list
                  nil)
                 ((rfc822-looking-at separator)
                  ;; multiple separators are allowed and do nothing.
                  (while (rfc822-looking-at separator))
                  t)
                 ;; The very first item needs no separator before it.
                 (at-start
                  t)
                 (t
                  (rfc822-bad-address
                   (format "Gubbish in middle of %s" name))))
      (setq item (funcall snarfer))
      (setq at-start nil)
      ;; ACC is built back to front and reversed once at the end.
      (if (and return item)
          (setq acc (if (consp item)
                        (nconc (reverse item) acc)
                      (cons item acc)))))
    (nreverse acc)))
|
|
164
|
|
165 ;; return either an address (a string) or a list of addresses
|
|
(defun rfc822-addresses-1 (&optional allow-groups)
  ;; Parse one rfc822 `address' starting at point.
  ;;
  ;; The result is delivered through the `address' catch established
  ;; below: a string for a single mailbox, a list of strings for a
  ;; group, or nil when nothing that looks like an address starts at
  ;; point.  Groups are accepted only when ALLOW-GROUPS is non-nil.
  ;; Errors go through rfc822-bad-address, which throws its diagnostic
  ;; text to the same tag, so that text is returned as well.
  ;;
  ;; `address-start' is bound here on purpose: rfc822-bad-address
  ;; reads it as a free variable.
  ;;
  ;; Looking for an rfc822 `address'
  ;; Either a group (1*word ":" [#mailbox] ";")
  ;; or a mailbox (addr-spec | 1*word route-addr)
  ;; addr-spec is (local-part "@" domain)
  ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
  ;; local-part is (word *("." word))
  ;; word is (atom | quoted-string)
  ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
  ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
  ;; domain is sub-domain *("." sub-domain)
  ;; sub-domain is domain-ref | domain-literal
  ;; domain-literal is "[" *(dtext | quoted-pair) "]"
  ;; dtext is "[^][\\n"
  ;; domain-ref is atom
  (let ((address-start (point))
        ;; N counts the dotted word sequences consumed so far by the
        ;; general loop below.
        (n 0))
    (catch 'address
      ;; optimize common cases:
      ;;  foo
      ;;  foo.bar@bar.zap
      ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
      ;; other common cases are:
      ;;  foo bar <foo.bar@baz.zap>
      ;;  "foo bar" <foo.bar@baz.zap>
      ;; those aren't hacked yet.
      ;;
      ;; Note that "." is not excluded by the character sets of this
      ;; regexp, so dotted names are matched as a single run.
      (if (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t)
               (progn (or (eobp)
                          (rfc822-looking-at ?,))))
          (progn
            ;; rfc822-looking-at may have inserted a space
            (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
            ;; relying on the fact that rfc822-looking-at <char>
            ;; doesn't mung match-data
            (throw 'address (buffer-substring address-start (match-end 0)))))
      ;; The shortcut did not apply; start over with the general parser.
      (goto-char address-start)
      (while t
        (cond ((and (= n 1) (rfc822-looking-at ?@))
               ;; local-part@domain
               (rfc822-snarf-domain)
               (throw 'address
                      (buffer-substring address-start (point))))
              ((rfc822-looking-at ?:)
               (cond ((not allow-groups)
                      (rfc822-bad-address "A group name may not appear here"))
                     ((= n 0)
                      (rfc822-bad-address "No name for :...; group")))
               ;; group
               ;; The members are parsed by this same function called
               ;; with no argument, so a group inside a group is
               ;; rejected by the check just above.
               (throw 'address
                      ;; return a list of addresses
                      (rfc822-snarf-frob-list ":...; group" ?\, ?\;
                                              'rfc822-addresses-1 t)))
              ((rfc822-looking-at ?<)
               ;; START is just after the "<".  STRIP stays t unless
               ;; an explicit route is found; it selects whether the
               ;; returned text excludes the angle brackets.
               (let ((start (point))
                     (strip t))
                 (cond ((rfc822-looking-at ?>)
                        ;; empty path
                        ())
                       ((and (not (eobp)) (= (following-char) ?\@))
                        ;; <@foo.bar,@baz:quux@abcd.efg>
                        (rfc822-snarf-frob-list "<...> address" ?\, ?\:
                                                (function (lambda ()
                                                            (if (rfc822-looking-at ?\@)
                                                                (rfc822-snarf-domain)
                                                              (rfc822-bad-address
                                                               "Gubbish in route-addr")))))
                        (rfc822-snarf-words)
                        (or (rfc822-looking-at ?@)
                            (rfc822-bad-address "Malformed <..@..> address"))
                        (rfc822-snarf-domain)
                        (setq strip nil))
                       ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
                        ; allow <foo> (losing unix seems to do this)
                        (rfc822-snarf-domain)))
                 ;; END is the position just before the expected ">".
                 (let ((end (point)))
                   (if (rfc822-looking-at ?\>)
                       (throw 'address
                              (buffer-substring (if strip start (1- start))
                                                (if strip end (1+ end))))
                     (rfc822-bad-address "Unterminated <...> address")))))
              ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
               ;; this allows "." to be part of the words preceding
               ;; an addr-spec, since many broken mailers output
               ;; "Hern K. Herklemeyer III
               ;;  <yank@megadeath.dod.gods-own-country>"
               ;;
               ;; Keep consecutive word sequences separated by a
               ;; single space in the buffer.
               (or (= n 0)
                   (= (preceding-char) ?\ )
                   (insert ?\ ))
               (rfc822-snarf-words)
               (setq n (1+ n)))
              ((= n 0)
               ;; Nothing recognizable here: not an address.
               (throw 'address nil))
              ((= n 1) ; allow "foo" (losing unix seems to do this)
               (throw 'address
                      (buffer-substring address-start (point))))
              ((or (eobp) (looking-at ","))
               (rfc822-bad-address "Missing comma or route-spec"))
              (t
               (rfc822-bad-address "Strange character or missing comma")))))))
|
|
265
|
|
(defun rfc822-addresses (header-text)
  "Parse HEADER-TEXT and return the addresses it contains as a list.
HEADER-TEXT is a string holding the body of an address header.
Each element of the result is a string.  Parts that cannot be
parsed appear as the diagnostic text produced by
`rfc822-bad-address', which starts with \"(Unparsable address -- \".

A header that consists of a single atom with optional surrounding
spaces or tabs is answered directly.  Everything else is parsed in
a scratch buffer, which is killed before returning, even on error."
  (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
                    header-text)
      ;; Make very simple case moderately fast.
      (list (substring header-text (match-beginning 1) (match-end 1)))
    (let ((buf (generate-new-buffer " rfc822")))
      (unwind-protect
          (save-excursion
            (set-buffer buf)
            (make-local-variable 'case-fold-search)
            (setq case-fold-search nil) ;For speed(?)
            (insert header-text)
            ;; unfold continuation lines
            (goto-char (point-min))

            (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
              (replace-match "\\1 " t))

            (goto-char (point-min))
            (let ((list ())
                  tem
                  ;; P is the position reached by the previous
                  ;; iteration; it is used to detect lack of progress.
                  (p -1)
                  ;; This is for rfc822-bad-address, which reads it as
                  ;; a free variable.  It must already hold a buffer
                  ;; position while the leading whitespace is being
                  ;; stripped.
                  (address-start (point)))
              ;; Strip leading whitespace and comments.  This can call
              ;; rfc822-bad-address (for instance when the header
              ;; starts with an unterminated comment), so it has to
              ;; run inside a catch like every other parsing step.
              ;; Without one the throw has nowhere to land and
              ;; signals an error.
              (setq tem (catch 'address
                          (rfc822-nuke-whitespace)
                          nil))
              (if tem
                  (setq list (cons tem list)))
              (while (not (eobp))
                (setq address-start (point))
                (setq tem
                      (catch 'address ; this is for rfc822-bad-address
                        (cond ((rfc822-looking-at ?\,)
                               nil)
                              ((looking-at "[][\000-\037\177-\377@;:\\.>]")
                               (forward-char)
                               (rfc822-bad-address
                                (format "Strange character \\%c found"
                                        (preceding-char))))
                              (t
                               (rfc822-addresses-1 t)))))
                ;; TEM is nil (nothing to add), one address, or a list
                ;; of addresses from a group.  LIST is kept in reverse
                ;; order until the end.
                (cond ((null tem))
                      ((stringp tem)
                       (setq list (cons tem list)))
                      (t
                       (setq list (nconc (nreverse tem) list))))
                (if (= (point) p)
                    ;; Punt on losing, misformatted address / infinite loop.
                    ;; For example: "lcm36651@uxa.cso.uiuc.edu (Hi! ;) )"
                    (goto-char (point-max)))
                (setq p (point)))
              (nreverse list)))
        (and buf (kill-buffer buf))))))
|