Mercurial > hg > xemacs-beta
comparison lisp/utils/rfc822.el @ 0:376386a54a3c r19-14
Import from CVS: tag r19-14
author | cvs |
---|---|
date | Mon, 13 Aug 2007 08:45:50 +0200 |
parents | |
children | 0293115a14e9 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:376386a54a3c |
---|---|
1 ;;; rfc822.el --- hairy rfc822 parser for mail and news and suchlike | |
2 ;; Keywords: mail | |
3 | |
4 ;; Copyright (C) 1986, 1987, 1993 Free Software Foundation, Inc. | |
5 ;; Author Richard Mlynarik. | |
6 | |
7 ;; This file is part of XEmacs. | |
8 | |
9 ;; XEmacs is free software; you can redistribute it and/or modify it | |
10 ;; under the terms of the GNU General Public License as published by | |
11 ;; the Free Software Foundation; either version 2, or (at your option) | |
12 ;; any later version. | |
13 | |
14 ;; XEmacs is distributed in the hope that it will be useful, but | |
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 ;; General Public License for more details. | |
18 | |
19 ;; You should have received a copy of the GNU General Public License | |
20 ;; along with XEmacs; see the file COPYING. If not, write to the Free | |
21 ;; Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | |
22 | |
23 ;;; Synched up with: Not synched with FSF but very close. | |
24 | |
25 ;; This code should probably be replaced with mail-extr.el once it's a | |
26 ;; little more stable. | |
27 | |
28 | |
29 (provide 'rfc822) | |
30 | |
;; Reads `address-start' as a free (dynamically bound) variable and
;; never returns normally: it always throws to the `address' tag.
(defun rfc822-bad-address (reason)
  "Turn the address being parsed into an error comment and throw it.
REASON is a string explaining why the address could not be parsed.

A \"_^_\" marker is inserted at point.  Then the text running from
`address-start' to just before the next comma or semicolon (or to the
end of the buffer when there is none) has its backslashes,
parentheses and newlines backslash-escaped and is wrapped in
\"(Unparsable address -- REASON: ...)\".  Whitespace after the result
is removed, and the buffer text from `address-start' to point is
thrown to the `address' catch tag."
  (save-restriction
    (insert "_^_")
    (narrow-to-region address-start
                      (if (re-search-forward "[,;]" nil t)
                          (max (point-min) (1- (point)))
                        (point-max)))
    ;; Make the error string suitable for inclusion in (...).
    ;; Backslash comes first so the backslashes added for the other
    ;; characters are not escaped again.
    (mapcar (function
             (lambda (loser)
               (goto-char (point-min))
               (while (search-forward loser nil t)
                 (backward-char 1)
                 (insert ?\\)
                 (forward-char 1))))
            '("\\" "(" ")" "\n"))
    (goto-char (point-min))
    (insert "(Unparsable address -- " reason ":\n\t \"")
    (goto-char (point-max))
    (insert "\")"))
  (rfc822-nuke-whitespace)
  (throw 'address (buffer-substring address-start (point))))
54 | |
(defun rfc822-nuke-whitespace (&optional leave-space)
  "Delete whitespace and parenthesized comments following point.
Runs of spaces, tabs and newlines and (...) comments are removed
repeatedly until some other character, or the end of the buffer,
follows point.  Nested comments are handled by a recursive call.
Inside a comment, a character that is neither a parenthesis nor
ordinary comment text is deleted together with the character after
it.  `rfc822-bad-address' is called when a comment is still open at
the end of the buffer, or when such a character is the last one in
the buffer.

If LEAVE-SPACE is non-nil, a single space is inserted afterwards
unless point is at the beginning or end of the buffer or is already
preceded by a space."
  (let ((again t)
        c)
    (while again
      (setq again
            (cond ((eobp)
                   nil)
                  ((= (setq c (following-char)) ?\()
                   ;; Step inside the comment and eat its contents
                   ;; until the closing paren is the next character.
                   (forward-char 1)
                   (while (if (eobp)
                              (rfc822-bad-address "Unbalanced comment (...)")
                            (/= (setq c (following-char)) ?\)))
                     (cond ((looking-at "[^()\\]+")
                            (replace-match ""))
                           ((= c ?\()
                            (rfc822-nuke-whitespace))
                           ((< (point) (1- (point-max)))
                            (delete-char 2))
                           (t
                            (rfc822-bad-address "orphaned backslash"))))
                   ;; Only "()" is left of the comment; remove it.
                   (forward-char -1)
                   (delete-char 2)
                   t)
                  ((memq c '(?\  ?\t ?\n))
                   (let ((ws-start (point)))
                     (skip-chars-forward " \t\n")
                     (delete-region ws-start (point)))
                   t)
                  (t
                   nil))))
    (or (not leave-space)
        (eobp)
        (bobp)
        (= (preceding-char) ?\ )
        (insert ?\ ))))
87 | |
(defun rfc822-looking-at (regex &optional leave-space)
  "Consume REGEX at point, then delete following whitespace and comments.
REGEX is either a regular expression string, matched with
`looking-at', or a character that must be the next one in the buffer.
On a match, point moves past the matched text,
`rfc822-nuke-whitespace' is called with LEAVE-SPACE, and t is
returned; the match data is saved around that call.  When nothing
matches, point is left alone and nil is returned."
  (let ((matched
         (if (stringp regex)
             (and (looking-at regex)
                  (progn (goto-char (match-end 0))
                         t))
           (and (not (eobp))
                (= (following-char) regex)
                (progn (forward-char 1)
                       t)))))
    (if matched
        (let ((saved (match-data)))
          (rfc822-nuke-whitespace leave-space)
          (store-match-data saved)
          t))))
102 | |
(defun rfc822-snarf-word ()
  "Consume one RFC 822 word (an atom or a quoted-string) at point.
Calls `rfc822-bad-address' when a quoted string is not terminated or
when the text at point is not an atom."
  ;; word is atom | quoted-string
  (if (= (following-char) ?\")
      ;; quoted-string
      (or (rfc822-looking-at "\"\\([^\"\\\n]\\|\\\\.\\|\\\\\n\\)*\"")
          (rfc822-bad-address "Unterminated quoted string"))
    ;; atom
    (or (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
        (rfc822-bad-address "Rubbish in address"))))
114 | |
(defun rfc822-snarf-words ()
  "Consume one or more words separated by \".\" characters at point.
Each word is read with `rfc822-snarf-word'."
  (let ((more t))
    ;; Always read one word, then keep going for as long as a dot
    ;; follows the word just read.
    (while more
      (rfc822-snarf-word)
      (setq more (rfc822-looking-at ?.)))))
119 | |
(defun rfc822-snarf-subdomain ()
  "Consume one RFC 822 sub-domain at point.
A sub-domain is either a domain-literal (text enclosed in square
brackets) or a domain-ref (an atom).  Calls `rfc822-bad-address' when
a domain literal is not terminated or when the text at point is
neither form."
  ;; sub-domain is domain-ref | domain-literal
  (cond ((= (following-char) ?\[)
         ;; domain-literal = "[" *(dtext | quoted-pair) "]"
         (or (rfc822-looking-at "\\[\\([^][\\\n]\\|\\\\.\\|\\\\\n\\)*\\]")
             (rfc822-bad-address "Unterminated domain literal [...]")))
        ((rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\".]+")
         ;; domain-ref = atom
         )
        (t
         (rfc822-bad-address "Rubbish in host/domain specification"))))
131 | |
(defun rfc822-snarf-domain ()
  "Consume a domain at point: sub-domains separated by \".\" characters.
Each sub-domain is read with `rfc822-snarf-subdomain'."
  (let ((more t))
    ;; Always read one sub-domain, then continue while a dot follows.
    (while more
      (rfc822-snarf-subdomain)
      (setq more (rfc822-looking-at ?.)))))
136 | |
(defun rfc822-snarf-frob-list (name separator terminator snarfer
                                    &optional return)
  "Parse a SEPARATOR-delimited list that ends with TERMINATOR.
NAME is a string describing the construct; it is used only in error
messages.  SEPARATOR and TERMINATOR are handed to `rfc822-looking-at',
so each is a character or a regular expression string.  SNARFER is
called with no arguments to consume one element.

If RETURN is non-nil, the non-nil values returned by SNARFER are
collected: a list value contributes all of its elements, any other
value contributes itself.  The collected values are returned in the
order they were read.  If RETURN is nil the result is nil.

Calls `rfc822-bad-address' when the buffer ends before TERMINATOR is
seen, or when two elements are not separated by SEPARATOR."
  (let ((at-start t)
        (acc nil)
        item)
    (while (cond ((eobp)
                  (rfc822-bad-address
                   (format "End of addresses in middle of %s" name)))
                 ((rfc822-looking-at terminator)
                  nil)
                 ((rfc822-looking-at separator)
                  ;; multiple separators are allowed and do nothing.
                  (while (rfc822-looking-at separator))
                  t)
                 (at-start
                  ;; The first element needs no separator before it.
                  t)
                 (t
                  (rfc822-bad-address
                   (format "Gubbish in middle of %s" name))))
      (setq item (funcall snarfer))
      (setq at-start nil)
      (if (and return item)
          ;; ACC is kept in reverse order until the final `nreverse'.
          (setq acc (if (listp item)
                        (nconc (reverse item) acc)
                      (cons item acc)))))
    (nreverse acc)))
163 | |
;; Returns either a single address (a string) or a list of addresses.
(defun rfc822-addresses-1 (&optional allow-groups)
  "Parse one RFC 822 address starting at point.
If ALLOW-GROUPS is non-nil, group syntax (NAME: mailbox, ...;) is
accepted and yields a list of addresses; otherwise a group is
reported through `rfc822-bad-address'.

The value is a string for a single mailbox, a list for a group, or
nil when no word at all could be read at point.  When the address
cannot be parsed, the value is the error text thrown by
`rfc822-bad-address', which is caught here."
  ;; Looking for an rfc822 `address'
  ;; Either a group (1*word ":" [#mailbox] ";")
  ;; or a mailbox (addr-spec | 1*word route-addr)
  ;; addr-spec is (local-part "@" domain)
  ;; route-addr is ("<" [1#("@" domain) ":"] addr-spec ">")
  ;; local-part is (word *("." word))
  ;; word is (atom | quoted-string)
  ;; quoted-string is ("\([^\"\\n]\|\\.\|\\\n\)")
  ;; atom is [^\000-\037\177 ()<>@,;:\".[]]+
  ;; domain is sub-domain *("." sub-domain)
  ;; sub-domain is domain-ref | domain-literal
  ;; domain-literal is "[" *(dtext | quoted-pair) "]"
  ;; dtext is "[^][\\n"
  ;; domain-ref is atom
  (let ((address-start (point))   ; read as a free variable by `rfc822-bad-address'
        (words-seen 0))
    (catch 'address
      ;; optimize common cases:
      ;;  foo
      ;;  foo.bar@bar.zap
      ;; followed by "\\'\\|,\\|([^()\\]*)\\'"
      ;; other common cases are:
      ;;  foo bar <foo.bar@baz.zap>
      ;;  "foo bar" <foo.bar@baz.zap>
      ;; those aren't hacked yet.
      (and (rfc822-looking-at "[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\(\\|@[^][\000-\037\177-\377 ()<>@,;:\\\"]+\\)" t)
           (or (eobp)
               (rfc822-looking-at ?,))
           (progn
             ;; rfc822-looking-at may have inserted a space
             (or (bobp) (/= (preceding-char) ?\ ) (delete-char -1))
             ;; relying on the fact that rfc822-looking-at <char>
             ;; doesn't mung match-data
             (throw 'address
                    (buffer-substring address-start (match-end 0)))))
      ;; The fast path did not apply; parse from the start again.
      (goto-char address-start)
      (while t
        (cond ((and (= words-seen 1) (rfc822-looking-at ?@))
               ;; local-part@domain
               (rfc822-snarf-domain)
               (throw 'address
                      (buffer-substring address-start (point))))
              ((rfc822-looking-at ?:)
               ;; group
               (or allow-groups
                   (rfc822-bad-address "A group name may not appear here"))
               (or (/= words-seen 0)
                   (rfc822-bad-address "No name for :...; group"))
               (throw 'address
                      ;; return a list of addresses
                      (rfc822-snarf-frob-list ":...; group" ?\, ?\;
                                              'rfc822-addresses-1 t)))
              ((rfc822-looking-at ?<)
               (let ((inner-start (point))
                     (strip-brackets t)
                     inner-end)
                 (cond ((rfc822-looking-at ?>)
                        ;; empty path
                        nil)
                       ((and (not (eobp)) (= (following-char) ?\@))
                        ;; <@foo.bar,@baz:quux@abcd.efg>
                        (rfc822-snarf-frob-list
                         "<...> address" ?\, ?\:
                         (function
                          (lambda ()
                            (or (rfc822-looking-at ?\@)
                                (rfc822-bad-address
                                 "Gubbish in route-addr"))
                            (rfc822-snarf-domain))))
                        (rfc822-snarf-words)
                        (or (rfc822-looking-at ?@)
                            (rfc822-bad-address "Malformed <..@..> address"))
                        (rfc822-snarf-domain)
                        ;; A routed address keeps its angle brackets.
                        (setq strip-brackets nil))
                       ((progn (rfc822-snarf-words) (rfc822-looking-at ?@))
                        ;; allow <foo> (losing unix seems to do this)
                        (rfc822-snarf-domain)))
                 (setq inner-end (point))
                 (or (rfc822-looking-at ?\>)
                     (rfc822-bad-address "Unterminated <...> address"))
                 (throw 'address
                        (if strip-brackets
                            (buffer-substring inner-start inner-end)
                          (buffer-substring (1- inner-start)
                                            (1+ inner-end))))))
              ((looking-at "[^][\000-\037\177-\377 ()<>@,;:\\.]")
               ;; this allows "." to be part of the words preceding
               ;; an addr-spec, since many broken mailers output
               ;; "Hern K. Herklemeyer III
               ;;   <yank@megadeath.dod.gods-own-country>"
               (if (and (/= words-seen 0)
                        (/= (preceding-char) ?\ ))
                   (insert ?\ ))
               (rfc822-snarf-words)
               (setq words-seen (1+ words-seen)))
              ((= words-seen 0)
               (throw 'address nil))
              ((= words-seen 1) ; allow "foo" (losing unix seems to do this)
               (throw 'address
                      (buffer-substring address-start (point))))
              (t
               (rfc822-bad-address
                (if (or (eobp) (looking-at ","))
                    "Missing comma or route-spec"
                  "Strange character or missing comma"))))))))
264 | |
(defun rfc822-addresses (header-text)
  "Return a list of the addresses found in the string HEADER-TEXT.
HEADER-TEXT is the body of a mail header such as To: or Cc:.  Each
element of the result is a string.  An address that cannot be parsed
is represented by the \"(Unparsable address -- ...)\" text produced by
`rfc822-bad-address'.

A header consisting of a single atom is handled directly.  Anything
else is parsed in a temporary buffer, which is killed before
returning."
  (if (string-match "\\`[ \t]*\\([^][\000-\037\177-\377 ()<>@,;:\\\".]+\\)[ \t]*\\'"
                    header-text)
      ;; Make very simple case moderately fast.
      (list (substring header-text (match-beginning 1) (match-end 1)))
    (let ((buf (generate-new-buffer " rfc822")))
      (unwind-protect
          (save-excursion
            (set-buffer buf)
            (make-local-variable 'case-fold-search)
            (setq case-fold-search nil) ;For speed(?)
            (insert header-text)
            ;; unfold continuation lines
            (goto-char (point-min))

            (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
              (replace-match "\\1 " t))

            (goto-char (point-min))
            (let ((list ())
                  tem
                  (p -1)
                  ;; This is for rfc822-bad-address.  It must be bound
                  ;; before the first call that can reach it, which is
                  ;; the whitespace/comment cleanup just below.
                  (address-start (point)))
              ;; Strip leading whitespace and comments.  An unbalanced
              ;; leading comment makes rfc822-nuke-whitespace call
              ;; rfc822-bad-address, which throws to `address'; catch
              ;; that here and record the error text like any other
              ;; unparsable address.
              (setq tem (catch 'address
                          (rfc822-nuke-whitespace)
                          nil))
              (if (stringp tem)
                  (setq list (cons tem list)))
              (while (not (eobp))
                (setq address-start (point))
                (setq tem
                      (catch 'address ; this is for rfc822-bad-address
                        (cond ((rfc822-looking-at ?\,)
                               nil)
                              ((looking-at "[][\000-\037\177-\377@;:\\.>]")
                               (forward-char)
                               (rfc822-bad-address
                                (format "Strange character \\%c found"
                                        (preceding-char))))
                              (t
                               (rfc822-addresses-1 t)))))
                ;; LIST is accumulated in reverse order.
                (cond ((null tem))
                      ((stringp tem)
                       (setq list (cons tem list)))
                      (t
                       (setq list (nconc (nreverse tem) list))))
                (if (= (point) p)
                    ;; Punt on losing, misformatted address / infinite loop.
                    ;; For example: "lcm36651@uxa.cso.uiuc.edu (Hi! ;) )"
                    (goto-char (point-max)))
                (setq p (point)))
              (nreverse list)))
        (and buf (kill-buffer buf))))))