Mercurial > hg > rc2
comparison program/lib/Roundcube/rcube_mime_decode.php @ 0:4681f974d28b
vanilla 1.3.3 distro, I hope
author | Charlie Root |
---|---|
date | Thu, 04 Jan 2018 15:52:31 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4681f974d28b |
---|---|
1 <?php | |
2 | |
3 /** | |
4 +-----------------------------------------------------------------------+ | |
5 | This file is part of the Roundcube Webmail client | | |
6 | Copyright (C) 2005-2015, The Roundcube Dev Team | | |
7 | Copyright (C) 2011-2015, Kolab Systems AG | | |
8 | | | |
9 | Licensed under the GNU General Public License version 3 or | | |
10 | any later version with exceptions for skins & plugins. | | |
11 | See the README file for a full license statement. | | |
12 | | | |
13 | PURPOSE: | | |
14 | MIME message parsing utilities derived from Mail_mimeDecode | | |
15 +-----------------------------------------------------------------------+ | |
16 | Author: Thomas Bruederli <roundcube@gmail.com> | | |
17 | Author: Aleksander Machniak <alec@alec.pl> | | |
18 | Author: Richard Heyes <richard@phpguru.org> | | |
19 +-----------------------------------------------------------------------+ | |
20 */ | |
21 | |
/**
 * Class for parsing MIME messages
 *
 * Splits a raw message into headers and body, walks multipart and
 * message/rfc822 containers recursively and (optionally) converts the
 * result into a tree of rcube_message_part objects.
 *
 * @package    Framework
 * @subpackage Storage
 * @author     Aleksander Machniak <alec@alec.pl>
 */
class rcube_mime_decode
{
    /**
     * Class configuration parameters.
     *
     * @var array
     */
    protected $params = array(
        'include_bodies'  => true,
        'decode_bodies'   => true,
        'decode_headers'  => true,
        'crlf'            => "\r\n",
        'default_charset' => RCUBE_CHARSET,
    );


    /**
     * Constructor.
     *
     * Merges the given parameters over the class defaults. No input is
     * parsed here; see decode().
     *
     * @param array $params An array of various parameters that determine
     *                       various things:
     *              include_bodies  - Whether to include the body in the returned
     *                                object.
     *              decode_bodies   - Whether to decode the bodies
     *                                of the parts. (Transfer encoding)
     *              decode_headers  - Whether to decode headers
     *              crlf            - CRLF type to use (CRLF/LF/CR)
     *              default_charset - Charset passed to the header decoder
     */
    public function __construct($params = array())
    {
        if (!empty($params)) {
            $this->params = array_merge($this->params, (array) $params);
        }
    }

    /**
     * Performs the decoding process.
     *
     * @param string $input   The input to decode
     * @param bool   $convert Convert result to rcube_message_part structure
     *
     * @return object|bool Decoded results or False on failure
     */
    public function decode($input, $convert = true)
    {
        list($header, $body) = $this->splitBodyHeader($input);

        $struct = $this->do_decode($header, $body);

        if ($struct && $convert) {
            $struct = $this->structure_part($struct);
        }

        return $struct;
    }

    /**
     * Performs the decoding. Decodes the body string passed to it
     * If it finds certain content-types it will call itself in a
     * recursive fashion
     *
     * @param string $headers       Header section
     * @param string $body          Body section
     * @param string $default_ctype Default content type, used when the
     *                              Content-Type header is missing or malformed
     *
     * @return object|bool Decoded results or False on error
     *                     (a multipart entity without a boundary parameter)
     */
    protected function do_decode($headers, $body, $default_ctype = 'text/plain')
    {
        $return          = new stdClass;
        $return->headers = array();

        $content_type = null;
        $encoding     = '7bit';

        foreach ($this->parseHeaders($headers) as $header) {
            $name  = strtolower($header['name']);
            $value = $header['value'];

            // A header seen once stays a scalar; repeated headers become a list
            if (!isset($return->headers[$name])) {
                $return->headers[$name] = $value;
            }
            else {
                if (!is_array($return->headers[$name])) {
                    $return->headers[$name] = array($return->headers[$name]);
                }

                $return->headers[$name][] = $value;
            }

            switch ($name) {
                case 'content-type':
                    $content_type = $this->parseHeaderValue($value);

                    if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                        $return->ctype_primary   = $regs[1];
                        $return->ctype_secondary = $regs[2];
                    }

                    if (!empty($content_type['other'])) {
                        $known = isset($return->ctype_parameters) ? (array) $return->ctype_parameters : array();
                        $return->ctype_parameters = array_merge($known, (array) $content_type['other']);
                    }

                    break;

                case 'content-disposition':
                    $content_disposition = $this->parseHeaderValue($value);
                    $return->disposition = $content_disposition['value'];

                    if (!empty($content_disposition['other'])) {
                        $known = isset($return->d_parameters) ? (array) $return->d_parameters : array();
                        $return->d_parameters = array_merge($known, (array) $content_disposition['other']);
                    }

                    break;

                case 'content-transfer-encoding':
                    $transfer_encoding = $this->parseHeaderValue($value);
                    $encoding          = $transfer_encoding['value'];
                    break;
            }
        }

        // No (usable) Content-Type header: describe the part with the default type
        if (!isset($return->ctype_primary)) {
            $fallback = array_pad(explode('/', $default_ctype), 2, '');
            $return->ctype_primary   = $fallback[0];
            $return->ctype_secondary = $fallback[1];
        }

        $ctype = $content_type !== null ? strtolower($content_type['value']) : '';

        switch ($ctype) {
            case 'multipart/digest':
            case 'multipart/alternative':
            case 'multipart/related':
            case 'multipart/mixed':
            case 'multipart/signed':
            case 'multipart/encrypted':
                if (!isset($content_type['other']['boundary'])) {
                    return false;
                }

                // Parts of a digest default to message/rfc822, everything else to text/plain
                $part_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain';

                foreach ($this->boundarySplit($body, $content_type['other']['boundary']) as $raw_part) {
                    list($part_header, $part_body) = $this->splitBodyHeader($raw_part);
                    $return->parts[] = $this->do_decode($part_header, $part_body, $part_ctype);
                }

                break;

            case 'message/rfc822':
                // The body is a complete message; decode it with the same settings
                $decoder         = new rcube_mime_decode($this->params);
                $return->parts[] = $decoder->decode($body, false);
                break;

            default:
                // Leaf part. The transfer encoding applies whether or not
                // a Content-Type header was present.
                if ($this->params['include_bodies']) {
                    $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
                }

                break;
        }

        return $return;
    }

    /**
     * Given a string containing a header and body
     * section, this function will split them (at the first
     * blank line) and return them.
     *
     * When the input contains no blank line it is treated as a header
     * section without a body. One trailing line break is removed from
     * the body.
     *
     * @param string $input Input to split apart
     *
     * @return array Contains header and body section
     */
    protected function splitBodyHeader($input)
    {
        $input = (string) $input;
        $crlf  = $this->params['crlf'];
        $pos   = strpos($input, $crlf . $crlf);

        if ($pos === false) {
            return array($input, '');
        }

        $crlf_len = strlen($crlf);
        $header   = (string) substr($input, 0, $pos);
        $body     = (string) substr($input, $pos + 2 * $crlf_len);

        if (substr($body, -$crlf_len) === $crlf) {
            $body = (string) substr($body, 0, -$crlf_len);
        }

        return array($header, $body);
    }

    /**
     * Parse headers given in $input and return as assoc array.
     *
     * Folded lines are unfolded first. Lines without a colon are not
     * headers and are skipped.
     *
     * @param string $input Headers to parse
     *
     * @return array List of array('name' => ..., 'value' => ...) entries
     */
    protected function parseHeaders($input)
    {
        $return = array();
        $input  = (string) $input;

        if ($input === '') {
            return $return;
        }

        $crlf = $this->params['crlf'];

        // Unfold the input: a line break followed by a tab or space continues the header
        $input = preg_replace('/' . preg_quote($crlf, '/') . '[\t ]/', ' ', $input);

        foreach (explode($crlf, trim($input)) as $line) {
            $pos = strpos($line, ':');

            if ($pos === false) {
                continue;
            }

            $hdr_name  = (string) substr($line, 0, $pos);
            $hdr_value = (string) substr($line, $pos + 1);

            // Drop the single space that conventionally follows the colon
            if ($hdr_value !== '' && $hdr_value[0] === ' ') {
                $hdr_value = (string) substr($hdr_value, 1);
            }

            $return[] = array(
                'name'  => $hdr_name,
                'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value,
            );
        }

        return $return;
    }

    /**
     * Function to parse a header value, extract first part, and any secondary
     * parts (after ;) This function is not as robust as it could be.
     * Eg. header comments in the wrong place will probably break it.
     * Semicolons inside quoted parameter values are also treated as separators.
     *
     * @param string $input Header value to parse
     *
     * @return array Contains parsed result: 'value' (the main value) and,
     *               if any parameters were found, 'other' (lower-cased
     *               parameter name => value)
     */
    protected function parseHeaderValue($input)
    {
        $parts = preg_split('/;\s*/', $input);

        if (empty($parts)) {
            return array('value' => trim($input));
        }

        $return = array('value' => trim($parts[0]));

        for ($n = 1, $total = count($parts); $n < $total; $n++) {
            if (preg_match_all('/(([[:alnum:]]+)="?([^"]*)"?\s?;?)+/i', $parts[$n], $matches)) {
                foreach ($matches[2] as $i => $param_name) {
                    $return['other'][strtolower($param_name)] = $matches[3][$i];
                }
            }
        }

        return $return;
    }

    /**
     * This function splits the input based on the given boundary
     *
     * Text before the first delimiter (preamble) and after the closing
     * delimiter (epilogue) is discarded. If the closing delimiter is
     * missing, the text after the last delimiter is kept as the final part.
     *
     * @param string $input    Input to parse
     * @param string $boundary Boundary
     *
     * @return array Contains array of resulting mime parts (may be empty)
     */
    protected function boundarySplit($input, $boundary)
    {
        $parts  = array();
        $chunks = explode('--' . $boundary, (string) $input);
        $last   = count($chunks) - 1;

        for ($i = 1; $i < $last; $i++) {
            $parts[] = $chunks[$i];
        }

        // A chunk starting with "--" is what follows the closing delimiter.
        // Anything else after the last delimiter is an unterminated final part.
        if ($last >= 1 && strncmp($chunks[$last], '--', 2) !== 0 && trim($chunks[$last]) !== '') {
            $parts[] = $chunks[$last];
        }

        return $parts;
    }

    /**
     * Given a header, this function will decode it according to RFC2047.
     * Probably not *exactly* conformant, but it does pass all the given
     * examples (in RFC2047).
     *
     * @param string $input Input header value to decode
     *
     * @return string Decoded header value
     */
    protected function decodeHeader($input)
    {
        return rcube_mime::decode_mime_string($input, $this->params['default_charset']);
    }

    /**
     * Recursive method to convert a rcube_mime_decode structure
     * into a rcube_message_part object.
     *
     * @param object $part   A message part struct
     * @param int    $count  Part count
     * @param string $parent Parent MIME ID
     *
     * @return object rcube_message_part
     * @see self::decode()
     */
    protected function structure_part($part, $count = 0, $parent = '')
    {
        $struct = new rcube_message_part;

        if (!empty($part->mime_id)) {
            $struct->mime_id = $part->mime_id;
        }
        else {
            // empty() is intentional: the root gets "0", and because "0" is
            // also empty its children are numbered "1", "2", ... not "0.1".
            $struct->mime_id = empty($parent) ? (string) $count : "$parent.$count";
        }

        $ctype_primary   = isset($part->ctype_primary) ? $part->ctype_primary : '';
        $ctype_secondary = isset($part->ctype_secondary) ? $part->ctype_secondary : '';

        $struct->headers          = isset($part->headers) ? $part->headers : array();
        $struct->mimetype         = $ctype_primary . '/' . $ctype_secondary;
        $struct->ctype_primary    = $ctype_primary;
        $struct->ctype_secondary  = $ctype_secondary;
        $struct->ctype_parameters = isset($part->ctype_parameters) ? $part->ctype_parameters : array();

        if (!empty($part->headers['content-transfer-encoding'])) {
            $encoding = $part->headers['content-transfer-encoding'];

            // A repeated header is stored as a list; use the last occurrence
            $struct->encoding = is_array($encoding) ? end($encoding) : $encoding;
        }

        if (!empty($part->ctype_parameters['charset'])) {
            $struct->charset = $part->ctype_parameters['charset'];
        }

        // determine filename
        $filename = '';

        if (!empty($part->d_parameters['filename'])) {
            $filename = $part->d_parameters['filename'];
        }
        else if (!empty($part->ctype_parameters['name'])) {
            $filename = $part->ctype_parameters['name'];
        }

        if ($filename !== '') {
            // With decode_headers disabled the parameter is still raw, decode it here
            if (!$this->params['decode_headers']) {
                $filename = $this->decodeHeader($filename);
            }

            $struct->filename = $filename;
        }

        $struct->body        = isset($part->body) ? $part->body : null;
        $struct->size        = strlen((string) $struct->body);
        $struct->disposition = isset($part->disposition) ? $part->disposition : null;

        $children = isset($part->parts) ? (array) $part->parts : array();
        $index    = 0;

        foreach ($children as $child_part) {
            // Count every child so sibling numbering stays stable
            $index++;

            // Children that failed to decode are stored as false; skip them
            if (!is_object($child_part)) {
                continue;
            }

            $struct->parts[] = $this->structure_part($child_part, $index, $struct->mime_id);
        }

        return $struct;
    }
}