0
|
1 <?php
|
|
2
|
|
/**
 +-----------------------------------------------------------------------+
 | This file is part of the Roundcube Webmail client                     |
 | Copyright (C) 2005-2015, The Roundcube Dev Team                       |
 | Copyright (C) 2011-2015, Kolab Systems AG                             |
 |                                                                       |
 | Licensed under the GNU General Public License version 3 or            |
 | any later version with exceptions for skins & plugins.                |
 | See the README file for a full license statement.                     |
 |                                                                       |
 | PURPOSE:                                                              |
 |   MIME message parsing utilities derived from Mail_mimeDecode         |
 +-----------------------------------------------------------------------+
 | Author: Thomas Bruederli <roundcube@gmail.com>                        |
 | Author: Aleksander Machniak <alec@alec.pl>                            |
 | Author: Richard Heyes <richard@phpguru.org>                           |
 +-----------------------------------------------------------------------+
*/
|
|
21
|
|
/**
 * Class for parsing MIME messages
 *
 * Splits a raw message into its header and body sections, parses the
 * headers, and builds a tree of stdClass nodes (recursing into the listed
 * multipart/* types and into message/rfc822). The tree can optionally be
 * converted into rcube_message_part objects.
 *
 * @package    Framework
 * @subpackage Storage
 * @author     Aleksander Machniak <alec@alec.pl>
 */
class rcube_mime_decode
{
    /**
     * Class configuration parameters.
     *
     * @var array
     */
    protected $params = array(
        'include_bodies'  => true,
        'decode_bodies'   => true,
        'decode_headers'  => true,
        'crlf'            => "\r\n",
        'default_charset' => RCUBE_CHARSET,
    );


    /**
     * Constructor.
     *
     * Merges the given options over the built-in defaults.
     *
     * @param array $params An array of various parameters that determine
     *                      various things:
     *                      include_bodies  - Whether to include the body in the
     *                                        returned object.
     *                      decode_bodies   - Whether to decode the bodies of
     *                                        the parts. (Transfer encoding)
     *                      decode_headers  - Whether to decode headers
     *                      crlf            - Line separator the input uses
     *                      default_charset - Charset passed to the header decoder
     */
    public function __construct($params = array())
    {
        if (!empty($params)) {
            $this->params = array_merge($this->params, (array) $params);
        }
    }

    /**
     * Performs the decoding process.
     *
     * @param string $input   The input to decode
     * @param bool   $convert Convert result to rcube_message_part structure
     *
     * @return object|bool Decoded results or False on failure
     */
    public function decode($input, $convert = true)
    {
        list($header, $body) = $this->split_message($input);

        $struct = $this->do_decode($header, $body);

        if ($struct && $convert) {
            $struct = $this->structure_part($struct);
        }

        return $struct;
    }

    /**
     * Performs the decoding. Decodes the body string passed to it.
     * If it finds certain content-types it will call itself in a
     * recursive fashion.
     *
     * @param string $headers       Header section
     * @param string $body          Body section
     * @param string $default_ctype Default content type, used when the
     *                              headers carry no Content-Type
     *
     * @return object|bool Decoded results or False on error
     *                     (a multipart type without a boundary parameter)
     */
    protected function do_decode($headers, $body, $default_ctype = 'text/plain')
    {
        $return          = new stdClass;
        $return->headers = array();

        foreach ($this->parseHeaders($headers) as $header) {
            $name  = strtolower($header['name']);
            $value = $header['value'];

            // A header seen once is stored as a scalar; repeated headers
            // are collected into a list in order of appearance.
            if (!isset($return->headers[$name])) {
                $return->headers[$name] = $value;
            }
            else {
                if (!is_array($return->headers[$name])) {
                    $return->headers[$name] = array($return->headers[$name]);
                }

                $return->headers[$name][] = $value;
            }

            switch ($name) {
                case 'content-type':
                    $content_type = $this->parseHeaderValue($value);

                    if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                        $return->ctype_primary   = $regs[1];
                        $return->ctype_secondary = $regs[2];
                    }

                    if (!empty($content_type['other'])) {
                        $return->ctype_parameters = array_merge(
                            isset($return->ctype_parameters) ? (array) $return->ctype_parameters : array(),
                            (array) $content_type['other']
                        );
                    }

                    break;

                case 'content-disposition':
                    $content_disposition = $this->parseHeaderValue($value);
                    $return->disposition = $content_disposition['value'];

                    if (!empty($content_disposition['other'])) {
                        $return->d_parameters = array_merge(
                            isset($return->d_parameters) ? (array) $return->d_parameters : array(),
                            (array) $content_disposition['other']
                        );
                    }

                    break;

                case 'content-transfer-encoding':
                    $content_transfer_encoding = $this->parseHeaderValue($value);
                    break;
            }
        }

        if (!isset($content_type)) {
            // No Content-Type header at all: fall back to the caller's default.
            // The body is passed to the decoder without an explicit encoding.
            $ctype = array_pad(explode('/', $default_ctype), 2, '');

            $return->ctype_primary   = $ctype[0];
            $return->ctype_secondary = $ctype[1];

            if ($this->params['include_bodies']) {
                $return->body = $this->decode_body($body);
            }

            return $return;
        }

        $ctype    = strtolower($content_type['value']);
        $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';

        switch ($ctype) {
            case 'multipart/digest':
            case 'multipart/alternative':
            case 'multipart/related':
            case 'multipart/mixed':
            case 'multipart/signed':
            case 'multipart/encrypted':
                if (!isset($content_type['other']['boundary'])) {
                    return false;
                }

                // Parts of a digest default to message/rfc822, others to text/plain
                $default_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain';
                $parts         = $this->boundarySplit($body, $content_type['other']['boundary']);

                foreach ((array) $parts as $part) {
                    list($part_header, $part_body) = $this->split_message($part);
                    $return->parts[] = $this->do_decode($part_header, $part_body, $default_ctype);
                }

                break;

            case 'message/rfc822':
                // The embedded message is decoded by a fresh decoder with the
                // same options and kept as a raw (unconverted) structure.
                $obj = new rcube_mime_decode($this->params);
                $return->parts[] = $obj->decode($body, false);
                unset($obj);
                break;

            default:
                // text/plain, text/html and every other type: a single body
                if ($this->params['include_bodies']) {
                    $return->body = $this->decode_body($body, $encoding);
                }

                break;
        }

        return $return;
    }

    /**
     * Given a string containing a header and body
     * section, this function will split them (at the first
     * blank line) and return them. One trailing line separator
     * is removed from the body.
     *
     * @param string $input Input to split apart
     *
     * @return array|bool Header and body section, or False when the input
     *                    contains no blank line
     */
    protected function splitBodyHeader($input)
    {
        $input = (string) $input;
        $crlf  = $this->params['crlf'];
        $pos   = strpos($input, $crlf . $crlf);

        if ($pos === false) {
            return false;
        }

        $crlf_len = strlen($crlf);
        $header   = substr($input, 0, $pos);
        $body     = (string) substr($input, $pos + 2 * $crlf_len);

        // Only look at the tail when the body is long enough to hold a separator
        if (strlen($body) >= $crlf_len && substr($body, -$crlf_len) === $crlf) {
            $body = (string) substr($body, 0, -$crlf_len);
        }

        return array($header, $body);
    }

    /**
     * Parse headers given in $input and return them as a list.
     *
     * Folded lines are unfolded first. Lines that do not contain a colon
     * are not header fields and are skipped.
     *
     * @param string $input Headers to parse
     *
     * @return array List of array('name' => ..., 'value' => ...) entries
     */
    protected function parseHeaders($input)
    {
        $return = array();
        $input  = (string) $input;

        if ($input === '') {
            return $return;
        }

        $crlf = $this->params['crlf'];

        // Unfold the input
        $input = (string) preg_replace('/' . preg_quote($crlf, '/') . "(\t| )/", ' ', $input);

        foreach (explode($crlf, trim($input)) as $line) {
            $pos = strpos($line, ':');

            if ($pos === false) {
                continue;
            }

            $hdr_name  = substr($line, 0, $pos);
            $hdr_value = (string) substr($line, $pos + 1);

            // Drop the single space that conventionally follows the colon;
            // the value may legitimately be empty ("X-Foo:").
            if (isset($hdr_value[0]) && $hdr_value[0] == ' ') {
                $hdr_value = (string) substr($hdr_value, 1);
            }

            $return[] = array(
                'name'  => $hdr_name,
                'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value,
            );
        }

        return $return;
    }

    /**
     * Function to parse a header value, extract first part, and any secondary
     * parts (after ;) This function is not as robust as it could be.
     * Eg. header comments in the wrong place will probably break it.
     *
     * @param string $input Header value to parse
     *
     * @return array Contains parsed result: 'value' (the first part) and,
     *               when parameters were found, 'other' (lower-cased
     *               parameter name => value)
     */
    protected function parseHeaderValue($input)
    {
        $input  = (string) $input;
        $return = array();
        $parts  = preg_split('/;\s*/', $input);

        if (empty($parts)) {
            $return['value'] = trim($input);

            return $return;
        }

        $return['value'] = trim(array_shift($parts));

        foreach ($parts as $part) {
            if (preg_match_all('/(([[:alnum:]]+)="?([^"]*)"?\s?;?)+/i', $part, $matches)) {
                foreach ($matches[2] as $idx => $param_name) {
                    $return['other'][strtolower($param_name)] = $matches[3][$idx];
                }
            }
        }

        return $return;
    }

    /**
     * This function splits the input based on the given boundary.
     *
     * The segment before the first delimiter and the segment after the
     * last one are discarded.
     *
     * @param string $input    Input to parse
     * @param string $boundary Boundary
     *
     * @return array Contains array of resulting mime parts (possibly empty)
     */
    protected function boundarySplit($input, $boundary)
    {
        $tmp = explode('--' . $boundary, (string) $input);

        // Yields an empty list when fewer than two delimiters are present
        return array_slice($tmp, 1, -1);
    }

    /**
     * Given a header, this function will decode it according to RFC2047.
     * Probably not *exactly* conformant, but it does pass all the given
     * examples (in RFC2047).
     *
     * @param string $input Input header value to decode
     *
     * @return string Decoded header value
     */
    protected function decodeHeader($input)
    {
        return rcube_mime::decode_mime_string($input, $this->params['default_charset']);
    }

    /**
     * Recursive method to convert a rcube_mime_decode structure
     * into a rcube_message_part object.
     *
     * @param object $part   A message part struct
     * @param int    $count  Part count
     * @param string $parent Parent MIME ID
     *
     * @return object rcube_message_part
     * @see self::decode()
     */
    protected function structure_part($part, $count = 0, $parent = '')
    {
        $struct = new rcube_message_part;

        if (!empty($part->mime_id)) {
            $struct->mime_id = $part->mime_id;
        }
        else {
            $struct->mime_id = empty($parent) ? (string) $count : "$parent.$count";
        }

        $primary   = isset($part->ctype_primary) ? $part->ctype_primary : null;
        $secondary = isset($part->ctype_secondary) ? $part->ctype_secondary : null;

        $struct->headers          = isset($part->headers) ? $part->headers : array();
        $struct->mimetype         = $primary . '/' . $secondary;
        $struct->ctype_primary    = $primary;
        $struct->ctype_secondary  = $secondary;
        $struct->ctype_parameters = isset($part->ctype_parameters) ? $part->ctype_parameters : array();

        if (!empty($part->headers['content-transfer-encoding'])) {
            $struct->encoding = $part->headers['content-transfer-encoding'];
        }

        if (!empty($part->ctype_parameters['charset'])) {
            $struct->charset = $part->ctype_parameters['charset'];
        }

        // determine filename: Content-Disposition wins over Content-Type "name"
        $filename = null;

        if (!empty($part->d_parameters['filename'])) {
            $filename = $part->d_parameters['filename'];
        }
        else if (!empty($part->ctype_parameters['name'])) {
            $filename = $part->ctype_parameters['name'];
        }

        if ($filename) {
            // Headers were left encoded while parsing, so decode the name now
            if (!$this->params['decode_headers']) {
                $filename = $this->decodeHeader($filename);
            }

            $struct->filename = $filename;
        }

        $has_body = isset($part->body);

        $struct->body        = $has_body ? $part->body : null;
        $struct->size        = $has_body ? strlen($part->body) : 0;
        $struct->disposition = isset($part->disposition) ? $part->disposition : null;

        if (!empty($part->parts)) {
            $index = 0;

            foreach ((array) $part->parts as $child_part) {
                $struct->parts[] = $this->structure_part($child_part, ++$index, $struct->mime_id);
            }
        }

        return $struct;
    }

    /**
     * Splits a message or MIME part into header and body sections,
     * treating input without a blank line as headers only.
     *
     * @param string $input Raw message or part
     *
     * @return array Header and body section
     */
    private function split_message($input)
    {
        $split = $this->splitBodyHeader($input);

        if (!is_array($split)) {
            $split = array((string) $input, '');
        }

        return $split;
    }

    /**
     * Returns the body as-is, or transfer-decoded when 'decode_bodies' is on.
     *
     * @param string      $body     Raw body
     * @param string|null $encoding Transfer encoding name; when null the
     *                              decoder is called without an encoding
     *
     * @return string Body, decoded if configured
     */
    private function decode_body($body, $encoding = null)
    {
        if (!$this->params['decode_bodies']) {
            return $body;
        }

        return $encoding === null ? rcube_mime::decode($body) : rcube_mime::decode($body, $encoding);
    }
}
|