Mercurial > hg > rc2
diff program/lib/Roundcube/rcube_mime_decode.php @ 0:4681f974d28b
vanilla 1.3.3 distro, I hope
author | Charlie Root |
---|---|
date | Thu, 04 Jan 2018 15:52:31 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/program/lib/Roundcube/rcube_mime_decode.php Thu Jan 04 15:52:31 2018 -0500 @@ -0,0 +1,395 @@ +<?php + +/** + +-----------------------------------------------------------------------+ + | This file is part of the Roundcube Webmail client | + | Copyright (C) 2005-2015, The Roundcube Dev Team | + | Copyright (C) 2011-2015, Kolab Systems AG | + | | + | Licensed under the GNU General Public License version 3 or | + | any later version with exceptions for skins & plugins. | + | See the README file for a full license statement. | + | | + | PURPOSE: | + | MIME message parsing utilities derived from Mail_mimeDecode | + +-----------------------------------------------------------------------+ + | Author: Thomas Bruederli <roundcube@gmail.com> | + | Author: Aleksander Machniak <alec@alec.pl> | + | Author: Richard Heyes <richard@phpguru.org> | + +-----------------------------------------------------------------------+ +*/ + +/** + * Class for parsing MIME messages + * + * @package Framework + * @subpackage Storage + * @author Aleksander Machniak <alec@alec.pl> + */ +class rcube_mime_decode +{ + /** + * Class configuration parameters. + * + * @var array + */ + protected $params = array( + 'include_bodies' => true, + 'decode_bodies' => true, + 'decode_headers' => true, + 'crlf' => "\r\n", + 'default_charset' => RCUBE_CHARSET, + ); + + + /** + * Constructor. + * + * Sets up the object, initialise the variables, and splits and + * stores the header and body of the input. + * + * @param array $params An array of various parameters that determine + * various things: + * include_bodies - Whether to include the body in the returned + * object. + * decode_bodies - Whether to decode the bodies + * of the parts. (Transfer encoding) + * decode_headers - Whether to decode headers + * crlf - CRLF type to use (CRLF/LF/CR) + */ + public function __construct($params = array()) + { + if (!empty($params)) { + $this->params = array_merge($this->params, (array) $params); + } + } + + /** + * Performs the decoding process. + * + * @param string $input The input to decode + * @param bool $convert Convert result to rcube_message_part structure + * + * @return object|bool Decoded results or False on failure + */ + public function decode($input, $convert = true) + { + list($header, $body) = $this->splitBodyHeader($input); + + $struct = $this->do_decode($header, $body); + + if ($struct && $convert) { + $struct = $this->structure_part($struct); + } + + return $struct; + } + + /** + * Performs the decoding. Decodes the body string passed to it + * If it finds certain content-types it will call itself in a + * recursive fashion + * + * @param string $headers Header section + * @param string $body Body section + * @param string $default_ctype Default content type + * + * @return object|bool Decoded results or False on error + */ + protected function do_decode($headers, $body, $default_ctype = 'text/plain') + { + $return = new stdClass; + $headers = $this->parseHeaders($headers); + + foreach ($headers as $value) { + $header_name = strtolower($value['name']); + + if (isset($return->headers[$header_name]) && !is_array($return->headers[$header_name])) { + $return->headers[$header_name] = array($return->headers[$header_name]); + $return->headers[$header_name][] = $value['value']; + } + else if (isset($return->headers[$header_name])) { + $return->headers[$header_name][] = $value['value']; + } + else { + $return->headers[$header_name] = $value['value']; + } + + switch ($header_name) { + case 'content-type': + $content_type = $this->parseHeaderValue($value['value']); + + if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) { + $return->ctype_primary = $regs[1]; + $return->ctype_secondary = $regs[2]; + } + + if (!empty($content_type['other'])) { + $return->ctype_parameters = array_merge((array) $return->ctype_parameters, (array) $content_type['other']); + } + + break; + + case 'content-disposition'; + $content_disposition = $this->parseHeaderValue($value['value']); + $return->disposition = $content_disposition['value']; + + if (!empty($content_disposition['other'])) { + $return->d_parameters = array_merge((array) $return->d_parameters, (array) $content_disposition['other']); + } + + break; + + case 'content-transfer-encoding': + $content_transfer_encoding = $this->parseHeaderValue($value['value']); + break; + } + } + + if (isset($content_type)) { + $ctype = strtolower($content_type['value']); + + switch ($ctype) { + case 'text/plain': + $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit'; + + if ($this->params['include_bodies']) { + $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body; + } + + break; + + case 'text/html': + $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit'; + + if ($this->params['include_bodies']) { + $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body; + } + + break; + + case 'multipart/digest': + case 'multipart/alternative': + case 'multipart/related': + case 'multipart/mixed': + case 'multipart/signed': + case 'multipart/encrypted': + if (!isset($content_type['other']['boundary'])) { + return false; + } + + $default_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain'; + $parts = $this->boundarySplit($body, $content_type['other']['boundary']); + + for ($i = 0; $i < count($parts); $i++) { + list($part_header, $part_body) = $this->splitBodyHeader($parts[$i]); + $return->parts[] = $this->do_decode($part_header, $part_body, $default_ctype); + } + + break; + + case 'message/rfc822': + $obj = new rcube_mime_decode($this->params); + $return->parts[] = $obj->decode($body, false); + unset($obj); + break; + + default: + if ($this->params['include_bodies']) { + $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $content_transfer_encoding['value']) : $body; + } + + break; + } + } + else { + $ctype = explode('/', $default_ctype); + $return->ctype_primary = $ctype[0]; + $return->ctype_secondary = $ctype[1]; + + if ($this->params['include_bodies']) { + $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body) : $body; + } + } + + return $return; + } + + /** + * Given a string containing a header and body + * section, this function will split them (at the first + * blank line) and return them. + * + * @param string $input Input to split apart + * + * @return array Contains header and body section + */ + protected function splitBodyHeader($input) + { + $pos = strpos($input, $this->params['crlf'] . $this->params['crlf']); + if ($pos === false) { + return false; + } + + $crlf_len = strlen($this->params['crlf']); + $header = substr($input, 0, $pos); + $body = substr($input, $pos + 2 * $crlf_len); + + if (substr_compare($body, $this->params['crlf'], -$crlf_len) === 0) { + $body = substr($body, 0, -$crlf_len); + } + + return array($header, $body); + } + + /** + * Parse headers given in $input and return as assoc array. + * + * @param string $input Headers to parse + * + * @return array Contains parsed headers + */ + protected function parseHeaders($input) + { + if ($input !== '') { + // Unfold the input + $input = preg_replace('/' . $this->params['crlf'] . "(\t| )/", ' ', $input); + $headers = explode($this->params['crlf'], trim($input)); + + foreach ($headers as $value) { + $hdr_name = substr($value, 0, $pos = strpos($value, ':')); + $hdr_value = substr($value, $pos+1); + + if ($hdr_value[0] == ' ') { + $hdr_value = substr($hdr_value, 1); + } + + $return[] = array( + 'name' => $hdr_name, + 'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value, + ); + } + } + else { + $return = array(); + } + + return $return; + } + + /** + * Function to parse a header value, extract first part, and any secondary + * parts (after ;) This function is not as robust as it could be. + * Eg. header comments in the wrong place will probably break it. + * + * @param string $input Header value to parse + * + * @return array Contains parsed result + */ + protected function parseHeaderValue($input) + { + $parts = preg_split('/;\s*/', $input); + + if (!empty($parts)) { + $return['value'] = trim($parts[0]); + + for ($n = 1; $n < count($parts); $n++) { + if (preg_match_all('/(([[:alnum:]]+)="?([^"]*)"?\s?;?)+/i', $parts[$n], $matches)) { + for ($i = 0; $i < count($matches[2]); $i++) { + $return['other'][strtolower($matches[2][$i])] = $matches[3][$i]; + } + } + } + } + else { + $return['value'] = trim($input); + } + + return $return; + } + + /** + * This function splits the input based on the given boundary + * + * @param string $input Input to parse + * @param string $boundary Boundary + * + * @return array Contains array of resulting mime parts + */ + protected function boundarySplit($input, $boundary) + { + $tmp = explode('--' . $boundary, $input); + + for ($i = 1; $i < count($tmp)-1; $i++) { + $parts[] = $tmp[$i]; + } + + return $parts; + } + + /** + * Given a header, this function will decode it according to RFC2047. + * Probably not *exactly* conformant, but it does pass all the given + * examples (in RFC2047). + * + * @param string $input Input header value to decode + * + * @return string Decoded header value + */ + protected function decodeHeader($input) + { + return rcube_mime::decode_mime_string($input, $this->params['default_charset']); + } + + /** + * Recursive method to convert a rcube_mime_decode structure + * into a rcube_message_part object. + * + * @param object $part A message part struct + * @param int $count Part count + * @param string $parent Parent MIME ID + * + * @return object rcube_message_part + * @see self::decode() + */ + protected function structure_part($part, $count = 0, $parent = '') + { + $struct = new rcube_message_part; + $struct->mime_id = $part->mime_id ?: (empty($parent) ? (string)$count : "$parent.$count"); + $struct->headers = $part->headers; + $struct->mimetype = $part->ctype_primary . '/' . $part->ctype_secondary; + $struct->ctype_primary = $part->ctype_primary; + $struct->ctype_secondary = $part->ctype_secondary; + $struct->ctype_parameters = $part->ctype_parameters; + + if ($part->headers['content-transfer-encoding']) { + $struct->encoding = $part->headers['content-transfer-encoding']; + } + + if ($part->ctype_parameters['charset']) { + $struct->charset = $part->ctype_parameters['charset']; + } + + $part_charset = $struct->charset ?: $this->params['default_charset']; + + // determine filename + if (($filename = $part->d_parameters['filename']) || ($filename = $part->ctype_parameters['name'])) { + if (!$this->params['decode_headers']) { + $filename = $this->decodeHeader($filename); + } + + $struct->filename = $filename; + } + + $struct->body = $part->body; + $struct->size = strlen($part->body); + $struct->disposition = $part->disposition; + + $count = 0; + foreach ((array)$part->parts as $child_part) { + $struct->parts[] = $this->structure_part($child_part, ++$count, $struct->mime_id); + } + + return $struct; + } +}