diff program/lib/Roundcube/rcube_mime_decode.php @ 0:4681f974d28b

vanilla 1.3.3 distro, I hope
author Charlie Root
date Thu, 04 Jan 2018 15:52:31 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/program/lib/Roundcube/rcube_mime_decode.php	Thu Jan 04 15:52:31 2018 -0500
@@ -0,0 +1,395 @@
+<?php
+
+/**
+ +-----------------------------------------------------------------------+
+ | This file is part of the Roundcube Webmail client                     |
+ | Copyright (C) 2005-2015, The Roundcube Dev Team                       |
+ | Copyright (C) 2011-2015, Kolab Systems AG                             |
+ |                                                                       |
+ | Licensed under the GNU General Public License version 3 or            |
+ | any later version with exceptions for skins & plugins.                |
+ | See the README file for a full license statement.                     |
+ |                                                                       |
+ | PURPOSE:                                                              |
+ |   MIME message parsing utilities derived from Mail_mimeDecode         |
+ +-----------------------------------------------------------------------+
+ | Author: Thomas Bruederli <roundcube@gmail.com>                        |
+ | Author: Aleksander Machniak <alec@alec.pl>                            |
+ | Author: Richard Heyes <richard@phpguru.org>                           |
+ +-----------------------------------------------------------------------+
+*/
+
+/**
+ * Class for parsing MIME messages
+ *
+ * Splits a raw message into a header and a body section, parses the headers
+ * and descends recursively into multipart/* and message/rfc822 content.
+ * The result is a tree of stdClass nodes, optionally converted into
+ * rcube_message_part objects.
+ *
+ * @package    Framework
+ * @subpackage Storage
+ * @author     Aleksander Machniak <alec@alec.pl>
+ */
+class rcube_mime_decode
+{
+    /**
+     * Class configuration parameters.
+     *
+     * @var array
+     */
+    protected $params = array(
+        'include_bodies'  => true,
+        'decode_bodies'   => true,
+        'decode_headers'  => true,
+        'crlf'            => "\r\n",
+        'default_charset' => RCUBE_CHARSET,
+    );
+
+
+    /**
+     * Constructor.
+     *
+     * Merges the given parameters over the class defaults.
+     *
+     * @param array $params An array of various parameters that determine
+     *                       various things:
+     *              include_bodies  - Whether to include the body in the returned
+     *                                object.
+     *              decode_bodies   - Whether to decode the bodies
+     *                                of the parts. (Transfer encoding)
+     *              decode_headers  - Whether to decode headers
+     *              crlf            - Line ending string used by the input
+     *                                (defaults to "\r\n")
+     *              default_charset - Charset passed to the header decoder
+     */
+    public function __construct($params = array())
+    {
+        if (!empty($params)) {
+            $this->params = array_merge($this->params, (array) $params);
+        }
+    }
+
+    /**
+     * Performs the decoding process.
+     *
+     * Input without a blank line is treated as a header section followed
+     * by an empty body.
+     *
+     * @param string $input   The input to decode
+     * @param bool   $convert Convert result to rcube_message_part structure
+     *
+     * @return object|bool Decoded results or False on failure
+     */
+    public function decode($input, $convert = true)
+    {
+        list($header, $body) = $this->splitBodyHeader($input);
+
+        $struct = $this->do_decode($header, $body);
+
+        if ($struct && $convert) {
+            $struct = $this->structure_part($struct);
+        }
+
+        return $struct;
+    }
+
+    /**
+     * Performs the decoding. Decodes the body string passed to it
+     * If it finds certain content-types it will call itself in a
+     * recursive fashion
+     *
+     * A missing or unparsable Content-Type falls back to $default_ctype.
+     * Child parts that cannot be decoded are left out of the parts list.
+     *
+     * @param string $headers       Header section
+     * @param string $body          Body section
+     * @param string $default_ctype Default content type
+     *
+     * @return object|bool Decoded results or False on error (a multipart
+     *                     type without a boundary parameter)
+     */
+    protected function do_decode($headers, $body, $default_ctype = 'text/plain')
+    {
+        $return          = new stdClass;
+        $return->headers = array();
+        $body            = (string) $body;
+
+        foreach ($this->parseHeaders($headers) as $value) {
+            $header_name = strtolower($value['name']);
+
+            // A header seen once stays a scalar, repeated headers become a list
+            if (!isset($return->headers[$header_name])) {
+                $return->headers[$header_name] = $value['value'];
+            }
+            else {
+                $return->headers[$header_name]   = (array) $return->headers[$header_name];
+                $return->headers[$header_name][] = $value['value'];
+            }
+
+            switch ($header_name) {
+            case 'content-type':
+                $content_type = $this->parseHeaderValue($value['value']);
+
+                if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
+                    $return->ctype_primary   = $regs[1];
+                    $return->ctype_secondary = $regs[2];
+                }
+
+                if (!empty($content_type['other'])) {
+                    $known = isset($return->ctype_parameters) ? (array) $return->ctype_parameters : array();
+                    $return->ctype_parameters = array_merge($known, (array) $content_type['other']);
+                }
+
+                break;
+
+            case 'content-disposition':
+                $content_disposition = $this->parseHeaderValue($value['value']);
+                $return->disposition = $content_disposition['value'];
+
+                if (!empty($content_disposition['other'])) {
+                    $known = isset($return->d_parameters) ? (array) $return->d_parameters : array();
+                    $return->d_parameters = array_merge($known, (array) $content_disposition['other']);
+                }
+
+                break;
+
+            case 'content-transfer-encoding':
+                $content_transfer_encoding = $this->parseHeaderValue($value['value']);
+                break;
+            }
+        }
+
+        $ctype    = isset($content_type) ? strtolower($content_type['value']) : '';
+        $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';
+
+        // No (usable) Content-Type header: describe the part with the default type
+        if (!isset($return->ctype_primary)) {
+            $fallback = array_pad(explode('/', $default_ctype), 2, '');
+
+            $return->ctype_primary   = $fallback[0];
+            $return->ctype_secondary = $fallback[1];
+        }
+
+        $multipart_types = array(
+            'multipart/digest',
+            'multipart/alternative',
+            'multipart/related',
+            'multipart/mixed',
+            'multipart/signed',
+            'multipart/encrypted',
+        );
+
+        if (in_array($ctype, $multipart_types, true)) {
+            if (!isset($content_type['other']['boundary'])) {
+                return false;
+            }
+
+            // Parts of a digest default to message/rfc822, all others to text/plain
+            $child_ctype = $ctype === 'multipart/digest' ? 'message/rfc822' : 'text/plain';
+
+            foreach ($this->boundarySplit($body, $content_type['other']['boundary']) as $chunk) {
+                list($part_header, $part_body) = $this->splitBodyHeader($chunk);
+
+                $child = $this->do_decode($part_header, $part_body, $child_ctype);
+
+                if ($child !== false) {
+                    $return->parts[] = $child;
+                }
+            }
+        }
+        else if ($ctype === 'message/rfc822') {
+            $decoder = new self($this->params);
+            $child   = $decoder->decode($body, false);
+
+            if ($child !== false) {
+                $return->parts[] = $child;
+            }
+        }
+        else if ($this->params['include_bodies']) {
+            // The transfer encoding applies whether or not a Content-Type was given
+            $return->body = $this->params['decode_bodies'] ? rcube_mime::decode($body, $encoding) : $body;
+        }
+
+        return $return;
+    }
+
+    /**
+     * Given a string containing a header and body
+     * section, this function will split them (at the first
+     * blank line) and return them.
+     *
+     * One trailing line ending is removed from the body. When the input
+     * contains no blank line, all of it is returned as the header section
+     * together with an empty body.
+     *
+     * @param string $input Input to split apart
+     *
+     * @return array Contains header and body section
+     */
+    protected function splitBodyHeader($input)
+    {
+        $input    = (string) $input;
+        $crlf     = $this->params['crlf'];
+        $crlf_len = strlen($crlf);
+        $pos      = strpos($input, $crlf . $crlf);
+
+        if ($pos === false) {
+            return array($input, '');
+        }
+
+        // substr() may return false on older PHP versions, hence the casts
+        $header = (string) substr($input, 0, $pos);
+        $body   = (string) substr($input, $pos + 2 * $crlf_len);
+
+        if (strlen($body) >= $crlf_len && substr($body, -$crlf_len) === $crlf) {
+            $body = (string) substr($body, 0, -$crlf_len);
+        }
+
+        return array($header, $body);
+    }
+
+    /**
+     * Parse headers given in $input and return as assoc array.
+     *
+     * Folded lines are unfolded first. Lines without a field name
+     * (no colon, or a colon in the first column) are skipped.
+     *
+     * @param string $input Headers to parse
+     *
+     * @return array List of arrays with 'name' and 'value' keys
+     */
+    protected function parseHeaders($input)
+    {
+        $return = array();
+        $input  = trim((string) $input);
+
+        if ($input === '') {
+            return $return;
+        }
+
+        $crlf = $this->params['crlf'];
+
+        // Unfold the input
+        $input = preg_replace('/' . preg_quote($crlf, '/') . '[\t ]/', ' ', $input);
+
+        foreach (explode($crlf, (string) $input) as $line) {
+            $pos = strpos($line, ':');
+
+            if ($pos === false || $pos === 0) {
+                continue;
+            }
+
+            $hdr_name  = substr($line, 0, $pos);
+            $hdr_value = (string) substr($line, $pos + 1);
+
+            // Drop the single space that conventionally follows the colon
+            if ($hdr_value !== '' && $hdr_value[0] === ' ') {
+                $hdr_value = (string) substr($hdr_value, 1);
+            }
+
+            $return[] = array(
+                'name'  => $hdr_name,
+                'value' => $this->params['decode_headers'] ? $this->decodeHeader($hdr_value) : $hdr_value,
+            );
+        }
+
+        return $return;
+    }
+
+    /**
+     * Function to parse a header value, extract first part, and any secondary
+     * parts (after ;) This function is not as robust as it could be.
+     * Eg. header comments in the wrong place will probably break it.
+     *
+     * Semicolons inside double-quoted strings do not separate parameters,
+     * as long as the quotes in the value are balanced.
+     *
+     * @param string $input Header value to parse
+     *
+     * @return array Contains parsed result: 'value' with the main value and,
+     *               when parameters were found, 'other' with parameter values
+     *               indexed by lower-cased parameter name
+     */
+    protected function parseHeaderValue($input)
+    {
+        $input  = (string) $input;
+        $return = array();
+
+        // The quote-aware split only works with balanced quotes,
+        // otherwise split on every semicolon
+        $pattern = substr_count($input, '"') % 2 === 0
+            ? '/;(?=(?:[^"]*"[^"]*")*[^"]*$)\s*/'
+            : '/;\s*/';
+
+        $parts = preg_split($pattern, $input);
+
+        if (empty($parts)) {
+            $return['value'] = trim($input);
+
+            return $return;
+        }
+
+        $return['value'] = trim(array_shift($parts));
+
+        foreach ($parts as $piece) {
+            if (preg_match_all('/([^\s=;"]+)\s*=\s*(?:"([^"]*)"?|([^";]*))/', $piece, $matches, PREG_SET_ORDER)) {
+                foreach ($matches as $match) {
+                    // Group 3 is only present for unquoted values
+                    $return['other'][strtolower($match[1])] = isset($match[3]) ? trim($match[3]) : $match[2];
+                }
+            }
+        }
+
+        return $return;
+    }
+
+    /**
+     * This function splits the input based on the given boundary
+     *
+     * The text before the first delimiter is dropped, and so is everything
+     * from the closing delimiter on. When the closing delimiter is missing,
+     * the last non-blank segment is returned as a part.
+     *
+     * @param string $input    Input to parse
+     * @param string $boundary Boundary
+     *
+     * @return array Contains array of resulting mime parts
+     */
+    protected function boundarySplit($input, $boundary)
+    {
+        $parts     = array();
+        $input     = (string) $input;
+        $delimiter = '--' . $boundary;
+
+        // Require "--", whitespace or end of input after the delimiter, so that
+        // a boundary being a prefix of another boundary does not split the input
+        $tmp = preg_split('/' . preg_quote($delimiter, '/') . '(?=--|\s|$)/', $input);
+
+        if ($tmp === false) {
+            $tmp = explode($delimiter, $input);
+        }
+
+        $last = count($tmp) - 1;
+
+        for ($i = 1; $i < $last; $i++) {
+            $parts[] = $tmp[$i];
+        }
+
+        // The last segment follows the closing delimiter (and then starts with "--"),
+        // unless the closing delimiter is missing
+        if ($last > 0 && strncmp($tmp[$last], '--', 2) !== 0 && trim($tmp[$last]) !== '') {
+            $parts[] = $tmp[$last];
+        }
+
+        return $parts;
+    }
+
+    /**
+     * Decodes a header value by passing it to rcube_mime::decode_mime_string()
+     * together with the configured default charset.
+     *
+     * @param string $input Input header value to decode
+     *
+     * @return string Decoded header value
+     */
+    protected function decodeHeader($input)
+    {
+        return rcube_mime::decode_mime_string($input, $this->params['default_charset']);
+    }
+
+    /**
+     * Recursive method to convert a rcube_mime_decode structure
+     * into a rcube_message_part object.
+     *
+     * @param object $part   A message part struct
+     * @param int    $count  Part count
+     * @param string $parent Parent MIME ID
+     *
+     * @return object rcube_message_part
+     * @see self::decode()
+     */
+    protected function structure_part($part, $count = 0, $parent = '')
+    {
+        $struct = new rcube_message_part;
+
+        if (!empty($part->mime_id)) {
+            $struct->mime_id = $part->mime_id;
+        }
+        else {
+            // Children of the root part ("0") are numbered without a prefix
+            $struct->mime_id = empty($parent) ? (string) $count : "$parent.$count";
+        }
+
+        $struct->headers          = isset($part->headers) ? $part->headers : array();
+        $struct->mimetype         = $part->ctype_primary . '/' . $part->ctype_secondary;
+        $struct->ctype_primary    = $part->ctype_primary;
+        $struct->ctype_secondary  = $part->ctype_secondary;
+        $struct->ctype_parameters = isset($part->ctype_parameters) ? $part->ctype_parameters : array();
+
+        if (!empty($part->headers['content-transfer-encoding'])) {
+            $encoding = $part->headers['content-transfer-encoding'];
+
+            // A repeated header is stored as a list, the last value is used
+            $struct->encoding = is_array($encoding) ? end($encoding) : $encoding;
+        }
+
+        if (!empty($part->ctype_parameters['charset'])) {
+            $struct->charset = $part->ctype_parameters['charset'];
+        }
+
+        // determine filename
+        $filename = null;
+
+        if (isset($part->d_parameters['filename']) && $part->d_parameters['filename'] !== '') {
+            $filename = $part->d_parameters['filename'];
+        }
+        else if (isset($part->ctype_parameters['name']) && $part->ctype_parameters['name'] !== '') {
+            $filename = $part->ctype_parameters['name'];
+        }
+
+        if ($filename !== null) {
+            // With decode_headers enabled the value was decoded while parsing headers
+            if (!$this->params['decode_headers']) {
+                $filename = $this->decodeHeader($filename);
+            }
+
+            $struct->filename = $filename;
+        }
+
+        $body = isset($part->body) ? $part->body : null;
+
+        $struct->body        = $body;
+        $struct->size        = strlen((string) $body);
+        $struct->disposition = isset($part->disposition) ? $part->disposition : null;
+
+        $children = isset($part->parts) ? (array) $part->parts : array();
+        $count    = 0;
+
+        foreach ($children as $child_part) {
+            $count++;
+
+            if (!is_object($child_part)) {
+                continue;
+            }
+
+            $struct->parts[] = $this->structure_part($child_part, $count, $struct->mime_id);
+        }
+
+        return $struct;
+    }
+}