7
|
1 <?php
|
|
2
|
|
3 namespace Sabre\VObject\Parser;
|
|
4
|
|
5 use
|
|
6 Sabre\VObject\ParseException,
|
|
7 Sabre\VObject\EofException,
|
|
8 Sabre\VObject\Component,
|
|
9 Sabre\VObject\Property,
|
|
10 Sabre\VObject\Component\VCalendar,
|
|
11 Sabre\VObject\Component\VCard;
|
|
12
|
|
/**
 * MimeDir parser.
 *
 * This class parses iCalendar 2.0 and vCard 2.1, 3.0 and 4.0 files. This
 * parser will return one of the following two objects from the parse method:
 *
 * Sabre\VObject\Component\VCalendar
 * Sabre\VObject\Component\VCard
 *
 * @copyright Copyright (C) 2011-2015 fruux GmbH (https://fruux.com/).
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */
class MimeDir extends Parser {

    /**
     * The input stream.
     *
     * @var resource
     */
    protected $input;

    /**
     * Root component
     *
     * @var Component
     */
    protected $root;

    /**
     * Parses an iCalendar or vCard file
     *
     * Pass a stream or a string. If null is passed, the existing buffer is
     * used.
     *
     * @param string|resource|null $input
     * @param int|null $options Bitfield of parser options; when null the
     *                          currently configured options are kept.
     * @throws ParseException
     * @throws EofException
     * @return Component The root component of the parsed document.
     */
    public function parse($input = null, $options = null) {

        $this->root = null;

        if (!is_null($input)) {
            $this->setInput($input);
        }

        if (!is_null($options)) {
            $this->options = $options;
        }

        $this->parseDocument();

        return $this->root;

    }

    /**
     * Sets the input buffer. Must be a string or stream.
     *
     * @param resource|string $input
     * @throws \InvalidArgumentException If $input is neither a string nor a resource.
     * @return void
     */
    public function setInput($input) {

        // Resetting the parser
        $this->lineIndex = 0;
        $this->startLine = 0;

        if (is_string($input)) {
            // Converting to a stream.
            $stream = fopen('php://temp', 'r+');
            fwrite($stream, $input);
            rewind($stream);
        } elseif (is_resource($input)) {
            $stream = $input;
        } else {
            throw new \InvalidArgumentException('This parser can only read from strings or streams.');
        }

        // A look-ahead line that was buffered while reading a previous input
        // must not leak into a different input. When the very same stream is
        // passed again the buffered line still belongs to it, so it is kept.
        if ($stream !== $this->input) {
            $this->lineBuffer = null;
        }

        $this->input = $stream;

    }

    /**
     * Parses an entire document.
     *
     * Reads the opening BEGIN line, creates the root component and keeps
     * adding parsed children to it until the matching END line is found.
     *
     * @throws ParseException
     * @throws EofException
     * @return void
     */
    protected function parseDocument() {

        $line = $this->readLine();

        // BOM is ZERO WIDTH NO-BREAK SPACE (U+FEFF).
        // It's 0xEF 0xBB 0xBF in UTF-8 hex.
        if (substr($line, 0, 3) === "\xEF\xBB\xBF") {
            $line = substr($line, 3);
        }

        switch (strtoupper($line)) {
            case 'BEGIN:VCALENDAR' :
                $class = isset(VCalendar::$componentMap['VCALENDAR'])
                    ? VCalendar::$componentMap['VCALENDAR']
                    : 'Sabre\\VObject\\Component\\VCalendar';
                break;
            case 'BEGIN:VCARD' :
                $class = isset(VCard::$componentMap['VCARD'])
                    ? VCard::$componentMap['VCARD']
                    : 'Sabre\\VObject\\Component\\VCard';
                break;
            default :
                throw new ParseException('This parser only supports VCARD and VCALENDAR files');
        }

        $this->root = new $class(array(), false);

        while (true) {

            // Reading until we hit END:
            $line = $this->readLine();
            if (strtoupper(substr($line, 0, 4)) === 'END:') {
                break;
            }
            $result = $this->parseLine($line);
            if ($result) {
                $this->root->add($result);
            }

        }

        $name = strtoupper(substr($line, 4));
        if ($name !== $this->root->name) {
            throw new ParseException('Invalid MimeDir file. expected: "END:' . $this->root->name . '" got: "END:' . $name . '"');
        }

    }

    /**
     * Parses a line, and if it hits a component, it will also attempt to parse
     * the entire component
     *
     * @param string $line Unfolded line
     * @throws ParseException
     * @throws EofException
     * @return Component|Property|false The sub-component created for a BEGIN
     *                                  line, the result of readProperty() for
     *                                  any other line, or false when the line
     *                                  was ignored.
     */
    protected function parseLine($line) {

        // Start of a new component
        if (strtoupper(substr($line, 0, 6)) === 'BEGIN:') {

            $component = $this->root->createComponent(substr($line, 6), array(), false);

            while (true) {

                // Reading until we hit END:
                $line = $this->readLine();
                if (strtoupper(substr($line, 0, 4)) === 'END:') {
                    break;
                }
                $result = $this->parseLine($line);
                if ($result) {
                    $component->add($result);
                }

            }

            $name = strtoupper(substr($line, 4));
            if ($name !== $component->name) {
                throw new ParseException('Invalid MimeDir file. expected: "END:' . $component->name . '" got: "END:' . $name . '"');
            }

            return $component;

        }

        // Property reader
        $property = $this->readProperty($line);
        if (!$property) {
            // Ignored line
            return false;
        }

        return $property;

    }

    /**
     * We need to look ahead 1 line every time to see if we need to 'unfold'
     * the next line.
     *
     * If that was not the case, we store it here.
     *
     * @var null|string
     */
    protected $lineBuffer;

    /**
     * The real current line number.
     *
     * @var int
     */
    protected $lineIndex = 0;

    /**
     * In the case of unfolded lines, this property holds the line number for
     * the start of the line.
     *
     * @var int
     */
    protected $startLine = 0;

    /**
     * Contains a 'raw' representation of the current line.
     *
     * Folded continuation lines are joined with "\n " instead of being
     * concatenated directly.
     *
     * @var string
     */
    protected $rawLine;

    /**
     * Reads a single line from the buffer.
     *
     * This method strips any newlines and also takes care of unfolding.
     *
     * @throws EofException When the input ends before a line could be read.
     * @throws ParseException When reading from the stream fails.
     * @return string
     */
    protected function readLine() {

        if (!is_null($this->lineBuffer)) {
            // The previous look-ahead already consumed (and counted) this line.
            $rawLine = $this->lineBuffer;
            $this->lineBuffer = null;
        } else {
            do {
                $eof = feof($this->input);

                $rawLine = fgets($this->input);

                if ($eof || (feof($this->input) && $rawLine === false)) {
                    throw new EofException('End of document reached prematurely');
                }
                if ($rawLine === false) {
                    throw new ParseException('Error reading from input stream');
                }

                // Count every physical line, including the blank ones that
                // are skipped, so reported line numbers match the input.
                $this->lineIndex++;
                $rawLine = rtrim($rawLine, "\r\n");
            } while ($rawLine === ''); // Skipping empty lines
        }
        $line = $rawLine;

        $this->startLine = $this->lineIndex;

        // Looking ahead for folded lines.
        while (true) {

            $nextLine = fgets($this->input);
            if ($nextLine === false) {
                // Nothing left to read, so nothing left to unfold.
                break;
            }
            $this->lineIndex++;

            $nextLine = rtrim($nextLine, "\r\n");
            if ($nextLine === '') {
                // A blank line ends the look-ahead and is discarded. The
                // comparison is strict so that a line consisting of "0" is
                // not mistaken for a blank line and silently dropped.
                break;
            }

            if ($nextLine[0] === "\t" || $nextLine[0] === " ") {
                // Continuation of the current line.
                $line .= substr($nextLine, 1);
                $rawLine .= "\n " . substr($nextLine, 1);
            } else {
                // Start of the next logical line; keep it for the next call.
                $this->lineBuffer = $nextLine;
                break;
            }

        }
        $this->rawLine = $rawLine;

        return $line;

    }

    /**
     * Reads a property from a line.
     *
     * @param string $line Unfolded line
     * @throws ParseException When the line is invalid and
     *                        OPTION_IGNORE_INVALID_LINES is not set.
     * @return Property|false The object obtained from the root component's
     *                        createProperty(), or false when an invalid line
     *                        was ignored.
     */
    protected function readProperty($line) {

        if ($this->options & self::OPTION_FORGIVING) {
            $propNameToken = 'A-Z0-9\-\._\\/';
        } else {
            $propNameToken = 'A-Z0-9\-\.';
        }

        $paramNameToken = 'A-Z0-9\-';
        $safeChar = '^";:,';
        $qSafeChar = '^"';

        $regex = "/
            ^(?P<name> [$propNameToken]+ ) (?=[;:])             # property name
            |
            (?<=:)(?P<propValue> .+)$                           # property value
            |
            ;(?P<paramName> [$paramNameToken]+) (?=[=;:])       # parameter name
            |
            (=|,)(?P<paramValue>                                # parameter value
                (?: [$safeChar]*) |
                \"(?: [$qSafeChar]+)\"
            ) (?=[;:,])
            /xi";

        preg_match_all($regex, $line, $matches, PREG_SET_ORDER);

        $property = array(
            'name'       => null,
            'parameters' => array(),
            'value'      => null
        );

        $lastParam = null;

        /**
         * Looping through all the tokens.
         *
         * Note that the named groups are tested in the reverse of the order
         * in which they appear in the regex: groups after the one that
         * matched are absent from the result, while groups before it are
         * present as empty strings. Testing the last group first therefore
         * identifies which alternative actually matched.
         */
        foreach ($matches as $match) {

            if (isset($match['paramValue'])) {
                if ($match['paramValue'] && $match['paramValue'][0] === '"') {
                    $value = substr($match['paramValue'], 1, -1);
                } else {
                    $value = $match['paramValue'];
                }

                $value = $this->unescapeParam($value);

                if (is_null($lastParam)) {
                    // A parameter value without a (valid) parameter name in
                    // front of it, e.g. "FOO;x_y=z:val". There is nothing to
                    // attach this value to.
                    if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
                        continue;
                    }
                    throw new ParseException('Invalid Mimedir file. Line starting at ' . $this->startLine . ' did not follow iCalendar/vCard conventions');
                }

                if (is_null($property['parameters'][$lastParam])) {
                    $property['parameters'][$lastParam] = $value;
                } elseif (is_array($property['parameters'][$lastParam])) {
                    $property['parameters'][$lastParam][] = $value;
                } else {
                    // Second value for this parameter: promote to an array.
                    $property['parameters'][$lastParam] = array(
                        $property['parameters'][$lastParam],
                        $value
                    );
                }
                continue;
            }
            if (isset($match['paramName'])) {
                $lastParam = strtoupper($match['paramName']);
                if (!isset($property['parameters'][$lastParam])) {
                    $property['parameters'][$lastParam] = null;
                }
                continue;
            }
            if (isset($match['propValue'])) {
                $property['value'] = $match['propValue'];
                continue;
            }
            if (isset($match['name']) && $match['name']) {
                $property['name'] = strtoupper($match['name']);
                continue;
            }

            // @codeCoverageIgnoreStart
            throw new \LogicException('This code should not be reachable');
            // @codeCoverageIgnoreEnd

        }

        if (is_null($property['value'])) {
            $property['value'] = '';
        }
        if (!$property['name']) {
            if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
                return false;
            }
            throw new ParseException('Invalid Mimedir file. Line starting at ' . $this->startLine . ' did not follow iCalendar/vCard conventions');
        }

        // vCard 2.1 states that parameters may appear without a name, and only
        // a value. We can deduce the value based on its name.
        //
        // Our parser will get those as parameters without a value instead, so
        // we're filtering these parameters out first.
        $namedParameters = array();
        $namelessParameters = array();

        foreach ($property['parameters'] as $name => $value) {
            if (!is_null($value)) {
                $namedParameters[$name] = $value;
            } else {
                $namelessParameters[] = $name;
            }
        }

        $propObj = $this->root->createProperty($property['name'], null, $namedParameters);

        foreach ($namelessParameters as $namelessParameter) {
            $propObj->add(null, $namelessParameter);
        }

        // The cast avoids handing null to strtoupper() when the property has
        // no ENCODING parameter.
        if (strtoupper((string)$propObj['ENCODING']) === 'QUOTED-PRINTABLE') {
            $propObj->setQuotedPrintableValue($this->extractQuotedPrintableValue());
        } else {
            $propObj->setRawMimeDirValue($property['value']);
        }

        return $propObj;

    }

    /**
     * Unescapes a property value.
     *
     * vCard 2.1 says:
     *   * Semi-colons must be escaped in some property values, specifically
     *     ADR, ORG and N.
     *   * Semi-colons must be escaped in parameter values, because semi-colons
     *     are also used to separate values.
     *   * No mention of escaping backslashes with another backslash.
     *   * newlines are not escaped either, instead QUOTED-PRINTABLE is used to
     *     span values over more than 1 line.
     *
     * vCard 3.0 says:
     *   * (rfc2425) Backslashes, newlines (\n or \N) and commas must be
     *     escaped, all the time.
     *   * Commas are used for delimiters in multiple values
     *   * (rfc2426) Adds to this that the semi-colon MUST also be escaped,
     *     as in some properties semi-colon is used for separators.
     *   * Properties using semi-colons: N, ADR, GEO, ORG
     *   * Both ADR and N's individual parts may be broken up further with a
     *     comma.
     *   * Properties using commas: NICKNAME, CATEGORIES
     *
     * vCard 4.0 (rfc6350) says:
     *   * Commas must be escaped.
     *   * Semi-colons may be escaped, an unescaped semi-colon _may_ be a
     *     delimiter, depending on the property.
     *   * Backslashes must be escaped
     *   * Newlines must be escaped as either \N or \n.
     *   * Some compound properties may contain multiple parts themselves, so a
     *     comma within a semi-colon delimited property may also be unescaped
     *     to denote multiple parts _within_ the compound property.
     *   * Text-properties using semi-colons: N, ADR, ORG, CLIENTPIDMAP.
     *   * Text-properties using commas: NICKNAME, RELATED, CATEGORIES, PID.
     *
     * Even though the spec says that commas must always be escaped, the
     * example for GEO in Section 6.5.2 seems to violate this.
     *
     * iCalendar 2.0 (rfc5545) says:
     *   * Commas or semi-colons may be used as delimiters, depending on the
     *     property.
     *   * Commas, semi-colons, backslashes, newline (\N or \n) are always
     *     escaped, unless they are delimiters.
     *   * Colons shall not be escaped.
     *   * Commas can be considered the 'default delimiter' and is described as
     *     the delimiter in cases where the order of the multiple values is
     *     insignificant.
     *   * Semi-colons are described as the delimiter for 'structured values'.
     *     They are specifically used as a delimiter in REQUEST-STATUS, RRULE,
     *     GEO and EXRULE. EXRULE is deprecated however.
     *
     * If delimiter is not set (null) this method will just return a string.
     * If it's a comma or a semi-colon the string will be split on those
     * characters, and always return an array.
     *
     * @param string $input
     * @param string|null $delimiter
     * @return string|string[]
     */
    static public function unescapeValue($input, $delimiter = ';') {

        $regex = '# (?: (\\\\ (?: \\\\ | N | n | ; | , ) )';
        if ($delimiter) {
            // Quoted so that a delimiter which happens to be a regex
            // meta-character cannot corrupt the pattern.
            $regex .= ' | (' . preg_quote($delimiter, '#') . ')';
        }
        $regex .= ') #x';

        $matches = preg_split($regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

        $resultArray = array();
        $result = '';

        foreach ($matches as $match) {

            if ($match === '\\\\') {
                $result .= '\\';
            } elseif ($match === '\N' || $match === '\n') {
                $result .= "\n";
            } elseif ($match === '\;') {
                $result .= ';';
            } elseif ($match === '\,') {
                $result .= ',';
            } elseif ($delimiter && $match === $delimiter) {
                // Unescaped delimiter: the current part is complete.
                $resultArray[] = $result;
                $result = '';
            } else {
                $result .= $match;
            }

        }

        $resultArray[] = $result;

        return $delimiter ? $resultArray : $result;

    }

    /**
     * Unescapes a parameter value.
     *
     * vCard 2.1:
     *   * Does not mention a mechanism for this. In addition, double quotes
     *     are never used to wrap values.
     *   * This means that parameters can simply not contain colons or
     *     semi-colons.
     *
     * vCard 3.0 (rfc2425, rfc2426):
     *   * Parameters _may_ be surrounded by double quotes.
     *   * If this is not the case, semi-colon, colon and comma may simply not
     *     occur (the comma used for multiple parameter values though).
     *   * If it is surrounded by double-quotes, it may simply not contain
     *     double-quotes.
     *   * This means that a parameter can in no case encode double-quotes, or
     *     newlines.
     *
     * vCard 4.0 (rfc6350)
     *   * Behavior seems to be identical to vCard 3.0
     *
     * iCalendar 2.0 (rfc5545)
     *   * Behavior seems to be identical to vCard 3.0
     *
     * Parameter escaping mechanism (rfc6868) :
     *   * This rfc describes a new way to escape parameter values.
     *   * New-line is encoded as ^n
     *   * ^ is encoded as ^^.
     *   * " is encoded as ^'
     *
     * @param string $input
     * @return string
     */
    private function unescapeParam($input) {

        return
            preg_replace_callback(
                '#(\^(\^|n|\'))#',
                function($matches) {
                    switch ($matches[2]) {
                        case 'n' :
                            return "\n";
                        case '^' :
                            return '^';
                        case '\'' :
                            return '"';

                    // @codeCoverageIgnoreStart
                    }
                    // @codeCoverageIgnoreEnd
                },
                $input
            );

    }

    /**
     * Gets the full quoted printable value.
     *
     * We need a special method for this, because newlines have both a meaning
     * in vCards, and in QuotedPrintable.
     *
     * This method does not do any decoding.
     *
     * @throws EofException When, in forgiving mode, a soft line break is the
     *                      last thing in the input.
     * @return string
     */
    private function extractQuotedPrintableValue() {

        // We need to parse the raw line again to get the start of the value.
        //
        // We are basically looking for the first colon (:), but we need to
        // skip over the parameters first, as a double-quoted parameter value
        // may contain one.
        $quoteAware = '/^
            (?: [^:"]++ | "[^"]*+" )++  # name and parameters, quoted values kept whole
            :                           # start of the value we really care about
            (.*)$
            /xs';

        // Used when the quotes are unbalanced: split on the very first colon.
        $firstColon = '/^
            [^:]+   # anything but a colon
            :
            (.*)$
            /xs';

        if (preg_match($quoteAware, $this->rawLine, $matches)
            || preg_match($firstColon, $this->rawLine, $matches)) {
            $value = $matches[1];
        } else {
            // No colon at all, so there is no value.
            $value = '';
        }

        // Removing the first whitespace character from every line. Kind of
        // like unfolding, but we keep the newline.
        $value = str_replace("\n ", "\n", $value);

        // Microsoft products don't always correctly fold lines, they may be
        // missing a whitespace. So if 'forgiving' is turned on, we will take
        // those as well.
        if ($this->options & self::OPTION_FORGIVING) {
            while (substr($value, -1) === '=') {
                // Reading the line
                $this->readLine();
                // Grabbing the raw form
                $value .= "\n" . $this->rawLine;
            }
        }

        return $value;

    }

}
|