Mercurial > hg > rc1
comparison vendor/sabre/vobject/lib/Parser/MimeDir.php @ 7:430dbd5346f7
vendor sabre as distributed
author | Charlie Root |
---|---|
date | Sat, 13 Jan 2018 09:06:10 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:cec75ba50afc | 7:430dbd5346f7 |
---|---|
1 <?php | |
2 | |
3 namespace Sabre\VObject\Parser; | |
4 | |
5 use | |
6 Sabre\VObject\ParseException, | |
7 Sabre\VObject\EofException, | |
8 Sabre\VObject\Component, | |
9 Sabre\VObject\Property, | |
10 Sabre\VObject\Component\VCalendar, | |
11 Sabre\VObject\Component\VCard; | |
12 | |
13 /** | |
14 * MimeDir parser. | |
15 * | |
16 * This class parses iCalendar 2.0 and vCard 2.1, 3.0 and 4.0 files. This | |
17 * parser will return one of the following two objects from the parse method: | |
18 * | |
19 * Sabre\VObject\Component\VCalendar | |
20 * Sabre\VObject\Component\VCard | |
21 * | |
22 * @copyright Copyright (C) 2011-2015 fruux GmbH (https://fruux.com/). | |
23 * @author Evert Pot (http://evertpot.com/) | |
24 * @license http://sabre.io/license/ Modified BSD License | |
25 */ | |
26 class MimeDir extends Parser { | |
27 | |
28 /** | |
29 * The input stream. | |
30 * | |
31 * @var resource | |
32 */ | |
33 protected $input; | |
34 | |
35 /** | |
36 * Root component | |
37 * | |
38 * @var Component | |
39 */ | |
40 protected $root; | |
41 | |
/**
 * Parses an iCalendar or vCard document and returns its root component.
 *
 * $input may be a string or a stream resource. When it is null, the input
 * installed by an earlier setInput() or parse() call is read from instead.
 * A non-null $options value replaces the options currently set on the
 * parser.
 *
 * @param string|resource|null $input
 * @param int|null $options
 * @return Component The root component built by parseDocument().
 */
public function parse($input = null, $options = null) {

    // Forget the result of any earlier run before starting a new one.
    $this->root = null;

    if ($input !== null) {
        $this->setInput($input);
    }

    if ($options !== null) {
        $this->options = $options;
    }

    $this->parseDocument();

    return $this->root;

}
68 | |
/**
 * Installs a new input source and resets the line counters.
 *
 * A string is copied into a php://temp stream so that the rest of the parser
 * can always read from a stream; a stream resource is used as-is.
 *
 * @param resource|string $input
 * @throws \InvalidArgumentException When $input is neither a string nor a resource.
 * @return void
 */
public function setInput($input) {

    // New input always restarts line counting.
    $this->startLine = 0;
    $this->lineIndex = 0;

    if (is_resource($input)) {
        $this->input = $input;
        return;
    }

    if (!is_string($input)) {
        throw new \InvalidArgumentException('This parser can only read from strings or streams.');
    }

    // Converting the string to a stream.
    $buffer = fopen('php://temp', 'r+');
    fwrite($buffer, $input);
    rewind($buffer);
    $this->input = $buffer;

}
94 | |
/**
 * Parses an entire document and stores the result in $this->root.
 *
 * The first line (after an optional UTF-8 byte order mark) must be
 * BEGIN:VCALENDAR or BEGIN:VCARD, compared case-insensitively. Every
 * following line is handed to parseLine() and its result added to the root
 * until a line starting with END: is read; the name on that END: line must
 * match the root component's name.
 *
 * @throws ParseException When the document does not start with a supported
 *                        BEGIN: line or the closing END: line does not match.
 * @throws EofException   Propagated from readLine() when the input ends early.
 * @return void
 */
protected function parseDocument() {

    $line = $this->readLine();

    // BOM is ZERO WIDTH NO-BREAK SPACE (U+FEFF), 0xEF 0xBB 0xBF in UTF-8.
    if (substr($line, 0, 3) === "\xEF\xBB\xBF") {
        $line = substr($line, 3);
    }

    switch (strtoupper($line)) {
        case 'BEGIN:VCALENDAR' :
            // Read the same key that isset() tested. The previous code
            // indexed the map with an undefined variable here.
            $class = isset(VCalendar::$componentMap['VCALENDAR'])
                ? VCalendar::$componentMap['VCALENDAR']
                : 'Sabre\\VObject\\Component\\VCalendar';
            break;
        case 'BEGIN:VCARD' :
            $class = isset(VCard::$componentMap['VCARD'])
                ? VCard::$componentMap['VCARD']
                : 'Sabre\\VObject\\Component\\VCard';
            break;
        default :
            throw new ParseException('This parser only supports VCARD and VCALENDAR files');
    }

    $this->root = new $class(array(), false);

    while (true) {

        // Reading until we hit END:
        $line = $this->readLine();
        if (strtoupper(substr($line, 0, 4)) === 'END:') {
            break;
        }

        // parseLine() returns false for lines that were ignored.
        $result = $this->parseLine($line);
        if ($result) {
            $this->root->add($result);
        }

    }

    $name = strtoupper(substr($line, 4));
    if ($name !== $this->root->name) {
        throw new ParseException('Invalid MimeDir file. expected: "END:' . $this->root->name . '" got: "END:' . $name . '"');
    }

}
150 | |
/**
 * Parses a single unfolded line.
 *
 * A line starting with BEGIN: (case-insensitive) opens a sub-component: the
 * component is created through the root's createComponent() and every line
 * up to the matching END: is parsed recursively and added to it. Any other
 * line is handed to readProperty().
 *
 * @param string $line Unfolded line
 * @throws ParseException When the END: line does not name the open component.
 * @return Component|Property|false The parsed node, or false when the line
 *                                  was ignored by readProperty().
 */
protected function parseLine($line) {

    $opensComponent = strtoupper(substr($line, 0, 6)) === 'BEGIN:';

    if (!$opensComponent) {
        // Property reader; a falsy result means the line was ignored.
        $property = $this->readProperty($line);
        return $property ? $property : false;
    }

    // Start of a new component
    $component = $this->root->createComponent(substr($line, 6), array(), false);

    // Reading until we hit END:
    $line = $this->readLine();
    while (strtoupper(substr($line, 0, 4)) !== 'END:') {
        $child = $this->parseLine($line);
        if ($child) {
            $component->add($child);
        }
        $line = $this->readLine();
    }

    $closingName = strtoupper(substr($line, 4));
    if ($closingName !== $component->name) {
        throw new ParseException('Invalid MimeDir file. expected: "END:' . $component->name . '" got: "END:' . $closingName . '"');
    }

    return $component;

}
199 | |
200 /** | |
201 * We need to look ahead 1 line every time to see if we need to 'unfold' | |
202 * the next line. | |
203 * | |
204 * If that was not the case, we store it here. | |
205 * | |
206 * @var null|string | |
207 */ | |
208 protected $lineBuffer; | |
209 | |
210 /** | |
211 * The real current line number. | |
212 */ | |
213 protected $lineIndex = 0; | |
214 | |
215 /** | |
216 * In the case of unfolded lines, this property holds the line number for | |
217 * the start of the line. | |
218 * | |
219 * @var int | |
220 */ | |
221 protected $startLine = 0; | |
222 | |
223 /** | |
224 * Contains a 'raw' representation of the current line. | |
225 * | |
226 * @var string | |
227 */ | |
228 protected $rawLine; | |
229 | |
/**
 * Reads a single logical line from the input.
 *
 * Line endings are stripped, blank lines are skipped, and folded
 * continuation lines (physical lines starting with a space or a tab) are
 * joined onto the line they continue. Because unfolding needs one line of
 * look-ahead, a physical line that turns out not to be a continuation is
 * parked in $this->lineBuffer and returned by the next call.
 *
 * Side effects: $this->startLine is set to the line number the logical line
 * started on, and $this->rawLine receives the line with every fold point
 * kept as "\n " (newline plus one space).
 *
 * @throws EofException   When the input is exhausted before a line is read.
 * @throws ParseException When reading from the stream fails.
 * @return string The unfolded line.
 */
protected function readLine() {

    if (!is_null($this->lineBuffer)) {
        // Fetched (and already counted) by the previous look-ahead.
        $rawLine = $this->lineBuffer;
        $this->lineBuffer = null;
    } else {
        do {
            $eof = feof($this->input);

            $rawLine = fgets($this->input);

            if ($eof || (feof($this->input) && $rawLine === false)) {
                throw new EofException('End of document reached prematurely');
            }
            if ($rawLine === false) {
                throw new ParseException('Error reading from input stream');
            }

            // Count every physical line, including blank ones that are
            // skipped, so that startLine stays accurate.
            $this->lineIndex++;
            $rawLine = rtrim($rawLine, "\r\n");
        } while ($rawLine === ''); // Skipping empty lines
    }
    $line = $rawLine;

    $this->startLine = $this->lineIndex;

    // Looking ahead for folded lines.
    while (true) {

        $nextLine = fgets($this->input);
        $this->lineIndex++;
        $nextLine = ($nextLine === false) ? '' : rtrim($nextLine, "\r\n");

        // Stop on end of input or a blank line. Compare against '' rather
        // than relying on truthiness: a line consisting of "0" is real
        // content and must not be dropped.
        if ($nextLine === '') {
            break;
        }

        if ($nextLine[0] === "\t" || $nextLine[0] === " ") {
            // Continuation: drop the single folding character.
            $continuation = substr($nextLine, 1);
            $line .= $continuation;
            $rawLine .= "\n " . $continuation;
        } else {
            // Start of the next logical line; keep it for the next call.
            $this->lineBuffer = $nextLine;
            break;
        }

    }
    $this->rawLine = $rawLine;
    return $line;

}
284 | |
/**
 * Reads a property from an unfolded line.
 *
 * The line is tokenized into a property name, parameter names, parameter
 * values and the property value. The property object is created through the
 * root's createProperty(); parameters that appeared without a value (allowed
 * by vCard 2.1) are added afterwards as nameless parameters.
 *
 * @param string $line Unfolded line
 * @throws ParseException When no property name could be read and
 *                        OPTION_IGNORE_INVALID_LINES is not set.
 * @return Property|false The property, or false when the line is invalid and
 *                        OPTION_IGNORE_INVALID_LINES is set.
 */
protected function readProperty($line) {

    if ($this->options & self::OPTION_FORGIVING) {
        $propNameToken = 'A-Z0-9\-\._\\/';
    } else {
        $propNameToken = 'A-Z0-9\-\.';
    }

    $paramNameToken = 'A-Z0-9\-';
    $safeChar = '^";:,';
    $qSafeChar = '^"';

    $regex = "/
        ^(?P<name> [$propNameToken]+ ) (?=[;:])        # property name
        |
        (?<=:)(?P<propValue> .+)$                      # property value
        |
        ;(?P<paramName> [$paramNameToken]+) (?=[=;:])  # parameter name
        |
        (=|,)(?P<paramValue>                           # parameter value
            (?: [$safeChar]*) |
            \"(?: [$qSafeChar]+)\"
        ) (?=[;:,])
        /xi";

    preg_match_all($regex, $line, $matches, PREG_SET_ORDER);

    $property = array(
        'name' => null,
        'parameters' => array(),
        'value' => null
    );

    $lastParam = null;

    /**
     * Looping through all the tokens.
     *
     * The named groups are tested in the reverse of the order in which they
     * appear in the pattern: when one alternative matched, the groups that
     * come after it do not show up in the result, so the last group that is
     * set identifies the kind of token.
     */
    foreach ($matches as $match) {

        if (isset($match['paramValue'])) {
            if ($match['paramValue'] && $match['paramValue'][0] === '"') {
                $value = substr($match['paramValue'], 1, -1);
            } else {
                $value = $match['paramValue'];
            }

            $value = $this->unescapeParam($value);

            // A malformed line can produce a value before any parameter
            // name. Such a value is filed under the empty name, exactly as
            // before, but without reading an undefined array index.
            $paramKey = (string)$lastParam;

            if (!isset($property['parameters'][$paramKey])) {
                // First value for this parameter.
                $property['parameters'][$paramKey] = $value;
            } elseif (is_array($property['parameters'][$paramKey])) {
                $property['parameters'][$paramKey][] = $value;
            } else {
                // Second value: promote the scalar to a list.
                $property['parameters'][$paramKey] = array(
                    $property['parameters'][$paramKey],
                    $value
                );
            }
            continue;
        }
        if (isset($match['paramName'])) {
            $lastParam = strtoupper($match['paramName']);
            if (!isset($property['parameters'][$lastParam])) {
                $property['parameters'][$lastParam] = null;
            }
            continue;
        }
        if (isset($match['propValue'])) {
            $property['value'] = $match['propValue'];
            continue;
        }
        if (isset($match['name']) && $match['name']) {
            $property['name'] = strtoupper($match['name']);
            continue;
        }

        // @codeCoverageIgnoreStart
        throw new \LogicException('This code should not be reachable');
        // @codeCoverageIgnoreEnd

    }

    if (is_null($property['value'])) {
        $property['value'] = '';
    }
    if (!$property['name']) {
        if ($this->options & self::OPTION_IGNORE_INVALID_LINES) {
            return false;
        }
        throw new ParseException('Invalid Mimedir file. Line starting at ' . $this->startLine . ' did not follow iCalendar/vCard conventions');
    }

    // vCard 2.1 states that parameters may appear without a name, and only
    // a value. We can deduce the name based on its value.
    //
    // Our parser will get those as parameters without a value instead, so
    // we're filtering these parameters out first.
    $namedParameters = array();
    $namelessParameters = array();

    foreach ($property['parameters'] as $name => $value) {
        if (!is_null($value)) {
            $namedParameters[$name] = $value;
        } else {
            $namelessParameters[] = $name;
        }
    }

    $propObj = $this->root->createProperty($property['name'], null, $namedParameters);

    foreach ($namelessParameters as $namelessParameter) {
        $propObj->add(null, $namelessParameter);
    }

    // Quoted-printable values are re-read from the raw line, because line
    // breaks are significant inside them.
    if (strtoupper($propObj['ENCODING']) === 'QUOTED-PRINTABLE') {
        $propObj->setQuotedPrintableValue($this->extractQuotedPrintableValue());
    } else {
        $propObj->setRawMimeDirValue($property['value']);
    }

    return $propObj;

}
419 | |
/**
 * Unescapes a property value.
 *
 * vCard 2.1 says:
 *   * Semi-colons must be escaped in some property values, specifically
 *     ADR, ORG and N.
 *   * Semi-colons must be escaped in parameter values, because semi-colons
 *     are also used to separate values.
 *   * No mention of escaping backslashes with another backslash.
 *   * Newlines are not escaped either, instead QUOTED-PRINTABLE is used to
 *     span values over more than 1 line.
 *
 * vCard 3.0 says:
 *   * (rfc2425) Backslashes, newlines (\n or \N) and commas must be
 *     escaped, all the time.
 *   * Commas are used as delimiters in multiple values.
 *   * (rfc2426) Adds to this that the semi-colon MUST also be escaped,
 *     as in some properties semi-colon is used as a separator.
 *   * Properties using semi-colons: N, ADR, GEO, ORG
 *   * Both ADR and N's individual parts may be broken up further with a
 *     comma.
 *   * Properties using commas: NICKNAME, CATEGORIES
 *
 * vCard 4.0 (rfc6350) says:
 *   * Commas must be escaped.
 *   * Semi-colons may be escaped, an unescaped semi-colon _may_ be a
 *     delimiter, depending on the property.
 *   * Backslashes must be escaped.
 *   * Newlines must be escaped as either \N or \n.
 *   * Some compound properties may contain multiple parts themselves, so a
 *     comma within a semi-colon delimited property may also be unescaped
 *     to denote multiple parts _within_ the compound property.
 *   * Text-properties using semi-colons: N, ADR, ORG, CLIENTPIDMAP.
 *   * Text-properties using commas: NICKNAME, RELATED, CATEGORIES, PID.
 *
 * Even though the spec says that commas must always be escaped, the
 * example for GEO in Section 6.5.2 seems to violate this.
 *
 * iCalendar 2.0 (rfc5545) says:
 *   * Commas or semi-colons may be used as delimiters, depending on the
 *     property.
 *   * Commas, semi-colons, backslashes, newline (\N or \n) are always
 *     escaped, unless they are delimiters.
 *   * Colons shall not be escaped.
 *   * Commas can be considered the 'default delimiter' and are described as
 *     the delimiter in cases where the order of the multiple values is
 *     insignificant.
 *   * Semi-colons are described as the delimiter for 'structured values'.
 *     They are specifically used as a delimiter in REQUEST-STATUS, RRULE,
 *     GEO and EXRULE. EXRULE is deprecated however.
 *
 * About the $delimiter parameter:
 *
 * If the delimiter is empty (for example null) this method just returns a
 * string. Otherwise the input is split on every unescaped occurrence of the
 * delimiter and an array is always returned. The delimiter is matched
 * literally, so characters that are special in regular expressions are safe
 * to use.
 *
 * @param string $input
 * @param string|null $delimiter
 * @return string|string[]
 */
static public function unescapeValue($input, $delimiter = ';') {

    $split = (bool)$delimiter;

    // Group 1 captures an escape sequence, group 2 (optional) an unescaped
    // delimiter.
    $regex = '# (?: (\\\\ (?: \\\\ | N | n | ; | , ) )';
    if ($split) {
        $delimiter = (string)$delimiter;
        // Quote the delimiter so it cannot be interpreted as pattern syntax.
        $regex .= ' | (' . preg_quote($delimiter, '#') . ')';
    }
    $regex .= ') #x';

    $tokens = preg_split($regex, $input, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);

    // Escape sequence => the character it stands for.
    $escapes = array(
        '\\\\' => '\\',
        '\\N'  => "\n",
        '\\n'  => "\n",
        '\\;'  => ';',
        '\\,'  => ',',
    );

    $resultArray = array();
    $result = '';

    foreach ($tokens as $token) {

        if (isset($escapes[$token])) {
            $result .= $escapes[$token];
        } elseif ($split && $token === $delimiter) {
            // Unescaped delimiter: close the current part.
            $resultArray[] = $result;
            $result = '';
        } else {
            $result .= $token;
        }

    }

    $resultArray[] = $result;
    return $split ? $resultArray : $result;

}
526 | |
/**
 * Unescapes a parameter value.
 *
 * vCard 2.1:
 *   * Does not mention a mechanism for this. In addition, double quotes
 *     are never used to wrap values.
 *   * This means that parameters can simply not contain colons or
 *     semi-colons.
 *
 * vCard 3.0 (rfc2425, rfc2426):
 *   * Parameters _may_ be surrounded by double quotes.
 *   * If this is not the case, semi-colon, colon and comma may simply not
 *     occur (the comma is used for multiple parameter values though).
 *   * If it is surrounded by double-quotes, it may simply not contain
 *     double-quotes.
 *   * This means that a parameter can in no case encode double-quotes, or
 *     newlines.
 *
 * vCard 4.0 (rfc6350)
 *   * Behavior seems to be identical to vCard 3.0
 *
 * iCalendar 2.0 (rfc5545)
 *   * Behavior seems to be identical to vCard 3.0
 *
 * Parameter escaping mechanism (rfc6868):
 *   * This rfc describes a new way to escape parameter values.
 *   * New-line is encoded as ^n
 *   * ^ is encoded as ^^.
 *   * " is encoded as ^'
 *
 * @param string $input
 * @return string The value with the three rfc6868 sequences decoded.
 */
private function unescapeParam($input) {

    // strtr() with a table scans left to right and never rescans what it
    // has replaced, so "^^n" becomes "^n" rather than a newline.
    $sequences = array(
        '^n'  => "\n",
        '^^'  => '^',
        '^\'' => '"',
    );

    return strtr($input, $sequences);

}
581 | |
/**
 * Gets the full quoted printable value of the current line.
 *
 * We need a special method for this, because newlines have both a meaning
 * in vCards, and in QuotedPrintable. The value is therefore taken from
 * $this->rawLine, in which fold points are still present.
 *
 * This method does not do any decoding.
 *
 * When OPTION_FORGIVING is set and the value ends in "=", further lines are
 * pulled in through readLine(), which may throw if the input ends.
 *
 * @return string
 */
private function extractQuotedPrintableValue() {

    // We need to parse the raw line again to get the start of the value.
    //
    // We are looking for the first colon (:) that is not part of a
    // double-quoted parameter value, as those may contain one. The
    // possessive quantifier keeps the scan linear.
    $regex = '/^
        (?: [^":] | "[^"]*" )++  # name and parameters, quoted values skipped whole
        :                        # start of the value we really care about
        (.*)$
    /xs';

    if (preg_match($regex, $this->rawLine, $matches)) {
        $value = $matches[1];
    } else {
        // Unbalanced quotes or no colon at all: fall back to everything
        // after the first colon, or an empty value when there is none.
        $colon = strpos($this->rawLine, ':');
        $value = ($colon === false) ? '' : (string)substr($this->rawLine, $colon + 1);
    }

    // Removing the first whitespace character from every line. Kind of
    // like unfolding, but we keep the newline.
    $value = str_replace("\n ", "\n", $value);

    // Microsoft products don't always correctly fold lines, they may be
    // missing a whitespace. So if 'forgiving' is turned on, we will take
    // those as well.
    if ($this->options & self::OPTION_FORGIVING) {
        while (substr($value, -1) === '=') {
            // Reading the line
            $this->readLine();
            // Grabbing the raw form
            $value .= "\n" . $this->rawLine;
        }
    }

    return $value;

}
627 | |
628 } |