Parser.php 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Yaml;
  11. use Symfony\Component\Yaml\Exception\ParseException;
  12. use Symfony\Component\Yaml\Tag\TaggedValue;
  13. /**
  14. * Parser parses YAML strings to convert them to PHP arrays.
  15. *
  16. * @author Fabien Potencier <fabien@symfony.com>
  17. *
  18. * @final
  19. */
  20. class Parser
  21. {
  /** Regex fragment capturing a YAML tag (e.g. "!foo" or "!!binary") in the named group "tag". */
  public const TAG_PATTERN = '(?P<tag>![\w!.\/:-]+)';

  /** Regex fragment matching a block scalar header: style ("|" or ">"), optional modifiers, optional comment. */
  public const BLOCK_SCALAR_HEADER_PATTERN = '(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?';

  /** @var string|null Path of the file being parsed; set by parseFile() and passed to ParseException */
  private $filename;

  /** @var int Line offset of this (possibly nested) parser; added to the local line index by getRealCurrentLineNb() */
  private $offset = 0;

  /** @var int|null Line count recorded by the first doParse() call; copied to nested parsers by parseBlock() */
  private $totalNumberOfLines;

  /** @var string[] Lines of the block currently being parsed */
  private $lines = [];

  /** @var int Zero-based index of the current line inside $lines (-1 before the first line is read) */
  private $currentLineNb = -1;

  /** @var string Content of the current line */
  private $currentLine = '';

  /** @var array Values registered under anchor names; shared by reference with nested parsers */
  private $refs = [];

  /** @var int[] Line numbers that getRealCurrentLineNb() skips over when computing the real line number */
  private $skippedLineNumbers = [];

  /** @var int[] Skipped lines of the current block; parseBlock() forwards those at or after the child's offset */
  private $locallySkippedLineNumbers = [];

  /** @var string[] Stack of anchor names whose value is still being parsed (used to report circular references) */
  private $refsBeingParsed = [];
  /**
   * Parses a YAML file into a PHP value.
   *
   * The file name is remembered for the duration of the parsing so that it
   * can be attached to any ParseException, and is cleared again afterwards.
   *
   * @param string $filename The path to the YAML file to be parsed
   * @param int    $flags    A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The YAML converted to a PHP value
   *
   * @throws ParseException If the file could not be read or the YAML is not valid
   */
  public function parseFile(string $filename, int $flags = 0)
  {
      if (!is_file($filename)) {
          throw new ParseException(sprintf('File "%s" does not exist.', $filename));
      }

      if (!is_readable($filename)) {
          throw new ParseException(sprintf('File "%s" cannot be read.', $filename));
      }

      // is_readable() is only a hint: the read itself can still fail (e.g. the
      // file vanished in between), in which case file_get_contents() returns
      // false. Report that instead of silently parsing an empty document.
      $contents = file_get_contents($filename);
      if (false === $contents) {
          throw new ParseException(sprintf('File "%s" cannot be read.', $filename));
      }

      $this->filename = $filename;

      try {
          return $this->parse($contents, $flags);
      } finally {
          $this->filename = null;
      }
  }
  /**
   * Parses a YAML string to a PHP value.
   *
   * All per-document state is reset once parsing ends (successfully or not),
   * so the same Parser instance can safely be reused for another document.
   *
   * @param string $value A YAML string
   * @param int    $flags A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed A PHP value
   *
   * @throws ParseException If the YAML is not valid
   */
  public function parse(string $value, int $flags = 0)
  {
      // preg_match() with the "u" modifier returns false on malformed UTF-8 input
      if (false === preg_match('//u', $value)) {
          throw new ParseException('The YAML value does not appear to be valid UTF-8.', -1, null, $this->filename);
      }

      $this->refs = [];

      // When mbstring overloads the string functions, force UTF-8 while parsing
      // and restore the previous internal encoding afterwards.
      $mbEncoding = null;
      if (2 /* MB_OVERLOAD_STRING */ & (int) ini_get('mbstring.func_overload')) {
          $mbEncoding = mb_internal_encoding();
          mb_internal_encoding('UTF-8');
      }

      try {
          return $this->doParse($value, $flags);
      } finally {
          if (null !== $mbEncoding) {
              mb_internal_encoding($mbEncoding);
          }

          $this->lines = [];
          $this->currentLine = '';
          $this->refs = [];
          $this->skippedLineNumbers = [];
          $this->locallySkippedLineNumbers = [];
          // An exception thrown while an anchor is being parsed leaves its name on
          // this stack; clear it so the next document does not report a bogus
          // circular reference.
          $this->refsBeingParsed = [];
          // doParse() only records the line count while this is null, so it has
          // to be cleared for the next document to get its own count.
          $this->totalNumberOfLines = null;
      }
  }
  /**
   * Exposes the real number of the line the parser is currently positioned on.
   *
   * @internal
   *
   * @return int The value of getRealCurrentLineNb()
   */
  public function getLastLineNumberBeforeDeprecation(): int
  {
      $realLineNumber = $this->getRealCurrentLineNb();

      return $realLineNumber;
  }
  /**
   * Parses a YAML document (or a nested block of one) line by line.
   *
   * Each significant line is handled as one of:
   *  - a sequence item ("- ..."),
   *  - a mapping item ("key: ..."), including merge keys ("<<") and anchors ("&ref"),
   *  - anything else, which is parsed as a one-line inline value or, as a last
   *    resort, as a multi-line plain scalar.
   *
   * Nested content is extracted with getNextEmbedBlock() and parsed by a child
   * parser through parseBlock().
   *
   * @param string $value The YAML to parse
   * @param int    $flags A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The parsed value; null when the input has no content or the result is empty
   *
   * @throws ParseException If the YAML is not valid
   */
  private function doParse(string $value, int $flags)
  {
      $this->currentLineNb = -1;
      $this->currentLine = '';
      $value = $this->cleanup($value);
      $this->lines = explode("\n", $value);
      $this->locallySkippedLineNumbers = [];

      if (null === $this->totalNumberOfLines) {
          $this->totalNumberOfLines = \count($this->lines);
      }

      if (!$this->moveToNextLine()) {
          return null;
      }

      $data = [];
      $context = null;
      $allowOverwrite = false;

      while ($this->isCurrentLineEmpty()) {
          if (!$this->moveToNextLine()) {
              return null;
          }
      }

      // Resolves the tag and returns if end of the document
      if (null !== ($tag = $this->getLineTag($this->currentLine, $flags, false)) && !$this->moveToNextLine()) {
          return new TaggedValue($tag, '');
      }

      do {
          if ($this->isCurrentLineEmpty()) {
              continue;
          }

          // tab?
          if ("\t" === $this->currentLine[0]) {
              throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }

          Inline::initialize($flags, $this->getRealCurrentLineNb(), $this->filename);

          $isRef = $mergeNode = false;
          if ('-' === $this->currentLine[0] && self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) {
              if ('mapping' === $context) {
                  throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }
              $context = 'sequence';

              // "- &anchor value": remember the anchor and keep parsing the rest of the item
              if (isset($values['value']) && '&' === $values['value'][0] && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
                  $isRef = $matches['ref'];
                  $this->refsBeingParsed[] = $isRef;
                  $values['value'] = $matches['value'];
              }

              if (isset($values['value'][1]) && '?' === $values['value'][0] && ' ' === $values['value'][1]) {
                  throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              // array
              if (!isset($values['value']) || '' === trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
                  $data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true) ?? '', $flags);
              } elseif (null !== $subTag = $this->getLineTag(ltrim($values['value'], ' '), $flags)) {
                  $data[] = new TaggedValue(
                      $subTag,
                      $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $flags)
                  );
              } else {
                  if (isset($values['leadspaces'])
                      && self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->trimTag($values['value']), $matches)
                  ) {
                      // this is a compact notation element, add to next block and parse
                      $block = $values['value'];
                      if ($this->isNextLineIndented()) {
                          $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + \strlen($values['leadspaces']) + 1);
                      }

                      $data[] = $this->parseBlock($this->getRealCurrentLineNb(), $block, $flags);
                  } else {
                      $data[] = $this->parseValue($values['value'], $flags, $context);
                  }
              }

              if ($isRef) {
                  $this->refs[$isRef] = end($data);
                  array_pop($this->refsBeingParsed);
              }
          } elseif (
              self::preg_match('#^(?P<key>(?:![^\s]++\s++)?(?:'.Inline::REGEX_QUOTED_STRING.'|(?:!?!php/const:)?[^ \'"\[\{!].*?)) *\:(\s++(?P<value>.+))?$#u', rtrim($this->currentLine), $values)
              && (false === strpos($values['key'], ' #') || \in_array($values['key'][0], ['"', "'"], true))
          ) {
              if ('sequence' === $context) {
                  throw new ParseException('You cannot define a mapping item when in a sequence', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }
              $context = 'mapping';

              try {
                  $key = Inline::parseScalar($values['key']);
              } catch (ParseException $e) {
                  $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                  $e->setSnippet($this->currentLine);

                  throw $e;
              }

              // Only string and integer keys are accepted, so no further key
              // normalization is needed past this point.
              if (!\is_string($key) && !\is_int($key)) {
                  throw new ParseException(sprintf('%s keys are not supported. Quote your evaluable mapping keys instead.', is_numeric($key) ? 'Numeric' : 'Non-string'), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              if ('<<' === $key && (!isset($values['value']) || '&' !== $values['value'][0] || !self::preg_match('#^&(?P<ref>[^ ]+)#u', $values['value'], $refMatches))) {
                  $mergeNode = true;
                  $allowOverwrite = true;
                  if (isset($values['value'][0]) && '*' === $values['value'][0]) {
                      // "<<: *alias": merge the value registered under the alias
                      $refName = substr(rtrim($values['value']), 1);
                      if (!\array_key_exists($refName, $this->refs)) {
                          if (false !== $pos = array_search($refName, $this->refsBeingParsed, true)) {
                              throw new ParseException(sprintf('Circular reference [%s, %s] detected for reference "%s".', implode(', ', \array_slice($this->refsBeingParsed, $pos)), $refName, $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                          }

                          throw new ParseException(sprintf('Reference "%s" does not exist.', $refName), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      $refValue = $this->refs[$refName];

                      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $refValue instanceof \stdClass) {
                          $refValue = (array) $refValue;
                      }

                      if (!\is_array($refValue)) {
                          throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      $data += $refValue; // array union
                  } else {
                      if (isset($values['value']) && '' !== $values['value']) {
                          $value = $values['value'];
                      } else {
                          $value = $this->getNextEmbedBlock();
                      }
                      $parsed = $this->parseBlock($this->getRealCurrentLineNb() + 1, $value, $flags);

                      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsed instanceof \stdClass) {
                          $parsed = (array) $parsed;
                      }

                      if (!\is_array($parsed)) {
                          throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      if (isset($parsed[0])) {
                          // If the value associated with the merge key is a sequence, then this sequence is expected to contain mapping nodes
                          // and each of these nodes is merged in turn according to its order in the sequence. Keys in mapping nodes earlier
                          // in the sequence override keys specified in later mapping nodes.
                          foreach ($parsed as $parsedItem) {
                              if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $parsedItem instanceof \stdClass) {
                                  $parsedItem = (array) $parsedItem;
                              }

                              if (!\is_array($parsedItem)) {
                                  throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem, $this->filename);
                              }

                              $data += $parsedItem; // array union
                          }
                      } else {
                          // If the value associated with the key is a single mapping node, each of its key/value pairs is inserted into the
                          // current mapping, unless the key already exists in it.
                          $data += $parsed; // array union
                      }
                  }
              } elseif ('<<' !== $key && isset($values['value']) && '&' === $values['value'][0] && self::preg_match('#^&(?P<ref>[^ ]++) *+(?P<value>.*)#u', $values['value'], $matches)) {
                  // "key: &anchor value": remember the anchor and keep parsing the rest of the value
                  $isRef = $matches['ref'];
                  $this->refsBeingParsed[] = $isRef;
                  $values['value'] = $matches['value'];
              }

              $subTag = null;
              if ($mergeNode) {
                  // Merge keys
              } elseif (!isset($values['value']) || '' === $values['value'] || 0 === strpos($values['value'], '#') || (null !== $subTag = $this->getLineTag($values['value'], $flags)) || '<<' === $key) {
                  // hash
                  // if next line is less indented or equal, then it means that the current value is null
                  if (!$this->isNextLineIndented() && !$this->isNextLineUnIndentedCollection()) {
                      // Spec: Keys MUST be unique; first one wins.
                      // But overwriting is allowed when a merge node is used in current block.
                      if ($allowOverwrite || !isset($data[$key])) {
                          if (null !== $subTag) {
                              $data[$key] = new TaggedValue($subTag, '');
                          } else {
                              $data[$key] = null;
                          }
                      } else {
                          throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }
                  } else {
                      // remember the parsed line number here in case we need it to provide some contexts in error messages below
                      $realCurrentLineNbKey = $this->getRealCurrentLineNb();
                      $value = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(), $flags);
                      if ('<<' === $key) {
                          // "<<: &anchor" followed by a nested block: register the anchor, then merge the block
                          $this->refs[$refMatches['ref']] = $value;

                          if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && $value instanceof \stdClass) {
                              $value = (array) $value;
                          }

                          // Same guard as the other merge branches: the array union below
                          // is only defined for arrays.
                          if (!\is_array($value)) {
                              throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $realCurrentLineNbKey + 1, $this->currentLine, $this->filename);
                          }

                          $data += $value;
                      } elseif ($allowOverwrite || !isset($data[$key])) {
                          // Spec: Keys MUST be unique; first one wins.
                          // But overwriting is allowed when a merge node is used in current block.
                          if (null !== $subTag) {
                              $data[$key] = new TaggedValue($subTag, $value);
                          } else {
                              $data[$key] = $value;
                          }
                      } else {
                          throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $realCurrentLineNbKey + 1, $this->currentLine, $this->filename);
                      }
                  }
              } else {
                  $value = $this->parseValue(rtrim($values['value']), $flags, $context);
                  // Spec: Keys MUST be unique; first one wins.
                  // But overwriting is allowed when a merge node is used in current block.
                  if ($allowOverwrite || !isset($data[$key])) {
                      $data[$key] = $value;
                  } else {
                      throw new ParseException(sprintf('Duplicate key "%s" detected.', $key), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                  }
              }

              if ($isRef) {
                  $this->refs[$isRef] = $data[$key];
                  array_pop($this->refsBeingParsed);
              }
          } else {
              // multiple documents are not supported
              if ('---' === $this->currentLine) {
                  throw new ParseException('Multiple documents are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              if (isset($this->currentLine[1]) && '?' === $this->currentLine[0] && ' ' === $this->currentLine[1]) {
                  throw new ParseException('Complex mappings are not supported.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              // 1-liner optionally followed by newline(s)
              if (\is_string($value) && $this->lines[0] === trim($value)) {
                  try {
                      $value = Inline::parse($this->lines[0], $flags, $this->refs);
                  } catch (ParseException $e) {
                      $e->setParsedLine($this->getRealCurrentLineNb() + 1);
                      $e->setSnippet($this->currentLine);

                      throw $e;
                  }

                  return $value;
              }

              // try to parse the value as a multi-line string as a last resort
              if (0 === $this->currentLineNb) {
                  $previousLineWasNewline = false;
                  $previousLineWasTerminatedWithBackslash = false;
                  $value = '';

                  foreach ($this->lines as $line) {
                      // If the indentation is not consistent at offset 0, it is to be considered as a ParseError
                      if (0 === $this->offset && isset($line[0]) && ' ' === $line[0]) {
                          throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
                      }

                      if ('' === trim($line)) {
                          $value .= "\n";
                      } elseif (!$previousLineWasNewline && !$previousLineWasTerminatedWithBackslash) {
                          $value .= ' ';
                      }

                      if ('' !== trim($line) && '\\' === substr($line, -1)) {
                          $value .= ltrim(substr($line, 0, -1));
                      } elseif ('' !== trim($line)) {
                          $value .= trim($line);
                      }

                      if ('' === trim($line)) {
                          $previousLineWasNewline = true;
                          $previousLineWasTerminatedWithBackslash = false;
                      } elseif ('\\' === substr($line, -1)) {
                          $previousLineWasNewline = false;
                          $previousLineWasTerminatedWithBackslash = true;
                      } else {
                          $previousLineWasNewline = false;
                          $previousLineWasTerminatedWithBackslash = false;
                      }
                  }

                  try {
                      return Inline::parse(trim($value));
                  } catch (ParseException $e) {
                      // fall-through to the ParseException thrown below
                  }
              }

              throw new ParseException('Unable to parse.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      } while ($this->moveToNextLine());

      if (null !== $tag) {
          $data = new TaggedValue($tag, $data);
      }

      if (Yaml::PARSE_OBJECT_FOR_MAP & $flags && !\is_object($data) && 'mapping' === $context) {
          $object = new \stdClass();

          foreach ($data as $key => $value) {
              $object->$key = $value;
          }

          $data = $object;
      }

      return empty($data) ? null : $data;
  }
  /**
   * Parses a nested block of YAML with a dedicated child parser.
   *
   * The child shares the anchors of this parser by reference and receives the
   * skipped line numbers it needs to compute real line numbers.
   *
   * @param int    $offset The line offset of the block
   * @param string $yaml   The YAML of the block
   * @param int    $flags  A bit field of PARSE_* constants to customize the YAML parser behavior
   *
   * @return mixed The value returned by the child parser's doParse()
   */
  private function parseBlock(int $offset, string $yaml, int $flags)
  {
      // Start from the inherited skipped lines, then add the local ones located
      // at or after the block offset.
      $inheritedSkips = $this->skippedLineNumbers;
      foreach ($this->locallySkippedLineNumbers as $skipped) {
          if ($skipped >= $offset) {
              $inheritedSkips[] = $skipped;
          }
      }

      $child = new self();
      $child->refs = &$this->refs;
      $child->refsBeingParsed = $this->refsBeingParsed;
      $child->skippedLineNumbers = $inheritedSkips;
      $child->totalNumberOfLines = $this->totalNumberOfLines;
      $child->offset = $offset;

      return $child->doParse($yaml, $flags);
  }
  /**
   * Computes the number of the current line, offset and skipped lines included.
   *
   * @internal
   *
   * @return int The current line number
   */
  public function getRealCurrentLineNb(): int
  {
      $lineNumber = $this->offset + $this->currentLineNb;

      // Every skipped line located at or before the running position pushes the
      // position one line further; stop at the first skipped line beyond it.
      foreach ($this->skippedLineNumbers as $skipped) {
          if ($skipped > $lineNumber) {
              return $lineNumber;
          }

          ++$lineNumber;
      }

      return $lineNumber;
  }
  /**
   * Counts the space characters at the very beginning of the current line.
   *
   * @return int The current line indentation
   */
  private function getCurrentLineIndentation(): int
  {
      // strspn() returns the length of the leading run made only of spaces
      return strspn($this->currentLine, ' ');
  }
  /**
   * Returns the next embed block of YAML.
   *
   * Moves to the line following the current one and collects every line that
   * belongs to the nested block, stripped of the block indentation. When no
   * indentation is given, it is taken from the first line that is neither
   * empty nor a comment.
   *
   * @param int|null $indentation The indent level at which the block is to be read, or null for default
   * @param bool     $inSequence  True if the enclosing data structure is a sequence
   *
   * @return string|null A YAML string, or null when there is no nested block
   *
   * @throws ParseException When indentation problem are detected
   */
  private function getNextEmbedBlock(?int $indentation = null, bool $inSequence = false): ?string
  {
      $oldLineIndentation = $this->getCurrentLineIndentation();

      if (!$this->moveToNextLine()) {
          return null;
      }

      if (null === $indentation) {
          $newIndent = null;
          $movements = 0;

          do {
              $EOF = false;

              // empty and comment-like lines do not influence the indentation depth
              if ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
                  $EOF = !$this->moveToNextLine();

                  if (!$EOF) {
                      ++$movements;
                  }
              } else {
                  $newIndent = $this->getCurrentLineIndentation();
              }
          } while (!$EOF && null === $newIndent);

          // go back to the first line of the block
          for ($i = 0; $i < $movements; ++$i) {
              $this->moveToPreviousLine();
          }

          $unindentedEmbedBlock = $this->isStringUnIndentedCollectionItem();

          if (!$this->isCurrentLineEmpty() && 0 === $newIndent && !$unindentedEmbedBlock) {
              throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      } else {
          $newIndent = $indentation;
      }

      // $newIndent is still null when only empty/comment lines remain; substr()
      // must then be given 0, which is what null used to be converted to.
      $stripLength = (int) $newIndent;

      $data = [];
      if ($this->getCurrentLineIndentation() >= $newIndent) {
          $data[] = substr($this->currentLine, $stripLength);
      } elseif ($this->isCurrentLineEmpty() || $this->isCurrentLineComment()) {
          $data[] = $this->currentLine;
      } else {
          $this->moveToPreviousLine();

          return null;
      }

      if ($inSequence && $oldLineIndentation === $newIndent && isset($data[0][0]) && '-' === $data[0][0]) {
          // the previous line contained a dash but no item content, this line is a sequence item with the same indentation
          // and therefore no nested list or mapping
          $this->moveToPreviousLine();

          return null;
      }

      $isItUnindentedCollection = $this->isStringUnIndentedCollectionItem();

      while ($this->moveToNextLine()) {
          $indent = $this->getCurrentLineIndentation();

          if ($isItUnindentedCollection && !$this->isCurrentLineEmpty() && !$this->isStringUnIndentedCollectionItem() && $newIndent === $indent) {
              $this->moveToPreviousLine();
              break;
          }

          if ($this->isCurrentLineBlank()) {
              $data[] = substr($this->currentLine, $stripLength);
              continue;
          }

          if ($indent >= $newIndent) {
              $data[] = substr($this->currentLine, $stripLength);
          } elseif ($this->isCurrentLineComment()) {
              $data[] = $this->currentLine;
          } elseif (0 === $indent) {
              // back at the outer level: the block is over
              $this->moveToPreviousLine();

              break;
          } else {
              throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }
      }

      return implode("\n", $data);
  }
  /**
   * Advances the parser by one line, when there is one left.
   *
   * @return bool True if the parser moved, false if it already was on the last line
   */
  private function moveToNextLine(): bool
  {
      $lastIndex = \count($this->lines) - 1;

      if ($this->currentLineNb < $lastIndex) {
          ++$this->currentLineNb;
          $this->currentLine = $this->lines[$this->currentLineNb];

          return true;
      }

      return false;
  }
  /**
   * Steps the parser back by one line, when there is one before.
   *
   * @return bool True if the parser moved, false if it already was on the first line (or before it)
   */
  private function moveToPreviousLine(): bool
  {
      if ($this->currentLineNb >= 1) {
          --$this->currentLineNb;
          $this->currentLine = $this->lines[$this->currentLineNb];

          return true;
      }

      return false;
  }
  /**
   * Parses a YAML value.
   *
   * Handles, in this order: aliases ("*ref"), block scalars ("|" and ">",
   * optionally tagged) and inline values, the latter possibly spanning the
   * following lines.
   *
   * @param string $value   A YAML value
   * @param int    $flags   A bit field of PARSE_* constants to customize the YAML parser behavior
   * @param string $context The parser context (either sequence or mapping)
   *
   * @return mixed A PHP value
   *
   * @throws ParseException When reference does not exist
   */
  private function parseValue(string $value, int $flags, string $context)
  {
      if (0 === strpos($value, '*')) {
          // Drop the leading "*" and any trailing comment, i.e. a "#" preceded by
          // whitespace, however many whitespace characters separate the two.
          if (self::preg_match('/^\*(?P<alias>.*?)\s+#/', $value, $aliasMatches)) {
              $value = $aliasMatches['alias'];
          } else {
              $value = substr($value, 1);
          }

          if (!\array_key_exists($value, $this->refs)) {
              if (false !== $pos = array_search($value, $this->refsBeingParsed, true)) {
                  throw new ParseException(sprintf('Circular reference [%s, %s] detected for reference "%s".', implode(', ', \array_slice($this->refsBeingParsed, $pos)), $value, $value), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
              }

              throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->getRealCurrentLineNb() + 1, $this->currentLine, $this->filename);
          }

          return $this->refs[$value];
      }

      if ('' !== $value && \in_array($value[0], ['!', '|', '>'], true) && self::preg_match('/^(?:'.self::TAG_PATTERN.' +)?'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) {
          $modifiers = $matches['modifiers'] ?? '';

          // The modifiers hold an optional chomping sign and an optional indentation
          // number, in any order. Cast before abs(): abs() rejects non-numeric
          // strings such as '' or '+' on PHP 8.
          $data = $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), abs((int) $modifiers));

          if ('' !== $matches['tag'] && '!' !== $matches['tag']) {
              if ('!!binary' === $matches['tag']) {
                  return Inline::evaluateBinaryScalar($data);
              }

              return new TaggedValue(substr($matches['tag'], 1), $data);
          }

          return $data;
      }

      try {
          $quotation = '' !== $value && ('"' === $value[0] || "'" === $value[0]) ? $value[0] : null;

          // do not take following lines into account when the current line is a quoted single line value
          if (null !== $quotation && self::preg_match('/^'.$quotation.'.*'.$quotation.'(\s*#.*)?$/', $value)) {
              return Inline::parse($value, $flags, $this->refs);
          }

          $lines = [];

          while ($this->moveToNextLine()) {
              // unquoted strings end before the first unindented line
              if (null === $quotation && 0 === $this->getCurrentLineIndentation()) {
                  $this->moveToPreviousLine();

                  break;
              }

              $lines[] = trim($this->currentLine);

              // quoted string values end with a line that is terminated with the quotation character
              if ('' !== $this->currentLine && substr($this->currentLine, -1) === $quotation) {
                  break;
              }
          }

          // fold the continuation lines: a blank line becomes a newline, any other
          // line is appended after a single space (unless it follows a blank line)
          for ($i = 0, $linesCount = \count($lines), $previousLineBlank = false; $i < $linesCount; ++$i) {
              if ('' === $lines[$i]) {
                  $value .= "\n";
                  $previousLineBlank = true;
              } elseif ($previousLineBlank) {
                  $value .= $lines[$i];
                  $previousLineBlank = false;
              } else {
                  $value .= ' '.$lines[$i];
                  $previousLineBlank = false;
              }
          }

          Inline::$parsedLineNumber = $this->getRealCurrentLineNb();

          $parsedValue = Inline::parse($value, $flags, $this->refs);

          if ('mapping' === $context && \is_string($parsedValue) && '"' !== $value[0] && "'" !== $value[0] && '[' !== $value[0] && '{' !== $value[0] && '!' !== $value[0] && false !== strpos($parsedValue, ': ')) {
              throw new ParseException('A colon cannot be used in an unquoted mapping value.', $this->getRealCurrentLineNb() + 1, $value, $this->filename);
          }

          return $parsedValue;
      } catch (ParseException $e) {
          $e->setParsedLine($this->getRealCurrentLineNb() + 1);
          $e->setSnippet($this->currentLine);

          throw $e;
      }
  }
  /**
   * Reads the lines of a block scalar and assembles its text.
   *
   * @param string $style       The style indicator that was used to begin this block scalar (| or >)
   * @param string $chomping    The chomping indicator that was used to begin this block scalar (+ or -)
   * @param int    $indentation The indentation indicator that was used to begin this block scalar
   *
   * @return string The text value
   */
  private function parseBlockScalar(string $style, string $chomping = '', int $indentation = 0): string
  {
      $hasLine = $this->moveToNextLine();
      if (!$hasLine) {
          return '';
      }

      $lineIsBlank = $this->isCurrentLineBlank();
      $collected = [];

      // leading blank lines are consumed before determining indentation
      while ($hasLine && $lineIsBlank) {
          $hasLine = $this->moveToNextLine();

          // newline only if not EOF
          if ($hasLine) {
              $collected[] = '';
              $lineIsBlank = $this->isCurrentLineBlank();
          }
      }

      // determine indentation if not specified: the number of leading spaces of the current line
      if (0 === $indentation) {
          $indentation = strspn($this->currentLine, ' ');
      }

      if ($indentation > 0) {
          $pattern = sprintf('/^ {%d}(.*)$/', $indentation);

          // keep consuming lines as long as they are blank or indented enough
          while ($hasLine && ($lineIsBlank || self::preg_match($pattern, $this->currentLine, $matches))) {
              if (!$lineIsBlank) {
                  $collected[] = $matches[1];
              } elseif (\strlen($this->currentLine) > $indentation) {
                  $collected[] = substr($this->currentLine, $indentation);
              } else {
                  $collected[] = '';
              }

              $hasLine = $this->moveToNextLine();

              // newline only if not EOF
              if ($hasLine) {
                  $lineIsBlank = $this->isCurrentLineBlank();
              }
          }
      } elseif ($hasLine) {
          $collected[] = '';
      }

      if ($hasLine) {
          // the current line does not belong to the scalar: give it back
          $collected[] = '';
          $this->moveToPreviousLine();
      } elseif (!$this->isCurrentLineLastLineInDocument()) {
          $collected[] = '';
      }

      if ('>' !== $style) {
          $text = implode("\n", $collected);
      } else {
          // folded style
          $text = '';
          $afterIndented = false;
          $afterBlank = false;

          foreach ($collected as $index => $line) {
              if ('' === $line) {
                  $text .= "\n";
                  $afterIndented = false;
                  $afterBlank = true;

                  continue;
              }

              if (' ' === $line[0]) {
                  // more-indented lines keep their own line
                  $text .= "\n".$line;
                  $afterIndented = true;
                  $afterBlank = false;

                  continue;
              }

              if ($afterIndented) {
                  $text .= "\n".$line;
              } elseif ($afterBlank || 0 === $index) {
                  $text .= $line;
              } else {
                  $text .= ' '.$line;
              }

              $afterIndented = false;
              $afterBlank = false;
          }
      }

      // deal with trailing newlines
      if ('-' === $chomping) {
          return preg_replace('/\n+$/', '', $text);
      }

      if ('' === $chomping) {
          return preg_replace('/\n+$/', "\n", $text);
      }

      return $text;
  }
  /**
   * Returns true if the next significant line is indented deeper than the current line.
   *
   * Blank lines and comment lines are skipped while looking ahead. The parser
   * cursor is always rewound to the line it was on when the method was called,
   * including when the end of the input is reached while skipping.
   *
   * @return bool Returns true if the next line is indented, false otherwise
   *              (also false when only blank/comment lines remain)
   */
  private function isNextLineIndented(): bool
  {
      $currentIndentation = $this->getCurrentLineIndentation();
      $movements = 0;

      // Look ahead past blank and comment lines; isCurrentLineEmpty() already
      // covers comment lines, so no separate comment check is needed.
      do {
          $EOF = !$this->moveToNextLine();

          // Only successful moves are counted so the rewind below is exact.
          if (!$EOF) {
              ++$movements;
          }
      } while (!$EOF && $this->isCurrentLineEmpty());

      $ret = !$EOF && $this->getCurrentLineIndentation() > $currentIndentation;

      // Rewind unconditionally: returning early on EOF used to leave the cursor
      // on a trailing blank/comment line instead of the caller's line.
      for ($i = 0; $i < $movements; ++$i) {
          $this->moveToPreviousLine();
      }

      return $ret;
  }
  /**
   * Tells whether the current line carries no content: it is either blank
   * or a comment line.
   *
   * @return bool Returns true if the current line is empty or if it is a comment line, false otherwise
   */
  private function isCurrentLineEmpty(): bool
  {
      if ($this->isCurrentLineBlank()) {
          return true;
      }

      return $this->isCurrentLineComment();
  }
  /**
   * Returns true if the current line is blank.
   *
   * A line is blank when nothing remains after stripping space characters.
   * Only the space character is stripped, so a line holding a tab is not blank.
   *
   * @return bool Returns true if the current line is blank, false otherwise
   */
  private function isCurrentLineBlank(): bool
  {
      // Strict comparison: trim() yields a string, so no type juggling is wanted here.
      return '' === trim($this->currentLine, ' ');
  }
  /**
   * Tells whether the current line is a comment line, i.e. its first
   * character after any leading spaces is "#".
   *
   * @return bool Returns true if the current line is a comment line, false otherwise
   */
  private function isCurrentLineComment(): bool
  {
      $line = $this->currentLine;

      // Index of the first character that is not a leading space.
      $firstContentIndex = strspn($line, ' ');

      // isset() is false when the line is empty or made of spaces only.
      return isset($line[$firstContentIndex]) && '#' === $line[$firstContentIndex];
  }
  /**
   * Tells whether the current line is at or beyond the last line of the document.
   *
   * The current line number is shifted by the parser offset before being
   * compared with the index of the last line (total number of lines minus one).
   *
   * @return bool Returns true if the current line is the last line (or past it), false otherwise
   */
  private function isCurrentLineLastLineInDocument(): bool
  {
      $shiftedLineNb = $this->offset + $this->currentLineNb;
      $lastLineNb = $this->totalNumberOfLines - 1;

      return $shiftedLineNb >= $lastLineNb;
  }
  /**
   * Prepares a raw YAML string for parsing.
   *
   * Line endings are normalised to "\n", then a leading "%YAML" directive,
   * leading comment lines and a leading "---" document start marker are
   * removed. The parser offset is increased by the number of lines removed.
   * A trailing "..." document end marker is only removed when a "---" start
   * marker was found.
   *
   * @param string $value The input YAML string
   *
   * @return string A cleaned up YAML string
   */
  private function cleanup(string $value): string
  {
      $value = str_replace(["\r\n", "\r"], "\n", $value);

      // strip YAML header; the pattern is anchored, so the count is 0 or 1
      $removedHeaders = 0;
      $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#u', '', $value, -1, $removedHeaders);
      $this->offset += $removedHeaders;

      // remove leading comments
      $removedComments = 0;
      $withoutComments = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $removedComments);
      if (1 === $removedComments) {
          // several comment lines may have gone at once: count the lost newlines
          $this->offset += substr_count($value, "\n") - substr_count($withoutComments, "\n");
          $value = $withoutComments;
      }

      // remove start of the document marker (---)
      $removedMarkers = 0;
      $withoutMarker = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $removedMarkers);
      if (1 !== $removedMarkers) {
          // no start marker: the end marker is left untouched as well
          return $value;
      }

      // the marker line has been removed, update the offset
      $this->offset += substr_count($value, "\n") - substr_count($withoutMarker, "\n");

      // remove end of the document marker (...)
      return preg_replace('#\.\.\.\s*$#', '', $withoutMarker);
  }
  /**
   * Returns true if the next significant line starts an unindented collection.
   *
   * Blank lines and comment lines are skipped while looking ahead. The next
   * line qualifies when it has exactly the same indentation as the current
   * line and is an un-indented collection item. The parser cursor is always
   * rewound to the line it was on when the method was called, including when
   * the end of the input is reached while skipping.
   *
   * @return bool Returns true if the next line starts unindented collection, false otherwise
   *              (also false when only blank/comment lines remain)
   */
  private function isNextLineUnIndentedCollection(): bool
  {
      $currentIndentation = $this->getCurrentLineIndentation();
      $movements = 0;

      // Look ahead past blank and comment lines; isCurrentLineEmpty() already
      // covers comment lines, so no separate comment check is needed.
      do {
          $EOF = !$this->moveToNextLine();

          // Only successful moves are counted so the rewind below is exact.
          if (!$EOF) {
              ++$movements;
          }
      } while (!$EOF && $this->isCurrentLineEmpty());

      $ret = !$EOF
          && $this->getCurrentLineIndentation() === $currentIndentation
          && $this->isStringUnIndentedCollectionItem();

      // Rewind unconditionally: returning early on EOF used to leave the cursor
      // on a trailing blank/comment line instead of the caller's line.
      for ($i = 0; $i < $movements; ++$i) {
          $this->moveToPreviousLine();
      }

      return $ret;
  }
  /**
   * Tells whether the current line is an un-indented collection item: it
   * either starts with "- " or consists of a single "-" once trailing
   * whitespace is removed.
   *
   * @return bool Returns true if the string is un-indented collection item, false otherwise
   */
  private function isStringUnIndentedCollectionItem(): bool
  {
      $line = $this->currentLine;

      // "- something": dash followed by a space at the very start of the line
      if (0 === strncmp($line, '- ', 2)) {
          return true;
      }

      // lone dash, possibly followed by trailing whitespace
      return '-' === rtrim($line);
  }
  /**
   * A local wrapper for "preg_match" which will throw a ParseException if there
   * is an internal error in the PCRE engine.
   *
   * This avoids us needing to check for "false" every time PCRE is used
   * in the YAML engine
   *
   * @param string     $pattern The regular expression to match
   * @param string     $subject The string to match against
   * @param array|null $matches Filled with the match results, as with the native function
   * @param int        $flags   Flags forwarded to the native function
   * @param int        $offset  Start offset forwarded to the native function
   *
   * @return int The native result: 1 when the pattern matches, 0 when it does not
   *
   * @throws ParseException on a PCRE internal error
   *
   * @see preg_last_error()
   *
   * @internal
   */
  public static function preg_match(string $pattern, string $subject, ?array &$matches = null, int $flags = 0, int $offset = 0): int
  {
      // The nullable type is spelled out: relying on the "= null" default to make
      // the parameter nullable is deprecated as of PHP 8.4.
      $ret = preg_match($pattern, $subject, $matches, $flags, $offset);

      if (false !== $ret) {
          return $ret;
      }

      // The native call failed: translate the PCRE error code into a message.
      switch (preg_last_error()) {
          case PREG_INTERNAL_ERROR:
              $error = 'Internal PCRE error.';
              break;
          case PREG_BACKTRACK_LIMIT_ERROR:
              $error = 'pcre.backtrack_limit reached.';
              break;
          case PREG_RECURSION_LIMIT_ERROR:
              $error = 'pcre.recursion_limit reached.';
              break;
          case PREG_BAD_UTF8_ERROR:
              $error = 'Malformed UTF-8 data.';
              break;
          case PREG_BAD_UTF8_OFFSET_ERROR:
              $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
              break;
          default:
              $error = 'Error.';
      }

      throw new ParseException($error);
  }
  /**
   * Trim the tag on top of the value.
   *
   * Prevent values such as "!foo {quz: bar}" to be considered as
   * a mapping block.
   *
   * When the value starts with "!", only the text between that "!" and the
   * first space, carriage return or line feed is returned ("foo" for the
   * example above). Any other value, including the empty string, is returned
   * unchanged.
   *
   * @param string $value The value to inspect
   *
   * @return string The tag text without its leading "!", or the untouched value
   */
  private function trimTag(string $value): string
  {
      // Guard the empty string first: reading $value[0] on it raises a warning.
      if ('' === $value || '!' !== $value[0]) {
          return $value;
      }

      // Length of the run after the "!" that contains no space or line break.
      $tagLength = strcspn($value, " \r\n", 1);

      return ltrim(substr($value, 1, $tagLength), ' ');
  }
  /**
   * Extracts the custom tag from a value made of a tag alone (optionally
   * followed by spaces and a comment).
   *
   * @param string $value         The value to inspect
   * @param int    $flags         Parser flags; Yaml::PARSE_CUSTOM_TAGS must be set for a tag to be returned
   * @param bool   $nextLineCheck Whether the tag only counts when the next line is indented
   *
   * @return string|null The tag without its leading "!", or null when the value is not
   *                     a lone tag or the next-line check fails
   *
   * @throws ParseException when the tag is a built-in ("!!") tag, or when custom tags support is not enabled
   */
  private function getLineTag(string $value, int $flags, bool $nextLineCheck = true): ?string
  {
      // Cheap rejections first: a tag line is non-empty and starts with "!".
      if ('' === $value || '!' !== $value[0]) {
          return null;
      }

      $isLoneTag = self::preg_match('/^'.self::TAG_PATTERN.' *( +#.*)?$/', $value, $matches);
      if (1 !== $isLoneTag) {
          return null;
      }

      if ($nextLineCheck && !$this->isNextLineIndented()) {
          return null;
      }

      // Drop the leading "!" of the matched tag.
      $tag = substr($matches['tag'], 1);

      // Built-in tags
      if ($tag && '!' === $tag[0]) {
          throw new ParseException(sprintf('The built-in tag "!%s" is not implemented.', $tag), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
      }

      if (!(Yaml::PARSE_CUSTOM_TAGS & $flags)) {
          throw new ParseException(sprintf('Tags support is not enabled. You must use the flag "Yaml::PARSE_CUSTOM_TAGS" to use "%s".', $matches['tag']), $this->getRealCurrentLineNb() + 1, $value, $this->filename);
      }

      return $tag;
  }
  904. }