Unescaper.php 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Yaml;
  11. /**
  12. * Unescaper encapsulates unescaping rules for single and double-quoted
  13. * YAML strings.
  14. *
  15. * @author Matthew Lewinski <matthew@lewinski.org>
  16. */
  class Unescaper
  {
      /**
       * Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
       * must be converted to that encoding.
       *
       * @deprecated since version 2.5, to be removed in 3.0
       *
       * @internal
       */
      const ENCODING = 'UTF-8';

      /**
       * Regex fragment that matches an escaped character in a double quoted string.
       *
       * It matches a backslash followed by either one of the single-character
       * escapes, "x" plus 2 hex digits, "u" plus 4 hex digits, or "U" plus
       * 8 hex digits. The fragment carries no delimiters or modifiers; callers
       * wrap it themselves.
       */
      const REGEX_ESCAPED_CHARACTER = "\\\\([0abt\tnvfre \\\"\\/\\\\N_LP]|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})";

      /**
       * Unescapes a single quoted string.
       *
       * The only escape handled here is a doubled single quote (''), which is
       * collapsed into one single quote.
       *
       * @param string $value A single quoted string.
       *
       * @return string The unescaped string.
       */
      public function unescapeSingleQuotedString($value)
      {
          return str_replace('\'\'', '\'', $value);
      }

      /**
       * Unescapes a double quoted string.
       *
       * Every substring matching REGEX_ESCAPED_CHARACTER is replaced by the
       * result of unescapeCharacter(); everything else is left untouched.
       *
       * @param string $value A double quoted string.
       *
       * @return string The unescaped string.
       */
      public function unescapeDoubleQuotedString($value)
      {
          // Captured explicitly (rather than relying on $this inside the
          // closure) so the code keeps working on PHP 5.3.
          $self = $this;
          $callback = function ($match) use ($self) {
              // $match[0] is the full escape sequence, backslash included.
              return $self->unescapeCharacter($match[0]);
          };

          // evaluate the string
          return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
      }

      /**
       * Unescapes a character that was found in a double-quoted string.
       *
       * The character following the leading backslash selects the replacement.
       *
       * @param string $value An escaped character, including its leading backslash
       *
       * @return string|null The unescaped character, or null when the escape
       *                     sequence is not one of the recognised ones
       */
      public function unescapeCharacter($value)
      {
          // Square-bracket offset: the curly-brace form ($value{1}) was
          // deprecated in PHP 7.4 and is a parse error since PHP 8.0.
          switch ($value[1]) {
              case '0':
                  return "\x0";
              case 'a':
                  return "\x7";
              case 'b':
                  return "\x8";
              case 't':
                  return "\t";
              case "\t":
                  // A backslash followed by a literal tab also yields a tab.
                  return "\t";
              case 'n':
                  return "\n";
              case 'v':
                  return "\xB";
              case 'f':
                  return "\xC";
              case 'r':
                  return "\r";
              case 'e':
                  return "\x1B";
              case ' ':
                  return ' ';
              case '"':
                  return '"';
              case '/':
                  return '/';
              case '\\':
                  return '\\';
              case 'N':
                  // U+0085 NEXT LINE
                  return "\xC2\x85";
              case '_':
                  // U+00A0 NO-BREAK SPACE
                  return "\xC2\xA0";
              case 'L':
                  // U+2028 LINE SEPARATOR
                  return "\xE2\x80\xA8";
              case 'P':
                  // U+2029 PARAGRAPH SEPARATOR
                  return "\xE2\x80\xA9";
              case 'x':
                  // Two hex digits after "\x".
                  return self::utf8chr(hexdec(substr($value, 2, 2)));
              case 'u':
                  // Four hex digits after "\u".
                  return self::utf8chr(hexdec(substr($value, 2, 4)));
              case 'U':
                  // Eight hex digits after "\U".
                  return self::utf8chr(hexdec(substr($value, 2, 8)));
              default:
                  // Unrecognised escape: keep the historical "no value" result.
                  return null;
          }
      }

      /**
       * Get the UTF-8 character for the given code point.
       *
       * The code point is first reduced modulo 0x200000, then encoded as a
       * 1, 2, 3 or 4 byte sequence depending on its magnitude.
       *
       * @param int $c The unicode code point
       *
       * @return string The corresponding UTF-8 character
       */
      private static function utf8chr($c)
      {
          // Note: the assignment happens inside the condition, so $c is
          // already reduced for every branch below.
          if (0x80 > $c %= 0x200000) {
              return chr($c);
          }
          if (0x800 > $c) {
              return chr(0xC0 | $c >> 6).chr(0x80 | $c & 0x3F);
          }
          if (0x10000 > $c) {
              return chr(0xE0 | $c >> 12).chr(0x80 | $c >> 6 & 0x3F).chr(0x80 | $c & 0x3F);
          }

          return chr(0xF0 | $c >> 18).chr(0x80 | $c >> 12 & 0x3F).chr(0x80 | $c >> 6 & 0x3F).chr(0x80 | $c & 0x3F);
      }
  }