<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\Yaml;

  /**
   * Unescaper encapsulates unescaping rules for single and double-quoted
   * YAML strings.
   *
   * @author Matthew Lewinski <matthew@lewinski.org>
   *
   * @internal
   */
  class Unescaper
  {
      /**
       * Parser and Inline assume UTF-8 encoding, so escaped Unicode characters
       * must be converted to that encoding.
       *
       * @deprecated since version 2.5, to be removed in 3.0
       *
       * @internal
       */
      const ENCODING = 'UTF-8';

      /**
       * Regex fragment that matches an escaped character in a double quoted string.
       *
       * Matches a backslash followed by, in order of preference: "x" plus two
       * hex digits, "u" plus four hex digits, "U" plus eight hex digits, or any
       * single character. Because of that last alternative, a "\x", "\u" or "\U"
       * that is NOT followed by enough hex digits still matches, as the
       * two-character sequence only.
       */
      const REGEX_ESCAPED_CHARACTER = '\\\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8}|.)';

      /**
       * Unescapes a single quoted string.
       *
       * The only escape handled is the doubled single quote (''), which is
       * collapsed to one single quote.
       *
       * @param string $value A single quoted string
       *
       * @return string The unescaped string
       */
      public function unescapeSingleQuotedString($value)
      {
          return str_replace('\'\'', '\'', $value);
      }

      /**
       * Unescapes a double quoted string.
       *
       * Every sequence matched by REGEX_ESCAPED_CHARACTER is replaced by the
       * result of unescapeCharacter().
       *
       * @param string $value A double quoted string
       *
       * @return string The unescaped string (preg_replace_callback() yields null
       *                on a PCRE error, e.g. when $value is not valid UTF-8,
       *                since the pattern uses the "u" modifier)
       */
      public function unescapeDoubleQuotedString($value)
      {
          // $this is handed to the closure through a local variable; presumably
          // kept for PHP 5.3, where closures cannot reference $this directly.
          $self = $this;
          $callback = function ($match) use ($self) {
              return $self->unescapeCharacter($match[0]);
          };

          // evaluate the string
          return preg_replace_callback('/'.self::REGEX_ESCAPED_CHARACTER.'/u', $callback, $value);
      }

      /**
       * Unescapes a character that was found in a double-quoted string.
       *
       * Hexadecimal escapes (\xHH, \uHHHH, \UHHHHHHHH) are only decoded when the
       * full number of hex digits is present; a truncated or malformed one is
       * handled like any other unknown escape instead of being decoded as a
       * NUL byte.
       *
       * @param string $value An escaped character (the backslash included)
       *
       * @return string The unescaped character, or $value unchanged when the
       *                escape sequence is not recognised
       *
       * @internal This method is public to be usable as callback. It should not
       * be used in user code. Should be changed in 3.0.
       */
      public function unescapeCharacter($value)
      {
          switch ($value[1]) {
              case '0':
                  return "\x0";
              case 'a':
                  return "\x7";
              case 'b':
                  return "\x8";
              case 't':
              case "\t":
                  // both "\t" spelled out and a backslash before a literal tab
                  return "\t";
              case 'n':
                  return "\n";
              case 'v':
                  return "\xB";
              case 'f':
                  return "\xC";
              case 'r':
                  return "\r";
              case 'e':
                  return "\x1B";
              case ' ':
                  return ' ';
              case '"':
                  return '"';
              case '/':
                  return '/';
              case '\\':
                  return '\\';
              case 'N':
                  // U+0085 NEXT LINE
                  return "\xC2\x85";
              case '_':
                  // U+00A0 NO-BREAK SPACE
                  return "\xC2\xA0";
              case 'L':
                  // U+2028 LINE SEPARATOR
                  return "\xE2\x80\xA8";
              case 'P':
                  // U+2029 PARAGRAPH SEPARATOR
                  return "\xE2\x80\xA9";
              case 'x':
                  return $this->unescapeCodePoint($value, 2);
              case 'u':
                  return $this->unescapeCodePoint($value, 4);
              case 'U':
                  return $this->unescapeCodePoint($value, 8);
              default:
                  return $this->keepUnknownEscape($value);
          }
      }

      /**
       * Decodes a hexadecimal escape (\x, \u or \U) into its UTF-8 character.
       *
       * @param string $value  The escape sequence, starting with the backslash
       * @param int    $length The number of hex digits the escape requires
       *
       * @return string The UTF-8 character, or $value unchanged when fewer than
       *                $length hex digits follow the escape letter
       */
      private function unescapeCodePoint($value, $length)
      {
          // substr() returns false (PHP < 8) or '' when $value holds nothing
          // past the escape letter; the cast normalises both to ''.
          $digits = (string) substr($value, 2, $length);

          // strspn() counts the leading hex digits, so this single comparison
          // rejects both a short sequence and a non-hex character.
          if (strspn($digits, '0123456789abcdefABCDEF') !== $length) {
              return $this->keepUnknownEscape($value);
          }

          return self::utf8chr(hexdec($digits));
      }

      /**
       * Reports an unrecognised escape sequence and leaves it untouched.
       *
       * @param string $value The escape sequence that could not be interpreted
       *
       * @return string $value, unchanged
       */
      private function keepUnknownEscape($value)
      {
          @trigger_error('Not escaping a backslash in a double-quoted string is deprecated since Symfony 2.8 and will throw a ParseException in 3.0.', E_USER_DEPRECATED);

          return $value;
      }

      /**
       * Get the UTF-8 character for the given code point.
       *
       * NOTE(review): surrogates (U+D800 to U+DFFF) and values above U+10FFFF
       * are not rejected here; for those inputs the bytes produced are not
       * valid UTF-8.
       *
       * @param int $c The unicode code point
       *
       * @return string The corresponding UTF-8 character
       */
      private static function utf8chr($c)
      {
          // Keep only the low 21 bits, the most a 4-byte sequence can carry.
          $c %= 0x200000;

          if ($c < 0x80) {
              // 1 byte: 0xxxxxxx
              return chr($c);
          }

          if ($c < 0x800) {
              // 2 bytes: 110xxxxx 10xxxxxx
              return chr(0xC0 | ($c >> 6)).chr(0x80 | ($c & 0x3F));
          }

          if ($c < 0x10000) {
              // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
              return chr(0xE0 | ($c >> 12)).chr(0x80 | (($c >> 6) & 0x3F)).chr(0x80 | ($c & 0x3F));
          }

          // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          return chr(0xF0 | ($c >> 18)).chr(0x80 | (($c >> 12) & 0x3F)).chr(0x80 | (($c >> 6) & 0x3F)).chr(0x80 | ($c & 0x3F));
      }
  }