Iconv.php 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Polyfill\Iconv;
  11. /**
  12. * iconv implementation in pure PHP, UTF-8 centric.
  13. *
  14. * Implemented:
  15. * - iconv - Convert string to requested character encoding
  16. * - iconv_mime_decode - Decodes a MIME header field
  17. * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
  18. * - iconv_get_encoding - Retrieve internal configuration variables of iconv extension
  19. * - iconv_set_encoding - Set current setting for character encoding conversion
  20. * - iconv_mime_encode - Composes a MIME header field
  21. * - iconv_strlen - Returns the character count of string
  22. * - iconv_strpos - Finds position of first occurrence of a needle within a haystack
  23. * - iconv_strrpos - Finds the last occurrence of a needle within a haystack
  24. * - iconv_substr - Cut out part of a string
  25. *
  26. * Charsets available for conversion are defined by files
  27. * in the charset/ directory and by Iconv::$alias below.
  28. * You're welcome to send back any addition you make.
  29. *
  30. * @author Nicolas Grekas <p@tchwork.com>
  31. *
  32. * @internal
  33. */
  34. final class Iconv
  35. {
  36. const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
  37. const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';
  38. public static $inputEncoding = 'utf-8';
  39. public static $outputEncoding = 'utf-8';
  40. public static $internalEncoding = 'utf-8';
  41. private static $alias = array(
  42. 'utf8' => 'utf-8',
  43. 'ascii' => 'us-ascii',
  44. 'tis-620' => 'iso-8859-11',
  45. 'cp1250' => 'windows-1250',
  46. 'cp1251' => 'windows-1251',
  47. 'cp1252' => 'windows-1252',
  48. 'cp1253' => 'windows-1253',
  49. 'cp1254' => 'windows-1254',
  50. 'cp1255' => 'windows-1255',
  51. 'cp1256' => 'windows-1256',
  52. 'cp1257' => 'windows-1257',
  53. 'cp1258' => 'windows-1258',
  54. 'shift-jis' => 'cp932',
  55. 'shift_jis' => 'cp932',
  56. 'latin1' => 'iso-8859-1',
  57. 'latin2' => 'iso-8859-2',
  58. 'latin3' => 'iso-8859-3',
  59. 'latin4' => 'iso-8859-4',
  60. 'latin5' => 'iso-8859-9',
  61. 'latin6' => 'iso-8859-10',
  62. 'latin7' => 'iso-8859-13',
  63. 'latin8' => 'iso-8859-14',
  64. 'latin9' => 'iso-8859-15',
  65. 'latin10' => 'iso-8859-16',
  66. 'iso8859-1' => 'iso-8859-1',
  67. 'iso8859-2' => 'iso-8859-2',
  68. 'iso8859-3' => 'iso-8859-3',
  69. 'iso8859-4' => 'iso-8859-4',
  70. 'iso8859-5' => 'iso-8859-5',
  71. 'iso8859-6' => 'iso-8859-6',
  72. 'iso8859-7' => 'iso-8859-7',
  73. 'iso8859-8' => 'iso-8859-8',
  74. 'iso8859-9' => 'iso-8859-9',
  75. 'iso8859-10' => 'iso-8859-10',
  76. 'iso8859-11' => 'iso-8859-11',
  77. 'iso8859-12' => 'iso-8859-12',
  78. 'iso8859-13' => 'iso-8859-13',
  79. 'iso8859-14' => 'iso-8859-14',
  80. 'iso8859-15' => 'iso-8859-15',
  81. 'iso8859-16' => 'iso-8859-16',
  82. 'iso_8859-1' => 'iso-8859-1',
  83. 'iso_8859-2' => 'iso-8859-2',
  84. 'iso_8859-3' => 'iso-8859-3',
  85. 'iso_8859-4' => 'iso-8859-4',
  86. 'iso_8859-5' => 'iso-8859-5',
  87. 'iso_8859-6' => 'iso-8859-6',
  88. 'iso_8859-7' => 'iso-8859-7',
  89. 'iso_8859-8' => 'iso-8859-8',
  90. 'iso_8859-9' => 'iso-8859-9',
  91. 'iso_8859-10' => 'iso-8859-10',
  92. 'iso_8859-11' => 'iso-8859-11',
  93. 'iso_8859-12' => 'iso-8859-12',
  94. 'iso_8859-13' => 'iso-8859-13',
  95. 'iso_8859-14' => 'iso-8859-14',
  96. 'iso_8859-15' => 'iso-8859-15',
  97. 'iso_8859-16' => 'iso-8859-16',
  98. 'iso88591' => 'iso-8859-1',
  99. 'iso88592' => 'iso-8859-2',
  100. 'iso88593' => 'iso-8859-3',
  101. 'iso88594' => 'iso-8859-4',
  102. 'iso88595' => 'iso-8859-5',
  103. 'iso88596' => 'iso-8859-6',
  104. 'iso88597' => 'iso-8859-7',
  105. 'iso88598' => 'iso-8859-8',
  106. 'iso88599' => 'iso-8859-9',
  107. 'iso885910' => 'iso-8859-10',
  108. 'iso885911' => 'iso-8859-11',
  109. 'iso885912' => 'iso-8859-12',
  110. 'iso885913' => 'iso-8859-13',
  111. 'iso885914' => 'iso-8859-14',
  112. 'iso885915' => 'iso-8859-15',
  113. 'iso885916' => 'iso-8859-16',
  114. );
  115. private static $translitMap = array();
  116. private static $convertMap = array();
  117. private static $errorHandler;
  118. private static $lastError;
  119. private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
  120. private static $isValidUtf8;
  /**
   * Converts a string from one character encoding to another.
   *
   * Charset names are lower-cased, default to "iso-8859-1" when empty, have
   * any trailing "//translit" / "//ignore" suffixes removed and are then
   * resolved through self::$alias. Only the suffixes found on $outCharset
   * enable transliteration / ignoring; those on $inCharset are just dropped.
   * UTF-8 is the pivot: non UTF-8 input is first mapped to UTF-8, then the
   * UTF-8 string is mapped to the target charset.
   *
   * @param string $inCharset  Name of the source charset
   * @param string $outCharset Name of the target charset, optionally suffixed with //TRANSLIT and/or //IGNORE
   * @param string $str        Data to convert (cast to string)
   *
   * @return string|false The converted string, or false on failure
   */
  public static function iconv($inCharset, $outCharset, $str)
  {
      $str = (string) $str;
      if ('' === $str) {
          return '';
      }

      $translit = $ignore = '';

      $outCharset = strtolower($outCharset);
      $inCharset = strtolower($inCharset);

      if ('' === $outCharset) {
          $outCharset = 'iso-8859-1';
      }
      if ('' === $inCharset) {
          $inCharset = 'iso-8859-1';
      }

      // Peel the option suffixes off the target charset, remembering each one.
      while (true) {
          if ('//translit' === substr($outCharset, -10)) {
              $translit = true;
              $outCharset = substr($outCharset, 0, -10);
              continue;
          }
          if ('//ignore' === substr($outCharset, -8)) {
              $ignore = true;
              $outCharset = substr($outCharset, 0, -8);
              continue;
          }
          break;
      }

      // The same suffixes are tolerated on the source charset but have no effect.
      while (true) {
          if ('//translit' === substr($inCharset, -10)) {
              $inCharset = substr($inCharset, 0, -10);
              continue;
          }
          if ('//ignore' === substr($inCharset, -8)) {
              $inCharset = substr($inCharset, 0, -8);
              continue;
          }
          break;
      }

      if (isset(self::$alias[$inCharset])) {
          $inCharset = self::$alias[$inCharset];
      }
      if (isset(self::$alias[$outCharset])) {
          $outCharset = self::$alias[$outCharset];
      }

      // Load the conversion tables; UTF-8 itself needs none.
      $inReady = 'utf-8' === $inCharset || self::loadMap('from.', $inCharset, $inMap);
      $outReady = $inReady && ('utf-8' === $outCharset || self::loadMap('to.', $outCharset, $outMap));

      if (!$outReady) {
          trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

          return false;
      }

      if ('utf-8' === $inCharset) {
          self::$isValidUtf8 = preg_match('//u', $str);

          if (!self::$isValidUtf8 && !$ignore) {
              trigger_error(self::ERROR_ILLEGAL_CHARACTER);

              return false;
          }

          if ('utf-8' === $outCharset) {
              // UTF-8 to UTF-8: validate, or strip invalid bytes with //IGNORE
              $str = self::utf8ToUtf8($str, $ignore);
          }
      } else {
          // Convert the input to UTF-8
          $utf8 = '';
          $str = self::mapToUtf8($utf8, $inMap, $str, $ignore) ? $utf8 : false;
          self::$isValidUtf8 = true;
      }

      if ('utf-8' === $outCharset || false === $str) {
          return $str;
      }

      // Convert the UTF-8 pivot string to the output charset
      $converted = '';

      return self::mapFromUtf8($converted, $outMap, $str, $ignore, $translit) ? $converted : false;
  }
  /**
   * Decodes several MIME header fields at once.
   *
   * Line endings are normalised to "\n", only the part before the first
   * blank line is considered, and that part is split into fields at every
   * newline not followed by a space or tab. Each field is decoded with
   * self::iconv_mime_decode() and split on its first ":".
   *
   * @param string      $str     Raw header block
   * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
   * @param string|null $charset Target charset; defaults to self::$internalEncoding
   *
   * @return array|false Header name => value (or list of values when a name
   *                     repeats), or false as soon as one field fails to decode
   */
  public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
  {
      if (null === $charset) {
          $charset = self::$internalEncoding;
      }

      if (false !== strpos($str, "\r")) {
          $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
      }

      $sections = explode("\n\n", $str, 2);
      $headers = array();

      foreach (preg_split('/\n(?![ \t])/', $sections[0]) as $line) {
          $decoded = self::iconv_mime_decode($line, $mode, $charset);
          if (false === $decoded) {
              return false;
          }

          $pair = explode(':', $decoded, 2);
          if (2 !== \count($pair)) {
              // Lines without a colon are not header fields
              continue;
          }

          $name = $pair[0];
          $value = ltrim($pair[1]);

          if (!isset($headers[$name])) {
              $headers[$name] = $value;
              continue;
          }

          // Repeated header: promote the stored scalar to a list, then append
          if (!\is_array($headers[$name])) {
              $headers[$name] = array($headers[$name]);
          }
          $headers[$name][] = $value;
      }

      return $headers;
  }
  /**
   * Decodes a single MIME header field.
   *
   * Only the first (possibly folded) field of $str is processed. It is
   * unfolded, split around "=?charset?B|Q?payload?=" encoded-words, and each
   * piece is converted to $charset.
   *
   * @param string      $str     The header field
   * @param int         $mode    Bitmask; with ICONV_MIME_DECODE_CONTINUE_ON_ERROR, "//IGNORE" is
   *                             appended to $charset and undecodable encoded-words are copied verbatim
   * @param string|null $charset Target charset; defaults to self::$internalEncoding
   *
   * @return string|false The decoded field, or false on failure
   */
  public static function iconv_mime_decode($str, $mode = 0, $charset = null)
  {
      if (null === $charset) {
          $charset = self::$internalEncoding;
      }

      $continueOnError = 0 !== (ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode);
      if ($continueOnError) {
          $charset .= '//IGNORE';
      }

      if (false !== strpos($str, "\r")) {
          $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
      }

      $fields = preg_split('/\n(?![ \t])/', rtrim($str), 2);
      $unfolded = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($fields[0]));

      // Layout: [plain, charset, scheme, payload, plain, charset, scheme, payload, plain, ...]
      $parts = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $unfolded, -1, PREG_SPLIT_DELIM_CAPTURE);

      $result = self::iconv('utf-8', $charset, $parts[0]);
      if (false === $result) {
          return false;
      }

      for ($i = 1, $count = \count($parts); $i < $count; $i += 4) {
          $wordCharset = strtolower($parts[$i]);

          if ($continueOnError
              && 'utf-8' !== $wordCharset
              && !isset(self::$alias[$wordCharset])
              && !self::loadMap('from.', $wordCharset, $unusedMap)
          ) {
              // Unknown charset: flag this encoded-word as undecodable
              $payload = false;
          } elseif ('B' === strtoupper($parts[$i + 1])) {
              $payload = base64_decode($parts[$i + 2]);
          } else {
              // "Q" scheme: "_" stands for a space and "=XX" for a byte
              $payload = rawurldecode(strtr(str_replace('%', '%25', $parts[$i + 2]), '=_', '% '));
          }

          if (false === $payload) {
              if (!$continueOnError) {
                  return false;
              }

              $result .= "=?{$parts[$i]}?{$parts[$i + 1]}?{$parts[$i + 2]}?={$parts[$i + 3]}";
              continue;
          }

          if ('' !== $payload) {
              $payload = self::iconv($wordCharset, $charset, $payload);
              if ('' === $payload) {
                  // Nothing was produced: drop the first byte of the text that follows
                  $parts[$i + 3] = substr($parts[$i + 3], 1);
              } else {
                  $result .= $payload;
              }
          }

          // Text between encoded-words is kept unless it is only whitespace
          $between = self::iconv('utf-8', $charset, $parts[$i + 3]);
          if ('' !== trim($between)) {
              $result .= $between;
          }
      }

      return $result;
  }
  /**
   * Returns the configured encoding(s).
   *
   * @param string $type One of "input_encoding", "output_encoding" or
   *                     "internal_encoding"; any other value selects all three
   *
   * @return string|array The requested encoding, or an array holding the three
   *                      settings keyed by their type name
   */
  public static function iconv_get_encoding($type = 'all')
  {
      switch ($type) {
          case 'input_encoding':
              return self::$inputEncoding;

          case 'output_encoding':
              return self::$outputEncoding;

          case 'internal_encoding':
              return self::$internalEncoding;

          default:
              return array(
                  'input_encoding' => self::$inputEncoding,
                  'output_encoding' => self::$outputEncoding,
                  'internal_encoding' => self::$internalEncoding,
              );
      }
  }
  /**
   * Stores a new value for one of the three encoding settings.
   *
   * @param string $type    "input_encoding", "output_encoding" or "internal_encoding"
   * @param string $charset The value to store (stored as given, without validation)
   *
   * @return bool True when $type names a known setting, false otherwise
   */
  public static function iconv_set_encoding($type, $charset)
  {
      switch ($type) {
          case 'input_encoding':
              self::$inputEncoding = $charset;

              return true;

          case 'output_encoding':
              self::$outputEncoding = $charset;

              return true;

          case 'internal_encoding':
              self::$internalEncoding = $charset;

              return true;
      }

      return false;
  }
  /**
   * Composes a MIME header field made of encoded-words.
   *
   * Recognised $pref keys and their defaults: "scheme" ("B"),
   * "input-charset" and "output-charset" (self::$internalEncoding),
   * "line-length" (76) and "line-break-chars" ("\r\n"). The value is
   * processed one UTF-8 character at a time so that no character is split
   * across two encoded-words.
   *
   * @param string     $fieldName  Header name; replaced by '' when it contains bytes in 0x80-0xFF
   * @param string     $fieldValue Header value, expressed in the input charset
   * @param array|null $pref       Options; anything that is not an array is treated as empty
   *
   * @return string|false "$fieldName: " followed by the encoded-words, or false when a conversion fails
   */
  public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
  {
      $defaults = array(
          'scheme' => 'B',
          'input-charset' => self::$internalEncoding,
          'output-charset' => self::$internalEncoding,
          'line-length' => 76,
          'line-break-chars' => "\r\n",
      );
      $pref = (\is_array($pref) ? $pref : array()) + $defaults;

      if (preg_match('/[\x80-\xFF]/', $fieldName)) {
          $fieldName = '';
      }

      $scheme = strtoupper(substr($pref['scheme'], 0, 1));
      $in = strtolower($pref['input-charset']);
      $out = strtolower($pref['output-charset']);

      if ('utf-8' !== $in) {
          $fieldValue = self::iconv($in, 'utf-8', $fieldValue);
          if (false === $fieldValue) {
              return false;
          }
      }

      preg_match_all('/./us', $fieldValue, $chars);
      $chars = isset($chars[0]) ? $chars[0] : array();

      $lineBreak = (int) $pref['line-length'];
      $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
      // The first line also carries "$fieldName: "; later ones only the leading space
      $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
      $lineOffset = \strlen($lineStart) + 3;
      $lineData = '';

      $words = array();
      $isQ = 'Q' === $scheme;

      foreach ($chars as $c) {
          if ('utf-8' !== $out) {
              $c = self::iconv('utf-8', $out, $c);
              if (false === $c) {
                  return false;
              }
          }

          if ($isQ) {
              $c = preg_replace_callback(
                  '/[=_\?\x00-\x1F\x80-\xFF]/',
                  array(__CLASS__, 'qpByteCallback'),
                  $c
              );
              $probe = $c;
          } else {
              // Base64 length depends on the whole pending line, so encode it speculatively
              $probe = base64_encode($lineData.$c);
          }

          // The probe having a byte at this offset means the line is full: flush it
          if (isset($probe[$lineBreak - $lineLength])) {
              $words[] = $lineStart.($isQ ? $lineData : base64_encode($lineData)).'?=';
              $lineLength = $lineOffset;
              $lineData = '';
          }

          $lineData .= $c;
          if ($isQ) {
              $lineLength += \strlen($c);
          }
      }

      if ('' !== $lineData) {
          $words[] = $lineStart.($isQ ? $lineData : base64_encode($lineData)).'?=';
      }

      return $fieldName.': '.implode($pref['line-break-chars'].' ', $words);
  }
  /**
   * Returns the number of characters in a string.
   *
   * Delegates to strlen1() when the "xml" extension is loaded and to
   * strlen2() otherwise; the extension check is performed once and cached.
   *
   * @param string      $s        The string to measure
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return int|false Character count, or false when the conversion to UTF-8 fails
   */
  public static function iconv_strlen($s, $encoding = null)
  {
      static $hasXml = null;

      if (null === $hasXml) {
          $hasXml = extension_loaded('xml');
      }

      return $hasXml ? self::strlen1($s, $encoding) : self::strlen2($s, $encoding);
  }
  /**
   * Counts characters by measuring the byte length of utf8_decode()'s output.
   *
   * @param string      $s        The string to measure
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return int|false Character count, or false when the conversion to UTF-8 fails
   */
  public static function strlen1($s, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      // Anything whose name does not start with "utf-8" goes through the UTF-8 pivot
      if (0 !== stripos($encoding, 'utf-8')) {
          $s = self::iconv($encoding, 'utf-8', $s);
          if (false === $s) {
              return false;
          }
      }

      return \strlen(utf8_decode($s));
  }
  /**
   * Counts characters by hopping from one UTF-8 lead byte to the next.
   *
   * The high nibble of each lead byte is looked up in self::$ulenMask to get
   * the sequence length; bytes with no entry advance the cursor by one.
   *
   * @param string      $s        The string to measure
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return int|false Character count, or false when the conversion to UTF-8 fails
   */
  public static function strlen2($s, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      if (0 !== stripos($encoding, 'utf-8')) {
          $s = self::iconv($encoding, 'utf-8', $s);
          if (false === $s) {
              return false;
          }
      }

      $ulenMask = self::$ulenMask;
      $count = 0;

      for ($i = 0, $len = \strlen($s); $i < $len; ++$count) {
          $lead = $s[$i] & "\xF0";
          $i += isset($ulenMask[$lead]) ? $ulenMask[$lead] : 1;
      }

      return $count;
  }
  /**
   * Finds the character position of the first occurrence of a needle.
   *
   * @param string      $haystack String to search in
   * @param string      $needle   String to search for
   * @param int         $offset   Number of characters to skip before searching (cast to int)
   * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
   *
   * @return int|false Position counted in characters from the start of
   *                   $haystack, or false when not found or a conversion fails
   */
  public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      if (0 !== stripos($encoding, 'utf-8')) {
          $haystack = self::iconv($encoding, 'utf-8', $haystack);
          if (false === $haystack) {
              return false;
          }

          $needle = self::iconv($encoding, 'utf-8', $needle);
          if (false === $needle) {
              return false;
          }
      }

      $offset = (int) $offset;
      if (0 !== $offset) {
          // Search only in the tail; the skipped characters are added back below
          $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
      }

      $bytePos = strpos($haystack, $needle);
      if (false === $bytePos) {
          return false;
      }
      if (0 === $bytePos) {
          return $offset;
      }

      // Translate the byte offset into a character offset
      return $offset + self::iconv_strlen(substr($haystack, 0, $bytePos), 'utf-8');
  }
  /**
   * Finds the character position of the last occurrence of a needle.
   *
   * @param string      $haystack String to search in
   * @param string      $needle   String to search for; an empty needle is never found
   * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
   *
   * @return int|false Position counted in characters, or false when not
   *                   found or a conversion fails
   */
  public static function iconv_strrpos($haystack, $needle, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      if (0 !== stripos($encoding, 'utf-8')) {
          if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
              return false;
          }
          if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
              return false;
          }
      }

      $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

      if (false === $pos) {
          return false;
      }

      // A match at byte 0 is at character 0. Measuring the whole haystack
      // here would report its length instead of the match position.
      if (0 === $pos) {
          return 0;
      }

      // Translate the byte offset into a character offset
      return self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
  }
  /**
   * Extracts part of a string, counting in characters.
   *
   * @param string      $s        The source string
   * @param int         $start    First character to keep; negative values count from the end
   * @param int         $length   Maximum number of characters; negative values stop that many
   *                              characters before the end
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return string|false The extracted part (in $encoding), '' when the resulting
   *                      length is 0, or false when $start or $length fall
   *                      outside the string or a conversion fails
   */
  public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      if (0 === stripos($encoding, 'utf-8')) {
          // Already UTF-8: null records that no conversion back is needed
          $encoding = null;
      } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
          return false;
      }

      $s = (string) $s;
      $slen = self::iconv_strlen($s, 'utf-8');
      $start = (int) $start;

      if (0 > $start) {
          $start += $slen;
      }
      if (0 > $start) {
          return false;
      }
      if ($start >= $slen) {
          return false;
      }

      // Number of characters available from $start to the end
      $rx = $slen - $start;

      if (0 > $length) {
          $length += $rx;
      }
      if (0 === $length) {
          return '';
      }
      if (0 > $length) {
          return false;
      }
      if ($length > $rx) {
          $length = $rx;
      }

      // "s" lets "." match newlines too; without it any range that contains
      // a "\n" fails to match and an empty string is returned.
      $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

      $s = preg_match($rx, $s, $match) ? $match[1] : '';

      if (null === $encoding) {
          return $s;
      }

      return self::iconv('utf-8', $encoding, $s);
  }
  /**
   * Fetches a conversion table, caching it in self::$convertMap.
   *
   * When no "to." table can be read for a charset, the matching "from."
   * table is loaded and flipped instead.
   *
   * @param string      $type    Either "from." or "to."
   * @param string      $charset Charset name
   * @param array|false &$map    Receives the table (false when nothing could be read)
   *
   * @return bool True when a table is available in $map
   */
  private static function loadMap($type, $charset, &$map)
  {
      $key = $type.$charset;

      if (isset(self::$convertMap[$key])) {
          $map = self::$convertMap[$key];

          return true;
      }

      $map = self::getData($key);

      if (false === $map) {
          if ('to.' !== $type || !self::loadMap('from.', $charset, $map)) {
              return false;
          }

          $map = array_flip($map);
      }

      self::$convertMap[$key] = $map;

      return true;
  }
  /**
   * Copies a UTF-8 string, rejecting or dropping malformed sequences.
   *
   * When self::$isValidUtf8 is truthy the per-sequence regex check is
   * skipped. Accepted bytes are written over a copy of the input, which is
   * finally cut to the number of bytes written.
   *
   * @param string $str    Input string
   * @param bool   $ignore Whether malformed bytes are skipped instead of failing
   *
   * @return string|false The cleaned string, or false (after raising
   *                      ERROR_ILLEGAL_CHARACTER) on a malformed byte without $ignore
   */
  private static function utf8ToUtf8($str, $ignore)
  {
      $ulenMask = self::$ulenMask;
      $valid = self::$isValidUtf8;

      $buffer = $str;
      $read = 0;
      $write = 0;
      $len = \strlen($str);

      while ($read < $len) {
          if ($str[$read] < "\x80") {
              // ASCII byte: copy as is
              $buffer[$write++] = $str[$read++];
              continue;
          }

          $lead = $str[$read] & "\xF0";
          $ulen = isset($ulenMask[$lead]) ? $ulenMask[$lead] : 1;
          $uchr = substr($str, $read, $ulen);

          // A length of 1 here means the byte is not a recognised lead byte
          $wellFormed = 1 !== $ulen && ($valid || preg_match('/^.$/us', $uchr));

          if (!$wellFormed) {
              if ($ignore) {
                  ++$read;
                  continue;
              }

              trigger_error(self::ERROR_ILLEGAL_CHARACTER);

              return false;
          }

          $read += $ulen;

          for ($k = 0, $n = \strlen($uchr); $k < $n; ++$k) {
              $buffer[$write++] = $uchr[$k];
          }
      }

      return substr($buffer, 0, $write);
  }
  /**
   * Appends to $result the UTF-8 translation of $str, using a charset table.
   *
   * At each position a two-byte key is tried first, then a one-byte key.
   *
   * @param string &$result Output buffer, appended to
   * @param array  $map     Table: source byte(s) => replacement string
   * @param string $str     Input in the source charset
   * @param bool   $ignore  Whether unmapped bytes are skipped instead of failing
   *
   * @return bool False (after raising ERROR_ILLEGAL_CHARACTER) on an
   *              unmapped byte without $ignore, true otherwise
   */
  private static function mapToUtf8(&$result, array $map, $str, $ignore)
  {
      $len = \strlen($str);
      $i = 0;

      while ($i < $len) {
          $byte = $str[$i];

          if (isset($str[$i + 1], $map[$byte.$str[$i + 1]])) {
              $result .= $map[$byte.$str[$i + 1]];
              $i += 2;
              continue;
          }

          if (isset($map[$byte])) {
              $result .= $map[$byte];
          } elseif (!$ignore) {
              trigger_error(self::ERROR_ILLEGAL_CHARACTER);

              return false;
          }

          ++$i;
      }

      return true;
  }
  /**
   * Appends to $result the translation of a UTF-8 string into a target charset.
   *
   * Characters missing from $map are, when $translit is set, replaced by
   * their entry in the "translit" data file or, failing that, by the first
   * byte of their NFD decomposition when that byte is ASCII. The
   * replacement is pushed back in front of the remaining input and
   * processed again.
   *
   * @param string &$result  Output buffer, appended to
   * @param array  $map      Table: UTF-8 character => replacement in the target charset
   * @param string $str      UTF-8 input
   * @param bool   $ignore   Whether unconvertible characters are skipped instead of failing
   * @param bool   $translit Whether transliteration is attempted
   *
   * @return bool False when a character cannot be converted and $ignore is off, true otherwise
   */
  private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
  {
      $ulenMask = self::$ulenMask;
      $valid = self::$isValidUtf8;

      if ($translit && !self::$translitMap) {
          self::$translitMap = self::getData('translit');
      }

      $i = 0;
      $len = \strlen($str);

      while ($i < $len) {
          if ($str[$i] < "\x80") {
              $uchr = $str[$i++];
          } else {
              $ulen = $str[$i] & "\xF0";
              $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
              $uchr = substr($str, $i, $ulen);

              if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                  // Malformed sequence: drop one byte and resynchronise
                  ++$i;
                  continue;
              }

              $i += $ulen;
          }

          if (isset($map[$uchr])) {
              $result .= $map[$uchr];
              continue;
          }

          if (!$translit) {
              if (!$ignore) {
                  return false;
              }
              continue;
          }

          if (isset(self::$translitMap[$uchr])) {
              $uchr = self::$translitMap[$uchr];
          } elseif ($uchr >= "\xC3\x80") {
              $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

              if ($uchr[0] < "\x80") {
                  // Keep only the ASCII base character of the decomposition
                  $uchr = $uchr[0];
              } elseif ($ignore) {
                  continue;
              } else {
                  return false;
              }
          } elseif ($ignore) {
              // No substitute exists. Re-queuing the same character would
              // make the loop spin forever, so skip it.
              continue;
          } else {
              return false;
          }

          // Re-inject the substitute so that it goes through $map itself
          $str = $uchr.substr($str, $i);
          $len = \strlen($str);
          $i = 0;
      }

      return true;
  }
  /**
   * preg_replace_callback() handler that renders one byte as a "Q" escape.
   *
   * @param array $m Match array; $m[0] holds the single matched byte
   *
   * @return string "=" followed by exactly two upper-case hexadecimal digits
   */
  private static function qpByteCallback(array $m)
  {
      $hex = strtoupper(dechex(\ord($m[0])));

      // dechex() yields one digit below 0x10, but the escape needs two ("=09", not "=9")
      return '='.(isset($hex[1]) ? $hex : '0'.$hex);
  }
  /**
   * Builds a regex fragment that matches exactly $offset characters.
   *
   * Counts above 65535 are expressed as several consecutive quantifiers,
   * none of which exceeds 65535.
   *
   * @param int $offset Number of characters to match (cast to int)
   *
   * @return string A sequence of ".{n}" groups
   */
  private static function pregOffset($offset)
  {
      $pattern = '';

      for ($remaining = (int) $offset; $remaining > 65535; $remaining -= 65535) {
          $pattern .= '.{65535}';
      }

      return $pattern.'.{'.$remaining.'}';
  }
  /**
   * Loads a data file from the Resources/charset/ directory.
   *
   * @param string $file Base name of the file, without the ".php" extension
   *
   * @return mixed|false Whatever the file returns, or false when the name is
   *                     rejected or the file does not exist
   */
  private static function getData($file)
  {
      // Charset names reach this point from caller-supplied values (including
      // the charset token of MIME encoded-words), so refuse anything that
      // could point outside the charset directory before require-ing it.
      if (false !== strpbrk((string) $file, "/\\\0")) {
          return false;
      }

      if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
          return require $file;
      }

      return false;
  }
  616. }