Iconv.php 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Polyfill\Iconv;
  11. /**
  12. * iconv implementation in pure PHP, UTF-8 centric.
  13. *
  14. * Implemented:
  15. * - iconv - Convert string to requested character encoding
  16. * - iconv_mime_decode - Decodes a MIME header field
  17. * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
  18. * - iconv_get_encoding - Retrieve internal configuration variables of iconv extension
  19. * - iconv_set_encoding - Set current setting for character encoding conversion
  20. * - iconv_mime_encode - Composes a MIME header field
  21. * - iconv_strlen - Returns the character count of string
  22. * - iconv_strpos - Finds position of first occurrence of a needle within a haystack
  23. * - iconv_strrpos - Finds the last occurrence of a needle within a haystack
  24. * - iconv_substr - Cut out part of a string
  25. *
  26. * Charsets available for conversion are defined by files
  27. * in the charset/ directory and by Iconv::$alias below.
  28. * You're welcome to send back any addition you make.
  29. *
  30. * @author Nicolas Grekas <p@tchwork.com>
  31. *
  32. * @internal
  33. */
  34. final class Iconv
  35. {
  /** Message passed to trigger_error() when a character of the input cannot be converted. */
  const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';

  /** sprintf() template passed to trigger_error() when a charset map cannot be loaded. */
  const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

  /** Value stored and reported for the "input_encoding" setting. */
  public static $inputEncoding = 'utf-8';

  /** Value stored and reported for the "output_encoding" setting. */
  public static $outputEncoding = 'utf-8';

  /** Charset assumed by the methods of this class when the caller passes no explicit encoding. */
  public static $internalEncoding = 'utf-8';

  /**
   * Lower-case charset aliases, each mapped to the canonical charset name
   * that is used when a conversion map is looked up.
   */
  private static $alias = array(
      // Generic shortcuts
      'utf8' => 'utf-8',
      'ascii' => 'us-ascii',
      'tis-620' => 'iso-8859-11',

      // Windows code pages
      'cp1250' => 'windows-1250',
      'cp1251' => 'windows-1251',
      'cp1252' => 'windows-1252',
      'cp1253' => 'windows-1253',
      'cp1254' => 'windows-1254',
      'cp1255' => 'windows-1255',
      'cp1256' => 'windows-1256',
      'cp1257' => 'windows-1257',
      'cp1258' => 'windows-1258',

      // Shift-JIS spellings
      'shift-jis' => 'cp932',
      'shift_jis' => 'cp932',

      // "latinN" names
      'latin1' => 'iso-8859-1',
      'latin2' => 'iso-8859-2',
      'latin3' => 'iso-8859-3',
      'latin4' => 'iso-8859-4',
      'latin5' => 'iso-8859-9',
      'latin6' => 'iso-8859-10',
      'latin7' => 'iso-8859-13',
      'latin8' => 'iso-8859-14',
      'latin9' => 'iso-8859-15',
      'latin10' => 'iso-8859-16',

      // "iso8859-N" spelling
      'iso8859-1' => 'iso-8859-1',
      'iso8859-2' => 'iso-8859-2',
      'iso8859-3' => 'iso-8859-3',
      'iso8859-4' => 'iso-8859-4',
      'iso8859-5' => 'iso-8859-5',
      'iso8859-6' => 'iso-8859-6',
      'iso8859-7' => 'iso-8859-7',
      'iso8859-8' => 'iso-8859-8',
      'iso8859-9' => 'iso-8859-9',
      'iso8859-10' => 'iso-8859-10',
      'iso8859-11' => 'iso-8859-11',
      'iso8859-12' => 'iso-8859-12',
      'iso8859-13' => 'iso-8859-13',
      'iso8859-14' => 'iso-8859-14',
      'iso8859-15' => 'iso-8859-15',
      'iso8859-16' => 'iso-8859-16',

      // "iso_8859-N" spelling
      'iso_8859-1' => 'iso-8859-1',
      'iso_8859-2' => 'iso-8859-2',
      'iso_8859-3' => 'iso-8859-3',
      'iso_8859-4' => 'iso-8859-4',
      'iso_8859-5' => 'iso-8859-5',
      'iso_8859-6' => 'iso-8859-6',
      'iso_8859-7' => 'iso-8859-7',
      'iso_8859-8' => 'iso-8859-8',
      'iso_8859-9' => 'iso-8859-9',
      'iso_8859-10' => 'iso-8859-10',
      'iso_8859-11' => 'iso-8859-11',
      'iso_8859-12' => 'iso-8859-12',
      'iso_8859-13' => 'iso-8859-13',
      'iso_8859-14' => 'iso-8859-14',
      'iso_8859-15' => 'iso-8859-15',
      'iso_8859-16' => 'iso-8859-16',

      // "iso8859N" spelling (no separators)
      'iso88591' => 'iso-8859-1',
      'iso88592' => 'iso-8859-2',
      'iso88593' => 'iso-8859-3',
      'iso88594' => 'iso-8859-4',
      'iso88595' => 'iso-8859-5',
      'iso88596' => 'iso-8859-6',
      'iso88597' => 'iso-8859-7',
      'iso88598' => 'iso-8859-8',
      'iso88599' => 'iso-8859-9',
      'iso885910' => 'iso-8859-10',
      'iso885911' => 'iso-8859-11',
      'iso885912' => 'iso-8859-12',
      'iso885913' => 'iso-8859-13',
      'iso885914' => 'iso-8859-14',
      'iso885915' => 'iso-8859-15',
      'iso885916' => 'iso-8859-16',
  );

  /** Transliteration table; filled on first use by mapFromUtf8() from the "translit" data file. */
  private static $translitMap = array();

  /** Cache of conversion maps already loaded by loadMap(), keyed by "<type><charset>". */
  private static $convertMap = array();

  /** Not referenced by any method of this class. */
  private static $errorHandler;

  /** Not referenced by any method of this class. */
  private static $lastError;

  /**
   * Byte length of a UTF-8 sequence, keyed by its lead byte masked with "\xF0".
   * Lead bytes whose masked value is not listed are treated as 1 byte long by the callers.
   */
  private static $ulenMask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

  /** Result of the UTF-8 validity check done by iconv(); read by utf8ToUtf8() and mapFromUtf8(). */
  private static $isValidUtf8;
  /**
   * Converts a string from one character encoding to another.
   *
   * Charset names are case-insensitive, an empty name means "iso-8859-1", and
   * names listed in self::$alias are replaced by their canonical form. The
   * "//TRANSLIT" and "//IGNORE" suffixes are accepted on both charsets, in any
   * order and any combination; only the ones on $outCharset have an effect.
   *
   * @param string $inCharset  Charset of $str
   * @param string $outCharset Target charset, optionally followed by //TRANSLIT and/or //IGNORE
   * @param string $str        The string to convert
   *
   * @return string|false The converted string, or false when a charset map
   *                      cannot be loaded or the conversion fails
   */
  public static function iconv($inCharset, $outCharset, $str)
  {
      if ('' === $str .= '') {
          return '';
      }

      // Flags requested through the //IGNORE and //TRANSLIT suffixes
      $translit = $ignore = '';

      $outCharset = strtolower($outCharset);
      $inCharset = strtolower($inCharset);

      if ('' === $outCharset) {
          $outCharset = 'iso-8859-1';
      }
      if ('' === $inCharset) {
          $inCharset = 'iso-8859-1';
      }

      // Keep stripping until no suffix is left, so that "//translit//ignore"
      // is handled the same way as "//ignore//translit".
      do {
          $stripped = false;

          if ('//translit' === substr($outCharset, -10)) {
              $translit = '//TRANSLIT';
              $outCharset = substr($outCharset, 0, -10);
              $stripped = true;
          }
          if ('//ignore' === substr($outCharset, -8)) {
              $ignore = '//IGNORE';
              $outCharset = substr($outCharset, 0, -8);
              $stripped = true;
          }
      } while ($stripped);

      // Suffixes on the input charset are removed but otherwise have no effect.
      do {
          $stripped = false;

          if ('//translit' === substr($inCharset, -10)) {
              $inCharset = substr($inCharset, 0, -10);
              $stripped = true;
          }
          if ('//ignore' === substr($inCharset, -8)) {
              $inCharset = substr($inCharset, 0, -8);
              $stripped = true;
          }
      } while ($stripped);

      if (isset(self::$alias[$inCharset])) {
          $inCharset = self::$alias[$inCharset];
      }
      if (isset(self::$alias[$outCharset])) {
          $outCharset = self::$alias[$outCharset];
      }

      // Load charset maps (UTF-8 needs none on either side)
      if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
          || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
          trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

          return false;
      }

      if ('utf-8' !== $inCharset) {
          // Convert input to UTF-8
          $result = '';
          if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
              $str = $result;
          } else {
              $str = false;
          }
          self::$isValidUtf8 = true;
      } else {
          self::$isValidUtf8 = preg_match('//u', $str);

          if (!self::$isValidUtf8 && !$ignore) {
              trigger_error(self::ERROR_ILLEGAL_CHARACTER);

              return false;
          }

          if ('utf-8' === $outCharset) {
              // UTF-8 validation
              $str = self::utf8ToUtf8($str, $ignore);
          }
      }

      if ('utf-8' !== $outCharset && false !== $str) {
          // Convert the UTF-8 intermediate to the output charset
          $result = '';
          if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
              return $result;
          }

          return false;
      }

      return $str;
  }
  /**
   * Decodes every MIME header field found in the header section of $str.
   *
   * Line endings are normalised to "\n", everything after the first blank
   * line is discarded, and each (possibly folded) header line is decoded with
   * iconv_mime_decode(). Lines without a ":" are skipped.
   *
   * @param string      $str     Raw header block
   * @param int         $mode    Bitmask forwarded to iconv_mime_decode()
   * @param string|null $charset Target charset; defaults to self::$internalEncoding
   *
   * @return array|false Field name => value, or a list of values when a field
   *                     occurs more than once; false if decoding a line fails
   */
  public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
  {
      if (null === $charset) {
          $charset = self::$internalEncoding;
      }

      if (false !== strpos($str, "\r")) {
          $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
      }

      // Only the part before the first blank line is the header section.
      $sections = explode("\n\n", $str, 2);

      // A newline followed by a space or tab is a folded continuation, not a new field.
      $lines = preg_split('/\n(?![ \t])/', $sections[0]);

      $headers = array();

      foreach ($lines as $line) {
          $decoded = self::iconv_mime_decode($line, $mode, $charset);
          if (false === $decoded) {
              return false;
          }

          $pair = explode(':', $decoded, 2);
          if (2 !== count($pair)) {
              continue;
          }

          $name = $pair[0];
          $value = ltrim($pair[1]);

          if (!isset($headers[$name])) {
              $headers[$name] = $value;
              continue;
          }

          // Repeated field: promote the stored scalar to a list, then append.
          if (!is_array($headers[$name])) {
              $headers[$name] = array($headers[$name]);
          }
          $headers[$name][] = $value;
      }

      return $headers;
  }
  /**
   * Decodes the first MIME header field of $str.
   *
   * The field is unfolded, split around its "=?charset?B|Q?data?=" encoded
   * words, and every piece is converted to $charset.
   *
   * @param string      $str     Header field (further lines are ignored)
   * @param int         $mode    Bitmask; ICONV_MIME_DECODE_CONTINUE_ON_ERROR makes
   *                             conversions use //IGNORE and keeps encoded words with
   *                             an unknown charset verbatim instead of failing
   * @param string|null $charset Target charset; defaults to self::$internalEncoding
   *
   * @return string|false The decoded field, or false on failure
   */
  public static function iconv_mime_decode($str, $mode = 0, $charset = null)
  {
      if (null === $charset) {
          $charset = self::$internalEncoding;
      }

      $lenient = ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode;
      if ($lenient) {
          $charset .= '//IGNORE';
      }

      if (false !== strpos($str, "\r")) {
          $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
      }

      // Keep the first field only, then unfold its continuation lines.
      $fields = preg_split('/\n(?![ \t])/', rtrim($str), 2);
      $unfolded = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($fields[0]));

      // Result layout: text, (charset, scheme, payload, text)*
      $tokens = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $unfolded, -1, PREG_SPLIT_DELIM_CAPTURE);

      $result = self::iconv('utf-8', $charset, $tokens[0]);
      if (false === $result) {
          return false;
      }

      $total = count($tokens);

      for ($i = 1; $i < $total; $i += 4) {
          $wordCharset = strtolower($tokens[$i]);
          $scheme = $tokens[$i + 1];
          $payload = $tokens[$i + 2];
          $trailing = $tokens[$i + 3];

          if ($lenient
              && 'utf-8' !== $wordCharset
              && !isset(self::$alias[$wordCharset])
              && !self::loadMap('from.', $wordCharset, $decoded)) {
              // Unknown charset: flag the word as undecodable.
              $decoded = false;
          } elseif ('B' === strtoupper($scheme)) {
              $decoded = base64_decode($payload);
          } else {
              // Q scheme: "_" stands for a space and "=XX" for a byte; literal "%" is protected first.
              $decoded = rawurldecode(strtr(str_replace('%', '%25', $payload), '=_', '% '));
          }

          if (false !== $decoded) {
              if ('' !== $decoded) {
                  $converted = self::iconv($wordCharset, $charset, $decoded);
                  if ('' === $converted) {
                      // Nothing was produced: drop the first byte of the text that follows.
                      $trailing = substr($trailing, 1);
                  } else {
                      $result .= $converted;
                  }
              }

              // Text between encoded words is kept unless it is whitespace only.
              $tail = self::iconv('utf-8', $charset, $trailing);
              if ('' !== trim($tail)) {
                  $result .= $tail;
              }
          } elseif ($lenient) {
              $result .= "=?{$tokens[$i]}?{$tokens[$i + 1]}?{$tokens[$i + 2]}?={$tokens[$i + 3]}";
          } else {
              $result = false;
              break;
          }
      }

      return $result;
  }
  /**
   * Returns the stored encoding settings.
   *
   * @param string $type "input_encoding", "output_encoding" or "internal_encoding";
   *                     any other value returns all three
   *
   * @return string|array The requested setting, or an array of all settings keyed by name
   */
  public static function iconv_get_encoding($type = 'all')
  {
      // Loose comparison on purpose: it matches the way a switch statement compares its cases.
      if ('input_encoding' == $type) {
          return self::$inputEncoding;
      }
      if ('output_encoding' == $type) {
          return self::$outputEncoding;
      }
      if ('internal_encoding' == $type) {
          return self::$internalEncoding;
      }

      return array(
          'input_encoding' => self::$inputEncoding,
          'output_encoding' => self::$outputEncoding,
          'internal_encoding' => self::$internalEncoding,
      );
  }
  /**
   * Stores a new value for one of the encoding settings.
   *
   * @param string $type    "input_encoding", "output_encoding" or "internal_encoding"
   * @param string $charset Value to store; it is not validated here
   *
   * @return bool True when $type names a known setting, false otherwise
   */
  public static function iconv_set_encoding($type, $charset)
  {
      // Loose comparison on purpose: it matches the way a switch statement compares its cases.
      if ('input_encoding' == $type) {
          self::$inputEncoding = $charset;

          return true;
      }
      if ('output_encoding' == $type) {
          self::$outputEncoding = $charset;

          return true;
      }
      if ('internal_encoding' == $type) {
          self::$internalEncoding = $charset;

          return true;
      }

      return false;
  }
  /**
   * Composes a MIME header field made of one or more encoded words.
   *
   * @param string     $fieldName  Field name; replaced by '' if it contains bytes >= 0x80
   * @param string     $fieldValue Field value, in the "input-charset" preference
   * @param array|null $pref       Preferences: "scheme" (B or Q), "input-charset",
   *                               "output-charset", "line-length", "line-break-chars";
   *                               missing keys take the defaults set below
   *
   * @return string|false "<name>: <encoded words>", or false if a conversion fails
   */
  public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
  {
      if (!is_array($pref)) {
          $pref = array();
      }

      // "+" keeps the caller's values and only fills in the missing keys.
      $pref += array(
          'scheme' => 'B',
          'input-charset' => self::$internalEncoding,
          'output-charset' => self::$internalEncoding,
          'line-length' => 76,
          'line-break-chars' => "\r\n",
      );

      if (preg_match('/[\x80-\xFF]/', $fieldName)) {
          $fieldName = '';
      }

      $scheme = strtoupper(substr($pref['scheme'], 0, 1));
      $in = strtolower($pref['input-charset']);
      $out = strtolower($pref['output-charset']);

      if ('utf-8' !== $in) {
          $fieldValue = self::iconv($in, 'utf-8', $fieldValue);
          if (false === $fieldValue) {
              return false;
          }
      }

      // Split into UTF-8 characters so that no character is cut across two encoded words.
      preg_match_all('/./us', $fieldValue, $matches);
      $chars = isset($matches[0]) ? $matches[0] : array();

      $maxLength = (int) $pref['line-length'];
      $wordStart = "=?{$pref['output-charset']}?{$scheme}?";
      $wordStartLength = strlen($wordStart);

      // Length already used on the first line, and on each continuation line.
      $lineLength = strlen($fieldName) + 2 + $wordStartLength + 2;
      $continuationLength = $wordStartLength + 3;

      $buffer = '';
      $words = array();
      $isQ = 'Q' === $scheme;

      foreach ($chars as $char) {
          if ('utf-8' !== $out) {
              $char = self::iconv('utf-8', $out, $char);
              if (false === $char) {
                  return false;
              }
          }

          // $candidate is what gets measured against the room left on the line.
          if ($isQ) {
              $char = preg_replace_callback(
                  '/[=_\?\x00-\x1F\x80-\xFF]/',
                  array(__CLASS__, 'qpByteCallback'),
                  $char
              );
              $candidate = $char;
          } else {
              $candidate = base64_encode($buffer.$char);
          }

          if (isset($candidate[$maxLength - $lineLength])) {
              // No room left: close the current encoded word and start a new line.
              if (!$isQ) {
                  $buffer = base64_encode($buffer);
              }
              $words[] = $wordStart.$buffer.'?=';
              $lineLength = $continuationLength;
              $buffer = '';
          }

          $buffer .= $char;
          if ($isQ) {
              $lineLength += strlen($char);
          }
      }

      if ('' !== $buffer) {
          if (!$isQ) {
              $buffer = base64_encode($buffer);
          }
          $words[] = $wordStart.$buffer.'?=';
      }

      return $fieldName.': '.implode($pref['line-break-chars'].' ', $words);
  }
  /**
   * Returns the number of characters in $s.
   *
   * Delegates to strlen1() when the "xml" extension is loaded and to
   * strlen2() otherwise; the extension check is done once per process.
   *
   * @param string      $s        The string to measure
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return int|false The character count, or false if conversion to UTF-8 fails
   */
  public static function iconv_strlen($s, $encoding = null)
  {
      static $hasXml = null;

      if (null === $hasXml) {
          $hasXml = extension_loaded('xml');
      }

      return $hasXml ? self::strlen1($s, $encoding) : self::strlen2($s, $encoding);
  }
  /**
   * Counts characters by decoding UTF-8 with utf8_decode() and measuring the result.
   *
   * @param string      $s        The string to measure
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return int|false The character count, or false if conversion to UTF-8 fails
   */
  public static function strlen1($s, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      // Anything whose name does not start with "utf-8" is converted first.
      if (0 !== stripos($encoding, 'utf-8')) {
          $s = self::iconv($encoding, 'utf-8', $s);
          if (false === $s) {
              return false;
          }
      }

      return strlen(utf8_decode($s));
  }
  /**
   * Counts characters by walking the UTF-8 lead bytes of $s.
   *
   * @param string      $s        The string to measure
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return int|false The character count, or false if conversion to UTF-8 fails
   */
  public static function strlen2($s, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      // Anything whose name does not start with "utf-8" is converted first.
      if (0 !== stripos($encoding, 'utf-8')) {
          $s = self::iconv($encoding, 'utf-8', $s);
          if (false === $s) {
              return false;
          }
      }

      $sequenceLengths = self::$ulenMask;
      $byteLength = strlen($s);
      $count = 0;

      // Each pass jumps over one whole sequence; unknown lead bytes count as one byte.
      for ($offset = 0; $offset < $byteLength; ++$count) {
          $lead = $s[$offset] & "\xF0";
          $offset += isset($sequenceLengths[$lead]) ? $sequenceLengths[$lead] : 1;
      }

      return $count;
  }
  /**
   * Finds the character position of the first occurrence of $needle in $haystack.
   *
   * @param string      $haystack String to search in
   * @param string      $needle   String to search for
   * @param int         $offset   Character offset at which the search starts
   * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
   *
   * @return int|false Character position counted from the start of $haystack,
   *                   or false when not found or when a conversion fails
   */
  public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      if (0 !== stripos($encoding, 'utf-8')) {
          $haystack = self::iconv($encoding, 'utf-8', $haystack);
          if (false === $haystack) {
              return false;
          }

          $needle = self::iconv($encoding, 'utf-8', $needle);
          if (false === $needle) {
              return false;
          }
      }

      $offset = (int) $offset;
      if ($offset) {
          // Search only in the part of the haystack that starts at $offset.
          $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
      }

      $bytePos = strpos($haystack, $needle);
      if (false === $bytePos) {
          return false;
      }
      if (!$bytePos) {
          return $offset;
      }

      // Turn the byte position into a character count, then add the skipped characters back.
      return $offset + self::iconv_strlen(substr($haystack, 0, $bytePos), 'utf-8');
  }
  /**
   * Finds the character position of the last occurrence of $needle in $haystack.
   *
   * @param string      $haystack String to search in
   * @param string      $needle   String to search for; an empty needle is never found
   * @param string|null $encoding Charset of both strings; defaults to self::$internalEncoding
   *
   * @return int|false Character position of the last match, or false when
   *                   not found or when a conversion fails
   */
  public static function iconv_strrpos($haystack, $needle, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      if (0 !== stripos($encoding, 'utf-8')) {
          if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
              return false;
          }
          if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
              return false;
          }
      }

      $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

      if (false === $pos) {
          return false;
      }

      // A match at byte 0 is at character 0; measuring the whole haystack
      // here would report its length instead of the position.
      if (0 === $pos) {
          return 0;
      }

      // Turn the byte position into a character count.
      return self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
  }
  /**
   * Returns the part of $s selected by a character offset and length.
   *
   * @param string      $s        Source string
   * @param int         $start    Character offset; a negative value counts from the end
   * @param int         $length   Number of characters; a negative value leaves that
   *                              many characters off the end
   * @param string|null $encoding Charset of $s; defaults to self::$internalEncoding
   *
   * @return string|false The selected part in the original charset; false when
   *                      $start lies outside the string, when the resulting
   *                      length is negative, or when a conversion fails
   */
  public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
  {
      if (null === $encoding) {
          $encoding = self::$internalEncoding;
      }

      if (0 === stripos($encoding, 'utf-8')) {
          // Already UTF-8: nothing to convert, neither on the way in nor on the way out.
          $encoding = null;
      } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
          return false;
      }

      $s .= '';
      $slen = self::iconv_strlen($s, 'utf-8');
      $start = (int) $start;

      if (0 > $start) {
          $start += $slen;
      }
      if (0 > $start || $start >= $slen) {
          return false;
      }

      $remaining = $slen - $start;

      if (0 > $length) {
          $length += $remaining;
      }
      if (0 === $length) {
          return '';
      }
      if (0 > $length) {
          return false;
      }
      if ($length > $remaining) {
          $length = $remaining;
      }

      // The "s" modifier is required: without it "." does not match a newline
      // and any string containing one would yield ''.
      $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/us';

      $s = preg_match($rx, $s, $match) ? $match[1] : '';

      if (null === $encoding) {
          return $s;
      }

      return self::iconv('utf-8', $encoding, $s);
  }
  /**
   * Loads a conversion map, using self::$convertMap as a cache.
   *
   * When no "to." data exists for a charset, the "from." map is loaded
   * instead and flipped.
   *
   * @param string $type    "from." or "to."
   * @param string $charset Canonical charset name
   * @param mixed  $map     Receives the map (by reference); false when the
   *                        data could not be found
   *
   * @return bool True when a map was loaded, false otherwise
   */
  private static function loadMap($type, $charset, &$map)
  {
      $key = $type.$charset;

      if (isset(self::$convertMap[$key])) {
          $map = self::$convertMap[$key];

          return true;
      }

      $map = self::getData($key);

      if (false === $map) {
          if ('to.' !== $type || !self::loadMap('from.', $charset, $map)) {
              return false;
          }
          $map = array_flip($map);
      }

      self::$convertMap[$key] = $map;

      return true;
  }
  /**
   * Copies $str while checking it as UTF-8.
   *
   * Relies on self::$isValidUtf8 as set by iconv(): when it is truthy,
   * multi-byte sequences are not re-checked one by one.
   *
   * @param string $str    Input string
   * @param string $ignore Truthy to skip invalid bytes instead of failing
   *
   * @return string|false The copied string, or false (after trigger_error())
   *                      on an invalid byte when $ignore is falsy
   */
  private static function utf8ToUtf8($str, $ignore)
  {
      $sequenceLengths = self::$ulenMask;
      $knownValid = self::$isValidUtf8;

      // The output is written over a copy of the input and truncated at the end.
      $out = $str;
      $written = 0;
      $byteLength = strlen($str);
      $pos = 0;

      while ($pos < $byteLength) {
          $byte = $str[$pos];

          if ($byte < "\x80") {
              $out[$written++] = $byte;
              ++$pos;
              continue;
          }

          $lead = $byte & "\xF0";
          $sequenceLength = isset($sequenceLengths[$lead]) ? $sequenceLengths[$lead] : 1;
          $sequence = substr($str, $pos, $sequenceLength);

          // A high byte that is not a known lead byte, or a sequence that
          // fails the one-character regex, is invalid.
          $invalid = 1 === $sequenceLength || !($knownValid || preg_match('/^.$/us', $sequence));

          if ($invalid) {
              if ($ignore) {
                  ++$pos;
                  continue;
              }

              trigger_error(self::ERROR_ILLEGAL_CHARACTER);

              return false;
          }

          $pos += $sequenceLength;

          // Copy every byte that substr() actually returned.
          for ($k = 0; isset($sequence[$k]); ++$k) {
              $out[$written++] = $sequence[$k];
          }
      }

      return substr($out, 0, $written);
  }
  /**
   * Appends to $result the mapped form of $str, using a charset map.
   *
   * At each position a two-byte key is tried before a one-byte key.
   *
   * @param string $result Receives the converted text (appended, by reference)
   * @param array  $map    Input byte sequence => replacement string
   * @param string $str    Input string
   * @param string $ignore Truthy to skip unmapped bytes instead of failing
   *
   * @return bool False (after trigger_error()) on an unmapped byte when
   *              $ignore is falsy, true otherwise
   */
  private static function mapToUtf8(&$result, $map, $str, $ignore)
  {
      $byteLength = strlen($str);
      $pos = 0;

      while ($pos < $byteLength) {
          $byte = $str[$pos];

          if (isset($str[$pos + 1], $map[$byte.$str[$pos + 1]])) {
              $result .= $map[$byte.$str[$pos + 1]];
              $pos += 2;
              continue;
          }

          if (isset($map[$byte])) {
              $result .= $map[$byte];
          } elseif (!$ignore) {
              trigger_error(self::ERROR_ILLEGAL_CHARACTER);

              return false;
          }

          ++$pos;
      }

      return true;
  }
  /**
   * Appends to $result the form of the UTF-8 string $str in a target charset.
   *
   * Relies on self::$isValidUtf8 as set by iconv(): when it is truthy,
   * multi-byte sequences are not re-checked one by one.
   *
   * @param string $result   Receives the converted text (appended, by reference)
   * @param array  $map      UTF-8 character => replacement in the target charset
   * @param string $str      UTF-8 input
   * @param string $ignore   Truthy to skip characters that cannot be converted
   * @param string $translit Truthy to try the transliteration table, then NFD
   *                         decomposition, for characters missing from $map
   *
   * @return bool False when a character cannot be converted and $ignore is
   *              falsy, true otherwise
   */
  private static function mapFromUtf8(&$result, $map, $str, $ignore, $translit)
  {
      $ulenMask = self::$ulenMask;
      $valid = self::$isValidUtf8;

      if ($translit && !self::$translitMap) {
          self::$translitMap = self::getData('translit');
      }

      $i = 0;
      $len = strlen($str);

      while ($i < $len) {
          if ($str[$i] < "\x80") {
              $uchr = $str[$i++];
          } else {
              $ulen = $str[$i] & "\xF0";
              $ulen = isset($ulenMask[$ulen]) ? $ulenMask[$ulen] : 1;
              $uchr = substr($str, $i, $ulen);

              if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                  // Invalid byte in //IGNORE mode: skip it.
                  ++$i;
                  continue;
              }

              $i += $ulen;
          }

          if (isset($map[$uchr])) {
              $result .= $map[$uchr];
              continue;
          }

          if (!$translit) {
              if (!$ignore) {
                  return false;
              }
              continue;
          }

          if (isset(self::$translitMap[$uchr])) {
              $uchr = self::$translitMap[$uchr];
          } elseif ($uchr >= "\xC3\x80") {
              // Decompose and keep the base letter if it is ASCII.
              $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

              if ($uchr[0] < "\x80") {
                  $uchr = $uchr[0];
              } elseif ($ignore) {
                  continue;
              } else {
                  return false;
              }
          } elseif ($ignore) {
              // No transliteration exists for this character. It must not be
              // pushed back unchanged, or the scan below would restart on the
              // very same character forever.
              continue;
          } else {
              return false;
          }

          // Put the replacement in front of the remaining input and rescan,
          // so that the replacement itself goes through $map.
          $str = $uchr.substr($str, $i);
          $len = strlen($str);
          $i = 0;
      }

      return true;
  }
  /**
   * preg_replace_callback() handler that encodes one byte for the "Q" scheme.
   *
   * @param array $m Match array; $m[0] is the byte to encode
   *
   * @return string "=" followed by exactly two upper-case hexadecimal digits
   */
  private static function qpByteCallback($m)
  {
      // %02X pads to two digits; bytes below 0x10 would otherwise come out as "=9" or "=A".
      return sprintf('=%02X', ord($m[0]));
  }
  /**
   * Builds a regex fragment that matches exactly $offset characters.
   *
   * The count is emitted in ".{65535}" chunks followed by one final ".{n}"
   * quantifier holding the remainder.
   *
   * @param int $offset Number of characters to match
   *
   * @return string The regex fragment
   */
  private static function pregOffset($offset)
  {
      $remaining = (int) $offset;
      $pattern = '';

      while ($remaining > 65535) {
          $pattern .= '.{65535}';
          $remaining -= 65535;
      }

      return $pattern.'.{'.$remaining.'}';
  }
  /**
   * Loads a data file from the Resources/charset/ directory next to this file.
   *
   * @param string $file Base name of the data file, without the ".php" extension
   *
   * @return mixed Whatever the data file returns, or false when the name is
   *               rejected or the file does not exist
   */
  private static function getData($file)
  {
      // The name can come from a charset label found in untrusted input (for
      // example a MIME header), so it must never be able to leave the data
      // directory: refuse path separators and NUL bytes.
      if (false !== strpos($file, '/') || false !== strpos($file, '\\') || false !== strpos($file, "\0")) {
          return false;
      }

      if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
          return require $file;
      }

      return false;
  }
  607. }