UnicodeTest.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. <?php
  2. namespace Drupal\Tests\Component\Utility;
  3. use Drupal\Component\Utility\Unicode;
  4. use PHPUnit\Framework\TestCase;
  5. /**
  6. * Test unicode handling features implemented in Unicode component.
  7. *
  8. * @group Utility
  9. *
  10. * @coversDefaultClass \Drupal\Component\Utility\Unicode
  11. */
  12. class UnicodeTest extends TestCase {
  /**
   * Verifies that calling Unicode::setStatus() raises its deprecation notice.
   *
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::setStatus() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. In Drupal 9 there will be no way to set the status and in Drupal 8 this ability has been removed because mb_*() functions are supplied using Symfony's polyfill. See https://www.drupal.org/node/2850048.
   */
  public function testSetStatus() {
    // The call itself is the test: the annotation above expects the notice.
    $status = Unicode::STATUS_SINGLEBYTE;
    Unicode::setStatus($status);
  }
  /**
   * Tests MIME header encoding of multibyte strings.
   *
   * @param string $value
   *   The raw header value to encode.
   * @param string $encoded
   *   The value Unicode::mimeHeaderEncode() is expected to return.
   *
   * @dataProvider providerTestMimeHeader
   * @covers ::mimeHeaderEncode
   */
  public function testMimeHeaderEncode($value, $encoded) {
    $actual = Unicode::mimeHeaderEncode($value);
    $this->assertEquals($encoded, $actual);
  }
  /**
   * Data provider for testMimeHeaderEncode().
   *
   * @see testMimeHeaderEncode()
   *
   * @return array
   *   Data sets keyed by a short description, each containing a string and
   *   its encoded value.
   */
  public function providerTestMimeHeader() {
    $cases = [];
    $cases['Base64 encoding'] = ['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='];
    $cases['ASCII characters only'] = ['test.txt', 'test.txt'];
    return $cases;
  }
  /**
   * Tests MIME header decoding of multibyte strings.
   *
   * @param string $value
   *   The decoded value that is expected.
   * @param string $encoded
   *   The encoded header passed to Unicode::mimeHeaderDecode().
   *
   * @dataProvider providerTestMimeHeaderDecode
   * @covers ::mimeHeaderDecode
   */
  public function testMimeHeaderDecode($value, $encoded) {
    $decoded = Unicode::mimeHeaderDecode($encoded);
    $this->assertEquals($value, $decoded);
  }
  /**
   * Data provider for testMimeHeaderDecode().
   *
   * @return array
   *   Data sets keyed by a short description, each containing the expected
   *   decoded string followed by its encoded value.
   */
  public function providerTestMimeHeaderDecode() {
    // Every encoded variant below decodes to the same accented file name.
    $accented = 'tést.txt';

    return [
      'Uppercase base64 encoding' => [$accented, '=?utf-8?B?dMOpc3QudHh0?='],
      'Uppercase quoted-printable encoding' => [$accented, '=?UTF-8?Q?t=C3=A9st.txt?='],
      'Lowercase base64 encoding' => [$accented, '=?utf-8?b?dMOpc3QudHh0?='],
      'Lowercase quoted-printable encoding' => [$accented, '=?UTF-8?q?t=C3=A9st.txt?='],
      'ASCII characters only' => ['test.txt', 'test.txt'],
    ];
  }
  /**
   * Tests the deprecated multibyte strtolower wrapper.
   *
   * @param string $text
   *   The mixed-case input.
   * @param string $expected
   *   The expected lowercase result.
   *
   * @dataProvider providerStrtolower
   * @covers ::strtolower
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strtolower() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtolower() instead. See https://www.drupal.org/node/2850048.
   */
  public function testStrtolower($text, $expected) {
    $lowered = Unicode::strtolower($text);
    $this->assertEquals($expected, $lowered);
  }
  /**
   * Data provider for testStrtolower().
   *
   * @see testStrtolower()
   *
   * @return array
   *   A list of data sets, each containing a string and its lowercase
   *   version.
   */
  public function providerStrtolower() {
    $cases = [];
    // Plain ASCII.
    $cases[] = ['tHe QUIcK bRoWn', 'the quick brown'];
    // Accented Latin characters.
    $cases[] = ['FrançAIS is ÜBER-åwesome', 'français is über-åwesome'];
    // Greek.
    $cases[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ'];
    return $cases;
  }
  /**
   * Tests the deprecated multibyte strtoupper wrapper.
   *
   * @param string $text
   *   The mixed-case input.
   * @param string $expected
   *   The expected uppercase result.
   *
   * @dataProvider providerStrtoupper
   * @covers ::strtoupper
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strtoupper() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtoupper() instead. See https://www.drupal.org/node/2850048.
   */
  public function testStrtoupper($text, $expected) {
    $raised = Unicode::strtoupper($text);
    $this->assertEquals($expected, $raised);
  }
  /**
   * Data provider for testStrtoupper().
   *
   * @see testStrtoupper()
   *
   * @return array
   *   A list of data sets, each containing a string and its uppercase
   *   version.
   */
  public function providerStrtoupper() {
    $cases = [];
    // Plain ASCII.
    $cases[] = ['tHe QUIcK bRoWn', 'THE QUICK BROWN'];
    // Accented Latin characters.
    $cases[] = ['FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'];
    // Greek.
    $cases[] = ['αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'];
    return $cases;
  }
  /**
   * Tests multibyte ucfirst.
   *
   * @param string $text
   *   The input string.
   * @param string $expected
   *   The input with its first character expected in uppercase.
   *
   * @dataProvider providerUcfirst
   * @covers ::ucfirst
   */
  public function testUcfirst($text, $expected) {
    $capitalized = Unicode::ucfirst($text);
    $this->assertEquals($expected, $capitalized);
  }
  /**
   * Data provider for testUcfirst().
   *
   * @see testUcfirst()
   *
   * @return array
   *   A list of data sets, each containing a string and its uppercase-first
   *   version.
   */
  public function providerUcfirst() {
    $cases = [];
    $cases[] = ['tHe QUIcK bRoWn', 'THe QUIcK bRoWn'];
    $cases[] = ['françAIS', 'FrançAIS'];
    $cases[] = ['über', 'Über'];
    $cases[] = ['åwesome', 'Åwesome'];
    // A string that starts with a multibyte (Greek) character.
    $cases[] = ['σion', 'Σion'];
    return $cases;
  }
  /**
   * Tests multibyte lcfirst.
   *
   * @param string $text
   *   The input string.
   * @param string $expected
   *   The input with its first character expected in lowercase.
   *
   * @dataProvider providerLcfirst
   * @covers ::lcfirst
   */
  public function testLcfirst($text, $expected) {
    $lowered_first = Unicode::lcfirst($text);
    $this->assertEquals($expected, $lowered_first);
  }
  /**
   * Data provider for testLcfirst().
   *
   * @see testLcfirst()
   *
   * @return array
   *   A list of data sets, each containing a string and its lowercase-first
   *   version.
   */
  public function providerLcfirst() {
    $cases = [];
    $cases[] = ['tHe QUIcK bRoWn', 'tHe QUIcK bRoWn'];
    $cases[] = ['FrançAIS is ÜBER-åwesome', 'françAIS is ÜBER-åwesome'];
    $cases[] = ['Über', 'über'];
    $cases[] = ['Åwesome', 'åwesome'];
    // A string made up entirely of multibyte (Greek) characters.
    $cases[] = ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'];
    return $cases;
  }
  /**
   * Tests multibyte ucwords.
   *
   * @param string $text
   *   The input string.
   * @param string $expected
   *   The input with each word expected to start in uppercase.
   *
   * @dataProvider providerUcwords
   * @covers ::ucwords
   */
  public function testUcwords($text, $expected) {
    $capitalized = Unicode::ucwords($text);
    $this->assertEquals($expected, $capitalized);
  }
  /**
   * Data provider for testUcwords().
   *
   * @see testUcwords()
   *
   * @return array
   *   A list of data sets, each containing a string and its capitalized
   *   version.
   */
  public function providerUcwords() {
    return [
      ['tHe QUIcK bRoWn', 'THe QUIcK BRoWn'],
      ['françAIS', 'FrançAIS'],
      ['über', 'Über'],
      ['åwesome', 'Åwesome'],
      // A multi-word sentence that includes a hyphenated word.
      ['frànçAIS is über-åwesome', 'FrànçAIS Is Über-Åwesome'],
      // Make sure we don't mangle extra spaces: the doubled space between
      // "is" and "über" must come back unchanged.
      ['frànçAIS is  über-åwesome', 'FrànçAIS Is  Über-Åwesome'],
      // Add a multibyte string.
      ['σion', 'Σion'],
    ];
  }
  /**
   * Tests the deprecated multibyte strlen wrapper.
   *
   * @param string $text
   *   The string to measure.
   * @param int $expected
   *   The expected length.
   *
   * @dataProvider providerStrlen
   * @covers ::strlen
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strlen() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strlen() instead. See https://www.drupal.org/node/2850048.
   */
  public function testStrlen($text, $expected) {
    $length = Unicode::strlen($text);
    $this->assertEquals($expected, $length);
  }
  /**
   * Data provider for testStrlen().
   *
   * @see testStrlen()
   *
   * @return array
   *   A list of data sets, each containing a string and its length.
   */
  public function providerStrlen() {
    $cases = [];
    $cases[] = ['tHe QUIcK bRoWn', 15];
    $cases[] = ['ÜBER-åwesome', 12];
    $cases[] = ['以呂波耳・ほへとち。リヌルヲ。', 15];
    return $cases;
  }
  /**
   * Tests the deprecated multibyte substr wrapper.
   *
   * @param string $text
   *   The string to cut from.
   * @param int $start
   *   The start position passed to Unicode::substr().
   * @param int|null $length
   *   The length passed to Unicode::substr().
   * @param string $expected
   *   The expected substring.
   *
   * @dataProvider providerSubstr
   * @covers ::substr
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::substr() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_substr() instead. See https://www.drupal.org/node/2850048.
   */
  public function testSubstr($text, $start, $length, $expected) {
    $piece = Unicode::substr($text, $start, $length);
    $this->assertEquals($expected, $piece);
  }
  /**
   * Data provider for testSubstr().
   *
   * @see testSubstr()
   *
   * @return array
   *   A list of data sets, each containing:
   *   - The string to test.
   *   - The start number to be processed by substr.
   *   - The length number to be processed by substr.
   *   - The expected string result.
   */
  public function providerSubstr() {
    $subject = 'frànçAIS is über-åwesome';

    // Start, length and expected result for slices of $subject.
    $slices = [
      [0, NULL, $subject],
      [0, 0, ''],
      [0, 1, 'f'],
      [0, 8, 'frànçAIS'],
      [0, 23, 'frànçAIS is über-åwesom'],
      [0, 24, $subject],
      [0, 25, $subject],
      [0, 100, $subject],
      [4, 4, 'çAIS'],
      [1, 0, ''],
      [100, 0, ''],
      [-4, 2, 'so'],
      [-4, 3, 'som'],
      [-4, 4, 'some'],
      [-4, 5, 'some'],
      [-7, 10, 'åwesome'],
      [5, -10, 'AIS is üb'],
      [0, -10, 'frànçAIS is üb'],
      [0, -1, 'frànçAIS is über-åwesom'],
      [-7, -2, 'åweso'],
      [-7, -6, 'å'],
      [-7, -7, ''],
      [-7, -8, ''],
    ];

    $cases = [];
    foreach ($slices as $slice) {
      // Prepend the subject so each row reads: text, start, length, expected.
      $cases[] = array_merge([$subject], $slice);
    }

    $cases[] = ['...', 0, 2, '..'];
    $cases[] = ['以呂波耳・ほへとち。リヌルヲ。', 1, 3, '呂波耳'];

    return $cases;
  }
  /**
   * Tests multibyte truncate.
   *
   * @param string $text
   *   The string to truncate.
   * @param int $max_length
   *   The maximum length passed to Unicode::truncate().
   * @param string $expected
   *   The expected truncated string.
   * @param bool $wordsafe
   *   (optional) The $wordsafe flag passed on. Defaults to FALSE.
   * @param bool $add_ellipsis
   *   (optional) The $add_ellipsis flag passed on. Defaults to FALSE.
   *
   * @dataProvider providerTruncate
   * @covers ::truncate
   */
  public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
    $truncated = Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis);
    $this->assertEquals($expected, $truncated);
  }
  /**
   * Data provider for testTruncate().
   *
   * @see testTruncate()
   *
   * @return array
   *   A list of data sets, each containing:
   *   - The string to test.
   *   - The max length to truncate this string to.
   *   - The expected string result.
   *   - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
   *   - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
   */
  public function providerTruncate() {
    // Subjects that are reused across several data sets.
    $french = 'frànçAIS is über-åwesome';
    $spanish = '¿Dónde está el niño?';
    $help = 'Help! Help! Help!';
    $stop = 'Stop. Hammertime.';

    $tests = [
      // Plain truncation, no word safety and no ellipsis.
      [$french, 24, $french],
      [$french, 23, 'frànçAIS is über-åwesom'],
      [$french, 17, 'frànçAIS is über-'],
      ['以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'],
      // Ellipsis without word safety.
      [$french, 24, $french, FALSE, TRUE],
      [$french, 23, 'frànçAIS is über-åweso…', FALSE, TRUE],
      [$french, 17, 'frànçAIS is über…', FALSE, TRUE],
      // Word safety together with an ellipsis.
      ['123', 1, '…', TRUE, TRUE],
      ['123', 2, '1…', TRUE, TRUE],
      ['123', 3, '123', TRUE, TRUE],
      ['1234', 3, '12…', TRUE, TRUE],
      ['1234567890', 10, '1234567890', TRUE, TRUE],
      ['12345678901', 10, '123456789…', TRUE, TRUE],
      ['12345678901', 11, '12345678901', TRUE, TRUE],
      ['123456789012', 11, '1234567890…', TRUE, TRUE],
      ['12345 7890', 10, '12345 7890', TRUE, TRUE],
      ['12345 7890', 9, '12345…', TRUE, TRUE],
      ['123 567 90', 10, '123 567 90', TRUE, TRUE],
      ['123 567 901', 10, '123 567…', TRUE, TRUE],
      [$stop, 17, $stop, TRUE, TRUE],
      [$stop, 16, 'Stop…', TRUE, TRUE],
      [$french, 24, $french, TRUE, TRUE],
      [$french, 23, 'frànçAIS is über…', TRUE, TRUE],
      [$french, 17, 'frànçAIS is über…', TRUE, TRUE],
      [$spanish, 20, $spanish, TRUE, TRUE],
      [$spanish, 19, '¿Dónde está el…', TRUE, TRUE],
      [$spanish, 13, '¿Dónde está…', TRUE, TRUE],
      [$spanish, 10, '¿Dónde…', TRUE, TRUE],
      [$help, 17, $help, TRUE, TRUE],
      [$help, 16, 'Help! Help!…', TRUE, TRUE],
      [$help, 15, 'Help! Help!…', TRUE, TRUE],
      [$help, 14, 'Help! Help!…', TRUE, TRUE],
      [$help, 13, 'Help! Help!…', TRUE, TRUE],
      [$help, 12, 'Help! Help!…', TRUE, TRUE],
      [$help, 11, 'Help! Help…', TRUE, TRUE],
      [$help, 10, 'Help!…', TRUE, TRUE],
      [$help, 9, 'Help!…', TRUE, TRUE],
      [$help, 8, 'Help!…', TRUE, TRUE],
      [$help, 7, 'Help!…', TRUE, TRUE],
      [$help, 6, 'Help!…', TRUE, TRUE],
      [$help, 5, 'Help…', TRUE, TRUE],
      [$help, 4, 'Hel…', TRUE, TRUE],
      [$help, 3, 'He…', TRUE, TRUE],
      [$help, 2, 'H…', TRUE, TRUE],
    ];

    // Test truncate on text with multiple lines. All three strings share the
    // same first line and differ only in how much of the second line is kept.
    $first_line = "This is a text that spans multiple lines.\n";
    $multi_line = $first_line . 'Line 2 goes here.';
    $multi_line_wordsafe = $first_line . 'Line 2';
    $multi_line_non_wordsafe = $first_line . 'Line 2 go';

    $tests[] = [$multi_line, 51, $multi_line_wordsafe, TRUE];
    $tests[] = [$multi_line, 51, $multi_line_non_wordsafe, FALSE];

    return $tests;
  }
  /**
   * Tests byte-limited truncation of multibyte strings.
   *
   * @dataProvider providerTestTruncateBytes
   * @covers ::truncateBytes
   *
   * @param string $text
   *   The string to truncate.
   * @param int $max_length
   *   The upper limit on the returned string length.
   * @param string $expected
   *   The expected return from Unicode::truncateBytes().
   */
  public function testTruncateBytes($text, $max_length, $expected) {
    $truncated = Unicode::truncateBytes($text, $max_length);
    $this->assertEquals($expected, $truncated, 'The string was not correctly truncated.');
  }
  /**
   * Provides data for self::testTruncateBytes().
   *
   * @return array
   *   An array of arrays, each containing the parameters to
   *   self::testTruncateBytes().
   */
  public function providerTestTruncateBytes() {
    $cases = [];
    // String shorter than max length.
    $cases[] = ['Short string', 42, 'Short string'];
    // Simple string longer than max length.
    $cases[] = ['Longer string than previous.', 10, 'Longer str'];
    // Unicode.
    $cases[] = ['以呂波耳・ほへとち。リヌルヲ。', 10, '以呂波'];
    return $cases;
  }
  /**
   * Tests UTF-8 validation.
   *
   * @dataProvider providerTestValidateUtf8
   * @covers ::validateUtf8
   *
   * @param string $text
   *   The text to validate.
   * @param bool $expected
   *   The expected return value from Unicode::validateUtf8().
   * @param string $message
   *   The message to display on failure.
   */
  public function testValidateUtf8($text, $expected, $message) {
    $is_valid = Unicode::validateUtf8($text);
    $this->assertEquals($expected, $is_valid, $message);
  }
  /**
   * Provides data for self::testValidateUtf8().
   *
   * Invalid UTF-8 examples sourced from http://stackoverflow.com/a/11709412/109119.
   *
   * @return array
   *   An array of arrays, each containing the parameters for
   *   self::testValidateUtf8().
   */
  public function providerTestValidateUtf8() {
    $invalid_message = 'Invalid UTF-8 was validated.';

    $cases = [];
    // Empty string.
    $cases[] = ['', TRUE, 'An empty string did not validate.'];
    // Simple text string.
    $cases[] = ['Simple text.', TRUE, 'A simple ASCII text string did not validate.'];
    // Invalid UTF-8, overlong 5 byte encoding.
    $cases[] = ["\xF8\x80\x80\x80\x80", FALSE, $invalid_message];
    // High code-point without trailing characters.
    $cases[] = ["\xD0\x01", FALSE, $invalid_message];
    return $cases;
  }
  /**
   * Tests UTF-8 conversion.
   *
   * @dataProvider providerTestConvertToUtf8
   * @covers ::convertToUtf8
   *
   * @param string $data
   *   The data to be converted.
   * @param string $encoding
   *   The encoding the data is in.
   * @param string|bool $expected
   *   The expected result.
   */
  public function testConvertToUtf8($data, $encoding, $expected) {
    $converted = Unicode::convertToUtf8($data, $encoding);
    $this->assertEquals($expected, $converted);
  }
  /**
   * Provides data to self::testConvertToUtf8().
   *
   * @return array
   *   An array of arrays, each containing the parameters to
   *   self::testConvertToUtf8().
   */
  public function providerTestConvertToUtf8() {
    $source_encoding = 'Windows-1252';

    // Single bytes in the source encoding and the characters they map to.
    $cases = [];
    $cases[] = ["\x97", $source_encoding, '—'];
    $cases[] = ["\x99", $source_encoding, '™'];
    $cases[] = ["\x80", $source_encoding, '€'];
    return $cases;
  }
  /**
   * Tests the deprecated multibyte strpos wrapper.
   *
   * @param string $haystack
   *   The string to be searched in.
   * @param string $needle
   *   The string to search for.
   * @param int $offset
   *   The character offset to start searching at.
   * @param int|false $expected
   *   The expected position, or FALSE when the needle is not found.
   *
   * @dataProvider providerStrpos
   * @covers ::strpos
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strpos() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strpos() instead. See https://www.drupal.org/node/2850048.
   */
  public function testStrpos($haystack, $needle, $offset, $expected) {
    // Compare strictly: the provider expects both position 0 and FALSE, which
    // a loose assertEquals() comparison cannot tell apart.
    $this->assertSame($expected, Unicode::strpos($haystack, $needle, $offset));
  }
  /**
   * Data provider for testStrpos().
   *
   * @see testStrpos()
   *
   * @return array
   *   A list of data sets, each containing:
   *   - The haystack string to be searched in.
   *   - The needle string to search for.
   *   - The offset integer to start at.
   *   - The expected integer/FALSE result.
   */
  public function providerStrpos() {
    // The two haystacks shared by all data sets.
    $latin = 'frànçAIS is über-åwesome';
    $japanese = '以呂波耳・ほへとち。リヌルヲ。';

    $cases = [];
    $cases[] = [$latin, 'frànçAIS is über-åwesome', 0, 0];
    $cases[] = [$latin, 'rànçAIS is über-åwesome', 0, 1];
    $cases[] = [$latin, 'not in string', 0, FALSE];
    $cases[] = [$latin, 'r', 0, 1];
    $cases[] = [$latin, 'nçAIS', 0, 3];
    $cases[] = [$latin, 'nçAIS', 2, 3];
    $cases[] = [$latin, 'nçAIS', 3, 3];
    $cases[] = [$japanese, '波耳', 0, 2];
    $cases[] = [$japanese, '波耳', 1, 2];
    $cases[] = [$japanese, '波耳', 2, 2];
    return $cases;
  }
  518. }