123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553 |
- <?php
- namespace Drupal\Tests\Component\Utility;
- use Drupal\Component\Utility\Unicode;
- use PHPUnit\Framework\TestCase;
/**
 * Test unicode handling features implemented in Unicode component.
 *
 * All comparisons use assertSame() so that type mismatches (for example a
 * returned 0 where FALSE is expected, or FALSE where '' is expected) are
 * reported as failures instead of being hidden by loose comparison.
 *
 * @group Utility
 *
 * @coversDefaultClass \Drupal\Component\Utility\Unicode
 */
class UnicodeTest extends TestCase {

  /**
   * Tests that the deprecated Unicode::setStatus() reports its deprecation.
   *
   * The method body makes no explicit assertion; the expected deprecation
   * message declared in the annotation below is the only check.
   *
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::setStatus() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. In Drupal 9 there will be no way to set the status and in Drupal 8 this ability has been removed because mb_*() functions are supplied using Symfony's polyfill. See https://www.drupal.org/node/2850048.
   */
  public function testSetStatus() {
    Unicode::setStatus(Unicode::STATUS_SINGLEBYTE);
  }

  /**
   * Tests multibyte encoding.
   *
   * @dataProvider providerTestMimeHeader
   * @covers ::mimeHeaderEncode
   *
   * @param string $value
   *   The raw header value to encode.
   * @param string $encoded
   *   The expected encoded header value.
   */
  public function testMimeHeaderEncode($value, $encoded) {
    $this->assertSame($encoded, Unicode::mimeHeaderEncode($value));
  }

  /**
   * Data provider for testMimeHeaderEncode().
   *
   * @see testMimeHeaderEncode()
   *
   * @return array
   *   An array containing a string and its encoded value.
   */
  public function providerTestMimeHeader() {
    return [
      "Base64 encoding" => ['tést.txt', '=?UTF-8?B?dMOpc3QudHh0?='],
      "ASCII characters only" => ['test.txt', 'test.txt'],
    ];
  }

  /**
   * Tests multibyte decoding.
   *
   * @dataProvider providerTestMimeHeaderDecode
   * @covers ::mimeHeaderDecode
   *
   * @param string $value
   *   The expected decoded header value.
   * @param string $encoded
   *   The encoded header value passed to Unicode::mimeHeaderDecode().
   */
  public function testMimeHeaderDecode($value, $encoded) {
    $this->assertSame($value, Unicode::mimeHeaderDecode($encoded));
  }

  /**
   * Data provider for testMimeHeaderDecode().
   *
   * @return array
   *   An array containing a string and its encoded value.
   */
  public function providerTestMimeHeaderDecode() {
    return [
      'Uppercase base64 encoding' => [
        'tést.txt',
        '=?utf-8?B?dMOpc3QudHh0?=',
      ],
      'Uppercase quoted-printable encoding' => [
        'tést.txt',
        '=?UTF-8?Q?t=C3=A9st.txt?=',
      ],
      'Lowercase base64 encoding' => [
        'tést.txt',
        '=?utf-8?b?dMOpc3QudHh0?=',
      ],
      'Lowercase quoted-printable encoding' => [
        'tést.txt',
        '=?UTF-8?q?t=C3=A9st.txt?=',
      ],
      'ASCII characters only' => [
        'test.txt',
        'test.txt',
      ],
    ];
  }

  /**
   * Tests multibyte strtolower.
   *
   * @dataProvider providerStrtolower
   * @covers ::strtolower
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strtolower() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtolower() instead. See https://www.drupal.org/node/2850048.
   *
   * @param string $text
   *   The string to convert.
   * @param string $expected
   *   The expected lowercase string.
   */
  public function testStrtolower($text, $expected) {
    $this->assertSame($expected, Unicode::strtolower($text));
  }

  /**
   * Data provider for testStrtolower().
   *
   * @see testStrtolower()
   *
   * @return array
   *   An array containing a string and its lowercase version.
   */
  public function providerStrtolower() {
    return [
      ['tHe QUIcK bRoWn', 'the quick brown'],
      ['FrançAIS is ÜBER-åwesome', 'français is über-åwesome'],
      ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αβγδεζηθικλμνξοσὠ'],
    ];
  }

  /**
   * Tests multibyte strtoupper.
   *
   * @dataProvider providerStrtoupper
   * @covers ::strtoupper
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strtoupper() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strtoupper() instead. See https://www.drupal.org/node/2850048.
   *
   * @param string $text
   *   The string to convert.
   * @param string $expected
   *   The expected uppercase string.
   */
  public function testStrtoupper($text, $expected) {
    $this->assertSame($expected, Unicode::strtoupper($text));
  }

  /**
   * Data provider for testStrtoupper().
   *
   * @see testStrtoupper()
   *
   * @return array
   *   An array containing a string and its uppercase version.
   */
  public function providerStrtoupper() {
    return [
      ['tHe QUIcK bRoWn', 'THE QUICK BROWN'],
      ['FrançAIS is ÜBER-åwesome', 'FRANÇAIS IS ÜBER-ÅWESOME'],
      ['αβγδεζηθικλμνξοσὠ', 'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'],
    ];
  }

  /**
   * Tests multibyte ucfirst.
   *
   * @dataProvider providerUcfirst
   * @covers ::ucfirst
   *
   * @param string $text
   *   The string to convert.
   * @param string $expected
   *   The expected string with its first character uppercased.
   */
  public function testUcfirst($text, $expected) {
    $this->assertSame($expected, Unicode::ucfirst($text));
  }

  /**
   * Data provider for testUcfirst().
   *
   * @see testUcfirst()
   *
   * @return array
   *   An array containing a string and its uppercase first version.
   */
  public function providerUcfirst() {
    return [
      ['tHe QUIcK bRoWn', 'THe QUIcK bRoWn'],
      ['françAIS', 'FrançAIS'],
      ['über', 'Über'],
      ['åwesome', 'Åwesome'],
      // A multibyte string.
      ['σion', 'Σion'],
    ];
  }

  /**
   * Tests multibyte lcfirst.
   *
   * @dataProvider providerLcfirst
   * @covers ::lcfirst
   *
   * @param string $text
   *   The string to convert.
   * @param string $expected
   *   The expected string with its first character lowercased.
   */
  public function testLcfirst($text, $expected) {
    $this->assertSame($expected, Unicode::lcfirst($text));
  }

  /**
   * Data provider for testLcfirst().
   *
   * @see testLcfirst()
   *
   * @return array
   *   An array containing a string and its lowercase version.
   */
  public function providerLcfirst() {
    return [
      ['tHe QUIcK bRoWn', 'tHe QUIcK bRoWn'],
      ['FrançAIS is ÜBER-åwesome', 'françAIS is ÜBER-åwesome'],
      ['Über', 'über'],
      ['Åwesome', 'åwesome'],
      // Add a multibyte string.
      ['ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ', 'αΒΓΔΕΖΗΘΙΚΛΜΝΞΟΣὨ'],
    ];
  }

  /**
   * Tests multibyte ucwords.
   *
   * @dataProvider providerUcwords
   * @covers ::ucwords
   *
   * @param string $text
   *   The string to convert.
   * @param string $expected
   *   The expected string with each word capitalized.
   */
  public function testUcwords($text, $expected) {
    $this->assertSame($expected, Unicode::ucwords($text));
  }

  /**
   * Data provider for testUcwords().
   *
   * @see testUcwords()
   *
   * @return array
   *   An array containing a string and its capitalized version.
   */
  public function providerUcwords() {
    return [
      ['tHe QUIcK bRoWn', 'THe QUIcK BRoWn'],
      ['françAIS', 'FrançAIS'],
      ['über', 'Über'],
      ['åwesome', 'Åwesome'],
      // Make sure we don't mangle extra spaces.
      // NOTE(review): this sample contains only single spaces; confirm
      // whether consecutive spaces were intended here.
      ['frànçAIS is über-åwesome', 'FrànçAIS Is Über-Åwesome'],
      // Add a multibyte string.
      ['σion', 'Σion'],
    ];
  }

  /**
   * Tests multibyte strlen.
   *
   * @dataProvider providerStrlen
   * @covers ::strlen
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strlen() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strlen() instead. See https://www.drupal.org/node/2850048.
   *
   * @param string $text
   *   The string to measure.
   * @param int $expected
   *   The expected number of characters.
   */
  public function testStrlen($text, $expected) {
    $this->assertSame($expected, Unicode::strlen($text));
  }

  /**
   * Data provider for testStrlen().
   *
   * @see testStrlen()
   *
   * @return array
   *   An array containing a string and its length.
   */
  public function providerStrlen() {
    return [
      ['tHe QUIcK bRoWn', 15],
      ['ÜBER-åwesome', 12],
      ['以呂波耳・ほへとち。リヌルヲ。', 15],
    ];
  }

  /**
   * Tests multibyte substr.
   *
   * @dataProvider providerSubstr
   * @covers ::substr
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::substr() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_substr() instead. See https://www.drupal.org/node/2850048.
   *
   * @param string $text
   *   The string to extract from.
   * @param int $start
   *   The start position passed to Unicode::substr().
   * @param int|null $length
   *   The length passed to Unicode::substr().
   * @param string $expected
   *   The expected substring.
   */
  public function testSubstr($text, $start, $length, $expected) {
    $this->assertSame($expected, Unicode::substr($text, $start, $length));
  }

  /**
   * Data provider for testSubstr().
   *
   * @see testSubstr()
   *
   * @return array
   *   An array containing:
   *     - The string to test.
   *     - The start number to be processed by substr.
   *     - The length number to be processed by substr.
   *     - The expected string result.
   */
  public function providerSubstr() {
    return [
      ['frànçAIS is über-åwesome', 0, NULL, 'frànçAIS is über-åwesome'],
      ['frànçAIS is über-åwesome', 0, 0, ''],
      ['frànçAIS is über-åwesome', 0, 1, 'f'],
      ['frànçAIS is über-åwesome', 0, 8, 'frànçAIS'],
      ['frànçAIS is über-åwesome', 0, 23, 'frànçAIS is über-åwesom'],
      ['frànçAIS is über-åwesome', 0, 24, 'frànçAIS is über-åwesome'],
      ['frànçAIS is über-åwesome', 0, 25, 'frànçAIS is über-åwesome'],
      ['frànçAIS is über-åwesome', 0, 100, 'frànçAIS is über-åwesome'],
      ['frànçAIS is über-åwesome', 4, 4, 'çAIS'],
      ['frànçAIS is über-åwesome', 1, 0, ''],
      ['frànçAIS is über-åwesome', 100, 0, ''],
      ['frànçAIS is über-åwesome', -4, 2, 'so'],
      ['frànçAIS is über-åwesome', -4, 3, 'som'],
      ['frànçAIS is über-åwesome', -4, 4, 'some'],
      ['frànçAIS is über-åwesome', -4, 5, 'some'],
      ['frànçAIS is über-åwesome', -7, 10, 'åwesome'],
      ['frànçAIS is über-åwesome', 5, -10, 'AIS is üb'],
      ['frànçAIS is über-åwesome', 0, -10, 'frànçAIS is üb'],
      ['frànçAIS is über-åwesome', 0, -1, 'frànçAIS is über-åwesom'],
      ['frànçAIS is über-åwesome', -7, -2, 'åweso'],
      ['frànçAIS is über-åwesome', -7, -6, 'å'],
      ['frànçAIS is über-åwesome', -7, -7, ''],
      ['frànçAIS is über-åwesome', -7, -8, ''],
      ['...', 0, 2, '..'],
      ['以呂波耳・ほへとち。リヌルヲ。', 1, 3, '呂波耳'],
    ];
  }

  /**
   * Tests multibyte truncate.
   *
   * @dataProvider providerTruncate
   * @covers ::truncate
   *
   * @param string $text
   *   The string to truncate.
   * @param int $max_length
   *   The maximum length passed to Unicode::truncate().
   * @param string $expected
   *   The expected truncated string.
   * @param bool $wordsafe
   *   (optional) The $wordsafe flag passed to Unicode::truncate().
   * @param bool $add_ellipsis
   *   (optional) The $add_ellipsis flag passed to Unicode::truncate().
   */
  public function testTruncate($text, $max_length, $expected, $wordsafe = FALSE, $add_ellipsis = FALSE) {
    $this->assertSame($expected, Unicode::truncate($text, $max_length, $wordsafe, $add_ellipsis));
  }

  /**
   * Data provider for testTruncate().
   *
   * @see testTruncate()
   *
   * @return array
   *   An array containing:
   *     - The string to test.
   *     - The max length to truncate this string to.
   *     - The expected string result.
   *     - (optional) Boolean for the $wordsafe flag. Defaults to FALSE.
   *     - (optional) Boolean for the $add_ellipsis flag. Defaults to FALSE.
   */
  public function providerTruncate() {
    $tests = [
      ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome'],
      ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åwesom'],
      ['frànçAIS is über-åwesome', 17, 'frànçAIS is über-'],
      ['以呂波耳・ほへとち。リヌルヲ。', 6, '以呂波耳・ほ'],
      ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', FALSE, TRUE],
      ['frànçAIS is über-åwesome', 23, 'frànçAIS is über-åweso…', FALSE, TRUE],
      ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', FALSE, TRUE],
      ['123', 1, '…', TRUE, TRUE],
      ['123', 2, '1…', TRUE, TRUE],
      ['123', 3, '123', TRUE, TRUE],
      ['1234', 3, '12…', TRUE, TRUE],
      ['1234567890', 10, '1234567890', TRUE, TRUE],
      ['12345678901', 10, '123456789…', TRUE, TRUE],
      ['12345678901', 11, '12345678901', TRUE, TRUE],
      ['123456789012', 11, '1234567890…', TRUE, TRUE],
      ['12345 7890', 10, '12345 7890', TRUE, TRUE],
      ['12345 7890', 9, '12345…', TRUE, TRUE],
      ['123 567 90', 10, '123 567 90', TRUE, TRUE],
      ['123 567 901', 10, '123 567…', TRUE, TRUE],
      ['Stop. Hammertime.', 17, 'Stop. Hammertime.', TRUE, TRUE],
      ['Stop. Hammertime.', 16, 'Stop…', TRUE, TRUE],
      ['frànçAIS is über-åwesome', 24, 'frànçAIS is über-åwesome', TRUE, TRUE],
      ['frànçAIS is über-åwesome', 23, 'frànçAIS is über…', TRUE, TRUE],
      ['frànçAIS is über-åwesome', 17, 'frànçAIS is über…', TRUE, TRUE],
      ['¿Dónde está el niño?', 20, '¿Dónde está el niño?', TRUE, TRUE],
      ['¿Dónde está el niño?', 19, '¿Dónde está el…', TRUE, TRUE],
      ['¿Dónde está el niño?', 13, '¿Dónde está…', TRUE, TRUE],
      ['¿Dónde está el niño?', 10, '¿Dónde…', TRUE, TRUE],
      ['Help! Help! Help!', 17, 'Help! Help! Help!', TRUE, TRUE],
      ['Help! Help! Help!', 16, 'Help! Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 15, 'Help! Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 14, 'Help! Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 13, 'Help! Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 12, 'Help! Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 11, 'Help! Help…', TRUE, TRUE],
      ['Help! Help! Help!', 10, 'Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 9, 'Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 8, 'Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 7, 'Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 6, 'Help!…', TRUE, TRUE],
      ['Help! Help! Help!', 5, 'Help…', TRUE, TRUE],
      ['Help! Help! Help!', 4, 'Hel…', TRUE, TRUE],
      ['Help! Help! Help!', 3, 'He…', TRUE, TRUE],
      ['Help! Help! Help!', 2, 'H…', TRUE, TRUE],
    ];

    // Test truncate on text with multiple lines. The heredoc bodies and their
    // closing markers stay at column 0 so no leading whitespace ends up in
    // the strings and the syntax works on PHP versions before 7.3.
    $multi_line = <<<EOF
This is a text that spans multiple lines.
Line 2 goes here.
EOF;
    $multi_line_wordsafe = <<<EOF
This is a text that spans multiple lines.
Line 2
EOF;
    $multi_line_non_wordsafe = <<<EOF
This is a text that spans multiple lines.
Line 2 go
EOF;
    $tests[] = [$multi_line, 51, $multi_line_wordsafe, TRUE];
    $tests[] = [$multi_line, 51, $multi_line_non_wordsafe, FALSE];

    return $tests;
  }

  /**
   * Tests multibyte truncate bytes.
   *
   * @dataProvider providerTestTruncateBytes
   * @covers ::truncateBytes
   *
   * @param string $text
   *   The string to truncate.
   * @param int $max_length
   *   The upper limit on the returned string length.
   * @param string $expected
   *   The expected return from Unicode::truncateBytes().
   */
  public function testTruncateBytes($text, $max_length, $expected) {
    $this->assertSame($expected, Unicode::truncateBytes($text, $max_length), 'The string was not correctly truncated.');
  }

  /**
   * Provides data for self::testTruncateBytes().
   *
   * @return array
   *   An array of arrays, each containing the parameters to
   *   self::testTruncateBytes().
   */
  public function providerTestTruncateBytes() {
    return [
      // String shorter than max length.
      ['Short string', 42, 'Short string'],
      // Simple string longer than max length.
      ['Longer string than previous.', 10, 'Longer str'],
      // Unicode.
      ['以呂波耳・ほへとち。リヌルヲ。', 10, '以呂波'],
    ];
  }

  /**
   * Tests UTF-8 validation.
   *
   * @dataProvider providerTestValidateUtf8
   * @covers ::validateUtf8
   *
   * @param string $text
   *   The text to validate.
   * @param bool $expected
   *   The expected return value from Unicode::validateUtf8().
   * @param string $message
   *   The message to display on failure.
   */
  public function testValidateUtf8($text, $expected, $message) {
    $this->assertSame($expected, Unicode::validateUtf8($text), $message);
  }

  /**
   * Provides data for self::testValidateUtf8().
   *
   * Invalid UTF-8 examples sourced from http://stackoverflow.com/a/11709412/109119.
   *
   * @return array
   *   An array of arrays, each containing the parameters for
   *   self::testValidateUtf8().
   */
  public function providerTestValidateUtf8() {
    return [
      // Empty string.
      ['', TRUE, 'An empty string did not validate.'],
      // Simple text string.
      ['Simple text.', TRUE, 'A simple ASCII text string did not validate.'],
      // Invalid UTF-8, overlong 5 byte encoding.
      [chr(0xF8) . chr(0x80) . chr(0x80) . chr(0x80) . chr(0x80), FALSE, 'Invalid UTF-8 was validated.'],
      // High code-point without trailing characters.
      [chr(0xD0) . chr(0x01), FALSE, 'Invalid UTF-8 was validated.'],
    ];
  }

  /**
   * Tests UTF-8 conversion.
   *
   * @dataProvider providerTestConvertToUtf8
   * @covers ::convertToUtf8
   *
   * @param string $data
   *   The data to be converted.
   * @param string $encoding
   *   The encoding the data is in.
   * @param string|bool $expected
   *   The expected result.
   */
  public function testConvertToUtf8($data, $encoding, $expected) {
    $this->assertSame($expected, Unicode::convertToUtf8($data, $encoding));
  }

  /**
   * Provides data to self::testConvertToUtf8().
   *
   * @return array
   *   An array of arrays, each containing the parameters to
   *   self::testConvertToUtf8().
   */
  public function providerTestConvertToUtf8() {
    return [
      [chr(0x97), 'Windows-1252', '—'],
      [chr(0x99), 'Windows-1252', '™'],
      [chr(0x80), 'Windows-1252', '€'],
    ];
  }

  /**
   * Tests multibyte strpos.
   *
   * The strict comparison matters here: a position of 0 and a FALSE "not
   * found" result must not be treated as equal.
   *
   * @dataProvider providerStrpos
   * @covers ::strpos
   * @group legacy
   * @expectedDeprecation \Drupal\Component\Utility\Unicode::strpos() is deprecated in Drupal 8.6.0 and will be removed before Drupal 9.0.0. Use mb_strpos() instead. See https://www.drupal.org/node/2850048.
   *
   * @param string $haystack
   *   The string to search in.
   * @param string $needle
   *   The string to search for.
   * @param int $offset
   *   The offset to start searching at.
   * @param int|false $expected
   *   The expected position, or FALSE when the needle is not found.
   */
  public function testStrpos($haystack, $needle, $offset, $expected) {
    $this->assertSame($expected, Unicode::strpos($haystack, $needle, $offset));
  }

  /**
   * Data provider for testStrpos().
   *
   * @see testStrpos()
   *
   * @return array
   *   An array containing:
   *     - The haystack string to be searched in.
   *     - The needle string to search for.
   *     - The offset integer to start at.
   *     - The expected integer/FALSE result.
   */
  public function providerStrpos() {
    return [
      ['frànçAIS is über-åwesome', 'frànçAIS is über-åwesome', 0, 0],
      ['frànçAIS is über-åwesome', 'rànçAIS is über-åwesome', 0, 1],
      ['frànçAIS is über-åwesome', 'not in string', 0, FALSE],
      ['frànçAIS is über-åwesome', 'r', 0, 1],
      ['frànçAIS is über-åwesome', 'nçAIS', 0, 3],
      ['frànçAIS is über-åwesome', 'nçAIS', 2, 3],
      ['frànçAIS is über-åwesome', 'nçAIS', 3, 3],
      ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 0, 2],
      ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 1, 2],
      ['以呂波耳・ほへとち。リヌルヲ。', '波耳', 2, 2],
    ];
  }

}
|