UserAgent.php 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. <?php
  2. namespace Drupal\Component\Utility;
  3. /**
  4. * Provides user agent related utility functions.
  5. *
  6. * @ingroup utility
  7. */
  class UserAgent {

    /**
     * Identifies user agent language from the Accept-language HTTP header.
     *
     * The algorithm works as follows:
     * - parse every language range of the header together with its qvalue
     *   (1 when omitted), applying $mappings to translate non-standard
     *   user agent language codes.
     * - sort the user agent language codes by qvalue from low to high, and
     *   add the generic language code of each specific one (e.g. "fr" for
     *   "fr-ca") if it is not already specified, with a slightly lower
     *   qvalue than the lowest specific code it was derived from.
     * - for each available language code, look up the qvalue of its longest
     *   prefix sent by the user agent, falling back to the wildcard ("*")
     *   qvalue if there is one.
     * - return the available language code with the highest qvalue. If 2 or
     *   more of them have the same qvalue, the one listed first in
     *   $langcodes wins.
     *
     * This method does not look at the page cache itself. Callers should only
     * use it when the result will not be cached for other users, otherwise a
     * user-specific preference would be cached.
     *
     * @param string $http_accept_language
     *   The value of the "Accept-Language" HTTP header. NULL or an empty
     *   string is treated as "no preference sent".
     * @param array $langcodes
     *   An array of available language codes to pick from.
     * @param array $mappings
     *   (optional) Custom mappings to support user agents that are sending
     *   non standard language codes, keyed by lowercase user agent language
     *   code. No mapping is assumed by default.
     *
     * @return string|false
     *   The selected language code, exactly as it is spelled in $langcodes,
     *   or FALSE if no valid language can be identified.
     */
    public static function getBestMatchingLangcode($http_accept_language, $langcodes, $mappings = []) {
      // The Accept-Language header contains information about the language
      // preferences configured in the user's user agent / operating system.
      // RFC 2616 (section 14.4) defines the Accept-Language header as follows:
      //   Accept-Language = "Accept-Language" ":"
      //                  1#( language-range [ ";" "q" "=" qvalue ] )
      //   language-range  = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
      // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
      //
      // Optional whitespace is accepted around the ";" that introduces the
      // qvalue ("en; q=0.5"), as allowed by the HTTP grammar for weights.
      $pattern = '@(?<=[, ]|^)([a-zA-Z-]+|\*)(?:\s*;\s*q=([0-9.]+))?(?:$|\s*,\s*)@';

      // Maps lowercase user agent language codes to their qvalue * 1000.
      $ua_langcodes = [];
      // The cast keeps a missing header (NULL) from reaching trim().
      if (preg_match_all($pattern, trim((string) $http_accept_language), $matches, PREG_SET_ORDER)) {
        foreach ($matches as $match) {
          // We can safely use strtolower() here, tags are ASCII.
          $langcode = strtolower($match[1]);
          // Translate a non-standard user agent language code, if a mapping
          // is defined for it.
          if (isset($mappings[$langcode])) {
            $langcode = strtolower($mappings[$langcode]);
          }

          // RFC2616 mandates that the decimal part is no more than three
          // digits, so we multiply the qvalue by 1000 to avoid floating point
          // comparisons. With PREG_SET_ORDER the qvalue group is absent from
          // the match when the header did not specify one.
          $qvalue = isset($match[2]) ? (float) $match[2] : 1;

          // Take the highest qvalue for this langcode. Although the request
          // supposedly contains unique langcodes, our mapping possibly
          // resolves to the same langcode for different qvalues. Keep the
          // highest.
          $ua_langcodes[$langcode] = max(
            (int) ($qvalue * 1000),
            (isset($ua_langcodes[$langcode]) ? $ua_langcodes[$langcode] : 0)
          );
        }
      }

      // We should take pristine values from the HTTP headers, but Internet
      // Explorer from version 7 sends only specific language tags (eg. fr-CA)
      // without the corresponding generic tag (fr) unless explicitly
      // configured. In that case, we assume that the lowest value of the
      // specific tags is the value of the generic language to be as close to
      // the HTTP 1.1 spec as possible.
      // See http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 and
      // http://blogs.msdn.com/b/ie/archive/2006/10/17/accept-language-header-for-internet-explorer-7.aspx
      //
      // Sorting from low to high makes the lowest specific tag the first one
      // to define its generic tag. The loop iterates over a copy of the
      // array, so generic tags added below are not visited themselves.
      asort($ua_langcodes);
      foreach ($ua_langcodes as $langcode => $qvalue) {
        // For Chinese languages the generic tag is either zh-hans or zh-hant,
        // so we need to handle this separately, we can not split $langcode on
        // the first occurrence of '-' otherwise we get a non-existing
        // language zh. All other languages use a langcode without a '-', so
        // we can safely split on the first occurrence of it.
        $script_prefix = substr($langcode, 0, 7);
        if (strlen($langcode) > 7 && ($script_prefix == 'zh-hant' || $script_prefix == 'zh-hans')) {
          $generic_tag = $script_prefix;
        }
        else {
          // strtok() returns FALSE for a tag made of dashes only; the
          // !empty() check below skips that case.
          $generic_tag = strtok($langcode, '-');
        }

        if (!empty($generic_tag) && !isset($ua_langcodes[$generic_tag])) {
          // Add the generic langcode, but make sure it has a lower qvalue as
          // the more specific one, so the more specific one gets selected if
          // it's defined by both the user agent and us.
          $ua_langcodes[$generic_tag] = $qvalue - 0.1;
        }
      }

      // Find the available language with the greatest qvalue, following the
      // rules of RFC 2616 (section 14.4). If several languages have the same
      // qvalue, the strict comparison below keeps the one that comes first
      // in $langcodes.
      $best_match_langcode = FALSE;
      $max_qvalue = 0;
      foreach ($langcodes as $langcode_case_sensitive) {
        // Language tags are case insensitive (RFC2616, sec 3.10).
        $langcode = strtolower($langcode_case_sensitive);

        // If nothing matches below, the default qvalue is the one of the
        // wildcard language, if set, or is 0 (which will never match).
        $qvalue = isset($ua_langcodes['*']) ? $ua_langcodes['*'] : 0;

        // Find the longest possible prefix of the user agent supplied
        // language ('the language-range') that matches this site language
        // ('the language tag'), dropping the last "-subtag" on every round.
        $prefix = $langcode;
        while (TRUE) {
          if (isset($ua_langcodes[$prefix])) {
            $qvalue = $ua_langcodes[$prefix];
            break;
          }
          $dash_position = strrpos($prefix, '-');
          if ($dash_position === FALSE || $dash_position === 0) {
            // No shorter, non-empty prefix is left to try.
            break;
          }
          $prefix = substr($prefix, 0, $dash_position);
        }

        // Find the best match.
        if ($qvalue > $max_qvalue) {
          $best_match_langcode = $langcode_case_sensitive;
          $max_qvalue = $qvalue;
        }
      }

      return $best_match_langcode;
    }

  }