PoHeader.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. <?php
  2. namespace Drupal\Component\Gettext;
  3. /**
  4. * Gettext PO header handler.
  5. *
  6. * Possible Gettext PO header elements are explained in
  7. * http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry,
  8. * but we only support a subset of these directly.
  9. *
  10. * Example header:
  11. *
  12. * "Project-Id-Version: Drupal core (7.11)\n"
  13. * "POT-Creation-Date: 2012-02-12 22:59+0000\n"
  14. * "PO-Revision-Date: YYYY-mm-DD HH:MM+ZZZZ\n"
  15. * "Language-Team: Catalan\n"
  16. * "MIME-Version: 1.0\n"
  17. * "Content-Type: text/plain; charset=utf-8\n"
  18. * "Content-Transfer-Encoding: 8bit\n"
  19. * "Plural-Forms: nplurals=2; plural=(n>1);\n"
  20. */
  class PoHeader {

    /**
     * Language code.
     *
     * @var string
     */
    protected $langcode;

    /**
     * Value of the Plural-Forms header, for example
     * 'nplurals=2; plural=(n > 1);'.
     *
     * @var string
     */
    protected $pluralForms;

    /**
     * Author(s) of the file.
     *
     * Nothing in this class assigns this property; __toString() accepts either
     * a single author string or a list of author strings.
     *
     * @var string|string[]
     */
    protected $authors;

    /**
     * Date the po file got created, formatted as "Y-m-d H:iO".
     *
     * @var string
     */
    protected $poDate;

    /**
     * Human readable language name.
     *
     * @var string
     */
    protected $languageName;

    /**
     * Name of the project the translation belongs to.
     *
     * @var string
     */
    protected $projectName;

    /**
     * Constructor, creates a PoHeader with default values.
     *
     * The creation date defaults to "now" and the plural formula defaults to
     * 'nplurals=2; plural=(n > 1);'.
     *
     * @param string $langcode
     *   Language code.
     */
    public function __construct($langcode = NULL) {
      $this->langcode = $langcode;
      // Ignore errors when run during site installation before
      // date_default_timezone_set() is called.
      $this->poDate = @date("Y-m-d H:iO");
      $this->pluralForms = 'nplurals=2; plural=(n > 1);';
    }

    /**
     * Gets the plural form.
     *
     * @return string
     *   Plural form component from the header, for example:
     *   'nplurals=2; plural=(n > 1);'.
     */
    public function getPluralForms() {
      return $this->pluralForms;
    }

    /**
     * Set the human readable language name.
     *
     * @param string $languageName
     *   Human readable language name.
     */
    public function setLanguageName($languageName) {
      $this->languageName = $languageName;
    }

    /**
     * Gets the human readable language name.
     *
     * @return string
     *   The human readable language name.
     */
    public function getLanguageName() {
      return $this->languageName;
    }

    /**
     * Set the project name.
     *
     * @param string $projectName
     *   Human readable project name.
     */
    public function setProjectName($projectName) {
      $this->projectName = $projectName;
    }

    /**
     * Gets the project name.
     *
     * @return string
     *   The human readable project name.
     */
    public function getProjectName() {
      return $this->projectName;
    }

    /**
     * Populate internal values from a string.
     *
     * Only the Plural-Forms entry is read; every other header entry is
     * ignored.
     *
     * @param string $header
     *   Full header string with key-value pairs.
     */
    public function setFromString($header) {
      // Get an array of all header values for processing.
      $values = $this->parseHeader($header);
      // There is only one value relevant for our header implementation when
      // reading, and that is the plural formula.
      if (!empty($values['Plural-Forms'])) {
        $this->pluralForms = $values['Plural-Forms'];
      }
    }

    /**
     * Generate a Gettext PO formatted header string based on data set earlier.
     *
     * When no language name is set the header is written as a template, with
     * the LANGUAGE and PROJECT placeholders in the title comment.
     *
     * @return string
     *   The comment lines followed by the empty msgid entry that carries the
     *   header fields, terminated by a blank line.
     */
    public function __toString() {
      $isTemplate = empty($this->languageName);
      $language = $isTemplate ? 'LANGUAGE' : $this->languageName;
      $project = $isTemplate ? 'PROJECT' : $this->projectName;

      $output = '# ' . $language . ' translation of ' . $project . "\n";
      if (!empty($this->authors)) {
        // The cast lets a single author string work as well as a list.
        $output .= '# Generated by ' . implode("\n# ", (array) $this->authors) . "\n";
      }
      $output .= "#\n";

      // Add the actual header information.
      $output .= "msgid \"\"\n";
      $output .= "msgstr \"\"\n";
      $output .= "\"Project-Id-Version: PROJECT VERSION\\n\"\n";
      $output .= "\"POT-Creation-Date: " . $this->poDate . "\\n\"\n";
      $output .= "\"PO-Revision-Date: " . $this->poDate . "\\n\"\n";
      $output .= "\"Last-Translator: NAME <EMAIL@ADDRESS>\\n\"\n";
      $output .= "\"Language-Team: LANGUAGE <EMAIL@ADDRESS>\\n\"\n";
      $output .= "\"MIME-Version: 1.0\\n\"\n";
      $output .= "\"Content-Type: text/plain; charset=utf-8\\n\"\n";
      $output .= "\"Content-Transfer-Encoding: 8bit\\n\"\n";
      $output .= "\"Plural-Forms: " . $this->pluralForms . "\\n\"\n";
      $output .= "\n";

      return $output;
    }

    /**
     * Parses a Plural-Forms entry from a Gettext Portable Object file header.
     *
     * @param string $pluralforms
     *   The Plural-Forms entry value.
     *
     * @return array|false
     *   FALSE when the 'nplurals=...;' or 'plural=...;' part is missing.
     *   Otherwise an indexed array with two items:
     *   - 0: The number of plural forms, as the string found in the header.
     *   - 1: Array of plural positions keyed by plural value (0 to 199). Only
     *     the values whose position differs from the position of 199 are
     *     stored; the position of 199 is stored under the 'default' key.
     *
     * @throws \Exception
     *   When the plural formula cannot be parsed or evaluated.
     */
    public function parsePluralForms($pluralforms) {
      // First, delete all whitespace.
      $pluralforms = strtr($pluralforms, [" " => "", "\t" => ""]);

      // Select the parts that define nplurals and plural.
      $nplurals = $this->extractPluralFormsPart($pluralforms, 'nplurals=');
      if ($nplurals === FALSE) {
        return FALSE;
      }
      $plural = $this->extractPluralFormsPart($pluralforms, 'plural=');
      if ($plural === FALSE) {
        return FALSE;
      }

      // If the number of plurals is zero, we return a default result.
      if ($nplurals == 0) {
        return [$nplurals, ['default' => 0]];
      }

      $element_stack = $this->parseArithmetic($plural);
      if ($element_stack === FALSE) {
        throw new \Exception('The plural formula could not be parsed.');
      }

      // Calculate possible plural positions of different plural values. All
      // known plural formulas are repetitive above 100.
      $plurals = [];
      for ($i = 0; $i <= 199; $i++) {
        $plurals[$i] = $this->evaluatePlural($element_stack, $i);
      }

      // For data compression the position of the last value is stored as the
      // default and only the values that deviate from it are kept.
      $default = $plurals[199];
      $plurals = array_filter($plurals, function ($value) use ($default) {
        return ($value != $default);
      });
      $plurals['default'] = $default;

      return [$nplurals, $plurals];
    }

    /**
     * Extracts one "key=value;" part from a whitespace-free Plural-Forms value.
     *
     * @param string $pluralforms
     *   The Plural-Forms entry value, with whitespace already removed.
     * @param string $prefix
     *   The key including the equals sign, e.g. 'nplurals=' or 'plural='.
     *
     * @return string|false
     *   The text between the prefix and the next semicolon, or FALSE when the
     *   prefix or the terminating semicolon is missing.
     */
    private function extractPluralFormsPart($pluralforms, $prefix) {
      $tail = strstr($pluralforms, $prefix);
      if ($tail === FALSE) {
        return FALSE;
      }
      $end = strpos($tail, ';');
      if ($end === FALSE) {
        return FALSE;
      }
      $start = strlen($prefix);
      return substr($tail, $start, $end - $start);
    }

    /**
     * Parses a Gettext Portable Object file header.
     *
     * Lines that do not contain a colon cannot form a key-value pair and are
     * skipped.
     *
     * @param string $header
     *   A string containing the complete header.
     *
     * @return array
     *   An associative array of key-value pairs.
     */
    private function parseHeader($header) {
      $header_parsed = [];
      foreach (explode("\n", $header) as $line) {
        $line = trim($line);
        if (strpos($line, ':') === FALSE) {
          continue;
        }
        list($tag, $contents) = explode(':', $line, 2);
        $header_parsed[trim($tag)] = trim($contents);
      }
      return $header_parsed;
    }

    /**
     * Parses and sanitizes an arithmetic formula into a plural element stack.
     *
     * While parsing, we ensure, that the operators have the right
     * precedence and associativity.
     *
     * @param string $string
     *   A string containing the arithmetic formula.
     *
     * @return array|false
     *   A stack of values and operations (postfix order) to be evaluated, or
     *   FALSE when the formula contains an unknown token, an unmatched opening
     *   parenthesis or an inappropriate number of operators.
     */
    private function parseArithmetic($string) {
      // Operator precedence table.
      $precedence = ["(" => -1, ")" => -1, "?" => 1, ":" => 1, "||" => 3, "&&" => 4, "==" => 5, "!=" => 5, "<" => 6, ">" => 6, "<=" => 6, ">=" => 6, "+" => 7, "-" => 7, "*" => 8, "/" => 8, "%" => 8];
      // Right associativity.
      $right_associativity = ["?" => 1, ":" => 1];

      // Convert the infix token list into postfix notation.
      // Operator stack - holds math operators and symbols.
      $operator_stack = [];
      // Element Stack - holds data to be operated on.
      $element_stack = [];

      foreach ($this->tokenizeFormula($string) as $token) {
        // Numbers and the $n variable are simply pushed into $element_stack.
        if (is_numeric($token)) {
          $element_stack[] = $token;
        }
        elseif ($token === 'n') {
          $element_stack[] = '$n';
        }
        elseif ($token === '(') {
          $operator_stack[] = $token;
        }
        elseif ($token === ')') {
          // Move operators to the element stack until the opening parenthesis
          // is reached; the parenthesis itself is discarded.
          while (($top = array_pop($operator_stack)) !== NULL && $top !== '(') {
            $element_stack[] = $top;
          }
        }
        elseif (isset($precedence[$token])) {
          // If it's an operator, then pop from $operator_stack into
          // $element_stack until the precedence in $operator_stack is less
          // than current, then push into $operator_stack. Two right
          // associative operators of the same precedence stay stacked.
          while ($operator_stack) {
            $top = end($operator_stack);
            $both_right_associative = $precedence[$top] == $precedence[$token]
              && isset($right_associativity[$top])
              && isset($right_associativity[$token]);
            if ($precedence[$top] < $precedence[$token] || $both_right_associative) {
              break;
            }
            $element_stack[] = array_pop($operator_stack);
          }
          $operator_stack[] = $token;
        }
        else {
          return FALSE;
        }
      }

      // Flush operator stack.
      while (($top = array_pop($operator_stack)) !== NULL) {
        if ($top === '(') {
          // An opening parenthesis that was never closed.
          return FALSE;
        }
        $element_stack[] = $top;
      }

      // Now validate the stack: repeatedly fold "operand operand operator"
      // triplets of a scratch copy into a single string until nothing changes.
      $scratch = $element_stack;
      do {
        $size_before = count($scratch);
        for ($i = 2; $i < $size_before; $i++) {
          $op = $scratch[$i];
          if (empty($precedence[$op])) {
            continue;
          }
          if ($op == ":") {
            $folded = $scratch[$i - 2] . "):" . $scratch[$i - 1] . ")";
          }
          elseif ($op == "?") {
            $folded = "(" . $scratch[$i - 2] . "?(" . $scratch[$i - 1];
          }
          else {
            $folded = "(" . $scratch[$i - 2] . $op . $scratch[$i - 1] . ")";
          }
          array_splice($scratch, $i - 2, 3, $folded);
          break;
        }
      } while (count($scratch) < $size_before);

      // If only one element is left, the number of operators is appropriate.
      return count($scratch) == 1 ? $element_stack : FALSE;
    }

    /**
     * Tokenize the formula.
     *
     * Consecutive digits form one number token; '==', '!=', '<=', '>=', '&&'
     * and '||' form two-character tokens; every other character is a token of
     * its own.
     *
     * @param string $formula
     *   A string containing the arithmetic formula.
     *
     * @return array
     *   List of arithmetic tokens identified in the formula.
     */
    private function tokenizeFormula($formula) {
      $formula = str_replace(" ", "", $formula);
      $length = strlen($formula);
      $tokens = [];

      for ($i = 0; $i < $length; $i++) {
        $char = $formula[$i];

        if (is_numeric($char)) {
          // Collect the whole run of digits.
          $number = $char;
          while ($i + 1 < $length && is_numeric($formula[$i + 1])) {
            $number .= $formula[++$i];
          }
          $tokens[] = $number;
          continue;
        }

        // Look ahead one character without reading past the end of the
        // string.
        $next = ($i + 1 < $length) ? $formula[$i + 1] : '';
        $is_comparison = $next === '=' && strpos('=<>!', $char) !== FALSE;
        $is_logical = $next === $char && ($char === '&' || $char === '|');
        if ($is_comparison || $is_logical) {
          $tokens[] = $char . $next;
          $i++;
        }
        else {
          $tokens[] = $char;
        }
      }

      return $tokens;
    }

    /**
     * Evaluate the plural element stack using a plural value.
     *
     * Using an element stack, which represents a plural formula, we calculate
     * which plural string should be used for a given plural value.
     *
     * An example of plural formula parting and evaluation:
     *   Plural formula: 'n!=1'
     * This formula is parsed by parseArithmetic() to a stack (array) of
     * elements:
     *   array(
     *     0 => '$n',
     *     1 => '1',
     *     2 => '!=',
     *   );
     * The evaluatePlural() method evaluates the $element_stack using the plural
     * value $n. Before the actual evaluation, the '$n' in the array is replaced
     * by the value of $n.
     * For example: $n = 2 results in:
     *   array(
     *     0 => 2,
     *     1 => '1',
     *     2 => '!=',
     *   );
     * The stack is processed until only one element (the result) is left. In
     * every iteration the top elements of the stack, up until the first
     * operator, are evaluated. After evaluation the arguments and the operator
     * itself are removed and replaced by the evaluation result. This is
     * typically 2 arguments and 1 element for the operator.
     * Because the operator is '!=' the example stack is evaluated as:
     *   $f = (int) (2 != 1);
     * The resulting stack is:
     *   array(
     *     0 => 1,
     *   );
     * With only one element left in the stack (the final result) the loop is
     * terminated and the result is returned.
     *
     * Formulas are C expressions, so comparison and logical operators yield 0
     * or 1 and '/' is an integer division.
     *
     * @param array $element_stack
     *   Array of plural formula values and operators create by
     *   parseArithmetic().
     * @param int $n
     *   The @count number for which we are determining the right plural
     *   position.
     *
     * @return int
     *   Number of the plural string to be used for the given plural value.
     *
     * @see parseArithmetic()
     *
     * @throws \Exception
     *   When the stack is empty, cannot be reduced to a single value, or
     *   divides by zero.
     */
    protected function evaluatePlural($element_stack, $n) {
      $error = 'The plural formula could not be evaluated.';

      if (empty($element_stack)) {
        throw new \Exception($error);
      }

      // Replace the '$n' value in the formula by the plural value.
      foreach ($element_stack as $key => $element) {
        if ($element === '$n') {
          $element_stack[$key] = $n;
        }
      }

      // We process the stack until only one element (the result) is left.
      // Every pass must reduce the stack; a pass that cannot means the stack
      // contains an error, and throwing prevents an endless loop.
      while (isset($element_stack[1])) {
        $size = count($element_stack);
        $reduced = FALSE;

        for ($i = 2; $i < $size; $i++) {
          $operator = $element_stack[$i];
          // There's no point in checking non-symbols.
          if (is_bool($operator) || is_numeric($operator)) {
            continue;
          }

          $left = $element_stack[$i - 2];
          $right = $element_stack[$i - 1];
          // By default an operator consumes its two preceding elements.
          $offset = $i - 2;
          $length = 3;

          switch ($operator) {
            case '==':
              $result = (int) ($left == $right);
              break;

            case '!=':
              $result = (int) ($left != $right);
              break;

            case '<=':
              $result = (int) ($left <= $right);
              break;

            case '>=':
              $result = (int) ($left >= $right);
              break;

            case '<':
              $result = (int) ($left < $right);
              break;

            case '>':
              $result = (int) ($left > $right);
              break;

            case '+':
              $result = $left + $right;
              break;

            case '-':
              $result = $left - $right;
              break;

            case '*':
              $result = $left * $right;
              break;

            case '/':
              if ((int) $right === 0) {
                throw new \Exception($error);
              }
              // Truncate, as C integer division does.
              $result = (int) ($left / $right);
              break;

            case '%':
              if ((int) $right === 0) {
                throw new \Exception($error);
              }
              $result = $left % $right;
              break;

            case '&&':
              $result = (int) ($left && $right);
              break;

            case '||':
              $result = (int) ($left || $right);
              break;

            case ':':
              // This operator has 3 preceding elements, instead of the default
              // 2, and is followed by the '?' marker emitted by the parser.
              if ($i < 3 || !isset($element_stack[$i + 1]) || $element_stack[$i + 1] !== '?') {
                throw new \Exception($error);
              }
              $result = $element_stack[$i - 3] ? $left : $right;
              $offset = $i - 3;
              $length = 5;
              break;

            default:
              // A symbol that cannot be evaluated, such as a '?' that is not
              // paired with a ':'.
              throw new \Exception($error);
          }

          // Remove the processed elements and store the result.
          array_splice($element_stack, $offset, $length, [$result]);
          $reduced = TRUE;
          break;
        }

        if (!$reduced) {
          throw new \Exception($error);
        }
      }

      return (int) $element_stack[0];
    }

  }