PoHeader.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. <?php
  /**
   * @file
   * Definition of Drupal\Component\Gettext\PoHeader.
   */
  /**
   * Gettext PO header handler.
   *
   * Possible Gettext PO header elements are explained in
   * http://www.gnu.org/software/gettext/manual/gettext.html#Header-Entry,
   * but we only support a subset of these directly.
   *
   * Example header:
   *
   * "Project-Id-Version: Drupal core (7.11)\n"
   * "POT-Creation-Date: 2012-02-12 22:59+0000\n"
   * "PO-Revision-Date: YYYY-mm-DD HH:MM+ZZZZ\n"
   * "Language-Team: Catalan\n"
   * "MIME-Version: 1.0\n"
   * "Content-Type: text/plain; charset=utf-8\n"
   * "Content-Transfer-Encoding: 8bit\n"
   * "Plural-Forms: nplurals=2; plural=(n>1);\n"
   */
  class PoHeader {

    /**
     * Language code.
     *
     * @var string
     */
    private $_langcode;

    /**
     * Formula for the plural form.
     *
     * @var string
     */
    private $_pluralForms;

    /**
     * Author(s) of the file.
     *
     * Nothing in this class assigns this property; __toString() accepts either
     * a single string or a list of strings here.
     *
     * @var string|string[]
     */
    private $_authors;

    /**
     * Date the po file got created.
     *
     * @var string
     */
    private $_po_date;

    /**
     * Human readable language name.
     *
     * @var string
     */
    private $_languageName;

    /**
     * Name of the project the translation belongs to.
     *
     * @var string
     */
    private $_projectName;

    /**
     * Constructor, creates a PoHeader with default values.
     *
     * The creation date defaults to the current time and the plural formula to
     * 'nplurals=2; plural=(n > 1);'.
     *
     * @param string $langcode
     *   Language code.
     */
    public function __construct($langcode = NULL) {
      $this->_langcode = $langcode;
      // Ignore errors when run during site installation before
      // date_default_timezone_set() is called.
      $this->_po_date = @date("Y-m-d H:iO");
      $this->_pluralForms = 'nplurals=2; plural=(n > 1);';
    }

    /**
     * Get the plural form.
     *
     * @return string
     *   Plural form component from the header, for example:
     *   'nplurals=2; plural=(n > 1);'.
     */
    public function getPluralForms() {
      return $this->_pluralForms;
    }

    /**
     * Set the human readable language name.
     *
     * @param string $languageName
     *   Human readable language name.
     */
    public function setLanguageName($languageName) {
      $this->_languageName = $languageName;
    }

    /**
     * Get the human readable language name.
     *
     * @return string
     *   The human readable language name.
     */
    public function getLanguageName() {
      return $this->_languageName;
    }

    /**
     * Set the project name.
     *
     * @param string $projectName
     *   Human readable project name.
     */
    public function setProjectName($projectName) {
      $this->_projectName = $projectName;
    }

    /**
     * Get the project name.
     *
     * @return string
     *   The human readable project name.
     */
    public function getProjectName() {
      return $this->_projectName;
    }

    /**
     * Populate internal values from a string.
     *
     * Only a non-empty 'Plural-Forms' value is taken over; every other header
     * entry is parsed but ignored.
     *
     * @param string $header
     *   Full header string with key-value pairs.
     */
    public function setFromString($header) {
      // Get an array of all header values for processing.
      $values = $this->parseHeader($header);
      // There is only one value relevant for our header implementation when
      // reading, and that is the plural formula.
      if (!empty($values['Plural-Forms'])) {
        $this->_pluralForms = $values['Plural-Forms'];
      }
    }

    /**
     * Generate a Gettext PO formatted header string based on data set earlier.
     *
     * When no language name has been set the header is written as a template,
     * using the 'LANGUAGE' and 'PROJECT' placeholders in the title comment.
     *
     * @return string
     *   The comment block, the empty msgid/msgstr pair and the header lines,
     *   followed by a blank line.
     */
    public function __toString() {
      $isTemplate = empty($this->_languageName);

      $output = '# ' . ($isTemplate ? 'LANGUAGE' : $this->_languageName) . ' translation of ' . ($isTemplate ? 'PROJECT' : $this->_projectName) . "\n";
      if (!empty($this->_authors)) {
        // Cast to an array so that a single author kept as a plain string does
        // not make implode() fail.
        $output .= '# Generated by ' . implode("\n# ", (array) $this->_authors) . "\n";
      }
      $output .= "#\n";

      // Add the actual header information.
      $output .= "msgid \"\"\n";
      $output .= "msgstr \"\"\n";
      $output .= "\"Project-Id-Version: PROJECT VERSION\\n\"\n";
      $output .= "\"POT-Creation-Date: " . $this->_po_date . "\\n\"\n";
      $output .= "\"PO-Revision-Date: " . $this->_po_date . "\\n\"\n";
      $output .= "\"Last-Translator: NAME <EMAIL@ADDRESS>\\n\"\n";
      $output .= "\"Language-Team: LANGUAGE <EMAIL@ADDRESS>\\n\"\n";
      $output .= "\"MIME-Version: 1.0\\n\"\n";
      $output .= "\"Content-Type: text/plain; charset=utf-8\\n\"\n";
      $output .= "\"Content-Transfer-Encoding: 8bit\\n\"\n";
      $output .= "\"Plural-Forms: " . $this->_pluralForms . "\\n\"\n";
      $output .= "\n";

      return $output;
    }

    /**
     * Parses a Plural-Forms entry from a Gettext Portable Object file header.
     *
     * @param string $pluralforms
     *   The Plural-Forms entry value.
     *
     * @return array|boolean
     *   An array containing the number of plural forms and the converted version
     *   of the formula that can be evaluated with PHP later. FALSE is returned
     *   when the "nplurals=" or the "plural=" part is missing or is not
     *   terminated by a semicolon.
     *
     * @throws \Exception
     *   Throws exception in case plural formula could not be parsed.
     */
    public function parsePluralForms($pluralforms) {
      // First, delete all spaces and tabs.
      $pluralforms = strtr($pluralforms, array(" " => "", "\t" => ""));

      // Select the parts that define nplurals and plural.
      $nplurals = strstr($pluralforms, "nplurals=");
      if (strpos($nplurals, ";")) {
        // We want the string from the 10th char, because "nplurals=" length is 9.
        $nplurals = substr($nplurals, 9, strpos($nplurals, ";") - 9);
      }
      else {
        return FALSE;
      }

      $plural = strstr($pluralforms, "plural=");
      if (strpos($plural, ";")) {
        // We want the string from the 8th char, because "plural=" length is 7.
        $plural = substr($plural, 7, strpos($plural, ";") - 7);
      }
      else {
        return FALSE;
      }

      // Get PHP version of the plural formula.
      $plural = $this->parseArithmetic($plural);
      if ($plural === FALSE) {
        // Fully qualified so the class keeps working if it is ever namespaced.
        throw new \Exception('The plural formula could not be parsed.');
      }

      return array($nplurals, $plural);
    }

    /**
     * Parses a Gettext Portable Object file header.
     *
     * Each non-empty line is split at its first colon into a tag and a value.
     * A line without a colon is stored with an empty string as its value.
     *
     * @param string $header
     *   A string containing the complete header.
     *
     * @return array
     *   An associative array of key-value pairs.
     */
    private function parseHeader($header) {
      $header_parsed = array();
      $lines = array_map('trim', explode("\n", $header));
      foreach ($lines as $line) {
        if ($line) {
          $parts = explode(":", $line, 2);
          // explode() yields a single element when the line has no colon, so
          // the value must not be read blindly.
          $header_parsed[trim($parts[0])] = isset($parts[1]) ? trim($parts[1]) : '';
        }
      }
      return $header_parsed;
    }

    /**
     * Parses and sanitizes an arithmetic formula into a PHP expression.
     *
     * While parsing, we ensure, that the operators have the right
     * precedence and associativity.
     *
     * @param string $string
     *   A string containing the arithmetic formula.
     *
     * @return string|FALSE
     *   A version of the formula to evaluate with PHP later, or FALSE when the
     *   formula contains an unknown token or the operators and operands do not
     *   combine into a single expression.
     */
    private function parseArithmetic($string) {
      // Operator precedence table.
      $precedence = array("(" => -1, ")" => -1, "?" => 1, ":" => 1, "||" => 3, "&&" => 4, "==" => 5, "!=" => 5, "<" => 6, ">" => 6, "<=" => 6, ">=" => 6, "+" => 7, "-" => 7, "*" => 8, "/" => 8, "%" => 8);
      // Right associativity.
      $right_associativity = array("?" => 1, ":" => 1);

      $tokens = $this->tokenizeFormula($string);

      // Parse by converting the infix tokens into postfix notation first, then
      // rebuilding a fully parenthesized infix expression from the postfix list.
      // Operator stack - holds math operators and symbols.
      $operator_stack = array();
      // Element stack - holds the postfix output (operands and operators).
      $element_stack = array();

      foreach ($tokens as $token) {
        // Numbers and the $n variable are simply pushed into $element_stack.
        if (is_numeric($token)) {
          $element_stack[] = $token;
        }
        elseif ($token === "n") {
          $element_stack[] = '$n';
        }
        elseif ($token === "(") {
          $operator_stack[] = $token;
        }
        elseif ($token === ")") {
          // Move operators to the output until the matching "(" is dropped.
          $topop = array_pop($operator_stack);
          while (isset($topop) && ($topop !== "(")) {
            $element_stack[] = $topop;
            $topop = array_pop($operator_stack);
          }
        }
        elseif (!empty($precedence[$token])) {
          // If it's an operator, then pop from $operator_stack into
          // $element_stack until the precedence in $operator_stack is less
          // than current, then push into $operator_stack. Two right associative
          // operators of equal precedence ("?" and ":") are left stacked.
          $topop = array_pop($operator_stack);
          while (isset($topop) && ($precedence[$topop] >= $precedence[$token]) && !(($precedence[$topop] == $precedence[$token]) && !empty($right_associativity[$topop]) && !empty($right_associativity[$token]))) {
            $element_stack[] = $topop;
            $topop = array_pop($operator_stack);
          }
          if (isset($topop)) {
            // Return element to top.
            $operator_stack[] = $topop;
          }
          // Parentheses are not needed.
          $operator_stack[] = $token;
        }
        else {
          // Unknown token: refuse the whole formula.
          return FALSE;
        }
      }

      // Flush operator stack.
      $topop = array_pop($operator_stack);
      while (isset($topop)) {
        $element_stack[] = $topop;
        $topop = array_pop($operator_stack);
      }

      // Now extract formula from stack: repeatedly collapse the first
      // "operand operand operator" triple into one string until a pass no
      // longer shrinks the stack.
      $previous_size = count($element_stack) + 1;
      while (count($element_stack) < $previous_size) {
        $previous_size = count($element_stack);
        for ($i = 2; $i < count($element_stack); $i++) {
          $op = $element_stack[$i];
          if (!empty($precedence[$op])) {
            if ($op === ":") {
              // Closes the parentheses opened by the matching "?" below.
              $f = $element_stack[$i - 2] . "):" . $element_stack[$i - 1] . ")";
            }
            elseif ($op === "?") {
              $f = "(" . $element_stack[$i - 2] . "?(" . $element_stack[$i - 1];
            }
            else {
              $f = "(" . $element_stack[$i - 2] . $op . $element_stack[$i - 1] . ")";
            }
            array_splice($element_stack, $i - 2, 3, $f);
            break;
          }
        }
      }

      // If only one element is left, the number of operators is appropriate.
      if (count($element_stack) == 1) {
        return $element_stack[0];
      }
      return FALSE;
    }

    /**
     * Tokenize the formula.
     *
     * Consecutive digits form one number token. The characters = < > ! are
     * joined with a following "=", and & and | with a following identical
     * character, into two-character operators. Every other character becomes
     * a token of its own.
     *
     * @param string $formula
     *   A string containing the arithmetic formula.
     *
     * @return array
     *   List of arithmetic tokens identified in the formula.
     */
    private function tokenizeFormula($formula) {
      // First character of an operator => character that completes it.
      $pairs = array("=" => "=", "<" => "=", ">" => "=", "!" => "=", "&" => "&", "|" => "|");

      $formula = str_replace(" ", "", $formula);
      $length = strlen($formula);
      $tokens = array();

      for ($i = 0; $i < $length; $i++) {
        $char = $formula[$i];
        if (is_numeric($char)) {
          // Collect the whole run of digits as one number.
          $num = $char;
          $j = $i + 1;
          while ($j < $length && is_numeric($formula[$j])) {
            $num .= $formula[$j];
            $j++;
          }
          $i = $j - 1;
          $tokens[] = $num;
        }
        elseif (isset($pairs[$char])) {
          // The operator may be the last character of the formula, so the
          // following character must not be read unconditionally.
          $next = isset($formula[$i + 1]) ? $formula[$i + 1] : '';
          if ($next === $pairs[$char]) {
            $tokens[] = $char . $next;
            $i++;
          }
          else {
            $tokens[] = $char;
          }
        }
        else {
          $tokens[] = $char;
        }
      }

      return $tokens;
    }

  }