PoStreamReader.php 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. <?php
  2. namespace Drupal\Component\Gettext;
  3. use Drupal\Component\Render\FormattableMarkup;
  4. /**
  5. * Implements Gettext PO stream reader.
  6. *
  7. * The PO file format parsing is implemented according to the documentation at
  8. * http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files
  9. */
  10. class PoStreamReader implements PoStreamInterface, PoReaderInterface {
  /**
   * Source line number of the stream being parsed.
   *
   * Incremented once for every line successfully read from the stream.
   *
   * @var int
   */
  protected $lineNumber = 0;

  /**
   * Parser context for the stream reader state machine.
   *
   * Possible contexts are:
   * - 'COMMENT' (#)
   * - 'MSGID' (msgid)
   * - 'MSGID_PLURAL' (msgid_plural)
   * - 'MSGCTXT' (msgctxt)
   * - 'MSGSTR' (msgstr)
   * - 'MSGSTR_ARR' (msgstr[])
   *
   * @var string
   */
  protected $context = 'COMMENT';

  /**
   * Current entry being read. Incomplete.
   *
   * Keyed by '#' (comment lines), 'msgctxt', 'msgid' and 'msgstr' as those
   * parts are encountered in the stream.
   *
   * @var array
   */
  protected $currentItem = [];

  /**
   * Current plural index for plural translations.
   *
   * @var int
   */
  protected $currentPluralIndex = 0;

  /**
   * URI of the PO stream that is being read.
   *
   * @var string
   */
  protected $uri = '';

  /**
   * Language code for the PO stream being read.
   *
   * NULL until a language code has been assigned with setLangcode().
   *
   * @var string|null
   */
  protected $langcode = NULL;

  /**
   * File handle of the current PO stream.
   *
   * @var resource
   */
  protected $fd;

  /**
   * The PO stream header.
   *
   * @var \Drupal\Component\Gettext\PoHeader
   */
  protected $header;

  /**
   * Object wrapper for the last read source/translation pair.
   *
   * @var \Drupal\Component\Gettext\PoItem
   */
  protected $lastItem;

  /**
   * Indicator of whether the stream reading is finished.
   *
   * Initialized to FALSE so the documented boolean type always holds.
   *
   * @var bool
   */
  protected $finished = FALSE;

  /**
   * Array of error messages recorded on reading this stream so far.
   *
   * Initialized to an empty array so the documented array type holds even
   * when no error has been recorded.
   *
   * @var array
   */
  protected $errors = [];
  /**
   * Returns the language code currently assigned to this reader.
   *
   * @return string|null
   *   The stored language code; NULL when none has been set.
   */
  public function getLangcode() {
    return $this->langcode;
  }
  /**
   * Assigns the language code for the items read from this stream.
   *
   * @param string $langcode
   *   The language code to store; it is later passed on to every item built
   *   by setItemFromArray().
   */
  public function setLangcode($langcode) {
    $this->langcode = $langcode;
  }
  /**
   * Returns the header parsed from the PO stream.
   *
   * @return \Drupal\Component\Gettext\PoHeader|null
   *   The header object stored by readHeader(), or NULL when no header has
   *   been read (for example for an empty stream).
   */
  public function getHeader() {
    return $this->header;
  }
  /**
   * Implements Drupal\Component\Gettext\PoMetadataInterface::setHeader().
   *
   * Intentionally a no-op: a stream reader obtains its header from the stream
   * itself, so the passed header is ignored.
   *
   * @param \Drupal\Component\Gettext\PoHeader $header
   *   Ignored.
   */
  public function setHeader(PoHeader $header) {
  }
  /**
   * Returns the URI of the PO stream this reader works on.
   *
   * @return string
   *   The stored URI; an empty string when none has been set.
   */
  public function getURI() {
    return $this->uri;
  }
  /**
   * Stores the URI of the PO stream to read.
   *
   * @param string $uri
   *   The URI that open() will pass to fopen().
   */
  public function setURI($uri) {
    $this->uri = $uri;
  }
  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::open().
   *
   * Opens the stream and reads the header. The stream is ready for reading
   * items after.
   *
   * @throws \Exception
   *   If the URI is not yet set, or if the stream cannot be opened.
   */
  public function open() {
    if (empty($this->uri)) {
      throw new \Exception('Cannot open stream without URI set.');
    }

    // fopen() returns FALSE on failure. Bail out here instead of handing an
    // invalid handle to fgets() while reading the header.
    $fd = fopen($this->uri, 'rb');
    if ($fd === FALSE) {
      throw new \Exception('Cannot open stream for URI ' . $this->uri . '.');
    }

    $this->fd = $fd;
    $this->readHeader();
  }
  /**
   * Implements Drupal\Component\Gettext\PoStreamInterface::close().
   *
   * @throws \Exception
   *   If the stream is not open (never opened, or already closed).
   */
  public function close() {
    // A closed resource is still truthy, so test with is_resource(), which
    // returns FALSE for handles that have already been closed.
    if (!is_resource($this->fd)) {
      throw new \Exception('Cannot close stream that is not open.');
    }

    fclose($this->fd);
    // Drop the stale handle so later calls see the stream as not open.
    $this->fd = NULL;
  }
  /**
   * Reads the next complete item from the PO stream.
   *
   * Lines are consumed one at a time until either the stream is exhausted or
   * readLine() has completed an item and stored it in $this->lastItem.
   *
   * @return \Drupal\Component\Gettext\PoItem|null
   *   The item that was completed, or NULL when the stream finished without
   *   producing another item.
   */
  public function readItem() {
    // Forget the item returned by the previous call.
    $this->lastItem = NULL;

    while (TRUE) {
      // Stop on end of stream or as soon as an item has been completed.
      if ($this->finished || $this->lastItem !== NULL) {
        break;
      }
      $this->readLine();
    }

    return $this->lastItem;
  }
  /**
   * Moves the read pointer of the current PO stream.
   *
   * @param int $seek
   *   The offset handed to fseek() for the open stream.
   */
  public function setSeek($seek) {
    fseek($this->fd, $seek);
  }
  /**
   * Reports the read pointer position of the current PO stream.
   *
   * @return int|false
   *   The value returned by ftell() for the open stream.
   */
  public function getSeek() {
    return ftell($this->fd);
  }
  /**
   * Reads the header from the PO stream.
   *
   * The header is stored in the stream like any other item: its source is the
   * empty string and its translation holds the key-value pairs. The regular
   * item reader is therefore reused, and the translation of the first item is
   * fed into a PoHeader object.
   */
  private function readHeader() {
    $first = $this->readItem();

    // An empty (0 byte) .po file yields no item; leave the header unset then.
    if ($first) {
      $parsed = new PoHeader();
      $parsed->setFromString(trim($first->getTranslation()));
      $this->header = $parsed;
    }
  }
  /**
   * Reads a line from the PO stream and stores data internally.
   *
   * Expands $this->currentItem based on new data for the current item. If
   * this line ends the current item, it is saved with setItemFromArray() with
   * data from $this->currentItem.
   *
   * An internal state machine is maintained in this reader using
   * $this->context as the reading state. PO items are in between COMMENT
   * states (when items have at least one line or comment in between them) or
   * indicated by MSGSTR or MSGSTR_ARR followed immediately by an MSGID or
   * MSGCTXT (when items closely follow each other).
   *
   * @return bool|null
   *   FALSE if an error was logged, NULL otherwise. The errors are considered
   *   non-blocking, so reading can continue, while the errors are collected
   *   for later presentation.
   */
  private function readLine() {
    // Read a line and set the stream finished indicator if it was not
    // possible anymore.
    $line = fgets($this->fd);
    $this->finished = ($line === FALSE);

    // Initialize common values for error logging. This is done before the
    // line is inspected so the values are also defined when the end of the
    // stream is hit in the middle of an entry (reported at the bottom).
    $log_vars = [
      '%uri' => $this->getURI(),
      '%line' => $this->lineNumber,
    ];

    if (!$this->finished) {

      if ($this->lineNumber == 0) {
        // The first line might come with a UTF-8 BOM, which should be removed.
        $line = str_replace("\xEF\xBB\xBF", '', $line);
        // Current plurality for 'msgstr[]'.
        $this->currentPluralIndex = 0;
      }

      // Track the line number for error reporting.
      $this->lineNumber++;
      $log_vars['%line'] = $this->lineNumber;

      // Trim away the linefeed. \\n might appear at the end of the string if
      // another line continuing the same string follows. We can remove that.
      $line = trim(strtr($line, ["\\\n" => ""]));

      if (!strncmp('#', $line, 1)) {
        // Lines starting with '#' are comments.

        if ($this->context == 'COMMENT') {
          // Already in comment context, add to current comment.
          $this->currentItem['#'][] = substr($line, 1);
        }
        elseif (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->currentItem);

          // Start a new entry for the comment.
          $this->currentItem = [];
          $this->currentItem['#'][] = substr($line, 1);

          $this->context = 'COMMENT';
          return;
        }
        else {
          // A comment following any other context is a syntax error.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
          return FALSE;
        }
        return;
      }
      elseif (!strncmp('msgid_plural', $line, 12)) {
        // A plural form for the current source string.

        if ($this->context != 'MSGID') {
          // A plural form can only be added to an msgid directly.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgid_plural' and trim away whitespace.
        $line = trim(substr($line, 12));

        // Only the plural source string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The plural form must be wrapped in quotes.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains a syntax error on line %line.', $log_vars);
          return FALSE;
        }

        // Append the plural source to the current entry.
        if (is_string($this->currentItem['msgid'])) {
          // The first value was stored as string. Now we know the context is
          // plural, it is converted to array.
          $this->currentItem['msgid'] = [$this->currentItem['msgid']];
        }
        $this->currentItem['msgid'][] = $quoted;

        $this->context = 'MSGID_PLURAL';
        return;
      }
      elseif (!strncmp('msgid', $line, 5)) {
        // Starting a new message.

        if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->currentItem);

          // Start a new context for the msgid.
          $this->currentItem = [];
        }
        elseif ($this->context == 'MSGID') {
          // We are currently already in the context, meaning we passed an id
          // with no data.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgid' and trim away whitespace.
        $line = trim(substr($line, 5));

        // Only the message id string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The message id must be wrapped in quotes.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars);
          return FALSE;
        }

        $this->currentItem['msgid'] = $quoted;
        $this->context = 'MSGID';
        return;
      }
      elseif (!strncmp('msgctxt', $line, 7)) {
        // Starting a new context.

        if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
          // We are currently in string context, save current item.
          $this->setItemFromArray($this->currentItem);
          $this->currentItem = [];
        }
        elseif (!empty($this->currentItem['msgctxt'])) {
          // A context cannot apply to another context.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgctxt' and trim away whitespaces.
        $line = trim(substr($line, 7));

        // Only the msgctxt string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The context string must be quoted.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
          return FALSE;
        }

        $this->currentItem['msgctxt'] = $quoted;

        $this->context = 'MSGCTXT';
        return;
      }
      elseif (!strncmp('msgstr[', $line, 7)) {
        // A message string for a specific plurality.

        if (($this->context != 'MSGID') &&
            ($this->context != 'MSGCTXT') &&
            ($this->context != 'MSGID_PLURAL') &&
            ($this->context != 'MSGSTR_ARR')) {
          // Plural message strings must come after msgid, msgctxt,
          // msgid_plural, or other msgstr[] entries.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Ensure the plurality is terminated.
        if (strpos($line, ']') === FALSE) {
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
          return FALSE;
        }

        // Extract the plurality.
        $frombracket = strstr($line, '[');
        $this->currentPluralIndex = substr($frombracket, 1, strpos($frombracket, ']') - 1);

        // Skip to the next whitespace and trim away any further whitespace,
        // bringing $line to the message text only.
        $line = trim(strstr($line, " "));

        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The string must be quoted.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
          return FALSE;
        }
        if (!isset($this->currentItem['msgstr']) || !is_array($this->currentItem['msgstr'])) {
          $this->currentItem['msgstr'] = [];
        }

        $this->currentItem['msgstr'][$this->currentPluralIndex] = $quoted;

        $this->context = 'MSGSTR_ARR';
        return;
      }
      elseif (!strncmp("msgstr", $line, 6)) {
        // A string pair for an msgid (with optional context).

        if (($this->context != 'MSGID') && ($this->context != 'MSGCTXT')) {
          // Strings are only valid within an id or context scope.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
          return FALSE;
        }

        // Remove 'msgstr' and trim away whitespaces.
        $line = trim(substr($line, 6));

        // Only the msgstr string is left, parse it.
        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // The string must be quoted.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
          return FALSE;
        }

        $this->currentItem['msgstr'] = $quoted;

        $this->context = 'MSGSTR';
        return;
      }
      elseif ($line != '') {
        // Anything that is not a token may be a continuation of a previous
        // token.

        $quoted = $this->parseQuoted($line);
        if ($quoted === FALSE) {
          // This string must be quoted.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
          return FALSE;
        }

        // Append the string to the current item.
        if (($this->context == 'MSGID') || ($this->context == 'MSGID_PLURAL')) {
          if (is_array($this->currentItem['msgid'])) {
            // Add string to last array element for plural sources.
            $last_index = count($this->currentItem['msgid']) - 1;
            $this->currentItem['msgid'][$last_index] .= $quoted;
          }
          else {
            // Singular source, just append the string.
            $this->currentItem['msgid'] .= $quoted;
          }
        }
        elseif ($this->context == 'MSGCTXT') {
          // Multiline context name.
          $this->currentItem['msgctxt'] .= $quoted;
        }
        elseif ($this->context == 'MSGSTR') {
          // Multiline translation string.
          $this->currentItem['msgstr'] .= $quoted;
        }
        elseif ($this->context == 'MSGSTR_ARR') {
          // Multiline plural translation string.
          $this->currentItem['msgstr'][$this->currentPluralIndex] .= $quoted;
        }
        else {
          // No valid context to append to.
          $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
          return FALSE;
        }
        return;
      }
    }

    // Empty line read or EOF of PO stream, close out the last entry.
    if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
      $this->setItemFromArray($this->currentItem);
      $this->currentItem = [];
    }
    elseif ($this->context != 'COMMENT') {
      // The entry was cut short (for example a msgid without a msgstr).
      $this->errors[] = new FormattableMarkup('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
      return FALSE;
    }
    return;
  }
  /**
   * Stores the parsed values as a PoItem object.
   *
   * The resulting item is kept in $this->lastItem and the parser context is
   * reset to 'COMMENT' so that the next entry can start.
   *
   * @param array $value
   *   The collected parts of one entry. The keys read here are '#' (optional
   *   list of comment lines), 'msgctxt' (optional), 'msgid' and 'msgstr'. An
   *   array in 'msgstr' marks the entry as plural.
   */
  public function setItemFromArray($value) {
    // Comments are optional; condense them into one string when present.
    $comments = isset($value['#']) ? $this->shortenComments($value['#']) : '';

    // An array of translations means the entry carries plural variants, which
    // are ordered by their form index.
    $plural = is_array($value['msgstr']);
    if ($plural) {
      ksort($value['msgstr']);
    }

    $context = '';
    if (isset($value['msgctxt'])) {
      $context = $value['msgctxt'];
    }

    $item = new PoItem();
    $item->setContext($context);
    $item->setSource($value['msgid']);
    $item->setTranslation($value['msgstr']);
    $item->setPlural($plural);
    $item->setComment($comments);
    $item->setLangcode($this->langcode);

    $this->lastItem = $item;
    $this->context = 'COMMENT';
  }
  /**
   * Parses a string in quotes.
   *
   * @param string $string
   *   A string specified with enclosing quotes.
   *
   * @return string|false
   *   The string parsed from inside the quotes: backslash escapes are resolved
   *   for double-quoted strings, single-quoted strings are returned verbatim.
   *   FALSE if the string is not wrapped in a matching pair of single or
   *   double quotes.
   */
  public function parseQuoted($string) {
    $string = (string) $string;

    // An opening and a closing quote are both required. Without this check a
    // lone quote character would count as its own closing quote and be
    // accepted as an empty string.
    if (strlen($string) < 2) {
      return FALSE;
    }

    $quote = $string[0];
    if ($quote !== substr($string, -1)) {
      // Start and end quotes must be the same.
      return FALSE;
    }

    $inner = (string) substr($string, 1, -1);

    if ($quote === '"') {
      // Double quotes: strip slashes.
      return stripcslashes($inner);
    }
    if ($quote === "'") {
      // Simple quote: return as-is.
      return $inner;
    }

    // Unrecognized quote.
    return FALSE;
  }
  /**
   * Generates a short, one-string version of the passed comment array.
   *
   * Each comment loses its first character and is joined to the result with
   * ', '. Comments are added as long as the text gathered so far is shorter
   * than 130 bytes, so the result can exceed that length by one comment.
   *
   * @param array $comment
   *   An array of strings containing a comment.
   *
   * @return string
   *   Short one-string version of the comment.
   */
  private function shortenComments($comment) {
    $short = '';

    foreach ($comment as $piece) {
      // The limit is checked against what has been collected already, not
      // against the candidate that includes the next comment.
      if (strlen($short) >= 130) {
        break;
      }
      $short .= substr($piece, 1) . ', ';
    }

    // Drop the trailing separator and surrounding whitespace.
    return trim(substr($short, 0, -2));
  }
  518. }