123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596 |
- <?php
- namespace Drupal\Component\Gettext;
- use Drupal\Component\Render\FormattableMarkup;
- /**
- * Implements Gettext PO stream reader.
- *
- * The PO file format parsing is implemented according to the documentation at
- * http://www.gnu.org/software/gettext/manual/gettext.html#PO-Files
- */
- class PoStreamReader implements PoStreamInterface, PoReaderInterface {
/**
 * Source line number of the stream being parsed.
 *
 * Incremented for every line read and reported in error messages.
 *
 * @var int
 */
protected $lineNumber = 0;

/**
 * Parser context for the stream reader state machine.
 *
 * Possible contexts are:
 * - 'COMMENT' (#)
 * - 'MSGID' (msgid)
 * - 'MSGID_PLURAL' (msgid_plural)
 * - 'MSGCTXT' (msgctxt)
 * - 'MSGSTR' (msgstr)
 * - 'MSGSTR_ARR' (msgstr[])
 *
 * @var string
 */
protected $context = 'COMMENT';

/**
 * Current entry being read. Incomplete.
 *
 * Keys used by the reader are '#', 'msgctxt', 'msgid' and 'msgstr'.
 *
 * @var array
 */
protected $currentItem = [];

/**
 * Current plural index for plural translations.
 *
 * @var int
 */
protected $currentPluralIndex = 0;

/**
 * URI of the PO stream that is being read.
 *
 * @var string
 */
protected $uri = '';

/**
 * Language code for the PO stream being read.
 *
 * @var string
 */
protected $langcode = NULL;

/**
 * File handle of the current PO stream.
 *
 * @var resource
 */
protected $fd;

/**
 * The PO stream header.
 *
 * @var \Drupal\Component\Gettext\PoHeader
 */
protected $header;

/**
 * Object wrapper for the last read source/translation pair.
 *
 * @var \Drupal\Component\Gettext\PoItem
 */
protected $lastItem;

/**
 * Indicator of whether the stream reading is finished.
 *
 * Starts out as FALSE so the documented type holds before the first read.
 *
 * @var bool
 */
protected $finished = FALSE;

/**
 * Array of error messages recorded on reading this stream so far.
 *
 * Starts out as an empty array so the documented type holds even when no
 * error has been logged yet.
 *
 * @var array
 */
protected $errors = [];
/**
 * Returns the language code stored on this reader.
 *
 * @return string|null
 *   The value last passed to setLangcode(), or NULL if none was set.
 */
public function getLangcode() {
  $langcode = $this->langcode;
  return $langcode;
}
/**
 * Stores the language code to attach to items read from the stream.
 *
 * @param string $langcode
 *   The language code; it is passed on to every item created by
 *   setItemFromArray().
 */
public function setLangcode($langcode) {
  // Plain assignment, no validation is performed here.
  $this->langcode = $langcode;
}
/**
 * Returns the header parsed from the stream.
 *
 * @return \Drupal\Component\Gettext\PoHeader|null
 *   The header object set by readHeader(), or NULL when no header has been
 *   read.
 */
public function getHeader() {
  $header = $this->header;
  return $header;
}
/**
 * Implements Drupal\Component\Gettext\PoMetadataInterface::setHeader().
 *
 * A reader takes its header from the stream, so the passed header is
 * ignored.
 *
 * @param \Drupal\Component\Gettext\PoHeader $header
 *   Unused.
 */
public function setHeader(PoHeader $header) {
  // Intentionally empty: not applicable to stream reading.
}
/**
 * Returns the URI of the PO stream.
 *
 * @return string
 *   The value last passed to setURI(), or an empty string if none was set.
 */
public function getURI() {
  $uri = $this->uri;
  return $uri;
}
/**
 * Stores the URI of the PO stream to read.
 *
 * @param string $uri
 *   The URI that open() will pass to fopen().
 */
public function setURI($uri) {
  // Plain assignment; the stream itself is only opened by open().
  $this->uri = $uri;
}
/**
 * Implements Drupal\Component\Gettext\PoStreamInterface::open().
 *
 * Opens the stream and reads the header. The stream is ready for reading
 * items after.
 *
 * @throws \Exception
 *   If the URI is not yet set, or if the stream cannot be opened for
 *   reading.
 */
public function open() {
  if (empty($this->uri)) {
    throw new \Exception('Cannot open stream without URI set.');
  }

  $handle = fopen($this->uri, 'rb');
  if ($handle === FALSE) {
    // Without this check readHeader() would call fgets() on FALSE, which
    // never reports end-of-stream correctly.
    throw new \Exception(sprintf('Cannot open stream %s for reading.', $this->uri));
  }

  $this->fd = $handle;
  $this->readHeader();
}
/**
 * Implements Drupal\Component\Gettext\PoStreamInterface::close().
 *
 * @throws \Exception
 *   If the stream is not open (never opened, or already closed).
 */
public function close() {
  if (!$this->fd) {
    throw new \Exception('Cannot close stream that is not open.');
  }

  fclose($this->fd);
  // Forget the handle so that a repeated close() raises the documented
  // exception instead of calling fclose() on an already closed resource.
  $this->fd = NULL;
}
/**
 * Reads the next complete item from the stream.
 *
 * Lines are consumed one at a time until readLine() either completes an
 * item or reports the end of the stream.
 *
 * @return \Drupal\Component\Gettext\PoItem|null
 *   The item that was completed, or NULL if the stream finished before
 *   another item could be completed.
 */
public function readItem() {
  // Forget the item returned by the previous call.
  $this->lastItem = NULL;

  // Keep consuming lines while there is more to read and nothing complete
  // has been produced yet.
  while (!($this->finished || $this->lastItem !== NULL)) {
    $this->readLine();
  }

  return $this->lastItem;
}
/**
 * Moves the read pointer of the current PO stream.
 *
 * @param int $seek
 *   The new position, as a byte offset from the start of the stream.
 */
public function setSeek($seek) {
  // SEEK_SET (absolute offset) is also what fseek() uses by default.
  fseek($this->fd, $seek, SEEK_SET);
}
/**
 * Reports the read pointer position of the current PO stream.
 *
 * @return int|false
 *   Whatever ftell() reports for the stream handle.
 */
public function getSeek() {
  $position = ftell($this->fd);
  return $position;
}
/**
 * Reads the header from the PO stream.
 *
 * The header is stored in a PO file like a regular item: the empty string is
 * the source and the key-value pairs are the translation. The regular item
 * reader is therefore reused, and the first item it returns is taken to be
 * the header.
 */
private function readHeader() {
  $first_item = $this->readItem();

  // An empty (0 byte) .po file yields no item at all; in that case no header
  // is set.
  if ($first_item) {
    $parsed = new PoHeader();
    $parsed->setFromString(trim($first_item->getTranslation()));
    $this->header = $parsed;
  }
}
/**
 * Reads a line from the PO stream and stores data internally.
 *
 * Expands $this->currentItem based on new data for the current item. If
 * this line ends the current item, it is saved with setItemFromArray() with
 * data from $this->currentItem.
 *
 * An internal state machine is maintained in this reader using
 * $this->context as the reading state. PO items are in between COMMENT
 * states (when items have at least one line or comment in between them) or
 * indicated by MSGSTR or MSGSTR_ARR followed immediately by an MSGID or
 * MSGCTXT (when items closely follow each other).
 *
 * @return bool|null
 *   FALSE if an error was logged, NULL otherwise. The errors are considered
 *   non-blocking, so reading can continue, while the errors are collected
 *   for later presentation.
 */
private function readLine() {
  // Read a line and set the stream finished indicator if it was not
  // possible anymore.
  $line = fgets($this->fd);
  $this->finished = ($line === FALSE);

  // Initialize common values for error logging. This happens before the
  // end-of-stream check because the "ended unexpectedly" error at the bottom
  // of this method is also reached when no line could be read.
  $log_vars = [
    '%uri' => $this->getURI(),
    '%line' => $this->lineNumber,
  ];

  if (!$this->finished) {

    if ($this->lineNumber == 0) {
      // The first line might come with a UTF-8 BOM, which should be removed.
      $line = str_replace("\xEF\xBB\xBF", '', $line);
      // Current plurality for 'msgstr[]'.
      $this->currentPluralIndex = 0;
    }

    // Track the line number for error reporting.
    $this->lineNumber++;
    $log_vars['%line'] = $this->lineNumber;

    // Trim away the linefeed. \\n might appear at the end of the string if
    // another line continuing the same string follows. We can remove that.
    $line = trim(strtr($line, ["\\\n" => ""]));

    if (!strncmp('#', $line, 1)) {
      // Lines starting with '#' are comments.
      if ($this->context == 'COMMENT') {
        // Already in comment context, add to current comment.
        $this->currentItem['#'][] = substr($line, 1);
      }
      elseif (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->currentItem);

        // Start a new entry for the comment.
        $this->currentItem = [];
        $this->currentItem['#'][] = substr($line, 1);

        $this->context = 'COMMENT';
        return;
      }
      else {
        // A comment following any other context is a syntax error.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }
      return;
    }
    elseif (!strncmp('msgid_plural', $line, 12)) {
      // A plural form for the current source string.
      if ($this->context != 'MSGID') {
        // A plural form can only be added to an msgid directly.
        // NOTE(review): the wording says "expected but not found" although
        // this branch fires when msgid_plural IS found in the wrong place;
        // the message is kept unchanged for compatibility.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgid_plural" was expected but not found on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgid_plural' and trim away whitespace.
      $line = trim(substr($line, 12));

      // Only the plural source string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The plural form must be wrapped in quotes.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains a syntax error on line %line.', $log_vars);
        return FALSE;
      }

      // Append the plural source to the current entry.
      if (is_string($this->currentItem['msgid'])) {
        // The first value was stored as string. Now we know the context is
        // plural, it is converted to array.
        $this->currentItem['msgid'] = [$this->currentItem['msgid']];
      }
      $this->currentItem['msgid'][] = $quoted;

      $this->context = 'MSGID_PLURAL';
      return;
    }
    elseif (!strncmp('msgid', $line, 5)) {
      // Starting a new message.
      if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->currentItem);

        // Start a new context for the msgid.
        $this->currentItem = [];
      }
      elseif ($this->context == 'MSGID') {
        // We are currently already in the context, meaning we passed an id
        // with no data.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgid" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgid' and trim away whitespace.
      $line = trim(substr($line, 5));

      // Only the message id string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The message id must be wrapped in quotes.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgid" on line %line.', $log_vars);
        return FALSE;
      }

      $this->currentItem['msgid'] = $quoted;
      $this->context = 'MSGID';
      return;
    }
    elseif (!strncmp('msgctxt', $line, 7)) {
      // Starting a new context.
      if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
        // We are currently in string context, save current item.
        $this->setItemFromArray($this->currentItem);
        $this->currentItem = [];
      }
      elseif (!empty($this->currentItem['msgctxt'])) {
        // A context cannot apply to another context.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgctxt" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgctxt' and trim away whitespaces.
      $line = trim(substr($line, 7));

      // Only the msgctxt string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The context string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgctxt" on line %line.', $log_vars);
        return FALSE;
      }

      $this->currentItem['msgctxt'] = $quoted;

      $this->context = 'MSGCTXT';
      return;
    }
    elseif (!strncmp('msgstr[', $line, 7)) {
      // A message string for a specific plurality.
      if (($this->context != 'MSGID') &&
          ($this->context != 'MSGCTXT') &&
          ($this->context != 'MSGID_PLURAL') &&
          ($this->context != 'MSGSTR_ARR')) {
        // Plural message strings must come after msgid, msgctxt,
        // msgid_plural, or other msgstr[] entries.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr[]" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Ensure the plurality is terminated.
      if (strpos($line, ']') === FALSE) {
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }

      // Extract the plurality.
      $frombracket = strstr($line, '[');
      $this->currentPluralIndex = substr($frombracket, 1, strpos($frombracket, ']') - 1);

      // Skip to the next whitespace and trim away any further whitespace,
      // bringing $line to the message text only.
      $line = trim(strstr($line, " "));

      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr[]" on line %line.', $log_vars);
        return FALSE;
      }
      if (!isset($this->currentItem['msgstr']) || !is_array($this->currentItem['msgstr'])) {
        $this->currentItem['msgstr'] = [];
      }

      $this->currentItem['msgstr'][$this->currentPluralIndex] = $quoted;

      $this->context = 'MSGSTR_ARR';
      return;
    }
    elseif (!strncmp("msgstr", $line, 6)) {
      // A string pair for an msgid (with optional context).
      if (($this->context != 'MSGID') && ($this->context != 'MSGCTXT')) {
        // Strings are only valid within an id or context scope.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: "msgstr" is unexpected on line %line.', $log_vars);
        return FALSE;
      }

      // Remove 'msgstr' and trim away whitespaces.
      $line = trim(substr($line, 6));

      // Only the msgstr string is left, parse it.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // The string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: invalid format for "msgstr" on line %line.', $log_vars);
        return FALSE;
      }

      $this->currentItem['msgstr'] = $quoted;

      $this->context = 'MSGSTR';
      return;
    }
    elseif ($line != '') {
      // Anything that is not a token may be a continuation of a previous
      // token.
      $quoted = $this->parseQuoted($line);
      if ($quoted === FALSE) {
        // This string must be quoted.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: string continuation expected on line %line.', $log_vars);
        return FALSE;
      }

      // Append the string to the current item.
      if (($this->context == 'MSGID') || ($this->context == 'MSGID_PLURAL')) {
        if (is_array($this->currentItem['msgid'])) {
          // Add string to last array element for plural sources.
          $last_index = count($this->currentItem['msgid']) - 1;
          $this->currentItem['msgid'][$last_index] .= $quoted;
        }
        else {
          // Singular source, just append the string.
          $this->currentItem['msgid'] .= $quoted;
        }
      }
      elseif ($this->context == 'MSGCTXT') {
        // Multiline context name.
        $this->currentItem['msgctxt'] .= $quoted;
      }
      elseif ($this->context == 'MSGSTR') {
        // Multiline translation string.
        $this->currentItem['msgstr'] .= $quoted;
      }
      elseif ($this->context == 'MSGSTR_ARR') {
        // Multiline plural translation string.
        $this->currentItem['msgstr'][$this->currentPluralIndex] .= $quoted;
      }
      else {
        // No valid context to append to.
        $this->errors[] = new FormattableMarkup('The translation stream %uri contains an error: unexpected string on line %line.', $log_vars);
        return FALSE;
      }
      return;
    }
  }

  // Empty line read or EOF of PO stream, close out the last entry.
  if (($this->context == 'MSGSTR') || ($this->context == 'MSGSTR_ARR')) {
    $this->setItemFromArray($this->currentItem);
    $this->currentItem = [];
  }
  elseif ($this->context != 'COMMENT') {
    $this->errors[] = new FormattableMarkup('The translation stream %uri ended unexpectedly at line %line.', $log_vars);
    return FALSE;
  }
  return;
}
/**
 * Stores the parsed values as a PoItem object.
 *
 * The created item becomes $this->lastItem and the parser context is reset
 * to 'COMMENT'.
 *
 * @param array $value
 *   The raw entry. 'msgid' and 'msgstr' are read unconditionally; '#'
 *   (comment lines) and 'msgctxt' are optional.
 */
public function setItemFromArray($value) {
  // Collapse the collected comment lines into one short string, if any.
  $comments = isset($value['#']) ? $this->shortenComments($value['#']) : '';

  // An array of translations marks a plural item.
  $plural = is_array($value['msgstr']);
  if ($plural) {
    // Sort plural variants by their form index.
    ksort($value['msgstr']);
  }

  $context = '';
  if (isset($value['msgctxt'])) {
    $context = $value['msgctxt'];
  }

  $item = new PoItem();
  $item->setContext($context);
  $item->setSource($value['msgid']);
  $item->setTranslation($value['msgstr']);
  $item->setPlural($plural);
  $item->setComment($comments);
  $item->setLangcode($this->langcode);

  $this->lastItem = $item;
  $this->context = 'COMMENT';
}
/**
 * Parses a string in quotes.
 *
 * @param string $string
 *   A string specified with enclosing quotes.
 *
 * @return string|false
 *   The string parsed from inside the quotes: C-style escape sequences are
 *   resolved for double-quoted strings, single-quoted strings are returned
 *   as-is. FALSE if the string is not enclosed in a matching pair of single
 *   or double quotes.
 */
public function parseQuoted($string) {
  // A quoted string consists of at least an opening and a closing quote. A
  // lone quote character would otherwise be taken as both at once and be
  // accepted as an empty string.
  if (strlen($string) < 2) {
    return FALSE;
  }

  $quote = substr($string, 0, 1);
  if ($quote != substr($string, -1, 1)) {
    // Start and end quotes must be the same.
    return FALSE;
  }

  $inner = substr($string, 1, -1);
  if ($quote == '"') {
    // Double quotes: strip slashes.
    return stripcslashes($inner);
  }
  if ($quote == "'") {
    // Simple quote: return as-is.
    return $inner;
  }

  // Unrecognized quote.
  return FALSE;
}
/**
 * Generates a short, one-string version of the passed comment array.
 *
 * The first character of every entry is dropped and the remainders are
 * joined with ', '. No further entries are added once the joined string has
 * reached 130 bytes; the length is checked before an entry is appended, so
 * the result itself can be longer than that.
 *
 * @param array $comment
 *   An array of strings containing a comment.
 *
 * @return string
 *   Short one-string version of the comment.
 */
private function shortenComments($comment) {
  $joined = '';
  foreach ($comment as $entry) {
    if (strlen($joined) >= 130) {
      // Long enough already, ignore the remaining entries.
      break;
    }
    $joined .= substr($entry, 1) . ', ';
  }

  // Cut off the trailing separator and surrounding whitespace.
  return trim(substr($joined, 0, -2));
}
- }
|