FeedsXPathParserQueryParser.inc 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. <?php
  2. /**
  3. * @file
  4. * Pseudo-parser of XPath queries. When an XML document has a default
  5. * namespace, this gets called to add the __default__ namespace prefix where
  6. * appropriate. Aren't we nice?
  7. *
  8. * @todo
  9. * Cleanup.
  10. * @param $query
  11. * An XPath query string.
  12. * @return string
  13. * An XPath query string with the __default__ namespace added.
  14. */
  /**
   * Pseudo-parser that adds the __default__ namespace prefix to XPath queries.
   *
   * The query is scanned one byte at a time. Characters are accumulated into a
   * "word" until a boundary character (operator, bracket, slash, space, ...) or
   * a quote is reached; the finished word is then copied to the output, either
   * verbatim or prefixed with "__default__:". Quoted literals are copied
   * verbatim.
   *
   * All parsing happens in the constructor; read the result with getQuery().
   */
  class FeedsXPathParserQueryParser {

    /**
     * The query being parsed, after whitespace around "(" was normalized.
     */
    public $query;

    /**
     * Single characters that terminate the current word.
     */
    public $word_boundaries;

    /**
     * TRUE while the scanner is inside a quoted literal.
     */
    public $in_quotes;

    /**
     * The quote character (' or ") that opened the current literal.
     */
    public $quote_char;

    /**
     * The word (or quoted literal) accumulated so far.
     */
    public $word;

    /**
     * The rewritten query built up so far.
     */
    public $output;

    /**
     * The most recently emitted boundary character.
     */
    public $prev_boundary;

    /**
     * The most recently seen axis name (the word in front of "::").
     */
    public $axis;

    /**
     * TRUE when the next word follows an explicit "prefix:" and must be kept
     * as is.
     */
    public $skip_next_word;

    /**
     * Byte offset in $query of the character currently being processed.
     */
    public $i = 0;

    /**
     * Parses $query immediately; the result is available from getQuery().
     *
     * @param $query
     *   An XPath query string.
     */
    public function __construct($query) {
      // Drop whitespace before "(" (and any after it) so that a function name
      // is always directly followed by "(" when handle_word() inspects it.
      // Note: this is applied to the whole string, quoted literals included.
      $this->query = preg_replace('/\s+\(\s*/', '(', $query);
      $this->word_boundaries = array(
        '[', ']', '=', '(', ')', '.', '<', '>', '*', '!', '|', '/', ',', ' ', ':',
      );
      $this->in_quotes = FALSE;
      $this->quote_char = '';
      $this->word = '';
      $this->output = '';
      $this->prev_boundary = '';
      $this->axis = '';
      $this->skip_next_word = FALSE;
      $this->start();
    }

    /**
     * Scans the query byte by byte and builds the rewritten output.
     */
    public function start() {
      // The query is indexed by byte offset below, so the loop bound must be
      // the byte length. A character count would stop early on multibyte
      // input and drop the tail of the query.
      $length = strlen($this->query);
      for ($i = 0; $i < $length; $i++) {
        $this->i = $i;
        $c = $this->query[$i];
        if ($c === '"' || $c === "'") {
          $this->handle_quote($c);
          continue;
        }
        if ($this->in_quotes) {
          // Inside a literal nothing is a boundary; copy verbatim.
          $this->word .= $c;
          continue;
        }
        if (in_array($c, $this->word_boundaries, TRUE)) {
          $this->handle_word_boundary($c);
        }
        else {
          $this->word .= $c;
        }
      }
      // Flush whatever word is still pending at the end of the query.
      $this->handle_word();
    }

    /**
     * Handles a quote character: opens, closes or continues a literal.
     *
     * @param $c
     *   The quote character that was read, either ' or ".
     */
    public function handle_quote($c) {
      if ($this->in_quotes && $c === $this->quote_char) {
        // Closing quote: emit the complete literal untouched.
        $this->in_quotes = FALSE;
        $this->word .= $c;
        $this->output .= $this->word;
        $this->word = '';
      }
      elseif (!$this->in_quotes) {
        // Opening quote: flush the pending word, then start the literal.
        $this->in_quotes = TRUE;
        $this->handle_word();
        $this->word = $c;
        $this->quote_char = $c;
      }
      else {
        // The other kind of quote inside a literal is ordinary content.
        $this->word .= $c;
      }
    }

    /**
     * Handles a boundary character: flushes the pending word, then emits $c.
     *
     * @param $c
     *   The boundary character that was read.
     */
    public function handle_word_boundary($c) {
      // "div", "or", "and" and "mod" surrounded by single spaces are treated
      // as operators and copied without a prefix.
      $is_operator = in_array($this->word, array('div', 'or', 'and', 'mod'), TRUE)
        && $this->prev_boundary === ' '
        && $c === ' ';
      if ($is_operator) {
        $this->output .= $this->word;
      }
      else {
        $this->handle_word($c);
      }
      $this->output .= $c;
      $this->word = '';
      $this->prev_boundary = $c;
    }

    /**
     * Appends the pending word to the output, prefixed where appropriate.
     *
     * @param $c
     *   The boundary character that terminated the word, or '' when the word
     *   was terminated by a quote or by the end of the query.
     */
    public function handle_word($c = '') {
      if ($this->word === '') {
        return;
      }

      if ($c === ':') {
        // Look one byte behind and ahead, staying inside the string so that a
        // query ending in ":" does not read an undefined offset.
        $length = strlen($this->query);
        $prev = $this->i > 0 ? $this->query[$this->i - 1] : '';
        $next = $this->i + 1 < $length ? $this->query[$this->i + 1] : '';

        if ($next === ':') {
          // "word::" - remember the axis name.
          // NOTE(review): the axis is never cleared afterwards, so once
          // "attribute::" has been seen every later word is left unprefixed
          // as well - confirm whether that is intended.
          $this->axis = $this->word;
        }
        elseif ($prev !== ':') {
          // A lone ":" - the word is an explicit namespace prefix. Keep it and
          // leave the name that follows untouched too.
          $this->output .= $this->word;
          $this->skip_next_word = TRUE;
          return;
        }
      }

      if ($this->skip_next_word) {
        $this->skip_next_word = FALSE;
        $this->output .= $this->word;
        return;
      }

      // Numbers, attribute names, function names (followed by "(") and axis
      // names (followed by ":") are copied without a prefix.
      if (is_numeric($this->word) ||
          $this->axis === 'attribute' ||
          strpos($this->word, '@') === 0 ||
          $c === '(' ||
          $c === ':') {
        $this->output .= $this->word;
        return;
      }

      $this->output .= '__default__:' . $this->word;
    }

    /**
     * Returns the rewritten query.
     *
     * @return string
     *   An XPath query string with the __default__ namespace added.
     */
    public function getQuery() {
      return $this->output;
    }

  }