solr_connection.inc 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. <?php
  2. /**
  3. * A few custom rewrites to the Apache_Solr_Service class, to allow providing
  4. * HTTP authentication and using this module without turning "allow_url_fopen"
  5. * on.
  6. *
  7. * Stolen from the apachesolr module for the most part.
  8. */
  9. class SearchApiSolrConnection extends Apache_Solr_Service {
  10. /**
  11. * Authentication string (username + password) for HTTP authentication.
  12. */
  13. protected $http_auth;
  14. /**
  15. * Additional servlet mapping. Allows us to use the LukeRequestHandler Solr
  16. * service.
  17. */
  18. const LUKE_SERVLET = 'admin/luke';
  19. /**
  20. * Lucene index schema information.
  21. *
  22. * @var Apache_Solr_Response
  23. */
  24. protected $luke;
  25. /**
  26. * Identifies which version of the SolrPhpClient this uses, "old" or "new".
  27. *
  28. * @var bool
  29. */
  30. protected $newClient = FALSE;
  31. /**
  32. * Constructs a Solr connection with an optional HTTP user and password.
  33. *
  34. * @param array $options
  35. * An array containing construction arguments.
  36. */
  37. public function __construct(array $options) {
  38. $options += array(
  39. 'host' => 'localhost',
  40. 'port' => 8983,
  41. 'path' => '',
  42. 'http_user' => NULL,
  43. 'http_pass' => NULL,
  44. 'default_field'=>'id',
  45. );
  46. parent::__construct($options['host'], $options['port'], $options['path']);
  47. if ($options['http_user'] && $options['http_pass']) {
  48. $this->http_auth = 'Basic ' . base64_encode($options['http_user'] . ':' . $options['http_pass']);
  49. }
  50. // Since /ping otherwise complains about missing default field.
  51. $this->_pingUrl .= '?q=' . $options['default_field'] . ':1';
  52. // As of July 2011, the newest release is r60, with Service.php having
  53. // revision 59. Revision 40 is just anything between 22 (old) and that.
  54. $this->newClient = trim(parent::SVN_REVISION, '$ :A..Za..z') > 40;
  55. if ($this->newClient) {
  56. $this->_httpTransport = new SearchApiSolrHttpTransport($this->http_auth);
  57. }
  58. }
  59. /**
  60. * Central method for making a get operation against this Solr Server.
  61. *
  62. * @see Apache_Solr_Service::_sendRawGet()
  63. */
  64. protected function _sendRawGet($url, $timeout = FALSE) {
  65. // Little "hack" to allow filter-only queries
  66. // Since "*:*" doesn't work with the dismax query handler, we mustn't set
  67. // "q", to let "q.alt" kick in. However, Apache_Solr_Service::search() will
  68. // always add "q", even if it is empty. Therefore, we delete empty "q"
  69. // parameters here.
  70. $url = preg_replace('/([?&])q=(&|$)/', '$1', $url);
  71. if ($this->newClient) {
  72. return parent::_sendRawGet($url, $timeout);
  73. }
  74. list($data, $headers) = $this->_makeHttpRequest($url, 'GET', array(), '', $timeout);
  75. $response = new Apache_Solr_Response($data, $headers, $this->_createDocuments, $this->_collapseSingleValueArrays);
  76. $code = (int) $response->getHttpStatus();
  77. if ($code != 200) {
  78. $message = $response->getHttpStatusMessage();
  79. if ($code >= 400 && $code != 403 && $code != 404) {
  80. // Add details, like Solr's exception message.
  81. $message .= $response->getRawResponse();
  82. }
  83. throw new Exception('"' . $code . '" Status: ' . $message);
  84. }
  85. return $response;
  86. }
  87. /**
  88. * Central method for making a post operation against this Solr Server.
  89. *
  90. * @see Apache_Solr_Service::_sendRawPost()
  91. */
  92. protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8') {
  93. if ($this->newClient) {
  94. return parent::_sendRawPost($url, $rawPost, $timeout, $contentType);
  95. }
  96. $request_headers = array('Content-Type' => $contentType);
  97. list($data, $headers) = $this->_makeHttpRequest($url, 'POST', $request_headers, $rawPost, $timeout);
  98. $response = new Apache_Solr_Response($data, $headers, $this->_createDocuments, $this->_collapseSingleValueArrays);
  99. $code = (int) $response->getHttpStatus();
  100. if ($code != 200) {
  101. $message = $response->getHttpStatusMessage();
  102. if ($code >= 400 && $code != 403 && $code != 404) {
  103. // Add details, like Solr's exception message.
  104. $message .= $response->getRawResponse();
  105. }
  106. throw new Exception('"' . $code . '" Status: ' . $message);
  107. }
  108. return $response;
  109. }
  110. /**
  111. * Call the /admin/ping servlet, to test the connection to the server.
  112. *
  113. * @param $timeout
  114. * maximum time to wait for ping in seconds, -1 for unlimited (default 2).
  115. * @return
  116. * (float) seconds taken to ping the server, FALSE if timeout occurs.
  117. */
  118. public function ping($timeout = 2) {
  119. if ($this->newClient) {
  120. return parent::ping($timeout);
  121. }
  122. $start = microtime(TRUE);
  123. if ($timeout <= 0.0) {
  124. $timeout = -1;
  125. }
  126. // Attempt a HEAD request to the solr ping url.
  127. list($data, $headers) = $this->_makeHttpRequest($this->_pingUrl, 'HEAD', array(), NULL, $timeout);
  128. $response = new Apache_Solr_Response($data, $headers);
  129. if ($response->getHttpStatus() == 200) {
  130. // Add 0.1 ms to the ping time so we never return 0.0.
  131. return microtime(TRUE) - $start + 0.0001;
  132. }
  133. else {
  134. return FALSE;
  135. }
  136. }
  137. /**
  138. * Helper method for making an HTTP request, without using stupid stuff like
  139. * file_get_contents().
  140. */
  141. protected function _makeHttpRequest($url, $method = 'GET', $headers = array(), $content = '', $timeout = FALSE) {
  142. $options = array(
  143. 'headers' => $headers,
  144. 'method' => $method,
  145. 'data' => $content,
  146. );
  147. if ($this->http_auth) {
  148. $options['headers']['Authorization'] = $this->http_auth;
  149. }
  150. if ($timeout) {
  151. $options['timeout'] = $timeout;
  152. }
  153. $result = drupal_http_request($url, $options);
  154. if (!isset($result->code) || $result->code < 0) {
  155. $result->code = 0;
  156. $result->status_message = 'Request failed';
  157. $result->protocol = 'HTTP/1.0';
  158. }
  159. // Additional information may be in the error property.
  160. if (isset($result->error)) {
  161. $result->status_message .= ': ' . check_plain($result->error);
  162. }
  163. if (!isset($result->data)) {
  164. $result->data = '';
  165. }
  166. // The headers have to be reformatted for the response class.
  167. $headers[] = "{$result->protocol} {$result->code} {$result->status_message}";
  168. if (isset($result->headers)) {
  169. foreach ($result->headers as $name => $value) {
  170. $headers[] = "$name: $value";
  171. }
  172. }
  173. return array($result->data, $headers);
  174. }
  175. /**
  176. * Convenience function for escaping a field name.
  177. *
  178. * Since field names can only contain one special character, ":", there is no
  179. * need to use the complete escape() method.
  180. *
  181. * @param string $value
  182. * The field name to escape.
  183. *
  184. * @return string
  185. * An escaped string suitable for passing to Solr.
  186. */
  187. static public function escapeFieldName($value) {
  188. $value = str_replace(':', '\:', $value);
  189. return $value;
  190. }
  191. /**
  192. * Convenience function for creating phrase syntax from a value.
  193. *
  194. * @param string $value
  195. * The string to convert into a Solr phrase value.
  196. *
  197. * @return string
  198. * A quoted string suitable for passing to Solr.
  199. */
  200. static public function phrase($value) {
  201. $value = str_replace("\\", "\\\\", $value);
  202. $value = str_replace('"', '\"', $value);
  203. return '"' . $value . '"';
  204. }
  205. /**
  206. * Get metadata about the Lucene index.
  207. *
  208. * @param int $num_terms
  209. * Number of 'top terms' to return.
  210. *
  211. * @return Apache_Solr_Response
  212. * A response object containing schema information.
  213. */
  214. public function getLuke($num_terms = 0) {
  215. if (!isset($this->luke[$num_terms])) {
  216. $url = $this->_constructUrl(self::LUKE_SERVLET, array('numTerms' => "$num_terms", 'wt' => self::SOLR_WRITER));
  217. $this->luke[$num_terms] = $this->_sendRawGet($url);
  218. }
  219. return $this->luke[$num_terms];
  220. }
  221. /**
  222. * Get metadata about fields in the Lucene index.
  223. *
  224. * @return array
  225. * An array of objects, keyed by field name, describing fields on the index.
  226. *
  227. * @see SearchApiSolrConnection::getLuke()
  228. * @see http://wiki.apache.org/solr/LukeRequestHandler
  229. */
  230. public function getFields() {
  231. return (array) $this->getLuke()->fields;
  232. }
  233. }