solr_connection.inc 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992
  1. <?php
  2. /**
  3. * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * - Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * - Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
  15. * its contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
  31. * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
  32. * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
  33. *
  34. * @package Apache
  35. * @subpackage Solr
  36. * @author Donovan Jimenez <djimenez@conduit-it.com>
  37. */
  38. /**
  39. * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
  40. * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
  41. *
  42. * This program is free software; you can redistribute it and/or modify
  43. * it under the terms of the GNU General Public License as published by
  44. * the Free Software Foundation; either version 2 of the License, or (at
  45. * your option) any later version.
  46. *
  47. * This program is distributed in the hope that it will be useful, but
  48. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  49. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  50. * for more details.
  51. *
  52. * You should have received a copy of the GNU General Public License
  53. * along with this program as the file LICENSE.txt; if not, please see
  54. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
  55. */
  56. /**
  57. * Represents a Solr server resource.
  58. *
  59. * Contains methods for pinging, adding, deleting, committing, optimizing and
  60. * searching.
  61. */
  62. class SearchApiSolrConnection implements SearchApiSolrConnectionInterface {
  63. /**
  64. * Defines how NamedLists should be formatted in the output.
  65. *
  66. * This specifically affects facet counts. Valid values are 'map' (default) or
  67. * 'flat'.
  68. */
  69. const NAMED_LIST_FORMAT = 'map';
  70. /**
  71. * Path to the ping servlet.
  72. */
  73. const PING_SERVLET = 'admin/ping';
  74. /**
  75. * Path to the update servlet.
  76. */
  77. const UPDATE_SERVLET = 'update';
  78. /**
  79. * Path to the search servlet.
  80. */
  81. const SEARCH_SERVLET = 'select';
  82. /**
  83. * Path to the luke servlet.
  84. */
  85. const LUKE_SERVLET = 'admin/luke';
  86. /**
  87. * Path to the system servlet.
  88. */
  89. const SYSTEM_SERVLET = 'admin/system';
  90. /**
  91. * Path to the stats servlet.
  92. */
  93. const STATS_SERVLET = 'admin/stats.jsp';
  94. /**
  95. * Path to the stats servlet for Solr 4.x servers.
  96. */
  97. const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';
  98. /**
  99. * Path to the file servlet.
  100. */
  101. const FILE_SERVLET = 'admin/file';
  102. /**
  103. * The options passed when creating this connection.
  104. *
  105. * @var array
  106. */
  107. protected $options;
  108. /**
  109. * The Solr server's URL.
  110. *
  111. * @var string
  112. */
  113. protected $base_url;
  114. /**
  115. * Cached URL to the update servlet.
  116. *
  117. * @var string
  118. */
  119. protected $update_url;
  120. /**
  121. * HTTP Basic Authentication header to set for requests to the Solr server.
  122. *
  123. * @var string
  124. */
  125. protected $http_auth;
  126. /**
  127. * The stream context to use for requests to the Solr server.
  128. *
  129. * Defaults to NULL (= pass no context at all).
  130. *
  131. * @var resource|null
  132. */
  133. protected $stream_context;
  134. /**
  135. * Cache for the metadata from admin/luke.
  136. *
  137. * Contains an array of response objects, keyed by the number of "top terms".
  138. *
  139. * @var array
  140. *
  141. * @see getLuke()
  142. */
  143. protected $luke = array();
  144. /**
  145. * Cache for information about the Solr core.
  146. *
  147. * @var SimpleXMLElement
  148. *
  149. * @see getStats()
  150. */
  151. protected $stats;
  152. /**
  153. * Cache for system information.
  154. *
  155. * @var array
  156. *
  157. * @see getSystemInfo()
  158. */
  159. protected $system_info;
  160. /**
  161. * Flag that denotes whether to use soft commits for Solr 4.x.
  162. *
  163. * Defaults to TRUE.
  164. *
  165. * @var bool
  166. */
  167. protected $soft_commit = TRUE;
  /**
   * Implements SearchApiSolrConnectionInterface::__construct().
   *
   * Valid options include:
   *   - scheme: Scheme of the base URL of the Solr server. Most probably "http"
   *     or "https". Defaults to "http".
   *   - host: The host name (or IP) of the Solr server. Defaults to
   *     "localhost".
   *   - port: The port of the Solr server. Defaults to 8983.
   *   - path: The base path to the Solr server. Defaults to "solr". Leading
   *     and trailing slashes are normalized, so the default yields "/solr/".
   *   - http_user: If both this and "http_pass" are set, will use this
   *     information to add basic HTTP authentication to all requests to the
   *     Solr server. Not set by default.
   *   - http_pass: See "http_user".
   *
   * @param array $options
   *   The connection options, as described above. Missing keys are filled
   *   with their defaults; the complete array is stored on the object.
   */
  public function __construct(array $options) {
    $options += array(
      'scheme' => 'http',
      'host' => 'localhost',
      'port' => 8983,
      'path' => 'solr',
      'http_user' => NULL,
      'http_pass' => NULL,
    );
    $this->options = $options;

    // Normalize the path so that it always has exactly one leading and one
    // trailing slash around its content.
    $path = '/' . trim((string) $options['path'], '/') . '/';
    $this->base_url = $options['scheme'] . '://' . $options['host'] . ':' . $options['port'] . $path;

    // Set HTTP Basic Authentication parameter, if login data was set. Both
    // credentials default to NULL, and passing NULL to strlen() is deprecated
    // as of PHP 8.1, so cast to string first.
    $user = (string) $options['http_user'];
    $pass = (string) $options['http_pass'];
    if (strlen($user) && strlen($pass)) {
      $this->http_auth = 'Basic ' . base64_encode($user . ':' . $pass);
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::ping().
   *
   * Sends a HEAD request to the ping servlet and measures how long it takes.
   *
   * @param int|float $timeout
   *   (optional) Timeout passed on to the HTTP request. Values of 0 or less
   *   are replaced by -1. Defaults to 2.
   *
   * @return float|false
   *   The elapsed time in seconds (never 0) if the server answered with HTTP
   *   status 200, FALSE otherwise.
   */
  public function ping($timeout = 2) {
    $started = microtime(TRUE);

    // Attempt a HEAD request to the Solr ping url.
    $request_options = array(
      'method' => 'HEAD',
      'timeout' => ($timeout <= 0.0) ? -1 : $timeout,
    );
    $url = $this->constructUrl(self::PING_SERVLET);
    $response = $this->makeHttpRequest($url, $request_options);

    if ($response->code != 200) {
      return FALSE;
    }
    // Add 1 µs to the ping time so we never return 0.
    return (microtime(TRUE) - $started) + 1E-6;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setSoftCommit().
   *
   * @param mixed $soft_commit
   *   The new value of the soft commit flag; it is stored as a boolean.
   */
  public function setSoftCommit($soft_commit) {
    $this->soft_commit = $soft_commit ? TRUE : FALSE;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSoftCommit().
   *
   * @return bool
   *   The current value of the soft commit flag.
   */
  public function getSoftCommit() {
    $flag = $this->soft_commit;
    return $flag;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setStreamContext().
   *
   * @param mixed $stream_context
   *   The stream context to store on this connection. The value is stored
   *   unchanged.
   */
  public function setStreamContext($stream_context) {
    $context = $stream_context;
    $this->stream_context = $context;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStreamContext().
   *
   * @return mixed
   *   The stream context previously stored on this connection, or NULL if
   *   none was set.
   */
  public function getStreamContext() {
    $context = $this->stream_context;
    return $context;
  }
  /**
   * Builds the cache ID for this connection.
   *
   * The ID is based on the "server" option passed to the constructor.
   *
   * @param string $suffix
   *   (optional) A suffix to append (separated by a colon) to make the ID
   *   unique for a certain usage.
   *
   * @return string|null
   *   The cache ID to use for this connection and usage; or NULL if the
   *   "server" option is empty, in which case no caching should take place.
   */
  protected function getCacheId($suffix = '') {
    if (empty($this->options['server'])) {
      return NULL;
    }
    $cid = $this->options['server'];
    if ($suffix) {
      return "$cid:$suffix";
    }
    return $cid;
  }
  /**
   * Loads system information from the /admin/system servlet.
   *
   * The information is taken from the "cache_search_api_solr" cache bin when
   * a cached copy exists; otherwise it is requested from the server and the
   * raw response body is written to the cache. The decoded result is stored
   * in $system_info.
   *
   * @see getSystemInfo()
   */
  protected function setSystemInfo() {
    $cid = $this->getCacheId(__FUNCTION__);
    if ($cid && ($cached = cache_get($cid, 'cache_search_api_solr'))) {
      $this->system_info = json_decode($cached->data);
    }
    if (!empty($this->system_info)) {
      return;
    }

    // Nothing usable in the cache: ask the server and populate the cache.
    $url = $this->constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));
    $response = $this->sendRawGet($url);
    $this->system_info = json_decode($response->data);
    if ($cid) {
      cache_set($cid, $response->data, 'cache_search_api_solr');
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSystemInfo().
   *
   * Loads the system information on first use.
   *
   * @return mixed
   *   The content of $system_info after it has been loaded.
   */
  public function getSystemInfo() {
    if (isset($this->system_info)) {
      return $this->system_info;
    }
    $this->setSystemInfo();
    return $this->system_info;
  }
  /**
   * Sets $this->luke with the metadata about the index from admin/luke.
   *
   * The response for the requested number of "top terms" is looked up in the
   * "cache_search_api_solr" cache bin first and requested from the server
   * only if it is not found there.
   *
   * @param int $num_terms
   *   (optional) The number of "top terms" to return.
   */
  protected function setLuke($num_terms = 0) {
    if (!empty($this->luke[$num_terms])) {
      return;
    }

    $cid = $this->getCacheId(__FUNCTION__ . ":$num_terms");
    if ($cid) {
      $cache = cache_get($cid, 'cache_search_api_solr');
      // Merge the cached entries into the ones already in memory instead of
      // replacing the whole array, so responses loaded for other $num_terms
      // values are not thrown away. Cached entries win for keys they contain.
      // Non-array cache data is ignored since $this->luke must stay an array.
      if (isset($cache->data) && is_array($cache->data)) {
        $this->luke = $cache->data + $this->luke;
      }
    }

    // Second pass to populate the cache if necessary.
    if (empty($this->luke[$num_terms])) {
      $params = array(
        'numTerms' => "$num_terms",
        'wt' => 'json',
        'json.nl' => self::NAMED_LIST_FORMAT,
      );
      $url = $this->constructUrl(self::LUKE_SERVLET, $params);
      $this->luke[$num_terms] = $this->sendRawGet($url);
      if ($cid) {
        cache_set($cid, $this->luke, 'cache_search_api_solr');
      }
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getFields().
   *
   * @param int $num_terms
   *   (optional) The number of "top terms" to request from admin/luke.
   *
   * @return SearchApiSolrField[]
   *   One field object per entry in the "fields" property of the Luke data,
   *   keyed by field name. Empty if the Luke data has no "fields" property.
   */
  public function getFields($num_terms = 0) {
    $luke = $this->getLuke($num_terms);
    if (!isset($luke->fields)) {
      return array();
    }
    $result = array();
    foreach ($luke->fields as $field_name => $field_info) {
      $result[$field_name] = new SearchApiSolrField($field_info);
    }
    return $result;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getLuke().
   *
   * Loads the Luke data for the given number of "top terms" on first use.
   *
   * @param int $num_terms
   *   (optional) The number of "top terms" to return.
   *
   * @return object
   *   The entry of the $luke cache for $num_terms.
   */
  public function getLuke($num_terms = 0) {
    $loaded = isset($this->luke[$num_terms]);
    if (!$loaded) {
      $this->setLuke($num_terms);
    }
    return $this->luke[$num_terms];
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSolrVersion().
   *
   * @return mixed
   *   The non-empty "solr_version" option if one was passed to the
   *   constructor; otherwise the "solr-spec-version" reported in the system
   *   information, cast to an integer; or 0 if neither is available.
   */
  public function getSolrVersion() {
    // A version configured by the user takes precedence.
    if (!empty($this->options['solr_version'])) {
      return $this->options['solr_version'];
    }

    // Otherwise use the version number reported by the server.
    $info = $this->getSystemInfo();
    return isset($info->lucene->{'solr-spec-version'})
      ? (int) $info->lucene->{'solr-spec-version'}
      : 0;
  }
  /**
   * Stores information about the Solr core in $this->stats.
   *
   * Nothing is loaded unless the Luke data contains a document count. The
   * stats XML is taken from the "cache_search_api_solr" cache bin if present;
   * otherwise it is requested from the stats servlet matching the Solr
   * version and the raw response body is cached.
   */
  protected function setStats() {
    $luke = $this->getLuke();
    $version = $this->getSolrVersion();

    // Only try to get stats if we have connected to the index.
    if (!empty($this->stats) || !isset($luke->index->numDocs)) {
      return;
    }

    $cid = $this->getCacheId(__FUNCTION__);
    if ($cid) {
      $cached = cache_get($cid, 'cache_search_api_solr');
      if (isset($cached->data)) {
        $this->stats = simplexml_load_string($cached->data);
      }
    }
    if (!empty($this->stats)) {
      return;
    }

    // Second pass to populate the cache if necessary.
    $servlet = ($version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
    $response = $this->sendRawGet($this->constructUrl($servlet));
    $this->stats = simplexml_load_string($response->data);
    if ($cid) {
      cache_set($cid, $response->data, 'cache_search_api_solr');
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStats().
   *
   * Loads the stats on first use.
   *
   * @return mixed
   *   The content of $stats after the loading attempt.
   */
  public function getStats() {
    if (isset($this->stats)) {
      return $this->stats;
    }
    $this->setStats();
    return $this->stats;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStatsSummary().
   *
   * @return array
   *   An array with the keys "@pending_docs", "@autocommit_time_seconds",
   *   "@autocommit_time", "@deletes_by_id", "@deletes_by_query",
   *   "@deletes_total", "@schema_version", "@core_name" and "@index_size".
   *   All values are empty strings if no stats are available.
   */
  public function getStatsSummary() {
    $stats = $this->getStats();
    $solr_version = $this->getSolrVersion();

    $summary = array(
      '@pending_docs' => '',
      '@autocommit_time_seconds' => '',
      '@autocommit_time' => '',
      '@deletes_by_id' => '',
      '@deletes_by_query' => '',
      '@deletes_total' => '',
      '@schema_version' => '',
      '@core_name' => '',
      '@index_size' => '',
    );
    if (empty($stats)) {
      return $summary;
    }

    // The stats XML is laid out differently depending on the Solr version, so
    // pick the matching set of XPath queries first.
    if ($solr_version <= 3) {
      $queries = array(
        'pending' => '//stat[@name="docsPending"]',
        'max_time' => '//stat[@name="autocommit maxTime"]',
        'deletes_id' => '//stat[@name="deletesById"]',
        'deletes_query' => '//stat[@name="deletesByQuery"]',
        'core' => '/solr/core[1]',
        'size' => '//stat[@name="indexSize"]',
      );
      $schema_version = $this->firstXpathValue($stats, '/solr/schema[1]');
    }
    else {
      $queries = array(
        'pending' => '//lst["stats"]/long[@name="docsPending"]',
        'max_time' => '//lst["stats"]/str[@name="autocommit maxTime"]',
        'deletes_id' => '//lst["stats"]/long[@name="deletesById"]',
        'deletes_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
        'core' => '//lst["core"]/str[@name="coreName"]',
        'size' => '//lst["core"]/str[@name="indexSize"]',
      );
      // For this layout the schema is read from the system information
      // instead of the stats XML.
      $system_info = $this->getSystemInfo();
      $schema_version = isset($system_info->core->schema) ? $system_info->core->schema : '';
    }

    $max_time = (int) $this->firstXpathValue($stats, $queries['max_time']);

    $summary['@pending_docs'] = (int) $this->firstXpathValue($stats, $queries['pending']);
    // Convert to seconds.
    $summary['@autocommit_time_seconds'] = $max_time / 1000;
    $summary['@autocommit_time'] = format_interval($max_time / 1000);
    $summary['@deletes_by_id'] = (int) $this->firstXpathValue($stats, $queries['deletes_id']);
    $summary['@deletes_by_query'] = (int) $this->firstXpathValue($stats, $queries['deletes_query']);
    $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];
    $summary['@schema_version'] = $schema_version;
    $summary['@core_name'] = $this->firstXpathValue($stats, $queries['core']);
    $summary['@index_size'] = $this->firstXpathValue($stats, $queries['size']);

    return $summary;
  }

  /**
   * Returns the trimmed text of the first node matching an XPath query.
   *
   * @param SimpleXMLElement $xml
   *   The XML element to run the query on.
   * @param string $query
   *   The XPath query.
   *
   * @return string
   *   The trimmed string value of the first match; or an empty string if the
   *   query matched nothing or did not return an array.
   */
  protected function firstXpathValue($xml, $query) {
    $matches = $xml->xpath($query);
    // xpath() does not always return an array, so never index into or
    // iterate the result blindly.
    if (!is_array($matches) || !$matches) {
      return '';
    }
    return trim((string) reset($matches));
  }
  /**
   * Implements SearchApiSolrConnectionInterface::clearCache().
   *
   * Removes all entries starting with this connection's cache ID from the
   * "cache_search_api_solr" bin (if a cache ID is available) and resets the
   * Luke, stats and system information held on this object.
   */
  public function clearCache() {
    $cid = $this->getCacheId();
    if ($cid) {
      // TRUE enables wildcard matching, so suffixed IDs are cleared as well.
      cache_clear_all($cid, 'cache_search_api_solr', TRUE);
    }
    $this->luke = array();
    $this->stats = NULL;
    $this->system_info = NULL;
  }
  /**
   * Checks the response code and throws an exception if it's not 200.
   *
   * @param object $response
   *   A response object.
   *
   * @return object
   *   The passed response object.
   *
   * @throws SearchApiException
   *   If the object's HTTP status is not 200.
   */
  protected function checkResponse($response) {
    $code = (int) $response->code;
    if ($code == 200) {
      return $response;
    }

    // For errors other than 403 and 404, add details, like Solr's exception
    // message.
    $has_details = $code >= 400 && !in_array($code, array(403, 404), TRUE);
    if ($has_details) {
      $response->status_message .= $response->data;
    }
    throw new SearchApiException('"' . $code . '" Status: ' . $response->status_message);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::makeServletRequest().
   *
   * @param string $servlet
   *   The path of the servlet, relative to the base URL.
   * @param array $params
   *   (optional) GET parameters for the request. "wt" and "json.nl" are added
   *   with default values unless already present.
   * @param array $options
   *   (optional) Options passed on to the HTTP request.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  public function makeServletRequest($servlet, array $params = array(), array $options = array()) {
    // Defaults never override parameters passed by the caller.
    $defaults = array(
      'wt' => 'json',
      'json.nl' => self::NAMED_LIST_FORMAT,
    );
    $url = $this->constructUrl($servlet, $params + $defaults);
    return $this->checkResponse($this->makeHttpRequest($url, $options));
  }
  /**
   * Sends a GET request to the Solr server.
   *
   * @param string $url
   *   The URL to which the request should be sent.
   * @param array $options
   *   Additional options for the request, as recognized by
   *   drupal_http_request(). The "method" option is always set to "GET".
   *
   * @return object
   *   The HTTP response, as returned by drupal_http_request().
   *
   * @throws SearchApiException
   *   If an error occurs, either during sending or on the server side.
   */
  protected function sendRawGet($url, array $options = array()) {
    $options['method'] = 'GET';
    return $this->checkResponse($this->makeHttpRequest($url, $options));
  }
  /**
   * Sends a POST request to the Solr server.
   *
   * @param string $url
   *   The URL to which the request should be sent.
   * @param array $options
   *   Additional options for the request, as recognized by
   *   drupal_http_request(). The "method" option is always set to "POST"; the
   *   "Content-Type" header defaults to XML if it is empty.
   *
   * @return object
   *   The HTTP response, as returned by drupal_http_request().
   *
   * @throws SearchApiException
   *   If an error occurs, either during sending or on the server side.
   */
  protected function sendRawPost($url, array $options = array()) {
    // Normally we use POST to send XML documents.
    $has_content_type = !empty($options['headers']['Content-Type']);
    if (!$has_content_type) {
      $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
    }
    $options['method'] = 'POST';
    return $this->checkResponse($this->makeHttpRequest($url, $options));
  }
  /**
   * Sends an HTTP request to Solr.
   *
   * This is a wrapper around drupal_http_request() which adds the configured
   * authentication header and stream context, and normalizes the result: the
   * "code", "status_message" and "data" properties are always set, and the
   * top-level keys of a JSON object in the response body are copied onto the
   * result.
   *
   * @param string $url
   *   The URL to which the request should be sent.
   * @param array $options
   *   Additional options for the request, as recognized by
   *   drupal_http_request().
   *
   * @return object
   *   The HTTP response, as returned by drupal_http_request().
   */
  protected function makeHttpRequest($url, array $options = array()) {
    // Make sure we are not sending a request body for GET and HEAD requests.
    // A missing method counts as GET.
    $method = empty($options['method']) ? 'GET' : $options['method'];
    if (in_array($method, array('GET', 'HEAD'))) {
      $options['data'] = NULL;
    }
    if ($this->http_auth) {
      $options['headers']['Authorization'] = $this->http_auth;
    }
    if ($this->stream_context) {
      $options['context'] = $this->stream_context;
    }

    $result = drupal_http_request($url, $options);

    if (!isset($result->status_message)) {
      $result->status_message = '';
    }
    // A missing or negative code means the request itself failed.
    if (!isset($result->code) || $result->code < 0) {
      $result->code = 0;
      $result->status_message = 'Request failed';
      $result->protocol = 'HTTP/1.0';
    }
    // Additional information may be in the error property.
    if (isset($result->error)) {
      $result->status_message .= ': ' . check_plain($result->error);
    }

    if (!isset($result->data)) {
      $result->data = '';
      $result->response = NULL;
      return $result;
    }

    // Copy the decoded JSON properties onto the result object.
    $decoded = json_decode($result->data);
    if (is_object($decoded)) {
      foreach ($decoded as $property => $content) {
        $result->$property = $content;
      }
    }
    return $result;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escape().
   *
   * @param string $value
   *   The string in which special query syntax should be escaped.
   * @param int $version
   *   (optional) The Solr version to escape for. From version 4 on, the slash
   *   is escaped as well.
   *
   * @return string
   *   The value with every special character sequence and every occurrence of
   *   "AND", "OR" and "NOT" prefixed with a backslash.
   */
  public static function escape($value, $version = 0) {
    $specials = array('+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', "\\", 'AND', 'OR', 'NOT');
    // Solr 4.x introduces regular expressions, making the slash also a special
    // character.
    if ($version >= 4) {
      $specials[] = '/';
    }

    $map = array();
    foreach ($specials as $sequence) {
      $map[$sequence] = '\\' . $sequence;
    }
    return strtr($value, $map);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escapePhrase().
   *
   * @param string $value
   *   The string to escape for use inside a quoted phrase.
   *
   * @return string
   *   The value with double quotes and backslashes prefixed with a backslash.
   */
  public static function escapePhrase($value) {
    return strtr($value, array(
      '"' => '\"',
      "\\" => "\\\\",
    ));
  }
  /**
   * Implements SearchApiSolrConnectionInterface::phrase().
   *
   * @param string $value
   *   The string to convert into a phrase.
   *
   * @return string
   *   The value, escaped with escapePhrase() and wrapped in double quotes.
   */
  public static function phrase($value) {
    $escaped = self::escapePhrase($value);
    return '"' . $escaped . '"';
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escapeFieldName().
   *
   * @param string $value
   *   The field name to escape.
   *
   * @return string
   *   The field name with every colon prefixed with a backslash.
   */
  public static function escapeFieldName($value) {
    return str_replace(':', '\:', $value);
  }
  /**
   * Returns the HTTP URL for a certain servlet on the Solr server.
   *
   * @param string $servlet
   *   A string path to a Solr request handler.
   * @param array $params
   *   Additional GET parameters to append to the URL.
   * @param string|null $added_query_string
   *   Additional query string to append to the URL, after the parameters.
   *
   * @return string
   *   The complete URL.
   */
  protected function constructUrl($servlet, array $params = array(), $added_query_string = NULL) {
    // PHP's built in http_build_query() doesn't give us the format Solr wants.
    $built = $this->httpBuildQuery($params);

    if ($built && $added_query_string) {
      $suffix = '?' . $built . '&' . $added_query_string;
    }
    elseif ($built) {
      $suffix = '?' . $built;
    }
    elseif ($added_query_string) {
      $suffix = '?' . $added_query_string;
    }
    else {
      $suffix = $built;
    }
    return $this->base_url . $servlet . $suffix;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getBaseUrl().
   *
   * @return string
   *   The base URL currently used for requests to the Solr server.
   */
  public function getBaseUrl() {
    $url = $this->base_url;
    return $url;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setBaseUrl().
   *
   * @param string $url
   *   The new base URL. It is stored as passed.
   */
  public function setBaseUrl($url) {
    // The cached update URL was derived from the old base URL, so drop it.
    $this->update_url = NULL;
    $this->base_url = $url;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::update().
   *
   * @param string $rawPost
   *   The request body to POST to the update servlet.
   * @param int $timeout
   *   (optional) Timeout for the request. Only passed on if not empty.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  public function update($rawPost, $timeout = 3600) {
    // Store the URL in an instance variable since many updates may be sent
    // via a single instance of this class.
    if (empty($this->update_url)) {
      $this->update_url = $this->constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
    }

    $options = array('data' => $rawPost);
    if ($timeout) {
      $options['timeout'] = $timeout;
    }
    return $this->sendRawPost($this->update_url, $options);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::addDocuments().
   *
   * @param array $documents
   *   The documents to add. Entries that are not SearchApiSolrDocument
   *   objects are skipped.
   * @param bool|null $overwrite
   *   (optional) If not NULL, added as the "overwrite" attribute.
   * @param int|null $commitWithin
   *   (optional) If not NULL, added (cast to an integer) as the
   *   "commitWithin" attribute.
   *
   * @return object
   *   The response object of the update request.
   */
  public function addDocuments(array $documents, $overwrite = NULL, $commitWithin = NULL) {
    $attributes = '';
    if (isset($overwrite)) {
      $attributes .= ' overwrite="' . ($overwrite ? 'true' : 'false') . '"';
    }
    if (isset($commitWithin)) {
      $attributes .= ' commitWithin="' . ((int) $commitWithin) . '"';
    }

    $xml_documents = array();
    foreach ($documents as $document) {
      if ($document instanceof SearchApiSolrDocument) {
        $xml_documents[] = $document->toXml();
      }
    }
    $rawPost = "<add$attributes>" . implode('', $xml_documents) . '</add>';

    return $this->update($rawPost);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::commit().
   *
   * @param bool $waitSearcher
   *   (optional) Passed on to optimizeOrCommit().
   * @param int $timeout
   *   (optional) Passed on to optimizeOrCommit().
   *
   * @return object
   *   The response object.
   */
  public function commit($waitSearcher = TRUE, $timeout = 3600) {
    $response = $this->optimizeOrCommit('commit', $waitSearcher, $timeout);
    return $response;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteById().
   *
   * @param string $id
   *   The ID of the document to delete.
   * @param int $timeout
   *   (optional) Timeout for the update request.
   *
   * @return object
   *   The response object.
   */
  public function deleteById($id, $timeout = 3600) {
    $ids = array($id);
    return $this->deleteByMultipleIds($ids, $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteByMultipleIds().
   *
   * @param array $ids
   *   The IDs of the documents to delete.
   * @param int $timeout
   *   (optional) Timeout for the update request.
   *
   * @return object
   *   The response object.
   */
  public function deleteByMultipleIds(array $ids, $timeout = 3600) {
    $elements = array();
    foreach ($ids as $id) {
      // Escape the ID for use as XML element content.
      $elements[] = '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
    }
    $rawPost = '<delete>' . implode('', $elements) . '</delete>';
    return $this->update($rawPost, $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteByQuery().
   *
   * @param string $rawQuery
   *   The query matching the documents to delete. It is XML-escaped before
   *   being sent.
   * @param int $timeout
   *   (optional) Timeout for the update request.
   *
   * @return object
   *   The response object.
   */
  public function deleteByQuery($rawQuery, $timeout = 3600) {
    $escaped_query = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
    return $this->update('<delete><query>' . $escaped_query . '</query></delete>', $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::optimize().
   *
   * @param bool $waitSearcher
   *   (optional) Passed on to optimizeOrCommit().
   * @param int $timeout
   *   (optional) Passed on to optimizeOrCommit().
   *
   * @return object
   *   The response object.
   */
  public function optimize($waitSearcher = TRUE, $timeout = 3600) {
    $response = $this->optimizeOrCommit('optimize', $waitSearcher, $timeout);
    return $response;
  }
  /**
   * Sends a commit or optimize command to the Solr server.
   *
   * Will be synchronous unless $waitSearcher is set to FALSE. After the
   * command has been sent, the caches of this connection are cleared.
   *
   * @param string $type
   *   Either "commit" or "optimize".
   * @param bool $waitSearcher
   *   (optional) Wait until a new searcher is opened and registered as the main
   *   query searcher, making the changes visible. Defaults to true.
   * @param int $timeout
   *   Seconds to wait until timing out with an exception. Defaults to an hour.
   *
   * @return object
   *   A response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  protected function optimizeOrCommit($type, $waitSearcher = TRUE, $timeout = 3600) {
    $attributes = $waitSearcher ? '' : ' waitSearcher="false"';
    // The soft commit attribute is only sent to servers newer than Solr 3.x.
    if ($this->getSolrVersion() > 3 && $this->soft_commit) {
      $attributes .= ' softCommit="true"';
    }

    $response = $this->update("<$type$attributes />", $timeout);
    $this->clearCache();

    return $response;
  }
  /**
   * Generates an URL-encoded query string.
   *
   * Works like PHP's built in http_build_query() (or drupal_http_build_query())
   * but uses rawurlencode() and no [] for repeated params, to be compatible
   * with the Java-based servers Solr runs on.
   *
   * Nested arrays are flattened: every value in a nested array is emitted
   * with the (encoded) key of the outermost array entry. Parameters with a
   * NULL value are emitted as the key only.
   *
   * @param array $query
   *   The query parameters which should be set.
   * @param string $parent
   *   Internal use only.
   *
   * @return string
   *   A query string to append (after "?") to a URL.
   */
  protected function httpBuildQuery(array $query, $parent = '') {
    // Compare against the empty string instead of relying on truthiness, so
    // that a parent key of "0" is still recognized as a parent.
    $has_parent = isset($parent) && ((string) $parent) !== '';

    $params = array();
    foreach ($query as $key => $value) {
      $key = $has_parent ? $parent : rawurlencode((string) $key);

      // Recurse into children.
      if (is_array($value)) {
        $value = $this->httpBuildQuery($value, $key);
        if ($value) {
          $params[] = $value;
        }
      }
      // If a query parameter value is NULL, only append its key.
      elseif (!isset($value)) {
        $params[] = $key;
      }
      else {
        $params[] = $key . '=' . rawurlencode($value);
      }
    }

    return implode('&', $params);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::search().
   *
   * @param string|null $query
   *   (optional) The raw query string. If set, overrides the "q" parameter.
   * @param array $params
   *   (optional) Additional parameters for the request. "wt" is always set to
   *   "json".
   * @param string $method
   *   (optional) "GET", "POST" or "AUTO". With "AUTO", GET is used unless the
   *   URL would be longer than the "search_api_solr_http_get_max_length"
   *   variable (4000 by default). Any other value results in a POST request.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  public function search($query = NULL, array $params = array(), $method = 'GET') {
    // Always use JSON. See
    // http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for
    // reasoning.
    $params['wt'] = 'json';
    // Additional default params.
    $params += array(
      'json.nl' => self::NAMED_LIST_FORMAT,
    );
    if (isset($query)) {
      $params['q'] = $query;
    }

    // Carry out some performance improvements when no search keys are given.
    // Without search keys, the qf parameter is useless. We also remove empty
    // search keys here. (With our normal service class, empty keys won't be
    // set, but another module using this connection class might do that.)
    $has_keys = isset($params['q']) && strlen($params['q']);
    if (!$has_keys) {
      unset($params['q'], $params['qf']);
    }

    // Build the HTTP query string. We have our own method for that since PHP's
    // built-in http_build_query() doesn't give us the format Solr wants.
    $queryString = $this->httpBuildQuery($params);

    if (!empty($this->options['log_query'])) {
      watchdog('search_api_solr', 'Query: @query', array('@query' => $queryString), WATCHDOG_DEBUG);
    }

    $sent = FALSE;
    if ($method == 'GET' || $method == 'AUTO') {
      $searchUrl = $this->constructUrl(self::SEARCH_SERVLET, array(), $queryString);
      // The length limit is only relevant (and only read) for AUTO.
      if ($method == 'GET' || strlen($searchUrl) <= variable_get('search_api_solr_http_get_max_length', 4000)) {
        $response = $this->sendRawGet($searchUrl);
        $sent = TRUE;
      }
    }

    if (!$sent) {
      // Method is POST, or AUTO with a long query.
      $searchUrl = $this->constructUrl(self::SEARCH_SERVLET);
      $options = array(
        'data' => $queryString,
        'headers' => array(
          'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
        ),
      );
      $response = $this->sendRawPost($searchUrl, $options);
    }

    if (!empty($this->options['log_response'])) {
      $this->logResponse($response);
    }
    return $response;
  }
  /**
   * Logs a Solr response object.
   *
   * Writes the status line and the "response" property to the log and, if
   * present, the facet counts as a second log entry.
   *
   * @param object $response
   *   The response received from Solr.
   */
  protected function logResponse($response) {
    // The "response" property is only present if the body was a JSON object
    // containing it, so do not read it unconditionally.
    $body = isset($response->response) ? $response->response : NULL;
    $data = $response->code . ' ' . $response->status_message . "\n" . print_r($body, TRUE);
    watchdog('search_api_solr', 'Response: <div style="white-space: pre-wrap;">@response</div>', array('@response' => $data), WATCHDOG_DEBUG);

    if (!empty($response->facet_counts)) {
      watchdog('search_api_solr', 'Facets: <div style="white-space: pre-wrap;">@facets</div>', array('@facets' => print_r($response->facet_counts, TRUE)), WATCHDOG_DEBUG);
    }
  }
  885. }