solr_connection.inc 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961
  1. <?php
  2. /**
  3. * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * - Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * - Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
  15. * its contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
  31. * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
  32. * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
  33. *
  34. * @package Apache
  35. * @subpackage Solr
  36. * @author Donovan Jimenez <djimenez@conduit-it.com>
  37. */
  38. /**
  39. * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
  40. * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
  41. *
  42. * This program is free software; you can redistribute it and/or modify
  43. * it under the terms of the GNU General Public License as published by
  44. * the Free Software Foundation; either version 2 of the License, or (at
  45. * your option) any later version.
  46. *
  47. * This program is distributed in the hope that it will be useful, but
  48. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  49. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  50. * for more details.
  51. *
  52. * You should have received a copy of the GNU General Public License
  53. * along with this program as the file LICENSE.txt; if not, please see
  54. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
  55. */
  /**
   * Represents a Solr server resource.
   *
   * Contains methods for pinging, adding, deleting, committing, optimizing and
   * searching.
   */
  class SearchApiSolrConnection implements SearchApiSolrConnectionInterface {

    /**
     * Defines how NamedLists should be formatted in the output.
     *
     * This specifically affects facet counts. Valid values are 'map' (default)
     * or 'flat'.
     */
    const NAMED_LIST_FORMAT = 'map';

    /**
     * Path to the ping servlet.
     */
    const PING_SERVLET = 'admin/ping';

    /**
     * Path to the update servlet.
     */
    const UPDATE_SERVLET = 'update';

    /**
     * Path to the search servlet.
     */
    const SEARCH_SERVLET = 'select';

    /**
     * Path to the luke servlet.
     */
    const LUKE_SERVLET = 'admin/luke';

    /**
     * Path to the system servlet.
     */
    const SYSTEM_SERVLET = 'admin/system';

    /**
     * Path to the stats servlet.
     */
    const STATS_SERVLET = 'admin/stats.jsp';

    /**
     * Path to the stats servlet for Solr 4.x servers.
     *
     * Note that this already contains a query string.
     */
    const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';

    /**
     * Path to the file servlet.
     */
    const FILE_SERVLET = 'admin/file';

    /**
     * The options passed when creating this connection.
     *
     * @var array
     */
    protected $options;

    /**
     * The Solr server's URL.
     *
     * @var string
     */
    protected $base_url;

    /**
     * Cached URL to the update servlet.
     *
     * @var string
     */
    protected $update_url;

    /**
     * HTTP Basic Authentication header to set for requests to the Solr server.
     *
     * @var string
     */
    protected $http_auth;

    /**
     * The stream context to use for requests to the Solr server.
     *
     * Defaults to NULL (= pass no context at all). If set, it is passed on as
     * the "context" option of drupal_http_request().
     *
     * @var resource|null
     */
    protected $stream_context;

    /**
     * Cache for the metadata from admin/luke.
     *
     * Contains an array of response objects, keyed by the number of "top terms".
     *
     * @var array
     *
     * @see getLuke()
     */
    protected $luke = array();

    /**
     * Cache for information about the Solr core.
     *
     * @var SimpleXMLElement
     *
     * @see getStats()
     */
    protected $stats;

    /**
     * Cache for system information.
     *
     * Holds the JSON-decoded response of the system servlet, which is decoded
     * into objects (not associative arrays).
     *
     * @var object|null
     *
     * @see getSystemInfo()
     */
    protected $system_info;

    /**
     * Flag that denotes whether to use soft commits for Solr 4.x.
     *
     * Defaults to TRUE.
     *
     * @var bool
     */
    protected $soft_commit = TRUE;

    /**
     * Implements SearchApiSolrConnectionInterface::__construct().
     *
     * Valid options include:
     *   - scheme: Scheme of the base URL of the Solr server. Most probably "http"
     *     or "https". Defaults to "http".
     *   - host: The host name (or IP) of the Solr server. Defaults to
     *     "localhost".
     *   - port: The port of the Solr server. Defaults to 8983.
     *   - path: The base path to the Solr server. Defaults to "solr"; leading and
     *     trailing slashes are normalized, so the effective path is "/solr/".
     *   - http_user: If both this and "http_pass" are set, will use this
     *     information to add basic HTTP authentication to all requests to the
     *     Solr server. Not set by default.
     *   - http_pass: See "http_user".
     *   - server: (optional) If non-empty, used as the prefix of all cache IDs
     *     of this connection. Without it, nothing is cached persistently.
     *   - solr_version: (optional) If non-empty, returned by getSolrVersion()
     *     instead of asking the Solr server.
     */
    public function __construct(array $options) {
      $options += array(
        'scheme' => 'http',
        'host' => 'localhost',
        'port' => 8983,
        'path' => 'solr',
        'http_user' => NULL,
        'http_pass' => NULL,
      );
      $this->options = $options;

      // Make sure the path has exactly one leading and one trailing slash.
      $path = '/' . trim($options['path'], '/') . '/';
      $this->base_url = $options['scheme'] . '://' . $options['host'] . ':' . $options['port'] . $path;

      // Set HTTP Basic Authentication parameter, if login data was set. The
      // values are cast to strings since they default to NULL, and passing NULL
      // to strlen() is deprecated as of PHP 8.1.
      $user = (string) $options['http_user'];
      $pass = (string) $options['http_pass'];
      if (strlen($user) && strlen($pass)) {
        $this->http_auth = 'Basic ' . base64_encode($user . ':' . $pass);
      }
    }

    /**
     * {@inheritdoc}
     */
    public function ping($timeout = 2) {
      $start = microtime(TRUE);

      // Non-positive timeouts are passed on as -1.
      if ($timeout <= 0.0) {
        $timeout = -1;
      }

      // Attempt a HEAD request to the Solr ping url.
      $options = array(
        'method' => 'HEAD',
        'timeout' => $timeout,
      );
      $response = $this->makeHttpRequest($this->constructUrl(self::PING_SERVLET), $options);

      if ($response->code != 200) {
        return FALSE;
      }
      // Add 1 µs to the ping time so we never return 0.
      return (microtime(TRUE) - $start) + 1E-6;
    }

    /**
     * {@inheritdoc}
     */
    public function setSoftCommit($soft_commit) {
      $this->soft_commit = (bool) $soft_commit;
    }

    /**
     * {@inheritdoc}
     */
    public function getSoftCommit() {
      return $this->soft_commit;
    }

    /**
     * {@inheritdoc}
     */
    public function setStreamContext($stream_context) {
      $this->stream_context = $stream_context;
    }

    /**
     * {@inheritdoc}
     */
    public function getStreamContext() {
      return $this->stream_context;
    }

    /**
     * Computes the cache ID to use for this connection.
     *
     * @param $suffix
     *   (optional) A suffix to append to the string to make it unique.
     *
     * @return string|null
     *   The cache ID to use for this connection and usage; or NULL if no caching
     *   should take place (i.e., if the "server" option is empty).
     */
    protected function getCacheId($suffix = '') {
      if (empty($this->options['server'])) {
        return NULL;
      }
      $cid = $this->options['server'];
      return $suffix ? "$cid:$suffix" : $cid;
    }

    /**
     * Call the /admin/system servlet to retrieve system information.
     *
     * Stores the retrieved information in $system_info. The raw JSON response is
     * cached (if caching is possible for this connection) and decoded on load.
     *
     * @throws SearchApiException
     *   If the request to the Solr server fails.
     *
     * @see getSystemInfo()
     */
    protected function setSystemInfo() {
      $cid = $this->getCacheId(__FUNCTION__);
      if ($cid) {
        $cache = cache_get($cid, 'cache_search_api_solr');
        if ($cache) {
          $this->system_info = json_decode($cache->data);
        }
      }

      // Second pass to populate the cache if necessary.
      if (empty($this->system_info)) {
        $url = $this->constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));
        $response = $this->sendRawGet($url);
        $this->system_info = json_decode($response->data);
        if ($cid) {
          cache_set($cid, $response->data, 'cache_search_api_solr');
        }
      }
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getSystemInfo().
     */
    public function getSystemInfo() {
      if (!isset($this->system_info)) {
        $this->setSystemInfo();
      }
      return $this->system_info;
    }

    /**
     * Sets $this->luke with the metadata about the index from admin/luke.
     *
     * @param int $num_terms
     *   (optional) The number of "top terms" to return.
     *
     * @throws SearchApiException
     *   If the request to the Solr server fails.
     */
    protected function setLuke($num_terms = 0) {
      if (!empty($this->luke[$num_terms])) {
        return;
      }

      $cid = $this->getCacheId(__FUNCTION__ . ":$num_terms");
      if ($cid) {
        $cache = cache_get($cid, 'cache_search_api_solr');
        if (isset($cache->data) && is_array($cache->data)) {
          // The cached value is a complete $this->luke array. Merge it instead
          // of overwriting, so responses already loaded for other $num_terms
          // values during this request are not thrown away. The (empty) entry
          // for $num_terms is removed first so the cached one can take its
          // place.
          unset($this->luke[$num_terms]);
          $this->luke += $cache->data;
        }
      }

      // Second pass to populate the cache if necessary.
      if (empty($this->luke[$num_terms])) {
        $params = array(
          'numTerms' => "$num_terms",
          'wt' => 'json',
          'json.nl' => self::NAMED_LIST_FORMAT,
        );
        $url = $this->constructUrl(self::LUKE_SERVLET, $params);
        $this->luke[$num_terms] = $this->sendRawGet($url);
        if ($cid) {
          cache_set($cid, $this->luke, 'cache_search_api_solr');
        }
      }
    }

    /**
     * {@inheritdoc}
     */
    public function getFields($num_terms = 0) {
      $fields = array();
      foreach ($this->getLuke($num_terms)->fields as $name => $info) {
        $fields[$name] = new SearchApiSolrField($info);
      }
      return $fields;
    }

    /**
     * {@inheritdoc}
     */
    public function getLuke($num_terms = 0) {
      if (!isset($this->luke[$num_terms])) {
        $this->setLuke($num_terms);
      }
      return $this->luke[$num_terms];
    }

    /**
     * {@inheritdoc}
     */
    public function getSolrVersion() {
      // Allow for overrides by the user.
      if (!empty($this->options['solr_version'])) {
        return $this->options['solr_version'];
      }

      // Otherwise, use the integer part of the version reported by the server.
      $system_info = $this->getSystemInfo();
      if (isset($system_info->lucene->{'solr-spec-version'})) {
        return (int) $system_info->lucene->{'solr-spec-version'};
      }

      // Version could not be determined.
      return 0;
    }

    /**
     * Stores information about the Solr core in $this->stats.
     *
     * The raw XML response is cached (if caching is possible for this
     * connection) and parsed on load.
     *
     * @throws SearchApiException
     *   If a request to the Solr server fails.
     */
    protected function setStats() {
      $data = $this->getLuke();
      $solr_version = $this->getSolrVersion();

      // Only try to get stats if we have connected to the index.
      if (!empty($this->stats) || !isset($data->index->numDocs)) {
        return;
      }

      $cid = $this->getCacheId(__FUNCTION__);
      if ($cid) {
        $cache = cache_get($cid, 'cache_search_api_solr');
        if (isset($cache->data)) {
          $this->stats = simplexml_load_string($cache->data);
        }
      }

      // Second pass to populate the cache if necessary.
      if (empty($this->stats)) {
        $servlet = ($solr_version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
        $response = $this->sendRawGet($this->constructUrl($servlet));
        $this->stats = simplexml_load_string($response->data);
        if ($cid) {
          cache_set($cid, $response->data, 'cache_search_api_solr');
        }
      }
    }

    /**
     * {@inheritdoc}
     */
    public function getStats() {
      if (!isset($this->stats)) {
        $this->setStats();
      }
      return $this->stats;
    }

    /**
     * {@inheritdoc}
     */
    public function getStatsSummary() {
      $stats = $this->getStats();
      $solr_version = $this->getSolrVersion();

      $summary = array(
        '@pending_docs' => '',
        '@autocommit_time_seconds' => '',
        '@autocommit_time' => '',
        '@deletes_by_id' => '',
        '@deletes_by_query' => '',
        '@deletes_total' => '',
        '@schema_version' => '',
        '@core_name' => '',
        '@index_size' => '',
      );

      if (empty($stats)) {
        return $summary;
      }

      // The two stats formats only differ in where the values are located, so
      // first determine the lookup paths and the schema version, then extract
      // the values in a uniform way.
      if ($solr_version <= 3) {
        $pending_path = '//stat[@name="docsPending"]';
        $max_time_path = '//stat[@name="autocommit maxTime"]';
        $deletes_id_path = '//stat[@name="deletesById"]';
        $deletes_query_path = '//stat[@name="deletesByQuery"]';
        $core_path = '/solr/core[1]';
        $size_path = '//stat[@name="indexSize"]';
        $schema_version = $this->getFirstXpathValue($stats, '/solr/schema[1]');
      }
      else {
        $system_info = $this->getSystemInfo();
        // NOTE(review): the ["stats"] and ["core"] predicates below are plain
        // string literals, which XPath treats as always true, so they do not
        // actually restrict the matched <lst> elements. They are kept as-is to
        // preserve the current results.
        $pending_path = '//lst["stats"]/long[@name="docsPending"]';
        $max_time_path = '//lst["stats"]/str[@name="autocommit maxTime"]';
        $deletes_id_path = '//lst["stats"]/long[@name="deletesById"]';
        $deletes_query_path = '//lst["stats"]/long[@name="deletesByQuery"]';
        $core_path = '//lst["core"]/str[@name="coreName"]';
        $size_path = '//lst["core"]/str[@name="indexSize"]';
        // Solr 4.x reports the schema via the system servlet instead.
        $schema_version = isset($system_info->core->schema) ? $system_info->core->schema : '';
      }

      $summary['@pending_docs'] = (int) $this->getFirstXpathValue($stats, $pending_path);

      // The autocommit time is read as milliseconds; convert to seconds.
      $max_time = (int) $this->getFirstXpathValue($stats, $max_time_path);
      $summary['@autocommit_time_seconds'] = $max_time / 1000;
      $summary['@autocommit_time'] = format_interval($max_time / 1000);

      $summary['@deletes_by_id'] = (int) $this->getFirstXpathValue($stats, $deletes_id_path);
      $summary['@deletes_by_query'] = (int) $this->getFirstXpathValue($stats, $deletes_query_path);
      $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];

      $summary['@schema_version'] = $schema_version;
      $summary['@core_name'] = $this->getFirstXpathValue($stats, $core_path);
      $summary['@index_size'] = $this->getFirstXpathValue($stats, $size_path);

      return $summary;
    }

    /**
     * Returns the trimmed text of the first node matching an XPath expression.
     *
     * @param SimpleXMLElement $xml
     *   The XML element on which to evaluate the expression.
     * @param string $path
     *   The XPath expression.
     *
     * @return string
     *   The trimmed string value of the first matching node; or an empty string
     *   if nothing matched or the expression could not be evaluated.
     */
    protected function getFirstXpathValue($xml, $path) {
      $nodes = $xml->xpath($path);
      // xpath() does not necessarily return an array (e.g., FALSE on error), and
      // the array may be empty, so neither current() nor [0] is safe here.
      if (!is_array($nodes) || !$nodes) {
        return '';
      }
      return trim((string) reset($nodes));
    }

    /**
     * {@inheritdoc}
     */
    public function clearCache() {
      $cid = $this->getCacheId();
      if ($cid) {
        // All cache IDs of this connection start with $cid, so a single
        // wildcard clear removes all of them.
        cache_clear_all($cid, 'cache_search_api_solr', TRUE);
      }
      $this->luke = array();
      $this->stats = NULL;
      $this->system_info = NULL;
    }

    /**
     * Checks the response code and throws an exception if it's not 200.
     *
     * @param object $response
     *   A response object.
     *
     * @return object
     *   The passed response object.
     *
     * @throws SearchApiException
     *   If the object's HTTP status is not 200.
     */
    protected function checkResponse($response) {
      $code = (int) $response->code;
      if ($code == 200) {
        return $response;
      }

      if ($code >= 400 && $code != 403 && $code != 404) {
        // Add details, like Solr's exception message.
        $response->status_message .= $response->data;
      }
      throw new SearchApiException('"' . $code . '" Status: ' . $response->status_message);
    }

    /**
     * {@inheritdoc}
     */
    public function makeServletRequest($servlet, array $params = array(), array $options = array()) {
      // Add default params.
      $params += array(
        'wt' => 'json',
        'json.nl' => self::NAMED_LIST_FORMAT,
      );

      $url = $this->constructUrl($servlet, $params);
      return $this->checkResponse($this->makeHttpRequest($url, $options));
    }

    /**
     * Sends a GET request to the Solr server.
     *
     * @param string $url
     *   The URL to which the request should be sent.
     * @param array $options
     *   Additional options for the request, as recognized by
     *   drupal_http_request().
     *
     * @return object
     *   The HTTP response, as returned by drupal_http_request().
     *
     * @throws SearchApiException
     *   If an error occurs, either during sending or on the server side.
     */
    protected function sendRawGet($url, array $options = array()) {
      $options['method'] = 'GET';
      return $this->checkResponse($this->makeHttpRequest($url, $options));
    }

    /**
     * Sends a POST request to the Solr server.
     *
     * Unless a "Content-Type" header is passed in $options, the request body is
     * declared as UTF-8 XML.
     *
     * @param string $url
     *   The URL to which the request should be sent.
     * @param array $options
     *   Additional options for the request, as recognized by
     *   drupal_http_request().
     *
     * @return object
     *   The HTTP response, as returned by drupal_http_request().
     *
     * @throws SearchApiException
     *   If an error occurs, either during sending or on the server side.
     */
    protected function sendRawPost($url, array $options = array()) {
      $options['method'] = 'POST';
      // Normally we use POST to send XML documents.
      if (empty($options['headers']['Content-Type'])) {
        $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
      }
      return $this->checkResponse($this->makeHttpRequest($url, $options));
    }

    /**
     * Sends an HTTP request to Solr.
     *
     * This is a wrapper around drupal_http_request() which adds authentication
     * and the stream context (if set), normalizes the result object and copies
     * the top-level properties of a JSON response body onto the result.
     *
     * @param string $url
     *   The URL to which the request should be sent.
     * @param array $options
     *   Additional options for the request, as recognized by
     *   drupal_http_request().
     *
     * @return object
     *   The HTTP response, as returned by drupal_http_request().
     */
    protected function makeHttpRequest($url, array $options = array()) {
      $has_no_body = empty($options['method']) || $options['method'] == 'GET' || $options['method'] == 'HEAD';
      if ($has_no_body) {
        // Make sure we are not sending a request body.
        $options['data'] = NULL;
      }
      if ($this->http_auth) {
        $options['headers']['Authorization'] = $this->http_auth;
      }
      if ($this->stream_context) {
        $options['context'] = $this->stream_context;
      }

      $result = drupal_http_request($url, $options);

      if (!isset($result->status_message)) {
        $result->status_message = '';
      }
      // A missing or negative code is normalized to a failed request with
      // code 0.
      if (!isset($result->code) || $result->code < 0) {
        $result->code = 0;
        $result->status_message = 'Request failed';
        $result->protocol = 'HTTP/1.0';
      }
      // Additional information may be in the error property.
      if (isset($result->error)) {
        $result->status_message .= ': ' . check_plain($result->error);
      }

      if (!isset($result->data)) {
        $result->data = '';
        $result->response = NULL;
        return $result;
      }

      // If the body is a JSON object, expose its top-level properties directly
      // on the result object.
      $decoded = json_decode($result->data);
      if (is_object($decoded)) {
        foreach ($decoded as $key => $value) {
          $result->$key = $value;
        }
      }
      return $result;
    }

    /**
     * {@inheritdoc}
     */
    public static function escape($value, $version = 0) {
      $specials = array('+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', "\\", 'AND', 'OR', 'NOT');
      // Solr 4.x introduces regular expressions, making the slash also a special
      // character.
      if ($version >= 4) {
        $specials[] = '/';
      }

      // Prefix each special token with a backslash.
      $replacements = array();
      foreach ($specials as $special) {
        $replacements[$special] = "\\$special";
      }
      return strtr($value, $replacements);
    }

    /**
     * {@inheritdoc}
     */
    public static function escapePhrase($value) {
      $replacements = array(
        '"' => '\"',
        "\\" => "\\\\",
      );
      return strtr($value, $replacements);
    }

    /**
     * {@inheritdoc}
     */
    public static function phrase($value) {
      return '"' . self::escapePhrase($value) . '"';
    }

    /**
     * {@inheritdoc}
     */
    public static function escapeFieldName($value) {
      return str_replace(':', '\:', $value);
    }

    /**
     * Returns the HTTP URL for a certain servlet on the Solr server.
     *
     * @param $servlet
     *   A string path to a Solr request handler.
     * @param array $params
     *   Additional GET parameters to append to the URL.
     * @param $added_query_string
     *   Additional query string to append to the URL.
     *
     * @return string
     *   The complete URL.
     */
    protected function constructUrl($servlet, array $params = array(), $added_query_string = NULL) {
      // PHP's built in http_build_query() doesn't give us the format Solr wants.
      $query_string = $this->httpBuildQuery($params);

      if ($query_string) {
        $query_string = '?' . $query_string;
        if ($added_query_string) {
          $query_string .= '&' . $added_query_string;
        }
      }
      elseif ($added_query_string) {
        $query_string = '?' . $added_query_string;
      }

      return $this->base_url . $servlet . $query_string;
    }

    /**
     * {@inheritdoc}
     */
    public function getBaseUrl() {
      return $this->base_url;
    }

    /**
     * {@inheritdoc}
     */
    public function setBaseUrl($url) {
      $this->base_url = $url;
      // The cached update URL was built from the old base URL.
      $this->update_url = NULL;
    }

    /**
     * {@inheritdoc}
     */
    public function update($rawPost, $timeout = 3600) {
      if (empty($this->update_url)) {
        // Store the URL in an instance variable since many updates may be sent
        // via a single instance of this class.
        $this->update_url = $this->constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
      }

      $options = array('data' => $rawPost);
      if ($timeout) {
        $options['timeout'] = $timeout;
      }
      return $this->sendRawPost($this->update_url, $options);
    }

    /**
     * {@inheritdoc}
     */
    public function addDocuments(array $documents, $overwrite = NULL, $commitWithin = NULL) {
      // Only add the attributes which were explicitly specified.
      $attr = '';
      if (isset($overwrite)) {
        $attr .= ' overwrite="' . ($overwrite ? 'true' : 'false') . '"';
      }
      if (isset($commitWithin)) {
        $attr .= ' commitWithin="' . ((int) $commitWithin) . '"';
      }

      $rawPost = "<add$attr>";
      foreach ($documents as $document) {
        // Silently skip everything that is not a Solr document.
        if (is_object($document) && ($document instanceof SearchApiSolrDocument)) {
          $rawPost .= $document->toXml();
        }
      }
      $rawPost .= '</add>';

      return $this->update($rawPost);
    }

    /**
     * {@inheritdoc}
     */
    public function commit($waitSearcher = TRUE, $timeout = 3600) {
      return $this->optimizeOrCommit('commit', $waitSearcher, $timeout);
    }

    /**
     * {@inheritdoc}
     */
    public function deleteById($id, $timeout = 3600) {
      return $this->deleteByMultipleIds(array($id), $timeout);
    }

    /**
     * {@inheritdoc}
     */
    public function deleteByMultipleIds(array $ids, $timeout = 3600) {
      $rawPost = '<delete>';
      foreach ($ids as $id) {
        // Escape the ID for use as XML element content.
        $rawPost .= '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
      }
      $rawPost .= '</delete>';

      return $this->update($rawPost, $timeout);
    }

    /**
     * {@inheritdoc}
     */
    public function deleteByQuery($rawQuery, $timeout = 3600) {
      // Escape the query for use as XML element content.
      $rawPost = '<delete><query>' . htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8') . '</query></delete>';
      return $this->update($rawPost, $timeout);
    }

    /**
     * {@inheritdoc}
     */
    public function optimize($waitSearcher = TRUE, $timeout = 3600) {
      return $this->optimizeOrCommit('optimize', $waitSearcher, $timeout);
    }

    /**
     * Sends a commit or optimize command to the Solr server.
     *
     * Will be synchronous unless $waitSearcher is set to FALSE. Clears this
     * connection's caches afterwards.
     *
     * @param string $type
     *   Either "commit" or "optimize".
     * @param bool $waitSearcher
     *   (optional) Wait until a new searcher is opened and registered as the main
     *   query searcher, making the changes visible. Defaults to true.
     * @param int $timeout
     *   Seconds to wait until timing out with an exception. Defaults to an hour.
     *
     * @return object
     *   A response object.
     *
     * @throws SearchApiException
     *   If an error occurs during the service call.
     */
    protected function optimizeOrCommit($type, $waitSearcher = TRUE, $timeout = 3600) {
      $attributes = $waitSearcher ? '' : ' waitSearcher="false"';
      // Soft commits are only requested from servers newer than Solr 3.x.
      if ($this->getSolrVersion() > 3 && $this->soft_commit) {
        $attributes .= ' softCommit="true"';
      }
      $rawPost = "<$type$attributes />";

      $response = $this->update($rawPost, $timeout);
      $this->clearCache();

      return $response;
    }

    /**
     * Generates an URL-encoded query string.
     *
     * Works like PHP's built in http_build_query() (or drupal_http_build_query())
     * but uses rawurlencode() and no [] for repeated params, to be compatible
     * with the Java-based servers Solr runs on.
     *
     * @param array $query
     *   The query parameters which should be set.
     * @param string $parent
     *   Internal use only.
     *
     * @return string
     *   A query string to append (after "?") to a URL.
     */
    protected function httpBuildQuery(array $query, $parent = '') {
      $params = array();

      foreach ($query as $key => $value) {
        // Children of a repeated parameter all use the (already encoded) key of
        // their parent, instead of their own.
        $key = ($parent ? $parent : rawurlencode($key));

        if (is_array($value)) {
          // Recurse into children.
          $value = $this->httpBuildQuery($value, $key);
          if ($value) {
            $params[] = $value;
          }
        }
        elseif (!isset($value)) {
          // If a query parameter value is NULL, only append its key.
          $params[] = $key;
        }
        else {
          $params[] = $key . '=' . rawurlencode($value);
        }
      }

      return implode('&', $params);
    }

    /**
     * {@inheritdoc}
     */
    public function search($query = NULL, array $params = array(), $method = 'GET') {
      // Always use JSON. See
      // http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for
      // reasoning.
      $params['wt'] = 'json';
      // Additional default params.
      $params += array(
        'json.nl' => self::NAMED_LIST_FORMAT,
      );
      if (isset($query)) {
        $params['q'] = $query;
      }

      // Carry out some performance improvements when no search keys are given.
      if (!isset($params['q']) || !strlen($params['q'])) {
        // Without search keys, the qf parameter is useless. We also remove empty
        // search keys here. (With our normal service class, empty keys won't be
        // set, but another module using this connection class might do that.)
        unset($params['q'], $params['qf']);
      }

      // Build the HTTP query string. We have our own method for that since PHP's
      // built-in http_build_query() doesn't give us the format Solr wants.
      $queryString = $this->httpBuildQuery($params);

      if ($method == 'GET' || $method == 'AUTO') {
        $searchUrl = $this->constructUrl(self::SEARCH_SERVLET, array(), $queryString);
        // With "AUTO", only use GET if the URL doesn't get too long.
        if ($method == 'GET' || strlen($searchUrl) <= variable_get('search_api_solr_http_get_max_length', 4000)) {
          return $this->sendRawGet($searchUrl);
        }
      }

      // Method is POST, or AUTO with a long query.
      $searchUrl = $this->constructUrl(self::SEARCH_SERVLET);
      $options = array(
        'data' => $queryString,
        'headers' => array(
          'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
        ),
      );
      return $this->sendRawPost($searchUrl, $options);
    }

  }