solr_connection.inc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
  1. <?php
  2. /**
  3. * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * - Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * - Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
  15. * its contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
  31. * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
  32. * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
  33. *
  34. * @package Apache
  35. * @subpackage Solr
  36. * @author Donovan Jimenez <djimenez@conduit-it.com>
  37. */
  38. /**
  39. * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
  40. * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
  41. *
  42. * This program is free software; you can redistribute it and/or modify
  43. * it under the terms of the GNU General Public License as published by
  44. * the Free Software Foundation; either version 2 of the License, or (at
  45. * your option) any later version.
  46. *
  47. * This program is distributed in the hope that it will be useful, but
  48. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  49. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  50. * for more details.
  51. *
  52. * You should have received a copy of the GNU General Public License
  53. * along with this program as the file LICENSE.txt; if not, please see
  54. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
  55. */
  56. /**
  57. * Represents a Solr server resource.
  58. *
  59. * Contains methods for pinging, adding, deleting, committing, optimizing and
  60. * searching.
  61. */
  62. class SearchApiSolrConnection implements SearchApiSolrConnectionInterface {
  /**
   * Defines how NamedLists should be formatted in the output.
   *
   * This specifically affects facet counts. Valid values are 'map' (default) or
   * 'flat'.
   */
  const NAMED_LIST_FORMAT = 'map';

  /**
   * Path to the ping servlet.
   */
  const PING_SERVLET = 'admin/ping';

  /**
   * Path to the update servlet.
   */
  const UPDATE_SERVLET = 'update';

  /**
   * Path to the search servlet.
   */
  const SEARCH_SERVLET = 'select';

  /**
   * Path to the luke servlet.
   */
  const LUKE_SERVLET = 'admin/luke';

  /**
   * Path to the system servlet.
   */
  const SYSTEM_SERVLET = 'admin/system';

  /**
   * Path to the stats servlet.
   */
  const STATS_SERVLET = 'admin/stats.jsp';

  /**
   * Path to the stats servlet for Solr 4.x servers.
   *
   * Note that this value already contains a query string.
   */
  const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';

  /**
   * Path to the file servlet.
   */
  const FILE_SERVLET = 'admin/file';

  /**
   * The options passed when creating this connection.
   *
   * @var array
   */
  protected $options;

  /**
   * The Solr server's URL.
   *
   * @var string
   */
  protected $base_url;

  /**
   * Cached URL to the update servlet.
   *
   * @var string
   */
  protected $update_url;

  /**
   * HTTP Basic Authentication header to set for requests to the Solr server.
   *
   * @var string
   */
  protected $http_auth;

  /**
   * The stream context to use for requests to the Solr server.
   *
   * Defaults to NULL (= pass no context at all). Expected to be a stream
   * context resource, e.g. as created by stream_context_create().
   *
   * @var resource|null
   */
  protected $stream_context;

  /**
   * Cache for the metadata from admin/luke.
   *
   * Contains an array of response objects, keyed by the number of "top terms".
   *
   * @var array
   *
   * @see getLuke()
   */
  protected $luke = array();

  /**
   * Cache for information about the Solr core.
   *
   * @var SimpleXMLElement
   *
   * @see getStats()
   */
  protected $stats;

  /**
   * Cache for system information.
   *
   * Holds the JSON-decoded response of the system servlet, i.e. an object.
   *
   * @var object
   *
   * @see getSystemInfo()
   */
  protected $system_info;

  /**
   * Flag that denotes whether to use soft commits for Solr 4.x.
   *
   * Defaults to FALSE.
   *
   * @var bool
   */
  protected $soft_commit = FALSE;

  /**
   * Implements SearchApiSolrConnectionInterface::__construct().
   *
   * Stores the options, builds the server's base URL from them and, if both
   * credentials are given, prepares the HTTP Basic Authentication header.
   *
   * Valid options include:
   *   - scheme: Scheme of the base URL of the Solr server. Most probably "http"
   *     or "https". Defaults to "http".
   *   - host: The host name (or IP) of the Solr server. Defaults to
   *     "localhost".
   *   - port: The port of the Solr server. Defaults to 8983.
   *   - path: The base path to the Solr server. Leading and trailing slashes
   *     are normalized, so the default "solr" results in "/solr/".
   *   - http_user: If both this and "http_pass" are set, will use this
   *     information to add basic HTTP authentication to all requests to the
   *     Solr server. Not set by default.
   *   - http_pass: See "http_user".
   *
   * @param array $options
   *   The connection options, as described above.
   */
  public function __construct(array $options) {
    $options += array(
      'scheme' => 'http',
      'host' => 'localhost',
      'port' => 8983,
      'path' => 'solr',
      'http_user' => NULL,
      'http_pass' => NULL,
    );
    $this->options = $options;

    $path = '/' . trim($options['path'], '/') . '/';
    $this->base_url = $options['scheme'] . '://' . $options['host'] . ':' . $options['port'] . $path;

    // Set HTTP Basic Authentication parameter, if login data was set. The
    // values are cast to strings first, since both default to NULL and passing
    // NULL to strlen() is deprecated as of PHP 8.1.
    $user = (string) $options['http_user'];
    $pass = (string) $options['http_pass'];
    if (strlen($user) && strlen($pass)) {
      $this->http_auth = 'Basic ' . base64_encode($user . ':' . $pass);
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::ping().
   *
   * Sends a HEAD request to the ping servlet and measures how long it takes.
   *
   * @param int|float $timeout
   *   (optional) The request timeout to use. Values of zero or less are
   *   passed on as -1.
   *
   * @return float|false
   *   The measured time in seconds if the server answered with HTTP status
   *   200; FALSE otherwise.
   */
  public function ping($timeout = 2) {
    $started = microtime(TRUE);

    // Attempt a HEAD request to the Solr ping URL.
    $request_options = array(
      'method' => 'HEAD',
      'timeout' => ($timeout <= 0.0) ? -1 : $timeout,
    );
    $reply = $this->makeHttpRequest($this->constructUrl(self::PING_SERVLET), $request_options);

    if ($reply->code != 200) {
      return FALSE;
    }
    // Add 1 µs to the ping time so we never return 0.
    return (microtime(TRUE) - $started) + 1E-6;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setSoftCommit().
   *
   * @param mixed $soft_commit
   *   The new value of the soft commit flag. Stored as a boolean.
   */
  public function setSoftCommit($soft_commit) {
    $this->soft_commit = $soft_commit ? TRUE : FALSE;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSoftCommit().
   *
   * @return bool
   *   The current value of the soft commit flag.
   */
  public function getSoftCommit() {
    return $this->soft_commit;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setStreamContext().
   *
   * @param mixed $stream_context
   *   The stream context to pass along with subsequent HTTP requests. An empty
   *   value means that no context is passed.
   */
  public function setStreamContext($stream_context) {
    $this->stream_context = $stream_context;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStreamContext().
   *
   * @return mixed
   *   The stream context previously set with setStreamContext(), or NULL if
   *   none was set.
   */
  public function getStreamContext() {
    return $this->stream_context;
  }
  /**
   * Computes the cache ID to use for this connection.
   *
   * The ID is based on the "server" option passed to the constructor.
   *
   * @param string $suffix
   *   (optional) A suffix to append to the string to make it unique.
   *
   * @return string|null
   *   The cache ID to use for this connection and usage; or NULL if no caching
   *   should take place (i.e., if the "server" option is empty).
   */
  protected function getCacheId($suffix = '') {
    if (empty($this->options['server'])) {
      return NULL;
    }
    $prefix = $this->options['server'];
    if ($suffix) {
      return "$prefix:$suffix";
    }
    return $prefix;
  }
  /**
   * Calls the /admin/system servlet to retrieve system information.
   *
   * The persistent cache is consulted first (if a cache ID is available). Only
   * if that yields nothing is the servlet requested, and the raw response body
   * is then written back to the cache. In both cases the JSON-decoded data is
   * stored in $system_info.
   *
   * @throws SearchApiException
   *   If the request to the Solr server fails.
   *
   * @see getSystemInfo()
   */
  protected function setSystemInfo() {
    $cid = $this->getCacheId(__FUNCTION__);
    if ($cid) {
      $cached = cache_get($cid, 'cache_search_api_solr');
      if ($cached) {
        $this->system_info = json_decode($cached->data);
      }
    }
    if (!empty($this->system_info)) {
      // The cache already provided what we need.
      return;
    }

    // Second pass to populate the cache if necessary.
    $system_url = $this->constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));
    $reply = $this->sendRawGet($system_url);
    $this->system_info = json_decode($reply->data);
    if ($cid) {
      cache_set($cid, $reply->data, 'cache_search_api_solr');
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSystemInfo().
   *
   * Loads the system information on first use and returns the value kept in
   * $system_info afterwards.
   *
   * @return object
   *   The JSON-decoded system information.
   */
  public function getSystemInfo() {
    if (isset($this->system_info)) {
      return $this->system_info;
    }
    $this->setSystemInfo();
    return $this->system_info;
  }
  /**
   * Sets $this->luke with the metadata about the index from admin/luke.
   *
   * Does nothing if the data for $num_terms is already present. Otherwise the
   * persistent cache is consulted and, if that does not provide the data
   * either, the luke servlet is requested and the cache updated.
   *
   * @param int $num_terms
   *   (optional) The number of "top terms" to request, which is also the key
   *   under which the response is stored in $this->luke.
   *
   * @throws SearchApiException
   *   If the request to the Solr server fails.
   */
  protected function setLuke($num_terms = 0) {
    if (!empty($this->luke[$num_terms])) {
      return;
    }

    $cid = $this->getCacheId(__FUNCTION__ . ":$num_terms");
    if ($cid) {
      $cache = cache_get($cid, 'cache_search_api_solr');
      if (isset($cache->data) && is_array($cache->data)) {
        // Only fill in missing entries instead of replacing the whole array,
        // so responses for other $num_terms values that were already loaded
        // during this request are not thrown away.
        foreach ($cache->data as $terms => $luke) {
          if (empty($this->luke[$terms])) {
            $this->luke[$terms] = $luke;
          }
        }
      }
    }

    // Second pass to populate the cache if necessary.
    if (empty($this->luke[$num_terms])) {
      $params = array(
        'numTerms' => "$num_terms",
        'wt' => 'json',
        'json.nl' => self::NAMED_LIST_FORMAT,
      );
      $url = $this->constructUrl(self::LUKE_SERVLET, $params);
      $this->luke[$num_terms] = $this->sendRawGet($url);
      if ($cid) {
        cache_set($cid, $this->luke, 'cache_search_api_solr');
      }
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getFields().
   *
   * @param int $num_terms
   *   (optional) The number of "top terms" to request from the luke servlet.
   *
   * @return SearchApiSolrField[]
   *   One field object per entry in the luke response's "fields" property,
   *   keyed by field name. Empty if the response contains no fields.
   */
  public function getFields($num_terms = 0) {
    $fields = array();
    $luke = $this->getLuke($num_terms);
    // The "fields" property only exists if the response body could be decoded
    // and actually contained it; avoid iterating over an undefined property.
    if (empty($luke->fields)) {
      return $fields;
    }
    foreach ($luke->fields as $name => $info) {
      $fields[$name] = new SearchApiSolrField($info);
    }
    return $fields;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getLuke().
   *
   * @param int $num_terms
   *   (optional) The number of "top terms" the luke data should contain.
   *
   * @return object
   *   The luke servlet's response object for the given number of terms,
   *   loaded on first use.
   */
  public function getLuke($num_terms = 0) {
    if (isset($this->luke[$num_terms])) {
      return $this->luke[$num_terms];
    }
    $this->setLuke($num_terms);
    return $this->luke[$num_terms];
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSolrVersion().
   *
   * @return int
   *   The major version number of the Solr server, taken from the
   *   "solr-spec-version" reported in the system information; or 0 if that
   *   information is not available.
   */
  public function getSolrVersion() {
    $system_info = $this->getSystemInfo();
    // Get our Solr version number. Casting the version string (e.g. "4.10.2")
    // to an integer keeps the complete major version, whereas reading only the
    // first character would report "10.0.0" as version 1.
    if (isset($system_info->lucene->{'solr-spec-version'})) {
      return (int) $system_info->lucene->{'solr-spec-version'};
    }
    return 0;
  }
  /**
   * Stores information about the Solr core in $this->stats.
   *
   * Nothing is loaded unless the luke data reports a document count, which is
   * taken as the sign that the index could be reached. The persistent cache is
   * tried first; otherwise the stats servlet matching the server's Solr
   * version is requested and its raw response written to the cache.
   *
   * @throws SearchApiException
   *   If a request to the Solr server fails.
   */
  protected function setStats() {
    $luke = $this->getLuke();
    $version = $this->getSolrVersion();

    // Only try to get stats if we have connected to the index.
    if (!empty($this->stats) || !isset($luke->index->numDocs)) {
      return;
    }

    $cid = $this->getCacheId(__FUNCTION__);
    if ($cid) {
      $cached = cache_get($cid, 'cache_search_api_solr');
      if (isset($cached->data)) {
        $this->stats = simplexml_load_string($cached->data);
      }
    }
    if (!empty($this->stats)) {
      return;
    }

    // Second pass to populate the cache if necessary.
    $servlet = ($version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
    $reply = $this->sendRawGet($this->constructUrl($servlet));
    $this->stats = simplexml_load_string($reply->data);
    if ($cid) {
      cache_set($cid, $reply->data, 'cache_search_api_solr');
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStats().
   *
   * @return SimpleXMLElement|null|false
   *   The value of $this->stats after attempting to load it on first use.
   *   This is the parsed stats response, or an empty value if the statistics
   *   could not be loaded or parsed.
   */
  public function getStats() {
    if (isset($this->stats)) {
      return $this->stats;
    }
    $this->setStats();
    return $this->stats;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStatsSummary().
   *
   * @return array
   *   An array of placeholder values, keyed by "@pending_docs",
   *   "@autocommit_time_seconds", "@autocommit_time", "@deletes_by_id",
   *   "@deletes_by_query", "@deletes_total", "@schema_version", "@core_name"
   *   and "@index_size". All values are empty strings if no statistics are
   *   available.
   */
  public function getStatsSummary() {
    $stats = $this->getStats();
    $solr_version = $this->getSolrVersion();

    $summary = array(
      '@pending_docs' => '',
      '@autocommit_time_seconds' => '',
      '@autocommit_time' => '',
      '@deletes_by_id' => '',
      '@deletes_by_query' => '',
      '@deletes_total' => '',
      '@schema_version' => '',
      '@core_name' => '',
      '@index_size' => '',
    );
    if (empty($stats)) {
      return $summary;
    }

    // The statistics are located at different places in the XML, depending on
    // the Solr version which produced it.
    if ($solr_version <= 3) {
      $queries = array(
        'pending_docs' => '//stat[@name="docsPending"]',
        'max_time' => '//stat[@name="autocommit maxTime"]',
        'deletes_by_id' => '//stat[@name="deletesById"]',
        'deletes_by_query' => '//stat[@name="deletesByQuery"]',
        'core_name' => '/solr/core[1]',
        'index_size' => '//stat[@name="indexSize"]',
      );
      $schema_version = $this->firstXpathText($stats, '/solr/schema[1]');
    }
    else {
      $system_info = $this->getSystemInfo();
      $queries = array(
        'pending_docs' => '//lst["stats"]/long[@name="docsPending"]',
        'max_time' => '//lst["stats"]/str[@name="autocommit maxTime"]',
        'deletes_by_id' => '//lst["stats"]/long[@name="deletesById"]',
        'deletes_by_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
        'core_name' => '//lst["core"]/str[@name="coreName"]',
        'index_size' => '//lst["core"]/str[@name="indexSize"]',
      );
      // Here the schema is taken from the system information.
      $schema_version = isset($system_info->core->schema) ? $system_info->core->schema : '';
    }

    $summary['@pending_docs'] = (int) $this->firstXpathText($stats, $queries['pending_docs']);
    $max_time = (int) $this->firstXpathText($stats, $queries['max_time']);
    // Convert to seconds.
    $summary['@autocommit_time_seconds'] = $max_time / 1000;
    $summary['@autocommit_time'] = format_interval($max_time / 1000);
    $summary['@deletes_by_id'] = (int) $this->firstXpathText($stats, $queries['deletes_by_id']);
    $summary['@deletes_by_query'] = (int) $this->firstXpathText($stats, $queries['deletes_by_query']);
    $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];
    $summary['@schema_version'] = $schema_version;
    $summary['@core_name'] = $this->firstXpathText($stats, $queries['core_name']);
    $summary['@index_size'] = $this->firstXpathText($stats, $queries['index_size']);

    return $summary;
  }

  /**
   * Returns the trimmed text of the first node matching an XPath query.
   *
   * @param SimpleXMLElement $xml
   *   The XML element to run the query on.
   * @param string $query
   *   The XPath query.
   *
   * @return string
   *   The trimmed text content of the first match; or an empty string if
   *   nothing matched or the query failed.
   */
  protected function firstXpathText($xml, $query) {
    $matches = $xml->xpath($query);
    // xpath() yields an empty array if nothing matched, and a non-array value
    // if the query could not be evaluated.
    if (empty($matches)) {
      return '';
    }
    return trim((string) reset($matches));
  }
  /**
   * Implements SearchApiSolrConnectionInterface::clearCache().
   *
   * Removes all persistent cache entries whose ID starts with this
   * connection's cache ID (if it has one) and resets the luke, stats and
   * system information held in this object.
   */
  public function clearCache() {
    if ($cid = $this->getCacheId()) {
      // A single wildcard clear covers every entry stored for this connection.
      cache_clear_all($cid, 'cache_search_api_solr', TRUE);
    }
    $this->luke = array();
    $this->stats = NULL;
    $this->system_info = NULL;
  }
  /**
   * Checks the response code and throws an exception if it's not 200.
   *
   * For status codes of 400 and above, except 403 and 404, the response body
   * is appended to the status message before the exception is thrown.
   *
   * @param object $response
   *   A response object.
   *
   * @return object
   *   The passed response object.
   *
   * @throws SearchApiException
   *   If the object's HTTP status is not 200.
   */
  protected function checkResponse($response) {
    $status = (int) $response->code;
    if ($status == 200) {
      return $response;
    }

    $skip_details = $status < 400 || $status == 403 || $status == 404;
    if (!$skip_details) {
      // Add details, like Solr's exception message.
      $response->status_message .= $response->data;
    }
    throw new SearchApiException('"' . $status . '" Status: ' . $response->status_message);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::makeServletRequest().
   *
   * @param string $servlet
   *   The path of the servlet to call, relative to the base URL.
   * @param array $params
   *   (optional) GET parameters to send. "wt" and "json.nl" are added with
   *   default values unless already present.
   * @param array $options
   *   (optional) Options to pass on to the HTTP request.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  public function makeServletRequest($servlet, array $params = array(), array $options = array()) {
    // Add default params.
    $defaults = array(
      'wt' => 'json',
      'json.nl' => self::NAMED_LIST_FORMAT,
    );
    $url = $this->constructUrl($servlet, $params + $defaults);
    return $this->checkResponse($this->makeHttpRequest($url, $options));
  }
  /**
   * Central method for making a GET operation against this Solr server.
   *
   * @param string $url
   *   The URL to request.
   * @param array $options
   *   (optional) Options for the HTTP request. The "method" key is always
   *   overwritten with "GET".
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  protected function sendRawGet($url, array $options = array()) {
    $options['method'] = 'GET';
    return $this->checkResponse($this->makeHttpRequest($url, $options));
  }
  /**
   * Central method for making a POST operation against this Solr server.
   *
   * @param string $url
   *   The URL to request.
   * @param array $options
   *   (optional) Options for the HTTP request. The "method" key is always
   *   overwritten with "POST"; the "Content-Type" header defaults to XML.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  protected function sendRawPost($url, array $options = array()) {
    $options['method'] = 'POST';
    $has_content_type = !empty($options['headers']['Content-Type']);
    if (!$has_content_type) {
      // Normally we use POST to send XML documents.
      $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
    }
    return $this->checkResponse($this->makeHttpRequest($url, $options));
  }
  /**
   * Sends an HTTP request to Solr.
   *
   * This is just a wrapper around drupal_http_request() which adds the
   * authentication header and stream context (if set) and normalizes the
   * result object.
   *
   * @param string $url
   *   The URL to request.
   * @param array $options
   *   (optional) Options to pass to drupal_http_request(). For GET and HEAD
   *   requests (and if no method is given), any request body is removed.
   *
   * @return object
   *   The result object, with "code", "status_message" and "data" always set.
   *   If the body is a JSON object, its top-level keys are copied onto the
   *   result as properties.
   */
  protected function makeHttpRequest($url, array $options = array()) {
    if (empty($options['method']) || $options['method'] == 'GET' || $options['method'] == 'HEAD') {
      // Make sure we are not sending a request body.
      $options['data'] = NULL;
    }
    if ($this->http_auth) {
      $options['headers']['Authorization'] = $this->http_auth;
    }
    if ($this->stream_context) {
      $options['context'] = $this->stream_context;
    }

    $result = drupal_http_request($url, $options);

    if (!isset($result->code) || $result->code < 0) {
      $result->code = 0;
      $result->status_message = 'Request failed';
      $result->protocol = 'HTTP/1.0';
    }
    elseif (!isset($result->status_message)) {
      // The status message may be missing from the result; make sure it is
      // defined before it is appended to here and in checkResponse().
      $result->status_message = '';
    }
    // Additional information may be in the error property.
    if (isset($result->error)) {
      $result->status_message .= ': ' . check_plain($result->error);
    }

    if (!isset($result->data)) {
      $result->data = '';
      $result->response = NULL;
    }
    else {
      // Expose the top-level keys of a JSON response as properties.
      $response = json_decode($result->data);
      if (is_object($response)) {
        foreach ($response as $key => $value) {
          $result->$key = $value;
        }
      }
    }
    return $result;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escape().
   *
   * Prefixes every special character (or character sequence) of the query
   * syntax with a backslash.
   *
   * @param string $value
   *   The string to escape.
   * @param int $version
   *   (optional) The Solr version to escape for. For version 4 and above, the
   *   slash is escaped, too.
   *
   * @return string
   *   The escaped string.
   */
  public static function escape($value, $version = 0) {
    $specials = array('+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', "\\");
    // Solr 4.x introduces regular expressions, making the slash also a special
    // character.
    if ($version >= 4) {
      $specials[] = '/';
    }

    $escaped = array();
    foreach ($specials as $sequence) {
      $escaped[] = '\\' . $sequence;
    }
    return strtr($value, array_combine($specials, $escaped));
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escapePhrase().
   *
   * @param string $value
   *   The phrase to escape.
   *
   * @return string
   *   The phrase with all double quotes and backslashes escaped by a
   *   backslash.
   */
  public static function escapePhrase($value) {
    return strtr($value, array(
      '"' => '\"',
      "\\" => "\\\\",
    ));
  }
  /**
   * Implements SearchApiSolrConnectionInterface::phrase().
   *
   * @param string $value
   *   The phrase to quote.
   *
   * @return string
   *   The phrase, escaped with escapePhrase() and wrapped in double quotes.
   */
  public static function phrase($value) {
    $escaped = self::escapePhrase($value);
    return '"' . $escaped . '"';
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escapeFieldName().
   *
   * @param string $value
   *   The field name to escape.
   *
   * @return string
   *   The field name with all colons escaped by a backslash.
   */
  public static function escapeFieldName($value) {
    return str_replace(':', '\:', $value);
  }
  /**
   * Returns the HTTP URL for a certain servlet on the Solr server.
   *
   * @param string $servlet
   *   A string path to a Solr request handler.
   * @param array $params
   *   (optional) Additional GET parameters to append to the URL.
   * @param string|null $added_query_string
   *   (optional) Additional, already encoded query string to append to the
   *   URL after the parameters.
   *
   * @return string
   *   The base URL, followed by the servlet path and, if there are any
   *   parameters, a "?" and the combined query string.
   */
  protected function constructUrl($servlet, array $params = array(), $added_query_string = NULL) {
    // PHP's built in http_build_query() doesn't give us the format Solr wants.
    $built = $this->httpBuildQuery($params);

    $pieces = array();
    if ($built) {
      $pieces[] = $built;
    }
    if ($added_query_string) {
      $pieces[] = $added_query_string;
    }
    $suffix = $pieces ? '?' . implode('&', $pieces) : $built;

    return $this->base_url . $servlet . $suffix;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getBaseUrl().
   *
   * @return string
   *   The base URL used for all requests to the Solr server.
   */
  public function getBaseUrl() {
    return $this->base_url;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setBaseUrl().
   *
   * @param string $url
   *   The new base URL. It is stored as passed, without any normalization.
   */
  public function setBaseUrl($url) {
    // The cached update URL was derived from the old base URL, so drop it.
    $this->update_url = NULL;
    $this->base_url = $url;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::update().
   *
   * @param string $rawPost
   *   The request body to POST to the update servlet.
   * @param int|float|false $timeout
   *   (optional) The request timeout. An empty value means that no timeout
   *   option is passed for the request.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  public function update($rawPost, $timeout = FALSE) {
    // Store the URL in an instance variable since many updates may be sent
    // via a single instance of this class.
    if (empty($this->update_url)) {
      $this->update_url = $this->constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
    }

    $options = array('data' => $rawPost);
    if ($timeout) {
      $options['timeout'] = $timeout;
    }
    return $this->sendRawPost($this->update_url, $options);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::addDocuments().
   *
   * @param array $documents
   *   The documents to add. Entries which are not SearchApiSolrDocument
   *   objects are skipped.
   * @param bool|null $overwrite
   *   (optional) If set, the value of the "overwrite" attribute of the <add>
   *   command.
   * @param int|null $commitWithin
   *   (optional) If set, the value of the "commitWithin" attribute of the
   *   <add> command.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  public function addDocuments(array $documents, $overwrite = NULL, $commitWithin = NULL) {
    $attr = '';
    if (isset($overwrite)) {
      $attr .= ' overwrite="' . ($overwrite ? 'true' : 'false') . '"';
    }
    if (isset($commitWithin)) {
      $attr .= ' commitWithin="' . ((int) $commitWithin) . '"';
    }

    $docs_xml = '';
    foreach ($documents as $document) {
      if ($document instanceof SearchApiSolrDocument) {
        $docs_xml .= $document->toXml();
      }
    }

    return $this->update("<add$attr>" . $docs_xml . '</add>');
  }
  /**
   * Implements SearchApiSolrConnectionInterface::commit().
   *
   * @param bool $waitSearcher
   *   (optional) Whether to wait until a new searcher is opened.
   * @param int $timeout
   *   (optional) Seconds to wait until timing out. Defaults to an hour.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  public function commit($waitSearcher = TRUE, $timeout = 3600) {
    $response = $this->optimizeOrCommit('commit', $waitSearcher, $timeout);
    return $response;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteById().
   *
   * @param string $id
   *   The ID of the document to delete.
   * @param int $timeout
   *   (optional) Seconds to wait until timing out. Defaults to an hour.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  public function deleteById($id, $timeout = 3600) {
    // Deleting a single document is just the one-element case.
    $ids = array($id);
    return $this->deleteByMultipleIds($ids, $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteByMultipleIds().
   *
   * @param array $ids
   *   The IDs of the documents to delete.
   * @param int $timeout
   *   (optional) Seconds to wait until timing out. Defaults to an hour.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  public function deleteByMultipleIds(array $ids, $timeout = 3600) {
    $id_tags = '';
    foreach ($ids as $id) {
      // The IDs become element content, so XML-escape them.
      $id_tags .= '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
    }
    return $this->update('<delete>' . $id_tags . '</delete>', $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteByQuery().
   *
   * @param string $rawQuery
   *   The query matching the documents to delete. It is XML-escaped, but
   *   otherwise used as passed.
   * @param int $timeout
   *   (optional) Seconds to wait until timing out. Defaults to an hour.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  public function deleteByQuery($rawQuery, $timeout = 3600) {
    $escaped_query = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
    return $this->update('<delete><query>' . $escaped_query . '</query></delete>', $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::optimize().
   *
   * @param bool $waitSearcher
   *   (optional) Whether to wait until a new searcher is opened.
   * @param int $timeout
   *   (optional) Seconds to wait until timing out. Defaults to an hour.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  public function optimize($waitSearcher = TRUE, $timeout = 3600) {
    $response = $this->optimizeOrCommit('optimize', $waitSearcher, $timeout);
    return $response;
  }
  /**
   * Sends a commit or optimize command to the Solr server.
   *
   * Will be synchronous unless $waitSearcher is set to FALSE. For Solr
   * versions above 3, the soft commit flag of this connection is honored.
   * After the command was sent, all cached data of this connection is
   * cleared.
   *
   * @param string $type
   *   Either "commit" or "optimize".
   * @param bool $waitSearcher
   *   (optional) Wait until a new searcher is opened and registered as the main
   *   query searcher, making the changes visible. Defaults to true.
   * @param int $timeout
   *   Seconds to wait until timing out with an exception. Defaults to an hour.
   *
   * @return object
   *   A response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  protected function optimizeOrCommit($type, $waitSearcher = TRUE, $timeout = 3600) {
    $attributes = $waitSearcher ? '' : ' waitSearcher="false"';
    // Only add the soft commit attribute for servers newer than Solr 3.
    if ($this->getSolrVersion() > 3 && $this->soft_commit) {
      $attributes .= ' softCommit="true"';
    }

    $response = $this->update("<$type$attributes />", $timeout);
    $this->clearCache();

    return $response;
  }
  /**
   * Generates an URL-encoded query string.
   *
   * Works like PHP's built in http_build_query() (or drupal_http_build_query())
   * but uses rawurlencode() and no [] for repeated params, to be compatible
   * with the Java-based servers Solr runs on.
   *
   * Array values result in the parameter being repeated once per entry. Empty
   * arrays are skipped, so they do not leave empty segments in the result.
   *
   * @param array $query
   *   The query parameters which should be set.
   * @param string $parent
   *   Internal use only. The already encoded name of the parameter whose
   *   values are being processed.
   *
   * @return string
   *   A query string to append (after "?") to a URL.
   */
  protected function httpBuildQuery(array $query, $parent = '') {
    $has_parent = ($parent !== '' && $parent !== NULL);
    $params = array();

    foreach ($query as $key => $value) {
      // Compare against the empty string instead of testing for truthiness,
      // so that a parent parameter named "0" is still honored.
      $key = $has_parent ? $parent : rawurlencode($key);

      // Recurse into children.
      if (is_array($value)) {
        $nested = $this->httpBuildQuery($value, $key);
        // An empty array contributes nothing; adding its empty string would
        // produce a stray "&" in the result.
        if ($nested !== '') {
          $params[] = $nested;
        }
      }
      // If a query parameter value is NULL, only append its key.
      elseif (!isset($value)) {
        $params[] = $key;
      }
      else {
        $params[] = $key . '=' . rawurlencode($value);
      }
    }

    return implode('&', $params);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::search().
   *
   * @param string|null $query
   *   (optional) The raw query string, sent as the "q" parameter. Empty values
   *   other than "0" are ignored.
   * @param array $params
   *   (optional) Additional parameters to send. "wt" is always set to "json".
   * @param string $method
   *   (optional) The HTTP method to use: "GET", "POST" or "AUTO". With "AUTO",
   *   GET is used unless the resulting URL would be longer than the
   *   "search_api_solr_http_get_max_length" variable allows (default: 4000).
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response's HTTP status is not 200.
   */
  public function search($query = NULL, array $params = array(), $method = 'GET') {
    // Always use JSON. See
    // http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for
    // reasoning.
    $params['wt'] = 'json';
    // Additional default params.
    $params += array(
      'json.nl' => self::NAMED_LIST_FORMAT,
    );
    // "0" is a perfectly valid query, but would fail a plain truthiness check.
    if ($query || $query === '0' || $query === 0) {
      $params['q'] = $query;
    }

    // PHP's built-in http_build_query() doesn't give us the format Solr wants.
    $queryString = $this->httpBuildQuery($params);

    if ($method == 'GET' || $method == 'AUTO') {
      $searchUrl = $this->constructUrl(self::SEARCH_SERVLET, array(), $queryString);
      if ($method == 'GET' || strlen($searchUrl) <= variable_get('search_api_solr_http_get_max_length', 4000)) {
        return $this->sendRawGet($searchUrl);
      }
    }

    // Method is POST, or AUTO with a long query.
    $searchUrl = $this->constructUrl(self::SEARCH_SERVLET);
    $options = array(
      'data' => $queryString,
      'headers' => array(
        'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
      ),
    );
    return $this->sendRawPost($searchUrl, $options);
  }
  806. }