solr_connection.inc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901
  1. <?php
  2. /**
  3. * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * - Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * - Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
  15. * its contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
  31. * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
  32. * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
  33. *
  34. * @package Apache
  35. * @subpackage Solr
  36. * @author Donovan Jimenez <djimenez@conduit-it.com>
  37. */
  38. /**
  39. * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
  40. * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
  41. *
  42. * This program is free software; you can redistribute it and/or modify
  43. * it under the terms of the GNU General Public License as published by
  44. * the Free Software Foundation; either version 2 of the License, or (at
  45. * your option) any later version.
  46. *
  47. * This program is distributed in the hope that it will be useful, but
  48. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  49. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  50. * for more details.
  51. *
  52. * You should have received a copy of the GNU General Public License
  53. * along with this program as the file LICENSE.txt; if not, please see
  54. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
  55. */
  56. /**
  57. * Starting point for the Solr API. Represents a Solr server resource and has
  58. * methods for pinging, adding, deleting, committing, optimizing and searching.
  59. */
  60. class SearchApiSolrConnection implements SearchApiSolrConnectionInterface {
  61. /**
  62. * Defines how NamedLists should be formatted in the output.
  63. *
  64. * This specifically affects facet counts. Valid values are 'map' (default) or
  65. * 'flat'.
  66. */
  67. const NAMED_LIST_FORMAT = 'map';
  68. /**
  69. * Path to the ping servlet.
  70. */
  71. const PING_SERVLET = 'admin/ping';
  72. /**
  73. * Path to the update servlet.
  74. */
  75. const UPDATE_SERVLET = 'update';
  76. /**
  77. * Path to the search servlet.
  78. */
  79. const SEARCH_SERVLET = 'select';
  80. /**
  81. * Path to the luke servlet.
  82. */
  83. const LUKE_SERVLET = 'admin/luke';
  84. /**
  85. * Path to the system servlet.
  86. */
  87. const SYSTEM_SERVLET = 'admin/system';
  88. /**
  89. * Path to the stats servlet.
  90. */
  91. const STATS_SERVLET = 'admin/stats.jsp';
  92. /**
  93. * Path to the stats servlet for Solr 4.x servers.
  94. */
  95. const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';
  96. /**
  97. * Path to the file servlet.
  98. */
  99. const FILE_SERVLET = 'admin/file';
  100. /**
  101. * The options passed when creating this connection.
  102. *
  103. * @var array
  104. */
  105. protected $options;
  106. /**
  107. * The Solr server's URL.
  108. *
  109. * @var string
  110. */
  111. protected $base_url;
  112. /**
  113. * Cached URL to the update servlet.
  114. *
  115. * @var string
  116. */
  117. protected $update_url;
  118. /**
  119. * The HTTP method to use for search requests.
  120. *
  121. * @var string
  122. */
  123. protected $method;
  124. /**
  125. * HTTP Basic Authentication header to set for requests to the Solr server.
  126. *
  127. * @var string
  128. */
  129. protected $http_auth;
  130. /**
  131. * The stream context to use for requests to the Solr server.
  132. *
  133. * Defaults to NULL (= pass no context at all).
  134. *
  135. * @var resource|null
  136. */
  137. protected $stream_context;
  138. /**
  139. * Cache for the metadata from admin/luke.
  140. *
  141. * Contains an array of response objects, keyed by the number of "top terms".
  142. *
  143. * @var array
  144. *
  145. * @see getLuke()
  146. */
  147. protected $luke = array();
  148. /**
  149. * Cache for information about the Solr core.
  150. *
  151. * @var SimpleXMLElement
  152. *
  153. * @see getStats()
  154. */
  155. protected $stats;
  156. /**
  157. * Cache for system information.
  158. *
  159. * @var array
  160. *
  161. * @see getSystemInfo()
  162. */
  163. protected $system_info;
  164. /**
  165. * Flag that denotes whether to use soft commits for Solr 4.x.
  166. *
  167. * Defaults to FALSE.
  168. *
  169. * @var bool
  170. */
  171. protected $soft_commit = FALSE;
  /**
   * Implements SearchApiSolrConnectionInterface::__construct().
   *
   * Valid options include:
   * - scheme: Scheme of the base URL of the Solr server. Most probably "http"
   *   or "https". Defaults to "http".
   * - host: The host name (or IP) of the Solr server. Defaults to
   *   "localhost".
   * - port: The port of the Solr server. Defaults to 8983.
   * - path: The base path to the Solr server. Defaults to "solr". Leading and
   *   trailing slashes are normalized, so "/solr/" is equivalent.
   * - http_user: If both this and "http_pass" are set, will use this
   *   information to add basic HTTP authentication to all requests to the
   *   Solr server. Not set by default.
   * - http_pass: See "http_user".
   * - http_method: The HTTP method to use for searches. Can be either "GET"
   *   or "POST". Defaults to "POST".
   *
   * @param array $options
   *   The connection options; missing keys are filled with the defaults above.
   */
  public function __construct(array $options) {
    $options += array(
      'scheme' => 'http',
      'host' => 'localhost',
      'port' => 8983,
      'path' => 'solr',
      'http_user' => NULL,
      'http_pass' => NULL,
      'http_method' => 'POST',
    );
    $this->options = $options;

    // Normalize the path so it is always wrapped in exactly one slash.
    $path = '/' . trim($options['path'], '/') . '/';
    $this->base_url = $options['scheme'] . '://' . $options['host'] . ':' . $options['port'] . $path;

    // Make sure we always have a valid method set, default to POST.
    $this->method = $options['http_method'] == 'GET' ? 'GET' : 'POST';

    // Set HTTP Basic Authentication parameter, if login data was set. The
    // values are cast to strings first, since the defaults are NULL and
    // passing NULL to strlen() is deprecated as of PHP 8.1.
    $user = (string) $options['http_user'];
    $pass = (string) $options['http_pass'];
    if ($user !== '' && $pass !== '') {
      $this->http_auth = 'Basic ' . base64_encode($user . ':' . $pass);
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::ping().
   *
   * Issues a HEAD request against the ping servlet.
   *
   * @param int|float $timeout
   *   Timeout in seconds. Values of zero or below are passed on as -1.
   *
   * @return float|false
   *   The elapsed time in seconds if the server answered with HTTP 200,
   *   FALSE otherwise.
   */
  public function ping($timeout = 2) {
    $started = microtime(TRUE);

    $request_options = array(
      'method' => 'HEAD',
      'timeout' => $timeout <= 0.0 ? -1 : $timeout,
    );
    $response = $this->makeHttpRequest($this->constructUrl(self::PING_SERVLET), $request_options);

    if ($response->code != 200) {
      return FALSE;
    }
    // Add 1 µs to the ping time so we never return 0.
    return (microtime(TRUE) - $started) + 1E-6;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setSoftCommit().
   *
   * @param mixed $soft_commit
   *   The new flag value; stored as a strict boolean.
   */
  public function setSoftCommit($soft_commit) {
    $this->soft_commit = $soft_commit ? TRUE : FALSE;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSoftCommit().
   *
   * @return bool
   *   The current value of the soft commit flag.
   */
  public function getSoftCommit() {
    return $this->soft_commit;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setStreamContext().
   *
   * @param mixed $stream_context
   *   The context to pass along with subsequent HTTP requests. A falsy value
   *   means no context is passed.
   */
  public function setStreamContext($stream_context) {
    $this->stream_context = $stream_context;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStreamContext().
   *
   * @return mixed
   *   The stream context currently set on this connection, or NULL if none.
   */
  public function getStreamContext() {
    return $this->stream_context;
  }
  /**
   * Computes the cache ID to use for this connection.
   *
   * The ID is based on the "server" entry of the connection options.
   *
   * @param string $suffix
   *   (optional) A suffix to append to the string to make it unique.
   *
   * @return string|null
   *   The cache ID to use for this connection and usage; or NULL if no
   *   "server" option is set, in which case no caching should take place.
   */
  protected function getCacheId($suffix = '') {
    if (empty($this->options['server'])) {
      return NULL;
    }
    $server = $this->options['server'];
    if ($suffix) {
      return "$server:$suffix";
    }
    return $server;
  }
  /**
   * Loads system information into $this->system_info.
   *
   * The cached copy is used when one exists. Otherwise the admin/system
   * servlet is queried and, if a cache ID is available, the raw JSON response
   * is written to the cache.
   *
   * @see getSystemInfo()
   */
  protected function setSystemInfo() {
    $cid = $this->getCacheId(__FUNCTION__);
    $cached = $cid ? cache_get($cid, 'cache_search_api_solr') : FALSE;
    if ($cached) {
      $this->system_info = json_decode($cached->data);
    }
    if (!empty($this->system_info)) {
      return;
    }

    // Nothing usable in the cache: ask the server and remember the answer.
    $url = $this->constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));
    $response = $this->sendRawGet($url);
    $this->system_info = json_decode($response->data);
    if ($cid) {
      cache_set($cid, $response->data, 'cache_search_api_solr');
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSystemInfo().
   *
   * @return mixed
   *   The decoded system information, loaded on first use.
   */
  public function getSystemInfo() {
    if ($this->system_info === NULL) {
      $this->setSystemInfo();
    }
    return $this->system_info;
  }
  /**
   * Sets $this->luke with the metadata about the index from admin/luke.
   *
   * Does nothing if an entry for $num_terms is already loaded. Otherwise the
   * cache is consulted first and, if that yields no entry for $num_terms, the
   * admin/luke servlet is queried and the result cached.
   *
   * @param int $num_terms
   *   Number of "top terms" to request; also the key under which the response
   *   is stored in $this->luke.
   */
  protected function setLuke($num_terms = 0) {
    if (!empty($this->luke[$num_terms])) {
      return;
    }

    $cid = $this->getCacheId(__FUNCTION__ . ":$num_terms");
    if ($cid) {
      $cache = cache_get($cid, 'cache_search_api_solr');
      if (isset($cache->data) && is_array($cache->data)) {
        // The cached value is a snapshot of the whole $luke array. Cached
        // entries win for the keys they contain, but entries already held in
        // memory for other term counts are kept instead of being discarded.
        $this->luke = $cache->data + $this->luke;
      }
    }

    // Second pass to populate the cache if necessary.
    if (empty($this->luke[$num_terms])) {
      $params = array(
        'numTerms' => "$num_terms",
        'wt' => 'json',
        'json.nl' => self::NAMED_LIST_FORMAT,
      );
      $url = $this->constructUrl(self::LUKE_SERVLET, $params);
      $this->luke[$num_terms] = $this->sendRawGet($url);
      if ($cid) {
        cache_set($cid, $this->luke, 'cache_search_api_solr');
      }
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getFields().
   *
   * @param int $num_terms
   *   Number of "top terms" to request from admin/luke.
   *
   * @return SearchApiSolrField[]
   *   One field object per entry in the Luke response's "fields" property,
   *   keyed by field name.
   */
  public function getFields($num_terms = 0) {
    $luke = $this->getLuke($num_terms);
    $result = array();
    foreach ($luke->fields as $field_name => $field_info) {
      $result[$field_name] = new SearchApiSolrField($field_info);
    }
    return $result;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getLuke().
   *
   * @param int $num_terms
   *   Number of "top terms" to request; also the key of the local cache entry.
   *
   * @return object
   *   The admin/luke response stored for $num_terms, loaded on first use.
   */
  public function getLuke($num_terms = 0) {
    $loaded = isset($this->luke[$num_terms]);
    if (!$loaded) {
      $this->setLuke($num_terms);
    }
    return $this->luke[$num_terms];
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getSolrVersion().
   *
   * @return string|int
   *   The major version taken from the "solr-spec-version" entry of the system
   *   information (everything before the first dot, e.g. "4" for "4.10.2"),
   *   or 0 if that entry is not available.
   */
  public function getSolrVersion() {
    $system_info = $this->getSystemInfo();
    // Get our solr version number.
    if (isset($system_info->lucene->{'solr-spec-version'})) {
      // Use the whole major component rather than just the first character,
      // so that a two-digit major version such as "10.0.0" is not misread
      // as "1".
      list($major) = explode('.', $system_info->lucene->{'solr-spec-version'}, 2);
      return $major;
    }
    return 0;
  }
  /**
   * Stores information about the Solr core in $this->stats.
   *
   * Stats are only loaded when the Luke response contains a document count.
   * The cached XML is used when available; otherwise the stats servlet that
   * matches the Solr version is queried and, if a cache ID is available, the
   * raw XML response is written to the cache.
   */
  protected function setStats() {
    $data = $this->getLuke();
    $solr_version = $this->getSolrVersion();
    // Only try to get stats if we have connected to the index.
    if (empty($this->stats) && isset($data->index->numDocs)) {
      $cid = $this->getCacheId(__FUNCTION__);
      if ($cid) {
        $cache = cache_get($cid, 'cache_search_api_solr');
        if (isset($cache->data)) {
          $this->stats = simplexml_load_string($cache->data);
        }
      }
      // Second pass to populate the cache if necessary.
      if (empty($this->stats)) {
        if ($solr_version >= 4) {
          $url = $this->constructUrl(self::STATS_SERVLET_4);
        }
        else {
          $url = $this->constructUrl(self::STATS_SERVLET);
        }
        $response = $this->sendRawGet($url);
        $this->stats = simplexml_load_string($response->data);
        // Cache whenever a cache ID exists. This used to test a property this
        // class does not declare, so the response was never cached.
        if ($cid) {
          cache_set($cid, $response->data, 'cache_search_api_solr');
        }
      }
    }
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStats().
   *
   * @return mixed
   *   The parsed stats document, loaded on first use. Stays NULL when
   *   setStats() does not load anything.
   */
  public function getStats() {
    if ($this->stats === NULL) {
      $this->setStats();
    }
    return $this->stats;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getStatsSummary().
   *
   * @return array
   *   An array with the keys "@pending_docs", "@autocommit_time_seconds",
   *   "@autocommit_time", "@deletes_by_id", "@deletes_by_query",
   *   "@deletes_total", "@schema_version", "@core_name" and "@index_size".
   *   All values are empty strings if no stats are available.
   */
  public function getStatsSummary() {
    $stats = $this->getStats();
    $solr_version = $this->getSolrVersion();

    $summary = array(
      '@pending_docs' => '',
      '@autocommit_time_seconds' => '',
      '@autocommit_time' => '',
      '@deletes_by_id' => '',
      '@deletes_by_query' => '',
      '@deletes_total' => '',
      '@schema_version' => '',
      '@core_name' => '',
      '@index_size' => '',
    );
    if (empty($stats)) {
      return $summary;
    }

    if ($solr_version <= 3) {
      $xpaths = array(
        'pending' => '//stat[@name="docsPending"]',
        'max_time' => '//stat[@name="autocommit maxTime"]',
        'deletes_id' => '//stat[@name="deletesById"]',
        'deletes_query' => '//stat[@name="deletesByQuery"]',
        'core' => '/solr/core[1]',
        'size' => '//stat[@name="indexSize"]',
      );
      $schema_version = $this->getFirstXpathValue($stats, '/solr/schema[1]');
    }
    else {
      // NOTE(review): predicates such as ["stats"] are plain string literals,
      // which XPath evaluates as always true, so they do not filter by name.
      // They are kept unchanged so that the same nodes keep matching.
      $xpaths = array(
        'pending' => '//lst["stats"]/long[@name="docsPending"]',
        'max_time' => '//lst["stats"]/str[@name="autocommit maxTime"]',
        'deletes_id' => '//lst["stats"]/long[@name="deletesById"]',
        'deletes_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
        'core' => '//lst["core"]/str[@name="coreName"]',
        'size' => '//lst["core"]/str[@name="indexSize"]',
      );
      // The stats document is not used for the schema here; the schema name
      // comes from the system information instead.
      $system_info = $this->getSystemInfo();
      $schema_version = isset($system_info->core->schema) ? $system_info->core->schema : '';
    }

    $max_time = (int) $this->getFirstXpathValue($stats, $xpaths['max_time']);

    $summary['@pending_docs'] = (int) $this->getFirstXpathValue($stats, $xpaths['pending']);
    // Convert to seconds.
    $summary['@autocommit_time_seconds'] = $max_time / 1000;
    $summary['@autocommit_time'] = format_interval($max_time / 1000);
    $summary['@deletes_by_id'] = (int) $this->getFirstXpathValue($stats, $xpaths['deletes_id']);
    $summary['@deletes_by_query'] = (int) $this->getFirstXpathValue($stats, $xpaths['deletes_query']);
    $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];
    $summary['@schema_version'] = $schema_version;
    $summary['@core_name'] = $this->getFirstXpathValue($stats, $xpaths['core']);
    $summary['@index_size'] = $this->getFirstXpathValue($stats, $xpaths['size']);

    return $summary;
  }

  /**
   * Returns the trimmed text of the first node matching an XPath expression.
   *
   * @param SimpleXMLElement $stats
   *   The document to query.
   * @param string $xpath
   *   The XPath expression to evaluate.
   *
   * @return string
   *   The trimmed string value of the first match, or an empty string if the
   *   expression matched nothing or could not be evaluated.
   */
  protected function getFirstXpathValue($stats, $xpath) {
    $matches = $stats->xpath($xpath);
    if (empty($matches)) {
      return '';
    }
    return trim((string) reset($matches));
  }
  /**
   * Implements SearchApiSolrConnectionInterface::clearCache().
   *
   * Removes all persistent cache entries whose ID starts with this
   * connection's cache ID (wildcard clear) and resets the in-memory copies of
   * the Luke, stats and system information.
   */
  public function clearCache() {
    if ($cid = $this->getCacheId()) {
      // A single wildcard clear is enough; the identical call used to be
      // issued twice.
      cache_clear_all($cid, 'cache_search_api_solr', TRUE);
    }
    $this->luke = array();
    $this->stats = NULL;
    $this->system_info = NULL;
  }
  /**
   * Checks the response code and throws an exception if it's not 200.
   *
   * For error codes of 400 and above, except 403 and 404, the response body
   * is appended to the status message before the exception is thrown.
   *
   * @param object $response
   *   A response object.
   *
   * @return object
   *   The passed response object.
   *
   * @throws SearchApiException
   *   If the object's HTTP status is not 200.
   */
  protected function checkResponse($response) {
    $code = (int) $response->code;
    if ($code == 200) {
      return $response;
    }

    $append_body = $code >= 400 && $code != 403 && $code != 404;
    if ($append_body) {
      // Add details, like Solr's exception message.
      $response->status_message .= $response->data;
    }
    throw new SearchApiException('"' . $code . '" Status: ' . $response->status_message);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::makeServletRequest().
   *
   * @param string $servlet
   *   Path of the servlet, relative to the base URL.
   * @param array $params
   *   Query parameters. "wt" and "json.nl" are added unless already present.
   * @param array $options
   *   Options handed to the HTTP request.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response status is not 200.
   */
  public function makeServletRequest($servlet, array $params = array(), array $options = array()) {
    // Add default params.
    $defaults = array(
      'wt' => 'json',
      'json.nl' => self::NAMED_LIST_FORMAT,
    );
    $url = $this->constructUrl($servlet, $params + $defaults);
    return $this->checkResponse($this->makeHttpRequest($url, $options));
  }
  /**
   * Central method for making a GET operation against this Solr server.
   *
   * @param string $url
   *   The URL to request.
   * @param array $options
   *   Request options; the "method" entry is always overridden with "GET".
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response status is not 200.
   */
  protected function sendRawGet($url, array $options = array()) {
    $request = array('method' => 'GET') + $options;
    $request['method'] = 'GET';
    return $this->checkResponse($this->makeHttpRequest($url, $request));
  }
  /**
   * Central method for making a POST operation against this Solr server.
   *
   * @param string $url
   *   The URL to request.
   * @param array $options
   *   Request options; the "method" entry is always overridden with "POST".
   *   If no Content-Type header is given, an XML content type is used.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response status is not 200.
   */
  protected function sendRawPost($url, array $options = array()) {
    $request = $options;
    $request['method'] = 'POST';
    // Normally we use POST to send XML documents.
    if (empty($request['headers']['Content-Type'])) {
      $request['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
    }
    return $this->checkResponse($this->makeHttpRequest($url, $request));
  }
  /**
   * Sends an HTTP request to Solr.
   *
   * This is a wrapper around drupal_http_request() that adds the configured
   * authorization header and stream context, normalizes failed results and
   * copies the top-level properties of a JSON object body onto the result.
   *
   * @param string $url
   *   The URL to request.
   * @param array $options
   *   Options for drupal_http_request().
   *
   * @return object
   *   The result object, always with "code" and "data" set.
   */
  protected function makeHttpRequest($url, array $options = array()) {
    // Make sure we are not sending a request body for GET and HEAD requests.
    $without_body = empty($options['method']) || in_array($options['method'], array('GET', 'HEAD'));
    if ($without_body) {
      $options['data'] = NULL;
    }
    if ($this->http_auth) {
      $options['headers']['Authorization'] = $this->http_auth;
    }
    if ($this->stream_context) {
      $options['context'] = $this->stream_context;
    }

    $result = drupal_http_request($url, $options);

    // A missing or negative code is normalized to a generic failure.
    $failed = !isset($result->code) || $result->code < 0;
    if ($failed) {
      $result->code = 0;
      $result->status_message = 'Request failed';
      $result->protocol = 'HTTP/1.0';
    }
    // Additional information may be in the error property.
    if (isset($result->error)) {
      $result->status_message .= ': ' . check_plain($result->error);
    }

    if (isset($result->data)) {
      // Expose the members of a JSON object body directly on the result.
      $decoded = json_decode($result->data);
      if (is_object($decoded)) {
        foreach ($decoded as $property => $property_value) {
          $result->$property = $property_value;
        }
      }
    }
    else {
      $result->data = '';
      $result->response = NULL;
    }
    return $result;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escape().
   *
   * Prefixes every special character sequence in $value with a backslash.
   *
   * @param string $value
   *   The string to escape.
   * @param int $version
   *   (optional) The Solr major version. From version 4 on, the slash is
   *   escaped as well.
   *
   * @return string
   *   The escaped string.
   */
  public static function escape($value, $version = 0) {
    $special_sequences = array('+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', "\\");
    // Solr 4.x introduces regular expressions, making the slash also a special
    // character.
    if ($version >= 4) {
      $special_sequences[] = '/';
    }

    $map = array();
    foreach ($special_sequences as $sequence) {
      $map[$sequence] = '\\' . $sequence;
    }
    return strtr($value, $map);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escapePhrase().
   *
   * @param string $value
   *   The phrase to escape.
   *
   * @return string
   *   The phrase with double quotes and backslashes backslash-escaped.
   */
  public static function escapePhrase($value) {
    return strtr($value, array(
      '"' => '\"',
      "\\" => "\\\\",
    ));
  }
  /**
   * Implements SearchApiSolrConnectionInterface::phrase().
   *
   * @param string $value
   *   The phrase to quote.
   *
   * @return string
   *   The escaped phrase wrapped in double quotes.
   */
  public static function phrase($value) {
    $escaped = self::escapePhrase($value);
    return '"' . $escaped . '"';
  }
  /**
   * Implements SearchApiSolrConnectionInterface::escapeFieldName().
   *
   * @param string $value
   *   The field name to escape.
   *
   * @return string
   *   The field name with every colon prefixed by a backslash.
   */
  public static function escapeFieldName($value) {
    return str_replace(':', '\:', $value);
  }
  /**
   * Returns the HTTP URL for a certain servlet on the Solr server.
   *
   * @param string $servlet
   *   A string path to a Solr request handler.
   * @param array $params
   *   Additional GET parameters to append to the URL.
   * @param string|null $added_query_string
   *   Additional, already encoded query string to append to the URL.
   *
   * @return string
   *   The base URL, followed by the servlet path and the query string, if any.
   */
  protected function constructUrl($servlet, array $params = array(), $added_query_string = NULL) {
    // PHP's built in http_build_query() doesn't give us the format Solr wants.
    $built = $this->httpBuildQuery($params);

    // Collect the non-empty query parts and join them behind a single "?".
    $parts = array_filter(array($built, $added_query_string));
    $query_string = $parts ? '?' . implode('&', $parts) : $built;

    return $this->base_url . $servlet . $query_string;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::getBaseUrl().
   *
   * @return string
   *   The base URL that servlet paths are appended to.
   */
  public function getBaseUrl() {
    return $this->base_url;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::setBaseUrl().
   *
   * @param string $url
   *   The new base URL.
   */
  public function setBaseUrl($url) {
    // The cached update URL was derived from the old base URL, so drop it.
    $this->update_url = NULL;
    $this->base_url = $url;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::update().
   *
   * @param string $rawPost
   *   The request body to POST to the update servlet.
   * @param int|float|false $timeout
   *   (optional) Request timeout; only passed on when truthy.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response status is not 200.
   */
  public function update($rawPost, $timeout = FALSE) {
    // Store the URL in an instance variable since many updates may be sent
    // via a single instance of this class.
    if (empty($this->update_url)) {
      $this->update_url = $this->constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
    }

    $request = array('data' => $rawPost);
    if ($timeout) {
      $request['timeout'] = $timeout;
    }
    return $this->sendRawPost($this->update_url, $request);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::addDocuments().
   *
   * Entries of $documents that are not SearchApiSolrDocument objects are
   * skipped.
   *
   * @param array $documents
   *   The documents to add.
   * @param bool|null $overwrite
   *   (optional) If set, emitted as the "overwrite" attribute.
   * @param int|null $commitWithin
   *   (optional) If set, emitted as the "commitWithin" attribute, cast to an
   *   integer.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response status is not 200.
   */
  public function addDocuments(array $documents, $overwrite = NULL, $commitWithin = NULL) {
    $attributes = '';
    if ($overwrite !== NULL) {
      $attributes .= ' overwrite="' . ($overwrite ? 'true' : 'false') . '"';
    }
    if ($commitWithin !== NULL) {
      $attributes .= ' commitWithin="' . ((int) $commitWithin) . '"';
    }

    $chunks = array();
    foreach ($documents as $document) {
      if ($document instanceof SearchApiSolrDocument) {
        $chunks[] = $document->toXml();
      }
    }
    return $this->update("<add$attributes>" . implode('', $chunks) . '</add>');
  }
  /**
   * Implements SearchApiSolrConnectionInterface::commit().
   *
   * @param bool $waitSearcher
   *   (optional) Whether to wait for the new searcher.
   * @param int $timeout
   *   (optional) Request timeout in seconds.
   *
   * @return object
   *   The response object.
   */
  public function commit($waitSearcher = TRUE, $timeout = 3600) {
    $response = $this->optimizeOrCommit('commit', $waitSearcher, $timeout);
    return $response;
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteById().
   *
   * @param string $id
   *   The ID of the document to delete.
   * @param int $timeout
   *   (optional) Request timeout in seconds.
   *
   * @return object
   *   The response object.
   */
  public function deleteById($id, $timeout = 3600) {
    $ids = array($id);
    return $this->deleteByMultipleIds($ids, $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteByMultipleIds().
   *
   * @param array $ids
   *   The IDs of the documents to delete. Each ID is XML-escaped.
   * @param int $timeout
   *   (optional) Request timeout in seconds.
   *
   * @return object
   *   The response object.
   */
  public function deleteByMultipleIds(array $ids, $timeout = 3600) {
    $elements = array();
    foreach ($ids as $id) {
      $elements[] = '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
    }
    return $this->update('<delete>' . implode('', $elements) . '</delete>', $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::deleteByQuery().
   *
   * @param string $rawQuery
   *   The query selecting the documents to delete. It is XML-escaped, but not
   *   otherwise altered.
   * @param int $timeout
   *   (optional) Request timeout in seconds.
   *
   * @return object
   *   The response object.
   */
  public function deleteByQuery($rawQuery, $timeout = 3600) {
    $escaped_query = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
    return $this->update('<delete><query>' . $escaped_query . '</query></delete>', $timeout);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::optimize().
   *
   * @param bool $waitSearcher
   *   (optional) Whether to wait for the new searcher.
   * @param int $timeout
   *   (optional) Request timeout in seconds.
   *
   * @return object
   *   The response object.
   */
  public function optimize($waitSearcher = TRUE, $timeout = 3600) {
    $response = $this->optimizeOrCommit('optimize', $waitSearcher, $timeout);
    return $response;
  }
  /**
   * Sends a commit or optimize command to the Solr server.
   *
   * Will be synchronous unless $waitSearcher is set to FALSE. The locally
   * held and persistent caches of this connection are cleared afterwards.
   *
   * @param string $type
   *   Either "commit" or "optimize".
   * @param bool $waitSearcher
   *   (optional) Wait until a new searcher is opened and registered as the
   *   main query searcher, making the changes visible. Defaults to TRUE.
   * @param int $timeout
   *   Seconds to wait until timing out with an exception. Defaults to an hour.
   *
   * @return object
   *   A response object.
   *
   * @throws SearchApiException
   *   If an error occurs during the service call.
   */
  protected function optimizeOrCommit($type, $waitSearcher = TRUE, $timeout = 3600) {
    $attributes = $waitSearcher ? '' : ' waitSearcher="false"';

    // The softCommit attribute is only sent to servers newer than Solr 3.x.
    $legacy_server = $this->getSolrVersion() <= 3;
    if (!$legacy_server && $this->soft_commit) {
      $attributes .= ' softCommit="true"';
    }

    $response = $this->update("<$type$attributes />", $timeout);
    $this->clearCache();
    return $response;
  }
  /**
   * Builds a query string in the format Solr expects.
   *
   * Like PHP's built in http_build_query(), but uses rawurlencode() and no []
   * for repeated params: the values of a nested array are all emitted under
   * the key of that array.
   *
   * @param array $query
   *   The parameters to encode. NULL values produce a bare key without "=".
   * @param string $parent
   *   (optional) Already encoded key to use for all entries; used internally
   *   when recursing into nested arrays.
   *
   * @return string
   *   The encoded query string, without a leading "?".
   */
  protected function httpBuildQuery(array $query, $parent = '') {
    $params = array();
    foreach ($query as $key => $value) {
      $key = ($parent ? $parent : rawurlencode($key));
      // Recurse into children.
      if (is_array($value)) {
        $nested = $this->httpBuildQuery($value, $key);
        // An empty array contributes nothing. Skipping it avoids empty
        // segments ("&&" or a dangling "&") in the result.
        if ($nested !== '') {
          $params[] = $nested;
        }
      }
      // If a query parameter value is NULL, only append its key.
      elseif (!isset($value)) {
        $params[] = $key;
      }
      else {
        $params[] = $key . '=' . rawurlencode($value);
      }
    }
    return implode('&', $params);
  }
  /**
   * Implements SearchApiSolrConnectionInterface::search().
   *
   * The HTTP method configured for this connection decides how the request
   * is sent; the $method argument is not consulted by this implementation.
   *
   * @param string|null $query
   *   (optional) The raw query string, sent as the "q" parameter if truthy.
   * @param array $params
   *   (optional) Further request parameters. "wt" is always forced to "json".
   * @param string $method
   *   (optional) Unused here.
   *
   * @return object
   *   The response object.
   *
   * @throws SearchApiException
   *   If the response status is not 200.
   */
  public function search($query = NULL, array $params = array(), $method = 'GET') {
    // Always use JSON. See
    // http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for
    // reasoning.
    $params['wt'] = 'json';
    // Additional default params.
    $params += array('json.nl' => self::NAMED_LIST_FORMAT);
    if ($query) {
      $params['q'] = $query;
    }

    // PHP's built-in http_build_query() doesn't give us the format Solr wants.
    $queryString = $this->httpBuildQuery($params);

    switch ($this->method) {
      case 'GET':
        // For GET, the parameters travel in the URL.
        $searchUrl = $this->constructUrl(self::SEARCH_SERVLET, array(), $queryString);
        return $this->sendRawGet($searchUrl);

      case 'POST':
        // For POST, the parameters travel form-encoded in the request body.
        $searchUrl = $this->constructUrl(self::SEARCH_SERVLET);
        $request = array(
          'data' => $queryString,
          'headers' => array(
            'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
          ),
        );
        return $this->sendRawPost($searchUrl, $request);
    }
  }
  801. }