solr_connection.inc 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943
  1. <?php
  2. /**
  3. * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * - Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * - Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
  15. * its contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
  31. * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
  32. * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
  33. *
  34. * @package Apache
  35. * @subpackage Solr
  36. * @author Donovan Jimenez <djimenez@conduit-it.com>
  37. */
  38. /**
  39. * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
  40. * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
  41. *
  42. * This program is free software; you can redistribute it and/or modify
  43. * it under the terms of the GNU General Public License as published by
  44. * the Free Software Foundation; either version 2 of the License, or (at
  45. * your option) any later version.
  46. *
  47. * This program is distributed in the hope that it will be useful, but
  48. * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  49. * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
  50. * for more details.
  51. *
  52. * You should have received a copy of the GNU General Public License
  53. * along with this program as the file LICENSE.txt; if not, please see
  54. * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
  55. */
  /**
   * Represents a Solr server resource.
   *
   * Contains methods for pinging, adding, deleting, committing, optimizing and
   * searching.
   */
  class SearchApiSolrConnection implements SearchApiSolrConnectionInterface {

    /**
     * Defines how NamedLists should be formatted in the output.
     *
     * This specifically affects facet counts. Valid values are 'map' (default) or
     * 'flat'.
     */
    const NAMED_LIST_FORMAT = 'map';

    /**
     * Path to the ping servlet.
     */
    const PING_SERVLET = 'admin/ping';

    /**
     * Path to the update servlet.
     */
    const UPDATE_SERVLET = 'update';

    /**
     * Path to the search servlet.
     */
    const SEARCH_SERVLET = 'select';

    /**
     * Path to the luke servlet.
     */
    const LUKE_SERVLET = 'admin/luke';

    /**
     * Path to the system servlet.
     */
    const SYSTEM_SERVLET = 'admin/system';

    /**
     * Path to the stats servlet.
     */
    const STATS_SERVLET = 'admin/stats.jsp';

    /**
     * Path to the stats servlet for Solr 4.x servers.
     */
    const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';

    /**
     * Path to the file servlet.
     */
    const FILE_SERVLET = 'admin/file';

    /**
     * The options passed when creating this connection.
     *
     * @var array
     */
    protected $options;

    /**
     * The Solr server's URL.
     *
     * @var string
     */
    protected $base_url;

    /**
     * Cached URL to the update servlet.
     *
     * @var string
     */
    protected $update_url;

    /**
     * HTTP Basic Authentication header to set for requests to the Solr server.
     *
     * @var string
     */
    protected $http_auth;

    /**
     * The stream context to use for requests to the Solr server.
     *
     * Defaults to NULL (= pass no context at all).
     *
     * @var resource|null
     */
    protected $stream_context;

    /**
     * Cache for the metadata from admin/luke.
     *
     * Contains an array of response objects, keyed by the number of "top terms".
     *
     * @var array
     *
     * @see getLuke()
     */
    protected $luke = array();

    /**
     * Cache for information about the Solr core.
     *
     * @var SimpleXMLElement
     *
     * @see getStats()
     */
    protected $stats;

    /**
     * Cache for system information.
     *
     * Holds the decoded JSON response of the system servlet.
     *
     * @var object
     *
     * @see getSystemInfo()
     */
    protected $system_info;

    /**
     * Flag that denotes whether to use soft commits for Solr 4.x.
     *
     * Defaults to TRUE.
     *
     * @var bool
     */
    protected $soft_commit = TRUE;

    /**
     * Implements SearchApiSolrConnectionInterface::__construct().
     *
     * Valid options include:
     * - scheme: Scheme of the base URL of the Solr server. Most probably "http"
     *   or "https". Defaults to "http".
     * - host: The host name (or IP) of the Solr server. Defaults to
     *   "localhost".
     * - port: The port of the Solr server. Defaults to 8983.
     * - path: The base path to the Solr server. Defaults to "solr". Leading and
     *   trailing slashes are normalized, so "solr" and "/solr/" are equivalent.
     * - http_user: If both this and "http_pass" are set, will use this
     *   information to add basic HTTP authentication to all requests to the
     *   Solr server. Not set by default.
     * - http_pass: See "http_user".
     */
    public function __construct(array $options) {
      $options += array(
        'scheme' => 'http',
        'host' => 'localhost',
        'port' => 8983,
        'path' => 'solr',
        'http_user' => NULL,
        'http_pass' => NULL,
      );
      $this->options = $options;

      // Normalize the path to exactly one leading and one trailing slash. An
      // empty path must yield "/" instead of "//".
      $path = trim($options['path'], '/');
      $path = ($path === '') ? '/' : '/' . $path . '/';
      $this->base_url = $options['scheme'] . '://' . $options['host'] . ':' . $options['port'] . $path;

      // Set HTTP Basic Authentication parameter, if login data was set. The
      // string casts keep NULL (the default) away from strlen().
      if (strlen((string) $options['http_user']) && strlen((string) $options['http_pass'])) {
        $this->http_auth = 'Basic ' . base64_encode($options['http_user'] . ':' . $options['http_pass']);
      }
    }

    /**
     * Implements SearchApiSolrConnectionInterface::ping().
     */
    public function ping($timeout = 2) {
      $start = microtime(TRUE);

      if ($timeout <= 0.0) {
        $timeout = -1;
      }

      $pingUrl = $this->constructUrl(self::PING_SERVLET);

      // Attempt a HEAD request to the Solr ping url.
      $options = array(
        'method' => 'HEAD',
        'timeout' => $timeout,
      );
      $response = $this->makeHttpRequest($pingUrl, $options);

      if ($response->code == 200) {
        // Add 1 µs to the ping time so we never return 0.
        return (microtime(TRUE) - $start) + 1E-6;
      }
      return FALSE;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::setSoftCommit().
     */
    public function setSoftCommit($soft_commit) {
      $this->soft_commit = (bool) $soft_commit;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getSoftCommit().
     */
    public function getSoftCommit() {
      return $this->soft_commit;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::setStreamContext().
     */
    public function setStreamContext($stream_context) {
      $this->stream_context = $stream_context;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getStreamContext().
     */
    public function getStreamContext() {
      return $this->stream_context;
    }

    /**
     * Computes the cache ID to use for this connection.
     *
     * The ID is based on the "server" option passed to the constructor.
     *
     * @param $suffix
     *   (optional) A suffix to append to the string to make it unique.
     *
     * @return string|null
     *   The cache ID to use for this connection and usage; or NULL if no caching
     *   should take place.
     */
    protected function getCacheId($suffix = '') {
      if (empty($this->options['server'])) {
        return NULL;
      }
      $cid = $this->options['server'];
      return $suffix ? "$cid:$suffix" : $cid;
    }

    /**
     * Call the /admin/system servlet to retrieve system information.
     *
     * Stores the retrieved information in $system_info. The raw JSON response
     * is cached if a cache ID is available for this connection.
     *
     * @throws SearchApiException
     *   If the request to the Solr server does not return HTTP status 200.
     *
     * @see getSystemInfo()
     */
    protected function setSystemInfo() {
      $cid = $this->getCacheId(__FUNCTION__);
      if ($cid) {
        $cache = cache_get($cid, 'cache_search_api_solr');
        if ($cache) {
          $this->system_info = json_decode($cache->data);
        }
      }

      // Second pass to populate the cache if necessary.
      if (empty($this->system_info)) {
        $url = $this->constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));
        $response = $this->sendRawGet($url);
        $this->system_info = json_decode($response->data);
        if ($cid) {
          cache_set($cid, $response->data, 'cache_search_api_solr');
        }
      }
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getSystemInfo().
     */
    public function getSystemInfo() {
      if (!isset($this->system_info)) {
        $this->setSystemInfo();
      }
      return $this->system_info;
    }

    /**
     * Sets $this->luke with the metadata about the index from admin/luke.
     *
     * @param int $num_terms
     *   (optional) The number of "top terms" to request; also used as the key
     *   under which the response is stored in $this->luke.
     *
     * @throws SearchApiException
     *   If the request to the Solr server does not return HTTP status 200.
     */
    protected function setLuke($num_terms = 0) {
      if (!empty($this->luke[$num_terms])) {
        return;
      }

      $cid = $this->getCacheId(__FUNCTION__ . ":$num_terms");
      if ($cid) {
        $cache = cache_get($cid, 'cache_search_api_solr');
        if (isset($cache->data) && is_array($cache->data)) {
          // Merge instead of overwrite, so responses already held in memory for
          // other $num_terms values are not lost. Cached entries win.
          $this->luke = $cache->data + $this->luke;
        }
      }

      // Second pass to populate the cache if necessary.
      if (empty($this->luke[$num_terms])) {
        $params = array(
          'numTerms' => "$num_terms",
          'wt' => 'json',
          'json.nl' => self::NAMED_LIST_FORMAT,
        );
        $url = $this->constructUrl(self::LUKE_SERVLET, $params);
        $this->luke[$num_terms] = $this->sendRawGet($url);
        if ($cid) {
          cache_set($cid, $this->luke, 'cache_search_api_solr');
        }
      }
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getFields().
     */
    public function getFields($num_terms = 0) {
      $fields = array();
      $luke = $this->getLuke($num_terms);
      // The response only has a "fields" member if Solr reported any.
      if (!empty($luke->fields)) {
        foreach ($luke->fields as $name => $info) {
          $fields[$name] = new SearchApiSolrField($info);
        }
      }
      return $fields;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getLuke().
     */
    public function getLuke($num_terms = 0) {
      if (!isset($this->luke[$num_terms])) {
        $this->setLuke($num_terms);
      }
      return $this->luke[$num_terms];
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getSolrVersion().
     */
    public function getSolrVersion() {
      // Allow for overrides by the user.
      if (!empty($this->options['solr_version'])) {
        return $this->options['solr_version'];
      }

      $system_info = $this->getSystemInfo();
      // Get our Solr version number. Casting the version string to an integer
      // yields the complete major version (e.g., "4.5.1" => 4, "10.0" => 10),
      // while taking only its first character would break for two-digit
      // major versions.
      if (isset($system_info->lucene->{'solr-spec-version'})) {
        return (int) $system_info->lucene->{'solr-spec-version'};
      }
      return 0;
    }

    /**
     * Stores information about the Solr core in $this->stats.
     *
     * Does nothing if the luke response does not contain a document count.
     *
     * @throws SearchApiException
     *   If a request to the Solr server does not return HTTP status 200.
     */
    protected function setStats() {
      $data = $this->getLuke();
      // Only try to get stats if we have connected to the index.
      if (!empty($this->stats) || !isset($data->index->numDocs)) {
        return;
      }

      $cid = $this->getCacheId(__FUNCTION__);
      if ($cid) {
        $cache = cache_get($cid, 'cache_search_api_solr');
        if (isset($cache->data)) {
          $this->stats = simplexml_load_string($cache->data);
        }
      }

      // Second pass to populate the cache if necessary.
      if (empty($this->stats)) {
        // The stats servlet differs between Solr versions.
        $servlet = ($this->getSolrVersion() >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
        $response = $this->sendRawGet($this->constructUrl($servlet));
        $this->stats = simplexml_load_string($response->data);
        if ($cid) {
          cache_set($cid, $response->data, 'cache_search_api_solr');
        }
      }
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getStats().
     */
    public function getStats() {
      if (!isset($this->stats)) {
        $this->setStats();
      }
      return $this->stats;
    }

    /**
     * Returns the trimmed text of the first node matching an XPath expression.
     *
     * @param SimpleXMLElement $xml
     *   The XML document to search.
     * @param string $path
     *   The XPath expression to evaluate.
     *
     * @return string
     *   The trimmed string value of the first match; or an empty string if
     *   nothing matched.
     */
    protected function getFirstXpathValue(SimpleXMLElement $xml, $path) {
      $matches = $xml->xpath($path);
      if (empty($matches)) {
        return '';
      }
      return trim((string) reset($matches));
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getStatsSummary().
     */
    public function getStatsSummary() {
      $stats = $this->getStats();
      $solr_version = $this->getSolrVersion();

      $summary = array(
        '@pending_docs' => '',
        '@autocommit_time_seconds' => '',
        '@autocommit_time' => '',
        '@deletes_by_id' => '',
        '@deletes_by_query' => '',
        '@deletes_total' => '',
        '@schema_version' => '',
        '@core_name' => '',
        '@index_size' => '',
      );

      if (empty($stats)) {
        return $summary;
      }

      if ($solr_version <= 3) {
        // Solr 1.x/3.x: values come from the admin/stats.jsp XML document.
        $summary['@pending_docs'] = (int) $this->getFirstXpathValue($stats, '//stat[@name="docsPending"]');
        $max_time = (int) $this->getFirstXpathValue($stats, '//stat[@name="autocommit maxTime"]');
        $summary['@deletes_by_id'] = (int) $this->getFirstXpathValue($stats, '//stat[@name="deletesById"]');
        $summary['@deletes_by_query'] = (int) $this->getFirstXpathValue($stats, '//stat[@name="deletesByQuery"]');
        $summary['@schema_version'] = $this->getFirstXpathValue($stats, '/solr/schema[1]');
        $summary['@core_name'] = $this->getFirstXpathValue($stats, '/solr/core[1]');
        $summary['@index_size'] = $this->getFirstXpathValue($stats, '//stat[@name="indexSize"]');
      }
      else {
        // Solr 4.x and later: values come from the admin/mbeans XML document,
        // except for the schema name, which is taken from the system info.
        // NOTE(review): predicates like ["stats"] are non-empty string literals
        // and therefore always true in XPath 1.0, so these expressions match
        // <lst> elements regardless of their name. They are kept unchanged,
        // since tightening them to [@name="stats"] could change which elements
        // match - confirm against real mbeans output before changing.
        $summary['@pending_docs'] = (int) $this->getFirstXpathValue($stats, '//lst["stats"]/long[@name="docsPending"]');
        $max_time = (int) $this->getFirstXpathValue($stats, '//lst["stats"]/str[@name="autocommit maxTime"]');
        $summary['@deletes_by_id'] = (int) $this->getFirstXpathValue($stats, '//lst["stats"]/long[@name="deletesById"]');
        $summary['@deletes_by_query'] = (int) $this->getFirstXpathValue($stats, '//lst["stats"]/long[@name="deletesByQuery"]');
        $system_info = $this->getSystemInfo();
        if (isset($system_info->core->schema)) {
          $summary['@schema_version'] = $system_info->core->schema;
        }
        $summary['@core_name'] = $this->getFirstXpathValue($stats, '//lst["core"]/str[@name="coreName"]');
        $summary['@index_size'] = $this->getFirstXpathValue($stats, '//lst["core"]/str[@name="indexSize"]');
      }

      // The autocommit time is divided by 1000 to convert it to seconds.
      $summary['@autocommit_time_seconds'] = $max_time / 1000;
      $summary['@autocommit_time'] = format_interval($max_time / 1000);
      $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];

      return $summary;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::clearCache().
     */
    public function clearCache() {
      $cid = $this->getCacheId();
      if ($cid) {
        // Wildcard clear: removes all cache entries starting with $cid.
        cache_clear_all($cid, 'cache_search_api_solr', TRUE);
      }
      $this->luke = array();
      $this->stats = NULL;
      $this->system_info = NULL;
    }

    /**
     * Checks the response code and throws an exception if it's not 200.
     *
     * @param object $response
     *   A response object.
     *
     * @return object
     *   The passed response object.
     *
     * @throws SearchApiException
     *   If the object's HTTP status is not 200.
     */
    protected function checkResponse($response) {
      $code = (int) $response->code;
      if ($code == 200) {
        return $response;
      }
      if ($code >= 400 && $code != 403 && $code != 404) {
        // Add details, like Solr's exception message.
        $response->status_message .= $response->data;
      }
      throw new SearchApiException('"' . $code . '" Status: ' . $response->status_message);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::makeServletRequest().
     */
    public function makeServletRequest($servlet, array $params = array(), array $options = array()) {
      // Add default params.
      $params += array(
        'wt' => 'json',
        'json.nl' => self::NAMED_LIST_FORMAT,
      );

      $url = $this->constructUrl($servlet, $params);
      $response = $this->makeHttpRequest($url, $options);

      return $this->checkResponse($response);
    }

    /**
     * Central method for making a GET operation against this Solr Server.
     *
     * @param string $url
     *   The URL to request.
     * @param array $options
     *   (optional) Options to pass on to makeHttpRequest(). The "method" key is
     *   always overwritten with "GET".
     *
     * @return object
     *   The response object.
     *
     * @throws SearchApiException
     *   If the response's HTTP status is not 200.
     */
    protected function sendRawGet($url, array $options = array()) {
      $options['method'] = 'GET';
      $response = $this->makeHttpRequest($url, $options);
      return $this->checkResponse($response);
    }

    /**
     * Central method for making a POST operation against this Solr Server.
     *
     * @param string $url
     *   The URL to request.
     * @param array $options
     *   (optional) Options to pass on to makeHttpRequest(). The "method" key is
     *   always overwritten with "POST"; the "Content-Type" header defaults to
     *   XML if not set.
     *
     * @return object
     *   The response object.
     *
     * @throws SearchApiException
     *   If the response's HTTP status is not 200.
     */
    protected function sendRawPost($url, array $options = array()) {
      $options['method'] = 'POST';
      // Normally we use POST to send XML documents.
      if (empty($options['headers']['Content-Type'])) {
        $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
      }
      $response = $this->makeHttpRequest($url, $options);
      return $this->checkResponse($response);
    }

    /**
     * Sends an HTTP request to Solr.
     *
     * This is just a wrapper around drupal_http_request(). It adds the
     * authentication header and stream context (if set), normalizes failed
     * requests to status code 0 and copies the top-level members of a JSON
     * response body onto the returned object.
     *
     * @param string $url
     *   The URL to request.
     * @param array $options
     *   (optional) Options to pass on to drupal_http_request().
     *
     * @return object
     *   The response object, always having the "code", "status_message" and
     *   "data" properties set.
     */
    protected function makeHttpRequest($url, array $options = array()) {
      if (empty($options['method']) || $options['method'] == 'GET' || $options['method'] == 'HEAD') {
        // Make sure we are not sending a request body.
        $options['data'] = NULL;
      }
      if ($this->http_auth) {
        $options['headers']['Authorization'] = $this->http_auth;
      }
      if ($this->stream_context) {
        $options['context'] = $this->stream_context;
      }

      $result = drupal_http_request($url, $options);

      if (!isset($result->code) || $result->code < 0) {
        $result->code = 0;
        $result->status_message = 'Request failed';
        $result->protocol = 'HTTP/1.0';
      }
      // Make sure the status message can safely be appended to below and in
      // checkResponse(), even if the HTTP layer did not set one.
      if (!isset($result->status_message)) {
        $result->status_message = 'Request failed';
      }
      // Additional information may be in the error property.
      if (isset($result->error)) {
        $result->status_message .= ': ' . check_plain($result->error);
      }

      if (!isset($result->data)) {
        $result->data = '';
        $result->response = NULL;
      }
      else {
        // Expose the members of a JSON response directly on the result object.
        $response = json_decode($result->data);
        if (is_object($response)) {
          foreach ($response as $key => $value) {
            $result->$key = $value;
          }
        }
      }

      return $result;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::escape().
     */
    public static function escape($value, $version = 0) {
      $specials = array('+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', "\\");
      // Solr 4.x introduces regular expressions, making the slash also a special
      // character.
      if ($version >= 4) {
        $specials[] = '/';
      }

      // Prefix each special sequence with a backslash.
      $replacements = array();
      foreach ($specials as $special) {
        $replacements[$special] = "\\$special";
      }

      return strtr($value, $replacements);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::escapePhrase().
     */
    public static function escapePhrase($value) {
      $replacements = array(
        '"' => '\"',
        "\\" => "\\\\",
      );
      return strtr($value, $replacements);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::phrase().
     */
    public static function phrase($value) {
      return '"' . self::escapePhrase($value) . '"';
    }

    /**
     * Implements SearchApiSolrConnectionInterface::escapeFieldName().
     */
    public static function escapeFieldName($value) {
      return str_replace(':', '\:', $value);
    }

    /**
     * Returns the HTTP URL for a certain servlet on the Solr server.
     *
     * @param $servlet
     *   A string path to a Solr request handler. May already contain a query
     *   string, in which case further parameters are appended with "&".
     * @param array $params
     *   Additional GET parameters to append to the URL.
     * @param $added_query_string
     *   Additional query string to append to the URL.
     *
     * @return string
     *   The complete URL.
     */
    protected function constructUrl($servlet, array $params = array(), $added_query_string = NULL) {
      // PHP's built in http_build_query() doesn't give us the format Solr wants.
      $query_string = $this->httpBuildQuery($params);
      if ($added_query_string) {
        $query_string = $query_string ? $query_string . '&' . $added_query_string : $added_query_string;
      }

      $url = $this->base_url . $servlet;
      if ($query_string) {
        // Don't add a second "?" if the servlet path already has a query string
        // (as is the case for STATS_SERVLET_4).
        $separator = (strpos($servlet, '?') === FALSE) ? '?' : '&';
        $url .= $separator . $query_string;
      }
      return $url;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::getBaseUrl().
     */
    public function getBaseUrl() {
      return $this->base_url;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::setBaseUrl().
     */
    public function setBaseUrl($url) {
      $this->base_url = $url;
      // The cached update URL was built from the old base URL.
      $this->update_url = NULL;
    }

    /**
     * Implements SearchApiSolrConnectionInterface::update().
     */
    public function update($rawPost, $timeout = FALSE) {
      if (empty($this->update_url)) {
        // Store the URL in an instance variable since many updates may be sent
        // via a single instance of this class.
        $this->update_url = $this->constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
      }

      $options = array('data' => $rawPost);
      if ($timeout) {
        $options['timeout'] = $timeout;
      }
      return $this->sendRawPost($this->update_url, $options);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::addDocuments().
     */
    public function addDocuments(array $documents, $overwrite = NULL, $commitWithin = NULL) {
      $attr = '';
      if (isset($overwrite)) {
        $attr .= ' overwrite="' . ($overwrite ? 'true"' : 'false"');
      }
      if (isset($commitWithin)) {
        $attr .= ' commitWithin="' . ((int) $commitWithin) . '"';
      }

      $rawPost = "<add$attr>";
      foreach ($documents as $document) {
        // Anything that is not a Solr document object is silently skipped.
        if ($document instanceof SearchApiSolrDocument) {
          $rawPost .= $document->toXml();
        }
      }
      $rawPost .= '</add>';

      return $this->update($rawPost);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::commit().
     */
    public function commit($waitSearcher = TRUE, $timeout = 3600) {
      return $this->optimizeOrCommit('commit', $waitSearcher, $timeout);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::deleteById().
     */
    public function deleteById($id, $timeout = 3600) {
      return $this->deleteByMultipleIds(array($id), $timeout);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::deleteByMultipleIds().
     */
    public function deleteByMultipleIds(array $ids, $timeout = 3600) {
      $rawPost = '<delete>';
      foreach ($ids as $id) {
        $rawPost .= '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
      }
      $rawPost .= '</delete>';

      return $this->update($rawPost, $timeout);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::deleteByQuery().
     */
    public function deleteByQuery($rawQuery, $timeout = 3600) {
      $rawPost = '<delete><query>' . htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8') . '</query></delete>';
      return $this->update($rawPost, $timeout);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::optimize().
     */
    public function optimize($waitSearcher = TRUE, $timeout = 3600) {
      return $this->optimizeOrCommit('optimize', $waitSearcher, $timeout);
    }

    /**
     * Sends a commit or optimize command to the Solr server.
     *
     * Will be synchronous unless $waitSearcher is set to FALSE. Clears this
     * connection's caches afterwards.
     *
     * @param string $type
     *   Either "commit" or "optimize".
     * @param bool $waitSearcher
     *   (optional) Wait until a new searcher is opened and registered as the main
     *   query searcher, making the changes visible. Defaults to true.
     * @param int $timeout
     *   Seconds to wait until timing out with an exception. Defaults to an hour.
     *
     * @return object
     *   A response object.
     *
     * @throws SearchApiException
     *   If an error occurs during the service call.
     */
    protected function optimizeOrCommit($type, $waitSearcher = TRUE, $timeout = 3600) {
      $attributes = $waitSearcher ? '' : ' waitSearcher="false"';
      // Soft commits are only added for Solr versions above 3.
      if ($this->getSolrVersion() > 3 && $this->soft_commit) {
        $attributes .= ' softCommit="true"';
      }
      $rawPost = "<$type$attributes />";

      $response = $this->update($rawPost, $timeout);
      $this->clearCache();

      return $response;
    }

    /**
     * Generates an URL-encoded query string.
     *
     * Works like PHP's built in http_build_query() (or drupal_http_build_query())
     * but uses rawurlencode() and no [] for repeated params, to be compatible
     * with the Java-based servers Solr runs on.
     *
     * @param array $query
     *   The query parameters which should be set.
     * @param string $parent
     *   Internal use only.
     *
     * @return string
     *   A query string to append (after "?") to a URL.
     */
    protected function httpBuildQuery(array $query, $parent = '') {
      // Compare explicitly instead of by truthiness, so a parent key of "0" is
      // still recognized as a parent.
      $has_parent = ($parent !== '' && $parent !== NULL);
      $params = array();

      foreach ($query as $key => $value) {
        // Nested values repeat the (already encoded) key of their parent.
        $key = $has_parent ? $parent : rawurlencode($key);

        // Recurse into children.
        if (is_array($value)) {
          $value = $this->httpBuildQuery($value, $key);
          if ($value) {
            $params[] = $value;
          }
        }
        // If a query parameter value is NULL, only append its key.
        elseif (!isset($value)) {
          $params[] = $key;
        }
        else {
          $params[] = $key . '=' . rawurlencode($value);
        }
      }

      return implode('&', $params);
    }

    /**
     * Implements SearchApiSolrConnectionInterface::search().
     */
    public function search($query = NULL, array $params = array(), $method = 'GET') {
      // Always use JSON. See
      // http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for
      // reasoning.
      $params['wt'] = 'json';
      // Additional default params.
      $params += array(
        'json.nl' => self::NAMED_LIST_FORMAT,
      );
      if (isset($query)) {
        $params['q'] = $query;
      }

      // Carry out some performance improvements when no search keys are given.
      if (!isset($params['q']) || !strlen($params['q'])) {
        // Without search keys, the qf parameter is useless. We also remove empty
        // search keys here. (With our normal service class, empty keys won't be
        // set, but another module using this connection class might do that.)
        unset($params['q'], $params['qf']);
        // If we have filters set (which will nearly always be the case, since we
        // have to filter by index), move them to the q.alt parameter where
        // possible.
        if (!empty($params['fq'])) {
          // Accept a single filter string as well as a list of filters.
          $params['fq'] = (array) $params['fq'];
          $qalt = array();
          foreach ($params['fq'] as $i => $fq) {
            // Drop empty filters, which would otherwise end up as an empty "()"
            // clause in q.alt.
            if ($fq === '' || $fq === NULL) {
              unset($params['fq'][$i]);
              continue;
            }
            // Tagged and negative filters cannot be moved to q.alt.
            if ($fq[0] !== '{' && $fq[0] !== '-') {
              $qalt[] = "($fq)";
              unset($params['fq'][$i]);
            }
          }
          if ($qalt) {
            $params['q.alt'] = implode(' ', $qalt);
          }
          if (empty($params['fq'])) {
            unset($params['fq']);
          }
        }
      }

      // Build the HTTP query string. We have our own method for that since PHP's
      // built-in http_build_query() doesn't give us the format Solr wants.
      $queryString = $this->httpBuildQuery($params);

      if ($method == 'GET' || $method == 'AUTO') {
        $searchUrl = $this->constructUrl(self::SEARCH_SERVLET, array(), $queryString);
        if ($method == 'GET' || strlen($searchUrl) <= variable_get('search_api_solr_http_get_max_length', 4000)) {
          return $this->sendRawGet($searchUrl);
        }
      }

      // Method is POST, or AUTO with a long query.
      $searchUrl = $this->constructUrl(self::SEARCH_SERVLET);
      $options = array(
        'data' => $queryString,
        'headers' => array(
          'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
        ),
      );
      return $this->sendRawPost($searchUrl, $options);
    }

  }
  840. }