Service.php 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181
  1. <?php
  2. /**
  3. * Copyright (c) 2007-2011, Servigistics, Inc.
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * - Redistributions of source code must retain the above copyright notice,
  10. * this list of conditions and the following disclaimer.
  11. * - Redistributions in binary form must reproduce the above copyright
  12. * notice, this list of conditions and the following disclaimer in the
  13. * documentation and/or other materials provided with the distribution.
  14. * - Neither the name of Servigistics, Inc. nor the names of
  15. * its contributors may be used to endorse or promote products derived from
  16. * this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  19. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  22. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  25. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  26. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  27. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  28. * POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com)
  31. * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD
  32. * @version $Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $
  33. *
  34. * @package Apache
  35. * @subpackage Solr
  36. * @author Donovan Jimenez <djimenez@conduit-it.com>
  37. */
  38. // See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1)
  39. // Doesn't follow typical include path conventions, but is more convenient for users
  40. require_once(dirname(__FILE__) . '/Exception.php');
  41. require_once(dirname(__FILE__) . '/HttpTransportException.php');
  42. require_once(dirname(__FILE__) . '/InvalidArgumentException.php');
  43. require_once(dirname(__FILE__) . '/Document.php');
  44. require_once(dirname(__FILE__) . '/Response.php');
  45. require_once(dirname(__FILE__) . '/HttpTransport/Interface.php');
  46. /**
  47. * Starting point for the Solr API. Represents a Solr server resource and has
  48. * methods for pinging, adding, deleting, committing, optimizing and searching.
  49. *
  50. * Example Usage:
  51. * <code>
  52. * ...
  53. * $solr = new Apache_Solr_Service(); //or explicitly new Apache_Solr_Service('localhost', 8180, '/solr')
  54. *
  55. * if ($solr->ping())
  56. * {
  57. * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :)
  58. *
  59. * $document = new Apache_Solr_Document();
  60. * $document->id = uniqid(); //or something else suitably unique
  61. *
  62. * $document->title = 'Some Title';
  63. * $document->content = 'Some content for this wonderful document. Blah blah blah.';
  64. *
  65. * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments
  66. * //with an array of documents is faster
  67. *
  68. * $solr->commit(); //commit to see the deletes and the document
  69. * $solr->optimize(); //merges multiple segments into one
  70. *
  71. * //and the one we all care about, search!
  72. * //any other common or custom parameters to the request handler can go in the
  73. * //optional 4th array argument.
  74. * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc'));
  75. * }
  76. * ...
  77. * </code>
  78. *
  79. * @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance
  80. * improvements when dealing with multiple requests by using HTTP's keep alive functionality
  81. */
  82. class Apache_Solr_Service
  83. {
  /**
   * SVN Revision meta data for this class
   */
  const SVN_REVISION = '$Revision: 59 $';

  /**
   * SVN ID meta data for this class
   */
  const SVN_ID = '$Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $';

  /**
   * Response writer we'll request - JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
   */
  const SOLR_WRITER = 'json';

  /**
   * NamedList Treatment constants
   */
  const NAMED_LIST_FLAT = 'flat';
  const NAMED_LIST_MAP = 'map';

  /**
   * Search HTTP Methods
   */
  const METHOD_GET = 'GET';
  const METHOD_POST = 'POST';

  /**
   * Servlet mappings (paths relative to the configured Solr path)
   */
  const PING_SERVLET = 'admin/ping';
  const UPDATE_SERVLET = 'update';
  const SEARCH_SERVLET = 'select';
  const THREADS_SERVLET = 'admin/threads';
  const EXTRACT_SERVLET = 'update/extract';

  /**
   * Server host name
   *
   * @var string
   */
  protected $_host;

  /**
   * Server port; stored as an integer by {@link Apache_Solr_Service::setPort()}
   *
   * @var integer
   */
  protected $_port;

  /**
   * Server path
   *
   * @var string
   */
  protected $_path;

  /**
   * Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in
   * the returned parsed data
   *
   * @var boolean
   */
  protected $_createDocuments = true;

  /**
   * Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value
   * collapsed to appear as a single value would.
   *
   * @var boolean
   */
  protected $_collapseSingleValueArrays = true;

  /**
   * How NamedLists should be formatted in the output. This specifically affects facet counts. Valid values
   * are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}.
   *
   * @var string
   */
  protected $_namedListTreatment = self::NAMED_LIST_MAP;

  /**
   * Delimiter placed between the servlet path and the query string. Someone might want
   * to be able to change the delimiters (to use &amp; instead of & for example), so they are configurable.
   *
   * @var string
   */
  protected $_queryDelimiter = '?';

  /**
   * Delimiter placed between individual query string parameters
   *
   * @var string
   */
  protected $_queryStringDelimiter = '&';

  /**
   * Whether http_build_query url-encodes brackets on this PHP version; the constructor
   * overwrites this with the result of a version check
   *
   * @var boolean
   */
  protected $_queryBracketsEscaped = true;

  /**
   * Constructed servlet full path URLs. $_extractUrl is declared here because
   * {@link Apache_Solr_Service::_initUrls()} assigns it; leaving it undeclared would
   * create a dynamic property.
   *
   * @var string
   */
  protected $_pingUrl, $_updateUrl, $_searchUrl, $_threadsUrl, $_extractUrl;

  /**
   * Keep track of whether our URLs have been constructed
   *
   * @var boolean
   */
  protected $_urlsInited = false;

  /**
   * HTTP Transport implementation (pluggable); false until one is set or lazily created
   *
   * @var Apache_Solr_HttpTransport_Interface|false
   */
  protected $_httpTransport = false;
  /**
   * Backslash-escape the characters and sequences that are special to the Lucene query
   * parser (+ - && || ! ( ) { } [ ] ^ " ~ * ? : \) so the value is matched literally.
   *
   * NOTE: inside a phrase fewer characters need escaping, use {@link Apache_Solr_Service::escapePhrase()} instead
   *
   * @param string $value
   * @return string
   */
  public static function escape($value)
  {
    // list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
    return preg_replace(
      '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/',
      '\\\$1',
      $value
    );
  }
  /**
   * Backslash-escape the two characters that are special inside a quoted phrase:
   * the double quote and the backslash itself.
   *
   * @param string $value
   * @return string
   */
  public static function escapePhrase($value)
  {
    return preg_replace('/("|\\\)/', '\\\$1', $value);
  }
  /**
   * Wrap a value in double quotes, escaping it with
   * {@link Apache_Solr_Service::escapePhrase()} first, to produce phrase syntax.
   *
   * @param string $value
   * @return string
   */
  public static function phrase($value)
  {
    $escaped = self::escapePhrase($value);

    return '"' . $escaped . '"';
  }
  /**
   * Constructor. Every argument is optional; omitted arguments take the defaults
   * shown in the signature.
   *
   * @param string $host
   * @param integer $port
   * @param string $path
   * @param Apache_Solr_HttpTransport_Interface $httpTransport Optional; when left false a default transport is lazily created on first use
   */
  public function __construct($host = 'localhost', $port = 8180, $path = '/solr/', $httpTransport = false)
  {
    // http_build_query only url-encodes brackets from php 5.1.3 on; remember which
    // behavior applies so the query string can be corrected later
    $this->_queryBracketsEscaped = version_compare(phpversion(), '5.1.3', '>=');

    // the setters skip URL construction until the first explicit _initUrls() call
    $this->setHost($host);
    $this->setPort($port);
    $this->setPath($path);
    $this->_initUrls();

    if ($httpTransport)
    {
      $this->setHttpTransport($httpTransport);
    }
  }
  /**
   * Build an http URL from this server's host, port and path plus the given servlet
   * name, optionally followed by a url-encoded query string.
   *
   * @param string $servlet
   * @param array $params Query parameters as name => value pairs; omitted from the URL when empty
   * @return string
   */
  protected function _constructUrl($servlet, $params = array())
  {
    $baseUrl = 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet;

    if (!count($params))
    {
      return $baseUrl;
    }

    // url-encode every name and value for inclusion in the query string
    $pairs = array();

    foreach ($params as $name => $paramValue)
    {
      $pairs[] = urlencode($name) . '=' . urlencode($paramValue);
    }

    return $baseUrl . $this->_queryDelimiter . implode($this->_queryStringDelimiter, $pairs);
  }
  /**
   * Construct the full URLs for every servlet we reference (extract, ping, search,
   * threads and update) from the current host, port and path, then flag them as built.
   */
  protected function _initUrls()
  {
    $writerParams = array('wt' => self::SOLR_WRITER );

    // servlets addressed without a fixed query string
    $this->_extractUrl = $this->_constructUrl(self::EXTRACT_SERVLET);
    $this->_pingUrl = $this->_constructUrl(self::PING_SERVLET);
    $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);

    // servlets that always request our response writer
    $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, $writerParams);
    $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, $writerParams);

    $this->_urlsInited = true;
  }
  /**
   * Encode a parameter array as a query string (without the leading query delimiter).
   *
   * List-style array values are emitted as repeated parameters (foo=a&foo=b)
   * rather than PHP's bracketed form (foo[0]=a&foo[1]=b).
   *
   * @param array $params
   * @return string
   */
  protected function _generateQueryString($params)
  {
    // use http_build_query to encode our arguments because its faster
    // than urlencoding all the parts ourselves in a loop
    //
    // because http_build_query treats arrays differently than we want to, correct the query
    // string by changing foo[#]=bar (# being an actual number) parameter strings to just
    // multiple foo=bar strings. This regex should always work since '=' will be urlencoded
    // anywhere else the regex isn't expecting it
    //
    // NOTE: before php 5.1.3 brackets were not url encoded by http_build query - we've checked
    // the php version in the constructor and put the results in the instance variable. Also, before
    // 5.1.2 the arg_separator parameter was not available, so don't use it
    if ($this->_queryBracketsEscaped)
    {
      // pass an empty string (not null) as the numeric prefix: it means "no prefix" on
      // every PHP version, while null is deprecated for this parameter since PHP 8.1
      $queryString = http_build_query($params, '', $this->_queryStringDelimiter);

      return preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString);
    }

    $queryString = http_build_query($params);

    return preg_replace('/\\[(?:[0-9]|[1-9][0-9]+)\\]=/', '=', $queryString);
  }
  /**
   * Central method for performing a GET request against this Solr server through the
   * configured HTTP transport and wrapping the result in a response object.
   *
   * @param string $url
   * @param float $timeout Read timeout in seconds
   * @return Apache_Solr_Response
   *
   * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned
   */
  protected function _sendRawGet($url, $timeout = FALSE)
  {
    $rawResponse = $this->getHttpTransport()->performGetRequest($url, $timeout);
    $response = new Apache_Solr_Response($rawResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

    if ($response->getHttpStatus() == 200)
    {
      return $response;
    }

    throw new Apache_Solr_HttpTransportException($response);
  }
  /**
   * Central method for performing a POST request against this Solr server through the
   * configured HTTP transport and wrapping the result in a response object.
   *
   * @param string $url
   * @param string $rawPost Request body
   * @param float $timeout Read timeout in seconds
   * @param string $contentType Content type sent with the body
   * @return Apache_Solr_Response
   *
   * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned
   */
  protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8')
  {
    $rawResponse = $this->getHttpTransport()->performPostRequest($url, $rawPost, $contentType, $timeout);
    $response = new Apache_Solr_Response($rawResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

    if ($response->getHttpStatus() == 200)
    {
      return $response;
    }

    throw new Apache_Solr_HttpTransportException($response);
  }
  /**
   * Get the host this service is configured to talk to.
   *
   * @return string
   */
  public function getHost()
  {
    return $this->_host;
  }
  /**
   * Set the host used. An empty value is rejected rather than replaced by a default.
   * If the servlet URLs have already been built they are rebuilt for the new host.
   *
   * @param string $host
   *
   * @throws Apache_Solr_InvalidArgumentException If the host parameter is empty
   */
  public function setHost($host)
  {
    if (empty($host))
    {
      throw new Apache_Solr_InvalidArgumentException('Host parameter is empty');
    }

    $this->_host = $host;

    // keep the cached servlet URLs in sync once they exist
    if ($this->_urlsInited)
    {
      $this->_initUrls();
    }
  }
  /**
   * Get the port this service is configured to talk to.
   *
   * @return integer
   */
  public function getPort()
  {
    return $this->_port;
  }
  /**
   * Set the port used. The value is cast to an integer and must be greater than zero.
   * If the servlet URLs have already been built they are rebuilt for the new port.
   *
   * @param integer $port
   *
   * @throws Apache_Solr_InvalidArgumentException If the port is not a positive integer
   */
  public function setPort($port)
  {
    $portNumber = (int) $port;

    if ($portNumber <= 0)
    {
      throw new Apache_Solr_InvalidArgumentException('Port is not a valid port number');
    }

    $this->_port = $portNumber;

    // keep the cached servlet URLs in sync once they exist
    if ($this->_urlsInited)
    {
      $this->_initUrls();
    }
  }
  /**
   * Get the path this service is configured with, in the normalized form stored by
   * {@link Apache_Solr_Service::setPath()}.
   *
   * @return string
   */
  public function getPath()
  {
    return $this->_path;
  }
  /**
   * Set the path used. The path is normalized to have exactly one leading and one
   * trailing slash; an empty path (or one made only of slashes) becomes '/' so that
   * servlet URLs never contain a double slash after the port.
   * If the servlet URLs have already been built they are rebuilt for the new path.
   *
   * @param string $path
   */
  public function setPath($path)
  {
    $path = trim((string) $path, '/');

    // '/' . '' . '/' would yield '//' and URLs such as http://host:port//select
    $this->_path = ($path === '') ? '/' : '/' . $path . '/';

    if ($this->_urlsInited)
    {
      $this->_initUrls();
    }
  }
  /**
   * Get the currently configured HTTP transport. When none has been set, a
   * file_get_contents based transport is loaded and created on first use.
   *
   * @return Apache_Solr_HttpTransport_Interface
   */
  public function getHttpTransport()
  {
    if ($this->_httpTransport !== false)
    {
      return $this->_httpTransport;
    }

    // lazy load the default implementation
    require_once(dirname(__FILE__) . '/HttpTransport/FileGetContents.php');
    $this->_httpTransport = new Apache_Solr_HttpTransport_FileGetContents();

    return $this->_httpTransport;
  }
  /**
   * Set the HTTP transport implementation that will be used for all HTTP requests.
   *
   * @param Apache_Solr_HttpTransport_Interface $httpTransport
   */
  public function setHttpTransport(Apache_Solr_HttpTransport_Interface $httpTransport)
  {
    $this->_httpTransport = $httpTransport;
  }
  /**
   * Set the create documents flag, which is handed to every {@link Apache_Solr_Response}
   * this service creates. The value is cast to a boolean.
   *
   * @param boolean $createDocuments
   */
  public function setCreateDocuments($createDocuments)
  {
    $this->_createDocuments = (bool) $createDocuments;
  }
  /**
   * Get the current state of the create documents flag.
   *
   * @return boolean
   */
  public function getCreateDocuments()
  {
    return $this->_createDocuments;
  }
  /**
   * Set the collapse single value arrays flag, which is handed to every
   * {@link Apache_Solr_Response} this service creates. The value is cast to a boolean.
   *
   * @param boolean $collapseSingleValueArrays
   */
  public function setCollapseSingleValueArrays($collapseSingleValueArrays)
  {
    $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays;
  }
  /**
   * Get the current state of the collapse single value arrays flag.
   *
   * @return boolean
   */
  public function getCollapseSingleValueArrays()
  {
    return $this->_collapseSingleValueArrays;
  }
  /**
   * Get the current default timeout setting in seconds, as reported by the
   * configured HTTP transport.
   *
   * @return float
   *
   * @deprecated Use the getDefaultTimeout method on the HTTP transport implementation
   */
  public function getDefaultTimeout()
  {
    $transport = $this->getHttpTransport();

    return $transport->getDefaultTimeout();
  }
  /**
   * Set the default timeout for all calls that aren't passed a specific timeout;
   * the value is forwarded to the configured HTTP transport.
   *
   * @param float $timeout Timeout value in seconds
   *
   * @deprecated Use the setDefaultTimeout method on the HTTP transport implementation
   */
  public function setDefaultTimeout($timeout)
  {
    $transport = $this->getHttpTransport();
    $transport->setDefaultTimeout($timeout);
  }
  /**
   * Set how NamedLists should be formatted in the response data. This mainly affects
   * the facet counts format. Only {@link Apache_Solr_Service::NAMED_LIST_FLAT} and
   * {@link Apache_Solr_Service::NAMED_LIST_MAP} are accepted.
   *
   * @param string $namedListTreatment
   * @throws Apache_Solr_InvalidArgumentException If invalid option is set
   */
  public function setNamedListTreatment($namedListTreatment)
  {
    $treatment = (string) $namedListTreatment;

    if ($treatment !== self::NAMED_LIST_FLAT && $treatment !== self::NAMED_LIST_MAP)
    {
      throw new Apache_Solr_InvalidArgumentException('Not a valid named list treatement option');
    }

    $this->_namedListTreatment = $treatment;
  }
  /**
   * Get the current setting for named list treatment.
   *
   * @return string
   */
  public function getNamedListTreatment()
  {
    return $this->_namedListTreatment;
  }
  /**
   * Set the string used to separate the path from the query string.
   * Defaults to '?'
   *
   * @param string $queryDelimiter
   */
  public function setQueryDelimiter($queryDelimiter)
  {
    $this->_queryDelimiter = $queryDelimiter;
  }
  /**
   * Set the string used to separate the parameters in the query string.
   * Defaults to '&'
   *
   * @param string $queryStringDelimiter
   */
  public function setQueryStringDelimiter($queryStringDelimiter)
  {
    $this->_queryStringDelimiter = $queryStringDelimiter;
  }
  /**
   * Issue a HEAD request to the /admin/ping servlet; can be used to quickly tell
   * whether a connection to the server is able to be made.
   *
   * @param float $timeout maximum time to wait for ping in seconds (default is 2); passed to the transport as given
   * @return float Actual time taken to ping the server, FALSE if the response status is not 200
   */
  public function ping($timeout = 2)
  {
    $startedAt = microtime(true);

    $rawResponse = $this->getHttpTransport()->performHeadRequest($this->_pingUrl, $timeout);
    $response = new Apache_Solr_Response($rawResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

    if ($response->getHttpStatus() != 200)
    {
      return false;
    }

    return microtime(true) - $startedAt;
  }
  /**
   * Call the /admin/threads servlet and retrieve information about all threads in the
   * Solr servlet's thread group. Useful for diagnostics.
   *
   * @return Apache_Solr_Response
   *
   * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
   */
  public function threads()
  {
    $response = $this->_sendRawGet($this->_threadsUrl);

    return $response;
  }
  /**
   * Raw Add Method. Posts the given body to the update servlet unchanged; the body
   * should be a complete and well formed "add" xml document.
   *
   * @param string $rawPost
   * @return Apache_Solr_Response
   *
   * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
   */
  public function add($rawPost)
  {
    $response = $this->_sendRawPost($this->_updateUrl, $rawPost);

    return $response;
  }
  /**
   * Add a single Solr Document to the index by wrapping its XML fragment in an
   * "add" element and posting it to the update servlet.
   *
   * @param Apache_Solr_Document $document
   * @param boolean $allowDups
   * @param boolean $overwritePending
   * @param boolean $overwriteCommitted
   * @param integer $commitWithin The number of milliseconds that a document must be committed within, see {@link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. The attribute is only sent when the value is greater than zero.
   * @return Apache_Solr_Response
   *
   * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
   */
  public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0)
  {
    $attributes = ' allowDups="' . ($allowDups ? 'true' : 'false') . '"'
      . ' overwritePending="' . ($overwritePending ? 'true' : 'false') . '"'
      . ' overwriteCommitted="' . ($overwriteCommitted ? 'true' : 'false') . '"';

    $commitWithin = (int) $commitWithin;

    if ($commitWithin > 0)
    {
      $attributes .= ' commitWithin="' . $commitWithin . '"';
    }

    return $this->add('<add' . $attributes . '>' . $this->_documentToXmlFragment($document) . '</add>');
  }
  /**
   * Add an array of Solr Documents to the index all at once, in a single "add"
   * request. Entries that are not Apache_Solr_Document instances are skipped.
   *
   * @param array $documents Should be an array of Apache_Solr_Document instances
   * @param boolean $allowDups
   * @param boolean $overwritePending
   * @param boolean $overwriteCommitted
   * @param integer $commitWithin The number of milliseconds that a document must be committed within, see {@link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. The attribute is only sent when the value is greater than zero.
   * @return Apache_Solr_Response
   *
   * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
   */
  public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0)
  {
    $attributes = ' allowDups="' . ($allowDups ? 'true' : 'false') . '"'
      . ' overwritePending="' . ($overwritePending ? 'true' : 'false') . '"'
      . ' overwriteCommitted="' . ($overwriteCommitted ? 'true' : 'false') . '"';

    $commitWithin = (int) $commitWithin;

    if ($commitWithin > 0)
    {
      $attributes .= ' commitWithin="' . $commitWithin . '"';
    }

    $body = '<add' . $attributes . '>';

    foreach ($documents as $candidate)
    {
      if (!($candidate instanceof Apache_Solr_Document))
      {
        continue;
      }

      $body .= $this->_documentToXmlFragment($candidate);
    }

    return $this->add($body . '</add>');
  }
  /**
   * Create an XML "doc" fragment from a {@link Apache_Solr_Document} instance appropriate
   * for use inside a Solr add call.
   *
   * Each field becomes one "field" element; array values become one element per entry.
   * A field boost is only written on the first element of a field. Control characters
   * are replaced in the result, see {@link Apache_Solr_Service::_stripCtrlChars()}.
   *
   * @param Apache_Solr_Document $document
   * @return string
   */
  protected function _documentToXmlFragment(Apache_Solr_Document $document)
  {
    $xml = '<doc';

    $documentBoost = $document->getBoost();

    if ($documentBoost !== false)
    {
      $xml .= ' boost="' . $documentBoost . '"';
    }

    $xml .= '>';

    foreach ($document as $key => $value)
    {
      // look the boost up with the raw field name BEFORE escaping it: an escaped name
      // (e.g. "a&amp;b") would not match a boost registered under the real name ("a&b")
      $fieldBoost = $document->getFieldBoost($key);
      $name = htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8');

      // treat a scalar as a one element list so both cases share the same output code
      $fieldValues = is_array($value) ? $value : array($value);

      foreach ($fieldValues as $fieldValue)
      {
        $xml .= '<field name="' . $name . '"';

        if ($fieldBoost !== false)
        {
          $xml .= ' boost="' . $fieldBoost . '"';

          // only set the boost for the first field in the set
          $fieldBoost = false;
        }

        // cast explicitly: passing null or other non-strings to htmlspecialchars is deprecated
        $xml .= '>' . htmlspecialchars((string) $fieldValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
      }
    }

    $xml .= '</doc>';

    // replace any control characters to avoid Solr XML parser exception
    return $this->_stripCtrlChars($xml);
  }
  /**
   * Replace each control (non-printable) character that is invalid to Solr's XML parser
   * with a space. Tab (x09), line feed (x0A) and carriage return (x0D) are left alone.
   *
   * @param string $string
   * @return string
   */
  protected function _stripCtrlChars($string)
  {
    // See: http://w3.org/International/questions/qa-forms-utf-8.html
    // Printable utf-8 does not include any of these chars below x7F
    $cleaned = preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);

    return $cleaned;
  }
  /**
   * Send a commit command. Will be synchronous unless both wait parameters are set to false.
   *
   * @param boolean $expungeDeletes Defaults to false, merge segments with deletes away
   * @param boolean $waitFlush Defaults to true, block until index changes are flushed to disk
   * @param boolean $waitSearcher Defaults to true, block until a new searcher is opened and registered as the main query searcher, making the changes visible
   * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour
   * @return Apache_Solr_Response
   *
   * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
   */
  public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600)
  {
    $expunge = $expungeDeletes ? 'true' : 'false';
    $flush = $waitFlush ? 'true' : 'false';
    $searcher = $waitSearcher ? 'true' : 'false';

    $commitXml = "<commit expungeDeletes=\"{$expunge}\" waitFlush=\"{$flush}\" waitSearcher=\"{$searcher}\" />";

    return $this->_sendRawPost($this->_updateUrl, $commitXml, $timeout);
  }
  729. /**
  730. * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be
  731. * a complete and well formed "delete" xml document
  732. *
  733. * @param string $rawPost Expected to be utf-8 encoded xml document
  734. * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
  735. * @return Apache_Solr_Response
  736. *
  737. * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
  738. */
  public function delete($rawPost, $timeout = 3600)
  {
      // the body is posted unchanged to the update url (the same url commit and optimize post to)
      $response = $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);

      return $response;
  }
  743. /**
  744. * Create a delete document based on document ID
  745. *
  746. * @param string $id Expected to be utf-8 encoded
  747. * @param boolean $fromPending
  748. * @param boolean $fromCommitted
  749. * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
  750. * @return Apache_Solr_Response
  751. *
  752. * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
  753. */
  public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600)
  {
      // escape special xml characters in the id; ENT_NOQUOTES leaves quote characters untouched
      $escapedId = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');

      // build the single-id delete message, rendering the flags as "true" / "false"
      $rawPost = sprintf(
          '<delete fromPending="%s" fromCommitted="%s"><id>%s</id></delete>',
          $fromPending ? 'true' : 'false',
          $fromCommitted ? 'true' : 'false',
          $escapedId
      );

      return $this->delete($rawPost, $timeout);
  }
  763. /**
  764. * Create and post a delete document based on multiple document IDs.
  765. *
  766. * @param array $ids Expected to be utf-8 encoded strings
  767. * @param boolean $fromPending
  768. * @param boolean $fromCommitted
  769. * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
  770. * @return Apache_Solr_Response
  771. *
  772. * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
  773. */
  public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600)
  {
      // one <id> element per entry, each with its special xml characters escaped
      $idElements = array();

      foreach ($ids as $documentId)
      {
          $idElements[] = '<id>' . htmlspecialchars($documentId, ENT_NOQUOTES, 'UTF-8') . '</id>';
      }

      // opening tag carries the two flags rendered as "true" / "false"
      $openingTag = sprintf(
          '<delete fromPending="%s" fromCommitted="%s">',
          $fromPending ? 'true' : 'false',
          $fromCommitted ? 'true' : 'false'
      );

      $rawPost = $openingTag . implode('', $idElements) . '</delete>';

      return $this->delete($rawPost, $timeout);
  }
  788. /**
  789. * Create a delete document based on a query and submit it
  790. *
  791. * @param string $rawQuery Expected to be utf-8 encoded
  792. * @param boolean $fromPending
  793. * @param boolean $fromCommitted
  794. * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
  795. * @return Apache_Solr_Response
  796. *
  797. * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
  798. */
  public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600)
  {
      // escape special xml characters in the query; ENT_NOQUOTES leaves quote characters untouched
      $escapedQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');

      // build the delete-by-query message, rendering the flags as "true" / "false"
      $rawPost = sprintf(
          '<delete fromPending="%s" fromCommitted="%s"><query>%s</query></delete>',
          $fromPending ? 'true' : 'false',
          $fromCommitted ? 'true' : 'false',
          $escapedQuery
      );

      return $this->delete($rawPost, $timeout);
  }
  808. /**
  809. * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
  810. * to use Solr Cell and what parameters are available.
  811. *
  812. * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
  813. * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
  814. * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
  815. * pass in a document instance with an "id" field - the document's value(s) will take precedence).
  816. *
  817. * @param string $file Path to file to extract data from
  818. * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
  819. * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
  820. * @param string $mimetype optional mimetype specification (for the file being extracted)
  821. *
  822. * @return Apache_Solr_Response
  823. *
  824. * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid.
  825. */
  public function extract($file, $params = array(), $document = null, $mimetype = 'application/octet-stream')
  {
      // a null $params means "no extra parameters"; any other non-array value is rejected
      if (is_null($params))
      {
          $params = array();
      }
      else if (!is_array($params))
      {
          throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
      }

      // if $file is an http(s) request, defer to extractFromUrl instead. URI schemes are
      // case-insensitive, so "HTTP://..." or "Https://..." must be recognised as URLs too
      // rather than being treated as local file paths.
      if (preg_match('#^https?://#i', $file))
      {
          return $this->extractFromUrl($file, $params, $document, $mimetype);
      }

      // read the contents of the file; the warning is suppressed because a failed read
      // is reported through the exception below
      $contents = @file_get_contents($file);

      if ($contents === false)
      {
          throw new Apache_Solr_InvalidArgumentException("File '{$file}' is empty or could not be read");
      }

      // add the resource.name parameter (the file's base name) if the caller did not specify one
      if (!isset($params['resource.name']))
      {
          $params['resource.name'] = basename($file);
      }

      // delegate the rest to extractFromString
      return $this->extractFromString($contents, $params, $document, $mimetype);
  }
  862. /**
  863. * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
  864. * to use Solr Cell and what parameters are available.
  865. *
  866. * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
  867. * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
  868. * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
  869. * pass in a document instance with an "id" field - the document's value(s) will take precedence).
  870. *
  871. * @param string $data Data that will be passed to Solr Cell
  872. * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
  873. * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
  874. * @param string $mimetype optional mimetype specification (for the file being extracted)
  875. *
  876. * @return Apache_Solr_Response
  877. *
  878. * @throws Apache_Solr_InvalidArgumentException if $file, $params, or $document are invalid.
  879. *
  880. * @todo Should be using multipart/form-data to post parameter values, but I could not get my implementation to work. Needs to be revisited.
  881. */
  public function extractFromString($data, $params = array(), $document = null, $mimetype = 'application/octet-stream')
  {
      // a null $params means "no extra parameters"; any other non-array value is rejected
      if (is_null($params))
      {
          $params = array();
      }
      else if (!is_array($params))
      {
          throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
      }

      // the response writer and named list treatment are always set here,
      // replacing any values the caller supplied under the same keys
      $params['wt'] = self::SOLR_WRITER;
      $params['json.nl'] = $this->_namedListTreatment;

      // only an Apache_Solr_Document contributes parameters; null or any other value is skipped
      if ($document instanceof Apache_Solr_Document)
      {
          foreach ($document as $name => $value)
          {
              // a boost.* parameter is added only when the document reports a boost for the field
              $boost = $document->getFieldBoost($name);

              if ($boost !== false)
              {
                  $params["boost.{$name}"] = $boost;
              }

              // every field becomes a literal.* parameter
              $params["literal.{$name}"] = $value;
          }
      }

      // parameters travel in the query string of the extract url
      $extractUrl = $this->_extractUrl . $this->_queryDelimiter . $this->_generateQueryString($params);

      // the data itself is the post body, sent with the given mimetype
      // (application/octet-stream unless the caller overrides it); false is passed as the timeout
      return $this->_sendRawPost($extractUrl, $data, false, $mimetype);
  }
  920. /**
  921. * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
  922. * to use Solr Cell and what parameters are available.
  923. *
  924. * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
  925. * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
  926. * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
  927. * pass in a document instance with an "id" field - the document's value(s) will take precedence).
  928. *
  929. * @param string $url URL
  930. * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
  931. * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
  932. * @param string $mimetype optional mimetype specification (for the file being extracted)
  933. *
  934. * @return Apache_Solr_Response
  935. *
  936. * @throws Apache_Solr_InvalidArgumentException if $url, $params, or $document are invalid.
  937. */
  public function extractFromUrl($url, $params = array(), $document = null, $mimetype = 'application/octet-stream')
  {
      // a null $params means "no extra parameters"; any other non-array value is rejected
      if (is_null($params))
      {
          $params = array();
      }
      else if (!is_array($params))
      {
          throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
      }

      // fetch the url with the configured http transport (no explicit timeout is passed)
      $httpResponse = $this->getHttpTransport()->performGetRequest($url);

      // anything other than a 200 response is reported as an invalid argument
      if ($httpResponse->getStatusCode() != 200)
      {
          throw new Apache_Solr_InvalidArgumentException("URL '{$url}' returned non 200 response code");
      }

      // use the url itself as resource.name unless the caller specified one
      if (!isset($params['resource.name']))
      {
          $params['resource.name'] = $url;
      }

      // the response body is handed to extractFromString together with the remaining arguments
      return $this->extractFromString($httpResponse->getBody(), $params, $document, $mimetype);
  }
  971. /**
  972. * Send an optimize command. Will be synchronous unless both wait parameters are set
  973. * to false.
  974. *
  975. * @param boolean $waitFlush
  976. * @param boolean $waitSearcher
  977. * @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception)
  978. * @return Apache_Solr_Response
  979. *
  980. * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
  981. */
  public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600)
  {
      // each flag is rendered as the literal string "true" or "false" inside the optimize message
      $rawPost = sprintf(
          '<optimize waitFlush="%s" waitSearcher="%s" />',
          $waitFlush ? 'true' : 'false',
          $waitSearcher ? 'true' : 'false'
      );

      // post the message to the update url, honouring the caller's timeout
      return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
  }
  989. /**
  990. * Simple Search interface
  991. *
  992. * @param string $query The raw query string
  993. * @param int $offset The starting offset for result documents
  994. * @param int $limit The maximum number of result documents to return
  995. * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
  996. * @param string $method The HTTP method (Apache_Solr_Service::METHOD_GET or Apache_Solr_Service::METHOD_POST)
  997. * @return Apache_Solr_Response
  998. *
  999. * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
  1000. * @throws Apache_Solr_InvalidArgumentException If an invalid HTTP method is used
  1001. */
  public function search($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET)
  {
      // a null $params means "no extra parameters"; any other non-array value is invalid
      if (is_null($params))
      {
          $params = array();
      }
      else if (!is_array($params))
      {
          throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
      }

      // parameters owned by this interface; they replace same-named entries from the caller
      $commonParams = array(
          'wt' => self::SOLR_WRITER,
          'json.nl' => $this->_namedListTreatment,
          'q' => $query,
          'start' => $offset,
          'rows' => $limit
      );

      foreach ($commonParams as $name => $value)
      {
          $params[$name] = $value;
      }

      $queryString = $this->_generateQueryString($params);

      switch ($method)
      {
          case self::METHOD_GET:
              // GET carries the parameters in the url
              return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString);

          case self::METHOD_POST:
              // POST carries the same encoded parameters as a form body; false is passed as the timeout
              return $this->_sendRawPost($this->_searchUrl, $queryString, false, 'application/x-www-form-urlencoded; charset=UTF-8');

          default:
              throw new Apache_Solr_InvalidArgumentException("Unsupported method '$method', please use the Apache_Solr_Service::METHOD_* constants");
      }
  }
  1038. }