12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181 |
- <?php
- /**
- * Copyright (c) 2007-2011, Servigistics, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * - Neither the name of Servigistics, Inc. nor the names of
- * its contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- * @copyright Copyright 2007-2011 Servigistics, Inc. (http://servigistics.com)
- * @license http://solr-php-client.googlecode.com/svn/trunk/COPYING New BSD
- * @version $Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $
- *
- * @package Apache
- * @subpackage Solr
- * @author Donovan Jimenez <djimenez@conduit-it.com>
- */
- // See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1)
- // Doesn't follow typical include path conventions, but is more convenient for users
- require_once(dirname(__FILE__) . '/Exception.php');
- require_once(dirname(__FILE__) . '/HttpTransportException.php');
- require_once(dirname(__FILE__) . '/InvalidArgumentException.php');
- require_once(dirname(__FILE__) . '/Document.php');
- require_once(dirname(__FILE__) . '/Response.php');
- require_once(dirname(__FILE__) . '/HttpTransport/Interface.php');
- /**
- * Starting point for the Solr API. Represents a Solr server resource and has
- * methods for pinging, adding, deleting, committing, optimizing and searching.
- *
- * Example Usage:
- * <code>
- * ...
- * $solr = new Apache_Solr_Service(); //or explicitly new Apache_Solr_Service('localhost', 8180, '/solr')
- *
- * if ($solr->ping())
- * {
- * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :)
- *
- * $document = new Apache_Solr_Document();
- * $document->id = uniqid(); //or something else suitably unique
- *
- * $document->title = 'Some Title';
- * $document->content = 'Some content for this wonderful document. Blah blah blah.';
- *
- * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments
- * //with an array of documents is faster
- *
- * $solr->commit(); //commit to see the deletes and the document
- * $solr->optimize(); //merges multiple segments into one
- *
- * //and the one we all care about, search!
- * //any other common or custom parameters to the request handler can go in the
- * //optional 4th array argument.
- * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc'));
- * }
- * ...
- * </code>
- *
- * @todo Investigate using other HTTP clients other than file_get_contents built-in handler. Could provide performance
- * improvements when dealing with multiple requests by using HTTP's keep alive functionality
- */
- class Apache_Solr_Service
- {
- /**
- * SVN Revision meta data for this class
- */
- const SVN_REVISION = '$Revision: 59 $';
- /**
- * SVN ID meta data for this class
- */
- const SVN_ID = '$Id: Service.php 59 2011-02-08 20:38:59Z donovan.jimenez $';
- /**
- * Response writer we'll request - JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
- */
- const SOLR_WRITER = 'json';
- /**
- * NamedList Treatment constants
- */
- const NAMED_LIST_FLAT = 'flat';
- const NAMED_LIST_MAP = 'map';
- /**
- * Search HTTP Methods
- */
- const METHOD_GET = 'GET';
- const METHOD_POST = 'POST';
- /**
- * Servlet mappings
- */
- const PING_SERVLET = 'admin/ping';
- const UPDATE_SERVLET = 'update';
- const SEARCH_SERVLET = 'select';
- const THREADS_SERVLET = 'admin/threads';
- const EXTRACT_SERVLET = 'update/extract';
- /**
- * Server identification strings
- *
- * @var string
- */
- protected $_host, $_port, $_path;
- /**
- * Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in
- * the returned parsed data
- *
- * @var boolean
- */
- protected $_createDocuments = true;
- /**
- * Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value
- * collapsed to appear as a single value would.
- *
- * @var boolean
- */
- protected $_collapseSingleValueArrays = true;
- /**
- * How NamedLists should be formatted in the output. This specifically affects facet counts. Valid values
- * are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}.
- *
- * @var string
- */
- protected $_namedListTreatment = self::NAMED_LIST_MAP;
- /**
- * Query delimiters. Someone might want to be able to change
- * these (to use &amp; instead of & for example), so I've provided them.
- *
- * @var string
- */
- protected $_queryDelimiter = '?', $_queryStringDelimiter = '&', $_queryBracketsEscaped = true;
/**
 * Constructed servlet full path URLs, all assigned by {@link Apache_Solr_Service::_initUrls()}.
 *
 * $_extractUrl is declared here because _initUrls() assigns it; leaving it undeclared
 * makes PHP create it as a dynamic property, which is deprecated as of PHP 8.2.
 *
 * @var string
 */
protected $_pingUrl, $_updateUrl, $_searchUrl, $_threadsUrl, $_extractUrl;
- /**
- * Keep track of whether our URLs have been constructed
- *
- * @var boolean
- */
- protected $_urlsInited = false;
- /**
- * HTTP Transport implementation (pluggable)
- *
- * @var Apache_Solr_HttpTransport_Interface
- */
- protected $_httpTransport = false;
/**
 * Backslash-escape the query syntax special characters found in a value:
 * + - && || ! ( ) { } [ ] ^ " ~ * ? : \
 *
 * The two-character operators (&& and ||) receive a single backslash in front of the pair.
 *
 * NOTE: a value destined for the inside of a quoted phrase needs fewer characters escaped,
 * use {@link Apache_Solr_Service::escapePhrase()} for that case instead.
 *
 * @param string $value Raw, unescaped value
 * @return string The escaped value
 */
public static function escape($value)
{
    // character list taken from
    // http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
    return preg_replace(
        '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/',
        '\\\$1',
        $value
    );
}
/**
 * Escape a value that will be placed inside a quoted phrase. Only the double quote
 * and the backslash are escaped (each is prefixed with a backslash).
 *
 * @param string $value Raw phrase contents
 * @return string The escaped phrase contents (without surrounding quotes)
 */
public static function escapePhrase($value)
{
    return preg_replace('/("|\\\)/', '\\\$1', $value);
}
/**
 * Convenience function that turns a value into phrase syntax: the value is escaped with
 * {@link Apache_Solr_Service::escapePhrase()} and wrapped in double quotes.
 *
 * @param string $value Raw phrase contents
 * @return string The quoted, escaped phrase
 */
public static function phrase($value)
{
    $escaped = self::escapePhrase($value);

    return "\"{$escaped}\"";
}
/**
 * Constructor. Every parameter is optional and takes the default shown in the
 * signature when not specified.
 *
 * @param string $host Server host name
 * @param integer $port Server port (cast to an integer by setPort)
 * @param string $path Path of the Solr application on the server
 * @param Apache_Solr_HttpTransport_Interface|false $httpTransport Transport to use; when left
 *        false a default transport is lazy loaded by getHttpTransport()
 *
 * @throws Apache_Solr_InvalidArgumentException If the host is empty or the port is not a positive number
 */
public function __construct($host = 'localhost', $port = 8180, $path = '/solr/', $httpTransport = false)
{
    // remember whether our php version is >= 5.1.3 so _generateQueryString can
    // correct for http_build_query behavior later
    $this->_queryBracketsEscaped = version_compare(phpversion(), '5.1.3', '>=');

    // URLs are not flagged as initialized yet, so none of these setters rebuilds them
    $this->setHost($host);
    $this->setPort($port);
    $this->setPath($path);

    // build all servlet URLs once, now that host, port and path are known
    $this->_initUrls();

    if ($httpTransport)
    {
        $this->setHttpTransport($httpTransport);
    }
}
/**
 * Build an http URL from this server's host, port and path plus the given servlet name,
 * optionally followed by a query string made from $params.
 *
 * @param string $servlet Servlet name appended directly after the path
 * @param array $params Optional key / value pairs; each key and value is url-encoded
 * @return string
 */
protected function _constructUrl($servlet, $params = array())
{
    $queryString = '';

    if (count($params))
    {
        // url-encode every pair for inclusion in the query string
        $pairs = array();

        foreach ($params as $name => $paramValue)
        {
            $pairs[] = urlencode($name) . '=' . urlencode($paramValue);
        }

        $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $pairs);
    }

    return "http://{$this->_host}:{$this->_port}{$this->_path}{$servlet}{$queryString}";
}
/**
 * Construct the full URLs for every servlet we reference (extract, ping, search,
 * threads and update) and flag the URLs as initialized.
 */
protected function _initUrls()
{
    // the threads and update servlets are always asked for the JSON response writer
    $writerParams = array('wt' => self::SOLR_WRITER);

    $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, $writerParams);
    $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, $writerParams);

    // the remaining servlets carry no fixed query parameters
    $this->_extractUrl = $this->_constructUrl(self::EXTRACT_SERVLET);
    $this->_pingUrl = $this->_constructUrl(self::PING_SERVLET);
    $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);

    $this->_urlsInited = true;
}
/**
 * Encode a parameter array as a query string.
 *
 * http_build_query is used because it is faster than url-encoding all the parts in a loop.
 * It renders array values as foo[#]=bar (# being an actual number), so those are rewritten
 * into repeated foo=bar pairs. The regex is safe because '=' is url-encoded anywhere else
 * it could appear.
 *
 * @param array $params Key / value pairs; a value may itself be an array of values
 * @return string The encoded query string (without a leading query delimiter)
 */
protected function _generateQueryString($params)
{
    // NOTE: before php 5.1.3 brackets were not url encoded by http_build_query - the php version
    // was checked in the constructor and the result stored in the instance variable
    if ($this->_queryBracketsEscaped)
    {
        // numeric_prefix is a non-nullable string parameter: pass '' (what null used to be
        // coerced to) because passing null raises a deprecation notice as of PHP 8.1
        $queryString = http_build_query($params, '', $this->_queryStringDelimiter);

        return preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString);
    }

    // before 5.1.2 the arg_separator parameter was not available, so don't use it here
    $queryString = http_build_query($params);

    return preg_replace('/\\[(?:[0-9]|[1-9][0-9]+)\\]=/', '=', $queryString);
}
/**
 * Central method for performing a GET request against this Solr server through the
 * configured HTTP transport.
 *
 * @param string $url
 * @param float $timeout Read timeout in seconds, passed straight to the transport
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned
 */
protected function _sendRawGet($url, $timeout = FALSE)
{
    $rawResponse = $this->getHttpTransport()->performGetRequest($url, $timeout);
    $response = new Apache_Solr_Response($rawResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

    if ($response->getHttpStatus() == 200)
    {
        return $response;
    }

    throw new Apache_Solr_HttpTransportException($response);
}
/**
 * Central method for performing a POST request against this Solr server through the
 * configured HTTP transport.
 *
 * @param string $url
 * @param string $rawPost Request body
 * @param float $timeout Read timeout in seconds, passed straight to the transport
 * @param string $contentType Content type sent with the body
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If a non 200 response status is returned
 */
protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8')
{
    $rawResponse = $this->getHttpTransport()->performPostRequest($url, $rawPost, $contentType, $timeout);
    $response = new Apache_Solr_Response($rawResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

    if ($response->getHttpStatus() == 200)
    {
        return $response;
    }

    throw new Apache_Solr_HttpTransportException($response);
}
/**
 * Get the host currently configured for this service.
 *
 * @return string
 */
public function getHost()
{
    return $this->_host;
}
/**
 * Set the host used. An empty host is rejected; there is no fallback value.
 * If the servlet URLs were already built they are rebuilt with the new host.
 *
 * @param string $host
 *
 * @throws Apache_Solr_InvalidArgumentException If the host parameter is empty
 */
public function setHost($host)
{
    if (empty($host))
    {
        throw new Apache_Solr_InvalidArgumentException('Host parameter is empty');
    }

    $this->_host = $host;

    if ($this->_urlsInited)
    {
        $this->_initUrls();
    }
}
/**
 * Get the port currently configured for this service.
 *
 * @return integer
 */
public function getPort()
{
    return $this->_port;
}
/**
 * Set the port used. The value is cast to an integer and must be greater than zero;
 * there is no fallback value. If the servlet URLs were already built they are rebuilt
 * with the new port.
 *
 * @param integer $port
 *
 * @throws Apache_Solr_InvalidArgumentException If the port is not a positive number
 */
public function setPort($port)
{
    $portNumber = (int) $port;

    if ($portNumber <= 0)
    {
        throw new Apache_Solr_InvalidArgumentException('Port is not a valid port number');
    }

    $this->_port = $portNumber;

    if ($this->_urlsInited)
    {
        $this->_initUrls();
    }
}
/**
 * Get the path currently configured for this service, as stored by setPath().
 *
 * @return string
 */
public function getPath()
{
    return $this->_path;
}
/**
 * Set the path used. Leading and trailing slashes are normalized so the stored path
 * always begins and ends with exactly one '/'. An empty path (or one consisting only
 * of slashes) is stored as '/'. If the servlet URLs were already built they are rebuilt
 * with the new path.
 *
 * @param string $path
 */
public function setPath($path)
{
    $path = trim($path, '/');

    // an empty path must collapse to '/', not '//', otherwise every servlet URL
    // would contain a doubled slash right after the port
    $this->_path = ($path === '') ? '/' : '/' . $path . '/';

    if ($this->_urlsInited)
    {
        $this->_initUrls();
    }
}
/**
 * Get the currently configured HTTP transport. When none has been set yet, the
 * file_get_contents based implementation is loaded and used as the default.
 *
 * @return Apache_Solr_HttpTransport_Interface
 */
public function getHttpTransport()
{
    if ($this->_httpTransport !== false)
    {
        return $this->_httpTransport;
    }

    // lazy load the default transport only when it is actually needed
    require_once(dirname(__FILE__) . '/HttpTransport/FileGetContents.php');
    $this->_httpTransport = new Apache_Solr_HttpTransport_FileGetContents();

    return $this->_httpTransport;
}
/**
 * Set the HTTP transport implementation that getHttpTransport() will return from now on.
 *
 * @param Apache_Solr_HttpTransport_Interface $httpTransport
 */
public function setHttpTransport(Apache_Solr_HttpTransport_Interface $httpTransport)
{
    $this->_httpTransport = $httpTransport;
}
/**
 * Set the create documents flag. The flag is handed to every {@link Apache_Solr_Response}
 * this service creates and determines whether the response creates
 * {@link Apache_Solr_Document} instances in its parsed data.
 *
 * @param boolean $createDocuments Any value; it is converted to a boolean
 */
public function setCreateDocuments($createDocuments)
{
    $this->_createDocuments = $createDocuments ? true : false;
}
/**
 * Get the current state of the create documents flag.
 *
 * @return boolean
 */
public function getCreateDocuments()
{
    return $this->_createDocuments;
}
/**
 * Set the collapse single value arrays flag. The flag is handed to every
 * {@link Apache_Solr_Response} this service creates.
 *
 * @param boolean $collapseSingleValueArrays Any value; it is converted to a boolean
 */
public function setCollapseSingleValueArrays($collapseSingleValueArrays)
{
    $this->_collapseSingleValueArrays = $collapseSingleValueArrays ? true : false;
}
/**
 * Get the current state of the collapse single value arrays flag.
 *
 * @return boolean
 */
public function getCollapseSingleValueArrays()
{
    return $this->_collapseSingleValueArrays;
}
/**
 * Get the default timeout, in seconds, as reported by the current HTTP transport.
 *
 * @return float
 *
 * @deprecated Use the getDefaultTimeout method on the HTTP transport implementation
 */
public function getDefaultTimeout()
{
    $transport = $this->getHttpTransport();

    return $transport->getDefaultTimeout();
}
/**
 * Set the default timeout on the current HTTP transport; it applies to calls that
 * are not passed a specific timeout.
 *
 * @param float $timeout Timeout value in seconds
 *
 * @deprecated Use the setDefaultTimeout method on the HTTP transport implementation
 */
public function setDefaultTimeout($timeout)
{
    $transport = $this->getHttpTransport();
    $transport->setDefaultTimeout($timeout);
}
/**
 * Set how NamedLists should be formatted in the response data. The value is cast to a
 * string and must equal {@link Apache_Solr_Service::NAMED_LIST_FLAT} or
 * {@link Apache_Solr_Service::NAMED_LIST_MAP}.
 *
 * @param string $namedListTreatment
 * @throws Apache_Solr_InvalidArgumentException If invalid option is set
 */
public function setNamedListTreatment($namedListTreatment)
{
    $treatment = (string) $namedListTreatment;

    if ($treatment === self::NAMED_LIST_FLAT)
    {
        $this->_namedListTreatment = self::NAMED_LIST_FLAT;
    }
    elseif ($treatment === self::NAMED_LIST_MAP)
    {
        $this->_namedListTreatment = self::NAMED_LIST_MAP;
    }
    else
    {
        throw new Apache_Solr_InvalidArgumentException('Not a valid named list treatement option');
    }
}
/**
 * Get the current named list treatment setting.
 *
 * @return string
 */
public function getNamedListTreatment()
{
    return $this->_namedListTreatment;
}
/**
 * Set the string used to separate the path from the query string.
 * Defaults to '?'. URLs that were already constructed are not rebuilt by this call.
 *
 * @param string $queryDelimiter
 */
public function setQueryDelimiter($queryDelimiter)
{
    $this->_queryDelimiter = $queryDelimiter;
}
/**
 * Set the string used to separate the parameters in the query string.
 * Defaults to '&'. URLs that were already constructed are not rebuilt by this call.
 *
 * @param string $queryStringDelimiter
 */
public function setQueryStringDelimiter($queryStringDelimiter)
{
    $this->_queryStringDelimiter = $queryStringDelimiter;
}
/**
 * Send a HEAD request to the /admin/ping servlet. Can be used to quickly tell whether
 * a connection to the server is able to be made.
 *
 * @param float $timeout Maximum time to wait for the ping in seconds (default is 2); passed to the transport
 * @return float|false Seconds elapsed while pinging the server, or FALSE when the response status is not 200
 */
public function ping($timeout = 2)
{
    $startedAt = microtime(true);

    $rawResponse = $this->getHttpTransport()->performHeadRequest($this->_pingUrl, $timeout);
    $response = new Apache_Solr_Response($rawResponse, $this->_createDocuments, $this->_collapseSingleValueArrays);

    if ($response->getHttpStatus() != 200)
    {
        return false;
    }

    return microtime(true) - $startedAt;
}
/**
 * Send a GET request to the /admin/threads servlet URL and return the response.
 * Useful for diagnostics.
 *
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function threads()
{
    $response = $this->_sendRawGet($this->_threadsUrl);

    return $response;
}
/**
 * Raw add method. Posts the given body to the update servlet URL as is, so the body
 * should be a complete and well formed "add" xml document.
 *
 * @param string $rawPost
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function add($rawPost)
{
    $response = $this->_sendRawPost($this->_updateUrl, $rawPost);

    return $response;
}
/**
 * Add a single Solr Document to the index by wrapping its XML fragment in an "add"
 * element and posting it with add().
 *
 * @param Apache_Solr_Document $document
 * @param boolean $allowDups Sent as the allowDups attribute
 * @param boolean $overwritePending Sent as the overwritePending attribute
 * @param boolean $overwriteCommitted Sent as the overwriteCommitted attribute
 * @param integer $commitWithin The number of milliseconds that a document must be committed within, see {@link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. The attribute is only sent when the value is greater than zero.
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0)
{
    $commitWithin = (int) $commitWithin;

    // opening tag with the three boolean flags rendered as 'true' / 'false'
    $rawPost = '<add allowDups="' . ($allowDups ? 'true' : 'false') . '"'
        . ' overwritePending="' . ($overwritePending ? 'true' : 'false') . '"'
        . ' overwriteCommitted="' . ($overwriteCommitted ? 'true' : 'false') . '"';

    if ($commitWithin > 0)
    {
        $rawPost .= ' commitWithin="' . $commitWithin . '"';
    }

    $rawPost .= '>' . $this->_documentToXmlFragment($document) . '</add>';

    return $this->add($rawPost);
}
/**
 * Add several Solr Documents to the index with a single "add" post. Entries of
 * $documents that are not Apache_Solr_Document instances are silently skipped.
 *
 * @param array $documents Should be an array of Apache_Solr_Document instances
 * @param boolean $allowDups Sent as the allowDups attribute
 * @param boolean $overwritePending Sent as the overwritePending attribute
 * @param boolean $overwriteCommitted Sent as the overwriteCommitted attribute
 * @param integer $commitWithin The number of milliseconds that a document must be committed within, see {@link http://wiki.apache.org/solr/UpdateXmlMessages#The_Update_Schema} for details. The attribute is only sent when the value is greater than zero.
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true, $commitWithin = 0)
{
    $commitWithin = (int) $commitWithin;

    // opening tag with the three boolean flags rendered as 'true' / 'false'
    $rawPost = '<add allowDups="' . ($allowDups ? 'true' : 'false') . '"'
        . ' overwritePending="' . ($overwritePending ? 'true' : 'false') . '"'
        . ' overwriteCommitted="' . ($overwriteCommitted ? 'true' : 'false') . '"';

    if ($commitWithin > 0)
    {
        $rawPost .= ' commitWithin="' . $commitWithin . '"';
    }

    $rawPost .= '>';

    foreach ($documents as $candidate)
    {
        if (!($candidate instanceof Apache_Solr_Document))
        {
            continue;
        }

        $rawPost .= $this->_documentToXmlFragment($candidate);
    }

    return $this->add($rawPost . '</add>');
}
/**
 * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use
 * inside a Solr add call.
 *
 * Every field becomes one "field" element per value. A field boost is written only on the
 * first element emitted for that field, and a document boost is written on the "doc"
 * element when the document reports one (anything other than false).
 *
 * @param Apache_Solr_Document $document
 * @return string The "doc" element, with control characters replaced by spaces
 */
protected function _documentToXmlFragment(Apache_Solr_Document $document)
{
    $xml = '<doc';

    if ($document->getBoost() !== false)
    {
        $xml .= ' boost="' . $document->getBoost() . '"';
    }

    $xml .= '>';

    foreach ($document as $key => $value)
    {
        // look the boost up with the RAW field name: the escaped name differs from the
        // document's own key whenever it contains & < > " or ', and the lookup would miss
        $fieldBoost = $document->getFieldBoost($key);
        $fieldName = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');

        // treat a single value as a one element list so both cases share the same code
        $fieldValues = is_array($value) ? $value : array($value);

        foreach ($fieldValues as $fieldValue)
        {
            $xml .= '<field name="' . $fieldName . '"';

            if ($fieldBoost !== false)
            {
                $xml .= ' boost="' . $fieldBoost . '"';

                // only set the boost for the first field in the set
                $fieldBoost = false;
            }

            $xml .= '>' . htmlspecialchars($fieldValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
        }
    }

    $xml .= '</doc>';

    // replace any control characters to avoid Solr XML parser exception
    return $this->_stripCtrlChars($xml);
}
/**
 * Replace each control (non-printable) character that is invalid to Solr's XML parser
 * with a space. Tab (x09), line feed (x0A) and carriage return (x0D) are left alone.
 *
 * @param string $string
 * @return string
 */
protected function _stripCtrlChars($string)
{
    // See: http://w3.org/International/questions/qa-forms-utf-8.html
    // Printable utf-8 does not include any of these chars below x7F
    $cleaned = preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);

    return $cleaned;
}
/**
 * Post a commit command to the update servlet URL. Each flag is rendered as 'true' or
 * 'false' in the attribute of the same name on the "commit" element.
 *
 * @param boolean $expungeDeletes Defaults to false
 * @param boolean $waitFlush Defaults to true
 * @param boolean $waitSearcher Defaults to true
 * @param float $timeout Timeout (in seconds) passed to the post request. Defaults to 1 hour
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function commit($expungeDeletes = false, $waitFlush = true, $waitSearcher = true, $timeout = 3600)
{
    $rawPost = sprintf(
        '<commit expungeDeletes="%s" waitFlush="%s" waitSearcher="%s" />',
        $expungeDeletes ? 'true' : 'false',
        $waitFlush ? 'true' : 'false',
        $waitSearcher ? 'true' : 'false'
    );

    return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);
}
/**
 * Raw delete method. Posts the given body to the update servlet URL as is, so the body
 * should be a complete and well formed "delete" xml document.
 *
 * @param string $rawPost Expected to be utf-8 encoded xml document
 * @param float $timeout Timeout (in seconds) passed to the post request. Defaults to 1 hour
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function delete($rawPost, $timeout = 3600)
{
    $response = $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout);

    return $response;
}
/**
 * Build a "delete" xml document for a single document ID and post it with delete().
 *
 * @param string $id Expected to be utf-8 encoded; xml special characters are escaped here
 * @param boolean $fromPending Sent as the fromPending attribute
 * @param boolean $fromCommitted Sent as the fromCommitted attribute
 * @param float $timeout Timeout (in seconds) passed on to delete(). Defaults to 1 hour
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function deleteById($id, $fromPending = true, $fromCommitted = true, $timeout = 3600)
{
    // escape special xml characters in the id before embedding it
    $escapedId = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');

    $rawPost = '<delete fromPending="' . ($fromPending ? 'true' : 'false') . '"'
        . ' fromCommitted="' . ($fromCommitted ? 'true' : 'false') . '">'
        . '<id>' . $escapedId . '</id></delete>';

    return $this->delete($rawPost, $timeout);
}
/**
 * Build a single "delete" xml document containing one "id" element per given document ID
 * and post it with delete().
 *
 * @param array $ids Expected to be utf-8 encoded strings; xml special characters are escaped here
 * @param boolean $fromPending Sent as the fromPending attribute
 * @param boolean $fromCommitted Sent as the fromCommitted attribute
 * @param float $timeout Timeout (in seconds) passed on to delete(). Defaults to 1 hour
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function deleteByMultipleIds($ids, $fromPending = true, $fromCommitted = true, $timeout = 3600)
{
    $idElements = '';

    foreach ($ids as $rawId)
    {
        // escape special xml characters in each id before embedding it
        $idElements .= '<id>' . htmlspecialchars($rawId, ENT_NOQUOTES, 'UTF-8') . '</id>';
    }

    $rawPost = '<delete fromPending="' . ($fromPending ? 'true' : 'false') . '"'
        . ' fromCommitted="' . ($fromCommitted ? 'true' : 'false') . '">'
        . $idElements
        . '</delete>';

    return $this->delete($rawPost, $timeout);
}
/**
 * Build a "delete" xml document for a query and post it with delete().
 *
 * @param string $rawQuery Expected to be utf-8 encoded; xml special characters are escaped here
 * @param boolean $fromPending Sent as the fromPending attribute
 * @param boolean $fromCommitted Sent as the fromCommitted attribute
 * @param float $timeout Timeout (in seconds) passed on to delete(). Defaults to 1 hour
 * @return Apache_Solr_Response
 *
 * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
 */
public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true, $timeout = 3600)
{
    // escape special xml characters in the query before embedding it
    $escapedQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');

    $rawPost = '<delete fromPending="' . ($fromPending ? 'true' : 'false') . '"'
        . ' fromCommitted="' . ($fromCommitted ? 'true' : 'false') . '">'
        . '<query>' . $escapedQuery . '</query></delete>';

    return $this->delete($rawPost, $timeout);
}
- /**
-  * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
-  * to use Solr Cell and what parameters are available.
-  *
-  * A $file value starting with "http://" or "https://" is handed to extractFromUrl(); any other value is read with
-  * file_get_contents() and its contents are handed to extractFromString(). For local files the "resource.name" parameter
-  * defaults to the base name of $file when the caller did not supply one.
-  *
-  * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
-  * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
-  * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
-  * pass in a document instance with an "id" field - the document's value(s) will take precedence).
-  *
-  * @param string $file Path to file to extract data from
-  * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
-  * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
-  * @param string $mimetype optional mimetype specification (for the file being extracted)
-  *
-  * @return Apache_Solr_Response
-  *
-  * @throws Apache_Solr_InvalidArgumentException if $params is neither an array nor null, or if $file could not be read.
-  */
- public function extract($file, $params = array(), $document = null, $mimetype = 'application/octet-stream')
- {
-     // null is accepted as shorthand for "no extra parameters"; any other non-array value is rejected
-     if (is_null($params))
-     {
-         $params = array();
-     }
-     else if (!is_array($params))
-     {
-         throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
-     }
-
-     // http:// and https:// locations are fetched by extractFromUrl instead of being read locally
-     foreach (array('http://', 'https://') as $scheme)
-     {
-         if (substr($file, 0, strlen($scheme)) == $scheme)
-         {
-             return $this->extractFromUrl($file, $params, $document, $mimetype);
-         }
-     }
-
-     // warnings are silenced on purpose: a failed read is reported through the exception below
-     $contents = @file_get_contents($file);
-     if ($contents === false)
-     {
-         throw new Apache_Solr_InvalidArgumentException("File '{$file}' is empty or could not be read");
-     }
-
-     // add the resource.name parameter if the caller did not specify one
-     if (!isset($params['resource.name']))
-     {
-         $params['resource.name'] = basename($file);
-     }
-
-     // the request itself is built and sent by extractFromString
-     return $this->extractFromString($contents, $params, $document, $mimetype);
- }
-
- /**
-  * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
-  * to use Solr Cell and what parameters are available.
-  *
-  * The parameters are sent in the query string of the extract URL and $data is sent as the raw POST body. The "wt" and
-  * "json.nl" parameters are always set by this method, replacing any caller-supplied values.
-  *
-  * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
-  * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
-  * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
-  * pass in a document instance with an "id" field - the document's value(s) will take precedence).
-  * A $document that is not an Apache_Solr_Document instance (including null) is ignored.
-  *
-  * @param string $data Data that will be passed to Solr Cell
-  * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
-  * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
-  * @param string $mimetype optional mimetype specification (for the file being extracted)
-  *
-  * @return Apache_Solr_Response
-  *
-  * @throws Apache_Solr_InvalidArgumentException if $params is neither an array nor null.
-  *
-  * @todo Should be using multipart/form-data to post parameter values, but I could not get my implementation to work. Needs to be revisited.
-  */
- public function extractFromString($data, $params = array(), $document = null, $mimetype = 'application/octet-stream')
- {
-     // null is accepted as shorthand for "no extra parameters"; any other non-array value is rejected
-     if (is_null($params))
-     {
-         $params = array();
-     }
-     else if (!is_array($params))
-     {
-         throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
-     }
-
-     // make sure we receive our response in JSON and have proper name list treatment
-     $params['wt'] = self::SOLR_WRITER;
-     $params['json.nl'] = $this->_namedListTreatment;
-
-     // instanceof is false for null, so a missing document simply contributes no parameters
-     if ($document instanceof Apache_Solr_Document)
-     {
-         foreach ($document as $fieldName => $value)
-         {
-             // a boost of exactly false means the field has no boost.* parameter to send
-             $boost = $document->getFieldBoost($fieldName);
-             if ($boost !== false)
-             {
-                 $params['boost.' . $fieldName] = $boost;
-             }
-
-             // every field is sent as a literal.* parameter
-             $params['literal.' . $fieldName] = $value;
-         }
-     }
-
-     // params go to Solr in the query string; the data is the POST body (application/octet-stream is the default mimetype)
-     $extractUrl = $this->_extractUrl . $this->_queryDelimiter . $this->_generateQueryString($params);
-
-     return $this->_sendRawPost($extractUrl, $data, false, $mimetype);
- }
-
- /**
-  * Use Solr Cell to extract document contents. See {@link http://wiki.apache.org/solr/ExtractingRequestHandler} for information on how
-  * to use Solr Cell and what parameters are available.
-  *
-  * The URL is fetched with a GET request through the configured HTTP transport; on a 200 response the body is handed to
-  * extractFromString(). The "resource.name" parameter defaults to $url when the caller did not supply one.
-  *
-  * NOTE: when passing an Apache_Solr_Document instance, field names and boosts will automatically be prepended by "literal." and "boost."
-  * as appropriate. Any keys from the $params array will NOT be treated this way. Any mappings from the document will overwrite key / value
-  * pairs in the params array if they have the same name (e.g. you pass a "literal.id" key and value in your $params array but you also
-  * pass in a document instance with an "id" field - the document's value(s) will take precedence).
-  *
-  * @param string $url URL
-  * @param array $params optional array of key value pairs that will be sent with the post (see Solr Cell documentation)
-  * @param Apache_Solr_Document $document optional document that will be used to generate post parameters (literal.* and boost.* params)
-  * @param string $mimetype optional mimetype specification (for the file being extracted)
-  *
-  * @return Apache_Solr_Response
-  *
-  * @throws Apache_Solr_InvalidArgumentException if $params is neither an array nor null, or if $url does not return a 200 response.
-  */
- public function extractFromUrl($url, $params = array(), $document = null, $mimetype = 'application/octet-stream')
- {
-     // null is accepted as shorthand for "no extra parameters"; any other non-array value is rejected
-     if (is_null($params))
-     {
-         $params = array();
-     }
-     else if (!is_array($params))
-     {
-         throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
-     }
-
-     // read the contents of the URL using our configured Http Transport and default timeout
-     $httpResponse = $this->getHttpTransport()->performGetRequest($url);
-
-     // anything other than a 200 response is reported as an invalid argument
-     if ($httpResponse->getStatusCode() != 200)
-     {
-         throw new Apache_Solr_InvalidArgumentException("URL '{$url}' returned non 200 response code");
-     }
-
-     // add the resource.name parameter if the caller did not specify one
-     if (!isset($params['resource.name']))
-     {
-         $params['resource.name'] = $url;
-     }
-
-     // the request itself is built and sent by extractFromString
-     return $this->extractFromString($httpResponse->getBody(), $params, $document, $mimetype);
- }
- /**
-  * Send an optimize command. Will be synchronous unless both wait parameters are set
-  * to false.
-  *
-  * @param boolean $waitFlush Written to the waitFlush attribute as "true" or "false"
-  * @param boolean $waitSearcher Written to the waitSearcher attribute as "true" or "false"
-  * @param float $timeout Maximum expected duration of the optimize operation on the server (otherwise, will throw a communication exception)
-  * @return Apache_Solr_Response
-  *
-  * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
-  */
- public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600)
- {
-     // the command is a single self-closing element posted to the update URL
-     $command = sprintf(
-         '<optimize waitFlush="%s" waitSearcher="%s" />',
-         $waitFlush ? 'true' : 'false',
-         $waitSearcher ? 'true' : 'false'
-     );
-
-     return $this->_sendRawPost($this->_updateUrl, $command, $timeout);
- }
- /**
-  * Simple Search interface
-  *
-  * The "wt", "json.nl", "q", "start" and "rows" parameters are always set by this method and replace any values of the
-  * same name supplied in $params. For GET the parameters are appended to the search URL; for POST they are sent as a
-  * form-urlencoded request body.
-  *
-  * @param string $query The raw query string
-  * @param int $offset The starting offset for result documents
-  * @param int $limit The maximum number of result documents to return
-  * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
-  * @param string $method The HTTP method (Apache_Solr_Service::METHOD_GET or Apache_Solr_Service::METHOD_POST)
-  * @return Apache_Solr_Response
-  *
-  * @throws Apache_Solr_HttpTransportException If an error occurs during the service call
-  * @throws Apache_Solr_InvalidArgumentException If $params is neither an array nor null, or if an invalid HTTP method is used
-  */
- public function search($query, $offset = 0, $limit = 10, $params = array(), $method = self::METHOD_GET)
- {
-     // null is accepted as shorthand for "no extra parameters"; any other non-array value is rejected
-     if (is_null($params))
-     {
-         $params = array();
-     }
-     else if (!is_array($params))
-     {
-         throw new Apache_Solr_InvalidArgumentException("\$params must be a valid array or null");
-     }
-
-     // common parameters in this interface (assigned in this order, which is also the order new keys are appended in)
-     $params['wt'] = self::SOLR_WRITER;
-     $params['json.nl'] = $this->_namedListTreatment;
-     $params['q'] = $query;
-     $params['start'] = $offset;
-     $params['rows'] = $limit;
-
-     $queryString = $this->_generateQueryString($params);
-
-     // strict comparison: a loosely-equal value such as boolean true must not be accepted as a valid method
-     if ($method === self::METHOD_GET)
-     {
-         return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString);
-     }
-
-     if ($method === self::METHOD_POST)
-     {
-         return $this->_sendRawPost($this->_searchUrl, $queryString, false, 'application/x-www-form-urlencoded; charset=UTF-8');
-     }
-
-     throw new Apache_Solr_InvalidArgumentException("Unsupported method '$method', please use the Apache_Solr_Service::METHOD_* constants");
- }
- }
|