<?php

/**
 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
 *   its contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
 * @version $Id: Document.php 15 2009-08-04 17:53:08Z donovan.jimenez $
 *
 * @package Apache
 * @subpackage Solr
 * @author Donovan Jimenez <djimenez@conduit-it.com>
 */

/**
 * Additional code Copyright (c) 2011 by Peter Wolanin, and
 * additional contributors.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program as the file LICENSE.txt; if not, please see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 */

/**
 * Holds Key / Value pairs that represent a Solr Document along with any
 * associated boost values. Field values can be accessed by direct dereferencing
 * such as:
 *
 * @code
 * $document->title = 'Something';
 * echo $document->title;
 * @endcode
 *
 * Additionally, the field values can be iterated with foreach:
 *
 * @code
 * foreach ($document as $fieldName => $fieldValue) {
 *   // ...
 * }
 * @endcode
 */
class SearchApiSolrDocument implements IteratorAggregate {

  /**
   * Document boost value.
   *
   * @var float|false
   */
  protected $documentBoost = FALSE;

  /**
   * Document field values, indexed by name.
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Document field boost values, indexed by name.
   *
   * @var array
   */
  protected $fieldBoosts = array();

  /**
   * Normalizes a boost argument.
   *
   * Shared by setBoost() and setFieldBoost() so that both apply the same
   * rule.
   *
   * @param mixed $boost
   *   The raw boost value; it is cast to float before being checked.
   *
   * @return float|false
   *   The boost as a float if it is greater than zero, FALSE otherwise.
   */
  protected static function normalizeBoost($boost) {
    $boost = (float) $boost;
    return $boost > 0.0 ? $boost : FALSE;
  }

  /**
   * Clears all boosts and fields from this document.
   */
  public function clear() {
    $this->documentBoost = FALSE;
    $this->fields = array();
    $this->fieldBoosts = array();
  }

  /**
   * Gets the current document boost.
   *
   * @return float|false
   *   The current document boost, or FALSE if none is set.
   */
  public function getBoost() {
    return $this->documentBoost;
  }

  /**
   * Sets the document boost factor.
   *
   * @param float|false $boost
   *   FALSE for default boost, or a positive number for setting a document
   *   boost. Any value that is not greater than zero resets the boost to
   *   FALSE.
   */
  public function setBoost($boost) {
    $this->documentBoost = self::normalizeBoost($boost);
  }

  /**
   * Adds a value to a multi-valued field.
   *
   * NOTE: the solr XML format allows you to specify boosts PER value even
   * though the underlying Lucene implementation only allows a boost per field.
   * To remedy this, the final field boost value will be the product of all
   * specified boosts on field values - this is similar to SolrJ's
   * functionality.
   *
   * @code
   * $doc = new SearchApiSolrDocument();
   * $doc->addField('foo', 'bar', 2.0);
   * $doc->addField('foo', 'baz', 3.0);
   * // Resultant field boost will be 6!
   * echo $doc->getFieldBoost('foo');
   * @endcode
   *
   * @param string $key
   *   The name of the field.
   * @param $value
   *   The value to add for the field.
   * @param float|false $boost
   *   FALSE for default boost, or a positive number for setting a field boost.
   */
  public function addField($key, $value, $boost = FALSE) {
    if (!isset($this->fields[$key])) {
      // Create the holding array if this is the first value.
      $this->fields[$key] = array();
    }
    elseif (!is_array($this->fields[$key])) {
      // Move an existing single value into an array.
      $this->fields[$key] = array($this->fields[$key]);
    }

    if ($this->getFieldBoost($key) === FALSE) {
      // No boost set yet for this field, so set it now.
      $this->setFieldBoost($key, $boost);
    }
    elseif ((float) $boost > 0.0) {
      // Multiply the passed boost with the current field boost - similar to
      // the SolrJ implementation.
      $this->fieldBoosts[$key] *= (float) $boost;
    }

    $this->fields[$key][] = $value;
  }

  /**
   * Gets information about a field stored in this document.
   *
   * @param string $key
   *   The name of the field.
   *
   * @return array|false
   *   An associative array with the keys "name", "value" and "boost" if the
   *   field is set, FALSE otherwise.
   */
  public function getField($key) {
    if (!isset($this->fields[$key])) {
      return FALSE;
    }
    return array(
      'name' => $key,
      'value' => $this->fields[$key],
      'boost' => $this->getFieldBoost($key),
    );
  }

  /**
   * Sets a field value.
   *
   * Multi-valued fields should be set as arrays or via the addField()
   * function which will automatically make sure the field is an array.
   *
   * @param string $key
   *   The name of the field.
   * @param string|array $value
   *   The value to set for the field.
   * @param float|false $boost
   *   FALSE for default boost, or a positive number for setting a field boost.
   */
  public function setField($key, $value, $boost = FALSE) {
    $this->fields[$key] = $value;
    $this->setFieldBoost($key, $boost);
  }

  /**
   * Gets the currently set field boost for a document field.
   *
   * @param string $key
   *   The name of the field.
   *
   * @return float|false
   *   The currently set field boost, or FALSE if none was set.
   */
  public function getFieldBoost($key) {
    return isset($this->fieldBoosts[$key]) ? $this->fieldBoosts[$key] : FALSE;
  }

  /**
   * Sets the field boost for a document field.
   *
   * @param string $key
   *   The name of the field.
   * @param float|false $boost
   *   FALSE for default boost, or a positive number for setting a field boost.
   *   Any value that is not greater than zero is stored as FALSE.
   */
  public function setFieldBoost($key, $boost) {
    $this->fieldBoosts[$key] = self::normalizeBoost($boost);
  }

  /**
   * Returns all current field boosts, indexed by field name.
   *
   * @return array
   *   An associative array in the format $field_name => $field_boost.
   */
  public function getFieldBoosts() {
    return $this->fieldBoosts;
  }

  /**
   * Gets the names of all fields in this document.
   *
   * @return array
   *   The names of all fields in this document.
   */
  public function getFieldNames() {
    return array_keys($this->fields);
  }

  /**
   * Gets the values of all fields in this document.
   *
   * @return array
   *   The values of all fields in this document.
   */
  public function getFieldValues() {
    return array_values($this->fields);
  }

  /**
   * Implements IteratorAggregate::getIterator().
   *
   * Implementing the IteratorAggregate interface allows the following usage:
   * @code
   * foreach ($document as $key => $value) {
   *   // ...
   * }
   * @endcode
   *
   * @return Traversable
   *   An iterator over this document's fields.
   */
  // The attribute below silences the PHP 8.1+ tentative return type
  // deprecation; older PHP versions parse the line as a "#" comment.
  #[\ReturnTypeWillChange]
  public function getIterator() {
    return new ArrayIterator($this->fields);
  }

  /**
   * Magic getter for field values.
   *
   * @param string $key
   *   The name of the field.
   *
   * @return string|array|null
   *   The value that was set for the field, or NULL if the field is not set.
   */
  public function __get($key) {
    // Guard the lookup so reading an unset field yields NULL instead of
    // raising an "undefined index" notice/warning.
    return isset($this->fields[$key]) ? $this->fields[$key] : NULL;
  }

  /**
   * Magic setter for field values.
   *
   * Multi-valued fields should be set as arrays or via the addField() function
   * which will automatically make sure the field is an array.
   *
   * @param string $key
   *   The name of the field.
   * @param string|array $value
   *   The value to set for the field.
   */
  public function __set($key, $value) {
    $this->setField($key, $value);
  }

  /**
   * Magic isset for fields values.
   *
   * Do not call directly. Allows the following usage:
   * @code
   * isset($document->some_field);
   * @endcode
   *
   * @param string $key
   *   The name of the field.
   *
   * @return bool
   *   Whether the given key is set in this document.
   */
  public function __isset($key) {
    return isset($this->fields[$key]);
  }

  /**
   * Magic unset for field values.
   *
   * Do not call directly. Allows the following usage:
   * @code
   * unset($document->some_field);
   * @endcode
   *
   * Removes both the field value and its boost.
   *
   * @param string $key
   *   The name of the field.
   */
  public function __unset($key) {
    unset($this->fields[$key], $this->fieldBoosts[$key]);
  }

  /**
   * Create an XML fragment from this document.
   *
   * This string can then be used inside a Solr add call.
   *
   * @return string
   *   An XML formatted string for this document.
   */
  public function toXml() {
    $xml = '<doc';
    if ($this->documentBoost !== FALSE) {
      $xml .= ' boost="' . $this->documentBoost . '"';
    }
    $xml .= '>';

    foreach ($this->fields as $key => $value) {
      $fieldBoost = $this->getFieldBoost($key);
      // Numeric field names come back from the array as integers, hence the
      // cast before escaping.
      $name = htmlspecialchars((string) $key, ENT_COMPAT, 'UTF-8');

      // Treat single values like one-element lists so that both cases share
      // the same serialization code.
      $values = is_array($value) ? $value : array($value);
      foreach ($values as $item) {
        $xml .= '<field name="' . $name . '"';
        if ($fieldBoost !== FALSE) {
          $xml .= ' boost="' . $fieldBoost . '"';
          // Only set the boost for the first field in the set.
          $fieldBoost = FALSE;
        }
        // A NULL value is serialized as an empty field; passing NULL straight
        // to htmlspecialchars() is deprecated as of PHP 8.1.
        $text = htmlspecialchars($item === NULL ? '' : $item, ENT_NOQUOTES, 'UTF-8');
        $xml .= '>' . $text . '</field>';
      }
    }

    $xml .= '</doc>';

    // Remove any control characters to avoid Solr XML parser exception.
    return self::stripCtrlChars($xml);
  }

  /**
   * Sanitizes XML for sending to Solr.
   *
   * Replaces control (non-printable) characters that are invalid to Solr's XML
   * parser with a space. Tab, line feed and carriage return are kept.
   *
   * @param string $string
   *   The string to sanitize.
   *
   * @return string
   *   A string safe for including in a Solr request.
   */
  public static function stripCtrlChars($string) {
    // See: http://w3.org/International/questions/qa-forms-utf-8.html
    // Printable utf-8 does not include any of these chars below x7F
    return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);
  }

}
|