1068 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			1068 lines
		
	
	
		
			29 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| /**
 | |
|  * @file
 | |
|  * Support for migration from XML sources.
 | |
|  *
 | |
|  * NOTE: There are two methods supported in this file.
 | |
|  *
 | |
|  * 1) List - ids are listed in an index xml file and the data for each item is
 | |
|  *      stored in a separate xml file per item. Use MigrateSourceList class
 | |
|  *      as the source.
 | |
|  *
 | |
|  * 2) MultiItems - ids are part of the item and all items are stored in a
 | |
|  *      single xml file. Use MigrateSourceMultiItems class as the source.
 | |
|  *
 | |
|  * Both of these methods are described in more detail in the wine migration
 | |
|  * example.
 | |
|  */
 | |
| 
 | |
| /* =========================================================================== */
 | |
| /*                              List Method                                    */
 | |
| /* =========================================================================== */
 | |
/**
 * Implementation of MigrateList, for retrieving a list of IDs to be migrated
 * from an XML document.
 */
class MigrateListXML extends MigrateList {
  /**
   * A URL pointing to an XML document containing a list of IDs to be processed.
   *
   * @var string
   */
  protected $listUrl;

  /**
   * Remember the list URL and switch libxml to internal error collection.
   *
   * @param string $list_url
   *  URL of the XML document listing the IDs to be processed.
   */
  public function __construct($list_url) {
    parent::__construct();
    $this->listUrl = $list_url;
    // Suppress errors during parsing, so we can pick them up after
    libxml_use_internal_errors(TRUE);
  }

  /**
   * Our public face is the URL we're getting items from
   *
   * @return string
   */
  public function __toString() {
    return $this->listUrl;
  }

  /**
   * Load the XML at the given URL, and return an array of the IDs found within it.
   *
   * @return array|null
   *  The IDs found, or NULL if the document could not be loaded (in which case
   *  the libxml errors are displayed and then cleared).
   */
  public function getIdList() {
    migrate_instrument_start("Retrieve $this->listUrl");
    $xml = simplexml_load_file($this->listUrl);
    migrate_instrument_stop("Retrieve $this->listUrl");

    // Compare against FALSE explicitly: a SimpleXMLElement for an empty root
    // element without attributes casts to boolean FALSE, which would make a
    // valid (but empty) list look like a load failure.
    if ($xml !== FALSE) {
      return $this->getIDsFromXML($xml);
    }

    Migration::displayMessage(t(
      'Loading of !listUrl failed:',
      array('!listUrl' => $this->listUrl)
    ));
    foreach (libxml_get_errors() as $error) {
      Migration::displayMessage(MigrateItemsXML::parseLibXMLError($error));
    }
    // The errors have been reported - drop them so they are not reported again
    // on the next failure.
    libxml_clear_errors();
    return NULL;
  }

  /**
   * Given an XML object, parse out the IDs for processing and return them as an
   * array. The default implementation assumes the IDs are simply the values of
   * the top-level elements - in most cases, you will need to override this to
   * reflect your particular XML structure.
   *
   * @param SimpleXMLElement $xml
   *
   * @return array
   *  The distinct ID strings, in document order.
   */
  protected function getIDsFromXML(SimpleXMLElement $xml) {
    $ids = array();
    foreach ($xml as $element) {
      $ids[] = (string)$element;
    }
    return array_unique($ids);
  }

  /**
   * Return a count of all available IDs from the source listing. The default
   * implementation assumes the count of top-level elements reflects the number
   * of IDs available - in many cases, you will need to override this to reflect
   * your particular XML structure.
   *
   * @return int
   *  Number of elements beneath the top-level element, or 0 if the document
   *  could not be loaded.
   */
  public function computeCount() {
    $xml = simplexml_load_file($this->listUrl);
    // simplexml_load_file() returns FALSE on failure; count(FALSE) is an error
    // on PHP 8 and reports a bogus count of 1 on older versions.
    if ($xml === FALSE) {
      return 0;
    }
    // Number of sourceid elements beneath the top-level element
    return count($xml);
  }
}
 | |
| 
 | |
/**
 * Implementation of MigrateItem, for retrieving a parsed XML document given
 * an ID provided by a MigrateList class.
 */
class MigrateItemXML extends MigrateItem {
  /**
   * A URL pointing to an XML document containing the data for one item to be
   * migrated.
   *
   * @var string
   */
  protected $itemUrl;

  /**
   * Remember the item URL pattern and switch libxml to internal error
   * collection.
   *
   * @param string $item_url
   *  URL of an item document; constructItemUrl() substitutes the item ID for
   *  the :id token.
   */
  public function __construct($item_url) {
    parent::__construct();
    $this->itemUrl = $item_url;
    // Suppress errors during parsing, so we can pick them up after
    libxml_use_internal_errors(TRUE);
  }

  /**
   * Implementors are expected to return an object representing a source item.
   *
   * @param mixed $id
   *
   * @return stdClass|null
   *  An object whose xml property holds the loaded document, or NULL when
   *  there is no ID, no URL, or the document could not be loaded.
   */
  public function getItem($id) {
    // Make sure we actually have an ID
    if (empty($id)) {
      return NULL;
    }
    $item_url = $this->constructItemUrl($id);
    // And make sure we actually got a URL to fetch
    if (empty($item_url)) {
      return NULL;
    }

    // Get the XML object at the specified URL. Failure is detected by explicit
    // comparison rather than truthiness, because a SimpleXMLElement for an
    // empty element without attributes casts to boolean FALSE.
    $xml = $this->loadXmlUrl($item_url);
    if ($xml !== FALSE && !is_null($xml)) {
      $return = new stdClass;
      $return->xml = $xml;
      return $return;
    }

    $message = t('Loading of !objecturl failed:', array('!objecturl' => $item_url));
    foreach (libxml_get_errors() as $error) {
      $message .= "\n" . $error->message;
    }
    libxml_clear_errors();

    // There may be no migration running; fall back to displaying the message,
    // as MigrateXMLReader::next() does.
    $migration = Migration::currentMigration();
    if ($migration) {
      $migration->getMap()->saveMessage(
        array($id), $message, MigrationBase::MESSAGE_ERROR);
    }
    else {
      Migration::displayMessage($message);
    }
    return NULL;
  }

  /**
   * The default implementation simply replaces the :id token in the URL with
   * the ID obtained from MigrateListXML. Override if the item URL is not
   * so easily expressed from the ID.
   *
   * @param mixed $id
   *
   * @return string
   */
  protected function constructItemUrl($id) {
    return str_replace(':id', $id, $this->itemUrl);
  }

  /**
   * Default XML loader - just use Simplexml directly. This can be overridden for
   * preprocessing of XML (removal of unwanted elements, caching of XML if the
   * source service is slow, etc.)
   *
   * @param string $item_url
   *
   * @return SimpleXMLElement|false
   *  The parsed document, or FALSE on failure.
   */
  protected function loadXmlUrl($item_url) {
    return simplexml_load_file($item_url);
  }
}
 | |
| 
 | |
/**
 * Field mapping which additionally carries the xpath locating the source
 * value within an XML item.
 */
class MigrateXMLFieldMapping extends MigrateFieldMapping {
  /**
   * Xpath expression used to pull this field's data out of the XML.
   *
   * @var string
   */
  protected $xpath;

  /**
   * Attach an xpath to this field mapping.
   *
   * @param string $xpath
   *
   * @return MigrateXMLFieldMapping
   *  This mapping, so that calls can be chained.
   */
  public function xpath($xpath) {
    $this->xpath = $xpath;

    return $this;
  }

  /**
   * Report the xpath attached to this field mapping.
   *
   * @return string
   */
  public function getXpath() {
    return $this->xpath;
  }
}
 | |
| 
 | |
/**
 * Base class for migrations reading from XML sources; extend this rather than
 * Migration.
 */
abstract class XMLMigration extends Migration {
  /**
   * Replacement for the default addFieldMapping() which builds a
   * MigrateXMLFieldMapping, so that an xpath can be attached to the mapping.
   * TODO: Find a cleaner way to just substitute a different mapping class
   *
   * @param string $destination_field
   *  Name of the destination field.
   * @param string $source_field
   *  Name of the source field (optional).
   *
   * @return MigrateXMLFieldMapping
   *  The newly registered mapping.
   */
  public function addFieldMapping($destination_field, $source_field = NULL) {
    $has_destination = !is_null($destination_field);

    // Let the user know when an earlier mapping is about to be replaced.
    if ($has_destination && isset($this->fieldMappings[$destination_field])) {
      $warning = t('!name addFieldMapping: !dest was previously mapped, overridden',
        array('!name' => $this->machineName, '!dest' => $destination_field));
      self::displayMessage($warning, 'warning');
    }

    $mapping = new MigrateXMLFieldMapping($destination_field, $source_field);
    if ($has_destination) {
      $this->fieldMappings[$destination_field] = $mapping;
    }
    else {
      $this->fieldMappings[] = $mapping;
    }

    return $mapping;
  }

  /**
   * The source values normally carry every input field at the top level. Here
   * the data lives inside the SimpleXMLElement held in the xml property, so
   * each mapped source field is first filled in by applying its xpath, and
   * then the parent implementation takes over.
   */
  protected function applyMappings() {
    // The xpaths on the mappings are the only description of what to extract.
    foreach ($this->fieldMappings as $field_mapping) {
      $source_field = $field_mapping->getSourceField();
      if (!$source_field) {
        continue;
      }
      $query = $field_mapping->getXpath();
      if (!$query) {
        continue;
      }
      // applyXpath() may be overridden by a derived class.
      $this->sourceValues->$source_field = $this->applyXpath($this->sourceValues, $query);
    }

    parent::applyMappings();
  }

  /**
   * Default xpath evaluation against the xml property of a data row.
   *
   * @param $data_row
   *  Object whose xml property is queried.
   * @param $xpath
   *  The xpath expression to evaluate.
   *
   * @return array|string|null
   *  An array of strings when several nodes match, a single string when one
   *  node matches, or NULL when the query yields nothing.
   */
  public function applyXpath($data_row, $xpath) {
    $nodes = $data_row->xml->xpath($xpath);
    if (!$nodes) {
      return NULL;
    }

    if (count($nodes) <= 1) {
      return (string)$nodes[0];
    }

    $values = array();
    foreach ($nodes as $node) {
      $values[] = (string)$node;
    }
    return $values;
  }
}
 | |
| 
 | |
| /* =========================================================================== */
 | |
| /*                            MultiItems Method                                */
 | |
| /* =========================================================================== */
 | |
/**
 * Implementation of MigrateItems, for providing a list of IDs and for
 * retrieving a parsed XML document given an ID from this list.
 */
class MigrateItemsXML extends MigrateItems {
  /**
   * A URL pointing to an XML document containing the ids and data.
   *
   * @var string
   */
  protected $xmlUrl;

  /**
   * Stores the loaded XML document (FALSE until successfully loaded).
   *
   * @var SimpleXMLElement
   */
  protected $xml = FALSE;

  /**
   * xpath identifying the element used for each item
   *
   * @var string
   */
  protected $itemXpath;

  /**
   * xpath identifying the subelement under itemXpath that holds the id for
   * each item.
   *
   * @var string
   */
  protected $itemIDXpath;

  /**
   * Cached list of item IDs, or NULL if not yet computed.
   *
   * @var array
   */
  protected $cache_ids = NULL;

  /**
   * Cached items keyed by ID, or NULL if not yet computed.
   *
   * @var array
   */
  protected $cache_items = NULL;

  /**
   * @return string
   *  The xpath identifying the element used for each item.
   */
  public function getItemXpath() {
    return $this->itemXpath;
  }

  /**
   * @return string
   *  The xpath, relative to an item, identifying its ID.
   */
  public function getIDXpath() {
    return $this->itemIDXpath;
  }

  /**
   * @param string $xml_url
   *  URL of the XML document holding all the items.
   * @param string $item_xpath
   *  xpath identifying the element used for each item.
   * @param string $itemID_xpath
   *  xpath, relative to an item element, of the item's ID.
   */
  public function __construct($xml_url, $item_xpath='item', $itemID_xpath='id') {
    parent::__construct();
    $this->xmlUrl = $xml_url;
    $this->itemXpath = $item_xpath;
    $this->itemIDXpath = $itemID_xpath;

    // Suppress errors during parsing, so we can pick them up after
    libxml_use_internal_errors(TRUE);
  }

  /**
   * Our public face is the URL we're getting items from
   *
   * @return string
   */
  public function __toString() {
    return 'url = ' . $this->xmlUrl . ' | item xpath = ' . $this->itemXpath .
                                      ' | item ID xpath = ' . $this->itemIDXpath;
  }

  /**
   * Load and return the xml from the defined xmlUrl. The document is loaded
   * on first use and kept for subsequent calls; a failed load is retried on
   * the next call.
   *
   * @return SimpleXMLElement
   *  The loaded document, or FALSE if it could not be loaded.
   */
  public function &xml() {
    // Test the type rather than truthiness: a SimpleXMLElement for an empty
    // root element without attributes casts to boolean FALSE, which would
    // cause a needless reload and a bogus failure message.
    if (!($this->xml instanceof SimpleXMLElement) && !empty($this->xmlUrl)) {
      $this->xml = simplexml_load_file($this->xmlUrl);
      if ($this->xml === FALSE) {
        Migration::displayMessage(t(
          'Loading of !xmlUrl failed:',
          array('!xmlUrl' => $this->xmlUrl)
        ));
        // The errors are deliberately left in the libxml buffer: getItem()
        // appends them to the message it saves and clears them afterwards.
        foreach (libxml_get_errors() as $error) {
          Migration::displayMessage(self::parseLibXMLError($error));
        }
      }
    }
    return $this->xml;
  }

  /**
   * Parses a LibXMLError to a error message string.
   *
   * @param LibXMLError $error
   * @return string
   */
  public static function parseLibXMLError(LibXMLError $error) {
    switch ($error->level) {
      case LIBXML_ERR_WARNING:
        $error_code_name = t('Warning');
        break;
      case LIBXML_ERR_ERROR:
        $error_code_name = t('Error');
        break;
      case LIBXML_ERR_FATAL:
        $error_code_name = t('Fatal Error');
        break;
      default:
        // Translated like the other level names.
        $error_code_name = t('Unknown Error');
        break;
    }
    return t(
      "!libxmlerrorcodename !libxmlerrorcode: !libxmlerrormessage\n" .
      "Line: !libxmlerrorline\n" .
      "Column: !libxmlerrorcolumn\n" .
      "File: !libxmlerrorfile",
      array(
        '!libxmlerrorcodename' => $error_code_name,
        '!libxmlerrorcode' => $error->code,
        '!libxmlerrormessage' => trim($error->message),
        '!libxmlerrorline' => $error->line,
        '!libxmlerrorcolumn' => $error->column,
        '!libxmlerrorfile' => $error->file ? $error->file : NULL,
      )
    );
  }

  /**
   * Load the XML at the given URL, and return an array of the IDs found
   * within it.
   *
   * @return array|null
   *  The IDs, or NULL if the document could not be loaded.
   */
  public function getIdList() {
    migrate_instrument_start("Retrieve $this->xmlUrl");
    $xml = $this->xml();
    migrate_instrument_stop("Retrieve $this->xmlUrl");
    if ($xml instanceof SimpleXMLElement) {
      return $this->getIDsFromXML($xml);
    }
    return NULL;
  }

  /**
   * Given an XML object, parse out the IDs for processing and return them as
   * an array. The location of the IDs in the XML are based on the item xpath
   * and item ID xpath set in the constructor.
   *    eg, xpath = itemXpath . '/' . itemIDXpath
   * IDs are cached.  The list of IDs are returned from the cache except when
   * this is the first call (ie, cache is NULL) OR the refresh parameter is
   * TRUE.
   *
   * @param SimpleXMLElement $xml
   * @param boolean $refresh
   *
   * @return array
   */
  protected function getIDsFromXML(SimpleXMLElement $xml, $refresh = FALSE) {
    // Strict comparison, so that a cached empty list counts as cached.
    if ($refresh !== TRUE && $this->cache_ids !== NULL) {
      return $this->cache_ids;
    }

    $this->cache_ids = NULL;
    $result = $xml->xpath($this->itemXpath);

    $ids = array();
    if ($result) {
      foreach ($result as $element) {
        $id = $this->getItemID($element);
        if (!is_null($id)) {
          $ids[] = (string)$id;
        }
      }
    }
    $this->cache_ids = array_unique($ids);
    return $this->cache_ids;
  }

  /**
   * Return a count of all available IDs from the source listing.
   *
   * @return int
   *  Number of distinct IDs, or 0 if the document could not be loaded.
   */
  public function computeCount() {
    $count = 0;
    $xml = $this->xml();
    if ($xml instanceof SimpleXMLElement) {
      // Always recompute here rather than trusting the cached ID list.
      $ids = $this->getIDsFromXML($xml, TRUE);
      $count = count($ids);
    }
    return $count;
  }

  /**
   * Load the XML at the given URL, and return an array of the Items found
   * within it.
   *
   * @return array|null
   *  Items keyed by ID, or NULL if the document could not be loaded or holds
   *  no items.
   */
  public function getAllItems() {
    $xml = $this->xml();
    if ($xml instanceof SimpleXMLElement) {
      return $this->getItemsFromXML($xml);
    }
    return NULL;
  }

  /**
   * Given an XML object, parse out the items for processing and return them as
   * an array. The location of the items in the XML are based on the item xpath
   * set in the constructor.  Items are cached.  The list of items are returned
   * from the cache except when this is the first call (ie, cache is NULL) OR
   * the refresh parameter is TRUE.
   *
   * Items are cached as an array of key=ID and value=stdclass object with
   * attribute xml containing the xml SimpleXMLElement object of the item.
   * Items for which no ID can be found are skipped, as in getIDsFromXML().
   *
   * @param SimpleXMLElement $xml
   * @param boolean $refresh
   *
   * @return array|null
   *  Items keyed by ID, or NULL if the item xpath matched nothing.
   */
  public function getItemsFromXML(SimpleXMLElement $xml, $refresh=FALSE) {
    // Serve from the cache unless a refresh was requested. (The test used to
    // be inverted, so the cache was never used by getItem() and the xpath was
    // re-run over the whole document for every single item.)
    if ($refresh !== TRUE && $this->cache_items !== NULL) {
      return $this->cache_items;
    }

    $this->cache_items = NULL;
    $result = $xml->xpath($this->itemXpath);
    if (!$result) {
      return NULL;
    }

    $items = array();
    foreach ($result as $item_xml) {
      $id = $this->getItemID($item_xml);
      // An item without an ID cannot be looked up, and NULL is not a usable
      // array key.
      if (is_null($id)) {
        continue;
      }
      $item = new stdClass;
      $item->xml = $item_xml;
      $items[$id] = $item;
    }
    $this->cache_items = $items;
    return $items;
  }

  /**
   * Get the item ID from the itemXML based on itemIDXpath.
   *
   * @param SimpleXMLElement $itemXML
   *
   * @return string|null
   */
  protected function getItemID($itemXML) {
    return $this->getElementValue($itemXML, $this->itemIDXpath);
  }

  /**
   * Get an element from the itemXML based on an xpath.
   *
   * @param SimpleXMLElement $itemXML
   * @param string $xpath
   *
   * @return string|null
   *  The string value of the first match, or NULL if there is none.
   */
  protected function getElementValue($itemXML, $xpath) {
    $value = NULL;
    if ($itemXML) {
      $result = $itemXML->xpath($xpath);
      if ($result) {
        $value = (string)$result[0];
      }
    }
    return $value;
  }

  /**
   * Implementors are expected to return an object representing a source item.
   * Items are cached as an array of key=ID and value=stdclass object with
   * attribute xml containing the xml SimpleXMLElement object of the item.
   *
   * @param mixed $id
   *
   * @return stdClass|null
   *  The item, or NULL if there is no ID or no item with that ID (in which
   *  case an error message is recorded).
   */
  public function getItem($id) {
    // Make sure we actually have an ID
    if (empty($id)) {
      return NULL;
    }

    // getAllItems() returns NULL on failure, and the ID may simply be absent.
    $items = $this->getAllItems();
    if (is_array($items) && isset($items[$id])) {
      return $items[$id];
    }

    $message = t('Loading of item XML for ID !id failed:', array('!id' => $id));
    foreach (libxml_get_errors() as $error) {
      $message .= "\n" . $error->message;
    }
    libxml_clear_errors();

    // There may be no migration running; fall back to displaying the message,
    // as MigrateXMLReader::next() does.
    $migration = Migration::currentMigration();
    if ($migration) {
      $migration->getMap()->saveMessage(
        array($id), $message, MigrationBase::MESSAGE_ERROR);
    }
    else {
      Migration::displayMessage($message);
    }
    return NULL;
  }
}
 | |
| 
 | |
/**
 * Makes an XMLReader object iterable, returning elements matching a restricted
 * xpath-like syntax.
 */
class MigrateXMLReader implements Iterator {

  /**
   * The XMLReader we are encapsulating.
   *
   * @var XMLReader
   */
  public $reader;

  /**
   * URL of the source XML file.
   *
   * @var string
   */
  public $url;

  /**
   * Array of the element names from the query, 0-based from the first (root)
   * element. For example, '/file/article' would be stored as
   * array(0 => 'file', 1 => 'article').
   *
   * @var array
   */
  protected $elementsToMatch = array();

  /**
   * If the element query is filtering by an attribute name=value, the name of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeName = NULL;

  /**
   * If the element query is filtering by an attribute name=value, the value of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeValue = NULL;

  /**
   * Array representing the path to the current element as we traverse the XML.
   * For example, if in an XML string like '<file><article>...</article></file>'
   * we are positioned within the article element, currentPath will be
   * array(0 => 'file', 1 => 'article').
   *
   * @var array
   */
  protected $currentPath = array();

  /**
   * Query string used to retrieve the elements from the XML file.
   *
   * @var string
   */
  public $elementQuery;

  /**
   * Xpath query string used to retrieve the primary key value from each element.
   *
   * @var string
   */
  public $idQuery;

  /**
   * Current element object when iterating.
   *
   * @var SimpleXMLElement
   */
  protected $currentElement = NULL;

  /**
   * Value of the ID for the current element when iterating.
   *
   * @var string
   */
  protected $currentId = NULL;

  /**
   * When matching element names, whether to compare to the namespace-prefixed
   * name, or the local name.
   *
   * @var bool
   */
  protected $prefixedName = FALSE;

  /**
   * Prepares our extensions to the XMLReader object.
   *
   * @param $xml_url
   *  URL of the XML file to be parsed.
   * @param $element_query
   *  Query string in a restricted xpath format, for selecting elements to be
   *  returned by the iterator. Supported syntax:
   *  - The full path to the element must be specified; i.e., /file/article
   *    rather than //article.
   *  - The elements may be filtered by attribute value by appending
   *    [@attribute="value"].
   *  A query which does not follow this syntax matches no elements.
   * @param $id_query
   *  Query string to the unique identifier for an element, relative to the root
   *  of that element. This supports the full xpath syntax.
   */
  public function __construct($xml_url, $element_query, $id_query) {
    $this->reader = new XMLReader;
    $this->url = $xml_url;
    $this->elementQuery = $element_query;
    $this->idQuery = $id_query;

    // Suppress errors during parsing, so we can pick them up after
    libxml_use_internal_errors(TRUE);

    // Parse the element query. First capture group is the element path, second
    // (if present) is the attribute. The captures are only read when the
    // pattern actually matched, so a malformed query yields an iterator which
    // matches nothing instead of undefined-offset errors.
    $element_path = '';
    $attribute_query = '';
    if (preg_match('|^/([^\[]+)(.*)$|', $element_query, $matches)) {
      $element_path = $matches[1];
      $attribute_query = $matches[2];
    }
    $this->elementsToMatch = explode('/', $element_path);

    // Matches [@attribute="value"] (with either single- or double-quotes).
    if ($attribute_query &&
        preg_match('|^\[@([^=]+)=[\'"](.*)[\'"]\]$|', $attribute_query, $matches)) {
      $this->attributeName = $matches[1];
      $this->attributeValue = $matches[2];
    }

    // If the element path contains any colons, it must be specifying namespaces,
    // so we need to compare using the prefixed element name in next().
    // strpos() returns 0 (falsy) for a match at the first character, hence the
    // explicit comparison.
    if (strpos($element_path, ':') !== FALSE) {
      $this->prefixedName = TRUE;
    }
  }

  /**
   * Implementation of Iterator::rewind().
   *
   * @return void
   */
  #[\ReturnTypeWillChange]
  public function rewind() {
    // (Re)open the provided URL.
    $this->reader->close();

    // Reset our path tracker and position.
    $this->currentPath = array();
    $this->currentElement = $this->currentId = NULL;

    if (!$this->reader->open($this->url)) {
      Migration::displayMessage(t('Could not open XML file !url',
                                array('!url' => $this->url)));
      // Nothing to read from - leave the iterator invalid rather than calling
      // read() on a reader with no data loaded.
      return;
    }

    // Load the first matching element and its ID.
    $this->next();
  }

  /**
   * Implementation of Iterator::next().
   *
   * Advances the reader to the next element whose path (and attribute filter,
   * if any) matches the element query, and records it and its ID. If there is
   * no further match, the current element and ID are left NULL.
   *
   * @return void
   *
   * @throws Exception
   *  If the ID xpath query cannot be evaluated against a matched element.
   */
  #[\ReturnTypeWillChange]
  public function next() {
    migrate_instrument_start('MigrateXMLReader::next');
    $this->currentElement = $this->currentId = NULL;

    // Loop over each node in the XML file, looking for elements at a path
    // matching the input query string (represented in $this->elementsToMatch).
    while ($this->reader->read()) {
      if ($this->reader->nodeType == XMLReader::ELEMENT) {
        if ($this->prefixedName) {
          $this->currentPath[$this->reader->depth] = $this->reader->name;
        }
        else {
          $this->currentPath[$this->reader->depth] = $this->reader->localName;
        }
        if ($this->currentPath == $this->elementsToMatch) {
          // We're positioned to the right element path - if filtering on an
          // attribute, check that as well before accepting this element.
          if (empty($this->attributeName) ||
              ($this->reader->getAttribute($this->attributeName) == $this->attributeValue)) {
            // We've found a matching element - get a SimpleXML object representing it.
            // We must associate the DOMNode with a DOMDocument to be able to import
            // it into SimpleXML.
            // Despite appearances, this is almost twice as fast as
            // simplexml_load_string($this->readOuterXML());
            $node = $this->reader->expand();
            if ($node) {
              $dom = new DOMDocument();
              $node = $dom->importNode($node, TRUE);
              $dom->appendChild($node);
              $this->currentElement = simplexml_import_dom($node);
              $idnode = $this->currentElement->xpath($this->idQuery);
              if (!is_array($idnode)) {
                // Don't leave the instrumentation timer running when bailing out.
                migrate_instrument_stop('MigrateXMLReader::next');
                throw new Exception(t('Failure retrieving ID, xpath: !xpath',
                  array('!xpath' => $this->idQuery)));
              }
              // An empty result set yields an empty-string ID.
              $this->currentId = (string)reset($idnode);
              break;
            }
            else {
              foreach (libxml_get_errors() as $error) {
                $error_string = MigrateItemsXML::parseLibXMLError($error);
                if ($migration = Migration::currentMigration()) {
                  $migration->saveMessage($error_string);
                }
                else {
                  Migration::displayMessage($error_string);
                }
              }
              // Reported - don't report the same errors again for the next
              // element which fails to expand.
              libxml_clear_errors();
            }
          }
        }
      }
      elseif ($this->reader->nodeType == XMLReader::END_ELEMENT) {
        // Remove this element and any deeper ones from the current path
        foreach ($this->currentPath as $depth => $name) {
          if ($depth >= $this->reader->depth) {
            unset($this->currentPath[$depth]);
          }
        }
      }
    }
    migrate_instrument_stop('MigrateXMLReader::next');
  }

  /**
   * Implementation of Iterator::current().
   *
   * @return null|SimpleXMLElement
   */
  #[\ReturnTypeWillChange]
  public function current() {
    return $this->currentElement;
  }

  /**
   * Implementation of Iterator::key().
   *
   * @return null|string
   */
  #[\ReturnTypeWillChange]
  public function key() {
    return $this->currentId;
  }

  /**
   * Implementation of Iterator::valid().
   *
   * @return bool
   */
  #[\ReturnTypeWillChange]
  public function valid() {
    // Test the type rather than using empty(): a SimpleXMLElement for an empty
    // element without attributes casts to boolean FALSE, which would end the
    // iteration early and silently drop every element after it.
    return $this->currentElement instanceof SimpleXMLElement;
  }
}
 | |
| 
 | |
| /**
 | |
|  * Implementation of MigrateSource, to handle imports from XML files.
 | |
|  */
 | |
| class MigrateSourceXML extends MigrateSource {
 | |
| 
 | |
|   /**
 | |
|    * The MigrateXMLReader object serving as a cursor over the XML source.
 | |
|    *
 | |
|    * @var MigrateXMLReader
 | |
|    */
 | |
|   protected $reader;
 | |
| 
 | |
|   /**
 | |
|    * The source URLs to load XML from
 | |
|    *
 | |
|    * @var array
 | |
|    */
 | |
|   protected $sourceUrls = array();
 | |
| 
 | |
|   /**
 | |
|    * Holds our current position within the $source_urls array
 | |
|    *
 | |
|    * @var int
 | |
|    */
 | |
|   protected $activeUrl = NULL;
 | |
| 
 | |
|   /**
 | |
|    * Store the query string used to recognize elements being iterated
 | |
|    * so we can create reader objects on the fly.
 | |
|    *
 | |
|    * @var string
 | |
|    */
 | |
|   protected $elementQuery = '';
 | |
| 
 | |
|   /**
 | |
|    * Store the query string used to retrieve the primary key value from each
 | |
|    * element so we can create reader objects on the fly.
 | |
|    *
 | |
|    * @var string
 | |
|    */
 | |
|   protected $idQuery = '';
 | |
| 
 | |
|   /**
 | |
|    * Store the reader class used to query XML so we can create reader objects
 | |
|    * on the fly.
 | |
|    *
 | |
|    * @var string
 | |
|    */
 | |
|   protected $readerClass = '';
 | |
| 
 | |
|   /**
 | |
|    * List of available source fields.
 | |
|    *
 | |
|    * @var array
 | |
|    */
 | |
|   protected $fields = array();
 | |
| 
 | |
|   /**
 | |
|    * Source constructor.
 | |
|    *
 | |
|    * @param string|array $urls
 | |
|    *  URL of the XML source data, or an array of such URLs. A value that is
 | |
|    *  not an array is wrapped into a single-element array.
 | |
|    * @param string $element_query
 | |
|    *  Query string used to recognize elements being iterated.
 | |
|    * @param string $id_query
 | |
|    *  Xpath query string used to retrieve the primary key value from each
 | |
|    *  element.
 | |
|    * @param array $fields
 | |
|    *  Optional - keys are field names, values are descriptions. Use to override
 | |
|    *  the default descriptions, or to add additional source fields which the
 | |
|    *  migration will add via other means (e.g., prepareRow()).
 | |
|    * @param array $options
 | |
|    *  Options applied to this source; they are handed to the parent
 | |
|    *  constructor as-is. In addition to the standard MigrateSource options,
 | |
|    *  we support:
 | |
|    *  - reader_class: The reader class to instantiate for traversing the XML -
 | |
|    *    defaults to MigrateXMLReader (any substitutions must be derived from
 | |
|    *    MigrateXMLReader).
 | |
|    */
 | |
|   public function __construct($urls, $element_query, $id_query, array $fields = array(),
 | |
|       array $options = array()) {
 | |
|     parent::__construct($options);
 | |
| 
 | |
|     // Normalize a single URL into a one-element list.
 | |
|     $this->sourceUrls = is_array($urls) ? $urls : array($urls);
 | |
|     // No source has been opened yet.
 | |
|     $this->activeUrl = NULL;
 | |
| 
 | |
|     // Remember everything needed to build reader objects later on.
 | |
|     $this->elementQuery = $element_query;
 | |
|     $this->idQuery = $id_query;
 | |
|     $this->readerClass = empty($options['reader_class']) ? 'MigrateXMLReader' : $options['reader_class'];
 | |
| 
 | |
|     $this->fields = $fields;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Describe this source: its URLs plus the item and item ID queries.
 | |
|    *
 | |
|    * @return string
 | |
|    */
 | |
|   public function __toString() {
 | |
|     // All URLs are joined into one string. With a large number of URLs this
 | |
|     // can get long - hashing may be needed at some point.
 | |
|     $description = 'urls = ' . implode(', ', $this->sourceUrls);
 | |
|     $description .= ' | item xpath = ' . $this->elementQuery;
 | |
|     $description .= ' | item ID xpath = ' . $this->idQuery;
 | |
|     return $description;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Returns the source fields that were handed to the constructor.
 | |
|    *
 | |
|    * @return array
 | |
|    *  Keys: machine names of the fields (to be passed to addFieldMapping)
 | |
|    *  Values: Human-friendly descriptions of the fields.
 | |
|    */
 | |
|   public function fields() {
 | |
|     $available_fields = $this->fields;
 | |
|     return $available_fields;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Returns the URL of the source currently being read.
 | |
|    *
 | |
|    * @return string|null
 | |
|    *  The entry of the source URL list at the active position, or NULL when
 | |
|    *  no source has been opened yet (or the position has no entry).
 | |
|    */
 | |
|   public function activeUrl() {
 | |
|     // Compare against NULL explicitly: position 0 (the first URL) is a valid
 | |
|     // position, but a plain truthiness test would treat it as "not set".
 | |
|     if (!is_null($this->activeUrl) && isset($this->sourceUrls[$this->activeUrl])) {
 | |
|       return $this->sourceUrls[$this->activeUrl];
 | |
|     }
 | |
| 
 | |
|     return NULL;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Return a count of all available source records.
 | |
|    *
 | |
|    * A fresh reader is created for every source URL and traversed completely,
 | |
|    * so the state of $this->reader is left untouched.
 | |
|    *
 | |
|    * @return int
 | |
|    *  Total number of elements the readers yield across all source URLs.
 | |
|    */
 | |
|   public function computeCount() {
 | |
|     $total = 0;
 | |
|     $reader_class = $this->readerClass;
 | |
| 
 | |
|     foreach ($this->sourceUrls as $source_url) {
 | |
|       $cursor = new $reader_class($source_url, $this->elementQuery, $this->idQuery);
 | |
|       // Only the number of elements matters, not their content.
 | |
|       foreach ($cursor as $item) {
 | |
|         ++$total;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     return $total;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Implementation of MigrateSource::performRewind().
 | |
|    */
 | |
|   public function performRewind() {
 | |
|     // Drop the current reader and forget our position in the URL list. The
 | |
|     // next getNextRow() call then finds no reader and asks getNextSource()
 | |
|     // for one, which starts over at the first source URL.
 | |
|     $this->reader = NULL;
 | |
|     $this->activeUrl = NULL;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Implementation of MigrateSource::getNextRow().
 | |
|    *
 | |
|    * Advances the current reader, if there is one. When that yields no valid
 | |
|    * element (or no reader exists yet), getNextSource() is asked to open the
 | |
|    * next source URL that has one.
 | |
|    *
 | |
|    * @return stdClass
 | |
|    *  Row carrying the reader's key under the name taken from the active map's
 | |
|    *  source key, and the reader's current element in ->xml. NULL when no
 | |
|    *  source has a further element.
 | |
|    */
 | |
|   public function getNextRow() {
 | |
|     migrate_instrument_start('MigrateSourceXML::next');
 | |
| 
 | |
|     $source_key = $this->activeMap->getSourceKey();
 | |
|     $id_field = key($source_key);
 | |
| 
 | |
|     // The reader is created lazily, so there may be none on the first call.
 | |
|     $have_element = FALSE;
 | |
|     if (isset($this->reader)) {
 | |
|       $this->reader->next();
 | |
|       $have_element = $this->reader->valid();
 | |
|     }
 | |
| 
 | |
|     // Current source exhausted (or never opened): move on to the next one.
 | |
|     if (!$have_element) {
 | |
|       $have_element = $this->getNextSource();
 | |
|     }
 | |
| 
 | |
|     $row = NULL;
 | |
|     if ($have_element) {
 | |
|       $row = new stdClass;
 | |
|       $row->$id_field = $this->reader->key();
 | |
|       $row->xml = $this->reader->current();
 | |
|     }
 | |
| 
 | |
|     migrate_instrument_stop('MigrateSourceXML::next');
 | |
|     return $row;
 | |
|   }
 | |
| 
 | |
|   /**
 | |
|    * Advances the reader to the next source URL that yields an element.
 | |
|    *
 | |
|    * Starts at the first URL when no source has been opened yet, otherwise at
 | |
|    * the URL following the active one. Sources whose reader is not valid after
 | |
|    * rewind() are skipped. Nothing is instantiated when the URL list is empty
 | |
|    * or already exhausted.
 | |
|    *
 | |
|    * @return bool
 | |
|    *  TRUE if a valid source was loaded, in which case $this->reader is
 | |
|    *  positioned on its first element; FALSE if no further source has one.
 | |
|    */
 | |
|   public function getNextSource() {
 | |
|     migrate_instrument_start('MigrateSourceXML::nextSource');
 | |
| 
 | |
|     // Return value
 | |
|     $status = FALSE;
 | |
| 
 | |
|     $last_index = count($this->sourceUrls) - 1;
 | |
|     // NULL means nothing has been opened yet, so begin with the first URL.
 | |
|     $candidate = is_null($this->activeUrl) ? 0 : $this->activeUrl + 1;
 | |
| 
 | |
|     // Checking the bound before touching the list keeps us from building a
 | |
|     // reader on an undefined entry when the list is empty.
 | |
|     while ($candidate <= $last_index) {
 | |
|       $this->activeUrl = $candidate;
 | |
|       $this->reader = new $this->readerClass($this->sourceUrls[$this->activeUrl], $this->elementQuery, $this->idQuery);
 | |
|       $this->reader->rewind();
 | |
| 
 | |
|       if ($this->reader->valid()) {
 | |
|         // We have a valid source
 | |
|         $status = TRUE;
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|       // This source has no matching element, try the one after it.
 | |
|       $candidate++;
 | |
|     }
 | |
| 
 | |
|     migrate_instrument_stop('MigrateSourceXML::nextSource');
 | |
|     return $status;
 | |
|   }
 | |
| }
 | 
