xml.inc 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067
  1. <?php
  2. /**
  3. * @file
  4. * Support for migration from XML sources.
  5. *
  6. * NOTE: There are two methods supported in this file.
  7. *
  8. * 1) List - ids are listed in an index xml file and the data for each item is
  9. * stored in a separate xml file per item. Use MigrateSourceList class
  10. * as the source.
  11. *
  12. * 2) MultiItems - ids are part of the item and all items are stored in a
  13. * single xml file. Use MigrateSourceMultiItems class as the source.
  14. *
  15. * Both of these methods are described in more detail in the wine migration
  16. * example.
  17. */
  18. /* =========================================================================== */
  19. /* List Method */
  20. /* =========================================================================== */
  21. /**
  22. * Implementation of MigrateList, for retrieving a list of IDs to be migrated
  23. * from an XML document.
  24. */
  25. class MigrateListXML extends MigrateList {
  26. /**
  27. * A URL pointing to an XML document containing a list of IDs to be processed.
  28. *
  29. * @var string
  30. */
  31. protected $listUrl;
  32. public function __construct($list_url) {
  33. parent::__construct();
  34. $this->listUrl = $list_url;
  35. // Suppress errors during parsing, so we can pick them up after
  36. libxml_use_internal_errors(TRUE);
  37. }
  38. /**
  39. * Our public face is the URL we're getting items from
  40. *
  41. * @return string
  42. */
  43. public function __toString() {
  44. return $this->listUrl;
  45. }
  46. /**
  47. * Load the XML at the given URL, and return an array of the IDs found within it.
  48. *
  49. * @return array
  50. */
  51. public function getIdList() {
  52. migrate_instrument_start("Retrieve $this->listUrl");
  53. $xml = simplexml_load_file($this->listUrl);
  54. migrate_instrument_stop("Retrieve $this->listUrl");
  55. if ($xml) {
  56. return $this->getIDsFromXML($xml);
  57. }
  58. else {
  59. Migration::displayMessage(t(
  60. 'Loading of !listUrl failed:',
  61. array('!listUrl' => $this->listUrl)
  62. ));
  63. foreach (libxml_get_errors() as $error) {
  64. Migration::displayMessage(MigrateItemsXML::parseLibXMLError($error));
  65. }
  66. return NULL;
  67. }
  68. }
  69. /**
  70. * Given an XML object, parse out the IDs for processing and return them as an
  71. * array. The default implementation assumes the IDs are simply the values of
  72. * the top-level elements - in most cases, you will need to override this to
  73. * reflect your particular XML structure.
  74. *
  75. * @param SimpleXMLElement $xml
  76. *
  77. * @return array
  78. */
  79. protected function getIDsFromXML(SimpleXMLElement $xml) {
  80. $ids = array();
  81. foreach ($xml as $element) {
  82. $ids[] = (string)$element;
  83. }
  84. return array_unique($ids);
  85. }
  86. /**
  87. * Return a count of all available IDs from the source listing. The default
  88. * implementation assumes the count of top-level elements reflects the number
  89. * of IDs available - in many cases, you will need to override this to reflect
  90. * your particular XML structure.
  91. */
  92. public function computeCount() {
  93. $xml = simplexml_load_file($this->listUrl);
  94. // Number of sourceid elements beneath the top-level element
  95. $count = count($xml);
  96. return $count;
  97. }
  98. }
  99. /**
  100. * Implementation of MigrateItem, for retrieving a parsed XML document given
  101. * an ID provided by a MigrateList class.
  102. */
  103. class MigrateItemXML extends MigrateItem {
  104. /**
  105. * A URL pointing to an XML document containing the data for one item to be
  106. * migrated.
  107. *
  108. * @var string
  109. */
  110. protected $itemUrl;
  111. public function __construct($item_url) {
  112. parent::__construct();
  113. $this->itemUrl = $item_url;
  114. // Suppress errors during parsing, so we can pick them up after
  115. libxml_use_internal_errors(TRUE);
  116. }
  117. /**
  118. * Implementors are expected to return an object representing a source item.
  119. *
  120. * @param mixed $id
  121. *
  122. * @return stdClass
  123. */
  124. public function getItem($id) {
  125. // Make sure we actually have an ID
  126. if (empty($id)) {
  127. return NULL;
  128. }
  129. $item_url = $this->constructItemUrl($id);
  130. // And make sure we actually got a URL to fetch
  131. if (empty($item_url)) {
  132. return NULL;
  133. }
  134. // Get the XML object at the specified URL;
  135. $xml = $this->loadXmlUrl($item_url);
  136. if ($xml) {
  137. $return = new stdclass;
  138. $return->xml = $xml;
  139. return $return;
  140. }
  141. else {
  142. $migration = Migration::currentMigration();
  143. $message = t('Loading of !objecturl failed:', array('!objecturl' => $item_url));
  144. foreach (libxml_get_errors() as $error) {
  145. $message .= "\n" . $error->message;
  146. }
  147. $migration->getMap()->saveMessage(
  148. array($id), $message, MigrationBase::MESSAGE_ERROR);
  149. libxml_clear_errors();
  150. return NULL;
  151. }
  152. }
  153. /**
  154. * The default implementation simply replaces the :id token in the URL with
  155. * the ID obtained from MigrateListXML. Override if the item URL is not
  156. * so easily expressed from the ID.
  157. *
  158. * @param mixed $id
  159. */
  160. protected function constructItemUrl($id) {
  161. return str_replace(':id', $id, $this->itemUrl);
  162. }
  163. /**
  164. * Default XML loader - just use Simplexml directly. This can be overridden for
  165. * preprocessing of XML (removal of unwanted elements, caching of XML if the
  166. * source service is slow, etc.)
  167. */
  168. protected function loadXmlUrl($item_url) {
  169. return simplexml_load_file($item_url);
  170. }
  171. }
  172. /**
  173. * Adds xpath info to field mappings for XML sources
  174. */
  175. class MigrateXMLFieldMapping extends MigrateFieldMapping {
  176. /**
  177. * The xpath used to retrieve the data for this field from the XML.
  178. *
  179. * @var string
  180. */
  181. protected $xpath;
  182. public function getXpath() {
  183. return $this->xpath;
  184. }
  185. /**
  186. * Add an xpath to this field mapping
  187. *
  188. * @param string $xpath
  189. */
  190. public function xpath($xpath) {
  191. $this->xpath = $xpath;
  192. return $this;
  193. }
  194. }
  195. /**
  196. * Migrations using XML sources should extend this class instead of Migration.
  197. */
  198. abstract class XMLMigration extends Migration {
  199. /**
  200. * Override the default addFieldMapping(), so we can create our special
  201. * field mapping class.
  202. * TODO: Find a cleaner way to just substitute a different mapping class
  203. *
  204. * @param string $destinationField
  205. * Name of the destination field.
  206. * @param string $sourceField
  207. * Name of the source field (optional).
  208. */
  209. public function addFieldMapping($destination_field, $source_field = NULL) {
  210. // Warn of duplicate mappings
  211. if (!is_null($destination_field) && isset($this->fieldMappings[$destination_field])) {
  212. self::displayMessage(
  213. t('!name addFieldMapping: !dest was previously mapped, overridden',
  214. array('!name' => $this->machineName, '!dest' => $destination_field)),
  215. 'warning');
  216. }
  217. $mapping = new MigrateXMLFieldMapping($destination_field, $source_field);
  218. if (is_null($destination_field)) {
  219. $this->fieldMappings[] = $mapping;
  220. }
  221. else {
  222. $this->fieldMappings[$destination_field] = $mapping;
  223. }
  224. return $mapping;
  225. }
  226. /**
  227. * A normal $data_row has all the input data as top-level fields - in this
  228. * case, however, the data is embedded within a SimpleXMLElement object in
  229. * $data_row->xml. Explode that out to the normal form, and pass on to the
  230. * normal implementation.
  231. */
  232. protected function applyMappings() {
  233. // We only know what data to pull from the xpaths in the mappings.
  234. foreach ($this->fieldMappings as $mapping) {
  235. $source = $mapping->getSourceField();
  236. if ($source) {
  237. $xpath = $mapping->getXpath();
  238. if ($xpath) {
  239. // Derived class may override applyXpath()
  240. $this->sourceValues->$source = $this->applyXpath($this->sourceValues, $xpath);
  241. }
  242. }
  243. }
  244. parent::applyMappings();
  245. }
  246. /**
  247. * Default implementation - straightforward xpath application
  248. *
  249. * @param $data_row
  250. * @param $xpath
  251. */
  252. public function applyXpath($data_row, $xpath) {
  253. $result = $data_row->xml->xpath($xpath);
  254. if ($result) {
  255. if (count($result) > 1) {
  256. $return = array();
  257. foreach ($result as $record) {
  258. $return[] = (string)$record;
  259. }
  260. return $return;
  261. }
  262. else {
  263. return (string)$result[0];
  264. }
  265. }
  266. else {
  267. return NULL;
  268. }
  269. }
  270. }
  271. /* =========================================================================== */
  272. /* MultiItems Method */
  273. /* =========================================================================== */
  274. /**
  275. * Implementation of MigrateItems, for providing a list of IDs and for
  276. * retrieving a parsed XML document given an ID from this list.
  277. */
  278. class MigrateItemsXML extends MigrateItems {
  279. /**
  280. * A URL pointing to an XML document containing the ids and data.
  281. *
  282. * @var string
  283. */
  284. protected $xmlUrl;
  285. /**
  286. * Stores the loaded XML document.
  287. *
  288. * @var SimpleXMLElement
  289. */
  290. protected $xml = FALSE;
  291. /**
  292. * xpath identifying the element used for each item
  293. */
  294. protected $itemXpath;
  295. public function getItemXpath() {
  296. return $this->itemXpath;
  297. }
  298. /**
  299. * xpath identifying the subelement under itemXpath that holds the id for
  300. * each item.
  301. */
  302. protected $itemIDXpath;
  303. public function getIDXpath() {
  304. return $this->itemIDXpath;
  305. }
  306. public function __construct($xml_url, $item_xpath='item', $itemID_xpath='id') {
  307. parent::__construct();
  308. $this->xmlUrl = $xml_url;
  309. $this->itemXpath = $item_xpath;
  310. $this->itemIDXpath = $itemID_xpath;
  311. // Suppress errors during parsing, so we can pick them up after
  312. libxml_use_internal_errors(TRUE);
  313. }
  314. /**
  315. * Our public face is the URL we're getting items from
  316. *
  317. * @return string
  318. */
  319. public function __toString() {
  320. return 'url = ' . $this->xmlUrl . ' | item xpath = ' . $this->itemXpath .
  321. ' | item ID xpath = ' . $this->itemIDXpath;
  322. }
  323. /**
  324. * Load and return the xml from the defined xmlUrl.
  325. * @return SimpleXMLElement
  326. */
  327. public function &xml() {
  328. if (!$this->xml && !empty($this->xmlUrl)) {
  329. $this->xml = simplexml_load_file($this->xmlUrl);
  330. if (!$this->xml) {
  331. Migration::displayMessage(t(
  332. 'Loading of !xmlUrl failed:',
  333. array('!xmlUrl' => $this->xmlUrl)
  334. ));
  335. foreach (libxml_get_errors() as $error) {
  336. Migration::displayMessage(self::parseLibXMLError($error));
  337. }
  338. }
  339. }
  340. return $this->xml;
  341. }
  342. /**
  343. * Parses a LibXMLError to a error message string.
  344. * @param LibXMLError $error
  345. * @return string
  346. */
  347. public static function parseLibXMLError(LibXMLError $error) {
  348. $error_code_name = 'Unknown Error';
  349. switch ($error->level) {
  350. case LIBXML_ERR_WARNING:
  351. $error_code_name = t('Warning');
  352. break;
  353. case LIBXML_ERR_ERROR:
  354. $error_code_name = t('Error');
  355. break;
  356. case LIBXML_ERR_FATAL:
  357. $error_code_name = t('Fatal Error');
  358. break;
  359. }
  360. return t(
  361. "!libxmlerrorcodename !libxmlerrorcode: !libxmlerrormessage\n" .
  362. "Line: !libxmlerrorline\n" .
  363. "Column: !libxmlerrorcolumn\n" .
  364. "File: !libxmlerrorfile",
  365. array(
  366. '!libxmlerrorcodename' => $error_code_name,
  367. '!libxmlerrorcode' => $error->code,
  368. '!libxmlerrormessage' => trim($error->message),
  369. '!libxmlerrorline' => $error->line,
  370. '!libxmlerrorcolumn' => $error->column,
  371. '!libxmlerrorfile' => (($error->file)) ? $error->file : NULL,
  372. )
  373. );
  374. }
  375. /**
  376. * Load the XML at the given URL, and return an array of the IDs found
  377. * within it.
  378. *
  379. * @return array
  380. */
  381. public function getIdList() {
  382. migrate_instrument_start("Retrieve $this->xmlUrl");
  383. $xml = $this->xml();
  384. migrate_instrument_stop("Retrieve $this->xmlUrl");
  385. if ($xml) {
  386. return $this->getIDsFromXML($xml);
  387. }
  388. return NULL;
  389. }
  390. /**
  391. * Given an XML object, parse out the IDs for processing and return them as
  392. * an array. The location of the IDs in the XML are based on the item xpath
  393. * and item ID xpath set in the constructor.
  394. * eg, xpath = itemXpath . '/' . itemIDXpath
  395. * IDs are cached. The list of IDs are returned from the cache except when
  396. * this is the first call (ie, cache is NULL) OR the refresh parameter is
  397. * TRUE.
  398. *
  399. * @param SimpleXMLElement $xml
  400. * @param boolean $refresh
  401. *
  402. * @return array
  403. */
  404. protected $cache_ids = NULL;
  405. protected function getIDsFromXML(SimpleXMLElement $xml, $refresh = FALSE) {
  406. if ($refresh !== TRUE && $this->cache_ids != NULL) {
  407. return $this->cache_ids;
  408. }
  409. $this->cache_ids = NULL;
  410. $result = $xml->xpath($this->itemXpath);
  411. $ids = array();
  412. if ($result) {
  413. foreach ($result as $element) {
  414. $id = $this->getItemID($element);
  415. if (!is_null($id)) {
  416. $ids[] = (string)$id;
  417. }
  418. }
  419. }
  420. $this->cache_ids = array_unique($ids);
  421. return $this->cache_ids;
  422. }
  423. /**
  424. * Return a count of all available IDs from the source listing.
  425. */
  426. public function computeCount() {
  427. $count = 0;
  428. $xml = $this->xml();
  429. if ($xml) {
  430. $ids = $this->getIDsFromXML($xml, TRUE);
  431. $count = count($ids);
  432. }
  433. return $count;
  434. }
  435. /**
  436. * Load the XML at the given URL, and return an array of the Items found
  437. * within it.
  438. *
  439. * @return array
  440. */
  441. public function getAllItems() {
  442. $xml = $this->xml();
  443. if ($xml) {
  444. return $this->getItemsFromXML($xml);
  445. }
  446. return NULL;
  447. }
  448. /**
  449. * Given an XML object, parse out the items for processing and return them as
  450. * an array. The location of the items in the XML are based on the item xpath
  451. * set in the constructor. Items are cached. The list of items are returned
  452. * from the cache except when this is the first call (ie, cache is NULL) OR
  453. * the refresh parameter is TRUE.
  454. *
  455. * Items are cached as an array of key=ID and value=stdclass object with
  456. * attribute xml containing the xml SimpleXMLElement object of the item.
  457. *
  458. * @param SimpleXMLElement $xml
  459. * @param boolean $refresh
  460. *
  461. * @return array
  462. */
  463. protected $cache_items = NULL;
  464. public function getItemsFromXML(SimpleXMLElement $xml, $refresh=FALSE) {
  465. if ($refresh !== FALSE && $this->cache_items != NULL) {
  466. return $this->cache_items;
  467. }
  468. $this->cache_items = NULL;
  469. $items = array();
  470. $result = $xml->xpath($this->itemXpath);
  471. if ($result) {
  472. foreach ($result as $item_xml) {
  473. $id = $this->getItemID($item_xml);
  474. $item = new stdclass;
  475. $item->xml = $item_xml;
  476. $items[$id] = $item;
  477. }
  478. $this->cache_items = $items;
  479. return $items;
  480. }
  481. else {
  482. return NULL;
  483. }
  484. }
  485. /**
  486. * Get the item ID from the itemXML based on itemIDXpath.
  487. *
  488. * @return string
  489. */
  490. protected function getItemID($itemXML) {
  491. return $this->getElementValue($itemXML, $this->itemIDXpath);
  492. }
  493. /**
  494. * Get an element from the itemXML based on an xpath.
  495. *
  496. * @return string
  497. */
  498. protected function getElementValue($itemXML, $xpath) {
  499. $value = NULL;
  500. if ($itemXML) {
  501. $result = $itemXML->xpath($xpath);
  502. if ($result)
  503. $value = (string)$result[0];
  504. }
  505. return $value;
  506. }
  507. /**
  508. * Implementors are expected to return an object representing a source item.
  509. * Items are cached as an array of key=ID and value=stdclass object with
  510. * attribute xml containing the xml SimpleXMLElement object of the item.
  511. *
  512. * @param mixed $id
  513. *
  514. * @return stdClass
  515. */
  516. public function getItem($id) {
  517. // Make sure we actually have an ID
  518. if (empty($id)) {
  519. return NULL;
  520. }
  521. $items = $this->getAllItems();
  522. $item = $items[$id];
  523. if ($item) {
  524. return $item;
  525. }
  526. else {
  527. $migration = Migration::currentMigration();
  528. $message = t('Loading of item XML for ID !id failed:', array('!id' => $id));
  529. foreach (libxml_get_errors() as $error) {
  530. $message .= "\n" . $error->message;
  531. }
  532. $migration->getMap()->saveMessage(
  533. array($id), $message, MigrationBase::MESSAGE_ERROR);
  534. libxml_clear_errors();
  535. return NULL;
  536. }
  537. }
  538. }
  539. /**
  540. * Makes an XMLReader object iterable, returning elements matching a restricted
  541. * xpath-like syntax.
  542. */
  543. class MigrateXMLReader implements Iterator {
  544. /**
  545. * The XMLReader we are encapsulating.
  546. *
  547. * @var XMLReader
  548. */
  549. public $reader;
  550. /**
  551. * URL of the source XML file.
  552. *
  553. * @var string
  554. */
  555. public $url;
  556. /**
  557. * Array of the element names from the query, 0-based from the first (root)
  558. * element. For example, '//file/article' would be stored as
  559. * array(0 => 'file', 1 => 'article').
  560. *
  561. * @var array
  562. */
  563. protected $elementsToMatch = array();
  564. /**
  565. * If the element query is filtering by an attribute name=value, the name of
  566. * the attribute in question.
  567. *
  568. * @var string
  569. */
  570. protected $attributeName = NULL;
  571. /**
  572. * If the element query is filtering by an attribute name=value, the value of
  573. * the attribute in question.
  574. *
  575. * @var string
  576. */
  577. protected $attributeValue = NULL;
  578. /**
  579. * Array representing the path to the current element as we traverse the XML.
  580. * For example, if in an XML string like '<file><article>...</article></file>'
  581. * we are positioned within the article element, currentPath will be
  582. * array(0 => 'file', 1 => 'article').
  583. *
  584. * @var array
  585. */
  586. protected $currentPath = array();
  587. /**
  588. * Query string used to retrieve the elements from the XML file.
  589. *
  590. * @var string
  591. */
  592. public $elementQuery;
  593. /**
  594. * Xpath query string used to retrieve the primary key value from each element.
  595. *
  596. * @var string
  597. */
  598. public $idQuery;
  599. /**
  600. * Current element object when iterating.
  601. *
  602. * @var SimpleXMLElement
  603. */
  604. protected $currentElement = NULL;
  605. /**
  606. * Value of the ID for the current element when iterating.
  607. *
  608. * @var string
  609. */
  610. protected $currentId = NULL;
  611. /**
  612. * When matching element names, whether to compare to the namespace-prefixed
  613. * name, or the local name.
  614. *
  615. * @var bool
  616. */
  617. protected $prefixedName = FALSE;
  618. /**
  619. * Prepares our extensions to the XMLReader object.
  620. *
  621. * @param $xml_url
  622. * URL of the XML file to be parsed.
  623. * @param $element_query
  624. * Query string in a restricted xpath format, for selecting elements to be
  625. * returned by the interator. Supported syntax:
  626. * - The full path to the element must be specified; i.e., /file/article
  627. * rather than //article.
  628. * - The elements may be filtered by attribute value by appending
  629. * [@attribute="value"].
  630. * @param $id_query
  631. * Query string to the unique identifier for an element, relative to the root
  632. * of that element. This supports the full xpath syntax.
  633. */
  634. public function __construct($xml_url, $element_query, $id_query) {
  635. $this->reader = new XMLReader;
  636. $this->url = $xml_url;
  637. $this->elementQuery = $element_query;
  638. $this->idQuery = $id_query;
  639. // Suppress errors during parsing, so we can pick them up after
  640. libxml_use_internal_errors(TRUE);
  641. // Parse the element query. First capture group is the element path, second
  642. // (if present) is the attribute.
  643. preg_match_all('|^/([^\[]+)(.*)$|', $element_query, $matches);
  644. $element_path = $matches[1][0];
  645. $this->elementsToMatch = explode('/', $element_path);
  646. $attribute_query = $matches[2][0];
  647. if ($attribute_query) {
  648. // Matches [@attribute="value"] (with either single- or double-quotes).
  649. preg_match_all('|^\[@([^=]+)=[\'"](.*)[\'"]\]$|', $attribute_query, $matches);
  650. $this->attributeName = $matches[1][0];
  651. $this->attributeValue = $matches[2][0];
  652. }
  653. // If the element path contains any colons, it must be specifying namespaces,
  654. // so we need to compare using the prefixed element name in next().
  655. if (strpos($element_path, ':')) {
  656. $this->prefixedName = TRUE;
  657. }
  658. }
  659. /**
  660. * Implementation of Iterator::rewind().
  661. *
  662. * @return void
  663. */
  664. public function rewind() {
  665. // (Re)open the provided URL.
  666. $this->reader->close();
  667. $status = $this->reader->open($this->url);
  668. if (!$status) {
  669. Migration::displayMessage(t('Could not open XML file !url',
  670. array('!url' => $this->url)));
  671. }
  672. // Reset our path tracker
  673. $this->currentPath = array();
  674. // Load the first matching element and its ID.
  675. $this->next();
  676. }
  677. /**
  678. * Implementation of Iterator::next().
  679. *
  680. * @return void
  681. */
  682. public function next() {
  683. migrate_instrument_start('MigrateXMLReader::next');
  684. $this->currentElement = $this->currentId = NULL;
  685. // Loop over each node in the XML file, looking for elements at a path
  686. // matching the input query string (represented in $this->elementsToMatch).
  687. while ($this->reader->read()) {
  688. if ($this->reader->nodeType == XMLREADER::ELEMENT) {
  689. if ($this->prefixedName) {
  690. $this->currentPath[$this->reader->depth] = $this->reader->name;
  691. }
  692. else {
  693. $this->currentPath[$this->reader->depth] = $this->reader->localName;
  694. }
  695. if ($this->currentPath == $this->elementsToMatch) {
  696. // We're positioned to the right element path - if filtering on an
  697. // attribute, check that as well before accepting this element.
  698. if (empty($this->attributeName) ||
  699. ($this->reader->getAttribute($this->attributeName) == $this->attributeValue)) {
  700. // We've found a matching element - get a SimpleXML object representing it.
  701. // We must associate the DOMNode with a DOMDocument to be able to import
  702. // it into SimpleXML.
  703. // Despite appearances, this is almost twice as fast as
  704. // simplexml_load_string($this->readOuterXML());
  705. $node = $this->reader->expand();
  706. if ($node) {
  707. $dom = new DOMDocument();
  708. $node = $dom->importNode($node, TRUE);
  709. $dom->appendChild($node);
  710. $this->currentElement = simplexml_import_dom($node);
  711. $idnode = $this->currentElement->xpath($this->idQuery);
  712. if (is_array($idnode)) {
  713. $this->currentId = (string)reset($idnode);
  714. }
  715. else {
  716. throw new Exception(t('Failure retrieving ID, xpath: !xpath',
  717. array('!xpath' => $this->idQuery)));
  718. }
  719. break;
  720. }
  721. else {
  722. foreach (libxml_get_errors() as $error) {
  723. $error_string = MigrateItemsXML::parseLibXMLError($error);
  724. if ($migration = Migration::currentMigration()) {
  725. $migration->saveMessage($error_string);
  726. }
  727. else {
  728. Migration::displayMessage($error_string);
  729. }
  730. }
  731. }
  732. }
  733. }
  734. }
  735. elseif ($this->reader->nodeType == XMLREADER::END_ELEMENT) {
  736. // Remove this element and any deeper ones from the current path
  737. foreach ($this->currentPath as $depth => $name) {
  738. if ($depth >= $this->reader->depth) {
  739. unset($this->currentPath[$depth]);
  740. }
  741. }
  742. }
  743. }
  744. migrate_instrument_stop('MigrateXMLReader::next');
  745. }
  746. /**
  747. * Implementation of Iterator::current().
  748. *
  749. * @return null|SimpleXMLElement
  750. */
  751. public function current() {
  752. return $this->currentElement;
  753. }
  754. /**
  755. * Implementation of Iterator::key().
  756. *
  757. * @return null|string
  758. */
  759. public function key() {
  760. return $this->currentId;
  761. }
  762. /**
  763. * Implementation of Iterator::valid().
  764. *
  765. * @return bool
  766. */
  767. public function valid() {
  768. return !empty($this->currentElement);
  769. }
  770. }
  771. /**
  772. * Implementation of MigrateSource, to handle imports from XML files.
  773. */
  774. class MigrateSourceXML extends MigrateSource {
  775. /**
  776. * The MigrateXMLReader object serving as a cursor over the XML source.
  777. *
  778. * @var MigrateXMLReader
  779. */
  780. protected $reader;
  781. /**
  782. * The source URLs to load XML from
  783. *
  784. * @var array
  785. */
  786. protected $sourceUrls = array();
  787. /**
  788. * Holds our current position within the $source_urls array
  789. *
  790. * @var int
  791. */
  792. protected $activeUrl = NULL;
  793. /**
  794. * Store the query string used to recognize elements being iterated
  795. * so we can create reader objects on the fly.
  796. *
  797. * @var string
  798. */
  799. protected $elementQuery = '';
  800. /**
  801. * Store the query string used to retrieve the primary key value from each
  802. * element so we can create reader objects on the fly.
  803. *
  804. * @var string
  805. */
  806. protected $idQuery = '';
  807. /**
  808. * Store the reader class used to query XML so we can create reader objects
  809. * on the fly.
  810. *
  811. * @var string
  812. */
  813. protected $readerClass = '';
  814. /**
  815. * List of available source fields.
  816. *
  817. * @var array
  818. */
  819. protected $fields = array();
  820. /**
  821. * Source constructor.
  822. *
  823. * @param string or array $url
  824. * URL(s) of the XML source data.
  825. * @param string $element_query
  826. * Query string used to recognize elements being iterated.
  827. * @param string $id_query
  828. * Xpath query string used to retrieve the primary key value from each element.
  829. * @param array $fields
  830. * Optional - keys are field names, values are descriptions. Use to override
  831. * the default descriptions, or to add additional source fields which the
  832. * migration will add via other means (e.g., prepareRow()).
  833. * @param boolean $options
  834. * Options applied to this source. In addition to the standard MigrateSource
  835. * options, we support:
  836. * - reader_class: The reader class to instantiate for traversing the XML -
  837. * defaults to MigrateXMLReader (any substitutions must be derived from
  838. * MigrateXMLReader).
  839. */
  840. public function __construct($urls, $element_query, $id_query, array $fields = array(),
  841. array $options = array()) {
  842. parent::__construct($options);
  843. if (empty($options['reader_class'])) {
  844. $reader_class = 'MigrateXMLReader';
  845. }
  846. else {
  847. $reader_class = $options['reader_class'];
  848. }
  849. if (!is_array($urls)) {
  850. $urls = array($urls);
  851. }
  852. $this->sourceUrls = $urls;
  853. $this->activeUrl = NULL;
  854. $this->elementQuery = $element_query;
  855. $this->idQuery = $id_query;
  856. $this->readerClass = $reader_class;
  857. $this->fields = $fields;
  858. }
  859. /**
  860. * Return a string representing the source query.
  861. *
  862. * @return string
  863. */
  864. public function __toString() {
  865. // Clump the urls into a string
  866. // This could cause a problem when using a lot of urls, may need to hash
  867. $urls = implode(', ', $this->sourceUrls);
  868. return 'urls = ' . $urls .
  869. ' | item xpath = ' . $this->elementQuery .
  870. ' | item ID xpath = ' . $this->idQuery;
  871. }
  872. /**
  873. * Returns a list of fields available to be mapped from the source query.
  874. *
  875. * @return array
  876. * Keys: machine names of the fields (to be passed to addFieldMapping)
  877. * Values: Human-friendly descriptions of the fields.
  878. */
  879. public function fields() {
  880. return $this->fields;
  881. }
  882. /**
  883. * Returns the active Url.
  884. *
  885. * @return string
  886. */
  887. public function activeUrl() {
  888. if ($this->activeUrl) {
  889. return $this->sourceUrls[$this->activeUrl];
  890. }
  891. }
  892. /**
  893. * Return a count of all available source records.
  894. */
  895. public function computeCount() {
  896. $count = 0;
  897. foreach ($this->sourceUrls as $url) {
  898. $reader = new $this->readerClass($url, $this->elementQuery, $this->idQuery);
  899. foreach ($reader as $element) {
  900. $count++;
  901. }
  902. }
  903. return $count;
  904. }
  905. /**
  906. * Implementation of MigrateSource::performRewind().
  907. */
  908. public function performRewind() {
  909. // Set the reader back to the beginning of the file (positioned to the
  910. // first matching element), then apply our logic to make sure we have the
  911. // first element fulfilling our logic (idlist/map/prepareRow()).
  912. $this->activeUrl = NULL;
  913. $this->reader = NULL;
  914. }
  915. /**
  916. * Implementation of MigrationSource::getNextRow().
  917. *
  918. * @return stdClass
  919. * data for the next row from the XML source files
  920. */
  921. public function getNextRow() {
  922. migrate_instrument_start('MigrateSourceXML::next');
  923. $source_key = $this->activeMap->getSourceKey();
  924. $key_name = key($source_key);
  925. $row = NULL;
  926. // The reader is now lazy loaded, so it may not be defined yet, need to test if set
  927. if (isset($this->reader)) {
  928. // attempt to load the next row
  929. $this->reader->next();
  930. }
  931. // Test the reader for a valid row
  932. if (isset($this->reader) && $this->reader->valid()) {
  933. $row = new stdClass;
  934. $row->$key_name = $this->reader->key();
  935. $row->xml = $this->reader->current();
  936. }
  937. else {
  938. // The current source is at the end, try to load the next source
  939. if ($this->getNextSource()) {
  940. $row = new stdClass;
  941. $row->$key_name = $this->reader->key();
  942. $row->xml = $this->reader->current();
  943. }
  944. }
  945. migrate_instrument_stop('MigrateSourceXML::next');
  946. return $row;
  947. }
  948. /**
  949. * Advances the reader to the next source from source_urls
  950. *
  951. * @return bool
  952. * TRUE if a valid source was loaded
  953. */
  954. public function getNextSource() {
  955. migrate_instrument_start('MigrateSourceXML::nextSource');
  956. // Return value
  957. $status = FALSE;
  958. while ($this->activeUrl === NULL || (count($this->sourceUrls)-1) > $this->activeUrl) {
  959. if (is_null($this->activeUrl)) {
  960. $this->activeUrl = 0;
  961. }
  962. else {
  963. // Increment the activeUrl so we try to load the next source
  964. $this->activeUrl = $this->activeUrl + 1;
  965. }
  966. $this->reader = new $this->readerClass($this->sourceUrls[$this->activeUrl], $this->elementQuery, $this->idQuery);
  967. $this->reader->rewind();
  968. if ($this->reader->valid()) {
  969. // We have a valid source
  970. $status = TRUE;
  971. break;
  972. }
  973. }
  974. migrate_instrument_stop('MigrateSourceXML::nextSource');
  975. return $status;
  976. }
  977. }