source.inc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. <?php
  /**
   * @file
   * Defines the base class for migration sources.
   */
  /**
   * Abstract base class for migration sources.
   *
   * Implements Iterator so that source rows can be walked with foreach.
   * rewind() and next() drive the iteration and delegate the source-specific
   * work to methods that this class calls but does not declare. Derived
   * classes are expected to provide:
   * - __toString(): a string describing the source and its significant
   *   options (count() hashes it to build the default cache key). See
   *   MigrateSourceSQL for an example.
   * - fields(): the list of available source fields.
   * - computeCount(): a fresh count of source records.
   * - performRewind(): any class-specific setup before iterating.
   * - getNextRow(): the next raw source row, or a falsy value once the source
   *   is exhausted.
   */
  abstract class MigrateSource implements Iterator {

    /**
     * The current row from the query.
     *
     * @var stdClass
     */
    protected $currentRow;

    /**
     * The primary key of the current row.
     *
     * @var array
     */
    protected $currentKey;

    /**
     * Returns the primary key of the current row.
     *
     * @return array|null
     *   Source key values keyed by field name, or NULL when there is no
     *   current row.
     */
    public function getCurrentKey() {
      return $this->currentKey;
    }

    /**
     * The Migration class currently invoking us, during rewind() and next().
     *
     * @var Migration
     */
    protected $activeMigration;

    /**
     * The MigrateMap class for the current migration.
     *
     * @var MigrateMap
     */
    protected $activeMap;

    /**
     * Number of rows intentionally ignored (prepareRow() returned FALSE).
     *
     * @var int
     */
    protected $numIgnored = 0;

    /**
     * Returns the number of rows ignored because prepareRow() returned FALSE.
     *
     * @return int
     */
    public function getIgnored() {
      return $this->numIgnored;
    }

    /**
     * Number of rows handed to prepareRow() since the last rewind().
     *
     * @var int
     */
    protected $numProcessed = 0;

    /**
     * Returns the number of rows handed to prepareRow() since the last rewind().
     *
     * @return int
     */
    public function getProcessed() {
      return $this->numProcessed;
    }

    /**
     * Resets numIgnored back to 0. numProcessed is left untouched.
     */
    public function resetStats() {
      $this->numIgnored = 0;
    }

    /**
     * Information on the highwater mark for the current migration, if any.
     *
     * next() reads the 'name' element to find the highwater field on a row.
     *
     * @var array
     */
    protected $highwaterField;

    /**
     * List of source IDs to process; when non-empty, only these are taken.
     *
     * @var array
     */
    protected $idList = array();

    /**
     * Derived classes must implement fields(), returning a list of available
     * source fields.
     *
     * @return array
     *   Keys: machine names of the fields (to be passed to addFieldMapping).
     *   Values: human-friendly descriptions of the fields.
     */
    abstract public function fields();

    /**
     * Whether this instance should cache the source count.
     *
     * @var boolean
     */
    protected $cacheCounts = FALSE;

    /**
     * Key to use for caching counts.
     *
     * @var string
     */
    protected $cacheKey;

    /**
     * Whether this instance should not attempt to count the source.
     *
     * @var boolean
     */
    protected $skipCount = FALSE;

    /**
     * By default, next() will directly read the map row and add it to the data
     * row. A source plugin implementation may do this itself (in particular,
     * the SQL source can incorporate the map table into the query) - if so, it
     * should set this TRUE so we don't duplicate the effort.
     *
     * @var bool
     */
    protected $mapRowAdded = FALSE;

    /**
     * Returns a count of available source records, from the cache if
     * appropriate.
     *
     * @param boolean $refresh
     *   TRUE to bypass any cached value and recount from the source.
     *
     * @return int
     *   The record count, or -1 if counting is disabled for this source.
     */
    public function count($refresh = FALSE) {
      if ($this->skipCount) {
        return -1;
      }

      // Without an explicit cache key, derive one from the source description.
      if (!isset($this->cacheKey)) {
        $this->cacheKey = md5((string)$this);
      }

      // Only consult the cache when caching is enabled and no refresh was
      // requested.
      if (!$refresh && $this->cacheCounts) {
        $cache_object = cache_get($this->cacheKey, 'cache');
        if (is_object($cache_object)) {
          return $cache_object->data;
        }
      }

      // Refresh requested, caching disabled, or cache miss: ask the derived
      // class to count the source and store the result.
      $count = $this->computeCount();
      cache_set($this->cacheKey, $count, 'cache');
      return $count;
    }

    /**
     * Derived classes must implement computeCount(), to retrieve a fresh count
     * of source records. The declaration below is left commented out, as in the
     * original file, so the method is not enforced by the language.
     */
    //abstract public function computeCount();

    /**
     * Class constructor.
     *
     * @param array $options
     *   Optional array of options. Recognized keys:
     *   - cache_counts: if non-empty, count() may return a cached count.
     *   - skip_count: if non-empty, count() returns -1 without counting.
     *   - cache_key: if non-empty, the cache ID used for the cached count.
     */
    public function __construct($options = array()) {
      if (!empty($options['cache_counts'])) {
        $this->cacheCounts = TRUE;
      }
      if (!empty($options['skip_count'])) {
        $this->skipCount = TRUE;
      }
      if (!empty($options['cache_key'])) {
        $this->cacheKey = $options['cache_key'];
      }
    }

    /**
     * Default implementations of Iterator methods - many derivations will find
     * these adequate and will only need to implement performRewind() and
     * getNextRow().
     *
     * The #[\ReturnTypeWillChange] markers silence the PHP 8.1+ deprecation for
     * untyped Iterator methods; older PHP versions parse them as comments.
     */

    /**
     * Implementation of Iterator::current() - called when entering a loop
     * iteration, returning the current row.
     */
    #[\ReturnTypeWillChange]
    public function current() {
      return $this->currentRow;
    }

    /**
     * Implementation of Iterator::key() - called when entering a loop
     * iteration, returning the key of the current row. It must be a scalar - we
     * serialize to fulfill the requirement, but using getCurrentKey() is
     * preferable.
     */
    #[\ReturnTypeWillChange]
    public function key() {
      return serialize($this->currentKey);
    }

    /**
     * Implementation of Iterator::valid() - called at the top of the loop,
     * returning TRUE to process the loop and FALSE to terminate it.
     */
    #[\ReturnTypeWillChange]
    public function valid() {
      return !is_null($this->currentRow);
    }

    /**
     * Implementation of Iterator::rewind() - subclasses of MigrateSource should
     * implement performRewind() to do any class-specific setup for iterating
     * source records.
     *
     * Captures the active migration, its map, highwater field and optional
     * comma-separated 'idlist' option, resets the counters, then advances to
     * the first eligible row.
     */
    #[\ReturnTypeWillChange]
    public function rewind() {
      $this->activeMigration = Migration::currentMigration();
      $this->activeMap = $this->activeMigration->getMap();
      $this->numProcessed = 0;
      $this->numIgnored = 0;
      $this->highwaterField = $this->activeMigration->getHighwaterField();

      $id_option = $this->activeMigration->getOption('idlist');
      $this->idList = $id_option ? explode(',', $id_option) : array();

      $timer = get_class($this) . ' performRewind';
      migrate_instrument_start($timer);
      $this->performRewind();
      migrate_instrument_stop($timer);

      $this->next();
    }

    /**
     * Implementation of Iterator::next() - subclasses of MigrateSource should
     * implement getNextRow() to retrieve the next valid source record to
     * process.
     *
     * Pulls rows until one is accepted or the source is exhausted. A row is
     * accepted when it passes the selection rules below and prepareRow() does
     * not return FALSE. When no row is accepted, both the current row and the
     * current key end up NULL, so valid() returns FALSE.
     */
    #[\ReturnTypeWillChange]
    public function next() {
      $this->currentKey = NULL;
      $this->currentRow = NULL;
      $timer = get_class($this) . ' getNextRow';

      while (TRUE) {
        // Time every fetch individually so each start has exactly one stop,
        // including fetches that follow a skipped row.
        migrate_instrument_start($timer);
        $row = $this->getNextRow();
        migrate_instrument_stop($timer);
        if (!$row) {
          break;
        }

        // Populate the source key for this row.
        foreach ($this->activeMap->getSourceKey() as $field_name => $field_schema) {
          $this->currentKey[$field_name] = $row->$field_name;
        }

        // Pick up the existing map row, if any, unless getNextRow() did it.
        if (!$this->mapRowAdded) {
          $map_row = $this->activeMap->getRowBySource($this->currentKey);
          // Add map info to the row, if present.
          if ($map_row) {
            foreach ($map_row as $field => $value) {
              $field = 'migrate_map_' . $field;
              $row->$field = $value;
            }
          }
        }

        // Determine if this row should be passed to prepareRow(), or skipped
        // entirely. The rules are:
        // 1. If there's an explicit idlist, that's all we care about (ignore
        //    highwaters and map rows).
        if (!empty($this->idList)) {
          // Loose comparison on purpose: idList entries are strings produced
          // by explode(), while key values need not be strings.
          if (!in_array(reset($this->currentKey), $this->idList)) {
            // Not in the list, skip it.
            $this->currentRow = NULL;
            continue;
          }
        }
        // 2. If the row is not in the map (we have never tried to import it
        //    before), we always want to try it.
        // 3. If the row is marked as needing update, pass it.
        // Both cases skip this branch and go straight to prepareRow() below.
        elseif (isset($row->migrate_map_sourceid1)
          && $row->migrate_map_needs_update != MigrateMap::STATUS_NEEDS_UPDATE) {
          // 4. At this point, we have a row which has previously been imported
          //    and not marked for update. If we're not using highwater marks,
          //    then we will not take this row.
          if (empty($this->highwaterField)) {
            $this->currentRow = NULL;
            continue;
          }
          // 5. So, we are using highwater marks. Take the row if its highwater
          //    field value is greater than the saved mark, otherwise skip it.
          //    prepareRow() is called first, in case the highwater field needs
          //    preparation; it must not be called a second time below.
          if ($this->prepareRow($row) !== FALSE
            && $row->{$this->highwaterField['name']} > $this->activeMigration->getHighwater()) {
            $this->currentRow = $row;
            break;
          }
          $this->currentRow = NULL;
          continue;
        }

        // Allow the Migration to prepare this row. prepareRow() can return
        // boolean FALSE to ignore this row.
        if ($this->prepareRow($row) !== FALSE) {
          // Finally, we've got a keeper.
          $this->currentRow = $row;
          break;
        }
        $this->currentRow = NULL;
      }

      if (!$this->currentRow) {
        $this->currentKey = NULL;
      }
    }

    /**
     * Give the calling migration a shot at manipulating, and possibly
     * rejecting, the source row.
     *
     * A rejected row is recorded in the map with MigrateMap::STATUS_IGNORED and
     * counted in numIgnored. Every call increments numProcessed.
     *
     * @param stdClass $row
     *   The source row, passed on to the active migration's prepareRow().
     *
     * @return bool
     *   FALSE if the row is to be skipped, TRUE otherwise.
     */
    protected function prepareRow($row) {
      $timer = get_class($this->activeMigration) . ' prepareRow';
      migrate_instrument_start($timer);
      $return = $this->activeMigration->prepareRow($row);
      migrate_instrument_stop($timer);

      if ($return === FALSE) {
        // We're explicitly skipping this row - keep track in the map table.
        $this->activeMigration->getMap()->saveIDMapping($row, array(NULL),
          MigrateMap::STATUS_IGNORED);
        $this->numIgnored++;
        $this->currentRow = NULL;
        $this->currentKey = NULL;
      }
      else {
        // Anything other than boolean FALSE counts as acceptance.
        $return = TRUE;
      }

      $this->numProcessed++;
      return $return;
    }

  }