csv.inc 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. <?php
  2. /**
  3. * @file
  4. * Define a MigrateSource for importing from comma separated values files.
  5. */
  6. /**
  7. * Implementation of MigrateSource, to handle imports from CSV files.
  8. *
  9. * If the CSV file contains non-ASCII characters, make sure it includes a
  10. * UTF BOM (Byte Order Marker) so they are interpreted correctly.
  11. */
  12. class MigrateSourceCSV extends MigrateSource {
  13. /**
  14. * List of available source fields.
  15. *
  16. * @var array
  17. */
  18. protected $fields = array();
  19. /**
  20. * Parameters for the fgetcsv() call.
  21. *
  22. * @var array
  23. */
  24. protected $fgetcsv = array();
  25. /**
  26. * File handle for the CSV file being iterated.
  27. *
  28. * @var resource
  29. */
  30. protected $csvHandle = NULL;
  31. /**
  32. * The number of rows in the CSV file before the data starts.
  33. *
  34. * @var integer
  35. */
  36. protected $headerRows = 0;
  37. /**
  38. * Simple initialization.
  39. *
  40. * @param string $path
  41. * The path to the source file
  42. * @param array $csvcolumns
  43. * Keys are integers. values are array(field name, description).
  44. * @param array $options
  45. * Options applied to this source.
  46. * @param array $fields
  47. * Optional - keys are field names, values are descriptions. Use to override
  48. * the default descriptions, or to add additional source fields which the
  49. * migration will add via other means (e.g., prepareRow()).
  50. */
  51. public function __construct($path, array $csvcolumns = array(), array $options = array(), array $fields = array()) {
  52. parent::__construct($options);
  53. $this->file = $path;
  54. if (!empty($options['header_rows'])) {
  55. $this->headerRows = $options['header_rows'];
  56. }
  57. else {
  58. $this->headerRows = 0;
  59. }
  60. $this->options = $options;
  61. $this->fields = $fields;
  62. // fgetcsv specific options
  63. foreach (array('length' => NULL, 'delimiter' => ',', 'enclosure' => '"', 'escape' => '\\') as $key => $default) {
  64. $this->fgetcsv[$key] = isset($options[$key]) ? $options[$key] : $default;
  65. }
  66. // One can either pass in an explicit list of column names to use, or if we have
  67. // a header row we can use the names from that
  68. if ($this->headerRows && empty($csvcolumns)) {
  69. $this->csvcolumns = array();
  70. $this->csvHandle = fopen($this->file, 'r');
  71. // Skip all but the last header
  72. for ($i = 0; $i < $this->headerRows - 1; $i++) {
  73. $this->getNextLine();
  74. }
  75. $row = $this->getNextLine();
  76. foreach ($row as $header) {
  77. $header = trim($header);
  78. $this->csvcolumns[] = array($header, $header);
  79. }
  80. fclose($this->csvHandle);
  81. unset($this->csvHandle);
  82. }
  83. else {
  84. $this->csvcolumns = $csvcolumns;
  85. }
  86. }
  87. /**
  88. * Return a string representing the source query.
  89. *
  90. * @return string
  91. */
  92. public function __toString() {
  93. return $this->file;
  94. }
  95. /**
  96. * Returns a list of fields available to be mapped from the source query.
  97. *
  98. * @return array
  99. * Keys: machine names of the fields (to be passed to addFieldMapping)
  100. * Values: Human-friendly descriptions of the fields.
  101. */
  102. public function fields() {
  103. $fields = array();
  104. foreach ($this->csvcolumns as $values) {
  105. $fields[$values[0]] = $values[1];
  106. }
  107. // Any caller-specified fields with the same names as extracted fields will
  108. // override them; any others will be added
  109. if ($this->fields) {
  110. $fields = $this->fields + $fields;
  111. }
  112. return $fields;
  113. }
  114. /**
  115. * Return a count of all available source records.
  116. */
  117. public function computeCount() {
  118. // If the data may have embedded newlines, the file line count won't reflect
  119. // the number of CSV records (one record will span multiple lines). We need
  120. // to scan with fgetcsv to get the true count.
  121. if (!empty($this->options['embedded_newlines'])) {
  122. $result = fopen($this->file, 'r');
  123. // Skip all but the last header
  124. for ($i = 0; $i < $this->headerRows; $i++) {
  125. fgets($result);
  126. }
  127. $count = 0;
  128. while ($this->getNextLine()) {
  129. $count++;
  130. }
  131. fclose($result);
  132. }
  133. else {
  134. // TODO. If this takes too much time/memory, use exec('wc -l')
  135. $count = count(file($this->file));
  136. $count -= $this->headerRows;
  137. }
  138. return $count;
  139. }
  140. /**
  141. * Implementation of MigrateSource::performRewind().
  142. *
  143. * @return void
  144. */
  145. public function performRewind() {
  146. // Close any previously-opened handle
  147. if (!is_null($this->csvHandle)) {
  148. fclose($this->csvHandle);
  149. }
  150. // Load up the first row, skipping the header(s) if necessary
  151. $this->csvHandle = fopen($this->file, 'r');
  152. for ($i = 0; $i < $this->headerRows; $i++) {
  153. $this->getNextLine();
  154. }
  155. }
  156. /**
  157. * Implementation of MigrateSource::getNextRow().
  158. * Return the next line of the source CSV file as an object.
  159. *
  160. * @return null|object
  161. */
  162. public function getNextRow() {
  163. $row = $this->getNextLine();
  164. if ($row) {
  165. // Set meaningful keys for the columns mentioned in $this->csvcolumns().
  166. foreach ($this->csvcolumns as $int => $values) {
  167. list($key, $description) = $values;
  168. // Copy value to more descriptive string based key and then unset original.
  169. $row[$key] = isset($row[$int]) ? $row[$int] : NULL;
  170. unset($row[$int]);
  171. }
  172. return (object)$row;
  173. }
  174. else {
  175. fclose($this->csvHandle);
  176. $this->csvHandle = NULL;
  177. return NULL;
  178. }
  179. }
  180. protected function getNextLine() {
  181. // escape parameter was added in PHP 5.3.
  182. if (version_compare(phpversion(), '5.3', '<')) {
  183. $row = fgetcsv($this->csvHandle, $this->fgetcsv['length'],
  184. $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure']);
  185. }
  186. else {
  187. $row = fgetcsv($this->csvHandle, $this->fgetcsv['length'],
  188. $this->fgetcsv['delimiter'], $this->fgetcsv['enclosure'],
  189. $this->fgetcsv['escape']);
  190. }
  191. return $row;
  192. }
  193. }