dsv.inc 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. <?php
  2. /**
  3. * @file
  4. * The Node export DSV format handler.
  5. *
  6. * Adds configurable DSV format to Node export.
  7. */
  8. /**
  9. * Settings callback.
  10. */
  11. function node_export_dsv_settings($form, $form_state) {
  12. $settings['dsv'] = array(
  13. '#type' => 'fieldset',
  14. '#title' => t('DSV format settings'),
  15. '#description' => t(
  16. 'Select how your DSV output will be formatted - this must be configured the
  17. same on both sites. By default this is configured to RFC4180 CSV format
  18. where the delimiter is a comma (,), the enclosure is a double-quote ("),
  19. and the separator is CRLF (\r\n). Not all configurations may be possible,
  20. use wisely. Enclosure will only be used to escape values that contain any
  21. of the configured strings. Additionally single-quotes will be used to
  22. escape values that are equivalent to reserved words (NULL, TRUE, FALSE).'
  23. ),
  24. '#collapsible' => TRUE,
  25. '#collapsed' => TRUE,
  26. );
  27. $settings['dsv']['node_export_dsv_delimiter'] = array(
  28. '#type' => 'textfield',
  29. '#title' => t('Value delimiter'),
  30. '#size' => 5,
  31. '#maxlength' => 255,
  32. '#default_value' => variable_get('node_export_dsv_delimiter', ','),
  33. '#required' => TRUE,
  34. );
  35. $settings['dsv']['node_export_dsv_enclosure'] = array(
  36. '#type' => 'textfield',
  37. '#title' => t('Escape enclosure'),
  38. '#size' => 5,
  39. '#maxlength' => 255,
  40. '#default_value' => variable_get('node_export_dsv_enclosure', '"'),
  41. '#required' => TRUE,
  42. );
  43. $settings['dsv']['node_export_dsv_separator'] = array(
  44. '#type' => 'textfield',
  45. '#title' => t('Record separator'),
  46. '#size' => 5,
  47. '#maxlength' => 255,
  48. '#default_value' => variable_get('node_export_dsv_separator', '\r\n'),
  49. '#required' => TRUE,
  50. );
  51. $settings['dsv']['node_export_dsv_escape_eol'] = array(
  52. '#type' => 'checkbox',
  53. '#title' => t('Always escape values containing line breaks'),
  54. '#default_value' => variable_get('node_export_dsv_escape_eol', 1),
  55. '#description' => t('This is to overcome problems where Windows injects CRLF line breaks.'),
  56. );
  57. return $settings;
  58. }
  59. function node_export_dsv_string($string) {
  60. $replace = array(
  61. '\n' => "\n",
  62. '\r' => "\r",
  63. '\t' => "\t",
  64. '\v' => "\v",
  65. '\e' => "\e",
  66. '\f' => "\f",
  67. );
  68. return str_replace(array_keys($replace), array_values($replace), $string);
  69. }
  70. /**
  71. * Export callback.
  72. */
  73. function node_export_dsv_export($nodes, $format) {
  74. $delimiter = node_export_dsv_string(variable_get('node_export_dsv_delimiter', ','));
  75. $enclosure = node_export_dsv_string(variable_get('node_export_dsv_enclosure', '"'));
  76. $separator = node_export_dsv_string(variable_get('node_export_dsv_separator', '\r\n'));
  77. $escape_eol = variable_get('node_export_dsv_escape_eol', 1);
  78. return node_export_dsv_encode($nodes, $delimiter, $enclosure, $separator, $escape_eol);
  79. }
  80. /**
  81. * Build DSV string.
  82. */
  83. function node_export_dsv_encode($nodes, $delimiter, $enclosure, $separator, $escape_eol) {
  84. $encoded_nodes = array();
  85. $dsv_lines = array();
  86. $node_keys = array();
  87. foreach (array_keys($nodes) as $node_key) {
  88. $new_node_key = 'node_' . $node_key;
  89. $node_keys[] = $new_node_key;
  90. node_export_dsv_encode_node($encoded_nodes, $new_node_key, $nodes[$node_key]);
  91. }
  92. $dsv_lines['node_export_dsv_header'] = array_keys($encoded_nodes);
  93. foreach (array_keys($encoded_nodes) as $header_value) {
  94. $encoded_nodes[$header_value] = array_merge(array_fill_keys($node_keys, NULL), $encoded_nodes[$header_value]);
  95. foreach (array_keys($encoded_nodes[$header_value]) as $encoded_node_key) {
  96. $dsv_lines[$encoded_node_key][$header_value] = $encoded_nodes[$header_value][$encoded_node_key];
  97. }
  98. }
  99. return node_export_dsv_array_to_dsv($dsv_lines, $delimiter, $enclosure, $separator, $escape_eol);
  100. }
  101. /**
  102. * Process a node and update $header and $encoded_nodes accordingly.
  103. */
  104. function node_export_dsv_encode_node(&$encoded_nodes, $node_key, $var, $parent = NULL) {
  105. foreach ($var as $k => &$v) {
  106. // Get the new header value.
  107. $header_value = node_export_dsv_encode_header_value($parent, $var, $k);
  108. if (is_object($v) || is_array($v)) {
  109. // Recurse through the structure.
  110. node_export_dsv_encode_node($encoded_nodes, $node_key, $v, $header_value);
  111. }
  112. else {
  113. // Create a safe text version of this value and store it against the header using a safe key.
  114. $encoded_nodes[$header_value][$node_key] = node_export_dsv_encode_sanitize_value($v);
  115. }
  116. }
  117. }
  118. /**
  119. * Encode a value.
  120. */
  121. function node_export_dsv_encode_sanitize_value($var) {
  122. if (is_numeric($var)) {
  123. return $var;
  124. }
  125. elseif (is_bool($var)) {
  126. return ($var ? 'TRUE' : 'FALSE');
  127. }
  128. elseif (is_null($var)) {
  129. return 'NULL';
  130. }
  131. elseif (is_string($var) && !empty($var)) {
  132. // Single-quote strings that could be confused for null or boolean.
  133. if (in_array(strtoupper($var), array('TRUE', 'FALSE', 'NULL'))) {
  134. $var = "'" . $var . "'";
  135. }
  136. return $var;
  137. }
  138. else {
  139. return '';
  140. }
  141. }
  142. /**
  143. * Decode a value.
  144. */
  145. function node_export_dsv_decode_sanitize_value($var) {
  146. // Allow numeric, bool, and null values to pass right back as is.
  147. if (is_numeric($var) || is_bool($var) || is_null($var)) {
  148. return $var;
  149. }
  150. // Allow the special case strings back as is.
  151. elseif (in_array(strtoupper($var), array("'TRUE'", "'FALSE'", "'NULL'"))) {
  152. return $var;
  153. }
  154. // Assume this is a string.
  155. return "'" . str_replace("'", "\'", $var) . "'";
  156. }
  157. /**
  158. * Create header value from $parents, $var, and $k.
  159. */
  160. function node_export_dsv_encode_header_value($parents, $var, $k) {
  161. if (is_null($parents)) {
  162. // Special case; on the first level do not prefix the key.
  163. $header_value = $k;
  164. }
  165. elseif (is_object($var)) {
  166. $header_value = $parents . "->" . $k;
  167. }
  168. elseif (is_array($var)) {
  169. $header_value = $parents . "['" . $k . "']";
  170. }
  171. return $header_value;
  172. }
  173. /**
  174. * Import callback.
  175. */
  176. function node_export_dsv_import($code_string) {
  177. $delimiter = node_export_dsv_string(variable_get('node_export_dsv_delimiter', ','));
  178. $enclosure = node_export_dsv_string(variable_get('node_export_dsv_enclosure', '"'));
  179. $separator = node_export_dsv_string(variable_get('node_export_dsv_separator', '\r\n'));
  180. return node_export_dsv_decode($code_string, $delimiter, $enclosure, $separator);
  181. }
  182. /**
  183. * Interpret a DSV string.
  184. */
  185. function node_export_dsv_decode($code_string, $delimiter, $enclosure, $separator) {
  186. // Get array data from DSV.
  187. $array = @node_export_dsv_dsv_to_array($code_string, $delimiter, $enclosure, $separator);
  188. // If the first two rows are of equal length, we can assume this is a DSV.
  189. // Also checks there are a decent number of fields.
  190. if (!empty($array[0]) && !empty($array[1]) && count($array[0]) > 10 && count($array[0]) == count($array[1])) {
  191. $nodes = array();
  192. // Assume row 0 is the header, and the rest of the rows are the nodes.
  193. $header = array_shift($array);
  194. // Build the nodes.
  195. foreach ($array as &$row) {
  196. $node = (object)array();
  197. foreach ($row as $key => $item) {
  198. $item = node_export_dsv_decode_sanitize_value($item);
  199. eval('$node->' . $header[$key] . ' = ' . $item . ';');
  200. }
  201. $nodes[] = $node;
  202. }
  203. return $nodes;
  204. }
  205. }
  206. /**
  207. * Encode DSV.
  208. */
  209. function node_export_dsv_array_to_dsv($array, $delimiter, $enclosure, $separator, $escape_eol) {
  210. $lines = array();
  211. foreach ($array as $line) {
  212. $out_item = array();
  213. foreach ($line as $item) {
  214. if (stripos($item, $enclosure) !== FALSE) {
  215. $item = str_replace($enclosure, $enclosure . $enclosure, $item);
  216. }
  217. if (
  218. (stripos($item, $delimiter) !== FALSE)
  219. || (stripos($item, $enclosure) !== FALSE)
  220. || (stripos($item, $separator) !== FALSE)
  221. || ($escape_eol && stripos($item, "\n") !== FALSE)
  222. ) {
  223. $item = $enclosure . $item . $enclosure;
  224. }
  225. $out_item[] = $item;
  226. }
  227. $lines[] = implode($delimiter, $out_item);
  228. }
  229. return implode($separator, $lines);
  230. }
  231. /**
  232. * Decode DSV.
  233. */
  234. function node_export_dsv_dsv_to_array($string, $delimiter, $enclosure, $separator) {
  235. $lines = array();
  236. $out_item = array();
  237. $count = strlen($string);
  238. $escape = FALSE;
  239. $double_escape = FALSE;
  240. $position = 0;
  241. $i = 0;
  242. $separators = str_split($separator);
  243. while ($i < $count) {
  244. $c = $string[$i];
  245. // Determine whether this is an EOL.
  246. $is_eol = TRUE;
  247. for ($j = 0; $j < count($separators); $j++) {
  248. if (!isset($string[$i + $j]) || $string[$i + $j] != $separators[$j]) {
  249. $is_eol = FALSE;
  250. break;
  251. }
  252. }
  253. if ($is_eol) {
  254. if ($escape) {
  255. $out_item[$position] .= $c;
  256. }
  257. else {
  258. $i += count($separators);
  259. $lines[] = $out_item;
  260. $out_item = array();
  261. $position = 0;
  262. continue;
  263. }
  264. }
  265. elseif ($c == $delimiter) {
  266. if ($escape) {
  267. $out_item[$position] .= $c;
  268. }
  269. else {
  270. if ($string[$i - 1] == $delimiter) {
  271. $out_item[$position] .= '';
  272. }
  273. $position++;
  274. $escape = FALSE;
  275. $double_escape = FALSE;
  276. }
  277. }
  278. elseif ($c == $enclosure) {
  279. if ($double_escape) {
  280. $out_item[$position] .= $enclosure;
  281. $double_escape = FALSE;
  282. }
  283. if ($escape) {
  284. $escape = FALSE;
  285. $double_escape = TRUE;
  286. }
  287. else {
  288. $escape = TRUE;
  289. $double_escape = FALSE;
  290. }
  291. }
  292. else {
  293. if ($double_escape) {
  294. $out_item[$position] .= $enclosure;
  295. $double_escape = FALSE;
  296. }
  297. $out_item[$position] .= $c;
  298. }
  299. $i++;
  300. }
  301. if (!empty($out_item)) {
  302. $lines[] = $out_item;
  303. }
  304. return $lines;
  305. }
  306. /**
  307. * Callback for actions.
  308. */
  309. function node_export_dsv_action_form($context, &$form_state) {
  310. return node_export_action_form($context, $form_state, 'dsv');
  311. }