DiffEngine.php 35 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354
  1. <?php
  2. /**
  3. * @file
  4. * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
  5. *
  6. * Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
  7. * You may copy this code freely under the conditions of the GPL.
  8. */
  9. define('USE_ASSERTS', FALSE);
  10. /**
  11. * @todo document
  12. * @private
  13. * @subpackage DifferenceEngine
  14. */
  15. class _DiffOp {
  16. var $type;
  17. var $orig;
  18. var $closing;
  19. function reverse() {
  20. trigger_error('pure virtual', E_USER_ERROR);
  21. }
  22. function norig() {
  23. return $this->orig ? sizeof($this->orig) : 0;
  24. }
  25. function nclosing() {
  26. return $this->closing ? sizeof($this->closing) : 0;
  27. }
  28. }
  29. /**
  30. * @todo document
  31. * @private
  32. * @subpackage DifferenceEngine
  33. */
  34. class _DiffOp_Copy extends _DiffOp {
  35. var $type = 'copy';
  36. function __construct($orig, $closing = FALSE) {
  37. if (!is_array($closing)) {
  38. $closing = $orig;
  39. }
  40. $this->orig = $orig;
  41. $this->closing = $closing;
  42. }
  43. function reverse() {
  44. return new _DiffOp_Copy($this->closing, $this->orig);
  45. }
  46. }
  47. /**
  48. * @todo document
  49. * @private
  50. * @subpackage DifferenceEngine
  51. */
  52. class _DiffOp_Delete extends _DiffOp {
  53. var $type = 'delete';
  54. function __construct($lines) {
  55. $this->orig = $lines;
  56. $this->closing = FALSE;
  57. }
  58. function reverse() {
  59. return new _DiffOp_Add($this->orig);
  60. }
  61. }
  62. /**
  63. * @todo document
  64. * @private
  65. * @subpackage DifferenceEngine
  66. */
  67. class _DiffOp_Add extends _DiffOp {
  68. var $type = 'add';
  69. function __construct($lines) {
  70. $this->closing = $lines;
  71. $this->orig = FALSE;
  72. }
  73. function reverse() {
  74. return new _DiffOp_Delete($this->closing);
  75. }
  76. }
  77. /**
  78. * @todo document
  79. * @private
  80. * @subpackage DifferenceEngine
  81. */
  82. class _DiffOp_Change extends _DiffOp {
  83. var $type = 'change';
  84. function __construct($orig, $closing) {
  85. $this->orig = $orig;
  86. $this->closing = $closing;
  87. }
  88. function reverse() {
  89. return new _DiffOp_Change($this->closing, $this->orig);
  90. }
  91. }
  92. /**
  93. * Class used internally by Diff to actually compute the diffs.
  94. *
  95. * The algorithm used here is mostly lifted from the perl module
  96. * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
  97. * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
  98. *
  99. * More ideas are taken from:
  100. * http://www.ics.uci.edu/~eppstein/161/960229.html
  101. *
  102. * Some ideas are (and a bit of code) are from from analyze.c, from GNU
  103. * diffutils-2.7, which can be found at:
  104. * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
  105. *
  106. * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
  107. * are my own.
  108. *
  109. * Line length limits for robustness added by Tim Starling, 2005-08-31
  110. *
  111. * @author Geoffrey T. Dairiki, Tim Starling
  112. * @private
  113. * @subpackage DifferenceEngine
  114. */
  115. class _DiffEngine {
  116. function MAX_XREF_LENGTH() {
  117. return 10000;
  118. }
  119. function diff($from_lines, $to_lines) {
  120. $n_from = sizeof($from_lines);
  121. $n_to = sizeof($to_lines);
  122. $this->xchanged = $this->ychanged = array();
  123. $this->xv = $this->yv = array();
  124. $this->xind = $this->yind = array();
  125. unset($this->seq);
  126. unset($this->in_seq);
  127. unset($this->lcs);
  128. // Skip leading common lines.
  129. for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
  130. if ($from_lines[$skip] !== $to_lines[$skip]) {
  131. break;
  132. }
  133. $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
  134. }
  135. // Skip trailing common lines.
  136. $xi = $n_from;
  137. $yi = $n_to;
  138. for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
  139. if ($from_lines[$xi] !== $to_lines[$yi]) {
  140. break;
  141. }
  142. $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
  143. }
  144. // Ignore lines which do not exist in both files.
  145. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  146. $xhash[$this->_line_hash($from_lines[$xi])] = 1;
  147. }
  148. for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
  149. $line = $to_lines[$yi];
  150. if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
  151. continue;
  152. }
  153. $yhash[$this->_line_hash($line)] = 1;
  154. $this->yv[] = $line;
  155. $this->yind[] = $yi;
  156. }
  157. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  158. $line = $from_lines[$xi];
  159. if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
  160. continue;
  161. }
  162. $this->xv[] = $line;
  163. $this->xind[] = $xi;
  164. }
  165. // Find the LCS.
  166. $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
  167. // Merge edits when possible
  168. $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
  169. $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
  170. // Compute the edit operations.
  171. $edits = array();
  172. $xi = $yi = 0;
  173. while ($xi < $n_from || $yi < $n_to) {
  174. USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
  175. USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
  176. // Skip matching "snake".
  177. $copy = array();
  178. while ( $xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
  179. $copy[] = $from_lines[$xi++];
  180. ++$yi;
  181. }
  182. if ($copy) {
  183. $edits[] = new _DiffOp_Copy($copy);
  184. }
  185. // Find deletes & adds.
  186. $delete = array();
  187. while ($xi < $n_from && $this->xchanged[$xi]) {
  188. $_fl = $from_lines[$xi++];
  189. if (strlen($_fl)) {
  190. $delete[] = $_fl;
  191. }
  192. }
  193. $add = array();
  194. while ($yi < $n_to && $this->ychanged[$yi]) {
  195. $_tl = $to_lines[$yi++];
  196. if (strlen($_tl)) {
  197. $add[] = $_tl;
  198. }
  199. }
  200. if ($delete && $add) {
  201. $edits[] = new _DiffOp_Change($delete, $add);
  202. }
  203. elseif ($delete) {
  204. $edits[] = new _DiffOp_Delete($delete);
  205. }
  206. elseif ($add) {
  207. $edits[] = new _DiffOp_Add($add);
  208. }
  209. }
  210. return $edits;
  211. }
  212. /**
  213. * Returns the whole line if it's small enough, or the MD5 hash otherwise.
  214. */
  215. function _line_hash($line) {
  216. if (drupal_strlen($line) > $this->MAX_XREF_LENGTH()) {
  217. return md5($line);
  218. }
  219. else {
  220. return $line;
  221. }
  222. }
  223. /**
  224. * Divide the Largest Common Subsequence (LCS) of the sequences
  225. * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
  226. * sized segments.
  227. *
  228. * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
  229. * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
  230. * sub sequences. The first sub-sequence is contained in [X0, X1),
  231. * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
  232. * that (X0, Y0) == (XOFF, YOFF) and
  233. * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
  234. *
  235. * This function assumes that the first lines of the specified portions
  236. * of the two files do not match, and likewise that the last lines do not
  237. * match. The caller must trim matching lines from the beginning and end
  238. * of the portions it is going to specify.
  239. */
  240. function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
  241. $flip = FALSE;
  242. if ($xlim - $xoff > $ylim - $yoff) {
  243. // Things seems faster (I'm not sure I understand why)
  244. // when the shortest sequence in X.
  245. $flip = TRUE;
  246. list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
  247. }
  248. if ($flip) {
  249. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  250. $ymatches[$this->xv[$i]][] = $i;
  251. }
  252. }
  253. else {
  254. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  255. $ymatches[$this->yv[$i]][] = $i;
  256. }
  257. }
  258. $this->lcs = 0;
  259. $this->seq[0]= $yoff - 1;
  260. $this->in_seq = array();
  261. $ymids[0] = array();
  262. $numer = $xlim - $xoff + $nchunks - 1;
  263. $x = $xoff;
  264. for ($chunk = 0; $chunk < $nchunks; $chunk++) {
  265. if ($chunk > 0) {
  266. for ($i = 0; $i <= $this->lcs; $i++) {
  267. $ymids[$i][$chunk-1] = $this->seq[$i];
  268. }
  269. }
  270. $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
  271. for ( ; $x < $x1; $x++) {
  272. $line = $flip ? $this->yv[$x] : $this->xv[$x];
  273. if (empty($ymatches[$line])) {
  274. continue;
  275. }
  276. $matches = $ymatches[$line];
  277. reset($matches);
  278. while (list ($junk, $y) = each($matches)) {
  279. if (empty($this->in_seq[$y])) {
  280. $k = $this->_lcs_pos($y);
  281. USE_ASSERTS && assert($k > 0);
  282. $ymids[$k] = $ymids[$k-1];
  283. break;
  284. }
  285. }
  286. while (list ($junk, $y) = each($matches)) {
  287. if ($y > $this->seq[$k-1]) {
  288. USE_ASSERTS && assert($y < $this->seq[$k]);
  289. // Optimization: this is a common case:
  290. // next match is just replacing previous match.
  291. $this->in_seq[$this->seq[$k]] = FALSE;
  292. $this->seq[$k] = $y;
  293. $this->in_seq[$y] = 1;
  294. }
  295. elseif (empty($this->in_seq[$y])) {
  296. $k = $this->_lcs_pos($y);
  297. USE_ASSERTS && assert($k > 0);
  298. $ymids[$k] = $ymids[$k-1];
  299. }
  300. }
  301. }
  302. }
  303. $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
  304. $ymid = $ymids[$this->lcs];
  305. for ($n = 0; $n < $nchunks - 1; $n++) {
  306. $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
  307. $y1 = $ymid[$n] + 1;
  308. $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
  309. }
  310. $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
  311. return array($this->lcs, $seps);
  312. }
  313. function _lcs_pos($ypos) {
  314. $end = $this->lcs;
  315. if ($end == 0 || $ypos > $this->seq[$end]) {
  316. $this->seq[++$this->lcs] = $ypos;
  317. $this->in_seq[$ypos] = 1;
  318. return $this->lcs;
  319. }
  320. $beg = 1;
  321. while ($beg < $end) {
  322. $mid = (int)(($beg + $end) / 2);
  323. if ($ypos > $this->seq[$mid]) {
  324. $beg = $mid + 1;
  325. }
  326. else {
  327. $end = $mid;
  328. }
  329. }
  330. USE_ASSERTS && assert($ypos != $this->seq[$end]);
  331. $this->in_seq[$this->seq[$end]] = FALSE;
  332. $this->seq[$end] = $ypos;
  333. $this->in_seq[$ypos] = 1;
  334. return $end;
  335. }
  336. /**
  337. * Find LCS of two sequences.
  338. *
  339. * The results are recorded in the vectors $this->{x,y}changed[], by
  340. * storing a 1 in the element for each line that is an insertion
  341. * or deletion (ie. is not in the LCS).
  342. *
  343. * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
  344. *
  345. * Note that XLIM, YLIM are exclusive bounds.
  346. * All line numbers are origin-0 and discarded lines are not counted.
  347. */
  348. function _compareseq($xoff, $xlim, $yoff, $ylim) {
  349. // Slide down the bottom initial diagonal.
  350. while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
  351. ++$xoff;
  352. ++$yoff;
  353. }
  354. // Slide up the top initial diagonal.
  355. while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
  356. --$xlim;
  357. --$ylim;
  358. }
  359. if ($xoff == $xlim || $yoff == $ylim) {
  360. $lcs = 0;
  361. }
  362. else {
  363. // This is ad hoc but seems to work well.
  364. //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
  365. //$nchunks = max(2, min(8, (int)$nchunks));
  366. $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
  367. list($lcs, $seps)
  368. = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
  369. }
  370. if ($lcs == 0) {
  371. // X and Y sequences have no common subsequence:
  372. // mark all changed.
  373. while ($yoff < $ylim) {
  374. $this->ychanged[$this->yind[$yoff++]] = 1;
  375. }
  376. while ($xoff < $xlim) {
  377. $this->xchanged[$this->xind[$xoff++]] = 1;
  378. }
  379. }
  380. else {
  381. // Use the partitions to split this problem into subproblems.
  382. reset($seps);
  383. $pt1 = $seps[0];
  384. while ($pt2 = next($seps)) {
  385. $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
  386. $pt1 = $pt2;
  387. }
  388. }
  389. }
  390. /**
  391. * Adjust inserts/deletes of identical lines to join changes
  392. * as much as possible.
  393. *
  394. * We do something when a run of changed lines include a
  395. * line at one end and has an excluded, identical line at the other.
  396. * We are free to choose which identical line is included.
  397. * `compareseq' usually chooses the one at the beginning,
  398. * but usually it is cleaner to consider the following identical line
  399. * to be the "change".
  400. *
  401. * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
  402. */
  403. function _shift_boundaries($lines, &$changed, $other_changed) {
  404. $i = 0;
  405. $j = 0;
  406. USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
  407. $len = sizeof($lines);
  408. $other_len = sizeof($other_changed);
  409. while (1) {
  410. /*
  411. * Scan forwards to find beginning of another run of changes.
  412. * Also keep track of the corresponding point in the other file.
  413. *
  414. * Throughout this code, $i and $j are adjusted together so that
  415. * the first $i elements of $changed and the first $j elements
  416. * of $other_changed both contain the same number of zeros
  417. * (unchanged lines).
  418. * Furthermore, $j is always kept so that $j == $other_len or
  419. * $other_changed[$j] == FALSE.
  420. */
  421. while ($j < $other_len && $other_changed[$j]) {
  422. $j++;
  423. }
  424. while ($i < $len && ! $changed[$i]) {
  425. USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
  426. $i++;
  427. $j++;
  428. while ($j < $other_len && $other_changed[$j]) {
  429. $j++;
  430. }
  431. }
  432. if ($i == $len) {
  433. break;
  434. }
  435. $start = $i;
  436. // Find the end of this run of changes.
  437. while (++$i < $len && $changed[$i]) {
  438. continue;
  439. }
  440. do {
  441. /*
  442. * Record the length of this run of changes, so that
  443. * we can later determine whether the run has grown.
  444. */
  445. $runlength = $i - $start;
  446. /*
  447. * Move the changed region back, so long as the
  448. * previous unchanged line matches the last changed one.
  449. * This merges with previous changed regions.
  450. */
  451. while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
  452. $changed[--$start] = 1;
  453. $changed[--$i] = FALSE;
  454. while ($start > 0 && $changed[$start - 1]) {
  455. $start--;
  456. }
  457. USE_ASSERTS && assert('$j > 0');
  458. while ($other_changed[--$j]) {
  459. continue;
  460. }
  461. USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
  462. }
  463. /*
  464. * Set CORRESPONDING to the end of the changed run, at the last
  465. * point where it corresponds to a changed run in the other file.
  466. * CORRESPONDING == LEN means no such point has been found.
  467. */
  468. $corresponding = $j < $other_len ? $i : $len;
  469. /*
  470. * Move the changed region forward, so long as the
  471. * first changed line matches the following unchanged one.
  472. * This merges with following changed regions.
  473. * Do this second, so that if there are no merges,
  474. * the changed region is moved forward as far as possible.
  475. */
  476. while ($i < $len && $lines[$start] == $lines[$i]) {
  477. $changed[$start++] = FALSE;
  478. $changed[$i++] = 1;
  479. while ($i < $len && $changed[$i]) {
  480. $i++;
  481. }
  482. USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
  483. $j++;
  484. if ($j < $other_len && $other_changed[$j]) {
  485. $corresponding = $i;
  486. while ($j < $other_len && $other_changed[$j]) {
  487. $j++;
  488. }
  489. }
  490. }
  491. } while ($runlength != $i - $start);
  492. /*
  493. * If possible, move the fully-merged run of changes
  494. * back to a corresponding run in the other file.
  495. */
  496. while ($corresponding < $i) {
  497. $changed[--$start] = 1;
  498. $changed[--$i] = 0;
  499. USE_ASSERTS && assert('$j > 0');
  500. while ($other_changed[--$j]) {
  501. continue;
  502. }
  503. USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
  504. }
  505. }
  506. }
  507. }
  508. /**
  509. * Class representing a 'diff' between two sequences of strings.
  510. * @todo document
  511. * @private
  512. * @subpackage DifferenceEngine
  513. */
  514. class Diff {
  515. var $edits;
  516. /**
  517. * Constructor.
  518. * Computes diff between sequences of strings.
  519. *
  520. * @param $from_lines array An array of strings.
  521. * (Typically these are lines from a file.)
  522. * @param $to_lines array An array of strings.
  523. */
  524. function __construct($from_lines, $to_lines) {
  525. $eng = new _DiffEngine;
  526. $this->edits = $eng->diff($from_lines, $to_lines);
  527. //$this->_check($from_lines, $to_lines);
  528. }
  529. /**
  530. * Compute reversed Diff.
  531. *
  532. * SYNOPSIS:
  533. *
  534. * $diff = new Diff($lines1, $lines2);
  535. * $rev = $diff->reverse();
  536. * @return object A Diff object representing the inverse of the
  537. * original diff.
  538. */
  539. function reverse() {
  540. $rev = $this;
  541. $rev->edits = array();
  542. foreach ($this->edits as $edit) {
  543. $rev->edits[] = $edit->reverse();
  544. }
  545. return $rev;
  546. }
  547. /**
  548. * Check for empty diff.
  549. *
  550. * @return bool True iff two sequences were identical.
  551. */
  552. function isEmpty() {
  553. foreach ($this->edits as $edit) {
  554. if ($edit->type != 'copy') {
  555. return FALSE;
  556. }
  557. }
  558. return TRUE;
  559. }
  560. /**
  561. * Compute the length of the Longest Common Subsequence (LCS).
  562. *
  563. * This is mostly for diagnostic purposed.
  564. *
  565. * @return int The length of the LCS.
  566. */
  567. function lcs() {
  568. $lcs = 0;
  569. foreach ($this->edits as $edit) {
  570. if ($edit->type == 'copy') {
  571. $lcs += sizeof($edit->orig);
  572. }
  573. }
  574. return $lcs;
  575. }
  576. /**
  577. * Get the original set of lines.
  578. *
  579. * This reconstructs the $from_lines parameter passed to the
  580. * constructor.
  581. *
  582. * @return array The original sequence of strings.
  583. */
  584. function orig() {
  585. $lines = array();
  586. foreach ($this->edits as $edit) {
  587. if ($edit->orig) {
  588. array_splice($lines, sizeof($lines), 0, $edit->orig);
  589. }
  590. }
  591. return $lines;
  592. }
  593. /**
  594. * Get the closing set of lines.
  595. *
  596. * This reconstructs the $to_lines parameter passed to the
  597. * constructor.
  598. *
  599. * @return array The sequence of strings.
  600. */
  601. function closing() {
  602. $lines = array();
  603. foreach ($this->edits as $edit) {
  604. if ($edit->closing) {
  605. array_splice($lines, sizeof($lines), 0, $edit->closing);
  606. }
  607. }
  608. return $lines;
  609. }
  610. /**
  611. * Check a Diff for validity.
  612. *
  613. * This is here only for debugging purposes.
  614. */
  615. function _check($from_lines, $to_lines) {
  616. if (serialize($from_lines) != serialize($this->orig())) {
  617. trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
  618. }
  619. if (serialize($to_lines) != serialize($this->closing())) {
  620. trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
  621. }
  622. $rev = $this->reverse();
  623. if (serialize($to_lines) != serialize($rev->orig())) {
  624. trigger_error("Reversed original doesn't match", E_USER_ERROR);
  625. }
  626. if (serialize($from_lines) != serialize($rev->closing())) {
  627. trigger_error("Reversed closing doesn't match", E_USER_ERROR);
  628. }
  629. $prevtype = 'none';
  630. foreach ($this->edits as $edit) {
  631. if ( $prevtype == $edit->type ) {
  632. trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
  633. }
  634. $prevtype = $edit->type;
  635. }
  636. $lcs = $this->lcs();
  637. trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
  638. }
  639. }
  640. /**
  641. * FIXME: bad name.
  642. * @todo document
  643. * @private
  644. * @subpackage DifferenceEngine
  645. */
  646. class MappedDiff extends Diff {
  647. /**
  648. * Constructor.
  649. *
  650. * Computes diff between sequences of strings.
  651. *
  652. * This can be used to compute things like
  653. * case-insensitve diffs, or diffs which ignore
  654. * changes in white-space.
  655. *
  656. * @param $from_lines array An array of strings.
  657. * (Typically these are lines from a file.)
  658. *
  659. * @param $to_lines array An array of strings.
  660. *
  661. * @param $mapped_from_lines array This array should
  662. * have the same size number of elements as $from_lines.
  663. * The elements in $mapped_from_lines and
  664. * $mapped_to_lines are what is actually compared
  665. * when computing the diff.
  666. *
  667. * @param $mapped_to_lines array This array should
  668. * have the same number of elements as $to_lines.
  669. */
  670. function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
  671. assert(sizeof($from_lines) == sizeof($mapped_from_lines));
  672. assert(sizeof($to_lines) == sizeof($mapped_to_lines));
  673. parent::__construct($mapped_from_lines, $mapped_to_lines);
  674. $xi = $yi = 0;
  675. for ($i = 0; $i < sizeof($this->edits); $i++) {
  676. $orig = &$this->edits[$i]->orig;
  677. if (is_array($orig)) {
  678. $orig = array_slice($from_lines, $xi, sizeof($orig));
  679. $xi += sizeof($orig);
  680. }
  681. $closing = &$this->edits[$i]->closing;
  682. if (is_array($closing)) {
  683. $closing = array_slice($to_lines, $yi, sizeof($closing));
  684. $yi += sizeof($closing);
  685. }
  686. }
  687. }
  688. }
  689. /**
  690. * A class to format Diffs
  691. *
  692. * This class formats the diff in classic diff format.
  693. * It is intended that this class be customized via inheritance,
  694. * to obtain fancier outputs.
  695. * @todo document
  696. * @private
  697. * @subpackage DifferenceEngine
  698. */
  699. class DiffFormatter {
  700. /**
  701. * Should a block header be shown?
  702. */
  703. var $show_header = TRUE;
  704. /**
  705. * Number of leading context "lines" to preserve.
  706. *
  707. * This should be left at zero for this class, but subclasses
  708. * may want to set this to other values.
  709. */
  710. var $leading_context_lines = 0;
  711. /**
  712. * Number of trailing context "lines" to preserve.
  713. *
  714. * This should be left at zero for this class, but subclasses
  715. * may want to set this to other values.
  716. */
  717. var $trailing_context_lines = 0;
  718. /**
  719. * Format a diff.
  720. *
  721. * @param $diff object A Diff object.
  722. * @return string The formatted output.
  723. */
  724. function format($diff) {
  725. $xi = $yi = 1;
  726. $block = FALSE;
  727. $context = array();
  728. $nlead = $this->leading_context_lines;
  729. $ntrail = $this->trailing_context_lines;
  730. $this->_start_diff();
  731. foreach ($diff->edits as $edit) {
  732. if ($edit->type == 'copy') {
  733. if (is_array($block)) {
  734. if (sizeof($edit->orig) <= $nlead + $ntrail) {
  735. $block[] = $edit;
  736. }
  737. else {
  738. if ($ntrail) {
  739. $context = array_slice($edit->orig, 0, $ntrail);
  740. $block[] = new _DiffOp_Copy($context);
  741. }
  742. $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
  743. $block = FALSE;
  744. }
  745. }
  746. $context = $edit->orig;
  747. }
  748. else {
  749. if (! is_array($block)) {
  750. $context = array_slice($context, sizeof($context) - $nlead);
  751. $x0 = $xi - sizeof($context);
  752. $y0 = $yi - sizeof($context);
  753. $block = array();
  754. if ($context) {
  755. $block[] = new _DiffOp_Copy($context);
  756. }
  757. }
  758. $block[] = $edit;
  759. }
  760. if ($edit->orig) {
  761. $xi += sizeof($edit->orig);
  762. }
  763. if ($edit->closing) {
  764. $yi += sizeof($edit->closing);
  765. }
  766. }
  767. if (is_array($block)) {
  768. $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
  769. }
  770. $end = $this->_end_diff();
  771. if (!empty($xi)) {
  772. $this->line_stats['counter']['x'] += $xi;
  773. }
  774. if (!empty($yi)) {
  775. $this->line_stats['counter']['y'] += $yi;
  776. }
  777. return $end;
  778. }
  779. function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
  780. $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
  781. foreach ($edits as $edit) {
  782. if ($edit->type == 'copy') {
  783. $this->_context($edit->orig);
  784. }
  785. elseif ($edit->type == 'add') {
  786. $this->_added($edit->closing);
  787. }
  788. elseif ($edit->type == 'delete') {
  789. $this->_deleted($edit->orig);
  790. }
  791. elseif ($edit->type == 'change') {
  792. $this->_changed($edit->orig, $edit->closing);
  793. }
  794. else {
  795. trigger_error('Unknown edit type', E_USER_ERROR);
  796. }
  797. }
  798. $this->_end_block();
  799. }
  800. function _start_diff() {
  801. ob_start();
  802. }
  803. function _end_diff() {
  804. $val = ob_get_contents();
  805. ob_end_clean();
  806. return $val;
  807. }
  808. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  809. if ($xlen > 1) {
  810. $xbeg .= "," . ($xbeg + $xlen - 1);
  811. }
  812. if ($ylen > 1) {
  813. $ybeg .= "," . ($ybeg + $ylen - 1);
  814. }
  815. return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
  816. }
  817. function _start_block($header) {
  818. if ($this->show_header) {
  819. echo $header . "\n";
  820. }
  821. }
  822. function _end_block() {
  823. }
  824. function _lines($lines, $prefix = ' ') {
  825. foreach ($lines as $line) {
  826. echo "$prefix $line\n";
  827. }
  828. }
  829. function _context($lines) {
  830. $this->_lines($lines);
  831. }
  832. function _added($lines) {
  833. $this->_lines($lines, '>');
  834. }
  835. function _deleted($lines) {
  836. $this->_lines($lines, '<');
  837. }
  838. function _changed($orig, $closing) {
  839. $this->_deleted($orig);
  840. echo "---\n";
  841. $this->_added($closing);
  842. }
  843. }
  844. /**
  845. * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
  846. *
  847. */
  848. define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
  849. /**
  850. * @todo document
  851. * @private
  852. * @subpackage DifferenceEngine
  853. */
  854. class _HWLDF_WordAccumulator {
  855. function __construct() {
  856. $this->_lines = array();
  857. $this->_line = '';
  858. $this->_group = '';
  859. $this->_tag = '';
  860. }
  861. function _flushGroup($new_tag) {
  862. if ($this->_group !== '') {
  863. if ($this->_tag == 'mark') {
  864. $this->_line .= '<span class="diffchange">' . check_plain($this->_group) . '</span>';
  865. }
  866. else {
  867. $this->_line .= check_plain($this->_group);
  868. }
  869. }
  870. $this->_group = '';
  871. $this->_tag = $new_tag;
  872. }
  873. function _flushLine($new_tag) {
  874. $this->_flushGroup($new_tag);
  875. if ($this->_line != '') {
  876. array_push($this->_lines, $this->_line);
  877. }
  878. else {
  879. // make empty lines visible by inserting an NBSP
  880. array_push($this->_lines, NBSP);
  881. }
  882. $this->_line = '';
  883. }
  884. function addWords($words, $tag = '') {
  885. if ($tag != $this->_tag) {
  886. $this->_flushGroup($tag);
  887. }
  888. foreach ($words as $word) {
  889. // new-line should only come as first char of word.
  890. if ($word == '') {
  891. continue;
  892. }
  893. if ($word[0] == "\n") {
  894. $this->_flushLine($tag);
  895. $word = drupal_substr($word, 1);
  896. }
  897. assert(!strstr($word, "\n"));
  898. $this->_group .= $word;
  899. }
  900. }
  901. function getLines() {
  902. $this->_flushLine('~done');
  903. return $this->_lines;
  904. }
  905. }
  906. /**
  907. * @todo document
  908. * @private
  909. * @subpackage DifferenceEngine
  910. */
  911. class WordLevelDiff extends MappedDiff {
  912. function MAX_LINE_LENGTH() {
  913. return 10000;
  914. }
  915. function __construct($orig_lines, $closing_lines) {
  916. list($orig_words, $orig_stripped) = $this->_split($orig_lines);
  917. list($closing_words, $closing_stripped) = $this->_split($closing_lines);
  918. parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
  919. }
  920. function _split($lines) {
  921. $words = array();
  922. $stripped = array();
  923. $first = TRUE;
  924. foreach ($lines as $line) {
  925. // If the line is too long, just pretend the entire line is one big word
  926. // This prevents resource exhaustion problems
  927. if ( $first ) {
  928. $first = FALSE;
  929. }
  930. else {
  931. $words[] = "\n";
  932. $stripped[] = "\n";
  933. }
  934. if ( drupal_strlen( $line ) > $this->MAX_LINE_LENGTH() ) {
  935. $words[] = $line;
  936. $stripped[] = $line;
  937. }
  938. else {
  939. if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m)) {
  940. $words = array_merge($words, $m[0]);
  941. $stripped = array_merge($stripped, $m[1]);
  942. }
  943. }
  944. }
  945. return array($words, $stripped);
  946. }
  947. function orig() {
  948. $orig = new _HWLDF_WordAccumulator;
  949. foreach ($this->edits as $edit) {
  950. if ($edit->type == 'copy') {
  951. $orig->addWords($edit->orig);
  952. }
  953. elseif ($edit->orig) {
  954. $orig->addWords($edit->orig, 'mark');
  955. }
  956. }
  957. $lines = $orig->getLines();
  958. return $lines;
  959. }
  960. function closing() {
  961. $closing = new _HWLDF_WordAccumulator;
  962. foreach ($this->edits as $edit) {
  963. if ($edit->type == 'copy') {
  964. $closing->addWords($edit->closing);
  965. }
  966. elseif ($edit->closing) {
  967. $closing->addWords($edit->closing, 'mark');
  968. }
  969. }
  970. $lines = $closing->getLines();
  971. return $lines;
  972. }
  973. }
  974. /**
  975. * Diff formatter which uses Drupal theme functions.
  976. * @private
  977. * @subpackage DifferenceEngine
  978. */
  979. class DrupalDiffFormatter extends DiffFormatter {
  980. var $rows;
  981. var $line_stats = array(
  982. 'counter' => array('x' => 0, 'y' => 0),
  983. 'offset' => array('x' => 0, 'y' => 0),
  984. );
  985. function __construct() {
  986. $this->leading_context_lines = variable_get('diff_context_lines_leading', 2);
  987. $this->trailing_context_lines = variable_get('diff_context_lines_trailing', 2);
  988. }
  989. function _start_diff() {
  990. $this->rows = array();
  991. }
  992. function _end_diff() {
  993. return $this->rows;
  994. }
  995. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  996. return array(
  997. array(
  998. 'data' => theme('diff_header_line', array('lineno' => $xbeg + $this->line_stats['offset']['x'])),
  999. 'colspan' => 2,
  1000. ),
  1001. array(
  1002. 'data' => theme('diff_header_line', array('lineno' => $ybeg + $this->line_stats['offset']['y'])),
  1003. 'colspan' => 2,
  1004. )
  1005. );
  1006. }
  1007. function _start_block($header) {
  1008. if ($this->show_header) {
  1009. $this->rows[] = $header;
  1010. }
  1011. }
  1012. function _end_block() {
  1013. }
  1014. function _lines($lines, $prefix=' ', $color='white') {
  1015. }
  1016. /**
  1017. * Note: you should HTML-escape parameter before calling this.
  1018. */
  1019. function addedLine($line) {
  1020. return array(
  1021. array(
  1022. 'data' => '+',
  1023. 'class' => 'diff-marker',
  1024. ),
  1025. array(
  1026. 'data' => theme('diff_content_line', array('line' => $line)),
  1027. 'class' => 'diff-context diff-addedline',
  1028. )
  1029. );
  1030. }
  1031. /**
  1032. * Note: you should HTML-escape parameter before calling this.
  1033. */
  1034. function deletedLine($line) {
  1035. return array(
  1036. array(
  1037. 'data' => '-',
  1038. 'class' => 'diff-marker',
  1039. ),
  1040. array(
  1041. 'data' => theme('diff_content_line', array('line' => $line)),
  1042. 'class' => 'diff-context diff-deletedline',
  1043. )
  1044. );
  1045. }
  1046. /**
  1047. * Note: you should HTML-escape parameter before calling this.
  1048. */
  1049. function contextLine($line) {
  1050. return array(
  1051. '&nbsp;',
  1052. array(
  1053. 'data' => theme('diff_content_line', array('line' => $line)),
  1054. 'class' => 'diff-context',
  1055. )
  1056. );
  1057. }
  1058. function emptyLine() {
  1059. return array(
  1060. '&nbsp;',
  1061. theme('diff_empty_line', array('line' => '&nbsp;')),
  1062. );
  1063. }
  1064. function _added($lines) {
  1065. foreach ($lines as $line) {
  1066. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine(check_plain($line)));
  1067. }
  1068. }
  1069. function _deleted($lines) {
  1070. foreach ($lines as $line) {
  1071. $this->rows[] = array_merge($this->deletedLine(check_plain($line)), $this->emptyLine());
  1072. }
  1073. }
  1074. function _context($lines) {
  1075. foreach ($lines as $line) {
  1076. $this->rows[] = array_merge($this->contextLine(check_plain($line)), $this->contextLine(check_plain($line)));
  1077. }
  1078. }
  1079. function _changed($orig, $closing) {
  1080. $diff = new WordLevelDiff($orig, $closing);
  1081. $del = $diff->orig();
  1082. $add = $diff->closing();
  1083. // Notice that WordLevelDiff returns HTML-escaped output.
  1084. // Hence, we will be calling addedLine/deletedLine without HTML-escaping.
  1085. while ($line = array_shift($del)) {
  1086. $aline = array_shift( $add );
  1087. $this->rows[] = array_merge($this->deletedLine($line), isset($aline) ? $this->addedLine($aline) : $this->emptyLine());
  1088. }
  1089. foreach ($add as $line) { // If any leftovers
  1090. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine($line));
  1091. }
  1092. }
  1093. }
  1094. /**
  1095. * Drupal inline Diff formatter.
  1096. * @private
  1097. * @subpackage DifferenceEngine
  1098. */
  1099. class DrupalDiffInline {
  1100. var $a;
  1101. var $b;
  1102. /**
  1103. * Constructor.
  1104. */
  1105. function __construct($a, $b) {
  1106. $this->a = $a;
  1107. $this->b = $b;
  1108. }
  1109. /**
  1110. * Render differences inline using HTML markup.
  1111. */
  1112. function render() {
  1113. $a = preg_split('/(<[^>]+?>| )/', $this->a, -1, PREG_SPLIT_DELIM_CAPTURE);
  1114. $b = preg_split('/(<[^>]+?>| )/', $this->b, -1, PREG_SPLIT_DELIM_CAPTURE);
  1115. $diff = new Diff($a, $b);
  1116. $diff->edits = $this->process_edits($diff->edits);
  1117. // Assemble highlighted output
  1118. $output = '';
  1119. foreach ($diff->edits as $chunk) {
  1120. switch ($chunk->type) {
  1121. case 'copy':
  1122. $output .= implode('', $chunk->closing);
  1123. break;
  1124. case 'delete':
  1125. foreach ($chunk->orig as $i => $piece) {
  1126. if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
  1127. $output .= $piece;
  1128. }
  1129. else {
  1130. $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
  1131. }
  1132. }
  1133. break;
  1134. default:
  1135. $chunk->closing = $this->process_chunk($chunk->closing);
  1136. foreach ($chunk->closing as $i => $piece) {
  1137. if ($piece === ' ' || (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>' && drupal_strtolower(drupal_substr($piece, 1, 3)) != 'img')) {
  1138. $output .= $piece;
  1139. }
  1140. else {
  1141. $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
  1142. }
  1143. }
  1144. break;
  1145. }
  1146. }
  1147. return $output;
  1148. }
  1149. /**
  1150. * Merge chunk segments between tag delimiters.
  1151. */
  1152. function process_chunk($chunk) {
  1153. $processed = array();
  1154. $j = 0;
  1155. foreach ($chunk as $i => $piece) {
  1156. $next = isset($chunk[$i+1]) ? $chunk[$i+1] : NULL;
  1157. if (!isset($processed[$j])) {
  1158. $processed[$j] = '';
  1159. }
  1160. if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
  1161. $processed[$j] = $piece;
  1162. $j++;
  1163. }
  1164. elseif (isset($next) && strpos($next, '<') === 0 && drupal_substr($next, drupal_strlen($next) - 1) === '>') {
  1165. $processed[$j] .= $piece;
  1166. $j++;
  1167. }
  1168. else {
  1169. $processed[$j] .= $piece;
  1170. }
  1171. }
  1172. return $processed;
  1173. }
  1174. /**
  1175. * Merge copy and equivalent edits into intelligible chunks.
  1176. */
  1177. function process_edits($edits) {
  1178. $processed = array();
  1179. $current = array_shift($edits);
  1180. // Make two passes -- first merge space delimiter copies back into their originals.
  1181. while ($chunk = array_shift($edits)) {
  1182. if ($chunk->type == 'copy' && $chunk->orig === array(' ')) {
  1183. $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
  1184. $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
  1185. }
  1186. else {
  1187. $processed[] = $current;
  1188. $current = $chunk;
  1189. }
  1190. }
  1191. $processed[] = $current;
  1192. // Initial setup
  1193. $edits = $processed;
  1194. $processed = array();
  1195. $current = array_shift($edits);
  1196. // Second, merge equivalent chunks into each other.
  1197. while ($chunk = array_shift($edits)) {
  1198. if ($current->type == $chunk->type) {
  1199. $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
  1200. $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
  1201. }
  1202. else {
  1203. $processed[] = $current;
  1204. $current = $chunk;
  1205. }
  1206. }
  1207. $processed[] = $current;
  1208. return $processed;
  1209. }
  1210. }