DiffEngine.php 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356
  1. <?php
  2. /**
  3. * @file
  4. * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
  5. *
  6. * Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
  7. * You may copy this code freely under the conditions of the GPL.
  8. */
  9. define('USE_ASSERTS', FALSE);
  10. /**
  11. * @todo document
  12. * @private
  13. * @subpackage DifferenceEngine
  14. */
  15. class _DiffOp {
  16. var $type;
  17. var $orig;
  18. var $closing;
  19. function reverse() {
  20. trigger_error('pure virtual', E_USER_ERROR);
  21. }
  22. function norig() {
  23. return $this->orig ? sizeof($this->orig) : 0;
  24. }
  25. function nclosing() {
  26. return $this->closing ? sizeof($this->closing) : 0;
  27. }
  28. }
  29. /**
  30. * @todo document
  31. * @private
  32. * @subpackage DifferenceEngine
  33. */
  34. class _DiffOp_Copy extends _DiffOp {
  35. var $type = 'copy';
  36. function __construct($orig, $closing = FALSE) {
  37. if (!is_array($closing)) {
  38. $closing = $orig;
  39. }
  40. $this->orig = $orig;
  41. $this->closing = $closing;
  42. }
  43. function reverse() {
  44. return new _DiffOp_Copy($this->closing, $this->orig);
  45. }
  46. }
  47. /**
  48. * @todo document
  49. * @private
  50. * @subpackage DifferenceEngine
  51. */
  52. class _DiffOp_Delete extends _DiffOp {
  53. var $type = 'delete';
  54. function __construct($lines) {
  55. $this->orig = $lines;
  56. $this->closing = FALSE;
  57. }
  58. function reverse() {
  59. return new _DiffOp_Add($this->orig);
  60. }
  61. }
  62. /**
  63. * @todo document
  64. * @private
  65. * @subpackage DifferenceEngine
  66. */
  67. class _DiffOp_Add extends _DiffOp {
  68. var $type = 'add';
  69. function __construct($lines) {
  70. $this->closing = $lines;
  71. $this->orig = FALSE;
  72. }
  73. function reverse() {
  74. return new _DiffOp_Delete($this->closing);
  75. }
  76. }
  77. /**
  78. * @todo document
  79. * @private
  80. * @subpackage DifferenceEngine
  81. */
  82. class _DiffOp_Change extends _DiffOp {
  83. var $type = 'change';
  84. function __construct($orig, $closing) {
  85. $this->orig = $orig;
  86. $this->closing = $closing;
  87. }
  88. function reverse() {
  89. return new _DiffOp_Change($this->closing, $this->orig);
  90. }
  91. }
  92. /**
  93. * Class used internally by Diff to actually compute the diffs.
  94. *
  95. * The algorithm used here is mostly lifted from the perl module
  96. * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
  97. * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
  98. *
  99. * More ideas are taken from:
  100. * http://www.ics.uci.edu/~eppstein/161/960229.html
  101. *
  102. * Some ideas are (and a bit of code) are from from analyze.c, from GNU
  103. * diffutils-2.7, which can be found at:
  104. * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
  105. *
  106. * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
  107. * are my own.
  108. *
  109. * Line length limits for robustness added by Tim Starling, 2005-08-31
  110. *
  111. * @author Geoffrey T. Dairiki, Tim Starling
  112. * @private
  113. * @subpackage DifferenceEngine
  114. */
  115. class _DiffEngine {
  116. function MAX_XREF_LENGTH() {
  117. return 10000;
  118. }
  119. function diff($from_lines, $to_lines) {
  120. $n_from = sizeof($from_lines);
  121. $n_to = sizeof($to_lines);
  122. $this->xchanged = $this->ychanged = array();
  123. $this->xv = $this->yv = array();
  124. $this->xind = $this->yind = array();
  125. unset($this->seq);
  126. unset($this->in_seq);
  127. unset($this->lcs);
  128. // Skip leading common lines.
  129. for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
  130. if ($from_lines[$skip] !== $to_lines[$skip]) {
  131. break;
  132. }
  133. $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
  134. }
  135. // Skip trailing common lines.
  136. $xi = $n_from;
  137. $yi = $n_to;
  138. for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
  139. if ($from_lines[$xi] !== $to_lines[$yi]) {
  140. break;
  141. }
  142. $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
  143. }
  144. // Ignore lines which do not exist in both files.
  145. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  146. $xhash[$this->_line_hash($from_lines[$xi])] = 1;
  147. }
  148. for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
  149. $line = $to_lines[$yi];
  150. if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
  151. continue;
  152. }
  153. $yhash[$this->_line_hash($line)] = 1;
  154. $this->yv[] = $line;
  155. $this->yind[] = $yi;
  156. }
  157. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  158. $line = $from_lines[$xi];
  159. if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
  160. continue;
  161. }
  162. $this->xv[] = $line;
  163. $this->xind[] = $xi;
  164. }
  165. // Find the LCS.
  166. $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
  167. // Merge edits when possible
  168. $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
  169. $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
  170. // Compute the edit operations.
  171. $edits = array();
  172. $xi = $yi = 0;
  173. while ($xi < $n_from || $yi < $n_to) {
  174. USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
  175. USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
  176. // Skip matching "snake".
  177. $copy = array();
  178. while ( $xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
  179. $copy[] = $from_lines[$xi++];
  180. ++$yi;
  181. }
  182. if ($copy) {
  183. $edits[] = new _DiffOp_Copy($copy);
  184. }
  185. // Find deletes & adds.
  186. $delete = array();
  187. while ($xi < $n_from && $this->xchanged[$xi]) {
  188. $_fl = $from_lines[$xi++];
  189. if (strlen($_fl)) {
  190. $delete[] = $_fl;
  191. }
  192. }
  193. $add = array();
  194. while ($yi < $n_to && $this->ychanged[$yi]) {
  195. $_tl = $to_lines[$yi++];
  196. if (strlen($_tl)) {
  197. $add[] = $_tl;
  198. }
  199. }
  200. if ($delete && $add) {
  201. $edits[] = new _DiffOp_Change($delete, $add);
  202. }
  203. elseif ($delete) {
  204. $edits[] = new _DiffOp_Delete($delete);
  205. }
  206. elseif ($add) {
  207. $edits[] = new _DiffOp_Add($add);
  208. }
  209. }
  210. return $edits;
  211. }
  212. /**
  213. * Returns the whole line if it's small enough, or the MD5 hash otherwise.
  214. */
  215. function _line_hash($line) {
  216. if (drupal_strlen($line) > $this->MAX_XREF_LENGTH()) {
  217. return md5($line);
  218. }
  219. else {
  220. return $line;
  221. }
  222. }
  223. /**
  224. * Divide the Largest Common Subsequence (LCS) of the sequences
  225. * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
  226. * sized segments.
  227. *
  228. * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
  229. * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
  230. * sub sequences. The first sub-sequence is contained in [X0, X1),
  231. * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
  232. * that (X0, Y0) == (XOFF, YOFF) and
  233. * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
  234. *
  235. * This function assumes that the first lines of the specified portions
  236. * of the two files do not match, and likewise that the last lines do not
  237. * match. The caller must trim matching lines from the beginning and end
  238. * of the portions it is going to specify.
  239. */
  240. function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
  241. $flip = FALSE;
  242. if ($xlim - $xoff > $ylim - $yoff) {
  243. // Things seems faster (I'm not sure I understand why)
  244. // when the shortest sequence in X.
  245. $flip = TRUE;
  246. list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
  247. }
  248. if ($flip) {
  249. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  250. $ymatches[$this->xv[$i]][] = $i;
  251. }
  252. }
  253. else {
  254. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  255. $ymatches[$this->yv[$i]][] = $i;
  256. }
  257. }
  258. $this->lcs = 0;
  259. $this->seq[0]= $yoff - 1;
  260. $this->in_seq = array();
  261. $ymids[0] = array();
  262. $numer = $xlim - $xoff + $nchunks - 1;
  263. $x = $xoff;
  264. for ($chunk = 0; $chunk < $nchunks; $chunk++) {
  265. if ($chunk > 0) {
  266. for ($i = 0; $i <= $this->lcs; $i++) {
  267. $ymids[$i][$chunk-1] = $this->seq[$i];
  268. }
  269. }
  270. $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
  271. for ( ; $x < $x1; $x++) {
  272. $line = $flip ? $this->yv[$x] : $this->xv[$x];
  273. if (empty($ymatches[$line])) {
  274. continue;
  275. }
  276. $matches = $ymatches[$line];
  277. reset($matches);
  278. while ($y = current($matches)) {
  279. next($matches);
  280. if (empty($this->in_seq[$y])) {
  281. $k = $this->_lcs_pos($y);
  282. USE_ASSERTS && assert($k > 0);
  283. $ymids[$k] = $ymids[$k-1];
  284. break;
  285. }
  286. }
  287. while ($y = current($matches)) {
  288. next($matches);
  289. if ($y > $this->seq[$k-1]) {
  290. USE_ASSERTS && assert($y < $this->seq[$k]);
  291. // Optimization: this is a common case:
  292. // next match is just replacing previous match.
  293. $this->in_seq[$this->seq[$k]] = FALSE;
  294. $this->seq[$k] = $y;
  295. $this->in_seq[$y] = 1;
  296. }
  297. elseif (empty($this->in_seq[$y])) {
  298. $k = $this->_lcs_pos($y);
  299. USE_ASSERTS && assert($k > 0);
  300. $ymids[$k] = $ymids[$k-1];
  301. }
  302. }
  303. }
  304. }
  305. $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
  306. $ymid = $ymids[$this->lcs];
  307. for ($n = 0; $n < $nchunks - 1; $n++) {
  308. $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
  309. $y1 = $ymid[$n] + 1;
  310. $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
  311. }
  312. $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
  313. return array($this->lcs, $seps);
  314. }
  315. function _lcs_pos($ypos) {
  316. $end = $this->lcs;
  317. if ($end == 0 || $ypos > $this->seq[$end]) {
  318. $this->seq[++$this->lcs] = $ypos;
  319. $this->in_seq[$ypos] = 1;
  320. return $this->lcs;
  321. }
  322. $beg = 1;
  323. while ($beg < $end) {
  324. $mid = (int)(($beg + $end) / 2);
  325. if ($ypos > $this->seq[$mid]) {
  326. $beg = $mid + 1;
  327. }
  328. else {
  329. $end = $mid;
  330. }
  331. }
  332. USE_ASSERTS && assert($ypos != $this->seq[$end]);
  333. $this->in_seq[$this->seq[$end]] = FALSE;
  334. $this->seq[$end] = $ypos;
  335. $this->in_seq[$ypos] = 1;
  336. return $end;
  337. }
  338. /**
  339. * Find LCS of two sequences.
  340. *
  341. * The results are recorded in the vectors $this->{x,y}changed[], by
  342. * storing a 1 in the element for each line that is an insertion
  343. * or deletion (ie. is not in the LCS).
  344. *
  345. * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
  346. *
  347. * Note that XLIM, YLIM are exclusive bounds.
  348. * All line numbers are origin-0 and discarded lines are not counted.
  349. */
  350. function _compareseq($xoff, $xlim, $yoff, $ylim) {
  351. // Slide down the bottom initial diagonal.
  352. while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
  353. ++$xoff;
  354. ++$yoff;
  355. }
  356. // Slide up the top initial diagonal.
  357. while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
  358. --$xlim;
  359. --$ylim;
  360. }
  361. if ($xoff == $xlim || $yoff == $ylim) {
  362. $lcs = 0;
  363. }
  364. else {
  365. // This is ad hoc but seems to work well.
  366. //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
  367. //$nchunks = max(2, min(8, (int)$nchunks));
  368. $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
  369. list($lcs, $seps)
  370. = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
  371. }
  372. if ($lcs == 0) {
  373. // X and Y sequences have no common subsequence:
  374. // mark all changed.
  375. while ($yoff < $ylim) {
  376. $this->ychanged[$this->yind[$yoff++]] = 1;
  377. }
  378. while ($xoff < $xlim) {
  379. $this->xchanged[$this->xind[$xoff++]] = 1;
  380. }
  381. }
  382. else {
  383. // Use the partitions to split this problem into subproblems.
  384. reset($seps);
  385. $pt1 = $seps[0];
  386. while ($pt2 = next($seps)) {
  387. $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
  388. $pt1 = $pt2;
  389. }
  390. }
  391. }
  392. /**
  393. * Adjust inserts/deletes of identical lines to join changes
  394. * as much as possible.
  395. *
  396. * We do something when a run of changed lines include a
  397. * line at one end and has an excluded, identical line at the other.
  398. * We are free to choose which identical line is included.
  399. * `compareseq' usually chooses the one at the beginning,
  400. * but usually it is cleaner to consider the following identical line
  401. * to be the "change".
  402. *
  403. * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
  404. */
  405. function _shift_boundaries($lines, &$changed, $other_changed) {
  406. $i = 0;
  407. $j = 0;
  408. USE_ASSERTS && assert(sizeof($lines) == sizeof($changed));
  409. $len = sizeof($lines);
  410. $other_len = sizeof($other_changed);
  411. while (1) {
  412. /*
  413. * Scan forwards to find beginning of another run of changes.
  414. * Also keep track of the corresponding point in the other file.
  415. *
  416. * Throughout this code, $i and $j are adjusted together so that
  417. * the first $i elements of $changed and the first $j elements
  418. * of $other_changed both contain the same number of zeros
  419. * (unchanged lines).
  420. * Furthermore, $j is always kept so that $j == $other_len or
  421. * $other_changed[$j] == FALSE.
  422. */
  423. while ($j < $other_len && $other_changed[$j]) {
  424. $j++;
  425. }
  426. while ($i < $len && ! $changed[$i]) {
  427. USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
  428. $i++;
  429. $j++;
  430. while ($j < $other_len && $other_changed[$j]) {
  431. $j++;
  432. }
  433. }
  434. if ($i == $len) {
  435. break;
  436. }
  437. $start = $i;
  438. // Find the end of this run of changes.
  439. while (++$i < $len && $changed[$i]) {
  440. continue;
  441. }
  442. do {
  443. /*
  444. * Record the length of this run of changes, so that
  445. * we can later determine whether the run has grown.
  446. */
  447. $runlength = $i - $start;
  448. /*
  449. * Move the changed region back, so long as the
  450. * previous unchanged line matches the last changed one.
  451. * This merges with previous changed regions.
  452. */
  453. while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
  454. $changed[--$start] = 1;
  455. $changed[--$i] = FALSE;
  456. while ($start > 0 && $changed[$start - 1]) {
  457. $start--;
  458. }
  459. USE_ASSERTS && assert($j > 0);
  460. while ($other_changed[--$j]) {
  461. continue;
  462. }
  463. USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
  464. }
  465. /*
  466. * Set CORRESPONDING to the end of the changed run, at the last
  467. * point where it corresponds to a changed run in the other file.
  468. * CORRESPONDING == LEN means no such point has been found.
  469. */
  470. $corresponding = $j < $other_len ? $i : $len;
  471. /*
  472. * Move the changed region forward, so long as the
  473. * first changed line matches the following unchanged one.
  474. * This merges with following changed regions.
  475. * Do this second, so that if there are no merges,
  476. * the changed region is moved forward as far as possible.
  477. */
  478. while ($i < $len && $lines[$start] == $lines[$i]) {
  479. $changed[$start++] = FALSE;
  480. $changed[$i++] = 1;
  481. while ($i < $len && $changed[$i]) {
  482. $i++;
  483. }
  484. USE_ASSERTS && assert($j < $other_len && ! $other_changed[$j]);
  485. $j++;
  486. if ($j < $other_len && $other_changed[$j]) {
  487. $corresponding = $i;
  488. while ($j < $other_len && $other_changed[$j]) {
  489. $j++;
  490. }
  491. }
  492. }
  493. } while ($runlength != $i - $start);
  494. /*
  495. * If possible, move the fully-merged run of changes
  496. * back to a corresponding run in the other file.
  497. */
  498. while ($corresponding < $i) {
  499. $changed[--$start] = 1;
  500. $changed[--$i] = 0;
  501. USE_ASSERTS && assert($j > 0);
  502. while ($other_changed[--$j]) {
  503. continue;
  504. }
  505. USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
  506. }
  507. }
  508. }
  509. }
  510. /**
  511. * Class representing a 'diff' between two sequences of strings.
  512. * @todo document
  513. * @private
  514. * @subpackage DifferenceEngine
  515. */
  516. class Diff {
  517. var $edits;
  518. /**
  519. * Constructor.
  520. * Computes diff between sequences of strings.
  521. *
  522. * @param $from_lines array An array of strings.
  523. * (Typically these are lines from a file.)
  524. * @param $to_lines array An array of strings.
  525. */
  526. function __construct($from_lines, $to_lines) {
  527. $eng = new _DiffEngine;
  528. $this->edits = $eng->diff($from_lines, $to_lines);
  529. //$this->_check($from_lines, $to_lines);
  530. }
  531. /**
  532. * Compute reversed Diff.
  533. *
  534. * SYNOPSIS:
  535. *
  536. * $diff = new Diff($lines1, $lines2);
  537. * $rev = $diff->reverse();
  538. * @return object A Diff object representing the inverse of the
  539. * original diff.
  540. */
  541. function reverse() {
  542. $rev = $this;
  543. $rev->edits = array();
  544. foreach ($this->edits as $edit) {
  545. $rev->edits[] = $edit->reverse();
  546. }
  547. return $rev;
  548. }
  549. /**
  550. * Check for empty diff.
  551. *
  552. * @return bool True iff two sequences were identical.
  553. */
  554. function isEmpty() {
  555. foreach ($this->edits as $edit) {
  556. if ($edit->type != 'copy') {
  557. return FALSE;
  558. }
  559. }
  560. return TRUE;
  561. }
  562. /**
  563. * Compute the length of the Longest Common Subsequence (LCS).
  564. *
  565. * This is mostly for diagnostic purposed.
  566. *
  567. * @return int The length of the LCS.
  568. */
  569. function lcs() {
  570. $lcs = 0;
  571. foreach ($this->edits as $edit) {
  572. if ($edit->type == 'copy') {
  573. $lcs += sizeof($edit->orig);
  574. }
  575. }
  576. return $lcs;
  577. }
  578. /**
  579. * Get the original set of lines.
  580. *
  581. * This reconstructs the $from_lines parameter passed to the
  582. * constructor.
  583. *
  584. * @return array The original sequence of strings.
  585. */
  586. function orig() {
  587. $lines = array();
  588. foreach ($this->edits as $edit) {
  589. if ($edit->orig) {
  590. array_splice($lines, sizeof($lines), 0, $edit->orig);
  591. }
  592. }
  593. return $lines;
  594. }
  595. /**
  596. * Get the closing set of lines.
  597. *
  598. * This reconstructs the $to_lines parameter passed to the
  599. * constructor.
  600. *
  601. * @return array The sequence of strings.
  602. */
  603. function closing() {
  604. $lines = array();
  605. foreach ($this->edits as $edit) {
  606. if ($edit->closing) {
  607. array_splice($lines, sizeof($lines), 0, $edit->closing);
  608. }
  609. }
  610. return $lines;
  611. }
  612. /**
  613. * Check a Diff for validity.
  614. *
  615. * This is here only for debugging purposes.
  616. */
  617. function _check($from_lines, $to_lines) {
  618. if (serialize($from_lines) != serialize($this->orig())) {
  619. trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
  620. }
  621. if (serialize($to_lines) != serialize($this->closing())) {
  622. trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
  623. }
  624. $rev = $this->reverse();
  625. if (serialize($to_lines) != serialize($rev->orig())) {
  626. trigger_error("Reversed original doesn't match", E_USER_ERROR);
  627. }
  628. if (serialize($from_lines) != serialize($rev->closing())) {
  629. trigger_error("Reversed closing doesn't match", E_USER_ERROR);
  630. }
  631. $prevtype = 'none';
  632. foreach ($this->edits as $edit) {
  633. if ( $prevtype == $edit->type ) {
  634. trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
  635. }
  636. $prevtype = $edit->type;
  637. }
  638. $lcs = $this->lcs();
  639. trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
  640. }
  641. }
  642. /**
  643. * FIXME: bad name.
  644. * @todo document
  645. * @private
  646. * @subpackage DifferenceEngine
  647. */
  648. class MappedDiff extends Diff {
  649. /**
  650. * Constructor.
  651. *
  652. * Computes diff between sequences of strings.
  653. *
  654. * This can be used to compute things like
  655. * case-insensitve diffs, or diffs which ignore
  656. * changes in white-space.
  657. *
  658. * @param $from_lines array An array of strings.
  659. * (Typically these are lines from a file.)
  660. *
  661. * @param $to_lines array An array of strings.
  662. *
  663. * @param $mapped_from_lines array This array should
  664. * have the same size number of elements as $from_lines.
  665. * The elements in $mapped_from_lines and
  666. * $mapped_to_lines are what is actually compared
  667. * when computing the diff.
  668. *
  669. * @param $mapped_to_lines array This array should
  670. * have the same number of elements as $to_lines.
  671. */
  672. function __construct($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
  673. USE_ASSERTS && assert(sizeof($from_lines) == sizeof($mapped_from_lines));
  674. USE_ASSERTS && assert(sizeof($to_lines) == sizeof($mapped_to_lines));
  675. parent::__construct($mapped_from_lines, $mapped_to_lines);
  676. $xi = $yi = 0;
  677. for ($i = 0; $i < sizeof($this->edits); $i++) {
  678. $orig = &$this->edits[$i]->orig;
  679. if (is_array($orig)) {
  680. $orig = array_slice($from_lines, $xi, sizeof($orig));
  681. $xi += sizeof($orig);
  682. }
  683. $closing = &$this->edits[$i]->closing;
  684. if (is_array($closing)) {
  685. $closing = array_slice($to_lines, $yi, sizeof($closing));
  686. $yi += sizeof($closing);
  687. }
  688. }
  689. }
  690. }
  691. /**
  692. * A class to format Diffs
  693. *
  694. * This class formats the diff in classic diff format.
  695. * It is intended that this class be customized via inheritance,
  696. * to obtain fancier outputs.
  697. * @todo document
  698. * @private
  699. * @subpackage DifferenceEngine
  700. */
  701. class DiffFormatter {
  702. /**
  703. * Should a block header be shown?
  704. */
  705. var $show_header = TRUE;
  706. /**
  707. * Number of leading context "lines" to preserve.
  708. *
  709. * This should be left at zero for this class, but subclasses
  710. * may want to set this to other values.
  711. */
  712. var $leading_context_lines = 0;
  713. /**
  714. * Number of trailing context "lines" to preserve.
  715. *
  716. * This should be left at zero for this class, but subclasses
  717. * may want to set this to other values.
  718. */
  719. var $trailing_context_lines = 0;
  720. /**
  721. * Format a diff.
  722. *
  723. * @param $diff object A Diff object.
  724. * @return string The formatted output.
  725. */
  726. function format($diff) {
  727. $xi = $yi = 1;
  728. $block = FALSE;
  729. $context = array();
  730. $nlead = $this->leading_context_lines;
  731. $ntrail = $this->trailing_context_lines;
  732. $this->_start_diff();
  733. foreach ($diff->edits as $edit) {
  734. if ($edit->type == 'copy') {
  735. if (is_array($block)) {
  736. if (sizeof($edit->orig) <= $nlead + $ntrail) {
  737. $block[] = $edit;
  738. }
  739. else {
  740. if ($ntrail) {
  741. $context = array_slice($edit->orig, 0, $ntrail);
  742. $block[] = new _DiffOp_Copy($context);
  743. }
  744. $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
  745. $block = FALSE;
  746. }
  747. }
  748. $context = $edit->orig;
  749. }
  750. else {
  751. if (! is_array($block)) {
  752. $context = array_slice($context, sizeof($context) - $nlead);
  753. $x0 = $xi - sizeof($context);
  754. $y0 = $yi - sizeof($context);
  755. $block = array();
  756. if ($context) {
  757. $block[] = new _DiffOp_Copy($context);
  758. }
  759. }
  760. $block[] = $edit;
  761. }
  762. if ($edit->orig) {
  763. $xi += sizeof($edit->orig);
  764. }
  765. if ($edit->closing) {
  766. $yi += sizeof($edit->closing);
  767. }
  768. }
  769. if (is_array($block)) {
  770. $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
  771. }
  772. $end = $this->_end_diff();
  773. if (!empty($xi)) {
  774. $this->line_stats['counter']['x'] += $xi;
  775. }
  776. if (!empty($yi)) {
  777. $this->line_stats['counter']['y'] += $yi;
  778. }
  779. return $end;
  780. }
  781. function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
  782. $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
  783. foreach ($edits as $edit) {
  784. if ($edit->type == 'copy') {
  785. $this->_context($edit->orig);
  786. }
  787. elseif ($edit->type == 'add') {
  788. $this->_added($edit->closing);
  789. }
  790. elseif ($edit->type == 'delete') {
  791. $this->_deleted($edit->orig);
  792. }
  793. elseif ($edit->type == 'change') {
  794. $this->_changed($edit->orig, $edit->closing);
  795. }
  796. else {
  797. trigger_error('Unknown edit type', E_USER_ERROR);
  798. }
  799. }
  800. $this->_end_block();
  801. }
  802. function _start_diff() {
  803. ob_start();
  804. }
  805. function _end_diff() {
  806. $val = ob_get_contents();
  807. ob_end_clean();
  808. return $val;
  809. }
  810. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  811. if ($xlen > 1) {
  812. $xbeg .= "," . ($xbeg + $xlen - 1);
  813. }
  814. if ($ylen > 1) {
  815. $ybeg .= "," . ($ybeg + $ylen - 1);
  816. }
  817. return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
  818. }
  819. function _start_block($header) {
  820. if ($this->show_header) {
  821. echo $header . "\n";
  822. }
  823. }
  824. function _end_block() {
  825. }
  826. function _lines($lines, $prefix = ' ') {
  827. foreach ($lines as $line) {
  828. echo "$prefix $line\n";
  829. }
  830. }
  831. function _context($lines) {
  832. $this->_lines($lines);
  833. }
  834. function _added($lines) {
  835. $this->_lines($lines, '>');
  836. }
  837. function _deleted($lines) {
  838. $this->_lines($lines, '<');
  839. }
  840. function _changed($orig, $closing) {
  841. $this->_deleted($orig);
  842. echo "---\n";
  843. $this->_added($closing);
  844. }
  845. }
  846. /**
  847. * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
  848. *
  849. */
  850. define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
  851. /**
  852. * @todo document
  853. * @private
  854. * @subpackage DifferenceEngine
  855. */
  856. class _HWLDF_WordAccumulator {
  857. function __construct() {
  858. $this->_lines = array();
  859. $this->_line = '';
  860. $this->_group = '';
  861. $this->_tag = '';
  862. }
  863. function _flushGroup($new_tag) {
  864. if ($this->_group !== '') {
  865. if ($this->_tag == 'mark') {
  866. $this->_line .= '<span class="diffchange">' . check_plain($this->_group) . '</span>';
  867. }
  868. else {
  869. $this->_line .= check_plain($this->_group);
  870. }
  871. }
  872. $this->_group = '';
  873. $this->_tag = $new_tag;
  874. }
  875. function _flushLine($new_tag) {
  876. $this->_flushGroup($new_tag);
  877. if ($this->_line != '') {
  878. array_push($this->_lines, $this->_line);
  879. }
  880. else {
  881. // make empty lines visible by inserting an NBSP
  882. array_push($this->_lines, NBSP);
  883. }
  884. $this->_line = '';
  885. }
  886. function addWords($words, $tag = '') {
  887. if ($tag != $this->_tag) {
  888. $this->_flushGroup($tag);
  889. }
  890. foreach ($words as $word) {
  891. // new-line should only come as first char of word.
  892. if ($word == '') {
  893. continue;
  894. }
  895. if ($word[0] == "\n") {
  896. $this->_flushLine($tag);
  897. $word = drupal_substr($word, 1);
  898. }
  899. USE_ASSERTS && assert(!strstr($word, "\n"));
  900. $this->_group .= $word;
  901. }
  902. }
  903. function getLines() {
  904. $this->_flushLine('~done');
  905. return $this->_lines;
  906. }
  907. }
  908. /**
  909. * @todo document
  910. * @private
  911. * @subpackage DifferenceEngine
  912. */
  913. class WordLevelDiff extends MappedDiff {
  914. function MAX_LINE_LENGTH() {
  915. return 10000;
  916. }
  917. function __construct($orig_lines, $closing_lines) {
  918. list($orig_words, $orig_stripped) = $this->_split($orig_lines);
  919. list($closing_words, $closing_stripped) = $this->_split($closing_lines);
  920. parent::__construct($orig_words, $closing_words, $orig_stripped, $closing_stripped);
  921. }
  922. function _split($lines) {
  923. $words = array();
  924. $stripped = array();
  925. $first = TRUE;
  926. foreach ($lines as $line) {
  927. // If the line is too long, just pretend the entire line is one big word
  928. // This prevents resource exhaustion problems
  929. if ( $first ) {
  930. $first = FALSE;
  931. }
  932. else {
  933. $words[] = "\n";
  934. $stripped[] = "\n";
  935. }
  936. if ( drupal_strlen( $line ) > $this->MAX_LINE_LENGTH() ) {
  937. $words[] = $line;
  938. $stripped[] = $line;
  939. }
  940. else {
  941. if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m)) {
  942. $words = array_merge($words, $m[0]);
  943. $stripped = array_merge($stripped, $m[1]);
  944. }
  945. }
  946. }
  947. return array($words, $stripped);
  948. }
  949. function orig() {
  950. $orig = new _HWLDF_WordAccumulator;
  951. foreach ($this->edits as $edit) {
  952. if ($edit->type == 'copy') {
  953. $orig->addWords($edit->orig);
  954. }
  955. elseif ($edit->orig) {
  956. $orig->addWords($edit->orig, 'mark');
  957. }
  958. }
  959. $lines = $orig->getLines();
  960. return $lines;
  961. }
  962. function closing() {
  963. $closing = new _HWLDF_WordAccumulator;
  964. foreach ($this->edits as $edit) {
  965. if ($edit->type == 'copy') {
  966. $closing->addWords($edit->closing);
  967. }
  968. elseif ($edit->closing) {
  969. $closing->addWords($edit->closing, 'mark');
  970. }
  971. }
  972. $lines = $closing->getLines();
  973. return $lines;
  974. }
  975. }
  976. /**
  977. * Diff formatter which uses Drupal theme functions.
  978. * @private
  979. * @subpackage DifferenceEngine
  980. */
  981. class DrupalDiffFormatter extends DiffFormatter {
  982. var $rows;
  983. var $line_stats = array(
  984. 'counter' => array('x' => 0, 'y' => 0),
  985. 'offset' => array('x' => 0, 'y' => 0),
  986. );
  987. function __construct() {
  988. $this->leading_context_lines = variable_get('diff_context_lines_leading', 2);
  989. $this->trailing_context_lines = variable_get('diff_context_lines_trailing', 2);
  990. }
  991. function _start_diff() {
  992. $this->rows = array();
  993. }
  994. function _end_diff() {
  995. return $this->rows;
  996. }
  997. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  998. return array(
  999. array(
  1000. 'data' => theme('diff_header_line', array('lineno' => $xbeg + $this->line_stats['offset']['x'])),
  1001. 'colspan' => 2,
  1002. ),
  1003. array(
  1004. 'data' => theme('diff_header_line', array('lineno' => $ybeg + $this->line_stats['offset']['y'])),
  1005. 'colspan' => 2,
  1006. )
  1007. );
  1008. }
  1009. function _start_block($header) {
  1010. if ($this->show_header) {
  1011. $this->rows[] = $header;
  1012. }
  1013. }
  1014. function _end_block() {
  1015. }
  1016. function _lines($lines, $prefix=' ', $color='white') {
  1017. }
  1018. /**
  1019. * Note: you should HTML-escape parameter before calling this.
  1020. */
  1021. function addedLine($line) {
  1022. return array(
  1023. array(
  1024. 'data' => '+',
  1025. 'class' => 'diff-marker',
  1026. ),
  1027. array(
  1028. 'data' => theme('diff_content_line', array('line' => $line)),
  1029. 'class' => 'diff-context diff-addedline',
  1030. )
  1031. );
  1032. }
  1033. /**
  1034. * Note: you should HTML-escape parameter before calling this.
  1035. */
  1036. function deletedLine($line) {
  1037. return array(
  1038. array(
  1039. 'data' => '-',
  1040. 'class' => 'diff-marker',
  1041. ),
  1042. array(
  1043. 'data' => theme('diff_content_line', array('line' => $line)),
  1044. 'class' => 'diff-context diff-deletedline',
  1045. )
  1046. );
  1047. }
  1048. /**
  1049. * Note: you should HTML-escape parameter before calling this.
  1050. */
  1051. function contextLine($line) {
  1052. return array(
  1053. '&nbsp;',
  1054. array(
  1055. 'data' => theme('diff_content_line', array('line' => $line)),
  1056. 'class' => 'diff-context',
  1057. )
  1058. );
  1059. }
  1060. function emptyLine() {
  1061. return array(
  1062. '&nbsp;',
  1063. theme('diff_empty_line', array('line' => '&nbsp;')),
  1064. );
  1065. }
  1066. function _added($lines) {
  1067. foreach ($lines as $line) {
  1068. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine(check_plain($line)));
  1069. }
  1070. }
  1071. function _deleted($lines) {
  1072. foreach ($lines as $line) {
  1073. $this->rows[] = array_merge($this->deletedLine(check_plain($line)), $this->emptyLine());
  1074. }
  1075. }
  1076. function _context($lines) {
  1077. foreach ($lines as $line) {
  1078. $this->rows[] = array_merge($this->contextLine(check_plain($line)), $this->contextLine(check_plain($line)));
  1079. }
  1080. }
  1081. function _changed($orig, $closing) {
  1082. $diff = new WordLevelDiff($orig, $closing);
  1083. $del = $diff->orig();
  1084. $add = $diff->closing();
  1085. // Notice that WordLevelDiff returns HTML-escaped output.
  1086. // Hence, we will be calling addedLine/deletedLine without HTML-escaping.
  1087. while ($line = array_shift($del)) {
  1088. $aline = array_shift( $add );
  1089. $this->rows[] = array_merge($this->deletedLine($line), isset($aline) ? $this->addedLine($aline) : $this->emptyLine());
  1090. }
  1091. foreach ($add as $line) { // If any leftovers
  1092. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine($line));
  1093. }
  1094. }
  1095. }
  1096. /**
  1097. * Drupal inline Diff formatter.
  1098. * @private
  1099. * @subpackage DifferenceEngine
  1100. */
  1101. class DrupalDiffInline {
  1102. var $a;
  1103. var $b;
  1104. /**
  1105. * Constructor.
  1106. */
  1107. function __construct($a, $b) {
  1108. $this->a = $a;
  1109. $this->b = $b;
  1110. }
  1111. /**
  1112. * Render differences inline using HTML markup.
  1113. */
  1114. function render() {
  1115. $a = preg_split('/(<[^>]+?>| )/', $this->a, -1, PREG_SPLIT_DELIM_CAPTURE);
  1116. $b = preg_split('/(<[^>]+?>| )/', $this->b, -1, PREG_SPLIT_DELIM_CAPTURE);
  1117. $diff = new Diff($a, $b);
  1118. $diff->edits = $this->process_edits($diff->edits);
  1119. // Assemble highlighted output
  1120. $output = '';
  1121. foreach ($diff->edits as $chunk) {
  1122. switch ($chunk->type) {
  1123. case 'copy':
  1124. $output .= implode('', $chunk->closing);
  1125. break;
  1126. case 'delete':
  1127. foreach ($chunk->orig as $i => $piece) {
  1128. if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
  1129. $output .= $piece;
  1130. }
  1131. else {
  1132. $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
  1133. }
  1134. }
  1135. break;
  1136. default:
  1137. $chunk->closing = $this->process_chunk($chunk->closing);
  1138. foreach ($chunk->closing as $i => $piece) {
  1139. if ($piece === ' ' || (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>' && drupal_strtolower(drupal_substr($piece, 1, 3)) != 'img')) {
  1140. $output .= $piece;
  1141. }
  1142. else {
  1143. $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
  1144. }
  1145. }
  1146. break;
  1147. }
  1148. }
  1149. return $output;
  1150. }
  1151. /**
  1152. * Merge chunk segments between tag delimiters.
  1153. */
  1154. function process_chunk($chunk) {
  1155. $processed = array();
  1156. $j = 0;
  1157. foreach ($chunk as $i => $piece) {
  1158. $next = isset($chunk[$i+1]) ? $chunk[$i+1] : NULL;
  1159. if (!isset($processed[$j])) {
  1160. $processed[$j] = '';
  1161. }
  1162. if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
  1163. $processed[$j] = $piece;
  1164. $j++;
  1165. }
  1166. elseif (isset($next) && strpos($next, '<') === 0 && drupal_substr($next, drupal_strlen($next) - 1) === '>') {
  1167. $processed[$j] .= $piece;
  1168. $j++;
  1169. }
  1170. else {
  1171. $processed[$j] .= $piece;
  1172. }
  1173. }
  1174. return $processed;
  1175. }
  1176. /**
  1177. * Merge copy and equivalent edits into intelligible chunks.
  1178. */
  1179. function process_edits($edits) {
  1180. $processed = array();
  1181. $current = array_shift($edits);
  1182. // Make two passes -- first merge space delimiter copies back into their originals.
  1183. while ($chunk = array_shift($edits)) {
  1184. if ($chunk->type == 'copy' && $chunk->orig === array(' ')) {
  1185. $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
  1186. $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
  1187. }
  1188. else {
  1189. $processed[] = $current;
  1190. $current = $chunk;
  1191. }
  1192. }
  1193. $processed[] = $current;
  1194. // Initial setup
  1195. $edits = $processed;
  1196. $processed = array();
  1197. $current = array_shift($edits);
  1198. // Second, merge equivalent chunks into each other.
  1199. while ($chunk = array_shift($edits)) {
  1200. if ($current->type == $chunk->type) {
  1201. $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
  1202. $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
  1203. }
  1204. else {
  1205. $processed[] = $current;
  1206. $current = $chunk;
  1207. }
  1208. }
  1209. $processed[] = $current;
  1210. return $processed;
  1211. }
  1212. }