DiffEngine.php 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349
  1. <?php
  2. /**
  3. * @file
  4. * A PHP diff engine for phpwiki. (Taken from phpwiki-1.3.3)
  5. *
  6. * Copyright (C) 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
  7. * You may copy this code freely under the conditions of the GPL.
  8. */
  9. define('USE_ASSERTS', FALSE);
  10. /**
  11. * @todo document
  12. * @private
  13. * @subpackage DifferenceEngine
  14. */
  15. class _DiffOp {
  16. var $type;
  17. var $orig;
  18. var $closing;
  19. function reverse() {
  20. trigger_error('pure virtual', E_USER_ERROR);
  21. }
  22. function norig() {
  23. return $this->orig ? sizeof($this->orig) : 0;
  24. }
  25. function nclosing() {
  26. return $this->closing ? sizeof($this->closing) : 0;
  27. }
  28. }
  29. /**
  30. * @todo document
  31. * @private
  32. * @subpackage DifferenceEngine
  33. */
  34. class _DiffOp_Copy extends _DiffOp {
  35. var $type = 'copy';
  36. function _DiffOp_Copy($orig, $closing = FALSE) {
  37. if (!is_array($closing)) {
  38. $closing = $orig;
  39. }
  40. $this->orig = $orig;
  41. $this->closing = $closing;
  42. }
  43. function reverse() {
  44. return new _DiffOp_Copy($this->closing, $this->orig);
  45. }
  46. }
  47. /**
  48. * @todo document
  49. * @private
  50. * @subpackage DifferenceEngine
  51. */
  52. class _DiffOp_Delete extends _DiffOp {
  53. var $type = 'delete';
  54. function _DiffOp_Delete($lines) {
  55. $this->orig = $lines;
  56. $this->closing = FALSE;
  57. }
  58. function reverse() {
  59. return new _DiffOp_Add($this->orig);
  60. }
  61. }
  62. /**
  63. * @todo document
  64. * @private
  65. * @subpackage DifferenceEngine
  66. */
  67. class _DiffOp_Add extends _DiffOp {
  68. var $type = 'add';
  69. function _DiffOp_Add($lines) {
  70. $this->closing = $lines;
  71. $this->orig = FALSE;
  72. }
  73. function reverse() {
  74. return new _DiffOp_Delete($this->closing);
  75. }
  76. }
  77. /**
  78. * @todo document
  79. * @private
  80. * @subpackage DifferenceEngine
  81. */
  82. class _DiffOp_Change extends _DiffOp {
  83. var $type = 'change';
  84. function _DiffOp_Change($orig, $closing) {
  85. $this->orig = $orig;
  86. $this->closing = $closing;
  87. }
  88. function reverse() {
  89. return new _DiffOp_Change($this->closing, $this->orig);
  90. }
  91. }
  92. /**
  93. * Class used internally by Diff to actually compute the diffs.
  94. *
  95. * The algorithm used here is mostly lifted from the perl module
  96. * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
  97. * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
  98. *
  99. * More ideas are taken from:
  100. * http://www.ics.uci.edu/~eppstein/161/960229.html
  101. *
  102. * Some ideas are (and a bit of code) are from from analyze.c, from GNU
  103. * diffutils-2.7, which can be found at:
  104. * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
  105. *
  106. * closingly, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
  107. * are my own.
  108. *
  109. * Line length limits for robustness added by Tim Starling, 2005-08-31
  110. *
  111. * @author Geoffrey T. Dairiki, Tim Starling
  112. * @private
  113. * @subpackage DifferenceEngine
  114. */
  115. class _DiffEngine {
  116. function MAX_XREF_LENGTH() {
  117. return 10000;
  118. }
  119. function diff($from_lines, $to_lines) {
  120. $n_from = sizeof($from_lines);
  121. $n_to = sizeof($to_lines);
  122. $this->xchanged = $this->ychanged = array();
  123. $this->xv = $this->yv = array();
  124. $this->xind = $this->yind = array();
  125. unset($this->seq);
  126. unset($this->in_seq);
  127. unset($this->lcs);
  128. // Skip leading common lines.
  129. for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
  130. if ($from_lines[$skip] !== $to_lines[$skip]) {
  131. break;
  132. }
  133. $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
  134. }
  135. // Skip trailing common lines.
  136. $xi = $n_from;
  137. $yi = $n_to;
  138. for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
  139. if ($from_lines[$xi] !== $to_lines[$yi]) {
  140. break;
  141. }
  142. $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
  143. }
  144. // Ignore lines which do not exist in both files.
  145. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  146. $xhash[$this->_line_hash($from_lines[$xi])] = 1;
  147. }
  148. for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
  149. $line = $to_lines[$yi];
  150. if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
  151. continue;
  152. }
  153. $yhash[$this->_line_hash($line)] = 1;
  154. $this->yv[] = $line;
  155. $this->yind[] = $yi;
  156. }
  157. for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
  158. $line = $from_lines[$xi];
  159. if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
  160. continue;
  161. }
  162. $this->xv[] = $line;
  163. $this->xind[] = $xi;
  164. }
  165. // Find the LCS.
  166. $this->_compareseq(0, sizeof($this->xv), 0, sizeof($this->yv));
  167. // Merge edits when possible
  168. $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
  169. $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);
  170. // Compute the edit operations.
  171. $edits = array();
  172. $xi = $yi = 0;
  173. while ($xi < $n_from || $yi < $n_to) {
  174. USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
  175. USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);
  176. // Skip matching "snake".
  177. $copy = array();
  178. while ( $xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
  179. $copy[] = $from_lines[$xi++];
  180. ++$yi;
  181. }
  182. if ($copy) {
  183. $edits[] = new _DiffOp_Copy($copy);
  184. }
  185. // Find deletes & adds.
  186. $delete = array();
  187. while ($xi < $n_from && $this->xchanged[$xi]) {
  188. $delete[] = $from_lines[$xi++];
  189. }
  190. $add = array();
  191. while ($yi < $n_to && $this->ychanged[$yi]) {
  192. $add[] = $to_lines[$yi++];
  193. }
  194. if ($delete && $add) {
  195. $edits[] = new _DiffOp_Change($delete, $add);
  196. }
  197. elseif ($delete) {
  198. $edits[] = new _DiffOp_Delete($delete);
  199. }
  200. elseif ($add) {
  201. $edits[] = new _DiffOp_Add($add);
  202. }
  203. }
  204. return $edits;
  205. }
  206. /**
  207. * Returns the whole line if it's small enough, or the MD5 hash otherwise.
  208. */
  209. function _line_hash($line) {
  210. if (drupal_strlen($line) > $this->MAX_XREF_LENGTH()) {
  211. return md5($line);
  212. }
  213. else {
  214. return $line;
  215. }
  216. }
  217. /**
  218. * Divide the Largest Common Subsequence (LCS) of the sequences
  219. * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
  220. * sized segments.
  221. *
  222. * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
  223. * array of NCHUNKS+1 (X, Y) indexes giving the diving points between
  224. * sub sequences. The first sub-sequence is contained in [X0, X1),
  225. * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
  226. * that (X0, Y0) == (XOFF, YOFF) and
  227. * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
  228. *
  229. * This function assumes that the first lines of the specified portions
  230. * of the two files do not match, and likewise that the last lines do not
  231. * match. The caller must trim matching lines from the beginning and end
  232. * of the portions it is going to specify.
  233. */
  234. function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
  235. $flip = FALSE;
  236. if ($xlim - $xoff > $ylim - $yoff) {
  237. // Things seems faster (I'm not sure I understand why)
  238. // when the shortest sequence in X.
  239. $flip = TRUE;
  240. list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
  241. }
  242. if ($flip) {
  243. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  244. $ymatches[$this->xv[$i]][] = $i;
  245. }
  246. }
  247. else {
  248. for ($i = $ylim - 1; $i >= $yoff; $i--) {
  249. $ymatches[$this->yv[$i]][] = $i;
  250. }
  251. }
  252. $this->lcs = 0;
  253. $this->seq[0]= $yoff - 1;
  254. $this->in_seq = array();
  255. $ymids[0] = array();
  256. $numer = $xlim - $xoff + $nchunks - 1;
  257. $x = $xoff;
  258. for ($chunk = 0; $chunk < $nchunks; $chunk++) {
  259. if ($chunk > 0) {
  260. for ($i = 0; $i <= $this->lcs; $i++) {
  261. $ymids[$i][$chunk-1] = $this->seq[$i];
  262. }
  263. }
  264. $x1 = $xoff + (int)(($numer + ($xlim-$xoff)*$chunk) / $nchunks);
  265. for ( ; $x < $x1; $x++) {
  266. $line = $flip ? $this->yv[$x] : $this->xv[$x];
  267. if (empty($ymatches[$line])) {
  268. continue;
  269. }
  270. $matches = $ymatches[$line];
  271. reset($matches);
  272. while (list ($junk, $y) = each($matches)) {
  273. if (empty($this->in_seq[$y])) {
  274. $k = $this->_lcs_pos($y);
  275. USE_ASSERTS && assert($k > 0);
  276. $ymids[$k] = $ymids[$k-1];
  277. break;
  278. }
  279. }
  280. while (list ($junk, $y) = each($matches)) {
  281. if ($y > $this->seq[$k-1]) {
  282. USE_ASSERTS && assert($y < $this->seq[$k]);
  283. // Optimization: this is a common case:
  284. // next match is just replacing previous match.
  285. $this->in_seq[$this->seq[$k]] = FALSE;
  286. $this->seq[$k] = $y;
  287. $this->in_seq[$y] = 1;
  288. }
  289. elseif (empty($this->in_seq[$y])) {
  290. $k = $this->_lcs_pos($y);
  291. USE_ASSERTS && assert($k > 0);
  292. $ymids[$k] = $ymids[$k-1];
  293. }
  294. }
  295. }
  296. }
  297. $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
  298. $ymid = $ymids[$this->lcs];
  299. for ($n = 0; $n < $nchunks - 1; $n++) {
  300. $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
  301. $y1 = $ymid[$n] + 1;
  302. $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
  303. }
  304. $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
  305. return array($this->lcs, $seps);
  306. }
  307. function _lcs_pos($ypos) {
  308. $end = $this->lcs;
  309. if ($end == 0 || $ypos > $this->seq[$end]) {
  310. $this->seq[++$this->lcs] = $ypos;
  311. $this->in_seq[$ypos] = 1;
  312. return $this->lcs;
  313. }
  314. $beg = 1;
  315. while ($beg < $end) {
  316. $mid = (int)(($beg + $end) / 2);
  317. if ($ypos > $this->seq[$mid]) {
  318. $beg = $mid + 1;
  319. }
  320. else {
  321. $end = $mid;
  322. }
  323. }
  324. USE_ASSERTS && assert($ypos != $this->seq[$end]);
  325. $this->in_seq[$this->seq[$end]] = FALSE;
  326. $this->seq[$end] = $ypos;
  327. $this->in_seq[$ypos] = 1;
  328. return $end;
  329. }
  330. /**
  331. * Find LCS of two sequences.
  332. *
  333. * The results are recorded in the vectors $this->{x,y}changed[], by
  334. * storing a 1 in the element for each line that is an insertion
  335. * or deletion (ie. is not in the LCS).
  336. *
  337. * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
  338. *
  339. * Note that XLIM, YLIM are exclusive bounds.
  340. * All line numbers are origin-0 and discarded lines are not counted.
  341. */
  342. function _compareseq($xoff, $xlim, $yoff, $ylim) {
  343. // Slide down the bottom initial diagonal.
  344. while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] == $this->yv[$yoff]) {
  345. ++$xoff;
  346. ++$yoff;
  347. }
  348. // Slide up the top initial diagonal.
  349. while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
  350. --$xlim;
  351. --$ylim;
  352. }
  353. if ($xoff == $xlim || $yoff == $ylim) {
  354. $lcs = 0;
  355. }
  356. else {
  357. // This is ad hoc but seems to work well.
  358. //$nchunks = sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5);
  359. //$nchunks = max(2, min(8, (int)$nchunks));
  360. $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
  361. list($lcs, $seps)
  362. = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
  363. }
  364. if ($lcs == 0) {
  365. // X and Y sequences have no common subsequence:
  366. // mark all changed.
  367. while ($yoff < $ylim) {
  368. $this->ychanged[$this->yind[$yoff++]] = 1;
  369. }
  370. while ($xoff < $xlim) {
  371. $this->xchanged[$this->xind[$xoff++]] = 1;
  372. }
  373. }
  374. else {
  375. // Use the partitions to split this problem into subproblems.
  376. reset($seps);
  377. $pt1 = $seps[0];
  378. while ($pt2 = next($seps)) {
  379. $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
  380. $pt1 = $pt2;
  381. }
  382. }
  383. }
  384. /**
  385. * Adjust inserts/deletes of identical lines to join changes
  386. * as much as possible.
  387. *
  388. * We do something when a run of changed lines include a
  389. * line at one end and has an excluded, identical line at the other.
  390. * We are free to choose which identical line is included.
  391. * `compareseq' usually chooses the one at the beginning,
  392. * but usually it is cleaner to consider the following identical line
  393. * to be the "change".
  394. *
  395. * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
  396. */
  397. function _shift_boundaries($lines, &$changed, $other_changed) {
  398. $i = 0;
  399. $j = 0;
  400. USE_ASSERTS && assert('sizeof($lines) == sizeof($changed)');
  401. $len = sizeof($lines);
  402. $other_len = sizeof($other_changed);
  403. while (1) {
  404. /*
  405. * Scan forwards to find beginning of another run of changes.
  406. * Also keep track of the corresponding point in the other file.
  407. *
  408. * Throughout this code, $i and $j are adjusted together so that
  409. * the first $i elements of $changed and the first $j elements
  410. * of $other_changed both contain the same number of zeros
  411. * (unchanged lines).
  412. * Furthermore, $j is always kept so that $j == $other_len or
  413. * $other_changed[$j] == FALSE.
  414. */
  415. while ($j < $other_len && $other_changed[$j]) {
  416. $j++;
  417. }
  418. while ($i < $len && ! $changed[$i]) {
  419. USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
  420. $i++;
  421. $j++;
  422. while ($j < $other_len && $other_changed[$j]) {
  423. $j++;
  424. }
  425. }
  426. if ($i == $len) {
  427. break;
  428. }
  429. $start = $i;
  430. // Find the end of this run of changes.
  431. while (++$i < $len && $changed[$i]) {
  432. continue;
  433. }
  434. do {
  435. /*
  436. * Record the length of this run of changes, so that
  437. * we can later determine whether the run has grown.
  438. */
  439. $runlength = $i - $start;
  440. /*
  441. * Move the changed region back, so long as the
  442. * previous unchanged line matches the last changed one.
  443. * This merges with previous changed regions.
  444. */
  445. while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
  446. $changed[--$start] = 1;
  447. $changed[--$i] = FALSE;
  448. while ($start > 0 && $changed[$start - 1]) {
  449. $start--;
  450. }
  451. USE_ASSERTS && assert('$j > 0');
  452. while ($other_changed[--$j]) {
  453. continue;
  454. }
  455. USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
  456. }
  457. /*
  458. * Set CORRESPONDING to the end of the changed run, at the last
  459. * point where it corresponds to a changed run in the other file.
  460. * CORRESPONDING == LEN means no such point has been found.
  461. */
  462. $corresponding = $j < $other_len ? $i : $len;
  463. /*
  464. * Move the changed region forward, so long as the
  465. * first changed line matches the following unchanged one.
  466. * This merges with following changed regions.
  467. * Do this second, so that if there are no merges,
  468. * the changed region is moved forward as far as possible.
  469. */
  470. while ($i < $len && $lines[$start] == $lines[$i]) {
  471. $changed[$start++] = FALSE;
  472. $changed[$i++] = 1;
  473. while ($i < $len && $changed[$i]) {
  474. $i++;
  475. }
  476. USE_ASSERTS && assert('$j < $other_len && ! $other_changed[$j]');
  477. $j++;
  478. if ($j < $other_len && $other_changed[$j]) {
  479. $corresponding = $i;
  480. while ($j < $other_len && $other_changed[$j]) {
  481. $j++;
  482. }
  483. }
  484. }
  485. } while ($runlength != $i - $start);
  486. /*
  487. * If possible, move the fully-merged run of changes
  488. * back to a corresponding run in the other file.
  489. */
  490. while ($corresponding < $i) {
  491. $changed[--$start] = 1;
  492. $changed[--$i] = 0;
  493. USE_ASSERTS && assert('$j > 0');
  494. while ($other_changed[--$j]) {
  495. continue;
  496. }
  497. USE_ASSERTS && assert('$j >= 0 && !$other_changed[$j]');
  498. }
  499. }
  500. }
  501. }
  502. /**
  503. * Class representing a 'diff' between two sequences of strings.
  504. * @todo document
  505. * @private
  506. * @subpackage DifferenceEngine
  507. */
  508. class Diff {
  509. var $edits;
  510. /**
  511. * Constructor.
  512. * Computes diff between sequences of strings.
  513. *
  514. * @param $from_lines array An array of strings.
  515. * (Typically these are lines from a file.)
  516. * @param $to_lines array An array of strings.
  517. */
  518. function Diff($from_lines, $to_lines) {
  519. $eng = new _DiffEngine;
  520. $this->edits = $eng->diff($from_lines, $to_lines);
  521. //$this->_check($from_lines, $to_lines);
  522. }
  523. /**
  524. * Compute reversed Diff.
  525. *
  526. * SYNOPSIS:
  527. *
  528. * $diff = new Diff($lines1, $lines2);
  529. * $rev = $diff->reverse();
  530. * @return object A Diff object representing the inverse of the
  531. * original diff.
  532. */
  533. function reverse() {
  534. $rev = $this;
  535. $rev->edits = array();
  536. foreach ($this->edits as $edit) {
  537. $rev->edits[] = $edit->reverse();
  538. }
  539. return $rev;
  540. }
  541. /**
  542. * Check for empty diff.
  543. *
  544. * @return bool True iff two sequences were identical.
  545. */
  546. function isEmpty() {
  547. foreach ($this->edits as $edit) {
  548. if ($edit->type != 'copy') {
  549. return FALSE;
  550. }
  551. }
  552. return TRUE;
  553. }
  554. /**
  555. * Compute the length of the Longest Common Subsequence (LCS).
  556. *
  557. * This is mostly for diagnostic purposed.
  558. *
  559. * @return int The length of the LCS.
  560. */
  561. function lcs() {
  562. $lcs = 0;
  563. foreach ($this->edits as $edit) {
  564. if ($edit->type == 'copy') {
  565. $lcs += sizeof($edit->orig);
  566. }
  567. }
  568. return $lcs;
  569. }
  570. /**
  571. * Get the original set of lines.
  572. *
  573. * This reconstructs the $from_lines parameter passed to the
  574. * constructor.
  575. *
  576. * @return array The original sequence of strings.
  577. */
  578. function orig() {
  579. $lines = array();
  580. foreach ($this->edits as $edit) {
  581. if ($edit->orig) {
  582. array_splice($lines, sizeof($lines), 0, $edit->orig);
  583. }
  584. }
  585. return $lines;
  586. }
  587. /**
  588. * Get the closing set of lines.
  589. *
  590. * This reconstructs the $to_lines parameter passed to the
  591. * constructor.
  592. *
  593. * @return array The sequence of strings.
  594. */
  595. function closing() {
  596. $lines = array();
  597. foreach ($this->edits as $edit) {
  598. if ($edit->closing) {
  599. array_splice($lines, sizeof($lines), 0, $edit->closing);
  600. }
  601. }
  602. return $lines;
  603. }
  604. /**
  605. * Check a Diff for validity.
  606. *
  607. * This is here only for debugging purposes.
  608. */
  609. function _check($from_lines, $to_lines) {
  610. if (serialize($from_lines) != serialize($this->orig())) {
  611. trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
  612. }
  613. if (serialize($to_lines) != serialize($this->closing())) {
  614. trigger_error("Reconstructed closing doesn't match", E_USER_ERROR);
  615. }
  616. $rev = $this->reverse();
  617. if (serialize($to_lines) != serialize($rev->orig())) {
  618. trigger_error("Reversed original doesn't match", E_USER_ERROR);
  619. }
  620. if (serialize($from_lines) != serialize($rev->closing())) {
  621. trigger_error("Reversed closing doesn't match", E_USER_ERROR);
  622. }
  623. $prevtype = 'none';
  624. foreach ($this->edits as $edit) {
  625. if ( $prevtype == $edit->type ) {
  626. trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
  627. }
  628. $prevtype = $edit->type;
  629. }
  630. $lcs = $this->lcs();
  631. trigger_error('Diff okay: LCS = ' . $lcs, E_USER_NOTICE);
  632. }
  633. }
  634. /**
  635. * FIXME: bad name.
  636. * @todo document
  637. * @private
  638. * @subpackage DifferenceEngine
  639. */
  640. class MappedDiff extends Diff {
  641. /**
  642. * Constructor.
  643. *
  644. * Computes diff between sequences of strings.
  645. *
  646. * This can be used to compute things like
  647. * case-insensitve diffs, or diffs which ignore
  648. * changes in white-space.
  649. *
  650. * @param $from_lines array An array of strings.
  651. * (Typically these are lines from a file.)
  652. *
  653. * @param $to_lines array An array of strings.
  654. *
  655. * @param $mapped_from_lines array This array should
  656. * have the same size number of elements as $from_lines.
  657. * The elements in $mapped_from_lines and
  658. * $mapped_to_lines are what is actually compared
  659. * when computing the diff.
  660. *
  661. * @param $mapped_to_lines array This array should
  662. * have the same number of elements as $to_lines.
  663. */
  664. function MappedDiff($from_lines, $to_lines, $mapped_from_lines, $mapped_to_lines) {
  665. assert(sizeof($from_lines) == sizeof($mapped_from_lines));
  666. assert(sizeof($to_lines) == sizeof($mapped_to_lines));
  667. $this->Diff($mapped_from_lines, $mapped_to_lines);
  668. $xi = $yi = 0;
  669. for ($i = 0; $i < sizeof($this->edits); $i++) {
  670. $orig = &$this->edits[$i]->orig;
  671. if (is_array($orig)) {
  672. $orig = array_slice($from_lines, $xi, sizeof($orig));
  673. $xi += sizeof($orig);
  674. }
  675. $closing = &$this->edits[$i]->closing;
  676. if (is_array($closing)) {
  677. $closing = array_slice($to_lines, $yi, sizeof($closing));
  678. $yi += sizeof($closing);
  679. }
  680. }
  681. }
  682. }
  683. /**
  684. * A class to format Diffs
  685. *
  686. * This class formats the diff in classic diff format.
  687. * It is intended that this class be customized via inheritance,
  688. * to obtain fancier outputs.
  689. * @todo document
  690. * @private
  691. * @subpackage DifferenceEngine
  692. */
  693. class DiffFormatter {
  694. /**
  695. * Should a block header be shown?
  696. */
  697. var $show_header = TRUE;
  698. /**
  699. * Number of leading context "lines" to preserve.
  700. *
  701. * This should be left at zero for this class, but subclasses
  702. * may want to set this to other values.
  703. */
  704. var $leading_context_lines = 0;
  705. /**
  706. * Number of trailing context "lines" to preserve.
  707. *
  708. * This should be left at zero for this class, but subclasses
  709. * may want to set this to other values.
  710. */
  711. var $trailing_context_lines = 0;
  712. /**
  713. * Format a diff.
  714. *
  715. * @param $diff object A Diff object.
  716. * @return string The formatted output.
  717. */
  718. function format($diff) {
  719. $xi = $yi = 1;
  720. $block = FALSE;
  721. $context = array();
  722. $nlead = $this->leading_context_lines;
  723. $ntrail = $this->trailing_context_lines;
  724. $this->_start_diff();
  725. foreach ($diff->edits as $edit) {
  726. if ($edit->type == 'copy') {
  727. if (is_array($block)) {
  728. if (sizeof($edit->orig) <= $nlead + $ntrail) {
  729. $block[] = $edit;
  730. }
  731. else {
  732. if ($ntrail) {
  733. $context = array_slice($edit->orig, 0, $ntrail);
  734. $block[] = new _DiffOp_Copy($context);
  735. }
  736. $this->_block($x0, $ntrail + $xi - $x0, $y0, $ntrail + $yi - $y0, $block);
  737. $block = FALSE;
  738. }
  739. }
  740. $context = $edit->orig;
  741. }
  742. else {
  743. if (! is_array($block)) {
  744. $context = array_slice($context, sizeof($context) - $nlead);
  745. $x0 = $xi - sizeof($context);
  746. $y0 = $yi - sizeof($context);
  747. $block = array();
  748. if ($context) {
  749. $block[] = new _DiffOp_Copy($context);
  750. }
  751. }
  752. $block[] = $edit;
  753. }
  754. if ($edit->orig) {
  755. $xi += sizeof($edit->orig);
  756. }
  757. if ($edit->closing) {
  758. $yi += sizeof($edit->closing);
  759. }
  760. }
  761. if (is_array($block)) {
  762. $this->_block($x0, $xi - $x0, $y0, $yi - $y0, $block);
  763. }
  764. $end = $this->_end_diff();
  765. if (!empty($xi)) {
  766. $this->line_stats['counter']['x'] += $xi;
  767. }
  768. if (!empty($yi)) {
  769. $this->line_stats['counter']['y'] += $yi;
  770. }
  771. return $end;
  772. }
  773. function _block($xbeg, $xlen, $ybeg, $ylen, &$edits) {
  774. $this->_start_block($this->_block_header($xbeg, $xlen, $ybeg, $ylen));
  775. foreach ($edits as $edit) {
  776. if ($edit->type == 'copy') {
  777. $this->_context($edit->orig);
  778. }
  779. elseif ($edit->type == 'add') {
  780. $this->_added($edit->closing);
  781. }
  782. elseif ($edit->type == 'delete') {
  783. $this->_deleted($edit->orig);
  784. }
  785. elseif ($edit->type == 'change') {
  786. $this->_changed($edit->orig, $edit->closing);
  787. }
  788. else {
  789. trigger_error('Unknown edit type', E_USER_ERROR);
  790. }
  791. }
  792. $this->_end_block();
  793. }
  794. function _start_diff() {
  795. ob_start();
  796. }
  797. function _end_diff() {
  798. $val = ob_get_contents();
  799. ob_end_clean();
  800. return $val;
  801. }
  802. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  803. if ($xlen > 1) {
  804. $xbeg .= "," . ($xbeg + $xlen - 1);
  805. }
  806. if ($ylen > 1) {
  807. $ybeg .= "," . ($ybeg + $ylen - 1);
  808. }
  809. return $xbeg . ($xlen ? ($ylen ? 'c' : 'd') : 'a') . $ybeg;
  810. }
  811. function _start_block($header) {
  812. if ($this->show_header) {
  813. echo $header . "\n";
  814. }
  815. }
  816. function _end_block() {
  817. }
  818. function _lines($lines, $prefix = ' ') {
  819. foreach ($lines as $line) {
  820. echo "$prefix $line\n";
  821. }
  822. }
  823. function _context($lines) {
  824. $this->_lines($lines);
  825. }
  826. function _added($lines) {
  827. $this->_lines($lines, '>');
  828. }
  829. function _deleted($lines) {
  830. $this->_lines($lines, '<');
  831. }
  832. function _changed($orig, $closing) {
  833. $this->_deleted($orig);
  834. echo "---\n";
  835. $this->_added($closing);
  836. }
  837. }
  838. /**
  839. * Additions by Axel Boldt follow, partly taken from diff.php, phpwiki-1.3.3
  840. *
  841. */
  842. define('NBSP', '&#160;'); // iso-8859-x non-breaking space.
  843. /**
  844. * @todo document
  845. * @private
  846. * @subpackage DifferenceEngine
  847. */
  848. class _HWLDF_WordAccumulator {
  849. function _HWLDF_WordAccumulator() {
  850. $this->_lines = array();
  851. $this->_line = '';
  852. $this->_group = '';
  853. $this->_tag = '';
  854. }
  855. function _flushGroup($new_tag) {
  856. if ($this->_group !== '') {
  857. if ($this->_tag == 'mark') {
  858. $this->_line .= '<span class="diffchange">' . check_plain($this->_group) . '</span>';
  859. }
  860. else {
  861. $this->_line .= check_plain($this->_group);
  862. }
  863. }
  864. $this->_group = '';
  865. $this->_tag = $new_tag;
  866. }
  867. function _flushLine($new_tag) {
  868. $this->_flushGroup($new_tag);
  869. if ($this->_line != '') {
  870. array_push($this->_lines, $this->_line);
  871. }
  872. else {
  873. // make empty lines visible by inserting an NBSP
  874. array_push($this->_lines, NBSP);
  875. }
  876. $this->_line = '';
  877. }
  878. function addWords($words, $tag = '') {
  879. if ($tag != $this->_tag) {
  880. $this->_flushGroup($tag);
  881. }
  882. foreach ($words as $word) {
  883. // new-line should only come as first char of word.
  884. if ($word == '') {
  885. continue;
  886. }
  887. if ($word[0] == "\n") {
  888. $this->_flushLine($tag);
  889. $word = drupal_substr($word, 1);
  890. }
  891. assert(!strstr($word, "\n"));
  892. $this->_group .= $word;
  893. }
  894. }
  895. function getLines() {
  896. $this->_flushLine('~done');
  897. return $this->_lines;
  898. }
  899. }
  900. /**
  901. * @todo document
  902. * @private
  903. * @subpackage DifferenceEngine
  904. */
  905. class WordLevelDiff extends MappedDiff {
  906. function MAX_LINE_LENGTH() {
  907. return 10000;
  908. }
  909. function WordLevelDiff($orig_lines, $closing_lines) {
  910. list($orig_words, $orig_stripped) = $this->_split($orig_lines);
  911. list($closing_words, $closing_stripped) = $this->_split($closing_lines);
  912. $this->MappedDiff($orig_words, $closing_words, $orig_stripped, $closing_stripped);
  913. }
  914. function _split($lines) {
  915. $words = array();
  916. $stripped = array();
  917. $first = TRUE;
  918. foreach ($lines as $line) {
  919. // If the line is too long, just pretend the entire line is one big word
  920. // This prevents resource exhaustion problems
  921. if ( $first ) {
  922. $first = FALSE;
  923. }
  924. else {
  925. $words[] = "\n";
  926. $stripped[] = "\n";
  927. }
  928. if ( drupal_strlen( $line ) > $this->MAX_LINE_LENGTH() ) {
  929. $words[] = $line;
  930. $stripped[] = $line;
  931. }
  932. else {
  933. if (preg_match_all('/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs', $line, $m)) {
  934. $words = array_merge($words, $m[0]);
  935. $stripped = array_merge($stripped, $m[1]);
  936. }
  937. }
  938. }
  939. return array($words, $stripped);
  940. }
  941. function orig() {
  942. $orig = new _HWLDF_WordAccumulator;
  943. foreach ($this->edits as $edit) {
  944. if ($edit->type == 'copy') {
  945. $orig->addWords($edit->orig);
  946. }
  947. elseif ($edit->orig) {
  948. $orig->addWords($edit->orig, 'mark');
  949. }
  950. }
  951. $lines = $orig->getLines();
  952. return $lines;
  953. }
  954. function closing() {
  955. $closing = new _HWLDF_WordAccumulator;
  956. foreach ($this->edits as $edit) {
  957. if ($edit->type == 'copy') {
  958. $closing->addWords($edit->closing);
  959. }
  960. elseif ($edit->closing) {
  961. $closing->addWords($edit->closing, 'mark');
  962. }
  963. }
  964. $lines = $closing->getLines();
  965. return $lines;
  966. }
  967. }
  968. /**
  969. * Diff formatter which uses Drupal theme functions.
  970. * @private
  971. * @subpackage DifferenceEngine
  972. */
  973. class DrupalDiffFormatter extends DiffFormatter {
  974. var $rows;
  975. var $line_stats = array(
  976. 'counter' => array('x' => 0, 'y' => 0),
  977. 'offset' => array('x' => 0, 'y' => 0),
  978. );
  979. function DrupalDiffFormatter() {
  980. $this->leading_context_lines = variable_get('diff_context_lines_leading', 2);
  981. $this->trailing_context_lines = variable_get('diff_context_lines_trailing', 2);
  982. }
  983. function _start_diff() {
  984. $this->rows = array();
  985. }
  986. function _end_diff() {
  987. return $this->rows;
  988. }
  989. function _block_header($xbeg, $xlen, $ybeg, $ylen) {
  990. return array(
  991. array(
  992. 'data' => theme('diff_header_line', array('lineno' => $xbeg + $this->line_stats['offset']['x'])),
  993. 'colspan' => 2,
  994. ),
  995. array(
  996. 'data' => theme('diff_header_line', array('lineno' => $ybeg + $this->line_stats['offset']['y'])),
  997. 'colspan' => 2,
  998. )
  999. );
  1000. }
  1001. function _start_block($header) {
  1002. if ($this->show_header) {
  1003. $this->rows[] = $header;
  1004. }
  1005. }
  1006. function _end_block() {
  1007. }
  1008. function _lines($lines, $prefix=' ', $color='white') {
  1009. }
  1010. /**
  1011. * Note: you should HTML-escape parameter before calling this.
  1012. */
  1013. function addedLine($line) {
  1014. return array(
  1015. array(
  1016. 'data' => '+',
  1017. 'class' => 'diff-marker',
  1018. ),
  1019. array(
  1020. 'data' => theme('diff_content_line', array('line' => $line)),
  1021. 'class' => 'diff-context diff-addedline',
  1022. )
  1023. );
  1024. }
  1025. /**
  1026. * Note: you should HTML-escape parameter before calling this.
  1027. */
  1028. function deletedLine($line) {
  1029. return array(
  1030. array(
  1031. 'data' => '-',
  1032. 'class' => 'diff-marker',
  1033. ),
  1034. array(
  1035. 'data' => theme('diff_content_line', array('line' => $line)),
  1036. 'class' => 'diff-context diff-deletedline',
  1037. )
  1038. );
  1039. }
  1040. /**
  1041. * Note: you should HTML-escape parameter before calling this.
  1042. */
  1043. function contextLine($line) {
  1044. return array(
  1045. '&nbsp;',
  1046. array(
  1047. 'data' => theme('diff_content_line', array('line' => $line)),
  1048. 'class' => 'diff-context',
  1049. )
  1050. );
  1051. }
  1052. function emptyLine() {
  1053. return array(
  1054. '&nbsp;',
  1055. theme('diff_empty_line', array('line' => '&nbsp;')),
  1056. );
  1057. }
  1058. function _added($lines) {
  1059. foreach ($lines as $line) {
  1060. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine(check_plain($line)));
  1061. }
  1062. }
  1063. function _deleted($lines) {
  1064. foreach ($lines as $line) {
  1065. $this->rows[] = array_merge($this->deletedLine(check_plain($line)), $this->emptyLine());
  1066. }
  1067. }
  1068. function _context($lines) {
  1069. foreach ($lines as $line) {
  1070. $this->rows[] = array_merge($this->contextLine(check_plain($line)), $this->contextLine(check_plain($line)));
  1071. }
  1072. }
  1073. function _changed($orig, $closing) {
  1074. $diff = new WordLevelDiff($orig, $closing);
  1075. $del = $diff->orig();
  1076. $add = $diff->closing();
  1077. // Notice that WordLevelDiff returns HTML-escaped output.
  1078. // Hence, we will be calling addedLine/deletedLine without HTML-escaping.
  1079. while ($line = array_shift($del)) {
  1080. $aline = array_shift( $add );
  1081. $this->rows[] = array_merge($this->deletedLine($line), isset($aline) ? $this->addedLine($aline) : $this->emptyLine());
  1082. }
  1083. foreach ($add as $line) { // If any leftovers
  1084. $this->rows[] = array_merge($this->emptyLine(), $this->addedLine($line));
  1085. }
  1086. }
  1087. }
  1088. /**
  1089. * Drupal inline Diff formatter.
  1090. * @private
  1091. * @subpackage DifferenceEngine
  1092. */
  1093. class DrupalDiffInline {
  1094. var $a;
  1095. var $b;
  1096. /**
  1097. * Constructor.
  1098. */
  1099. function __construct($a, $b) {
  1100. $this->a = $a;
  1101. $this->b = $b;
  1102. }
  1103. /**
  1104. * Render differences inline using HTML markup.
  1105. */
  1106. function render() {
  1107. $a = preg_split('/(<[^>]+?>| )/', $this->a, -1, PREG_SPLIT_DELIM_CAPTURE);
  1108. $b = preg_split('/(<[^>]+?>| )/', $this->b, -1, PREG_SPLIT_DELIM_CAPTURE);
  1109. $diff = new Diff($a, $b);
  1110. $diff->edits = $this->process_edits($diff->edits);
  1111. // Assemble highlighted output
  1112. $output = '';
  1113. foreach ($diff->edits as $chunk) {
  1114. switch ($chunk->type) {
  1115. case 'copy':
  1116. $output .= implode('', $chunk->closing);
  1117. break;
  1118. case 'delete':
  1119. foreach ($chunk->orig as $i => $piece) {
  1120. if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
  1121. $output .= $piece;
  1122. }
  1123. else {
  1124. $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
  1125. }
  1126. }
  1127. break;
  1128. default:
  1129. $chunk->closing = $this->process_chunk($chunk->closing);
  1130. foreach ($chunk->closing as $i => $piece) {
  1131. if ($piece === ' ' || (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>' && drupal_strtolower(drupal_substr($piece, 1, 3)) != 'img')) {
  1132. $output .= $piece;
  1133. }
  1134. else {
  1135. $output .= theme('diff_inline_chunk', array('text' => $piece, 'type' => $chunk->type));
  1136. }
  1137. }
  1138. break;
  1139. }
  1140. }
  1141. return $output;
  1142. }
  1143. /**
  1144. * Merge chunk segments between tag delimiters.
  1145. */
  1146. function process_chunk($chunk) {
  1147. $processed = array();
  1148. $j = 0;
  1149. foreach ($chunk as $i => $piece) {
  1150. $next = isset($chunk[$i+1]) ? $chunk[$i+1] : NULL;
  1151. if (!isset($processed[$j])) {
  1152. $processed[$j] = '';
  1153. }
  1154. if (strpos($piece, '<') === 0 && drupal_substr($piece, drupal_strlen($piece) - 1) === '>') {
  1155. $processed[$j] = $piece;
  1156. $j++;
  1157. }
  1158. elseif (isset($next) && strpos($next, '<') === 0 && drupal_substr($next, drupal_strlen($next) - 1) === '>') {
  1159. $processed[$j] .= $piece;
  1160. $j++;
  1161. }
  1162. else {
  1163. $processed[$j] .= $piece;
  1164. }
  1165. }
  1166. return $processed;
  1167. }
  1168. /**
  1169. * Merge copy and equivalent edits into intelligible chunks.
  1170. */
  1171. function process_edits($edits) {
  1172. $processed = array();
  1173. $current = array_shift($edits);
  1174. // Make two passes -- first merge space delimiter copies back into their originals.
  1175. while ($chunk = array_shift($edits)) {
  1176. if ($chunk->type == 'copy' && $chunk->orig === array(' ')) {
  1177. $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
  1178. $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
  1179. }
  1180. else {
  1181. $processed[] = $current;
  1182. $current = $chunk;
  1183. }
  1184. }
  1185. $processed[] = $current;
  1186. // Initial setup
  1187. $edits = $processed;
  1188. $processed = array();
  1189. $current = array_shift($edits);
  1190. // Second, merge equivalent chunks into each other.
  1191. while ($chunk = array_shift($edits)) {
  1192. if ($current->type == $chunk->type) {
  1193. $current->orig = array_merge((array) $current->orig, (array) $chunk->orig);
  1194. $current->closing = array_merge((array) $current->closing, (array) $chunk->closing);
  1195. }
  1196. else {
  1197. $processed[] = $current;
  1198. $current = $chunk;
  1199. }
  1200. }
  1201. $processed[] = $current;
  1202. return $processed;
  1203. }
  1204. }