Code Coverage for /Users/ericduran/Sites/drupal/drupal/core/lib/Drupal/Component/Diff/Engine/DiffEngine.php

	Code Coverage
	Classes and Traits			Functions and Methods				Lines
Total	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 6	CRAP	0.00% covered (danger)	0.00%	0 / 274
DiffEngine	0.00% covered (danger)	0.00%	0 / 1	0.00% covered (danger)	0.00%	0 / 6	12210	0.00% covered (danger)	0.00%	0 / 274
diff				0.00% covered (danger)	0.00%	0 / 1	992	0.00% covered (danger)	0.00%	0 / 78
_line_hash				0.00% covered (danger)	0.00%	0 / 1	6	0.00% covered (danger)	0.00%	0 / 7
_diag				0.00% covered (danger)	0.00%	0 / 1	552	0.00% covered (danger)	0.00%	0 / 68
_lcs_pos				0.00% covered (danger)	0.00%	0 / 1	42	0.00% covered (danger)	0.00%	0 / 22
_compareseq				0.00% covered (danger)	0.00%	0 / 1	182	0.00% covered (danger)	0.00%	0 / 33
_shift_boundaries				0.00% covered (danger)	0.00%	0 / 1	1260	0.00% covered (danger)	0.00%	0 / 66

1	<?php
2
3	/**
4	* @file
5	* Contains \Drupal\Component\Diff\Engine\DiffEngine.
6	*/
7
8	namespace Drupal\Component\Diff\Engine;
9
10	use Drupal\Component\Utility\Unicode;
11
12	/**
13	* Class used internally by Diff to actually compute the diffs.
14	*
15	* The algorithm used here is mostly lifted from the perl module
16	* Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
17	* http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
18	*
19	* More ideas are taken from:
20	* http://www.ics.uci.edu/~eppstein/161/960229.html
21	*
22	* Some ideas (and a bit of code) are from analyze.c, from GNU
23	* diffutils-2.7, which can be found at:
24	* ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
25	*
26	* Finally, some ideas (subdivision by NCHUNKS > 2, and some optimizations)
27	* are my own.
28	*
29	* Line length limits for robustness added by Tim Starling, 2005-08-31
30	*
31	* @author Geoffrey T. Dairiki, Tim Starling
32	* @private
33	* @subpackage DifferenceEngine
34	*/
35	class DiffEngine {
36
37	const USE_ASSERTS = FALSE;
38
39	const MAX_XREF_LENGTH = 10000;
40
/**
 * Computes the edit operations that turn one list of lines into another.
 *
 * Lines are addressed by integer offset from 0 up to count() - 1, so both
 * arguments must be zero-based lists.
 *
 * @param array $from_lines
 *   The lines of the original text.
 * @param array $to_lines
 *   The lines of the modified text.
 *
 * @return array
 *   DiffOpCopy, DiffOpChange, DiffOpDelete and DiffOpAdd objects, in the
 *   order in which the corresponding lines occur.
 */
public function diff($from_lines, $to_lines) {
  $n_from = count($from_lines);
  $n_to = count($to_lines);

  // Reset the state left over from any previous run.
  $this->xchanged = $this->ychanged = array();
  $this->xv = $this->yv = array();
  $this->xind = $this->yind = array();
  unset($this->seq);
  unset($this->in_seq);
  unset($this->lcs);

  // Skip leading common lines.
  for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
    if ($from_lines[$skip] !== $to_lines[$skip]) {
      break;
    }
    $this->xchanged[$skip] = $this->ychanged[$skip] = FALSE;
  }

  // Skip trailing common lines.
  $xi = $n_from;
  $yi = $n_to;
  for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
    if ($from_lines[$xi] !== $to_lines[$yi]) {
      break;
    }
    $this->xchanged[$xi] = $this->ychanged[$yi] = FALSE;
  }

  // Ignore lines which do not exist in both files: they are flagged as
  // changed right away and kept out of the sequences handed to the LCS
  // search. xind/yind map positions in xv/yv back to original line numbers.
  $xhash = $yhash = array();
  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
    $xhash[$this->_line_hash($from_lines[$xi])] = 1;
  }

  for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
    $line = $to_lines[$yi];
    if ($this->ychanged[$yi] = empty($xhash[$this->_line_hash($line)])) {
      continue;
    }
    $yhash[$this->_line_hash($line)] = 1;
    $this->yv[] = $line;
    $this->yind[] = $yi;
  }
  for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
    $line = $from_lines[$xi];
    if ($this->xchanged[$xi] = empty($yhash[$this->_line_hash($line)])) {
      continue;
    }
    $this->xv[] = $line;
    $this->xind[] = $xi;
  }

  // Find the LCS.
  $this->_compareseq(0, count($this->xv), 0, count($this->yv));

  // Merge edits when possible.
  $this->_shift_boundaries($from_lines, $this->xchanged, $this->ychanged);
  $this->_shift_boundaries($to_lines, $this->ychanged, $this->xchanged);

  // Compute the edit operations.
  $edits = array();
  $xi = $yi = 0;
  while ($xi < $n_from || $yi < $n_to) {
    $this::USE_ASSERTS && assert($yi < $n_to || $this->xchanged[$xi]);
    $this::USE_ASSERTS && assert($xi < $n_from || $this->ychanged[$yi]);

    // Skip matching "snake".
    $copy = array();
    while ($xi < $n_from && $yi < $n_to && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
      $copy[] = $from_lines[$xi++];
      ++$yi;
    }
    if ($copy) {
      $edits[] = new DiffOpCopy($copy);
    }

    // Find deletes & adds.
    $delete = array();
    while ($xi < $n_from && $this->xchanged[$xi]) {
      $delete[] = $from_lines[$xi++];
    }
    $add = array();
    while ($yi < $n_to && $this->ychanged[$yi]) {
      $add[] = $to_lines[$yi++];
    }

    // A deletion immediately followed by an addition is reported as a
    // single change operation.
    if ($delete && $add) {
      $edits[] = new DiffOpChange($delete, $add);
    }
    elseif ($delete) {
      $edits[] = new DiffOpDelete($delete);
    }
    elseif ($add) {
      $edits[] = new DiffOpAdd($add);
    }
  }
  return $edits;
}
137
/**
 * Produces the lookup key used to cross-reference a line.
 *
 * @param string $line
 *   The line to key.
 *
 * @return string
 *   The line itself when its length does not exceed MAX_XREF_LENGTH,
 *   otherwise the MD5 hash of the line.
 */
protected function _line_hash($line) {
  return Unicode::strlen($line) > $this::MAX_XREF_LENGTH ? md5($line) : $line;
}
149
150
/**
 * Divide the Largest Common Subsequence (LCS) of the sequences
 * [XOFF, XLIM) and [YOFF, YLIM) into NCHUNKS approximately equally
 * sized segments.
 *
 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an
 * array of NCHUNKS+1 (X, Y) indexes giving the dividing points between
 * sub sequences. The first sub-sequence is contained in [X0, X1),
 * [Y0, Y1), the second in [X1, X2), [Y1, Y2) and so on. Note
 * that (X0, Y0) == (XOFF, YOFF) and
 * (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
 *
 * This function assumes that the first lines of the specified portions
 * of the two files do not match, and likewise that the last lines do not
 * match. The caller must trim matching lines from the beginning and end
 * of the portions it is going to specify.
 *
 * @param int $xoff
 *   Inclusive start offset into $this->xv.
 * @param int $xlim
 *   Exclusive end offset into $this->xv.
 * @param int $yoff
 *   Inclusive start offset into $this->yv.
 * @param int $ylim
 *   Exclusive end offset into $this->yv.
 * @param int $nchunks
 *   Number of segments to split the range into.
 *
 * @return array
 *   A two-element array: the LCS length and the list of (X, Y) split points.
 */
protected function _diag($xoff, $xlim, $yoff, $ylim, $nchunks) {
  $flip = FALSE;

  if ($xlim - $xoff > $ylim - $yoff) {
    // Things seem faster (I'm not sure I understand why) when the
    // shortest sequence is in X.
    $flip = TRUE;
    list($xoff, $xlim, $yoff, $ylim) = array($yoff, $ylim, $xoff, $xlim);
  }

  // Map every line of the (possibly flipped) Y range to the positions at
  // which it occurs, highest position first.
  $ymatches = array();
  if ($flip) {
    for ($i = $ylim - 1; $i >= $yoff; $i--) {
      $ymatches[$this->xv[$i]][] = $i;
    }
  }
  else {
    for ($i = $ylim - 1; $i >= $yoff; $i--) {
      $ymatches[$this->yv[$i]][] = $i;
    }
  }

  $this->lcs = 0;
  $this->seq[0] = $yoff - 1;
  $this->in_seq = array();
  $ymids[0] = array();

  $numer = $xlim - $xoff + $nchunks - 1;
  $x = $xoff;
  for ($chunk = 0; $chunk < $nchunks; $chunk++) {
    if ($chunk > 0) {
      // Remember where each candidate subsequence stood at the end of the
      // previous chunk.
      for ($i = 0; $i <= $this->lcs; $i++) {
        $ymids[$i][$chunk - 1] = $this->seq[$i];
      }
    }

    $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $chunk) / $nchunks);
    for (; $x < $x1; $x++) {
      $line = $flip ? $this->yv[$x] : $this->xv[$x];
      if (empty($ymatches[$line])) {
        continue;
      }
      // $matches is a zero-based list, so one integer cursor shared by both
      // loops replaces the each() calls, which PHP 8 removed. The second
      // loop resumes right after the element the first loop stopped on.
      $matches = $ymatches[$line];
      $n_matches = count($matches);
      $m = 0;
      while ($m < $n_matches) {
        $y = $matches[$m++];
        if (empty($this->in_seq[$y])) {
          $k = $this->_lcs_pos($y);
          $this::USE_ASSERTS && assert($k > 0);
          $ymids[$k] = $ymids[$k - 1];
          break;
        }
      }
      while ($m < $n_matches) {
        $y = $matches[$m++];
        if ($y > $this->seq[$k - 1]) {
          $this::USE_ASSERTS && assert($y < $this->seq[$k]);
          // Optimization: this is a common case:
          // next match is just replacing previous match.
          $this->in_seq[$this->seq[$k]] = FALSE;
          $this->seq[$k] = $y;
          $this->in_seq[$y] = 1;
        }
        elseif (empty($this->in_seq[$y])) {
          $k = $this->_lcs_pos($y);
          $this::USE_ASSERTS && assert($k > 0);
          $ymids[$k] = $ymids[$k - 1];
        }
      }
    }
  }

  // Build the split points, swapping the coordinates back if the ranges
  // were flipped on entry.
  $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
  $ymid = $ymids[$this->lcs];
  for ($n = 0; $n < $nchunks - 1; $n++) {
    $x1 = $xoff + (int) (($numer + ($xlim - $xoff) * $n) / $nchunks);
    $y1 = $ymid[$n] + 1;
    $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
  }
  $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);

  return array($this->lcs, $seps);
}
247
/**
 * Inserts a Y position into the candidate sequence $this->seq.
 *
 * If the sequence is empty or $ypos is greater than its last entry, the
 * position is appended and the LCS length grows by one. Otherwise a binary
 * search over positions 1..lcs locates the first entry that is not smaller
 * than $ypos, and that entry is replaced. $this->in_seq is kept in step
 * with the contents of $this->seq in both cases.
 *
 * @param int $ypos
 *   The Y position to record.
 *
 * @return int
 *   The index in $this->seq at which $ypos was stored.
 */
protected function _lcs_pos($ypos) {
  $end = $this->lcs;
  if ($end == 0 || $ypos > $this->seq[$end]) {
    $this->seq[++$this->lcs] = $ypos;
    $this->in_seq[$ypos] = 1;
    return $this->lcs;
  }

  $beg = 1;
  while ($beg < $end) {
    $mid = (int) (($beg + $end) / 2);
    if ($ypos > $this->seq[$mid]) {
      $beg = $mid + 1;
    }
    else {
      $end = $mid;
    }
  }

  $this::USE_ASSERTS && assert($ypos != $this->seq[$end]);

  // Evict the entry being overwritten before recording the new one.
  $this->in_seq[$this->seq[$end]] = FALSE;
  $this->seq[$end] = $ypos;
  $this->in_seq[$ypos] = 1;
  return $end;
}
275
/**
 * Find LCS of two sequences.
 *
 * The results are recorded in the vectors $this->{x,y}changed[], by
 * storing a 1 in the element for each line that is an insertion
 * or deletion (ie. is not in the LCS).
 *
 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
 *
 * Note that XLIM, YLIM are exclusive bounds.
 * All line numbers are origin-0 and discarded lines are not counted.
 *
 * @param int $xoff
 *   Inclusive start offset into $this->xv.
 * @param int $xlim
 *   Exclusive end offset into $this->xv.
 * @param int $yoff
 *   Inclusive start offset into $this->yv.
 * @param int $ylim
 *   Exclusive end offset into $this->yv.
 */
protected function _compareseq($xoff, $xlim, $yoff, $ylim) {
  // Slide down the bottom initial diagonal. Lines are compared strictly:
  // a loose comparison would treat numeric strings such as "10" and "1e1"
  // as the same line and report them as unchanged.
  while ($xoff < $xlim && $yoff < $ylim && $this->xv[$xoff] === $this->yv[$yoff]) {
    ++$xoff;
    ++$yoff;
  }

  // Slide up the top initial diagonal.
  while ($xlim > $xoff && $ylim > $yoff && $this->xv[$xlim - 1] === $this->yv[$ylim - 1]) {
    --$xlim;
    --$ylim;
  }

  if ($xoff == $xlim || $yoff == $ylim) {
    $lcs = 0;
  }
  else {
    // This is ad hoc but seems to work well.
    $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
    list($lcs, $seps) = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
  }

  if ($lcs == 0) {
    // X and Y sequences have no common subsequence:
    // mark all changed.
    while ($yoff < $ylim) {
      $this->ychanged[$this->yind[$yoff++]] = 1;
    }
    while ($xoff < $xlim) {
      $this->xchanged[$this->xind[$xoff++]] = 1;
    }
  }
  else {
    // Use the partitions to split this problem into subproblems: each
    // pair of consecutive split points bounds one recursive call.
    $pt1 = $seps[0];
    $n_seps = count($seps);
    for ($i = 1; $i < $n_seps; $i++) {
      $pt2 = $seps[$i];
      $this->_compareseq($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
      $pt1 = $pt2;
    }
  }
}
334
/**
 * Adjust inserts/deletes of identical lines to join changes
 * as much as possible.
 *
 * We do something when a run of changed lines include a
 * line at one end and has an excluded, identical line at the other.
 * We are free to choose which identical line is included.
 * `compareseq' usually chooses the one at the beginning,
 * but usually it is cleaner to consider the following identical line
 * to be the "change".
 *
 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
 *
 * @param array $lines
 *   The lines of the file whose change flags are being adjusted.
 * @param array $changed
 *   Change flags for $lines, one per line. Passed by reference and
 *   updated in place.
 * @param array $other_changed
 *   Change flags for the other file. Only read.
 */
protected function _shift_boundaries($lines, &$changed, $other_changed) {
  $i = 0;
  $j = 0;

  $this::USE_ASSERTS && assert(count($lines) == count($changed));
  $len = count($lines);
  $other_len = count($other_changed);

  while (1) {
    /*
     * Scan forwards to find beginning of another run of changes.
     * Also keep track of the corresponding point in the other file.
     *
     * Throughout this code, $i and $j are adjusted together so that
     * the first $i elements of $changed and the first $j elements
     * of $other_changed both contain the same number of zeros
     * (unchanged lines).
     * Furthermore, $j is always kept so that $j == $other_len or
     * $other_changed[$j] == FALSE.
     */
    while ($j < $other_len && $other_changed[$j]) {
      $j++;
    }
    while ($i < $len && !$changed[$i]) {
      $this::USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
      $i++;
      $j++;
      while ($j < $other_len && $other_changed[$j]) {
        $j++;
      }
    }

    if ($i == $len) {
      break;
    }
    $start = $i;

    // Find the end of this run of changes.
    while (++$i < $len && $changed[$i]) {
      continue;
    }

    do {
      /*
       * Record the length of this run of changes, so that
       * we can later determine whether the run has grown.
       */
      $runlength = $i - $start;

      /*
       * Move the changed region back, so long as the
       * previous unchanged line matches the last changed one.
       * This merges with previous changed regions.
       *
       * Lines are compared strictly: a loose comparison would treat
       * numeric strings such as "10" and "1e1" as identical lines.
       */
      while ($start > 0 && $lines[$start - 1] === $lines[$i - 1]) {
        $changed[--$start] = 1;
        $changed[--$i] = FALSE;
        while ($start > 0 && $changed[$start - 1]) {
          $start--;
        }
        $this::USE_ASSERTS && assert($j > 0);
        while ($other_changed[--$j]) {
          continue;
        }
        $this::USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
      }

      /*
       * Set CORRESPONDING to the end of the changed run, at the last
       * point where it corresponds to a changed run in the other file.
       * CORRESPONDING == LEN means no such point has been found.
       */
      $corresponding = $j < $other_len ? $i : $len;

      /*
       * Move the changed region forward, so long as the
       * first changed line matches the following unchanged one.
       * This merges with following changed regions.
       * Do this second, so that if there are no merges,
       * the changed region is moved forward as far as possible.
       */
      while ($i < $len && $lines[$start] === $lines[$i]) {
        $changed[$start++] = FALSE;
        $changed[$i++] = 1;
        while ($i < $len && $changed[$i]) {
          $i++;
        }
        $this::USE_ASSERTS && assert($j < $other_len && !$other_changed[$j]);
        $j++;
        if ($j < $other_len && $other_changed[$j]) {
          $corresponding = $i;
          while ($j < $other_len && $other_changed[$j]) {
            $j++;
          }
        }
      }
    } while ($runlength != $i - $start);

    /*
     * If possible, move the fully-merged run of changes
     * back to a corresponding run in the other file.
     */
    while ($corresponding < $i) {
      $changed[--$start] = 1;
      $changed[--$i] = 0;
      $this::USE_ASSERTS && assert($j > 0);
      while ($other_changed[--$j]) {
        continue;
      }
      $this::USE_ASSERTS && assert($j >= 0 && !$other_changed[$j]);
    }
  }
}
461	}