NCBI C++ ToolKit
Diff.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /**
2  dtl -- Diff Template Library
3 
4  In short, Diff Template Library is distributed under so called "BSD license",
5 
6  Copyright (c) 2012 Tatsuhiko Kubo <cubicdaiya@gmail.com>
7  All rights reserved.
8 
9  Redistribution and use in source and binary forms, with or without modification,
10  are permitted provided that the following conditions are met:
11 
12  * Redistributions of source code must retain the above copyright notice,
13  this list of conditions and the following disclaimer.
14 
15  * Redistributions in binary form must reproduce the above copyright notice,
16  this list of conditions and the following disclaimer in the documentation
17  and/or other materials provided with the distribution.
18 
19  * Neither the name of the authors nor the names of its contributors
20  may be used to endorse or promote products derived from this software
21  without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35 
36 /* If you use this library, you must include dtl.hpp only. */
37 
38 #ifndef DTL_DIFF_H
39 #define DTL_DIFF_H
40 
41 namespace dtl {
42 
43  /**
44  * diff class template
45  * sequence must support random_access_iterator.
46  */
47  template <typename elem, typename sequence = vector< elem >, typename comparator = Compare< elem > >
48  class Diff
49  {
50  private :
51  dtl_typedefs(elem, sequence)
52  sequence A;
53  sequence B;
54  size_t M;
55  size_t N;
56  size_t delta;
57  size_t offset;
58  long long *fp;
59  long long editDistance;
64  bool swapped;
65  bool huge;
66  bool trivial;
68  uniHunkVec uniHunks;
69  comparator cmp;
70  public :
71  Diff () {}
72 
73  Diff (const sequence& a,
74  const sequence& b) : A(a), B(b), ses(false) {
75  init();
76  }
77 
78  Diff (const sequence& a,
79  const sequence& b,
80  bool deletesFirst) : A(a), B(b), ses(deletesFirst) {
81  init();
82  }
83 
84  Diff (const sequence& a,
85  const sequence& b,
86  const comparator& comp) : A(a), B(b), ses(false), cmp(comp) {
87  init();
88  }
89 
90  Diff (const sequence& a,
91  const sequence& b,
92  bool deleteFirst,
93  const comparator& comp) : A(a), B(b), ses(deleteFirst), cmp(comp) {
94  init();
95  }
96 
97  ~Diff() {}
98 
99  long long getEditDistance () const {
100  return editDistance;
101  }
102 
103  Lcs< elem > getLcs () const {
104  return lcs;
105  }
106 
107  elemVec getLcsVec () const {
108  return lcs.getSequence();
109  }
110 
111  Ses< elem > getSes () const {
112  return ses;
113  }
114 
115  uniHunkVec getUniHunks () const {
116  return uniHunks;
117  }
118 
119  /* These should be deprecated */
120  bool isHuge () const {
121  return huge;
122  }
123 
124  void onHuge () {
125  this->huge = true;
126  }
127 
128  void offHuge () {
129  this->huge = false;
130  }
131 
132  bool isUnserious () const {
133  return trivial;
134  }
135 
136  void onUnserious () {
137  this->trivial = true;
138  }
139 
140  void offUnserious () {
141  this->trivial = false;
142  }
143 
145  this->editDistanceOnly = true;
146  }
147 
148  /* These are the replacements for the above */
149  bool hugeEnabled () const {
150  return huge;
151  }
152 
153  void enableHuge () {
154  this->huge = true;
155  }
156 
157  void disableHuge () {
158  this->huge = false;
159  }
160 
161  bool trivialEnabled () const {
162  return trivial;
163  }
164 
165  void enableTrivial () const {
166  this->trivial = true;
167  }
168 
169  void disableTrivial () {
170  this->trivial = false;
171  }
172 
174  this->editDistanceOnly = true;
175  }
176 
177  /**
178  * patching with Unified Format Hunks
179  */
180  sequence uniPatch (const sequence& seq) {
181  elemList seqLst(seq.begin(), seq.end());
182  sesElemVec shunk;
183  sesElemVec_iter vsesIt;
184  elemList_iter lstIt = seqLst.begin();
185  long long inc_dec_total = 0;
186  long long gap = 1;
187  for (uniHunkVec_iter it=uniHunks.begin();it!=uniHunks.end();++it) {
188  joinSesVec(shunk, it->common[0]);
189  joinSesVec(shunk, it->change);
190  joinSesVec(shunk, it->common[1]);
191  it->a += inc_dec_total;
192  inc_dec_total += it->inc_dec_count;
193  for (long long i=0;i<it->a - gap;++i) {
194  ++lstIt;
195  }
196  gap = it->a + it->b + it->inc_dec_count;
197  vsesIt = shunk.begin();
198  while (vsesIt!=shunk.end()) {
199  switch (vsesIt->second.type) {
200  case SES_ADD :
201  seqLst.insert(lstIt, vsesIt->first);
202  break;
203  case SES_DELETE :
204  if (lstIt != seqLst.end()) {
205  lstIt = seqLst.erase(lstIt);
206  }
207  break;
208  case SES_COMMON :
209  if (lstIt != seqLst.end()) {
210  ++lstIt;
211  }
212  break;
213  default :
214  // no fall-through
215  break;
216  }
217  ++vsesIt;
218  }
219  shunk.clear();
220  }
221 
222  sequence patchedSeq(seqLst.begin(), seqLst.end());
223  return patchedSeq;
224  }
225 
226  /**
227  * patching with Shortest Edit Script (SES)
228  */
229  sequence patch (const sequence& seq) const {
230  sesElemVec sesSeq = ses.getSequence();
231  elemList seqLst(seq.begin(), seq.end());
232  elemList_iter lstIt = seqLst.begin();
233  for (sesElemVec_iter sesIt=sesSeq.begin();sesIt!=sesSeq.end();++sesIt) {
234  switch (sesIt->second.type) {
235  case SES_ADD :
236  seqLst.insert(lstIt, sesIt->first);
237  break;
238  case SES_DELETE :
239  lstIt = seqLst.erase(lstIt);
240  break;
241  case SES_COMMON :
242  ++lstIt;
243  break;
244  default :
245  // no through
246  break;
247  }
248  }
249  sequence patchedSeq(seqLst.begin(), seqLst.end());
250  return patchedSeq;
251  }
252 
253  /**
254  * compose Longest Common Subsequence and Shortest Edit Script.
255  * The algorithm implemented here is based on "An O(NP) Sequence Comparison Algorithm"
256  * described by Sun Wu, Udi Manber and Gene Myers
257  */
258  void compose() {
259 
260  if (isHuge()) {
262  }
263 
264  long long p = -1;
265  fp = new long long[M + N + 3];
266  fill(&fp[0], &fp[M + N + 3], -1);
267  path = editPath(M + N + 3);
268  fill(path.begin(), path.end(), -1);
269  ONP:
270  do {
271  ++p;
272  for (long long k=-p;k<=static_cast<long long>(delta)-1;++k) {
273  fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
274  }
275  for (long long k=static_cast<long long>(delta)+p;k>=static_cast<long long>(delta)+1;--k) {
276  fp[k+offset] = snake(k, fp[k-1+offset]+1, fp[k+1+offset]);
277  }
278  fp[delta+offset] = snake(static_cast<long long>(delta), fp[delta-1+offset]+1, fp[delta+1+offset]);
279  } while (fp[delta+offset] != static_cast<long long>(N) && pathCordinates.size() < MAX_CORDINATES_SIZE);
280 
281  editDistance += static_cast<long long>(delta) + 2 * p;
282  long long r = path[delta+offset];
283  P cordinate;
284  editPathCordinates epc(0);
285 
286  // recording edit distance only
287  if (editDistanceOnly) {
288  delete[] this->fp;
289  return;
290  }
291 
292  while(r != -1) {
293  cordinate.x = pathCordinates[(size_t)r].x;
294  cordinate.y = pathCordinates[(size_t)r].y;
295  epc.push_back(cordinate);
296  r = pathCordinates[(size_t)r].k;
297  }
298 
299  // record Longest Common Subsequence & Shortest Edit Script
300  if (!recordSequence(epc)) {
301  pathCordinates.resize(0);
302  epc.resize(0);
303  p = -1;
304  goto ONP;
305  }
306  delete[] this->fp;
307  }
308 
309  /**
310  * print difference between A and B as an SES
311  */
312  template < typename stream >
313  void printSES (stream& out) const {
314  sesElemVec ses_v = ses.getSequence();
315  for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
316  }
317 
318  void printSES (ostream& out = cout) const {
319  printSES< ostream >(out);
320  }
321 
322  /**
323  * print differences given an SES
324  */
325  template < typename stream >
326  static void printSES (const Ses< elem >& s, stream& out) {
327  sesElemVec ses_v = s.getSequence();
328  for_each(ses_v.begin(), ses_v.end(), ChangePrinter< sesElem, stream >(out));
329  }
330 
331  static void printSES (const Ses< elem >& s, ostream& out = cout) {
332  printSES< ostream >(s, out);
333  }
334 
335  /**
336  * print difference between A and B in the Unified Format
337  */
338  template < typename stream >
339  void printUnifiedFormat (stream& out) const {
340  for_each(uniHunks.begin(), uniHunks.end(), UniHunkPrinter< sesElem, stream >(out));
341  }
342 
343  void printUnifiedFormat (ostream& out = cout) const {
344  printUnifiedFormat< ostream >(out);
345  }
346 
347  /**
348  * print unified format difference with given unified format hunks
349  */
350  template < typename stream >
351  static void printUnifiedFormat (const uniHunkVec& hunks, stream& out) {
352  for_each(hunks.begin(), hunks.end(), UniHunkPrinter< sesElem >(out));
353  }
354 
355  static void printUnifiedFormat (const uniHunkVec& hunks, ostream& out = cout) {
356  printUnifiedFormat< ostream >(hunks, out);
357  }
358 
359  /**
360  * compose Unified Format Hunks from Shortest Edit Script
361  */
363  sesElemVec common[2];
364  sesElemVec change;
365  sesElemVec ses_v = ses.getSequence();
366  long long l_cnt = 1;
367  long long length = distance(ses_v.begin(), ses_v.end());
368  long long middle = 0;
369  bool isMiddle, isAfter;
370  elemInfo einfo;
371  long long a, b, c, d; // @@ -a,b +c,d @@
372  long long inc_dec_count = 0;
373  uniHunk< sesElem > hunk;
374  sesElemVec adds;
375  sesElemVec deletes;
376 
377  isMiddle = isAfter = false;
378  a = b = c = d = 0;
379 
380  for (sesElemVec_iter it=ses_v.begin();it!=ses_v.end();++it, ++l_cnt) {
381  einfo = it->second;
382  switch (einfo.type) {
383  case SES_ADD :
384  middle = 0;
385  ++inc_dec_count;
386  adds.push_back(*it);
387  if (!isMiddle) isMiddle = true;
388  if (isMiddle) ++d;
389  if (l_cnt >= length) {
390  joinSesVec(change, deletes);
391  joinSesVec(change, adds);
392  isAfter = true;
393  }
394  break;
395  case SES_DELETE :
396  middle = 0;
397  --inc_dec_count;
398  deletes.push_back(*it);
399  if (!isMiddle) isMiddle = true;
400  if (isMiddle) ++b;
401  if (l_cnt >= length) {
402  joinSesVec(change, deletes);
403  joinSesVec(change, adds);
404  isAfter = true;
405  }
406  break;
407  case SES_COMMON :
408  ++b;++d;
409  if (common[1].empty() && adds.empty() && deletes.empty() && change.empty()) {
410  if (static_cast<long long>(common[0].size()) < DTL_CONTEXT_SIZE) {
411  if (a == 0 && c == 0) {
412  if (!wasSwapped()) {
413  a = einfo.beforeIdx;
414  c = einfo.afterIdx;
415  } else {
416  a = einfo.afterIdx;
417  c = einfo.beforeIdx;
418  }
419  }
420  common[0].push_back(*it);
421  } else {
422  rotate(common[0].begin(), common[0].begin() + 1, common[0].end());
423  common[0].pop_back();
424  common[0].push_back(*it);
425  ++a;++c;
426  --b;--d;
427  }
428  }
429  if (isMiddle && !isAfter) {
430  ++middle;
431  joinSesVec(change, deletes);
432  joinSesVec(change, adds);
433  change.push_back(*it);
434  if (middle >= DTL_SEPARATE_SIZE || l_cnt >= length) {
435  isAfter = true;
436  }
437  adds.clear();
438  deletes.clear();
439  }
440  break;
441  default :
442  // no through
443  break;
444  }
445  // compose unified format hunk
446  if (isAfter && !change.empty()) {
447  sesElemVec_iter cit = it;
448  long long cnt = 0;
449  for (long long i=0;i<DTL_SEPARATE_SIZE && (cit != ses_v.end());++i, ++cit) {
450  if (cit->second.type == SES_COMMON) {
451  ++cnt;
452  }
453  }
454  if (cnt < DTL_SEPARATE_SIZE && l_cnt < length) {
455  middle = 0;
456  isAfter = false;
457  continue;
458  }
459  if (static_cast<long long>(common[0].size()) >= DTL_SEPARATE_SIZE) {
460  long long c0size = static_cast<long long>(common[0].size());
461  rotate(common[0].begin(),
462  common[0].begin() + (size_t)c0size - DTL_SEPARATE_SIZE,
463  common[0].end());
464  for (long long i=0;i<c0size - DTL_SEPARATE_SIZE;++i) {
465  common[0].pop_back();
466  }
467  a += c0size - DTL_SEPARATE_SIZE;
468  c += c0size - DTL_SEPARATE_SIZE;
469  }
470  if (a == 0) ++a;
471  if (c == 0) ++c;
472  if (wasSwapped()) swap(a, c);
473  hunk.a = a;
474  hunk.b = b;
475  hunk.c = c;
476  hunk.d = d;
477  hunk.common[0] = common[0];
478  hunk.change = change;
479  hunk.common[1] = common[1];
480  hunk.inc_dec_count = inc_dec_count;
481  uniHunks.push_back(hunk);
482  isMiddle = false;
483  isAfter = false;
484  common[0].clear();
485  common[1].clear();
486  adds.clear();
487  deletes.clear();
488  change.clear();
489  a = b = c = d = middle = inc_dec_count = 0;
490  }
491  }
492  }
493 
494  /**
495  * compose ses from stream
496  */
497  template <typename stream>
498  static Ses< elem > composeSesFromStream (stream& st)
499  {
500  elem line;
501  Ses< elem > ret;
502  long long x_idx, y_idx;
503  x_idx = y_idx = 1;
504  while (getline(st, line)) {
505  elem mark(line.begin(), line.begin() + 1);
506  elem e(line.begin() + 1, line.end());
507  if (mark == SES_MARK_DELETE) {
508  ret.addSequence(e, x_idx, 0, SES_DELETE);
509  ++x_idx;
510  } else if (mark == SES_MARK_ADD) {
511  ret.addSequence(e, y_idx, 0, SES_ADD);
512  ++y_idx;
513  } else if (mark == SES_MARK_COMMON) {
514  ret.addSequence(e, x_idx, y_idx, SES_COMMON);
515  ++x_idx;
516  ++y_idx;
517  }
518  }
519  return ret;
520  }
521 
522  private :
523  /**
524  * initialize
525  */
526  void init () {
527 #ifdef NCBI_COMPILER_WORKSHOP
528  distance(A.begin(), A.end(), M);
529  distance(B.begin(), B.end(), N);
530 #else
531  M = distance(A.begin(), A.end());
532  N = distance(B.begin(), B.end());
533 #endif
534  if (M < N) {
535  swapped = false;
536  } else {
537  swap(A, B);
538  swap(M, N);
539  swapped = true;
540  }
541  editDistance = 0;
542  delta = N - M;
543  offset = M + 1;
544  huge = false;
545  trivial = false;
546  editDistanceOnly = false;
547  fp = NULL;
548  }
549 
550  /**
551  * search shortest path and record the path
552  */
553  long long snake(const long long& k, const long long& above, const long long& below) {
554  long long r = above > below ? path[(size_t)k-1+offset] : path[(size_t)k+1+offset];
555  long long y = max(above, below);
556  long long x = y - k;
557  while ((size_t)x < M && (size_t)y < N && (swapped ? cmp.impl(B[(size_t)y], A[(size_t)x]) : cmp.impl(A[(size_t)x], B[(size_t)y]))) {
558  ++x;++y;
559  }
560 
561  path[(size_t)k+offset] = static_cast<long long>(pathCordinates.size());
562  if (!editDistanceOnly) {
563  P p;
564  p.x = x;p.y = y;p.k = r;
565  pathCordinates.push_back(p);
566  }
567  return y;
568  }
569 
570  /**
571  * record SES and LCS
572  */
574  sequence_const_iter x(A.begin());
575  sequence_const_iter y(B.begin());
576  long long x_idx, y_idx; // line number for Unified Format
577  long long px_idx, py_idx; // cordinates
578  bool complete = false;
579  x_idx = y_idx = 1;
580  px_idx = py_idx = 0;
581  for (size_t i=v.size()-1;!complete;--i) {
582  while(px_idx < v[i].x || py_idx < v[i].y) {
583  if (v[i].y - v[i].x > py_idx - px_idx) {
584  if (!wasSwapped()) {
585  ses.addSequence(*y, 0, y_idx, SES_ADD);
586  } else {
587  ses.addSequence(*y, y_idx, 0, SES_DELETE);
588  }
589  ++y;
590  ++y_idx;
591  ++py_idx;
592  } else if (v[i].y - v[i].x < py_idx - px_idx) {
593  if (!wasSwapped()) {
594  ses.addSequence(*x, x_idx, 0, SES_DELETE);
595  } else {
596  ses.addSequence(*x, 0, x_idx, SES_ADD);
597  }
598  ++x;
599  ++x_idx;
600  ++px_idx;
601  } else {
602  if (!wasSwapped()) {
603  lcs.addSequence(*x);
604  ses.addSequence(*x, x_idx, y_idx, SES_COMMON);
605  } else {
606  lcs.addSequence(*y);
607  ses.addSequence(*y, y_idx, x_idx, SES_COMMON);
608  }
609  ++x;
610  ++y;
611  ++x_idx;
612  ++y_idx;
613  ++px_idx;
614  ++py_idx;
615  }
616  }
617  if (i == 0) complete = true;
618  }
619 
620  if (x_idx > static_cast<long long>(M) && y_idx > static_cast<long long>(N)) {
621  // all recording succeeded
622  } else {
623  // trivial difference
624  if (trivialEnabled()) {
625  if (!wasSwapped()) {
626  recordOddSequence(x_idx, M, x, SES_DELETE);
627  recordOddSequence(y_idx, N, y, SES_ADD);
628  } else {
629  recordOddSequence(x_idx, M, x, SES_ADD);
630  recordOddSequence(y_idx, N, y, SES_DELETE);
631  }
632  return true;
633  }
634 
635  // nontrivial difference
636  sequence A_(A.begin() + (size_t)x_idx - 1, A.end());
637  sequence B_(B.begin() + (size_t)y_idx - 1, B.end());
638  A = A_;
639  B = B_;
640 #ifdef NCBI_COMPILER_WORKSHOP
641  distance(A.begin(), A.end(), M);
642  distance(B.begin(), B.end(), N);
643 #else
644  M = distance(A.begin(), A.end());
645  N = distance(B.begin(), B.end());
646 #endif
647  delta = N - M;
648  offset = M + 1;
649  delete[] fp;
650  fp = new long long[M + N + 3];
651  fill(&fp[0], &fp[M + N + 3], -1);
652  fill(path.begin(), path.end(), -1);
653  return false;
654  }
655  return true;
656  }
657 
658  /**
659  * record odd sequence in SES
660  */
661  void inline recordOddSequence (long long idx, long long length, sequence_const_iter it, const edit_t et) {
662  while(idx < length){
663  ses.addSequence(*it, idx, 0, et);
664  ++it;
665  ++idx;
666  ++editDistance;
667  }
668  ses.addSequence(*it, idx, 0, et);
669  ++editDistance;
670  }
671 
672  /**
673  * join SES vectors
674  */
675  void inline joinSesVec (sesElemVec& s1, sesElemVec& s2) const {
676  if (!s2.empty()) {
677  for (sesElemVec_iter vit=s2.begin();vit!=s2.end();++vit) {
678  s1.push_back(*vit);
679  }
680  }
681  }
682 
683  /**
684  * check if the sequences have been swapped
685  */
686  bool inline wasSwapped () const {
687  return swapped;
688  }
689 
690  };
691 }
692 
693 #endif // DTL_DIFF_H
#define false
Definition: bool.h:36
ses element printer class template
Definition: functors.hpp:78
diff class template sequence must support random_access_iterator.
Definition: Diff.hpp:49
void enableTrivial() const
Definition: Diff.hpp:165
static Ses< elem > composeSesFromStream(stream &st)
compose ses from stream
Definition: Diff.hpp:498
void printUnifiedFormat(stream &out) const
print difference between A and B in the Unified Format
Definition: Diff.hpp:339
bool hugeEnabled() const
Definition: Diff.hpp:149
dtl_typedefs(elem, sequence) sequence A
void disableHuge()
Definition: Diff.hpp:157
comparator cmp
Definition: Diff.hpp:69
editPath path
Definition: Diff.hpp:62
bool isHuge() const
Definition: Diff.hpp:120
void onUnserious()
Definition: Diff.hpp:136
size_t N
Definition: Diff.hpp:55
Diff(const sequence &a, const sequence &b, const comparator &comp)
Definition: Diff.hpp:84
bool editDistanceOnly
Definition: Diff.hpp:67
void printSES(ostream &out=cout) const
Definition: Diff.hpp:318
size_t delta
Definition: Diff.hpp:56
Diff(const sequence &a, const sequence &b, bool deletesFirst)
Definition: Diff.hpp:78
static void printUnifiedFormat(const uniHunkVec &hunks, stream &out)
print unified format difference with given unified format hunks
Definition: Diff.hpp:351
void editDistanceOnlyEnabled()
Definition: Diff.hpp:173
Diff()
Definition: Diff.hpp:71
size_t offset
Definition: Diff.hpp:57
void recordOddSequence(long long idx, long long length, sequence_const_iter it, const edit_t et)
record odd sequence in SES
Definition: Diff.hpp:661
void composeUnifiedHunks()
compose Unified Format Hunks from Shortest Edit Script
Definition: Diff.hpp:362
long long * fp
Definition: Diff.hpp:58
void disableTrivial()
Definition: Diff.hpp:169
bool huge
Definition: Diff.hpp:65
sequence patch(const sequence &seq) const
patching with Shortest Edit Script (SES)
Definition: Diff.hpp:229
void compose()
compose Longest Common Subsequence and Shortest Edit Script.
Definition: Diff.hpp:258
long long getEditDistance() const
Definition: Diff.hpp:99
void init()
initialize
Definition: Diff.hpp:526
void offUnserious()
Definition: Diff.hpp:140
size_t M
Definition: Diff.hpp:54
bool wasSwapped() const
check if the sequences have been swapped
Definition: Diff.hpp:686
Ses< elem > ses
Definition: Diff.hpp:61
static void printSES(const Ses< elem > &s, stream &out)
print differences given an SES
Definition: Diff.hpp:326
sequence B
Definition: Diff.hpp:53
elemVec getLcsVec() const
Definition: Diff.hpp:107
Diff(const sequence &a, const sequence &b, bool deleteFirst, const comparator &comp)
Definition: Diff.hpp:90
Lcs< elem > lcs
Definition: Diff.hpp:60
sequence uniPatch(const sequence &seq)
patching with Unified Format Hunks
Definition: Diff.hpp:180
editPathCordinates pathCordinates
Definition: Diff.hpp:63
bool trivial
Definition: Diff.hpp:66
void printSES(stream &out) const
print difference between A and B as an SES
Definition: Diff.hpp:313
Lcs< elem > getLcs() const
Definition: Diff.hpp:103
Diff(const sequence &a, const sequence &b)
Definition: Diff.hpp:73
void offHuge()
Definition: Diff.hpp:128
void printUnifiedFormat(ostream &out=cout) const
Definition: Diff.hpp:343
~Diff()
Definition: Diff.hpp:97
bool recordSequence(const editPathCordinates &v)
record SES and LCS
Definition: Diff.hpp:573
uniHunkVec getUniHunks() const
Definition: Diff.hpp:115
bool trivialEnabled() const
Definition: Diff.hpp:161
Ses< elem > getSes() const
Definition: Diff.hpp:111
bool isUnserious() const
Definition: Diff.hpp:132
bool swapped
Definition: Diff.hpp:64
long long snake(const long long &k, const long long &above, const long long &below)
search shortest path and record the path
Definition: Diff.hpp:553
static void printUnifiedFormat(const uniHunkVec &hunks, ostream &out=cout)
Definition: Diff.hpp:355
long long editDistance
Definition: Diff.hpp:59
void joinSesVec(sesElemVec &s1, sesElemVec &s2) const
join SES vectors
Definition: Diff.hpp:675
uniHunkVec uniHunks
Definition: Diff.hpp:68
static void printSES(const Ses< elem > &s, ostream &out=cout)
Definition: Diff.hpp:331
void enableHuge()
Definition: Diff.hpp:153
void onHuge()
Definition: Diff.hpp:124
void onOnlyEditDistance()
Definition: Diff.hpp:144
Longest Common Subsequence template class.
Definition: Lcs.hpp:48
Shortest Edit Script template class.
Definition: Ses.hpp:48
sesElemVec getSequence() const
Definition: Ses.hpp:119
void addSequence(elem e, long long beforeIdx, long long afterIdx, const edit_t type)
Definition: Ses.hpp:83
unified format element printer class template
Definition: functors.hpp:103
#define A(i)
Definition: ecp_curves.c:948
std::ofstream out("events_result.xml")
main entry point for tests
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NULL
Definition: ncbistd.hpp:225
int i
constexpr auto rotate(list< Ts... >) -> decltype((list<>{}+...+rotate_item< Ts >{}))
constexpr bool empty(list< Ts... >) noexcept
dtl – Diff Template Library
Definition: Diff.hpp:41
int edit_t
type of edit for SES
Definition: variables.hpp:71
const edit_t SES_ADD
Definition: variables.hpp:74
const long long DTL_CONTEXT_SIZE
Definition: variables.hpp:96
const unsigned long long MAX_CORDINATES_SIZE
limit of cordinate size
Definition: variables.hpp:110
const long long DTL_SEPARATE_SIZE
Definition: variables.hpp:95
const edit_t SES_COMMON
Definition: variables.hpp:73
const edit_t SES_DELETE
Definition: variables.hpp:72
vector< long long > editPath
Definition: variables.hpp:112
vector< P > editPathCordinates
Definition: variables.hpp:113
const struct ncbi::grid::netcache::search::fields::SIZE size
unsigned int a
Definition: ncbi_localip.c:102
T max(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static unsigned cnt[256]
cordinate for registering route
Definition: variables.hpp:101
long long x
Definition: variables.hpp:102
long long k
Definition: variables.hpp:104
long long y
Definition: variables.hpp:103
info for Unified Format
Definition: variables.hpp:86
long long afterIdx
Definition: variables.hpp:88
long long beforeIdx
Definition: variables.hpp:87
Structure of Unified Format Hunk.
Definition: variables.hpp:119
vector< sesElem > change
Definition: variables.hpp:122
long long b
Definition: variables.hpp:120
long long d
Definition: variables.hpp:120
long long inc_dec_count
Definition: variables.hpp:123
long long c
Definition: variables.hpp:120
long long a
Definition: variables.hpp:120
vector< sesElem > common[2]
Definition: variables.hpp:121
#define SES_MARK_DELETE
mark of SES
Definition: variables.hpp:79
#define SES_MARK_ADD
Definition: variables.hpp:81
#define SES_MARK_COMMON
Definition: variables.hpp:80
Modified on Tue Dec 05 02:18:04 2023 by modify_doxy.py rev. 669887