NCBI C++ ToolKit
mapper_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: mapper_unit_test.cpp 96347 2022-03-17 16:29:41Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aleksey Grichenko
27  *
28  * File Description:
29  * Unit test for CSeq_loc_Mapper_Base, CSeq_align_Mapper_Base and
30  * some closely related code.
31  *
32  * ===========================================================================
33  */
34 #define NCBI_TEST_APPLICATION
35 #include <ncbi_pch.hpp>
36 
47 
48 #include <corelib/ncbiapp.hpp>
49 #include <corelib/test_boost.hpp>
50 
51 #include <common/test_assert.h> /* This header must go last */
52 
55 
56 
// Checks that a seq-id holds the GI variant and that its GI equals `gi`.
// Expands to two BOOST_CHECK_EQUAL statements, each already terminated by
// ';', so the expansion is not a single statement (take care under an
// unbraced if/else).
57 #define CHECK_GI(id, gi) \
58  BOOST_CHECK_EQUAL((id).Which(), CSeq_id::e_Gi); \
59  BOOST_CHECK_EQUAL((id).GetGi(), gi);
60 
// Checks a seq-interval against expected values:
//  - its id must be the GI `gi` (via CHECK_GI), with the given from/to;
//  - IsSetStrand() must equal `have_strand`; the strand value itself is
//    compared only when `have_strand` is true;
//  - for each end, passing CInt_fuzz::eLim_unk means "no fuzz expected"
//    (the fuzz must be unset); any other value means the fuzz must be set,
//    be of the e_Lim variant and carry exactly that limit.
// The macro arguments other than `loc` and the id are pasted unparenthesized
// and some are evaluated more than once.
61 #define CHECK_SEQ_INT(loc, gi, from, to, \
62  have_strand, strand, \
63  fuzz_from, fuzz_to) \
64  CHECK_GI((loc).GetId(), gi); \
65  BOOST_CHECK_EQUAL((loc).GetFrom(), from); \
66  BOOST_CHECK_EQUAL((loc).GetTo(), to); \
67  BOOST_CHECK_EQUAL((loc).IsSetStrand(), have_strand); \
68  if (have_strand) { \
69  BOOST_CHECK_EQUAL((loc).GetStrand(), strand); \
70  } \
71  if (fuzz_from != CInt_fuzz::eLim_unk) { \
72  BOOST_CHECK((loc).IsSetFuzz_from()); \
73  BOOST_CHECK_EQUAL((loc).GetFuzz_from().Which(), CInt_fuzz::e_Lim); \
74  BOOST_CHECK_EQUAL((loc).GetFuzz_from().GetLim(), fuzz_from); \
75  } \
76  else { \
77  BOOST_CHECK(!(loc).IsSetFuzz_from()); \
78  } \
79  if (fuzz_to != CInt_fuzz::eLim_unk) { \
80  BOOST_CHECK((loc).IsSetFuzz_to()); \
81  BOOST_CHECK_EQUAL((loc).GetFuzz_to().Which(), CInt_fuzz::e_Lim); \
82  BOOST_CHECK_EQUAL((loc).GetFuzz_to().GetLim(), fuzz_to); \
83  } \
84  else { \
85  BOOST_CHECK(!(loc).IsSetFuzz_to()); \
86  }
87 
88 // Workaround for an internal compiler error on MSVC7 when the original
89 // kInvalidSeqPos is used with BOOST_CHECK_EQUAL.
// On those compilers the name is replaced by a macro expanding to -1.
90 #if NCBI_COMPILER_MSVC && (_MSC_VER < 1400) // 1400 == VC++ 8.0
91 # undef kInvalidSeqPos
92 # define kInvalidSeqPos -1
93 #endif
94 
96 {
 // Empty body.
 // NOTE(review): the line that introduces this block (embedded line 95) is
 // missing from this listing, so its purpose cannot be confirmed from here.
97 }
98 
99 
101 {
 // Empty body.
 // NOTE(review): the line that introduces this block (embedded line 100) is
 // missing from this listing, so its purpose cannot be confirmed from here.
102 }
103 
104 
105 // Read two seq-locs (source first, then destination) as ASN.1 text from
// the input stream and create a seq-loc mapper from them.
107 {
 // NOTE(review): the signature line (embedded line 106) is missing from
 // this listing; call sites pass the input stream and store the result in
 // a unique_ptr<CSeq_loc_Mapper_Base>.
108  CSeq_loc src, dst;
109  in >> MSerial_AsnText >> src;
110  in >> MSerial_AsnText >> dst;
 // The mapper is allocated with new; the caller is responsible for it.
111  return new CSeq_loc_Mapper_Base(src, dst);
112 }
113 
114 
115 // Read the reference location from the stream, map the original seq-loc
116 // and compare the mapped result to the reference.
// On mismatch both the expected and the actual location are printed to
// cout as ASN.1 text.
// NOTE(review): the first signature line (embedded line 117) is missing
// from this listing.
118  const CSeq_loc& orig,
119  CNcbiIstream& in)
120 {
121  CSeq_loc ref_mapped;
122  in >> MSerial_AsnText >> ref_mapped;
123  CRef<CSeq_loc> mapped = mapper.Map(orig);
 // A null result makes the comparison below meaningless: abort the test
 // case here instead of dereferencing an empty CRef on the next line.
124  BOOST_REQUIRE(mapped);
125  bool eq = mapped->Equals(ref_mapped);
126  BOOST_CHECK(mapped->Equals(ref_mapped));
 // Dump both objects so a failure can be diagnosed from the test output.
127  if ( !eq ) {
128  cout << "Expected mapped location:" << endl;
129  cout << MSerial_AsnText << ref_mapped;
130  cout << "Actual mapped location:" << endl;
131  cout << MSerial_AsnText << *mapped;
132  }
133 }
134 
135 
137  CNcbiIstream& in)
138 {
 // Stream-only variant: reads the original seq-loc as ASN.1 text from the
 // stream, then delegates to the three-argument TestMappingSeq_loc, which
 // reads the reference location from the same stream.
 // NOTE(review): the first signature line (embedded line 136) is missing
 // from this listing.
139  CSeq_loc orig;
140  in >> MSerial_AsnText >> orig;
141  TestMappingSeq_loc(mapper, orig, in);
142 }
143 
144 
146  CNcbiIstream& in)
147 {
 // Reads a seq-loc as ASN.1 text from the stream and checks that mapping
 // it throws CAnnotMapperException. No reference location is read.
 // NOTE(review): the first signature line (embedded line 145), which
 // carries the function name, is missing from this listing.
148  CSeq_loc orig;
149  in >> MSerial_AsnText >> orig;
150  BOOST_CHECK_THROW(mapper.Map(orig), CAnnotMapperException);
151 }
152 
153 
154 // Read the reference alignment from the stream, map the original
155 // seq-align and compare the mapped result to the reference.
// On mismatch both the expected and the actual alignment are printed to
// cout as ASN.1 text.
// NOTE(review): the first signature line (embedded line 156) is missing
// from this listing.
157  const CSeq_align& orig,
158  CNcbiIstream& in)
159 {
160  CSeq_align ref_mapped;
161  in >> MSerial_AsnText >> ref_mapped;
162  CRef<CSeq_align> mapped = mapper.Map(orig);
 // A null result makes the comparison below meaningless: abort the test
 // case here instead of dereferencing an empty CRef on the next line.
163  BOOST_REQUIRE(mapped);
164  bool eq = mapped->Equals(ref_mapped);
165  BOOST_CHECK(mapped->Equals(ref_mapped));
 // Dump both objects so a failure can be diagnosed from the test output.
166  if ( !eq ) {
167  cout << "Expected mapped alignment:" << endl;
168  cout << MSerial_AsnText << ref_mapped;
169  cout << "Actual mapped alignment:" << endl;
170  cout << MSerial_AsnText << *mapped;
171  }
172 }
173 
174 
176  CNcbiIstream& in)
177 {
 // Stream-only variant: reads the original alignment as ASN.1 text from
 // the stream, then delegates to the three-argument TestMappingSeq_align,
 // which reads the reference alignment from the same stream.
 // NOTE(review): the first signature line (embedded line 175) and the
 // declaration of `orig` (embedded line 178) are missing from this listing.
179  in >> MSerial_AsnText >> orig;
180  TestMappingSeq_align(mapper, orig, in);
181 }
182 
183 
185  CNcbiIstream& in)
186 {
 // Reads an object as ASN.1 text from the stream and checks that mapping
 // it throws CAnnotMapperException. No reference object is read.
 // NOTE(review): the first signature line (embedded line 184) and the
 // declaration of `orig` (embedded line 187) are missing from this listing;
 // presumably `orig` is a CSeq_align -- confirm against the real file.
188  in >> MSerial_AsnText >> orig;
189  BOOST_CHECK_THROW(mapper.Map(orig), CAnnotMapperException);
190 }
191 
192 
193 // Read the reference graph from the stream, map the original seq-graph
194 // and compare the mapped result to the reference.
// On mismatch both the expected and the actual graph are printed to cout
// as ASN.1 text.
// NOTE(review): the first signature line (embedded line 195) is missing
// from this listing.
196  const CSeq_graph& orig,
197  CNcbiIstream& in)
198 {
199  CSeq_graph ref_mapped;
200  in >> MSerial_AsnText >> ref_mapped;
201  CRef<CSeq_graph> mapped = mapper.Map(orig);
 // A null result makes the comparison below meaningless: abort the test
 // case here instead of dereferencing an empty CRef on the next line.
202  BOOST_REQUIRE(mapped);
203  bool eq = mapped->Equals(ref_mapped);
204  BOOST_CHECK(mapped->Equals(ref_mapped));
 // Dump both objects so a failure can be diagnosed from the test output.
205  if ( !eq ) {
206  cout << "Expected mapped graph:" << endl;
207  cout << MSerial_AsnText << ref_mapped;
208  cout << "Actual mapped graph:" << endl;
209  cout << MSerial_AsnText << *mapped;
210  }
211 }
212 
213 
215  CNcbiIstream& in)
216 {
 // Stream-only variant: reads the original graph as ASN.1 text from the
 // stream, then delegates to the three-argument TestMappingSeq_graph,
 // which reads the reference graph from the same stream.
 // NOTE(review): the first signature line (embedded line 214) and the
 // declaration of `orig` (embedded line 217) are missing from this listing.
218  in >> MSerial_AsnText >> orig;
219  TestMappingSeq_graph(mapper, orig, in);
220 }
221 
222 
224  CNcbiIstream& in)
225 {
 // Reads an object as ASN.1 text from the stream and checks that mapping
 // it throws CAnnotMapperException. No reference object is read.
 // NOTE(review): the first signature line (embedded line 223) and the
 // declaration of `orig` (embedded line 226) are missing from this listing;
 // presumably `orig` is a CSeq_graph -- confirm against the real file.
227  in >> MSerial_AsnText >> orig;
228  BOOST_CHECK_THROW(mapper.Map(orig), CAnnotMapperException);
229 }
230 
231 
233 {
234  CNcbiIfstream in("mapper_test_data/simple.asn");
235  cout << "Basic mapping and truncaction test" << endl;
236 
237  CSeq_loc src, dst_plus, dst_minus;
238  in >> MSerial_AsnText >> src;
239  in >> MSerial_AsnText >> dst_plus;
240  in >> MSerial_AsnText >> dst_minus;
241  CSeq_loc_Mapper_Base mapper_plus(src, dst_plus);
242  CSeq_loc_Mapper_Base mapper_minus(src, dst_minus);
243 
244  CSeq_loc orig;
245 
246  in >> MSerial_AsnText >> orig;
247  cout << " Simple interval" << endl;
248  TestMappingSeq_loc(mapper_plus, orig, in);
249  cout << " Simple interval, reversed strand" << endl;
250  TestMappingSeq_loc(mapper_minus, orig, in);
251 
252  in >> MSerial_AsnText >> orig;
253  cout << " Truncated on the right" << endl;
254  TestMappingSeq_loc(mapper_plus, orig, in);
255  cout << " Truncated on the right, reversed strand" << endl;
256  TestMappingSeq_loc(mapper_minus, orig, in);
257 
258  in >> MSerial_AsnText >> orig;
259  cout << " Truncated on the left" << endl;
260  TestMappingSeq_loc(mapper_plus, orig, in);
261  cout << " Truncated on the left, reversed strand" << endl;
262  TestMappingSeq_loc(mapper_minus, orig, in);
263 
264  in >> MSerial_AsnText >> orig;
265  cout << " Truncated on both ends" << endl;
266  TestMappingSeq_loc(mapper_plus, orig, in);
267  cout << " Truncated on both ends, reversed strand" << endl;
268  TestMappingSeq_loc(mapper_minus, orig, in);
269 
270  in >> MSerial_AsnText >> orig;
271  cout << " Minus strand interval" << endl;
272  TestMappingSeq_loc(mapper_plus, orig, in);
273  cout << " Minus strand interval, reversed strand" << endl;
274  TestMappingSeq_loc(mapper_minus, orig, in);
275 
276  in >> MSerial_AsnText >> orig;
277  cout << " Minus strand interval, truncated on the right" << endl;
278  TestMappingSeq_loc(mapper_plus, orig, in);
279  cout << " Minus strand interval, truncated on the right, reversed strand" << endl;
280  TestMappingSeq_loc(mapper_minus, orig, in);
281 
282  in >> MSerial_AsnText >> orig;
283  cout << " Minus strand interval, truncated on the left" << endl;
284  TestMappingSeq_loc(mapper_plus, orig, in);
285  cout << " Minus strand interval, truncated on the left, reversed strand" << endl;
286  TestMappingSeq_loc(mapper_minus, orig, in);
287 
288  in >> MSerial_AsnText >> orig;
289  cout << " Minus strand interval, truncated on both ends" << endl;
290  TestMappingSeq_loc(mapper_plus, orig, in);
291  cout << " Minus strand interval, truncated on both ends, reversed strand" << endl;
292  TestMappingSeq_loc(mapper_minus, orig, in);
293 
294  cout << " Null seq-loc" << endl;
295  TestMappingSeq_loc(mapper_plus, in);
296 
297  cout << " Empty seq-loc" << endl;
298  TestMappingSeq_loc(mapper_plus, in);
299 
300  in >> MSerial_AsnText >> orig;
301  cout << " Whole seq-loc" << endl;
302  TestMappingSeq_loc(mapper_plus, orig, in);
303  cout << " Whole seq-loc, reversed strand" << endl;
304  TestMappingSeq_loc(mapper_minus, orig, in);
305 
306  in >> MSerial_AsnText >> orig;
307  cout << " Point" << endl;
308  TestMappingSeq_loc(mapper_plus, orig, in);
309  cout << " Point, reversed strand" << endl;
310  TestMappingSeq_loc(mapper_minus, orig, in);
311 
312  in >> MSerial_AsnText >> orig;
313  cout << " Packed-points" << endl;
314  TestMappingSeq_loc(mapper_plus, orig, in);
315  cout << " Packed-points, reversed strand" << endl;
316  TestMappingSeq_loc(mapper_minus, orig, in);
317 
318  cout << " Bond" << endl;
319  TestMappingSeq_loc(mapper_plus, in);
320  cout << " Bond, reversed strand" << endl;
321  TestMappingSeq_loc(mapper_minus, in);
322 }
323 
324 
326 {
 // Checks the order of mapped intervals, driven by
 // mapper_test_data/order.asn. A new mapper is read from the stream for
 // each section; every TestMappingSeq_loc call reads one original and one
 // reference location from the same stream, so the sequence of calls
 // must match the data file. In the "through a mix" sections each
 // original/strand combination is tested twice: without merging and with
 // SetMergeAbutting(), after which merging is switched off again.
 // NOTE(review): the line that introduces this test case (embedded line
 // 325) is missing from this listing.
327  CNcbiIfstream in("mapper_test_data/order.asn");
328  cout << "Order of mapped intervals, direct" << endl;
329  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
330  cout << " Mapping plus to plus strand" << endl;
331  TestMappingSeq_loc(*mapper, in);
332  cout << " Mapping minus to minus strand" << endl;
333  TestMappingSeq_loc(*mapper, in);
334 
335  cout << "Order of mapped intervals, plus to minus" << endl;
336  mapper.reset(CreateMapperFromSeq_locs(in));
337  cout << " Mapping plus to minus strand (src on plus)" << endl;
338  TestMappingSeq_loc(*mapper, in);
339  cout << " Mapping minus to plus strand (src on plus)" << endl;
340  TestMappingSeq_loc(*mapper, in);
341 
342  cout << "Order of mapped intervals, minus to plus" << endl;
343  mapper.reset(CreateMapperFromSeq_locs(in));
344  cout << " Mapping plus to minus strand (src on minus)" << endl;
345  TestMappingSeq_loc(*mapper, in);
346  cout << " Mapping minus to plus strand (src on minus)" << endl;
347  TestMappingSeq_loc(*mapper, in);
348 
349  cout << "Mapping through a mix, direct" << endl;
350  mapper.reset(CreateMapperFromSeq_locs(in));
351  cout << " Mapping through a mix, plus to plus strand" << endl;
352  TestMappingSeq_loc(*mapper, in);
353  cout << " Mapping through a mix, plus to plus strand, with merge" << endl;
354  mapper->SetMergeAbutting();
355  TestMappingSeq_loc(*mapper, in);
356  mapper->SetMergeNone();
357  cout << " Mapping through a mix, minus to minus strand" << endl;
358  TestMappingSeq_loc(*mapper, in);
359  cout << " Mapping through a mix, minus to minus strand, with merge" << endl;
360  mapper->SetMergeAbutting();
361  TestMappingSeq_loc(*mapper, in);
362  mapper->SetMergeNone();
363 
364  cout << "Mapping through a mix, plus to minus" << endl;
365  mapper.reset(CreateMapperFromSeq_locs(in));
366  cout << " Mapping through a mix, plus to minus strand (src on plus)" << endl;
367  TestMappingSeq_loc(*mapper, in);
368  cout << " Mapping through a mix, plus to minus (src on plus), with merge" << endl;
369  mapper->SetMergeAbutting();
370  TestMappingSeq_loc(*mapper, in);
371  mapper->SetMergeNone();
372  cout << " Mapping through a mix, minus to plus strand (src on plus)" << endl;
373  TestMappingSeq_loc(*mapper, in);
374  cout << " Mapping through a mix, minus to plus (src on plus), with merge" << endl;
375  mapper->SetMergeAbutting();
376  TestMappingSeq_loc(*mapper, in);
377  mapper->SetMergeNone();
378 
379  cout << "Mapping through a mix, minus to plus" << endl;
380  mapper.reset(CreateMapperFromSeq_locs(in));
381  cout << " Mapping through a mix, plus to minus strand (src on minus)" << endl;
382  TestMappingSeq_loc(*mapper, in);
383  cout << " Mapping through a mix, plus to minus strand (src on minus), with merge" << endl;
384  mapper->SetMergeAbutting();
385  TestMappingSeq_loc(*mapper, in);
386  mapper->SetMergeNone();
387  cout << " Mapping through a mix, minus to plus strand (src on minus)" << endl;
388  TestMappingSeq_loc(*mapper, in);
389  cout << " Mapping through a mix, minus to plus strand (src on minus), with merge" << endl;
390  mapper->SetMergeAbutting();
391  TestMappingSeq_loc(*mapper, in);
392  mapper->SetMergeNone();
393 }
394 
395 
397 {
 // Checks the merge modes of the mapper, driven by
 // mapper_test_data/merging.asn. Two mappers share one source location
 // (destinations dst_plus and dst_minus). The same original is mapped
 // with SetMergeNone, SetMergeAbutting, SetMergeContained and SetMergeAll
 // through each mapper; every call reads its own reference location from
 // the stream. The whole sequence is then repeated for a second original.
 // NOTE(review): the line that introduces this test case (embedded line
 // 396) is missing from this listing.
398  CNcbiIfstream in("mapper_test_data/merging.asn");
399  cout << "Merging of mapped intervals" << endl;
400  CSeq_loc src, dst_plus, dst_minus;
401  in >> MSerial_AsnText >> src;
402  in >> MSerial_AsnText >> dst_plus;
403  in >> MSerial_AsnText >> dst_minus;
404  CSeq_loc_Mapper_Base mapper_plus(src, dst_plus);
405  CSeq_loc_Mapper_Base mapper_minus(src, dst_minus);
406 
407  CSeq_loc orig;
408  in >> MSerial_AsnText >> orig;
409 
410  cout << " No merging" << endl;
411  mapper_plus.SetMergeNone();
412  TestMappingSeq_loc(mapper_plus, orig, in);
413  cout << " No merging, reverse strand mapping" << endl;
414  mapper_minus.SetMergeNone();
415  TestMappingSeq_loc(mapper_minus, orig, in);
416  cout << " Merge abutting" << endl;
417  mapper_plus.SetMergeAbutting();
418  TestMappingSeq_loc(mapper_plus, orig, in);
419  cout << " Merge abutting, reverse strand mapping" << endl;
420  mapper_minus.SetMergeAbutting();
421  TestMappingSeq_loc(mapper_minus, orig, in);
422  cout << " Merge contained" << endl;
423  mapper_plus.SetMergeContained();
424  TestMappingSeq_loc(mapper_plus, orig, in);
425  cout << " Merge contained, reverse strand mapping" << endl;
426  mapper_minus.SetMergeContained();
427  TestMappingSeq_loc(mapper_minus, orig, in);
428  cout << " Merge all" << endl;
429  mapper_plus.SetMergeAll();
430  TestMappingSeq_loc(mapper_plus, orig, in);
431  cout << " Merge all, reverse strand mapping" << endl;
432  mapper_minus.SetMergeAll();
433  TestMappingSeq_loc(mapper_minus, orig, in);
434 
 // Second original (per the messages below, on the minus strand).
435  in >> MSerial_AsnText >> orig;
436 
437  cout << " Minus strand original, no merging" << endl;
438  mapper_plus.SetMergeNone();
439  TestMappingSeq_loc(mapper_plus, orig, in);
440  cout << " Minus strand original, no merging, reverse strand mapping" << endl;
441  mapper_minus.SetMergeNone();
442  TestMappingSeq_loc(mapper_minus, orig, in);
443  cout << " Minus strand original, merge abutting" << endl;
444  mapper_plus.SetMergeAbutting();
445  TestMappingSeq_loc(mapper_plus, orig, in);
446  cout << " Minus strand original, merge abutting, reverse strand mapping" << endl;
447  mapper_minus.SetMergeAbutting();
448  TestMappingSeq_loc(mapper_minus, orig, in);
449  cout << " Minus strand original, merge contained" << endl;
450  mapper_plus.SetMergeContained();
451  TestMappingSeq_loc(mapper_plus, orig, in);
452  cout << " Minus strand original, merge contained, reverse strand mapping" << endl;
453  mapper_minus.SetMergeContained();
454  TestMappingSeq_loc(mapper_minus, orig, in);
455  cout << " Minus strand original, merge all" << endl;
456  mapper_plus.SetMergeAll();
457  TestMappingSeq_loc(mapper_plus, orig, in);
458  cout << " Minus strand original, merge all, reverse strand mapping" << endl;
459  mapper_minus.SetMergeAll();
460  TestMappingSeq_loc(mapper_minus, orig, in);
461 }
462 
463 
465 {
 // Protein-to-nucleotide mapping checks driven by
 // mapper_test_data/prot2nuc.asn. One source location and two
 // destinations are read; four original/reference pairs are consumed
 // through mapper_plus, then four more through mapper_minus.
 // NOTE(review): the line that introduces this test case (embedded line
 // 464) is missing from this listing.
466  CNcbiIfstream in("mapper_test_data/prot2nuc.asn");
467  // Incomplete, needs to be updated
468  cout << "Mapping from protein to nucleotide" << endl;
469  CSeq_loc src, dst_plus, dst_minus;
470  in >> MSerial_AsnText >> src;
471  in >> MSerial_AsnText >> dst_plus;
472  in >> MSerial_AsnText >> dst_minus;
473  CSeq_loc_Mapper_Base mapper_plus(src, dst_plus);
474  CSeq_loc_Mapper_Base mapper_minus(src, dst_minus);
475 
476  cout << " Simple interval" << endl;
477  TestMappingSeq_loc(mapper_plus, in);
478  cout << " Partial on the right" << endl;
479  TestMappingSeq_loc(mapper_plus, in);
480  cout << " Original location on minus strand" << endl;
481  TestMappingSeq_loc(mapper_plus, in);
482  cout << " Original location on minus strand, partial" << endl;
483  TestMappingSeq_loc(mapper_plus, in);
484 
485  cout << " Simple interval, reversed strand" << endl;
486  TestMappingSeq_loc(mapper_minus, in);
487  cout << " Partial on the right, reversed strand" << endl;
488  TestMappingSeq_loc(mapper_minus, in);
489  cout << " Original location on minus strand, reversed strand" << endl;
490  TestMappingSeq_loc(mapper_minus, in);
491  cout << " Original location on minus strand, partial, reversed strand" << endl;
492  TestMappingSeq_loc(mapper_minus, in);
493 }
494 
495 
497 {
 // Nucleotide-to-protein mapping checks driven by
 // mapper_test_data/nuc2prot.asn. Here the two mappers differ in their
 // source location (src_plus / src_minus) and share one destination.
 // Five original/reference pairs are consumed through mapper_plus, then
 // five more through mapper_minus.
 // NOTE(review): the line that introduces this test case (embedded line
 // 496) is missing from this listing.
498  CNcbiIfstream in("mapper_test_data/nuc2prot.asn");
499  // Incomplete, needs to be updated
500  cout << "Mapping from nucleotide to protein" << endl;
501  CSeq_loc src_plus, src_minus, dst;
502  in >> MSerial_AsnText >> src_plus;
503  in >> MSerial_AsnText >> src_minus;
504  in >> MSerial_AsnText >> dst;
505  CSeq_loc_Mapper_Base mapper_plus(src_plus, dst);
506  CSeq_loc_Mapper_Base mapper_minus(src_minus, dst);
507 
508  cout << " Simple interval" << endl;
509  TestMappingSeq_loc(mapper_plus, in);
510  cout << " Partial on the right" << endl;
511  TestMappingSeq_loc(mapper_plus, in);
512  cout << " Original location on minus strand" << endl;
513  TestMappingSeq_loc(mapper_plus, in);
514  cout << " Original location on minus strand, partial" << endl;
515  TestMappingSeq_loc(mapper_plus, in);
516  cout << " Shifted nucleotide positions (incomplete codons)" << endl;
517  TestMappingSeq_loc(mapper_plus, in);
518 
519  cout << " Simple interval, reversed strand" << endl;
520  TestMappingSeq_loc(mapper_minus, in);
521  cout << " Partial on the right, reversed strand" << endl;
522  TestMappingSeq_loc(mapper_minus, in);
523  cout << " Original location on minus strand, reversed strand" << endl;
524  TestMappingSeq_loc(mapper_minus, in);
525  cout << " Original location on minus strand, partial, reversed strand" << endl;
526  TestMappingSeq_loc(mapper_minus, in);
527  cout << " Shifted nucleotide positions (incomplete codons), reversed strand" << endl;
528  TestMappingSeq_loc(mapper_minus, in);
529 }
530 
531 
533 {
 // Mapping-through-mix checks driven by
 // mapper_test_data/through_mix.asn. The first mapper is used with
 // SetMergeAbutting() for two original/reference pairs; a second mapper
 // is then read from the stream and used, without changing its merge
 // mode, for three more pairs.
 // NOTE(review): the line that introduces this test case (embedded line
 // 532) is missing from this listing.
534  CNcbiIfstream in("mapper_test_data/through_mix.asn");
535  cout << "Mapping through mix" << endl;
536  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
537  mapper->SetMergeAbutting();
538  cout << " Single interval overlapping all source ranges" << endl;
539  TestMappingSeq_loc(*mapper, in);
540  cout << " Single interval on minus strand, partial overlapping" << endl;
541  TestMappingSeq_loc(*mapper, in);
542 
543  cout << "Mapping through mix, reversed strand" << endl;
544  mapper.reset(CreateMapperFromSeq_locs(in));
545  cout << " Original seq-loc is the same as mapping source" << endl;
546  TestMappingSeq_loc(*mapper, in);
547  cout << " Mapping a packed-int" << endl;
548  TestMappingSeq_loc(*mapper, in);
549  cout << " Mapping a multi-level seq-loc" << endl;
550  TestMappingSeq_loc(*mapper, in);
551 }
552 
553 
555 {
 // Dense-diag alignment mapping checks driven by
 // mapper_test_data/dendiag.asn: one forward mapper, then a second mapper
 // read from the stream for the reversed-strand case.
 // NOTE(review): the line that introduces this test case (embedded line
 // 554) is missing from this listing.
556  CNcbiIfstream in("mapper_test_data/dendiag.asn");
557  cout << "Mapping dense-diag alignment" << endl;
558  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
559  cout << " Single segment" << endl;
560  TestMappingSeq_align(*mapper, in);
561  cout << " Unsupported mapped alignment - gaps in dense-diag" << endl;
 // NOTE(review): the statement that followed this message (embedded line
 // 562) is missing from this listing, so no check is visible here.
563 
564  cout << "Mapping dense-diag alignment, reverse" << endl;
565  mapper.reset(CreateMapperFromSeq_locs(in));
566  cout << " Single segment, reversed strand" << endl;
567  TestMappingSeq_align(*mapper, in);
568 }
569 
570 
572 {
 // Dense-seg alignment mapping checks driven by
 // mapper_test_data/denseg.asn. The first mapper is exercised with its
 // initial settings and again after MixedAlignsAsSpliced(true); a second
 // mapper is then read from the stream for the gap-strand case.
 // NOTE(review): the line that introduces this test case (embedded line
 // 571) is missing from this listing.
573  CNcbiIfstream in("mapper_test_data/denseg.asn");
574  cout << "Mapping dense-seg alignments" << endl;
575  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
576  cout << " Nuc to prot, converted to std-seg (mixed types)" << endl;
577  TestMappingSeq_align(*mapper, in);
578 
579  mapper->MixedAlignsAsSpliced(true);
580  cout << " Nuc to prot, converted to spliced-seg (mixed types)" << endl;
581  TestMappingSeq_align(*mapper, in);
582 
583  cout << " Unsupported alignment - dense-seg with mixed types" << endl;
 // NOTE(review): the statement that followed this message (embedded line
 // 584) is missing from this listing, so no check is visible here.
585 
586  mapper.reset(CreateMapperFromSeq_locs(in));
587  cout << " Setting correct strands in gaps" << endl;
588  TestMappingSeq_align(*mapper, in);
589 }
590 
591 
593 {
 // Spliced-seg alignment mapping checks driven by
 // mapper_test_data/spliced.asn. Most cases read a fresh mapper (two
 // seq-locs) from the stream before the alignment pair; the first two
 // "Trimming indels" cases instead share one mapper built from an
 // alignment read from the stream.
 // NOTE(review): the line that introduces this test case (embedded line
 // 592) is missing from this listing.
594  CNcbiIfstream in("mapper_test_data/spliced.asn");
595  cout << "Mapping spliced-seg alignments" << endl;
596  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
597  cout << " Mapping spliced-seg product, nuc to nuc" << endl;
598  TestMappingSeq_align(*mapper, in);
599 
600  mapper.reset(CreateMapperFromSeq_locs(in));
601  cout << " Mapping spliced-seg product, nuc to prot" << endl;
602  TestMappingSeq_align(*mapper, in);
603 
604  mapper.reset(CreateMapperFromSeq_locs(in));
605  cout << " Mapping spliced-seg product, nuc to prot, reversed strand" << endl;
606  TestMappingSeq_align(*mapper, in);
607 
608  mapper.reset(CreateMapperFromSeq_locs(in));
609  cout << " Mapping spliced-seg through multiple ranges" << endl;
610  TestMappingSeq_align(*mapper, in);
611 
 // Mapper constructed from an alignment; the second constructor argument
 // is 1 here (presumably the target row -- confirm against the
 // CSeq_loc_Mapper_Base documentation).
612  CSeq_align mapping;
613  in >> MSerial_AsnText >> mapping;
614  mapper.reset(new CSeq_loc_Mapper_Base(mapping, 1));
615  cout << " Trimming indels" << endl;
616  TestMappingSeq_align(*mapper, in);
617  cout << " Trimming indels - 2" << endl;
618  TestMappingSeq_align(*mapper, in);
619  mapper.reset(CreateMapperFromSeq_locs(in));
620  cout << " Trimming indels - 3" << endl;
621  TestMappingSeq_align(*mapper, in);
622  mapper.reset(CreateMapperFromSeq_locs(in));
623  cout << " Trimming indels, minus strand" << endl;
624  TestMappingSeq_align(*mapper, in);
625 
626  mapper.reset(CreateMapperFromSeq_locs(in));
627  cout << " Trimming gen-ins - 1" << endl;
628  TestMappingSeq_align(*mapper, in);
629  mapper.reset(CreateMapperFromSeq_locs(in));
630  cout << " Trimming gen-ins - 2" << endl;
631  TestMappingSeq_align(*mapper, in);
632  mapper.reset(CreateMapperFromSeq_locs(in));
633  cout << " Trimming gen-ins - 3" << endl;
634  TestMappingSeq_align(*mapper, in);
635  mapper.reset(CreateMapperFromSeq_locs(in));
636  cout << " Trimming gen-ins - 4" << endl;
637  TestMappingSeq_align(*mapper, in);
638  mapper.reset(CreateMapperFromSeq_locs(in));
639  cout << " Trimming gen-ins - 5" << endl;
640  TestMappingSeq_align(*mapper, in);
641  mapper.reset(CreateMapperFromSeq_locs(in));
642  cout << " Trimming gen-ins - 6" << endl;
643  TestMappingSeq_align(*mapper, in);
644  mapper.reset(CreateMapperFromSeq_locs(in));
645  cout << " Trimming gen-ins - 7" << endl;
646  TestMappingSeq_align(*mapper, in);
647  mapper.reset(CreateMapperFromSeq_locs(in));
648  cout << " Trimming gen-ins - 8" << endl;
649  TestMappingSeq_align(*mapper, in);
650 }
651 
652 
654 {
 // Score handling checks driven by mapper_test_data/scores.asn. A single
 // mapper is read from the stream and reused for all thirteen
 // original/reference alignment pairs; whether scores are kept or
 // dropped is encoded in the reference alignments in the data file.
 // NOTE(review): the line that introduces this test case (embedded line
 // 653) is missing from this listing.
655  CNcbiIfstream in("mapper_test_data/scores.asn");
656  cout << "Mapping scores" << endl;
657  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
658  cout << " Dense-diag - scores are preserved" << endl;
659  TestMappingSeq_align(*mapper, in);
660  // NOTE: Can not test score dropping in dense-diag since the mapped dense-diag
661  // would contain gaps which are not supported by this alignment type.
662  cout << " Dense-seg, scores are preserved" << endl;
663  TestMappingSeq_align(*mapper, in);
664  cout << " Dense-seg - partial mapping, scores are dropped" << endl;
665  TestMappingSeq_align(*mapper, in);
666  cout << " Dense-seg - unmapped segment, scores are dropped" << endl;
667  TestMappingSeq_align(*mapper, in);
668  cout << " Std-seg, scores are preserved" << endl;
669  TestMappingSeq_align(*mapper, in);
670  cout << " Std-seg - partial mapping, scores are dropped" << endl;
671  TestMappingSeq_align(*mapper, in);
672  cout << " Std-seg - unmapped segment, scores are dropped" << endl;
673  TestMappingSeq_align(*mapper, in);
674  cout << " Packed-seg, scores are preserved" << endl;
675  TestMappingSeq_align(*mapper, in);
676  cout << " Packed-seg - partial mapping, scores are dropped" << endl;
677  TestMappingSeq_align(*mapper, in);
678  cout << " Packed-seg - unmapped segment, scores are dropped" << endl;
679  TestMappingSeq_align(*mapper, in);
680  cout << " Spliced-seg, scores are preserved" << endl;
681  TestMappingSeq_align(*mapper, in);
682  cout << " Spliced-seg, partial mapping, scores are dropped" << endl;
683  TestMappingSeq_align(*mapper, in);
684  cout << " Spliced-seg, unmapped segment, scores are dropped" << endl;
685  TestMappingSeq_align(*mapper, in);
686 }
687 
688 
690 {
 // Seq-graph mapping checks driven by mapper_test_data/graph.asn: three
 // original/reference pairs through the first mapper, then one pair
 // through a second mapper read from the stream.
 // NOTE(review): the line that introduces this test case (embedded line
 // 689) is missing from this listing.
691  CNcbiIfstream in("mapper_test_data/graph.asn");
692  cout << "Mapping graphs" << endl;
693  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
694  cout << " Mapping whole graph" << endl;
695  TestMappingSeq_graph(*mapper, in);
696  cout << " Partial - skip a range in the middle" << endl;
697  TestMappingSeq_graph(*mapper, in);
698  cout << " Mapping a graph on minus strand" << endl;
699  TestMappingSeq_graph(*mapper, in);
700 
701  cout << "Graph mapping, nuc to prot" << endl;
702  mapper.reset(CreateMapperFromSeq_locs(in));
703  cout << " Simple graph, using comp=3 to allow mapping" << endl;
704  TestMappingSeq_graph(*mapper, in);
705 
 // NOTE(review): the statements that followed each of the two
 // "Unsupported" messages below (embedded lines 707 and 709) are missing
 // from this listing, so no checks are visible for them here.
706  cout << " Unsupported: different original and mapped location lengths" << endl;
708  cout << " Unsupported: unknown destination sequence type" << endl;
710 }
711 
712 
714 {
 // Checks mapping of alignments to bioseq segments, driven by
 // mapper_test_data/aln2delta.asn. Four alignments are read in turn;
 // each one is mapped twice ("row 1" and "row 2"), with a fresh mapper
 // and a fresh reference alignment read from the stream for every call.
 // NOTE(review): the line that introduces this test case (embedded line
 // 713) and the declaration of `orig` (embedded line 717) are missing
 // from this listing.
715  CNcbiIfstream in("mapper_test_data/aln2delta.asn");
716  cout << "Test mapping alignments to bioseq segments" << endl;
718 
719  // Although mapping to bioseq segments, we don't have an Object Manager
720  // here. Using seq-locs instead.
721 
722  in >> MSerial_AsnText >> orig;
723  cout << " Alignment #1, mapping row 1" << endl;
724  unique_ptr<CSeq_loc_Mapper_Base> mapper(CreateMapperFromSeq_locs(in));
725  TestMappingSeq_align(*mapper, orig, in);
726  cout << " Alignment #1, mapping row 2" << endl;
727  mapper.reset(CreateMapperFromSeq_locs(in));
728  TestMappingSeq_align(*mapper, orig, in);
729 
730  in >> MSerial_AsnText >> orig;
731  cout << " Alignment #2, mapping row 1" << endl;
732  mapper.reset(CreateMapperFromSeq_locs(in));
733  TestMappingSeq_align(*mapper, orig, in);
734  cout << " Alignment #2, mapping row 2" << endl;
735  mapper.reset(CreateMapperFromSeq_locs(in));
736  TestMappingSeq_align(*mapper, orig, in);
737 
738  in >> MSerial_AsnText >> orig;
739  cout << " Alignment #3, mapping row 1" << endl;
740  mapper.reset(CreateMapperFromSeq_locs(in));
741  TestMappingSeq_align(*mapper, orig, in);
742  cout << " Alignment #3, mapping row 2" << endl;
743  mapper.reset(CreateMapperFromSeq_locs(in));
744  TestMappingSeq_align(*mapper, orig, in);
745 
746  in >> MSerial_AsnText >> orig;
747  cout << " Alignment #4, mapping row 1" << endl;
748  mapper.reset(CreateMapperFromSeq_locs(in));
749  TestMappingSeq_align(*mapper, orig, in);
750  cout << " Alignment #4, mapping row 2" << endl;
751  mapper.reset(CreateMapperFromSeq_locs(in));
752  TestMappingSeq_align(*mapper, orig, in);
753 }
754 
755 
757 {
 // Checks mapping of seq-locs through alignments, driven by
 // mapper_test_data/through_aln.asn. For every entry of `titles` one
 // alignment is read from the stream, a mapper is built from it, and
 // three original/reference seq-loc pairs are checked. The data file
 // must therefore contain the alignments in the same order as `titles`.
 // NOTE(review): the line that introduces this test case (embedded line
 // 756) is missing from this listing.
758  CNcbiIfstream in("mapper_test_data/through_aln.asn");
759  cout << "Test mapping through alignments" << endl;
760  CSeq_align aln;
761 
762  const char* titles[] = {
763  " Mapping through dense-diag",
764  " Mapping through dense-seg (with some gaps)",
765  " Mapping through packed-seg (with some gaps)",
766  " Mapping through std-seg",
767  " Mapping through disc",
768  " Mapping through spliced-seg",
769  " Mapping through spliced-seg, reversed strand",
770  " Mapping through sparse-seg"
771  };
772 
 // sizeof(titles)/sizeof(titles[0]) is the number of entries in the array.
773  for (size_t i = 0; i < sizeof(titles)/sizeof(titles[0]); i++) {
774  cout << titles[i] << endl;
775  in >> MSerial_AsnText >> aln;
 // The second constructor argument is 0 for every alignment here
 // (presumably the target row -- confirm against the
 // CSeq_loc_Mapper_Base documentation).
776  unique_ptr<CSeq_loc_Mapper_Base> mapper(new CSeq_loc_Mapper_Base(aln, 0));
777  cout << " Whole sequence" << endl;
778  TestMappingSeq_loc(*mapper, in);
779  cout << " Interval, complete" << endl;
780  TestMappingSeq_loc(*mapper, in);
781  cout << " Interval, split" << endl;
782  TestMappingSeq_loc(*mapper, in);
783  }
784 }
785 
786 
787 // Test sequence info provider
// Keeps per-GI sequence type and length in m_Types / m_Lengths, filled via
// AddSeq(), and answers lookups from those tables.
// NOTE(review): many lines of this class are missing from this listing:
// the class header (embedded line 788), the signatures of the first two
// methods (791-792, 799), the lookups that define `it` (795, 802), the
// tail of the first return expression (797) and the member declarations
// (819-823). Comments below describe only what is visible.
789 {
790 public:
 // Type lookup: ids that are not GIs are reported as eSeq_unknown.
793  {
794  if ( !idh.IsGi() ) return CSeq_loc_Mapper_Base::eSeq_unknown;
796  return it != m_Types.end() ?
798  }
 // Length lookup: ids that are not GIs, and GIs not present in
 // m_Lengths, yield kInvalidSeqPos.
800  {
801  if ( !idh.IsGi() ) return kInvalidSeqPos;
803  return it != m_Lengths.end() ?
804  it->second : kInvalidSeqPos;
805  }
 // The only synonym reported for an id is the id itself.
806  virtual void CollectSynonyms(const CSeq_id_Handle& id,
807  TSynonyms& synonyms)
808  {
809  synonyms.insert(id);
810  }
811 
 // Registers (or overwrites) the type and length recorded for a GI.
812  void AddSeq(TGi gi, TSeqType seqtype, TSeqPos len)
813  {
814  m_Types[gi] = seqtype;
815  m_Lengths[gi] = len;
816  }
817 
818 private:
821 
824 };
825 
826 
828 {
 // Checks mapping with a custom sequence info provider, driven by
 // mapper_test_data/seqinfo.asn. The provider is told that GI 4 is a
 // nucleotide of length 300 and GI 5 a protein of length 100, and is
 // passed to the mapper constructor together with the two seq-locs.
 // NOTE(review): the line that introduces this test case (embedded line
 // 827) and the declaration of `info` (embedded line 831) are missing
 // from this listing.
829  CNcbiIfstream in("mapper_test_data/seqinfo.asn");
830  cout << "Test mapping with sequence info provider" << endl;
832  info->AddSeq(4, CSeq_loc_Mapper_Base::eSeq_nuc, 300);
833  info->AddSeq(5, CSeq_loc_Mapper_Base::eSeq_prot, 100);
834 
835  CSeq_loc src, dst;
836  // Read seq-locs first to skip ASN.1 comments
837  in >> MSerial_AsnText >> src;
838  in >> MSerial_AsnText >> dst;
839  unique_ptr<CSeq_loc_Mapper_Base> mapper(
840  new CSeq_loc_Mapper_Base(src, dst, info.GetPointer()));
841 
842  cout << " Test mapping whole, nuc to prot" << endl;
843  TestMappingSeq_loc(*mapper, in);
844  cout << " Test mapping interval, nuc to prot" << endl;
845  TestMappingSeq_loc(*mapper, in);
846 }
847 
848 
850 {
851  CNcbiIfstream in("mapper_test_data/fuzz.asn");
852  cout << "Mapping fuzzes" << endl;
853 
854  CSeq_loc src, dst_plus, dst_minus;
855  in >> MSerial_AsnText >> src;
856  in >> MSerial_AsnText >> dst_plus;
857  in >> MSerial_AsnText >> dst_minus;
858  CSeq_loc_Mapper_Base mapper_plus(src, dst_plus);
859  CSeq_loc_Mapper_Base mapper_minus(src, dst_minus);
860 
861  CSeq_loc orig;
862 
863  // Fuzz-from
864  in >> MSerial_AsnText >> orig;
865  cout << " Fuzz-from lim lt" << endl;
866  TestMappingSeq_loc(mapper_plus, orig, in);
867  cout << " Fuzz-from lim lt, reversed strand" << endl;
868  TestMappingSeq_loc(mapper_minus, orig, in);
869 
870  in >> MSerial_AsnText >> orig;
871  cout << " Fuzz-from lim gt" << endl;
872  TestMappingSeq_loc(mapper_plus, orig, in);
873  cout << " Fuzz-from lim gt, reversed strand" << endl;
874  TestMappingSeq_loc(mapper_minus, orig, in);
875 
876  in >> MSerial_AsnText >> orig;
877  cout << " Fuzz-from lim tl" << endl;
878  TestMappingSeq_loc(mapper_plus, orig, in);
879  cout << " Fuzz-from lim tl, reversed strand" << endl;
880  TestMappingSeq_loc(mapper_minus, orig, in);
881 
882  in >> MSerial_AsnText >> orig;
883  cout << " Fuzz-from lim tr" << endl;
884  TestMappingSeq_loc(mapper_plus, orig, in);
885  cout << " Fuzz-from lim tr, reversed strand" << endl;
886  TestMappingSeq_loc(mapper_minus, orig, in);
887 
888  in >> MSerial_AsnText >> orig;
889  cout << " Fuzz-from alt #1" << endl;
890  TestMappingSeq_loc(mapper_plus, orig, in);
891  cout << " Fuzz-from alt #1, reversed strand" << endl;
892  TestMappingSeq_loc(mapper_minus, orig, in);
893 
894  in >> MSerial_AsnText >> orig;
895  cout << " Fuzz-from alt #2" << endl;
896  TestMappingSeq_loc(mapper_plus, orig, in);
897  cout << " Fuzz-from alt #2, reversed strand" << endl;
898  TestMappingSeq_loc(mapper_minus, orig, in);
899 
900  in >> MSerial_AsnText >> orig;
901  cout << " Fuzz-from range #1" << endl;
902  TestMappingSeq_loc(mapper_plus, orig, in);
903  cout << " Fuzz-from range #1, reversed strand" << endl;
904  TestMappingSeq_loc(mapper_minus, orig, in);
905 
906  in >> MSerial_AsnText >> orig;
907  cout << " Fuzz-from range #2" << endl;
908  TestMappingSeq_loc(mapper_plus, orig, in);
909  cout << " Fuzz-from range #2, reversed strand" << endl;
910  TestMappingSeq_loc(mapper_minus, orig, in);
911 
912  in >> MSerial_AsnText >> orig;
913  cout << " Fuzz-from range #3" << endl;
914  TestMappingSeq_loc(mapper_plus, orig, in);
915  cout << " Fuzz-from range #3, reversed strand" << endl;
916  TestMappingSeq_loc(mapper_minus, orig, in);
917 
918  in >> MSerial_AsnText >> orig;
919  cout << " Fuzz-from range #4" << endl;
920  TestMappingSeq_loc(mapper_plus, orig, in);
921  cout << " Fuzz-from range #4, reversed strand" << endl;
922  TestMappingSeq_loc(mapper_minus, orig, in);
923 
924  // Fuzz-to
925  in >> MSerial_AsnText >> orig;
926  cout << " Fuzz-to lim lt" << endl;
927  TestMappingSeq_loc(mapper_plus, orig, in);
928  cout << " Fuzz-to lim lt, reversed strand" << endl;
929  TestMappingSeq_loc(mapper_minus, orig, in);
930 
931  in >> MSerial_AsnText >> orig;
932  cout << " Fuzz-to lim gt" << endl;
933  TestMappingSeq_loc(mapper_plus, orig, in);
934  cout << " Fuzz-to lim gt, reversed strand" << endl;
935  TestMappingSeq_loc(mapper_minus, orig, in);
936 
937  in >> MSerial_AsnText >> orig;
938  cout << " Fuzz-to lim tl" << endl;
939  TestMappingSeq_loc(mapper_plus, orig, in);
940  cout << " Fuzz-to lim tl, reversed strand" << endl;
941  TestMappingSeq_loc(mapper_minus, orig, in);
942 
943  in >> MSerial_AsnText >> orig;
944  cout << " Fuzz-to lim tr" << endl;
945  TestMappingSeq_loc(mapper_plus, orig, in);
946  cout << " Fuzz-to lim tr, reversed strand" << endl;
947  TestMappingSeq_loc(mapper_minus, orig, in);
948 
949  in >> MSerial_AsnText >> orig;
950  cout << " Fuzz-to alt #1" << endl;
951  TestMappingSeq_loc(mapper_plus, orig, in);
952  cout << " Fuzz-to alt #1, reversed strand" << endl;
953  TestMappingSeq_loc(mapper_minus, orig, in);
954 
955  in >> MSerial_AsnText >> orig;
956  cout << " Fuzz-to alt #2" << endl;
957  TestMappingSeq_loc(mapper_plus, orig, in);
958  cout << " Fuzz-to alt #2, reversed strand" << endl;
959  TestMappingSeq_loc(mapper_minus, orig, in);
960 
961  in >> MSerial_AsnText >> orig;
962  cout << " Fuzz-to range #1" << endl;
963  TestMappingSeq_loc(mapper_plus, orig, in);
964  cout << " Fuzz-to range #1, reversed strand" << endl;
965  TestMappingSeq_loc(mapper_minus, orig, in);
966 
967  in >> MSerial_AsnText >> orig;
968  cout << " Fuzz-to range #2" << endl;
969  TestMappingSeq_loc(mapper_plus, orig, in);
970  cout << " Fuzz-to range #2, reversed strand" << endl;
971  TestMappingSeq_loc(mapper_minus, orig, in);
972 
973  in >> MSerial_AsnText >> orig;
974  cout << " Fuzz-to range #3" << endl;
975  TestMappingSeq_loc(mapper_plus, orig, in);
976  cout << " Fuzz-to range #3, reversed strand" << endl;
977  TestMappingSeq_loc(mapper_minus, orig, in);
978 
979  in >> MSerial_AsnText >> orig;
980  cout << " Fuzz-to range #4" << endl;
981  TestMappingSeq_loc(mapper_plus, orig, in);
982  cout << " Fuzz-to range #4, reversed strand" << endl;
983  TestMappingSeq_loc(mapper_minus, orig, in);
984 }
985 
986 
// Body of the exon sort-order mapping test. The signature line (listing
// line 987) is absent from this extract; presumably this is
// TestMapper_ExonPartsOrder() -- confirm against the real file.
//
// The routine opens mapper_test_data/exonparts.asn, reads a source location
// and two destination locations, builds one mapper per destination, and then
// runs a fixed series of TestMappingSeq_align checks. Every check is handed
// the same input stream, so the statements below must keep their order.
988 {
989  CNcbiIfstream in("mapper_test_data/exonparts.asn");
990  cout << "Testing sort order of mapped exons" << endl;
991 
      // The first three objects in the file: the mapping source and the two
      // alternative destinations (named for plus and minus strand).
992  CSeq_loc src, dst_plus, dst_minus;
993  in >> MSerial_AsnText >> src;
994  in >> MSerial_AsnText >> dst_plus;
995  in >> MSerial_AsnText >> dst_minus;
996  CSeq_loc_Mapper_Base mapper_plus(src, dst_plus);
997  CSeq_loc_Mapper_Base mapper_minus(src, dst_minus);
998 
      // NOTE(review): listing line 999 is absent from this extract. `orig` is
      // used below without a visible declaration; presumably it is declared
      // there as a CSeq_align (TestMappingSeq_align takes a CSeq_align) --
      // confirm against the real file.
1000 
      // Each section below reads one object into `orig` and checks it four
      // times: mapper_plus with spliced-seg trimming off then on, followed by
      // mapper_minus with trimming off then on.
      // NOTE(review): TestMappingSeq_align presumably reads the expected
      // result from `in` on every call -- confirm.
1001  in >> MSerial_AsnText >> orig;
1002  cout << " Both rows on plus, map genomic to plus, no trim" << endl;
1003  mapper_plus.SetTrimSplicedSeg(false);
1004  TestMappingSeq_align(mapper_plus, orig, in);
1005  cout << " Both rows on plus, map genomic to plus, trim" << endl;
1006  mapper_plus.SetTrimSplicedSeg(true);
1007  TestMappingSeq_align(mapper_plus, orig, in);
1008  cout << " Both rows on plus, map genomic to minus, no trim" << endl;
1009  mapper_minus.SetTrimSplicedSeg(false);
1010  TestMappingSeq_align(mapper_minus, orig, in);
1011  cout << " Both rows on plus, map genomic to minus, trim" << endl;
1012  mapper_minus.SetTrimSplicedSeg(true);
1013  TestMappingSeq_align(mapper_minus, orig, in);
1014 
1015  in >> MSerial_AsnText >> orig;
1016  cout << " Both rows on plus, map product to plus, no trim" << endl;
1017  mapper_plus.SetTrimSplicedSeg(false);
1018  TestMappingSeq_align(mapper_plus, orig, in);
1019  cout << " Both rows on plus, map product to plus, trim" << endl;
1020  mapper_plus.SetTrimSplicedSeg(true);
1021  TestMappingSeq_align(mapper_plus, orig, in);
1022  cout << " Both rows on plus, map product to minus, no trim" << endl;
1023  mapper_minus.SetTrimSplicedSeg(false);
1024  TestMappingSeq_align(mapper_minus, orig, in);
1025  cout << " Both rows on plus, map product to minus, trim" << endl;
1026  mapper_minus.SetTrimSplicedSeg(true);
1027  TestMappingSeq_align(mapper_minus, orig, in);
1028 
1029  in >> MSerial_AsnText >> orig;
1030  cout << " Genomic on minus, map genomic to minus, no trim" << endl;
1031  mapper_plus.SetTrimSplicedSeg(false);
1032  TestMappingSeq_align(mapper_plus, orig, in);
1033  cout << " Genomic on minus, map genomic to minus, trim" << endl;
1034  mapper_plus.SetTrimSplicedSeg(true);
1035  TestMappingSeq_align(mapper_plus, orig, in);
1036  cout << " Genomic on minus, map genomic to plus, no trim" << endl;
1037  mapper_minus.SetTrimSplicedSeg(false);
1038  TestMappingSeq_align(mapper_minus, orig, in);
1039  cout << " Genomic on minus, map genomic to plus, trim" << endl;
1040  mapper_minus.SetTrimSplicedSeg(true);
1041  TestMappingSeq_align(mapper_minus, orig, in);
1042 
1043  in >> MSerial_AsnText >> orig;
1044  cout << " Genomic on minus, map product to plus, no trim" << endl;
1045  mapper_plus.SetTrimSplicedSeg(false);
1046  TestMappingSeq_align(mapper_plus, orig, in);
1047  cout << " Genomic on minus, map product to plus, trim" << endl;
1048  mapper_plus.SetTrimSplicedSeg(true);
1049  TestMappingSeq_align(mapper_plus, orig, in);
1050  cout << " Genomic on minus, map product to minus, no trim" << endl;
1051  mapper_minus.SetTrimSplicedSeg(false);
1052  TestMappingSeq_align(mapper_minus, orig, in);
1053  cout << " Genomic on minus, map product to minus, trim" << endl;
1054  mapper_minus.SetTrimSplicedSeg(true);
1055  TestMappingSeq_align(mapper_minus, orig, in);
1056 
1057  in >> MSerial_AsnText >> orig;
1058  cout << " Product on minus, map genomic to plus, no trim" << endl;
1059  mapper_plus.SetTrimSplicedSeg(false);
1060  TestMappingSeq_align(mapper_plus, orig, in);
1061  cout << " Product on minus, map genomic to plus, trim" << endl;
1062  mapper_plus.SetTrimSplicedSeg(true);
1063  TestMappingSeq_align(mapper_plus, orig, in);
1064  cout << " Product on minus, map genomic to minus, no trim" << endl;
1065  mapper_minus.SetTrimSplicedSeg(false);
1066  TestMappingSeq_align(mapper_minus, orig, in);
1067  cout << " Product on minus, map genomic to minus, trim" << endl;
1068  mapper_minus.SetTrimSplicedSeg(true);
1069  TestMappingSeq_align(mapper_minus, orig, in);
1070 
1071  in >> MSerial_AsnText >> orig;
1072  cout << " Product on minus, map product to minus, no trim" << endl;
1073  mapper_plus.SetTrimSplicedSeg(false);
1074  TestMappingSeq_align(mapper_plus, orig, in);
1075  cout << " Product on minus, map product to minus, trim" << endl;
1076  mapper_plus.SetTrimSplicedSeg(true);
1077  TestMappingSeq_align(mapper_plus, orig, in);
1078  cout << " Product on minus, map product to plus, no trim" << endl;
1079  mapper_minus.SetTrimSplicedSeg(false);
1080  TestMappingSeq_align(mapper_minus, orig, in);
1081  cout << " Product on minus, map product to plus, trim" << endl;
1082  mapper_minus.SetTrimSplicedSeg(true);
1083  TestMappingSeq_align(mapper_minus, orig, in);
1084 
1085  // CXX-5105 - if there's no global strand, per-exon one should be used.
1086  // Run the same tests with local strand only. Indel trimming is enabled.
1087  cout << "Testing sort order of mapped exons, local strands" << endl;
      // Trimming is switched on once for both mappers and left on, so each
      // section from here on performs only two checks (plus, then minus).
1088  mapper_plus.SetTrimSplicedSeg(true);
1089  mapper_minus.SetTrimSplicedSeg(true);
1090 
1091  in >> MSerial_AsnText >> orig;
1092  cout << " Both rows on plus, map genomic to plus" << endl;
1093  TestMappingSeq_align(mapper_plus, orig, in);
1094  cout << " Both rows on plus, map genomic to minus" << endl;
1095  TestMappingSeq_align(mapper_minus, orig, in);
1096 
1097  in >> MSerial_AsnText >> orig;
1098  cout << " Both rows on plus, map product to plus" << endl;
1099  TestMappingSeq_align(mapper_plus, orig, in);
1100  cout << " Both rows on plus, map product to minus" << endl;
1101  TestMappingSeq_align(mapper_minus, orig, in);
1102 
1103  in >> MSerial_AsnText >> orig;
1104  cout << " Genomic on minus, map genomic to minus" << endl;
1105  TestMappingSeq_align(mapper_plus, orig, in);
1106  cout << " Genomic on minus, map genomic to plus" << endl;
1107  TestMappingSeq_align(mapper_minus, orig, in);
1108 
1109  in >> MSerial_AsnText >> orig;
1110  cout << " Genomic on minus, map product to plus" << endl;
1111  TestMappingSeq_align(mapper_plus, orig, in);
1112  cout << " Genomic on minus, map product to minus" << endl;
1113  TestMappingSeq_align(mapper_minus, orig, in);
1114 
1115  in >> MSerial_AsnText >> orig;
1116  cout << " Product on minus, map genomic to plus" << endl;
1117  TestMappingSeq_align(mapper_plus, orig, in);
1118  cout << " Product on minus, map genomic to minus" << endl;
1119  TestMappingSeq_align(mapper_minus, orig, in);
1120 
1121  in >> MSerial_AsnText >> orig;
1122  cout << " Product on minus, map product to minus" << endl;
1123  TestMappingSeq_align(mapper_plus, orig, in);
1124  cout << " Product on minus, map product to plus" << endl;
1125  TestMappingSeq_align(mapper_minus, orig, in);
1126 }
1127 
1128 
// Body of the mix-truncation mapping test. The signature line (listing
// line 1129) is absent from this extract; presumably this is
// TestMapper_TruncatedMix() -- confirm against the real file.
//
// The routine opens mapper_test_data/truncatedmix.asn, reads a source
// location and two destination locations, builds one mapper per destination,
// and then checks eight original locations. Each location is mapped first
// with mapper_plus (labelled "direct") and then with mapper_minus (labelled
// "reversed"). All checks share the stream `in`, so their order is
// significant.
1130 {
1131  CNcbiIfstream in("mapper_test_data/truncatedmix.asn");
1132  cout << "Testing truncation of mix parts" << endl;
1133 
1134  CSeq_loc src, dst_plus, dst_minus;
1135  in >> MSerial_AsnText >> src;
1136  in >> MSerial_AsnText >> dst_plus;
1137  in >> MSerial_AsnText >> dst_minus;
1138  CSeq_loc_Mapper_Base mapper_plus(src, dst_plus);
1139  CSeq_loc_Mapper_Base mapper_minus(src, dst_minus);
1140 
      // Reused for every original location read from the file.
1141  CSeq_loc orig;
1142 
      // NOTE(review): the leading "Plus"/"Minus" in the labels presumably
      // names the strand of the original location stored in the data file --
      // confirm against truncatedmix.asn.
1143  in >> MSerial_AsnText >> orig;
1144  cout << " Plus, direct, unmapped ranges on the left" << endl;
1145  TestMappingSeq_loc(mapper_plus, orig, in);
1146  cout << " Plus, reversed, unmapped ranges on the left" << endl;
1147  TestMappingSeq_loc(mapper_minus, orig, in);
1148 
1149  in >> MSerial_AsnText >> orig;
1150  cout << " Plus, direct, unmapped ranges on the right" << endl;
1151  TestMappingSeq_loc(mapper_plus, orig, in);
1152  cout << " Plus, reversed, unmapped ranges on the right" << endl;
1153  TestMappingSeq_loc(mapper_minus, orig, in);
1154 
1155  in >> MSerial_AsnText >> orig;
1156  cout << " Plus, direct, range truncated on the left" << endl;
1157  TestMappingSeq_loc(mapper_plus, orig, in);
1158  cout << " Plus, reversed, range truncated on the left" << endl;
1159  TestMappingSeq_loc(mapper_minus, orig, in);
1160 
1161  in >> MSerial_AsnText >> orig;
1162  cout << " Plus, direct, range truncated on the right" << endl;
1163  TestMappingSeq_loc(mapper_plus, orig, in);
1164  cout << " Plus, reversed, range truncated on the right" << endl;
1165  TestMappingSeq_loc(mapper_minus, orig, in);
1166 
1167  in >> MSerial_AsnText >> orig;
1168  cout << " Minus, direct, unmapped ranges on the left" << endl;
1169  TestMappingSeq_loc(mapper_plus, orig, in);
1170  cout << " Minus, reversed, unmapped ranges on the left" << endl;
1171  TestMappingSeq_loc(mapper_minus, orig, in);
1172 
1173  in >> MSerial_AsnText >> orig;
1174  cout << " Minus, direct, unmapped ranges on the right" << endl;
1175  TestMappingSeq_loc(mapper_plus, orig, in);
1176  cout << " Minus, reversed, unmapped ranges on the right" << endl;
1177  TestMappingSeq_loc(mapper_minus, orig, in);
1178 
1179  in >> MSerial_AsnText >> orig;
1180  cout << " Minus, direct, range truncated on the left" << endl;
1181  TestMappingSeq_loc(mapper_plus, orig, in);
1182  cout << " Minus, reversed, range truncated on the left" << endl;
1183  TestMappingSeq_loc(mapper_minus, orig, in);
1184 
1185  in >> MSerial_AsnText >> orig;
1186  cout << " Minus, direct, range truncated on the right" << endl;
1187  TestMappingSeq_loc(mapper_plus, orig, in);
1188  cout << " Minus, reversed, range truncated on the right" << endl;
1189  TestMappingSeq_loc(mapper_minus, orig, in);
1190 }
1191 
1192 
1194 {
1195  CNcbiIfstream in("mapper_test_data/trimming.asn");
1196  cout << "Testing trimming of mapped locations" << endl;
1197 
1199  info->AddSeq(2, CSeq_loc_Mapper_Base::eSeq_nuc, 600);
1200  info->AddSeq(3, CSeq_loc_Mapper_Base::eSeq_prot, 100);
1201  info->AddSeq(4, CSeq_loc_Mapper_Base::eSeq_prot, 10);
1202  info->AddSeq(5, CSeq_loc_Mapper_Base::eSeq_nuc, 300);
1203 
1204  CSeq_loc src, dst, orig;
1205  // Read seq-locs first to skip ASN.1 comments
1206  in >> MSerial_AsnText >> src;
1207  in >> MSerial_AsnText >> dst;
1208  in >> MSerial_AsnText >> orig;
1209 
1210  // No trimming - stop codon should be preserved.
1211  cout << " Test stop codon mapping: trimming=off" << endl;
1212  unique_ptr<CSeq_loc_Mapper_Base> mapper(
1213  new CSeq_loc_Mapper_Base(src, dst,
1214  CSeq_loc_Mapper_Options(info.GetPointer())));
1215  TestMappingSeq_loc(*mapper, orig, in);
1216 
1217  // Trimming enabled - stop codon should be dropped.
1218  cout << " Test stop codon mapping: trimming=on" << endl;
1219  mapper.reset(
1220  new CSeq_loc_Mapper_Base(src, dst,
1221  CSeq_loc_Mapper_Options(info.GetPointer())
1222  .SetTrimMappedLocation(true)));
1223  TestMappingSeq_loc(*mapper, orig, in);
1224 
1225  // Mapping prot->nuc, no trimming - stop codon should be preserved.
1226  in >> MSerial_AsnText >> orig;
1227  cout << " Test stop codon mapping, prot->nuc: trimming=off" << endl;
1228  mapper.reset(
1229  new CSeq_loc_Mapper_Base(dst, src,
1230  CSeq_loc_Mapper_Options(info.GetPointer())));
1231  TestMappingSeq_loc(*mapper, orig, in);
1232 
1233  // Mapping prot->nuc, trimming enabled - stop codon should be dropped.
1234  cout << " Test stop codon mapping, prot->nuc: trimming=on" << endl;
1235  mapper.reset(
1236  new CSeq_loc_Mapper_Base(dst, src,
1237  CSeq_loc_Mapper_Options(info.GetPointer())
1238  .SetTrimMappedLocation(true)));
1239  TestMappingSeq_loc(*mapper, orig, in);
1240 
1241  // Stop codon trimming - minus strand.
1242  cout << " Test stop codon trimming on minus strand, nuc->prot" << endl;
1243  in >> MSerial_AsnText >> src;
1244  in >> MSerial_AsnText >> dst;
1245  mapper.reset(
1246  new CSeq_loc_Mapper_Base(src, dst,
1247  CSeq_loc_Mapper_Options(info.GetPointer())
1248  .SetTrimMappedLocation(true)));
1249  TestMappingSeq_loc(*mapper, src, in);
1250  cout << " Test stop codon trimming on minus strand, prot->nuc" << endl;
1251  mapper.reset(
1252  new CSeq_loc_Mapper_Base(dst, src,
1253  CSeq_loc_Mapper_Options(info.GetPointer())
1254  .SetTrimMappedLocation(true)));
1255  TestMappingSeq_loc(*mapper, dst, in);
1256 
1257  // Ignore extra codon when there are multiple destination proteins.
1258  cout << " Test stop codon non-extension, multi-id destionation" << endl;
1259  in >> MSerial_AsnText >> src;
1260  in >> MSerial_AsnText >> dst;
1261  mapper.reset(
1262  new CSeq_loc_Mapper_Base(src, dst,
1263  CSeq_loc_Mapper_Options(info.GetPointer())));
1264  TestMappingSeq_loc(*mapper, in);
1265 
1266  // Ignore extra codon when there are multiple source proteins.
1267  cout << " Test stop codon non-extension, multi-id source" << endl;
1268  mapper.reset(
1269  new CSeq_loc_Mapper_Base(dst, src,
1270  CSeq_loc_Mapper_Options(info.GetPointer())));
1271  mapper->SetMergeAbutting();
1272  TestMappingSeq_loc(*mapper, in);
1273 
1274  // Mapping to minus strand, nuc->nuc, trim=on.
1275  cout << " Test trimming while mapping to minus strand, nuc->nuc, #1" << endl;
1276  in >> MSerial_AsnText >> src;
1277  in >> MSerial_AsnText >> dst;
1278  mapper.reset(
1279  new CSeq_loc_Mapper_Base(src, dst,
1280  CSeq_loc_Mapper_Options(info.GetPointer())
1281  .SetTrimMappedLocation(true)));
1282  TestMappingSeq_loc(*mapper, in);
1283 
1284  cout << " Test trimming while mapping to minus strand, nuc->nuc, #2" << endl;
1285  TestMappingSeq_loc(*mapper, in);
1286 
1287  cout << " Test trimming while mapping to minus strand, nuc->nuc, #3" << endl;
1288  TestMappingSeq_loc(*mapper, in);
1289 
1290  // Mapping to minus strand, prot->nuc, trim=on.
1291  cout << " Test trimming while mapping to minus strand, prot->nuc, #1" << endl;
1292  in >> MSerial_AsnText >> src;
1293  in >> MSerial_AsnText >> dst;
1294  mapper.reset(
1295  new CSeq_loc_Mapper_Base(src, dst,
1296  CSeq_loc_Mapper_Options(info.GetPointer())
1297  .SetTrimMappedLocation(true)));
1298  TestMappingSeq_loc(*mapper, in);
1299 
1300  cout << " Test trimming while mapping to minus strand, prot->nuc, #2" << endl;
1301  TestMappingSeq_loc(*mapper, in);
1302 
1303  cout << " Test trimming while mapping to minus strand, prot->nuc, #3" << endl;
1304  TestMappingSeq_loc(*mapper, in);
1305 
1306  // Mapping from minus strand, nuc->nuc, trim=on.
1307  cout << " Test trimming while mapping from minus strand, nuc->nuc, #1" << endl;
1308  in >> MSerial_AsnText >> src;
1309  in >> MSerial_AsnText >> dst;
1310  mapper.reset(
1311  new CSeq_loc_Mapper_Base(src, dst,
1312  CSeq_loc_Mapper_Options(info.GetPointer())
1313  .SetTrimMappedLocation(true)));
1314  TestMappingSeq_loc(*mapper, in);
1315 
1316  cout << " Test trimming while mapping from minus strand, nuc->nuc, #2" << endl;
1317  TestMappingSeq_loc(*mapper, in);
1318 
1319  cout << " Test trimming while mapping from minus strand, nuc->nuc, #3" << endl;
1320  TestMappingSeq_loc(*mapper, in);
1321 
1322  // Mapping from minus strand, nuc->prot, trim=on.
1323  cout << " Test trimming while mapping from minus strand, nuc->prot, #1" << endl;
1324  in >> MSerial_AsnText >> src;
1325  in >> MSerial_AsnText >> dst;
1326  mapper.reset(
1327  new CSeq_loc_Mapper_Base(src, dst,
1328  CSeq_loc_Mapper_Options(info.GetPointer())
1329  .SetTrimMappedLocation(true)));
1330  TestMappingSeq_loc(*mapper, in);
1331 
1332  cout << " Test trimming while mapping from minus strand, nuc->prot, #2" << endl;
1333  TestMappingSeq_loc(*mapper, in);
1334 
1335  cout << " Test trimming while mapping from minus strand, nuc->prot, #3" << endl;
1336  TestMappingSeq_loc(*mapper, in);
1337 
1338  // Incomplete codon trimming test.
1339  // - prot->nuc vs nuc->prot
1340  // - plus strand, minus strand
1341  // - frame not_set/one/two/three
1342  // - 0/1/2 extra bases on nuc
1343  cout << "Testing frame and incomplete codon trimming" << endl;
1344  CSeq_feat feat_base, feat;
1345  CSeq_loc nuc_orig_long, nuc_orig_short, prot_orig_long, prot_orig_short;
1346  in >> MSerial_AsnText >> feat_base;
1347  in >> MSerial_AsnText >> nuc_orig_long;
1348  in >> MSerial_AsnText >> nuc_orig_short;
1349  in >> MSerial_AsnText >> prot_orig_long;
1350  in >> MSerial_AsnText >> prot_orig_short;
1351 
1352  feat.Assign(feat_base);
1353  CCdregion& cds = feat.SetData().SetCdregion();
1354 
1355  for (int str_idx = 0; str_idx < 2; ++str_idx) {
1356  feat.SetLocation().SetInt().SetStrand(str_idx ? eNa_strand_minus : eNa_strand_plus);
1357  if ( str_idx ) {
1358  nuc_orig_long.SetStrand(eNa_strand_minus);
1359  nuc_orig_short.SetStrand(eNa_strand_minus);
1360  }
1361  else {
1362  nuc_orig_long.ResetStrand();
1363  nuc_orig_short.ResetStrand();
1364  }
1365  for (int frame = CCdregion::eFrame_not_set; frame <= CCdregion::eFrame_three; ++frame) {
1366  cds.SetFrame(CCdregion::TFrame(frame));
1367  for (TSeqPos extra_bases = 0; extra_bases < 3; ++extra_bases) {
1368  cout << " nuc->prot, boundaries, " <<
1369  (str_idx ? "minus" : "plus") <<
1370  ", frame=" << frame <<
1371  ", " " extra bases=" << extra_bases << endl;
1372  mapper.reset(
1374  CSeq_loc_Mapper_Options(info.GetPointer())));
1375  TestMappingSeq_loc(*mapper, nuc_orig_long, in);
1376 
1377  cout << " nuc->prot, position, " <<
1378  (str_idx ? "minus" : "plus") <<
1379  ", frame=" << frame <<
1380  ", " " extra bases=" << extra_bases << endl;
1381  TestMappingSeq_loc(*mapper, nuc_orig_short, in);
1382 
1383  cout << " prot->nuc, boundaries, " <<
1384  (str_idx ? "minus" : "plus") <<
1385  ", frame=" << frame <<
1386  ", " " extra bases=" << extra_bases << endl;
1387  mapper.reset(
1389  CSeq_loc_Mapper_Options(info.GetPointer())));
1390  TestMappingSeq_loc(*mapper, prot_orig_long, in);
1391 
1392  cout << " prot->nuc, position, " <<
1393  (str_idx ? "minus" : "plus") <<
1394  ", frame=" << frame <<
1395  ", " " extra bases=" << extra_bases << endl;
1396  TestMappingSeq_loc(*mapper, prot_orig_short, in);
1397  }
1398  }
1399  }
1400 }
1401 
1402 
// Boost.Test case s_TestMapping. In this extract the only visible statement
// of its body is the call to TestMapper_Fuzz().
// NOTE(review): the listing numbers jump from 1404 to 1419 and from 1419 to
// 1423, so other statements of the body were apparently dropped by the
// extraction -- consult the real file for the full sequence of calls.
1403 BOOST_AUTO_TEST_CASE(s_TestMapping)
1404 {
1419  TestMapper_Fuzz();
1423 }
User-defined methods of the data storage class.
Seq-loc and seq-align mapper exceptions.
CCdregion –.
Definition: Cdregion.hpp:66
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeq_loc_Mapper_Base –.
CSeq_loc_Mapper_Options –.
virtual TSeqPos GetSequenceLength(const CSeq_id_Handle &idh)
Get sequence length or kInvalidSeqPos.
void AddSeq(TGi gi, TSeqType seqtype, TSeqPos len)
virtual TSeqType GetSequenceType(const CSeq_id_Handle &idh)
Get information about sequence type (nuc or prot).
map< TGi, TSeqPos > TLenMap
map< TGi, TSeqType > TTypeMap
virtual void CollectSynonyms(const CSeq_id_Handle &id, TSynonyms &synonyms)
Collect all synonyms for the id including the id itself.
IMapper_Sequence_Info.
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
bool IsGi(void) const
TGi GetGi(void) const
void ResetStrand(void)
Reset the strand on this location.
Definition: Seq_loc.cpp:5221
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5196
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CSeq_loc_Mapper_Options & SetTrimMappedLocation(bool value=true)
CSeq_loc_Mapper_Base & SetMergeNone(void)
Intervals' merging mode MergeNone and MergeAbutting do not change the order of ranges in the destinat...
CSeq_loc_Mapper_Base & SetTrimSplicedSeg(bool trim)
For mapping spliced-segs only: preserve or trim starting/ending indels.
CSeq_loc_Mapper_Base & SetMergeAbutting(void)
Merge only abutting intervals, keep overlapping.
CSeq_loc_Mapper_Base & SetMergeContained(void)
Merge intervals only if one is completely covered by another.
CSeq_loc_Mapper_Base & SetMergeAll(void)
Merge any abutting or overlapping intervals.
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void SetFrame(TFrame value)
Assign a value to Frame data member.
Definition: Cdregion_.hpp:540
@ eFrame_not_set
not set, code uses one
Definition: Cdregion_.hpp:95
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
int i
int len
void TestMapping_ThroughMix()
USING_SCOPE(objects)
void TestMapping_Spliced()
void TestMapping_Graph()
void TestMapping_NucToProt()
void TestMapper_Sequence_Info()
void TestMapping_Scores()
void TestMapping_ProtToNuc()
void TestMappingSeq_align(CSeq_loc_Mapper_Base &mapper, const CSeq_align &orig, CNcbiIstream &in)
void TestMapping_Dendiag()
void TestMappingSeq_graph_Exception(CSeq_loc_Mapper_Base &mapper, CNcbiIstream &in)
BOOST_AUTO_TEST_CASE(s_TestMapping)
void TestMapping_Merging()
void TestMapper_TruncatedMix()
void TestMapping_Order()
void TestMapper_ExonPartsOrder()
void TestMapping_Denseg()
void TestMapping_AlignmentsToParts()
void TestMappingSeq_loc_Exception(CSeq_loc_Mapper_Base &mapper, CNcbiIstream &in)
void TestMappingSeq_loc(CSeq_loc_Mapper_Base &mapper, const CSeq_loc &orig, CNcbiIstream &in)
void TestMapping_ThroughAlignments()
CSeq_loc_Mapper_Base * CreateMapperFromSeq_locs(CNcbiIstream &in)
void TestMapper_Trimming()
NCBITEST_AUTO_INIT()
void TestMapping_Simple()
USING_NCBI_SCOPE
void TestMappingSeq_graph(CSeq_loc_Mapper_Base &mapper, const CSeq_graph &orig, CNcbiIstream &in)
void TestMapper_Fuzz()
NCBITEST_AUTO_FINI()
void TestMappingSeq_align_Exception(CSeq_loc_Mapper_Base &mapper, CNcbiIstream &in)
static MDB_envinfo info
Definition: mdb_load.c:37
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
bool eq(T x_, T y_, T round_)
Definition: njn_approx.hpp:79
std::istream & in(std::istream &in_, double &x_)
Utility stuff for more convenient using of Boost.Test library.
Modified on Sat Dec 02 09:21:31 2023 by modify_doxy.py rev. 669887