NCBI C++ ToolKit
seq_id_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_id_unit_test.cpp 99126 2023-02-15 23:54:23Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aaron Ucko, NCBI
27  *
28  * File Description:
29  * Unit test for CSeq_id and some closely related code
30  *
31  * ===========================================================================
32  */
33 #define NCBI_TEST_APPLICATION
34 #include <ncbi_pch.hpp>
35 
37 
39 #include <objects/general/Date.hpp>
42 #include <objects/seq/Seq_inst.hpp>
46 
47 #include <corelib/ncbiapp.hpp>
48 #include <corelib/test_boost.hpp>
49 
50 #include <boost/test/parameterized_test.hpp>
51 #include <util/util_exception.hpp>
52 #include <util/util_misc.hpp>
53 #include <util/random_gen.hpp>
54 #include <thread>
55 
56 #include <common/test_assert.h> /* This header must go last */
57 
60 
61 
62 #define NCBI_CHECK_THROW_SEQID(s) BOOST_CHECK_THROW(s, CSeqIdException)
63 
65 {
66  // force use of built-in accession guide
67  g_IgnoreDataFile("accguide.txt");
68 }
69 
70 #ifdef NCBI_THREADS
71 class CMTTestThread : public CThread
72 {
73 public:
74  CMTTestThread(int tid) : m_TId(tid), m_Random(tid), m_First(true) {
75  }
76 
77  CSeq_id_Handle GetRandomId(bool other, bool with_version) {
78  CSeq_id_Handle idh;
79  CNcbiOstrstream fmt;
80  if ( m_Random.GetRand(0, 1) ) {
81  fmt << "NC_" << setfill('0') << setw(6) << m_Random.GetRand(1, 10);
82  }
83  else {
84  fmt << "lcl|a";
85  }
86  int ver = with_version? m_Random.GetRand(1, 20): 0;
87  if ( 1 && m_First ) {
88  m_First = false;
89  if ( ver ) {
90  fmt << '.' << ver;
91  }
92  string str_id = CNcbiOstrstreamToString(fmt);
93  idh = CSeq_id_Handle::GetHandle(str_id);
94  }
95  else {
96  CSeq_id id;
97  CTextseq_id& text = other? id.SetOther(): id.SetGenbank();
98  text.SetAccession(CNcbiOstrstreamToString(fmt));
99  if ( ver ) {
100  text.SetVersion();
101  }
102  idh = CSeq_id_Handle::GetHandle(id);
103  }
104  return idh;
105  }
106 
107  virtual void* Main(void) {
108  for ( int i = 0; i < 1000; ++i ) {
109  if ( i%2 ) {
110  CSeq_id_Handle idh0 = GetRandomId(m_Random.GetRand(0, 1), false);
111  }
112  else {
113  CSeq_id_Handle idh = GetRandomId(m_Random.GetRand(0, 1), true);
114  //m_Ids.push_back(idh);
117  if ( m_Ids.size() > 10 ) {
118  m_Ids.pop_front();
119  }
120  }
121  }
122  return 0;
123  }
124 
125 private:
126  int m_TId;
128  bool m_First;
129  deque<CSeq_id_Handle> m_Ids;
130 };
131 
132 
134 {
135  vector< CRef<CThread> > tt;
136  for ( int i = 0; i < 10; ++i ) {
138  tt.push_back(t);
139  }
140  NON_CONST_ITERATE ( vector< CRef<CThread> >, it, tt ) {
141  (*it)->Run();
142  }
143  NON_CONST_ITERATE ( vector< CRef<CThread> >, it, tt ) {
144  (*it)->Join();
145  }
146 }
147 
148 
150 {
151  const size_t NQ = 20;
152  vector<thread> tt(NQ);
153  for ( size_t i = 0; i < NQ; ++i ) {
154  tt[i] =
155  thread([&]
156  (TGi gi)
157  {
160  for ( int i = 0; i < 1000000; ++i ) {
161  _VERIFY(id1.GetSeqId()->GetGi() == gi);
162  _VERIFY(id2.GetSeqId()->GetGi() == gi+1);
163  }
164  }, GI_FROM(int, i+1));
165  }
166  for ( size_t i = 0; i < NQ; ++i ) {
167  tt[i].join();
168  }
169 }
170 #endif
171 
172 
173 BOOST_AUTO_TEST_CASE(s_TestDefaultInit)
174 {
175  CSeq_id id;
176  BOOST_CHECK_EQUAL(id.Which(), CSeq_id::e_not_set);
177  BOOST_CHECK_THROW(id.GetGi(), CInvalidChoiceSelection);
178 }
179 
180 BOOST_AUTO_TEST_CASE(s_TestInitFromJunk)
181 {
182  CRef<CSeq_id> id;
183 
184  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id(kEmptyStr)));
185  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("JUNK")));
186  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("?!?!")));
187  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("4[ip]")));
188 }
189 
190 BOOST_AUTO_TEST_CASE(s_TestInitFromGIString)
191 {
192  CRef<CSeq_id> id;
193 
194  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(" 1234 ")));
195  BOOST_CHECK(id->IsGi());
196  BOOST_CHECK(id->GetGi() == 1234);
197 
198  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("1234.5")));
199  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("-1234")));
200  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("0")));
201  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("01234")));
202 #ifdef NCBI_INT8_GI
203  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("9876543210")));
204  BOOST_CHECK(id->IsGi());
205  BOOST_CHECK(id->GetGi() == NCBI_CONST_INT8(9876543210));
206 #else
207  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("9876543210")));
208 #endif
209 }
210 
211 BOOST_AUTO_TEST_CASE(s_TestInitFromNAcc)
212 {
213  CRef<CSeq_id> id;
214 
215  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N00001")));
216 
217  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N0068")));
218  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("N00068")));
219  BOOST_CHECK(id->IsDdbj());
220  BOOST_CHECK_EQUAL(id->GetDdbj().GetAccession(), string("N00068"));
221  BOOST_CHECK( !id->GetDdbj().IsSetName() );
222  BOOST_CHECK( !id->GetDdbj().IsSetVersion() );
223  BOOST_CHECK( !id->GetDdbj().IsSetRelease() );
224  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N000068")));
225 
226  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N19999")));
227 
228  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("N20001.1")));
229  BOOST_CHECK(id->IsGenbank());
230  BOOST_CHECK_EQUAL(id->GetGenbank().GetAccession(), string("N20001"));
231  BOOST_CHECK( !id->GetGenbank().IsSetName() );
232  BOOST_CHECK_EQUAL(id->GetGenbank().GetVersion(), 1);
233  BOOST_CHECK( !id->GetGenbank().IsSetRelease() );
234 
235  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N20001.1.1")));
236  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N20001.1a")));
237  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N20001.-1")));
238  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N20001.x")));
239  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("N20001.9876543210")));
240 }
241 
242 BOOST_AUTO_TEST_CASE(s_TestInitFromStdAcc)
243 {
244  CRef<CSeq_id> id;
245 
246  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BN00123")));
247  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("bn000123")));
248  BOOST_CHECK(id->IsTpe());
249  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BN00012B")));
250  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BN0000123")));
251 
252  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("FAA0017")));
253  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("FAA00017")));
254  BOOST_CHECK(id->IsTpd());
255  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("FAA000017")));
256 
257  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("ABCD1234567")));
258  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("CAAA01020304")));
259  BOOST_CHECK(id->IsEmbl());
260  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("AACN011056789")));
261  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("ABCD1234567890")));
262  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("ABCD12345678901")));
263 
265  BOOST_CHECK_EQUAL(ai, CSeq_id::eAcc_gb_tpa_wgs_nuc);
266  BOOST_CHECK_EQUAL(ai & CSeq_id::eAcc_division_mask, CSeq_id::eAcc_wgs);
267 
268  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("DAAA02000000"),
270  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("AACN010000000"),
272  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("AACN011000000"),
274 
275  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BABA01S00009")));
276  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BABA0S1000093")));
277  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BABA01SS000093")));
278  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BABA01T000093")));
279  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BABA010S00093")));
280  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("BABA01S000093"),
282  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("BABA01S0000000"),
284  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("BABA01S00009300"),
286  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("BABA01S000093000")));
287 
288  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("BABA01P000093"),
290  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("HAHA01P234567"),
292  // Kxxx is specifically gb_targeted_nuc, but we gloss over that
293  // distinction for proteins at the moment.
294  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("KUKU01P234567"),
296 
297  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("MAP_12345")));
298  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("MAP_123456"),
300  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("MAP_1234567")));
301 
302  // New, longer formats
303  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("ADDEDE000000000"),
305  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("ADDEDE010000000"),
307  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("ADDEDE010000001"),
309  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("ADDEDE01S0000001"),
311  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("ADDEDE01P0000001"),
313  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("KJ01234522"),
315  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("AHL6932631"),
317 }
318 
319 BOOST_AUTO_TEST_CASE(s_TestInitFromPRFAcc)
320 {
321  CRef<CSeq_id> id;
322 
323  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("50086A")));
324  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("550086A")));
325  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("650771AF")));
326  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("0806162C")));
327  BOOST_CHECK(id->IsPrf());
328  BOOST_CHECK(!id->GetPrf().IsSetAccession());
329  BOOST_CHECK_EQUAL(id->GetPrf().GetName(), string("0806162C"));
330  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("2015436HX")));
331  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1309311A:PDB=1EMD,2CMD")));
332  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("650771ABC")));
333  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("080616C2")));
334  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("00806162C")));
335  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("0806162C3")));
336  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("2015436HIJ")));
337 }
338 
339 BOOST_AUTO_TEST_CASE(s_TestInitFromPDBAcc)
340 {
341  CRef<CSeq_id> id;
342 
343  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("1GA")));
344  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1GAV")));
345  BOOST_CHECK(id->IsPdb());
346  BOOST_CHECK_EQUAL(id->GetPdb().GetMol().Get(), string("1GAV"));
347  BOOST_CHECK_EQUAL(id->GetPdb().GetChain(), ' ');
348  // NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("1GAV2")));
349  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("1GAV.2")));
350 
351  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("1GAVX")));
352  BOOST_CHECK_NO_THROW
353  (id.Reset(new CSeq_id("1GAVX", (CSeq_id::fParse_RawText
355  BOOST_CHECK(id->IsLocal());
356 
357  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1GAV|X")));
358  BOOST_CHECK(id->IsPdb());
359  BOOST_CHECK_EQUAL(id->GetPdb().GetMol().Get(), string("1GAV"));
360  BOOST_CHECK_EQUAL(id->GetPdb().GetChain(), 'X');
361 
362  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1GAV|XY")));
363  BOOST_CHECK( !id->GetPdb().IsSetChain() );
364  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "XY");
365  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1GAV|XX")));
366  BOOST_CHECK( !id->GetPdb().IsSetChain() );
367  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "XX");
368  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1GAV_!")));
369  BOOST_CHECK_EQUAL(id->GetPdb().GetChain(), '!');
370  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "!");
371  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1GAV|VB")));
372  BOOST_CHECK( !id->GetPdb().IsSetChain() );
373  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "VB");
374  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("1GAV|AAA")));
375  BOOST_CHECK( !id->GetPdb().IsSetChain() );
376  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "AAA");
377 
378  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("2004[dp]"),
380  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("2008;358:2545"),
382  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("2000:2010"),
384 
385  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("1GAV|ABCDEFGHIJKL"),
387  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("1GAV|ABCDEFGHIJKLM"),
389  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession("1GAV|@BCDEFGHIJKL"),
391 }
392 
393 BOOST_AUTO_TEST_CASE(s_TestInitFromSPAcc)
394 {
395  CRef<CSeq_id> id;
396 
397  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("Q7CQJ")));
398  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("q7cqj0")));
399  BOOST_CHECK(id->IsSwissprot());
400  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("Q7CQJO")));
401  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("Q7CQJ01")));
402  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("07CQJ0")));
403  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("A2ASS6.1")));
404  BOOST_CHECK(id->IsSwissprot());
405  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("A29SS6.1")));
406 
407  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("A0A022YWF9")));
408  BOOST_CHECK(id->IsSwissprot());
409  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("A0A022YWF")));
410  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("P0A022YWF9")));
411  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("ABA022YWF9")));
412  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("A02022YWF9")));
413  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("A0A02XYWF9")));
414  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("A0A0223WF9")));
415  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("A0A022YWFZ")));
416  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("A0A022YWF99")));
417 }
418 
419 BOOST_AUTO_TEST_CASE(s_TestInitFromRefSeqAcc)
420 {
421  CRef<CSeq_id> id;
422 
423  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("NM_00017")));
424  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("NM_000170.1")));
425  BOOST_CHECK(id->IsOther());
426  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("NM_001000170")));
427  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("NM_0001000170")));
428 
429  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("ZP_00345678")));
430 
431  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("NZ_CH95931.1")));
432  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("NZ_CH959311.1")));
433  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("NZ_CH9593111.1")));
434 
435  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("NZ_AABC0300051")));
436  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("NZ_AABC03000051")));
437  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("NZ_ABJB030000051")));
438  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("NZ_ABJB0300000510")));
439  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("NZ_ABJB03000005100")));
440 }
441 
442 BOOST_AUTO_TEST_CASE(s_TestInitFromGpipeAcc)
443 {
444  CRef<CSeq_id> id;
445 
446  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("GPC_12345")));
447  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("GPC_123456.1")));
448  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("GPC_123456789.1")));
449  BOOST_CHECK(id->IsGpipe());
450  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("GPC_1234567890")));
451  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("GPC_12S3456789")));
452  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("GPC_12P3456789")));
453 }
454 
455 BOOST_AUTO_TEST_CASE(s_TestInitFromNatAcc)
456 {
457  CRef<CSeq_id> id;
458 
459  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("AT_12345")));
460  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("AT_123456789.1")));
461  BOOST_CHECK(id->IsNamed_annot_track());
462  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("AT_1234567890")));
463 }
464 
465 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaLocal)
466 {
467  CRef<CSeq_id> id;
468 
469  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("asd|fgh|jkl")));
470 
471  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("lcl|0")));
472  BOOST_CHECK(id->IsLocal());
473  BOOST_CHECK(id->GetLocal().IsStr());
474  BOOST_CHECK_EQUAL(id->GetLocal().GetStr(), "0");
475 
476  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("lcl|123")));
477  BOOST_CHECK(id->IsLocal());
478  BOOST_CHECK(id->GetLocal().IsId());
479  BOOST_CHECK_EQUAL(id->GetLocal().GetId(), 123);
480 
481  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("lcl|0123")));
482  BOOST_CHECK(id->IsLocal());
483  BOOST_CHECK(id->GetLocal().IsStr());
484  BOOST_CHECK_EQUAL(id->GetLocal().GetStr(), "0123");
485 
486  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("lcl|-123")));
487  BOOST_CHECK(id->IsLocal());
488  BOOST_CHECK(id->GetLocal().IsStr());
489  BOOST_CHECK_EQUAL(id->GetLocal().GetStr(), "-123");
490 
491  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("lcl|asdf")));
492  BOOST_CHECK(id->IsLocal());
493  BOOST_CHECK(id->GetLocal().IsStr());
494  BOOST_CHECK_EQUAL(id->GetLocal().GetStr(), string("asdf"));
495 
496  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("lcl|NM_002020|")));
497  BOOST_CHECK(id->IsLocal());
498  BOOST_CHECK(id->GetLocal().IsStr());
499  BOOST_CHECK_EQUAL(id->GetLocal().GetStr(), string("NM_002020"));
500  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("lcl|NM_002020|junk")));
501 }
502 
503 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaObsolete)
504 {
505  CRef<CSeq_id> id;
506 
507  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("bbs|123")));
508  BOOST_CHECK(id->IsGibbsq());
509  BOOST_CHECK_EQUAL(id->GetGibbsq(), 123);
510 // NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("bbs|0")));
511  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("bbs|0")));
512  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("bbs|123.4")));
513  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("bbs|123Z")));
514  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("bbs|xyz")));
515  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("bbs|9876543210")));
516 
517  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("bbm|123")));
518  BOOST_CHECK(id->IsGibbmt());
519 
520  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("gim|123")));
521  BOOST_CHECK(id->IsGiim());
522  BOOST_CHECK_EQUAL(id->GetGiim().GetId(), 123);
523 }
524 
525 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaGenbank)
526 {
527  CRef<CSeq_id> id;
528 
529  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("gb|")));
530  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("gb|U12345.1|AMU12345")));
531  BOOST_CHECK(id->IsGenbank());
532  BOOST_CHECK_EQUAL(id->GetGenbank().GetAccession(), string("U12345"));
533  BOOST_CHECK_EQUAL(id->GetGenbank().GetName(), string("AMU12345"));
534  BOOST_CHECK_EQUAL(id->GetGenbank().GetVersion(), 1);
535  BOOST_CHECK( !id->GetGenbank().IsSetRelease() );
536 }
537 
538 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaEmbl)
539 {
540  CRef<CSeq_id> id;
541 
542  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("emb|AL123456|MTBH37RV")));
543  BOOST_CHECK(id->IsEmbl());
544  BOOST_CHECK_EQUAL(id->GetEmbl().GetAccession(), string("AL123456"));
545  BOOST_CHECK_EQUAL(id->GetEmbl().GetName(), string("MTBH37RV"));
546  BOOST_CHECK( !id->GetEmbl().IsSetVersion() );
547  BOOST_CHECK( !id->GetEmbl().IsSetRelease() );
548 }
549 
550 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaPir)
551 {
552  CRef<CSeq_id> id;
553 
554  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pir||S16356")));
555  BOOST_CHECK(id->IsPir());
556  BOOST_CHECK( !id->GetPir().IsSetAccession() );
557  BOOST_CHECK_EQUAL(id->GetPir().GetName(), string("S16356"));
558  BOOST_CHECK( !id->GetPir().IsSetVersion() );
559  BOOST_CHECK( !id->GetPir().IsSetRelease() );
560 }
561 
562 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaSwissprot)
563 {
564  CRef<CSeq_id> id;
565 
566  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("sp|Q7CQJ0|RS22_SALTY")));
567  BOOST_CHECK(id->IsSwissprot());
568  BOOST_CHECK_EQUAL(id->GetSwissprot().GetAccession(), string("Q7CQJ0"));
569  BOOST_CHECK_EQUAL(id->GetSwissprot().GetName(), string("RS22_SALTY"));
570  BOOST_CHECK( !id->GetSwissprot().IsSetVersion() );
571  BOOST_CHECK_EQUAL(id->GetSwissprot().GetRelease(), string("reviewed"));
572 
573  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("tr|Q90RT2|Q90RT2_9HIV1")));
574  BOOST_CHECK_EQUAL(id->GetSwissprot().GetRelease(), string("unreviewed"));
575 
576  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("sp|Q7CQJ0.1")));
577  BOOST_CHECK(id->IsSwissprot());
578  BOOST_CHECK_EQUAL(id->GetSwissprot().GetAccession(), string("Q7CQJ0"));
579  BOOST_CHECK_EQUAL(id->GetSwissprot().GetVersion(), 1);
580 }
581 
582 BOOST_AUTO_TEST_CASE(s_TestInitFromPatent)
583 {
584  CRef<CSeq_id> id;
585 
586  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pat|US|RE33188|1")));
587  BOOST_CHECK(id->IsPatent());
588  BOOST_CHECK_EQUAL(id->GetPatent().GetSeqid(), 1);
589  BOOST_CHECK_EQUAL(id->GetPatent().GetCit().GetCountry(), string("US"));
590  BOOST_CHECK(id->GetPatent().GetCit().GetId().IsNumber());
591  BOOST_CHECK_EQUAL(id->GetPatent().GetCit().GetId().GetNumber(),
592  string("RE33188"));
593 
594  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("pat|US|RE33188|1.5")));
595  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("pat|US|RE33188|1b")));
596  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("pat|US|RE33188|9876543210")));
597  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("pat|US|RE33188|-1")));
598  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("pat|US|RE33188|Z")));
599  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id("pat|US|RE33188")));
600 
601  /*
602  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pat|EP|0238993|7")));
603  BOOST_CHECK(id->IsPatent());
604  BOOST_CHECK_EQUAL(id->GetPatent().GetSeqid(), 7);
605  BOOST_CHECK_EQUAL(id->GetPatent().GetCit().GetCountry(), string("EP"));
606  BOOST_CHECK(id->GetPatent().GetCit().GetId().IsApp_number());
607  BOOST_CHECK_EQUAL(id->GetPatent().GetCit().GetId().GetApp_number(),
608  string("0238993"));
609  */
610 
611  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Patent,
612  "US", "RE33188", 1)));
613  BOOST_CHECK_EQUAL(id->GetPatent().GetCit().GetId().GetNumber(),
614  string("RE33188"));
615 
616  /*
617  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Patent,
618  "EP", "0238993", 7, "PGP")));
619  BOOST_CHECK_EQUAL(id->GetPatent().GetCit().GetId().GetApp_number(),
620  string("0238993"));
621  */
622 }
623 
624 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaRefseq)
625 {
626  CRef<CSeq_id> id;
627 
628  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("ref|NM_000170.1")));
629  BOOST_CHECK(id->IsOther());
630  BOOST_CHECK_EQUAL(id->GetOther().GetAccession(), string("NM_000170"));
631  // BOOST_CHECK_EQUAL(id->GetOther().GetVersion(), 1);
632  // Split up to avoid mysterious WorkShop 5.5 11381x-19 errors:
633  int version;
634  BOOST_CHECK_NO_THROW(version = id->GetOther().GetVersion());
635  BOOST_CHECK_EQUAL(version, 1);
636  // Don't try to do anything with the release field, which is no longer
637  // supported.
638 }
639 
640 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaGeneral)
641 {
642  CRef<CSeq_id> id;
643 
644  BOOST_CHECK_NO_THROW
645  (id.Reset(new CSeq_id
646  ("gnl|dbSNP|rs31251_allelePos=201totallen=401|taxid=9606"
647  "|snpClass=1|alleles=?|mol=?|build=?")));
648  BOOST_CHECK(id->IsGeneral());
649  BOOST_CHECK_EQUAL(id->GetGeneral().GetDb(), string("dbSNP"));
650  BOOST_CHECK(id->GetGeneral().GetTag().IsStr());
651  BOOST_CHECK_EQUAL(id->GetGeneral().GetTag().GetStr(),
652  string("rs31251_allelePos=201totallen=401|taxid=9606"
653  "|snpClass=1|alleles=?|mol=?|build=?"));
654 
655  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("gnl|taxon|9606")));
656  BOOST_CHECK(id->IsGeneral());
657  BOOST_CHECK_EQUAL(id->GetGeneral().GetDb(), string("taxon"));
658  BOOST_CHECK(id->GetGeneral().GetTag().IsId());
659  BOOST_CHECK_EQUAL(id->GetGeneral().GetTag().GetId(), 9606);
660 }
661 
662 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaGI)
663 {
664  CRef<CSeq_id> id;
665 
666  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("gi|1234")));
667  BOOST_CHECK(id->IsGi());
668  BOOST_CHECK_EQUAL(id->GetGi(), 1234);
669 }
670 
671 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaDdbj)
672 {
673  CRef<CSeq_id> id;
674 
675  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("dbj|N00068")));
676  BOOST_CHECK(id->IsDdbj());
677  BOOST_CHECK_EQUAL(id->GetDdbj().GetAccession(), string("N00068"));
678 }
679 
680 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaPrf)
681 {
682  CRef<CSeq_id> id;
683 
684  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("prf||0806162C")));
685  BOOST_CHECK(id->IsPrf());
686 }
687 
688 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaPdb)
689 {
690  CRef<CSeq_id> id;
691 
692  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pdb|1GAV")));
693  BOOST_CHECK(id->IsPdb());
694  BOOST_CHECK_EQUAL(id->GetPdb().GetMol().Get(), string("1GAV"));
695  BOOST_CHECK_EQUAL(id->GetPdb().GetChain(), ' ');
696  BOOST_CHECK( !id->GetPdb().IsSetChain_id() );
697 
698  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pdb|1GAV|X")));
699  BOOST_CHECK(id->IsPdb());
700  BOOST_CHECK_EQUAL(id->GetPdb().GetMol().Get(), string("1GAV"));
701  BOOST_CHECK_EQUAL(id->GetPdb().GetChain(), 'X');
702  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "X");
703 
704  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pdb|1GAV|XY")));
705  BOOST_CHECK( !id->GetPdb().IsSetChain() );
706  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "XY");
707  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pdb|1GAV|XX")));
708  BOOST_CHECK( !id->GetPdb().IsSetChain() );
709  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "XX");
710  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pdb|1GAV|!")));
711  BOOST_CHECK_EQUAL(id->GetPdb().GetChain(), '!');
712  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "!");
713  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pdb|1GAV|VB")));
714  BOOST_CHECK( !id->GetPdb().IsSetChain() );
715  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "VB");
716  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("pdb|1GAV|AAA")));
717  BOOST_CHECK( !id->GetPdb().IsSetChain() );
718  BOOST_CHECK_EQUAL(id->GetPdb().GetChain_id(), "AAA");
719 }
720 
721 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaTpa)
722 {
723  CRef<CSeq_id> id;
724 
725  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("tpg|BK003456")));
726  BOOST_CHECK(id->IsTpg());
727  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("tpe|BN000123")));
728  BOOST_CHECK(id->IsTpe());
729  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("tpd|FAA00017")));
730  BOOST_CHECK(id->IsTpd());
731 }
732 
733 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaGpipe)
734 {
735  CRef<CSeq_id> id;
736  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("gpp|GPC_123456789")));
737  BOOST_CHECK(id->IsGpipe());
738 }
739 
740 BOOST_AUTO_TEST_CASE(s_TestInitFromFastaNat)
741 {
742  CRef<CSeq_id> id;
743  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id("nat|AT_123456789")));
744  BOOST_CHECK(id->IsNamed_annot_track());
745 }
746 
747 
748 static CSeq_id* s_NewDbtagId(const string& db, const string& tag,
749  bool set_as_general = false)
750 {
751  CDbtag dbtag;
752  dbtag.SetDb(db);
753  dbtag.SetTag().SetStr(tag);
754  return new CSeq_id(dbtag, set_as_general);
755 }
756 
757 static CSeq_id* s_NewDbtagId(const string& db, int tag,
758  bool set_as_general = false)
759 {
760  CDbtag dbtag;
761  dbtag.SetDb(db);
762  dbtag.SetTag().SetId(tag);
763  return new CSeq_id(dbtag, set_as_general);
764 }
765 
766 BOOST_AUTO_TEST_CASE(s_TestInitFromDbtag)
767 {
768  CRef<CSeq_id> id;
769  CDbtag dbtag;
770 
771  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id(dbtag)));
772 
773  // No longer supported.
774  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("GenBank", "N20001.1")));
775  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("GI", "12345")));
776  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("GI", 12345)));
777 
778  BOOST_CHECK_NO_THROW(id.Reset(s_NewDbtagId("EMBL", "AL123456.7")));
779  BOOST_CHECK(id->IsEmbl());
780  BOOST_CHECK_EQUAL(id->GetEmbl().GetAccession(), string("AL123456"));
781  BOOST_CHECK( !id->GetEmbl().IsSetName() );
782  BOOST_CHECK_EQUAL(id->GetEmbl().GetVersion(), 7);
783  BOOST_CHECK( !id->GetEmbl().IsSetRelease() );
784 
785  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("EMBL", "AL123456.7.8")));
786  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("EMBL", "AL123456.7b")));
787  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("EMBL", "AL123456.-7")));
788  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("EMBL", "AL123456.z")));
789 
790  BOOST_CHECK_NO_THROW(id.Reset(s_NewDbtagId("DDBJ", "N00068")));
791  BOOST_CHECK(id->IsDdbj());
792  BOOST_CHECK_EQUAL(id->GetDdbj().GetAccession(), string("N00068"));
793 
794  BOOST_CHECK_NO_THROW(id.Reset(s_NewDbtagId("GI", "12345", true)));
795  BOOST_CHECK(id->IsGeneral());
796  BOOST_CHECK(id->GetGeneral().GetTag().IsStr());
797  BOOST_CHECK_EQUAL(id->GetGeneral().GetTag().GetStr(), "12345");
798 
799  BOOST_CHECK_NO_THROW(id.Reset(s_NewDbtagId("GI", 12345, true)));
800  BOOST_CHECK(id->IsGeneral());
801  BOOST_CHECK(id->GetGeneral().GetTag().IsId());
802  BOOST_CHECK_EQUAL(id->GetGeneral().GetTag().GetId(), 12345);
803 
804  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("taxon", 9606)));
805  BOOST_CHECK_NO_THROW(id.Reset(s_NewDbtagId("taxon", 9606, true)));
806  BOOST_CHECK_EQUAL(id->IdentifyAccession(), CSeq_id::eAcc_general);
807 
808  NCBI_CHECK_THROW_SEQID(id.Reset(s_NewDbtagId("TRACE_ASSM", "992")));
809  BOOST_CHECK_NO_THROW(id.Reset(s_NewDbtagId("TRACE_ASSM", "992", true)));
810  BOOST_CHECK_EQUAL(id->IdentifyAccession(), CSeq_id::eAcc_general_nuc);
811 }
812 
813 BOOST_AUTO_TEST_CASE(s_TestInitFromInt)
814 {
815  CRef<CSeq_id> id;
816 
817  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Gi, 1234)));
818  BOOST_CHECK(id->IsGi());
819  BOOST_CHECK_EQUAL(id->GetGi(), 1234);
820 // NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id(CSeq_id::e_Gi, 0)));
821  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Gi, 0)));
822  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id(CSeq_id::e_Gi, -1)));
823  NCBI_CHECK_THROW_SEQID(id.Reset(new CSeq_id(CSeq_id::e_Pdb, 1234)));
824 
825  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Local, 1234)));
826  BOOST_CHECK(id->IsLocal());
827  BOOST_CHECK(id->GetLocal().IsId());
828  BOOST_CHECK_EQUAL(id->GetLocal().GetId(), 1234);
829 
830  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Gibbsq, 1234)));
831  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Gibbmt, 1234)));
832 
833  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Giim, 1234)));
834  BOOST_CHECK(id->IsGiim());
835  BOOST_CHECK_EQUAL(id->GetGiim().GetId(), 1234);
836 }
837 
/// FASTA-style id strings exercised by the round-trip test.  Several end in
/// '|' or ' ', which s_TestFastaRoundTrip strips one at a time to check that
/// the shortened spelling parses to an equivalent id.
static const char* kTestFastaStrings[] = {
    // Local ids
    "lcl|123",
    "lcl|asdf",
    "lcl|0123",
    "lcl|0",
    "lcl|+1",
    "lcl|-1",
    "lcl|2147483648",
    // Integer-valued legacy ids
    "bbs|123",
    "bbm|123",
    "gim|123",
    // Text ids
    "gb|U12345.1|AMU12345",
    "emb|AL123456|MTBH37RV",
    "pir||S16356",
    "sp|Q7CQJ0|RS22_SALTY",
    "tr|Q90RT2|Q90RT2_9HIV1",
    "sp|Q7CQJ0.1|",
    "pat|US|RE33188|1",
    // "pgp|EP|0238993|7",
    "ref|NM_000170.1|",
    // General ids and GI
    "gnl|EcoSeq|EcoAce",
    "gnl|Celera|CDM:10213987",
    "gnl|taxon|9606",
    "gi|1234",
    "dbj|N00068|",
    "prf||0806162C",
    // PDB ids with various chain spellings
    "pdb|1GAV| ",
    "pdb|1GAV|X",
    "pdb|1GAV|XX",
    "pdb|1GAV|!",
    "pdb|1GAV|VB",
    // Third-party, pipeline and annotation-track ids
    "tpg|BK003456|",
    "tpe|BN000123|",
    "tpd|FAA00017|",
    "gpp|GPC_123456789|",
    "nat|AT_123456789.1|",
    /* Must be last due to special-cased greedy parsing */
    "gnl|dbSNP|rs31251_allelePos=201totallen=401|taxid=9606"
    "|snpClass=1|alleles=?|mol=?|build=?"
};

/// Number of entries in kTestFastaStrings.
static const size_t kNumFastaStrings
    = sizeof(kTestFastaStrings) / sizeof(kTestFastaStrings[0]);
880 
881 static void s_TestFastaRoundTrip(const char* s)
882 {
883  CRef<CSeq_id> id;
884  BOOST_TEST_MESSAGE(string("Testing round trip for ") << s);
885  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(s)));
886  BOOST_CHECK_EQUAL(id->AsFastaString(), s);
887  if ( 1 ) {
888  cout << s << endl;
889  CBioseq::TId ids;
890  CSeq_id::ParseFastaIds(ids, s);
891  BOOST_REQUIRE_EQUAL(ids.size(), 1u);
892  BOOST_CHECK_EQUAL(ids.front()->AsFastaString(), s);
893  BOOST_CHECK(ids.front()->Equals(*id));
894  BOOST_CHECK(id->Match(*id));
895  BOOST_CHECK_EQUAL(id->Compare(*id), CSeq_id::e_YES);
896  }
897  for (SIZE_TYPE pos = strlen(s) - 1;
898  pos != NPOS && (s[pos] == '|' || s[pos] == ' ');
899  --pos) {
900  CRef<CSeq_id> id2;
901  string ss(s, pos);
902  BOOST_TEST_MESSAGE("Testing equality with " << ss);
903  BOOST_CHECK_NO_THROW(id2.Reset(new CSeq_id(ss)));
904  BOOST_CHECK_EQUAL(id2->AsFastaString(), s);
905  BOOST_CHECK(id->Match(*id2));
906  BOOST_CHECK_EQUAL(id->Compare(*id2), CSeq_id::e_YES);
907  }
908 }
909 
912 
// Checks that CSeq_id::Set(<fasta string>) leaves no stray optional fields
// behind: each section first sets an optional field on the current variant,
// confirms it is set, then re-initializes the same object from a FASTA
// string of the same variant and confirms the field is no longer set.
// The statements are order-dependent: every check reads the state produced
// by the statement before it, and the single `id` object is reused throughout.
913 BOOST_AUTO_TEST_CASE(s_TestNoStrays)
914 {
915  CSeq_id id;
// Giim: db and release must be gone after Set("gim|123").
916  BOOST_CHECK_NO_THROW(id.SetGiim().SetDb("foo"));
917  BOOST_CHECK_NO_THROW(id.SetGiim().SetRelease("2.0"));
918  BOOST_CHECK(id.IsGiim());
919  BOOST_CHECK(id.GetGiim().IsSetDb());
920  BOOST_CHECK(id.GetGiim().IsSetRelease());
921  BOOST_CHECK_NO_THROW(id.Set("gim|123"));
922  BOOST_CHECK(id.IsGiim());
923  BOOST_CHECK( !id.GetGiim().IsSetDb() );
924  BOOST_CHECK( !id.GetGiim().IsSetRelease() );
925 
// Genbank: release must be gone after Set("gb|...").
926  BOOST_CHECK_NO_THROW(id.SetGenbank().SetRelease("135"));
927  BOOST_CHECK(id.IsGenbank());
928  BOOST_CHECK(id.GetGenbank().IsSetRelease());
929  BOOST_CHECK_NO_THROW(id.Set("gb|U12345.1|AMU12345"));
930  BOOST_CHECK(id.IsGenbank());
931  BOOST_CHECK( !id.GetGenbank().IsSetRelease() );
932 
// Patent: the citation's doc-type must be gone after Set("pat|...").
933  BOOST_CHECK_NO_THROW(id.SetPatent().SetCit().SetDoc_type("app"));
934  BOOST_CHECK(id.IsPatent());
935  BOOST_CHECK(id.GetPatent().GetCit().IsSetDoc_type());
936  BOOST_CHECK_NO_THROW(id.Set("pat|US|RE33188|1"));
937  BOOST_CHECK(id.IsPatent());
938  BOOST_CHECK( !id.GetPatent().GetCit().IsSetDoc_type() );
939 
// PDB: the release date must be gone after Set("pdb|...").
940  BOOST_CHECK_NO_THROW(id.SetPdb().SetRel().SetToTime(GetFastLocalTime()));
941  BOOST_CHECK(id.IsPdb());
942  BOOST_CHECK(id.GetPdb().IsSetRel());
943  BOOST_CHECK_NO_THROW(id.Set("pdb|1GAV|X"));
944  BOOST_CHECK(id.IsPdb());
945  BOOST_CHECK( !id.GetPdb().IsSetRel() );
946 }
947 
// Exercises the id-list operations: joins every entry of kTestFastaStrings
// with '|', parses the merged string into the id list of a CBioseq via
// CSeq_id::ParseFastaIds(), and checks CSeq_id::GetStringDescr() output for
// that Bioseq.
// NOTE(review): this copy of the listing skips lines 962, 969, 972 and 979,
// so several checks below are visibly truncated; restore them from the
// original file before compiling.
948 BOOST_AUTO_TEST_CASE(s_TestListOps)
949 {
// Build "id1|id2|...|idN" from the shared table.
950  string merged;
951  for (size_t i = 0; i < kNumFastaStrings; ++i) {
952  if (i > 0) {
953  merged += '|';
954  }
955  merged += kTestFastaStrings[i];
956  }
957  CBioseq bs;
958  bs.SetInst().SetRepr(CSeq_inst::eRepr_virtual);
959  bs.SetInst().SetMol(CSeq_inst::eMol_other);
// `ids` aliases the Bioseq's own id list, so parsing fills the Bioseq.
960  CBioseq::TId& ids = bs.SetId();
// NOTE(review): the expected-value argument (listing line 962) is missing here.
961  BOOST_CHECK_EQUAL(CSeq_id::ParseFastaIds(ids, merged, true),
963  BOOST_CHECK_EQUAL(ids.size(), kNumFastaStrings);
964  BOOST_CHECK_EQUAL(CSeq_id::GetStringDescr(bs, CSeq_id::eFormat_FastA),
965  string("gi|1234|ref|NM_000170.1|"));
966  BOOST_CHECK_EQUAL(CSeq_id::GetStringDescr(bs, CSeq_id::eFormat_ForceGI),
967  string("gi|1234"));
// NOTE(review): the first argument of each of the next two checks (listing
// lines 969 and 972) is missing.
968  BOOST_CHECK_EQUAL
970  string("ref|NM_000170.1"));
971  BOOST_CHECK_EQUAL
973  string("ref|NM_000170"));
974  /*
975  BOOST_CHECK_EQUAL
976  (CSeq_id::ParseFastaIds(ids, "gi|1234|junk|pdb|1GAV", true),
977  size_t(2));
978  */
// NOTE(review): the macro name that opens this statement (listing line 979)
// is missing.
980  (CSeq_id::ParseFastaIds(ids, "gi|1234|junk|pdb|1GAV"));
981 }
982 
983 BOOST_AUTO_TEST_CASE(s_TestSeq_locAssign)
984 {
985  {
986  CRef<CSeq_id> id1(new CSeq_id("gi|1"));
987  CRef<CSeq_loc> loc1(new CSeq_loc);
988  loc1->SetWhole(*id1);
989  CRef<CSeq_loc> mix1(new CSeq_loc);
990  mix1->SetMix().Set().push_back(loc1);
991 
992  CRef<CSeq_id> id2(new CSeq_id("gi|2"));
993  CRef<CSeq_loc> loc2(new CSeq_loc);
994  loc2->SetEmpty(*id2);
995  CRef<CSeq_loc> mix2(new CSeq_loc);
996  mix2->SetMix().Set().push_back(loc2);
997 
998  BOOST_CHECK(loc1->IsWhole());
999  BOOST_CHECK(loc1->GetWhole().IsGi());
1000  BOOST_CHECK_EQUAL(loc1->GetWhole().GetGi(), 1);
1001 
1002  BOOST_CHECK(loc2->IsEmpty());
1003  BOOST_CHECK(loc2->GetEmpty().IsGi());
1004  BOOST_CHECK_EQUAL(loc2->GetEmpty().GetGi(), 2);
1005 
1006  BOOST_CHECK(loc1->GetId());
1007  BOOST_CHECK(loc1->GetId()->IsGi());
1008  BOOST_CHECK_EQUAL(loc1->GetId()->GetGi(), 1);
1009 
1010  BOOST_CHECK(loc2->GetId());
1011  BOOST_CHECK(loc2->GetId()->IsGi());
1012  BOOST_CHECK_EQUAL(loc2->GetId()->GetGi(), 2);
1013 
1014  loc1->Assign(*loc2);
1015 
1016  id1.Reset();
1017  id2.Reset();
1018 
1019  BOOST_CHECK(loc1->IsEmpty());
1020  BOOST_CHECK(loc1->GetEmpty().IsGi());
1021  BOOST_CHECK_EQUAL(loc1->GetEmpty().GetGi(), 2);
1022 
1023  BOOST_CHECK(loc2->IsEmpty());
1024  BOOST_CHECK(loc2->GetEmpty().IsGi());
1025  BOOST_CHECK_EQUAL(loc2->GetEmpty().GetGi(), 2);
1026 
1027  BOOST_CHECK(loc1->GetId());
1028  BOOST_CHECK(loc1->GetId()->IsGi());
1029  BOOST_CHECK_EQUAL(loc1->GetId()->GetGi(), 2);
1030 
1031  BOOST_CHECK(loc2->GetId());
1032  BOOST_CHECK(loc2->GetId()->IsGi());
1033  BOOST_CHECK_EQUAL(loc2->GetId()->GetGi(), 2);
1034  }
1035  {
1036  CRef<CSeq_id> id1(new CSeq_id("gi|1"));
1037  CRef<CSeq_loc> loc1(new CSeq_loc);
1038  loc1->SetWhole(*id1);
1039  CRef<CSeq_feat> feat1(new CSeq_feat);
1040  feat1->SetData().SetRegion("1");
1041  feat1->SetLocation(*loc1);
1042 
1043  CRef<CSeq_id> id2(new CSeq_id("gi|2"));
1044  CRef<CSeq_loc> loc2(new CSeq_loc);
1045  loc2->SetEmpty(*id2);
1046  CRef<CSeq_feat> feat2(new CSeq_feat);
1047  feat2->SetData().SetRegion("2");
1048  feat2->SetLocation(*loc2);
1049 
1050  BOOST_CHECK(loc1->IsWhole());
1051  BOOST_CHECK(loc1->GetWhole().IsGi());
1052  BOOST_CHECK_EQUAL(loc1->GetWhole().GetGi(), 1);
1053 
1054  BOOST_CHECK(loc2->IsEmpty());
1055  BOOST_CHECK(loc2->GetEmpty().IsGi());
1056  BOOST_CHECK_EQUAL(loc2->GetEmpty().GetGi(), 2);
1057 
1058  BOOST_CHECK(loc1->GetId());
1059  BOOST_CHECK(loc1->GetId()->IsGi());
1060  BOOST_CHECK_EQUAL(loc1->GetId()->GetGi(), 1);
1061 
1062  BOOST_CHECK(loc2->GetId());
1063  BOOST_CHECK(loc2->GetId()->IsGi());
1064  BOOST_CHECK_EQUAL(loc2->GetId()->GetGi(), 2);
1065 
1066  feat1->Assign(*feat2);
1067 
1068  id1.Reset();
1069  id2.Reset();
1070 
1071  BOOST_CHECK_EQUAL(feat1->GetData().GetRegion(), string("2"));
1072  loc1 = &feat1->SetLocation();
1073 
1074  BOOST_CHECK(loc1->IsEmpty());
1075  BOOST_CHECK(loc1->GetEmpty().IsGi());
1076  BOOST_CHECK_EQUAL(loc1->GetEmpty().GetGi(), 2);
1077 
1078  BOOST_CHECK(loc2->IsEmpty());
1079  BOOST_CHECK(loc2->GetEmpty().IsGi());
1080  BOOST_CHECK_EQUAL(loc2->GetEmpty().GetGi(), 2);
1081 
1082  BOOST_CHECK(loc1->GetId());
1083  BOOST_CHECK(loc1->GetId()->IsGi());
1084  BOOST_CHECK_EQUAL(loc1->GetId()->GetGi(), 2);
1085 
1086  BOOST_CHECK(loc2->GetId());
1087  BOOST_CHECK(loc2->GetId()->IsGi());
1088  BOOST_CHECK_EQUAL(loc2->GetId()->GetGi(), 2);
1089  }
1090 }
1091 
1092 
1093 
// Table-driven test of CSeq_id::GetLabel(), AsFastaString() and
// GetSeqIdString(). Each record of sc_SeqIdLabels is 8 consecutive strings:
// an ASN.1 text Seq-id followed by the 7 expected renderings, in the order
// listed in the comment inside the table.
// NOTE(review): listing line 1197 (the remaining arguments of the last
// GetLabel() call) is missing from this copy.
1094 BOOST_AUTO_TEST_CASE(s_TestSeq_id_GetLabel)
1095 {
1096  static const char* sc_SeqIdLabels[] = {
1097  // order is important!
1098  // - raw id in ASN.1
1099  // - type
1100  // - content
1101  // - both
1102  // - fasta (CSeq_id::AsFastaString())
1103  // - seq-id string, +version
1104  // - seq-id string, -version
1105  // - both, upper case + version
1106  "Seq-id ::= gi 1234",
1107  "gi", "1234", "gi|1234",
1108  "gi|1234", "1234", "1234", "GI|1234",
1109 
1110  "Seq-id ::= other { accession \"NM_123456\", version 1}",
1111  "ref", "NM_123456.1", "ref|NM_123456.1",
1112  "ref|NM_123456.1|", "NM_123456.1", "NM_123456", "REF|NM_123456.1",
1113 
1114  "Seq-id ::= general { db \"ti\", tag id 1}",
1115  "gnl", "ti:1", "gnl|ti:1",
1116  "gnl|ti|1", "ti:1", "ti:1", "GNL|TI|1",
1117 
1118  "Seq-id ::= general { db \"NCBI_GENOMES\", tag id 1}",
1119  "gnl", "NCBI_GENOMES:1", "gnl|NCBI_GENOMES:1",
1120  "gnl|NCBI_GENOMES|1", "NCBI_GENOMES:1", "NCBI_GENOMES:1", "GNL|NCBI_GENOMES|1",
1121 
1122  "Seq-id ::= pir { name \"S34010\" }",
1123  "pir", "S34010", "pir|S34010",
1124  "pir||S34010", "S34010", "S34010", "PIR|S34010",
1125 
1126  "Seq-id ::= patent { seqid 257, cit { country \"JP\", id number \"2003530853\" } }",
1127  "pat", "JP2003530853_257", "pat|JP2003530853_257",
1128  "pat|JP|2003530853|257", "JP2003530853_257", "JP2003530853_257", "PAT|JP|2003530853|257",
1129 
1130  "Seq-id ::= pdb { mol \"1GAV\", chain 120 }",
1131  "pdb", "1GAV_x", "pdb|1GAV_x",
1132  "pdb|1GAV|x", "1GAV_x", "1GAV_x", "PDB|1GAV|x",
1133 
1134  "Seq-id ::= pdb { mol \"1GAV\", chain-id \"xY\" }",
1135  "pdb", "1GAV_xY", "pdb|1GAV_xY",
1136  "pdb|1GAV|xY", "1GAV_xY", "1GAV_xY", "PDB|1GAV|xY",
1137 
1138 
1139  "Seq-id ::= local str \"abcdABCD\"",
1140  "lcl", "abcdABCD", "lcl|abcdABCD",
1141  "lcl|abcdABCD", "abcdABCD", "abcdABCD", "LCL|ABCDABCD",
1142 
1143  "Seq-id ::= local id 1234",
1144  "lcl", "1234", "lcl|1234",
1145  "lcl|1234", "1234", "1234", "LCL|1234",
1146 
// Terminator record: only 7 entries although a record is 8 wide; the loop
// below tests just the first slot (*p), so the first NULL ends the walk.
1147  NULL, NULL, NULL, NULL, NULL, NULL, NULL
1148  };
1149 
1150 
// Walk the table one 8-string record at a time until the NULL terminator.
1151  const char** p = sc_SeqIdLabels;
1152  for ( ; p && *p; p += 8) {
1153  const char* src_id = *(p + 0);
1154  const char* type = *(p + 1);
1155  const char* content = *(p + 2);
1156  const char* both = *(p + 3);
1157  const char* fasta_str = *(p + 4);
1158  const char* seqid_str1 = *(p + 5);
1159  const char* seqid_str2 = *(p + 6);
1160  const char* upper_case = *(p + 7);
1161 
1162  LOG_POST(Info << "checking ID: " << src_id);
// Deserialize the id from its ASN.1 text form.
1163  CSeq_id id;
1164  {{
1165  CNcbiIstrstream istr(src_id);
1166  istr >> MSerial_AsnText >> id;
1167  }}
1168 
// `s` is cleared before every GetLabel() call and receives the label through
// the pointer argument.
1169  string s;
1170 
1171  s.erase();
1172  id.GetLabel(&s, CSeq_id::eType);
1173  LOG_POST(Info << " type label: " << s);
1174  BOOST_CHECK_EQUAL(s, type);
1175 
1176  s.erase();
1177  id.GetLabel(&s, CSeq_id::eContent);
1178  LOG_POST(Info << " content label: " << s);
1179  BOOST_CHECK_EQUAL(s, content);
1180 
1181  s.erase();
1182  id.GetLabel(&s, CSeq_id::eBoth);
1183  LOG_POST(Info << " type + content label: " << s);
1184  BOOST_CHECK_EQUAL(s, both);
1185 
1186  LOG_POST(Info << " fasta string: " << id.AsFastaString());
1187  BOOST_CHECK_EQUAL(id.AsFastaString(), fasta_str);
1188  LOG_POST(Info << " id.GetSeqIdString(true): "
1189  << id.GetSeqIdString(true));
1190  BOOST_CHECK_EQUAL(id.GetSeqIdString(true), seqid_str1);
1191  LOG_POST(Info << " id.GetSeqIdString(false): "
1192  << id.GetSeqIdString(false));
1193  BOOST_CHECK_EQUAL(id.GetSeqIdString(false), seqid_str2);
1194 
1195  s.erase();
// NOTE(review): the flags argument and closing parenthesis of this call
// (listing line 1197) are missing.
1196  id.GetLabel(&s, CSeq_id::eDefault,
1198  LOG_POST(Info << " upper case label: " << s);
1199  BOOST_CHECK_EQUAL(s, upper_case);
1200  }
1201 }
1202 
1203 
// Compiled out by the `#if 0` below. When enabled, this test reads the
// ASN.1 text Seq-ids from sc_Ids one after another and checks that the
// eBoth label equals AsFastaString() and that a CSeq_id built from the
// label equals the original.
1204 #if 0
1205 /// NB: disabled, as certain of these tests are guaranteed to fail
1206 BOOST_AUTO_TEST_CASE(s_TestSeq_id_GetLabel_FastaString)
1207 {
1208  static const char* sc_Ids = "\
1209 Seq-id ::= pir {\
1210  name \"S34010\"\
1211 }\
1212 Seq-id ::= patent {\
1213  seqid 257,\
1214  cit {\
1215  country \"JP\",\
1216  id number \"2003530853\"\
1217  }\
1218 }\
1219 ";
1220 
1221  CNcbiIstrstream istr(sc_Ids);
1222  while (istr) {
1223  CSeq_id id;
1224  try {
1225  istr >> MSerial_AsnText >> id;
1226  }
// End of input is the normal way out of the loop.
1227  catch (CEofException&) {
1228  break;
1229  }
1230 
1231  string fasta_seqid = id.AsFastaString();
1232  string label;
1233  id.GetLabel(&label, CSeq_id::eBoth);
1234  BOOST_CHECK_EQUAL(label, fasta_seqid);
1235 
1236  CSeq_id other(label);
1237  BOOST_CHECK(other.Equals(id));
1238  }
1239 }
1240 #endif
1241 
1242 
// Ordering test for CSeq_id::CompareOrdered() and
// CSeq_id_Handle::CompareOrdered(). The ids in sc_Ids are listed in the
// expected order; the test shuffles them, sorts them again through several
// comparators and checks that every sorted sequence is consistent with the
// pairwise comparison results and with the listed order.
// NOTE(review): listing line 1463 (the declaration of `expected_id` used in
// the final loop) is missing from this copy.
1243 BOOST_AUTO_TEST_CASE(s_TestSeq_id_Compare)
1244 {
1245  // The array sc_Ids is sorted to match CompareOrdered().
1246  // Some elements may compare equal.
1247  static const char* const sc_Ids[] = {
1248  "lcl|-723121231214", // 64-bit id
1249  "lcl|-723121231214", // 64-bit id
1250  "lcl|-12",
1251  "lcl|-11",
1252  "lcl|-11",
1253  "lcl|0",
1254  "lcl|0",
1255  "lcl|12",
1256  "lcl|12",
1257  "lcl|13",
1258  "lcl|13",
1259  "lcl|123",
1260  "lcl|123",
1261  "lcl|124",
1262  "lcl|124",
1263  "lcl|723121231214", // 64-bit id
1264  "lcl|0012", // non-integer ids
1265  "lcl|00123",
1266  "lcl|00124",
1267  "lcl|0013",
1268  "lcl|012",
1269  "lcl|0123",
1270  "lcl|0124",
1271  "lcl|013",
1272  "NC_000001",
1273  "ref|NC_000001|chr1_build35",
1274  "ref|NC_000001|chr1_build36",
1275  "NC_000001.8",
1276  "nc_000001.8",
1277  "NC_000001.9",
1278  "Nc_000001.9",
1279  "ref|NC_000001.9|chr1_build36",
1280  "gnl|ti|-9223372036854775808", // smallest 64-bit int
1281  "gnl|ti|-623121231214", // 64-bit id
1282  "gnl|Ti|-2147483649",
1283  "gnl|Ti|-2147483648",
1284  "gnl|Ti|-2147483647",
1285  "gnl|ti|-12312",
1286  "gnl|ti|-1231",
1287  "gnl|ti|0",
1288  "gnl|ti|12312",
1289  "gnl|ti|12312",
1290  "gnl|ti|3231212",
1291  "gnl|ti|3231212",
1292  "gnl|ti|42312324",
1293  "gnl|ti|42312324",
1294  "gnl|Ti|2147483647",
1295  "gnl|Ti|2147483648",
1296  "gnl|TI|52312123124",
1297  "gnl|ti|623121231214", // 64-bit id
1298  "gnl|ti|9223372036854775807", // largest 64-bit int
1299  "gnl|ti|+ 0", // non-integer ids
1300  "gnl|ti|+0",
1301  "gnl|ti|- 0",
1302  "gnl|ti|-0",
1303  "gnl|ti|-012",
1304  "gnl|ti|-9223372036854775809", // doesn't fit into 64-bit int
1305  "gnl|ti|22312-234",
1306  "gnl|ti|9223372036854775808", // doesn't fit into 64-bit int
1307  "gnl|TI|str",
1308  "gnl|trace|-623121231214", // 64-bit id
1309  "gnl|trace|-623121231214", // 64-bit id
1310  "gnl|TRACE|-12312",
1311  "gnl|TRACE|-12312",
1312  "gnl|TRACE|-123",
1313  "gnl|TRACE|0",
1314  "gnl|TRACE|12312",
1315  "gnl|TRACE|12312",
1316  "gnl|trace|3231212",
1317  "gnl|trace|3231212",
1318  "gnl|TRACE|42312324",
1319  "gnl|TRACE|42312324",
1320  "gnl|TRACE|2123123241",
1321  "gnl|TRACE|2423123241",
1322  "gnl|TRACE|52312123124",
1323  "gnl|trace|623121231214", // 64-bit id
1324  "gnl|TRACE|+ 0", // non-integer ids
1325  "gnl|TRACE|+0",
1326  "gnl|TRACE|- 0",
1327  "gnl|trace|-0",
1328  "gnl|trace|-012",
1329  "gnl|TRACE|22312-234",
1330  "gnl|trace|str",
1331  "pdb|6hXx|Aa",
1332  "pdb|6hxx|Aa",
1333  "pdb|6HXX|Ab",
1334  "pdb|6xxx|AA",
1335  "pdb|6XXX|Aa",
1336  };
1337 
// Phase 1: parse every string. For local and general ids also check the
// FASTA round trip and the int-vs-string representation chosen by the
// parser; when an entry repeats the previous string and was parsed as an
// integer, switch it to the string representation so that both forms of
// the same value take part in the comparison.
1338  typedef CRef<CSeq_id> TRef;
1339  vector<TRef> ids;
1340  for ( size_t i = 0; i < ArraySize(sc_Ids); ++i ) {
1341  ids.push_back(TRef(new CSeq_id(sc_Ids[i])));
1342  //NcbiCout << "Id["<<i<<"] from \""<<sc_Ids[i]<<"\""<<NcbiEndl;
1343  if ( ids[i]->IsLocal() ) {
1344  BOOST_CHECK_EQUAL(ids[i]->AsFastaString(), sc_Ids[i]);
1345  if ( ids[i]->GetLocal().IsId() ) {
1346  int id = ids[i]->GetLocal().GetId();
1347  BOOST_CHECK(id > 0);
1348  if ( i > 0 && strcmp(sc_Ids[i], sc_Ids[i-1]) == 0 ) {
1349  ids[i]->SetLocal().SetStr(NStr::NumericToString(id));
1350  BOOST_CHECK_EQUAL(ids[i]->AsFastaString(), sc_Ids[i]);
1351  }
1352  }
1353  else {
// A string-form id must not be something that looks like a plain positive
// integer starting with 1-9.
1354  const string& id = ids[i]->GetLocal().GetStr();
1355  BOOST_CHECK(NStr::StringToNonNegativeInt(id) <= 0 ||
1356  id[0] < '1' || id[0] > '9');
1357  }
1358  }
1359  if ( ids[i]->IsGeneral() ) {
1360  BOOST_CHECK_EQUAL(ids[i]->AsFastaString(), sc_Ids[i]);
1361  if ( ids[i]->GetGeneral().GetTag().IsId() ) {
1362  int id = ids[i]->GetGeneral().GetTag().GetId();
1363  BOOST_CHECK(id > 0);
1364  if ( i > 0 && strcmp(sc_Ids[i], sc_Ids[i-1]) == 0 ) {
1365  ids[i]->SetGeneral().SetTag().SetStr(NStr::NumericToString(id));
1366  BOOST_CHECK_EQUAL(ids[i]->AsFastaString(), sc_Ids[i]);
1367  }
1368  }
1369  else {
1370  const string& id = ids[i]->GetGeneral().GetTag().GetStr();
1371  BOOST_CHECK(NStr::StringToNonNegativeInt(id) <= 0 ||
1372  id[0] < '1' || id[0] > '9');
1373  }
1374  }
1375  }
// Phase 2: shuffle with a fixed seed, then sort a copy with PPtrLess.
1376  CRandom rnd(1);
1377  for ( size_t i = 0; i < ids.size(); ++i ) {
1378  swap(ids[i], ids[rnd.GetRandSize_t(i, ids.size()-1)]);
1379  }
1380  vector<TRef> sorted_ids = ids;
1381  stable_sort(sorted_ids.begin(), sorted_ids.end(), PPtrLess<TRef>());
1382  if ( false ) {
1383  // dump sorted ids
1384  ITERATE ( vector<TRef>, it, sorted_ids ) {
1385  NcbiCout << (*it)->AsFastaString() << NcbiEndl;
1386  }
1387  }
// Phase 3: the sorted sequence must agree with CompareOrdered() pairwise,
// and position i must compare equal to the i-th entry of sc_Ids.
1388  for ( size_t i = 0; i < sorted_ids.size(); ++i ) {
1389  BOOST_CHECK_EQUAL(sorted_ids[i]->CompareOrdered(*sorted_ids[i]), 0);
1390  for ( size_t j = 0; j < i; ++j ) {
1391  BOOST_CHECK(sorted_ids[j]->CompareOrdered(*sorted_ids[i]) <= 0);
1392  BOOST_CHECK(sorted_ids[i]->CompareOrdered(*sorted_ids[j]) >= 0);
1393  }
1394  CSeq_id expected_id(sc_Ids[i]);
1395  if ( expected_id.CompareOrdered(*sorted_ids[i]) != 0 ) {
// Reached only on a mismatch. NOTE(review): the comparison with "" below
// presumably exists to force a reported failure that prints the actual id
// even when both FASTA strings happen to be equal - confirm.
1396  BOOST_CHECK_EQUAL(sorted_ids[i]->AsFastaString(),
1397  expected_id.AsFastaString());
1398  BOOST_CHECK_EQUAL(sorted_ids[i]->AsFastaString(), "");
1399  }
1400  }
// Phase 4: a set with the same comparator must collapse equal ids (so it is
// strictly smaller than the list) and be strictly increasing.
1401  typedef set<TRef, PPtrLess<TRef> > TSet;
1402  TSet ids_set(ids.begin(), ids.end());
1403  BOOST_CHECK(ids_set.size() < sorted_ids.size());
1404  ITERATE ( TSet, it, ids_set ) {
1405  //NcbiCout << (*it)->AsFastaString() << NcbiEndl;
1406  BOOST_CHECK_EQUAL((*it)->CompareOrdered(**it), 0);
1407  ITERATE ( TSet, it2, ids_set ) {
1408  if ( it2 == it ) {
1409  break;
1410  }
1411  BOOST_CHECK((*it2)->CompareOrdered(**it) < 0);
1412  BOOST_CHECK((*it)->CompareOrdered(**it2) > 0);
1413  }
1414  }
1415 
// Phase 5: the same ordering checks on CSeq_id_Handle, starting with one
// explicit pair of negative general ids.
1416  if ( 1 ) {
1417  CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle("gnl|ti|-12312");
1418  CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle("gnl|ti|-1231");
1419  BOOST_CHECK_LT(id1.CompareOrdered(id2), 0);
1420  BOOST_CHECK_GT(id2.CompareOrdered(id1), 0);
1421  }
// Handles taken in the already sorted CSeq_id order must be ordered too.
1422  vector<CSeq_id_Handle> sorted_idhs;
1423  for ( auto& i : sorted_ids ) {
1424  sorted_idhs.push_back(CSeq_id_Handle::GetHandle(*i));
1425  }
1426  for ( size_t i = 0; i < sorted_idhs.size(); ++i ) {
1427  BOOST_CHECK_EQUAL(sorted_idhs[i].CompareOrdered(sorted_idhs[i]), 0);
1428  for ( size_t j = 0; j < i; ++j ) {
1429  //NcbiCout << "sorted_idhs["<<i<<"] = "<<sorted_idhs[i] << " vs sorted_idhs["<<j<<"] = "<<sorted_idhs[j] << NcbiEndl;
1430  BOOST_CHECK_LE(sorted_idhs[j].CompareOrdered(sorted_idhs[i]), 0);
1431  BOOST_CHECK_GE(sorted_idhs[i].CompareOrdered(sorted_idhs[j]), 0);
1432  }
1433  }
// Now sort handles of the shuffled ids independently with PLessOrdered and
// cross-check the result against the CSeq_id-sorted sequence.
1434  sorted_idhs.clear();
1435  for ( auto& i : ids ) {
1436  sorted_idhs.push_back(CSeq_id_Handle::GetHandle(*i));
1437  }
1438  stable_sort(sorted_idhs.begin(), sorted_idhs.end(), CSeq_id_Handle::PLessOrdered());
1439  if ( false ) {
1440  // dump sorted ids
1441  for ( auto& id : sorted_idhs ) {
1442  NcbiCout << id << NcbiEndl;
1443  }
1444  }
1445  BOOST_REQUIRE_EQUAL(sorted_ids.size(), sorted_idhs.size());
1446  for ( size_t i = 0; i < sorted_idhs.size(); ++i ) {
1447  //NcbiCout << "sorted_idhs["<<i<<"] = "<<sorted_idhs[i]<<NcbiEndl;
1448  BOOST_CHECK_EQUAL(CSeq_id_Handle::GetHandle(*sorted_ids[i]).CompareOrdered(sorted_idhs[i]), 0);
1449  BOOST_CHECK_EQUAL(sorted_idhs[i].CompareOrdered(CSeq_id_Handle::GetHandle(*sorted_ids[i])), 0);
1450  for ( size_t j = 0; j < i; ++j ) {
1451  BOOST_CHECK(CSeq_id_Handle::GetHandle(*sorted_ids[j]).CompareOrdered(sorted_idhs[i]) <= 0);
1452  BOOST_CHECK(CSeq_id_Handle::GetHandle(*sorted_ids[i]).CompareOrdered(sorted_idhs[j]) >= 0);
1453  BOOST_CHECK(sorted_idhs[j].CompareOrdered(CSeq_id_Handle::GetHandle(*sorted_ids[i])) <= 0);
1454  BOOST_CHECK(sorted_idhs[i].CompareOrdered(CSeq_id_Handle::GetHandle(*sorted_ids[j])) >= 0);
1455  }
1456  }
1457  for ( size_t i = 0; i < sorted_idhs.size(); ++i ) {
1458  BOOST_CHECK_EQUAL(sorted_idhs[i].CompareOrdered(sorted_idhs[i]), 0);
1459  for ( size_t j = 0; j < i; ++j ) {
1460  BOOST_CHECK(sorted_idhs[j].CompareOrdered(sorted_idhs[i]) <= 0);
1461  BOOST_CHECK(sorted_idhs[i].CompareOrdered(sorted_idhs[j]) >= 0);
1462  }
// NOTE(review): `expected_id` is declared on the missing listing line 1463.
1464  BOOST_CHECK_EQUAL(expected_id.CompareOrdered(sorted_idhs[i]), 0);
1465  }
1466 }
1467 
1469 
// File-scope pool of three Seq-id strings (a general id, a gi and a bare
// accession). The random-location code that follows picks one entry with
// rnd.GetRand(0, 2).
1470 static const char* const sc_Ids[] = {
1471  "gnl|ti|12312",
1472  "gi|3231212",
1473  "NC_000001"
1474 };
1475 
1477 {
// Builds one random location from the generator `rnd`.
// NOTE(review): the signature (listing line 1476) is missing from this
// copy; the call sites invoke it as GetRandomSegment(rnd) with a CRandom
// and store the result in a CRef<CSeq_loc>.
// The sequence of rnd.GetRand() calls below determines the generated data,
// so the statement order must not change.
1478  CRef<CSeq_loc> loc(new CSeq_loc);
// When this draw from GetRand(0, 10) is 0 the result is a Null location.
1479  if ( rnd.GetRand(0, 10) == 0 ) {
1480  loc->SetNull();
1481  }
1482  else {
1483  CRef<CSeq_id> id(new CSeq_id(sc_Ids[rnd.GetRand(0, 2)]));
1484  TSeqPos from = rnd.GetRand(0, 10);
1485  TSeqPos to = rnd.GetRand(0, 10);
// Equal endpoints become a point when a further GetRand(0, 1) draw is
// non-zero; the point is put on the minus strand when the next draw is
// non-zero.
1486  if ( from == to && rnd.GetRand(0, 1) ) {
1487  loc->SetPnt().SetId(*id);
1488  loc->SetPnt().SetPoint(from);
1489  if ( rnd.GetRand(0, 1) ) {
1490  loc->SetPnt().SetStrand(eNa_strand_minus);
1491  }
1492  }
1493  else {
// Interval: reversed endpoints are swapped and marked as minus strand; a
// single-position interval is marked minus when a GetRand(0, 1) draw is
// non-zero.
1494  loc->SetInt().SetId(*id);
1495  if ( from > to || (from == to && rnd.GetRand(0, 1)) ) {
1496  swap(from, to);
1497  loc->SetInt().SetStrand(eNa_strand_minus);
1498  }
1499  loc->SetInt().SetFrom(from);
1500  loc->SetInt().SetTo(to);
1501  }
1502  }
1503  return loc;
1504 }
1505 
// "Less than" predicate over locations based on
// CSeq_loc::Compare(..., CSeq_loc::fCompare_Default).
// NOTE(review): the line that opens this type (listing line 1506) is
// missing from this copy; the name PSeq_locLess is taken from the sort()
// call that uses a comparator with fCompare_Default semantics.
1507  bool operator()(const CSeq_loc& a, const CSeq_loc& b) const {
// Debug output, permanently disabled by the leading `0 &&`.
1508  if ( 0 && (a.IsNull() || b.IsNull()) ) {
1509  cout << "a: "<<MSerial_AsnText<<a;
1510  cout << "b: "<<MSerial_AsnText<<b<<endl;
1511  }
1512  int diff = a.Compare(b, CSeq_loc::fCompare_Default);
1513  if ( 0 && (a.IsNull() || b.IsNull()) ) {
1514  cout << " = " << diff << endl;
1515  }
1516  //a.GetId();
1517  //b.GetId();
1518  return diff < 0;
1519  }
// Convenience overload for containers of CRef<CSeq_loc>; dereferences both
// arguments and forwards to the overload above.
1520  bool operator()(const CRef<CSeq_loc>& a, const CRef<CSeq_loc>& b) const {
1521  return (*this)(*a, *b);
1522  }
1523 };
1524 
// "Less than" predicate over locations based on
// CSeq_loc::Compare(..., CSeq_loc::fCompare_Strand).
// NOTE(review): the line that opens this type (listing line 1525) is
// missing from this copy; the name PSeq_locStrandLess is taken from the
// sort() call that uses a comparator with fCompare_Strand semantics.
1526  bool operator()(const CSeq_loc& a, const CSeq_loc& b) const {
// Debug output, permanently disabled by the leading `0 &&`.
1527  if (0 && (a.IsNull() || b.IsNull())) {
1528  cout << "a: " << MSerial_AsnText << a;
1529  cout << "b: " << MSerial_AsnText << b << endl;
1530  }
1531  int diff = a.Compare(b, CSeq_loc::fCompare_Strand);
1532  if (0 && (a.IsNull() || b.IsNull())) {
1533  cout << " = " << diff << endl;
1534  }
1535  //a.GetId();
1536  //b.GetId();
1537  return diff < 0;
1538  }
// Convenience overload for containers of CRef<CSeq_loc>; dereferences both
// arguments and forwards to the overload above.
1539  bool operator()(const CRef<CSeq_loc>& a, const CRef<CSeq_loc>& b) const {
1540  return (*this)(*a, *b);
1541  }
1542 };
1543 
1545 
1546 BOOST_AUTO_TEST_CASE(s_TestSeq_id_Compare_Seq_loc)
1547 {
1548  CRandom rnd(1);
1549  for ( int t = 0; t < 1000; ++t ) {
1550  vector< CRef<CSeq_loc> > locs;
1551  for ( int i = 0; i < 10; ++i ) {
1552  size_t segs = rnd.GetRand(1, 10);
1553  CRef<CSeq_loc> loc(new CSeq_loc);
1554  if ( segs == 1 && rnd.GetRand(0, 1) ) {
1555  loc = GetRandomSegment(rnd);
1556  }
1557  else {
1558  for ( size_t j = 0; j < segs; ++j ) {
1559  loc->SetMix().Set().push_back(GetRandomSegment(rnd));
1560  }
1561  }
1562  locs.push_back(loc);
1563  }
1564  sort(locs.begin(), locs.end(), PSeq_locLess());
1565  for ( size_t i = 0; i < locs.size(); ++i ) {
1566  //cout << i << ": " << MSerial_AsnText << *locs[i] << endl;
1567  BOOST_CHECK_EQUAL(locs[i]->Compare(*locs[i], CSeq_loc::fCompare_Default), 0);
1568  if ( locs[i]->Compare(*locs[i], CSeq_loc::fCompare_Default) != 0 ) {
1569  cout << i << ": " << MSerial_AsnText << *locs[i];
1570  cout << " = " << locs[i]->Compare(*locs[i], CSeq_loc::fCompare_Default) << endl;
1571  }
1572  for ( size_t j = 0; j < i; ++j ) {
1573  BOOST_CHECK(locs[j]->Compare(*locs[i], CSeq_loc::fCompare_Default) <= 0);
1574  BOOST_CHECK(locs[i]->Compare(*locs[j], CSeq_loc::fCompare_Default) >= 0);
1575  if ( locs[j]->Compare(*locs[i], CSeq_loc::fCompare_Default) > 0 ||
1576  locs[i]->Compare(*locs[j], CSeq_loc::fCompare_Default) < 0 ) {
1577  cout << j << ": " << MSerial_AsnText << *locs[j];
1578  cout << i << ": " << MSerial_AsnText << *locs[i];
1579  cout << " = " << locs[j]->Compare(*locs[i], CSeq_loc::fCompare_Default) << endl;
1580  cout << i << ": " << MSerial_AsnText << *locs[i];
1581  cout << j << ": " << MSerial_AsnText << *locs[j];
1582  cout << " = " << locs[i]->Compare(*locs[j], CSeq_loc::fCompare_Default) << endl;
1583  }
1584  }
1585  }
1586  sort(locs.begin(), locs.end(), PSeq_locStrandLess());
1587  for (size_t i = 0; i < locs.size(); ++i) {
1588  //cout << i << ": " << MSerial_AsnText << *locs[i] << endl;
1589  BOOST_CHECK_EQUAL(locs[i]->Compare(*locs[i], CSeq_loc::fCompare_Strand), 0);
1590  if (locs[i]->Compare(*locs[i], CSeq_loc::fCompare_Strand) != 0) {
1591  cout << i << ": " << MSerial_AsnText << *locs[i];
1592  cout << " = " << locs[i]->Compare(*locs[i], CSeq_loc::fCompare_Strand) << endl;
1593  }
1594  for (size_t j = 0; j < i; ++j) {
1595  BOOST_CHECK(locs[j]->Compare(*locs[i], CSeq_loc::fCompare_Strand) <= 0);
1596  BOOST_CHECK(locs[i]->Compare(*locs[j], CSeq_loc::fCompare_Strand) >= 0);
1597  if (locs[j]->Compare(*locs[i], CSeq_loc::fCompare_Strand) > 0 ||
1598  locs[i]->Compare(*locs[j], CSeq_loc::fCompare_Strand) < 0) {
1599  cout << j << ": " << MSerial_AsnText << *locs[j];
1600  cout << i << ": " << MSerial_AsnText << *locs[i];
1601  cout << " = " << locs[j]->Compare(*locs[i], CSeq_loc::fCompare_Strand) << endl;
1602  cout << i << ": " << MSerial_AsnText << *locs[i];
1603  cout << j << ": " << MSerial_AsnText << *locs[j];
1604  cout << " = " << locs[i]->Compare(*locs[j], CSeq_loc::fCompare_Strand) << endl;
1605  }
1606  }
1607  }
1608  }
1609 }
1610 
1611 
1612 ostream& operator<<(ostream& out, const CSeq_id_Handle::TMatches& ids)
1613 {
1614  ITERATE ( CSeq_id_Handle::TMatches, it, ids ) {
1615  if ( it != ids.begin() ) {
1616  out << ',';
1617  }
1618  out << *it;
1619  }
1620  return out;
1621 }
1622 
1623 
1624 ostream& operator<<(ostream& out, const vector<CSeq_id_Handle>& ids)
1625 {
1626  ITERATE ( vector<CSeq_id_Handle>, it, ids ) {
1627  if ( it != ids.begin() ) {
1628  out << ',';
1629  }
1630  out << *it;
1631  }
1632  return out;
1633 }
1634 
1635 
1637  const vector<CSeq_id_Handle>& ids,
1638  const CSeq_id_Handle::TMatches& matches,
1639  const CSeq_id_Handle::TMatches& exp_matches,
1640  bool strict,
1641  const char* type)
1642 {
// Compares the obtained match set `matches` with the expected set
// `exp_matches` and reports the outcome through BOOST_CHECK; on a mismatch
// the involved sets are printed to NcbiCerr first. `type` only labels that
// diagnostic output.
// NOTE(review): the first signature line (listing line 1636) is missing
// from this copy; `id`, printed below, is presumably the first parameter.
1643  bool good_matches;
1644  if ( strict ) {
// Strict mode: the two sets must be identical.
1645  good_matches = matches == exp_matches;
1646  }
1647  else {
// Relaxed mode, part 1: every expected match must have been found.
1648  good_matches = true;
1649  ITERATE ( CSeq_id_Handle::TMatches, it, exp_matches ) {
1650  if ( !matches.count(*it) ) {
1651  good_matches = false;
1652  break;
1653  }
1654  }
// Relaxed mode, part 2: an unexpected match is an error only if it is one
// of the handles in `ids`; other extra matches are tolerated.
1655  ITERATE ( CSeq_id_Handle::TMatches, it, matches ) {
1656  if ( !exp_matches.count(*it) &&
1657  count(ids.begin(), ids.end(), *it) ) {
1658  good_matches = false;
1659  break;
1660  }
1661  }
1662  }
1663  if ( !good_matches ) {
1664  NcbiCerr << "Bad " << type << " matches for " << id << NcbiEndl;
1665  NcbiCerr << " all: " << ids << NcbiEndl;
1666  NcbiCerr << " got: " << matches << NcbiEndl;
1667  NcbiCerr << " exp: " << exp_matches << NcbiEndl;
1668  }
1669  BOOST_CHECK(good_matches);
1670 }
1671 
1672 
// Driver for the id-matching tests.
//
// num_ids            number of entries in each of the three arrays
// fasta_ids          the ids under test, as FASTA strings
// match_to_ids       per id: comma-separated list of ids it is expected to
//                    match to
// weak_match_to_ids  same, for matching with eAllowWeakMatch
// strict             forwarded to s_CheckMatches (exact vs. relaxed set
//                    comparison)
//
// The whole check is repeated num_ids + 1 times: pass xi leaves entry xi
// out of the working set, and the last pass (xi == num_ids) leaves nothing
// out.
// NOTE(review): listing lines 1686-1689 are missing from this copy; they
// presumably declare match_to_map, matching_map, weak_match_to_map and
// weak_matching_map, which are used below.
1673 void s_Match_id(size_t num_ids,
1674  const char* const fasta_ids[],
1675  const char* const match_to_ids[],
1676  const char* const weak_match_to_ids[],
1677  bool strict = true)
1678 {
1679  LOG_POST(Info << "num_ids="<<num_ids);
1680  for ( size_t xi = 0; xi < num_ids; ++xi ) {
1681  LOG_POST(Info << "id["<<xi<<"]="<<CSeq_id_Handle::GetHandle(fasta_ids[xi]));
1682  }
1683  for ( size_t xi = 0; xi <= num_ids; ++xi ) {
1684  set<string> strs;
1685  vector<CSeq_id_Handle> ids;
// Collect the working set: all ids except entry xi, both as strings and as
// handles.
1690  for ( size_t i = 0; i < num_ids; ++i ) {
1691  if ( i == xi ) {
1692  continue;
1693  }
1694  strs.insert(fasta_ids[i]);
1695  CSeq_id_Handle id = CSeq_id_Handle::GetHandle(fasta_ids[i]);
1696  ids.push_back(id);
1697  }
// Build the expected regular-match maps in both directions, ignoring
// expected partners that are not in the working set.
// Note: the local vector<string> `ids` inside this loop shadows the outer
// vector of handles.
1698  for ( size_t i = 0; i < num_ids; ++i ) {
1699  if ( i == xi ) {
1700  continue;
1701  }
1702  CSeq_id_Handle id = CSeq_id_Handle::GetHandle(fasta_ids[i]);
1703  vector<string> ids;
1704  NStr::Split(match_to_ids[i], ",", ids);
1705  ITERATE ( vector<string>, it, ids ) {
1706  if ( !strs.count(*it) ) {
1707  continue;
1708  }
1709  CSeq_id_Handle match_to_id = CSeq_id_Handle::GetHandle(*it);
1710  match_to_map[id].insert(match_to_id);
1711  matching_map[match_to_id].insert(id);
1712  }
1713  }
// Same for the expected weak matches.
1714  for ( size_t i = 0; i < num_ids; ++i ) {
1715  if ( i == xi ) {
1716  continue;
1717  }
1718  CSeq_id_Handle id = CSeq_id_Handle::GetHandle(fasta_ids[i]);
1719  vector<string> ids;
1720  NStr::Split(weak_match_to_ids[i], ",", ids);
1721  ITERATE ( vector<string>, it, ids ) {
1722  if ( !strs.count(*it) ) {
1723  continue;
1724  }
1725  CSeq_id_Handle match_to_id = CSeq_id_Handle::GetHandle(*it);
1726  weak_match_to_map[id].insert(match_to_id);
1727  weak_matching_map[match_to_id].insert(id);
1728  }
1729  }
// Check 1: GetMatchingHandles() against matching_map (plus the id itself),
// and consistency with MatchesTo().
1730  for ( size_t i = 0; i < ids.size(); ++i ) {
1731  CSeq_id_Handle::TMatches matches;
1732  ids[i].GetMatchingHandles(matches);
1733  CSeq_id_Handle::TMatches exp_matches = matching_map[ids[i]];
1734  exp_matches.insert(ids[i]);
1735  s_CheckMatches(ids[i], ids, matches, exp_matches, strict, "");
1736  ITERATE ( CSeq_id_Handle::TMatches, it, matches ) {
1737  BOOST_CHECK(ids[i].MatchesTo(*it));
1738  }
// NOTE(review): j indexes `ids`, from which entry xi has already been left
// out, while xi indexes fasta_ids; `j == xi` therefore compares two
// different index spaces - confirm this is intended.
1739  for ( size_t j = 0; j < ids.size(); ++j ) {
1740  if ( j == xi || matches.count(ids[j]) ) {
1741  continue;
1742  }
1743  BOOST_CHECK(!ids[i].MatchesTo(ids[j]));
1744  }
1745  }
// Check 2: GetReverseMatchingHandles() against match_to_map.
1746  for ( size_t i = 0; i < ids.size(); ++i ) {
1747  CSeq_id_Handle::TMatches matches;
1748  ids[i].GetReverseMatchingHandles(matches);
1749  CSeq_id_Handle::TMatches exp_matches = match_to_map[ids[i]];
1750  exp_matches.insert(ids[i]);
1751  s_CheckMatches(ids[i], ids, matches, exp_matches, strict, "rev");
1752  ITERATE ( CSeq_id_Handle::TMatches, it, matches ) {
1753  BOOST_CHECK(it->MatchesTo(ids[i]));
1754  }
1755  }
// Check 3: weak matching. A match of a different Seq-id type is first
// re-created as an id of the same type as ids[i] (same text-seq-id
// content) before MatchesTo() is verified.
1756  for ( size_t i = 0; i < ids.size(); ++i ) {
1757  CSeq_id_Handle::TMatches matches;
1758  ids[i].GetMatchingHandles(matches, eAllowWeakMatch);
1759  CSeq_id_Handle::TMatches exp_matches = weak_matching_map[ids[i]];
1760  exp_matches.insert(ids[i]);
1761  s_CheckMatches(ids[i], ids, matches, exp_matches, strict, "weak");
1762  ITERATE ( CSeq_id_Handle::TMatches, it, matches ) {
1763  CSeq_id_Handle id2 = *it;
1764  if ( ids[i].Which() != id2.Which() ) {
1765  CSeq_id id;
1766  id.Select(ids[i].Which());
// const_cast: GetTextseq_Id() hands out a const pointer, but the freshly
// selected variant of the local `id` has to be filled in.
1767  const_cast<CTextseq_id&>(*id.GetTextseq_Id())
1768  .Assign(*id2.GetSeqId()->GetTextseq_Id());
1769  id2 = CSeq_id_Handle::GetHandle(id);
1770  }
1771  BOOST_CHECK(ids[i].MatchesTo(id2));
1772  }
1773  }
// Check 4: weak reverse matching, with the same type conversion as above.
1774  for ( size_t i = 0; i < ids.size(); ++i ) {
1775  CSeq_id_Handle::TMatches matches;
1776  ids[i].GetReverseMatchingHandles(matches, eAllowWeakMatch);
1777  CSeq_id_Handle::TMatches exp_matches = weak_match_to_map[ids[i]];
1778  exp_matches.insert(ids[i]);
1779  s_CheckMatches(ids[i], ids, matches, exp_matches, strict, "weak rev");
1780  ITERATE ( CSeq_id_Handle::TMatches, it, matches ) {
1781  CSeq_id_Handle id2 = *it;
1782  if ( ids[i].Which() != id2.Which() ) {
1783  CSeq_id id;
1784  id.Select(ids[i].Which());
1785  const_cast<CTextseq_id&>(*id.GetTextseq_Id())
1786  .Assign(*id2.GetSeqId()->GetTextseq_Id());
1787  id2 = CSeq_id_Handle::GetHandle(id);
1788  }
1789  BOOST_CHECK(id2.MatchesTo(ids[i]));
1790  }
1791  }
1792  }
1793 }
1794 
1796 {
1797  const char* const fasta_ids[] = {
1798  "gb|A000001",
1799  "gb|A000001.2",
1800  "gb|A000001.3",
1801  "tpg|A000001",
1802  "tpg|A000001.2",
1803  "tpg|A000001.3",
1804  };
1805  const char* const match_to_ids[] = {
1806  "",
1807  "gb|A000001",
1808  "gb|A000001",
1809  "",
1810  "tpg|A000001",
1811  "tpg|A000001",
1812  };
1813  const char* const weak_match_to_ids[] = {
1814  "tpg|A000001",
1815  "tpg|A000001,tpg|A000001.2,gb|A000001",
1816  "tpg|A000001,tpg|A000001.3,gb|A000001",
1817  "gb|A000001",
1818  "gb|A000001,gb|A000001.2,tpg|A000001",
1819  "gb|A000001,gb|A000001.3,tpg|A000001",
1820  };
1821  s_Match_id(ArraySize(fasta_ids),
1822  fasta_ids, match_to_ids, weak_match_to_ids);
1823 }
1824 
1825 
1827 {
1828  const char* const fasta_ids[] = {
1829  "gb|A000001",
1830  "gb|A000001.2",
1831  "gb|A000001.3",
1832  "tpg|A000001",
1833  "tpg|A000001.2",
1834  "tpg|A000001.3",
1835  "gb|AAAAAAA",
1836  "gb|AAAAAAA.2",
1837  "gb|AAAAAAA.4",
1838  "tpg|AAAAAAA",
1839  "tpg|AAAAAAA.2",
1840  "tpg|AAAAAAA.5",
1841  };
1842  const char* const match_to_ids[] = {
1843  "",
1844  "gb|A000001",
1845  "gb|A000001",
1846  "",
1847  "tpg|A000001",
1848  "tpg|A000001",
1849  "",
1850  "gb|AAAAAAA",
1851  "gb|AAAAAAA",
1852  "",
1853  "tpg|AAAAAAA",
1854  "tpg|AAAAAAA",
1855  };
1856  const char* const weak_match_to_ids[] = {
1857  "tpg|A000001",
1858  "tpg|A000001,tpg|A000001.2,gb|A000001",
1859  "tpg|A000001,tpg|A000001.3,gb|A000001",
1860  "gb|A000001",
1861  "gb|A000001,gb|A000001.2,tpg|A000001",
1862  "gb|A000001,gb|A000001.3,tpg|A000001",
1863  "tpg|AAAAAAA",
1864  "tpg|AAAAAAA,tpg|AAAAAAA.2,gb|AAAAAAA",
1865  "tpg|AAAAAAA,gb|AAAAAAA",
1866  "gb|AAAAAAA",
1867  "gb|AAAAAAA,gb|AAAAAAA.2,tpg|AAAAAAA",
1868  "gb|AAAAAAA,tpg|AAAAAAA",
1869  };
1870  s_Match_id(ArraySize(fasta_ids),
1871  fasta_ids, match_to_ids, weak_match_to_ids);
1872 }
1873 
1874 
// Test-case body; the declaring header line is not part of this listing.
// Twenty-two ids: gb/tpg accessions A000001 and A000002 (one tpg entry also
// carries a name, "name2") and ref|NT_025975 with and without version and
// name. The three tables are index-aligned with fasta_ids. Unlike the other
// s_Match_id() callers visible here, this one passes an extra trailing
// `false` argument.
// NOTE(review): the meaning of that flag is defined by s_Match_id(), which is
// outside this view -- confirm there.
1876 {
1877  const char* const fasta_ids[] = {
1878  "gb|A000001",
1879  "gb|A000001.2",
1880  "gb|A000001.3",
1881  "tpg|A000001",
1882  "tpg|A000001.2",
1883  "tpg|A000001.3",
1884  "gb|A000002",
1885  "gb|A000002.2",
1886  "gb|A000002.4",
1887  "tpg|A000002",
1888  "tpg|A000002.2",
1889  "tpg|A000002.5",
1890  "tpg|A000002.5|name2",
1891  "ref|NT_025975.2",
1892  "ref|NT_025975.2|HsY_2613",
1893  "ref|NT_025975.2|HsY_2614",
1894  "ref|NT_025975.3",
1895  "ref|NT_025975.3|HsY_2613",
1896  "ref|NT_025975.3|HsY_2614",
1897  "ref|NT_025975",
1898  "ref|NT_025975|HsY_2613",
1899  "ref|NT_025975|HsY_2614",
1900  };
  // Per-id comma-separated id lists; "" is an empty list.
  // NOTE(review): presumably expected match targets -- confirm against s_Match_id().
1901  const char* const match_to_ids[] = {
1902  "",
1903  "gb|A000001",
1904  "gb|A000001",
1905  "",
1906  "tpg|A000001",
1907  "tpg|A000001",
1908  "",
1909  "gb|A000002",
1910  "gb|A000002",
1911  "",
1912  "tpg|A000002",
1913  "tpg|A000002,tpg|A000002.5|name2",
1914  "tpg|A000002,tpg|A000002.5",
1915  "ref|NT_025975.2|HsY_2613,ref|NT_025975.2|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1916  "ref|NT_025975.2,ref|NT_025975.2|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1917  "ref|NT_025975.2,ref|NT_025975.2|HsY_2613,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1918  "ref|NT_025975.3|HsY_2613,ref|NT_025975.3|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1919  "ref|NT_025975.3,ref|NT_025975.3|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1920  "ref|NT_025975.3,ref|NT_025975.3|HsY_2613,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1921  "ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1922  "ref|NT_025975,ref|NT_025975|HsY_2614",
1923  "ref|NT_025975,ref|NT_025975|HsY_2613",
1924  };
  // Per-id lists for the weak-match variant. The ref entries are identical to
  // those in match_to_ids; only the gb/tpg entries differ.
1925  const char* const weak_match_to_ids[] = {
1926  "tpg|A000001",
1927  "tpg|A000001,tpg|A000001.2,gb|A000001",
1928  "tpg|A000001,tpg|A000001.3,gb|A000001",
1929  "gb|A000001",
1930  "gb|A000001,gb|A000001.2,tpg|A000001",
1931  "gb|A000001,gb|A000001.3,tpg|A000001",
1932  "tpg|A000002",
1933  "tpg|A000002,tpg|A000002.2,gb|A000002",
1934  "tpg|A000002,gb|A000002",
1935  "gb|A000002",
1936  "gb|A000002,gb|A000002.2,tpg|A000002",
1937  "gb|A000002,tpg|A000002,tpg|A000002.5|name2",
1938  "gb|A000002,tpg|A000002,tpg|A000002.5",
1939  "ref|NT_025975.2|HsY_2613,ref|NT_025975.2|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1940  "ref|NT_025975.2,ref|NT_025975.2|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1941  "ref|NT_025975.2,ref|NT_025975.2|HsY_2613,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1942  "ref|NT_025975.3|HsY_2613,ref|NT_025975.3|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1943  "ref|NT_025975.3,ref|NT_025975.3|HsY_2614,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1944  "ref|NT_025975.3,ref|NT_025975.3|HsY_2613,ref|NT_025975,ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1945  "ref|NT_025975|HsY_2613,ref|NT_025975|HsY_2614",
1946  "ref|NT_025975,ref|NT_025975|HsY_2614",
1947  "ref|NT_025975,ref|NT_025975|HsY_2613",
1948  };
1949  s_Match_id(ArraySize(fasta_ids),
1950  fasta_ids, match_to_ids, weak_match_to_ids, false);
1951 }
1952 
// Test-case body; the declaring header line is not part of this listing.
// For each of three seed accession strings, runs ten iterations that print
// the current accession together with `type` (in decimal and in hex) and then
// lengthen the accession by one character: the last character is removed and
// "01" is appended.
// NOTE(review): the statement that declares and computes `type` at the top of
// each loop body (listing lines 1957, 1964, 1971) is missing from this
// listing; nothing is asserted by the visible code, it only prints.
1954 {
1955  string acc = "HP56600";
1956  for ( int i = 0; i < 10; ++i ) {
1958  cout << acc << " -> " << type << " = 0x" << hex << type << dec << endl;
  // Drop the last character, then append "01" (net growth: one character).
1959  acc.erase(acc.size()-1);
1960  acc += "01";
1961  }
1962  acc = "ALWX010";
1963  for ( int i = 0; i < 10; ++i ) {
1965  cout << acc << " -> " << type << " = 0x" << hex << type << dec << endl;
1966  acc.erase(acc.size()-1);
1967  acc += "01";
1968  }
1969  acc = "GANF010";
1970  for ( int i = 0; i < 10; ++i ) {
1972  cout << acc << " -> " << type << " = 0x" << hex << type << dec << endl;
1973  acc.erase(acc.size()-1);
1974  acc += "01";
1975  }
1976 }
1977 
1978 
// Test-case body; the declaring header line is not part of this listing.
// Checks CSeq_id::Match() in both directions for ids whose CObject_id holds
// the same number either as an integer or as a string:
//   1. general ids with the same db ("DB"),
//   2. general ids with different dbs ("DB1" vs "DB2"), where no check expects a match,
//   3. local ids,
//   4. local ids with values around kMin_Int, 0 and kMax_Int, set via
//      SetStr() on one side and SetId8() on the other.
// NOTE(review): the listing numbers skip two lines after most pairs of checks
// (e.g. 1993-1994); whatever stood there is not visible here.
1980 {
1981  LOG_POST("Matching local int<>str ids");
  // Section 1: general ids, same db.
1982  {{
1983  CRef<CSeq_id> id1(new CSeq_id);
1984  CRef<CSeq_id> id2(new CSeq_id);
1985  id1->SetGeneral().SetDb("DB");
1986  id2->SetGeneral().SetDb("DB");
1987  CObject_id& oid1 = id1->SetGeneral().SetTag();
1988  CObject_id& oid2 = id2->SetGeneral().SetTag();
  // int 12 vs string "12": expected to match.
1989  oid1.SetId(12);
1990  oid2.SetStr("12");
1991  BOOST_CHECK(id1->Match(*id2));
1992  BOOST_CHECK(id2->Match(*id1));
1995  oid2.SetId(12);
1996  BOOST_CHECK(id1->Match(*id2));
1997  BOOST_CHECK(id2->Match(*id1));
2000  oid2.SetId(13);
2001  BOOST_CHECK(!id1->Match(*id2));
2002  BOOST_CHECK(!id2->Match(*id1));
  // int 12 vs string "012" (leading zero): expected NOT to match.
2005  oid2.SetStr("012");
2006  BOOST_CHECK(!id1->Match(*id2));
2007  BOOST_CHECK(!id2->Match(*id1));
2010  oid2.SetStr("13");
2011  BOOST_CHECK(!id1->Match(*id2));
2012  BOOST_CHECK(!id2->Match(*id1));
  // From here on both sides hold strings.
2015  oid1.SetStr("13");
2016  BOOST_CHECK(id1->Match(*id2));
2017  BOOST_CHECK(id2->Match(*id1));
2020  oid1.SetStr("12");
2021  BOOST_CHECK(!id1->Match(*id2));
2022  BOOST_CHECK(!id2->Match(*id1));
2025  }}
  // Section 2: same tag sequence as above, but the dbs differ, so every
  // check expects no match.
2026  {{
2027  CRef<CSeq_id> id1(new CSeq_id);
2028  CRef<CSeq_id> id2(new CSeq_id);
2029  id1->SetGeneral().SetDb("DB1");
2030  id2->SetGeneral().SetDb("DB2");
2031  CObject_id& oid1 = id1->SetGeneral().SetTag();
2032  CObject_id& oid2 = id2->SetGeneral().SetTag();
2033  oid1.SetId(12);
2034  oid2.SetStr("12");
2035  BOOST_CHECK(!id1->Match(*id2));
2036  BOOST_CHECK(!id2->Match(*id1));
2039  oid2.SetId(12);
2040  BOOST_CHECK(!id1->Match(*id2));
2041  BOOST_CHECK(!id2->Match(*id1));
2044  oid2.SetId(13);
2045  BOOST_CHECK(!id1->Match(*id2));
2046  BOOST_CHECK(!id2->Match(*id1));
2049  oid2.SetStr("012");
2050  BOOST_CHECK(!id1->Match(*id2));
2051  BOOST_CHECK(!id2->Match(*id1));
2054  oid2.SetStr("13");
2055  BOOST_CHECK(!id1->Match(*id2));
2056  BOOST_CHECK(!id2->Match(*id1));
2059  oid1.SetStr("13");
2060  BOOST_CHECK(!id1->Match(*id2));
2061  BOOST_CHECK(!id2->Match(*id1));
2064  oid1.SetStr("12");
2065  BOOST_CHECK(!id1->Match(*id2));
2066  BOOST_CHECK(!id2->Match(*id1));
2069  }}
  // Section 3: local ids; same tag sequence and expectations as section 1.
2070  {{
2071  CRef<CSeq_id> id1(new CSeq_id);
2072  CRef<CSeq_id> id2(new CSeq_id);
2073  CObject_id& oid1 = id1->SetLocal();
2074  CObject_id& oid2 = id2->SetLocal();
2075  oid1.SetId(12);
2076  oid2.SetStr("12");
2077  BOOST_CHECK(id1->Match(*id2));
2078  BOOST_CHECK(id2->Match(*id1));
2081  oid2.SetId(12);
2082  BOOST_CHECK(id1->Match(*id2));
2083  BOOST_CHECK(id2->Match(*id1));
2086  oid2.SetId(13);
2087  BOOST_CHECK(!id1->Match(*id2));
2088  BOOST_CHECK(!id2->Match(*id1));
2091  oid2.SetStr("012");
2092  BOOST_CHECK(!id1->Match(*id2));
2093  BOOST_CHECK(!id2->Match(*id1));
2096  oid2.SetStr("13");
2097  BOOST_CHECK(!id1->Match(*id2));
2098  BOOST_CHECK(!id2->Match(*id1));
2101  oid1.SetStr("13");
2102  BOOST_CHECK(id1->Match(*id2));
2103  BOOST_CHECK(id2->Match(*id1));
2106  oid1.SetStr("12");
2107  BOOST_CHECK(!id1->Match(*id2));
2108  BOOST_CHECK(!id2->Match(*id1));
2111  }}
  // Section 4: values within +/-2 of kMin_Int, 0 and kMax_Int, held as Int8
  // so that v0+d1 and v1+d2 can step outside the int range.
2112  const Int8 start_ids[] = { kMin_Int, 0, kMax_Int };
2113  for ( auto v0 : start_ids ) {
2114  for ( int d1 = -2; d1 <= 2; ++d1 ) {
2115  Int8 v1 = v0+d1;
2116  CRef<CSeq_id> id1(new CSeq_id);
2117  CRef<CSeq_id> id2(new CSeq_id);
2118  CObject_id& oid1 = id1->SetLocal();
2119  CObject_id& oid2 = id2->SetLocal();
  // id1 stores v1 as text; GetId8() must still report v1.
2120  oid1.SetStr(NStr::NumericToString(v1));
2121  BOOST_REQUIRE_EQUAL(oid1.GetId8(), v1);
2122  for ( int d2 = -2; d2 <= 2; ++d2 ) {
2123  Int8 v2 = v1+d2;
2124  //LOG_POST("Matching "<<v1<<" to "<<v2);
2125  oid2.SetId8(v2);
2126  BOOST_REQUIRE_EQUAL(oid2.GetId8(), v2);
  // Match is expected exactly when the two numeric values are equal.
2127  if ( v1 == v2 ) {
2128  BOOST_CHECK(id1->Match(*id2));
2129  BOOST_CHECK(id2->Match(*id1));
2132  }
2133  else {
2134  BOOST_CHECK(!id1->Match(*id2));
2135  BOOST_CHECK(!id2->Match(*id1));
2138  }
  // NOTE(review): listing line 2139 is missing here; as shown, the checks
  // below repeat the group above on an unchanged oid2. Presumably the missing
  // line re-set oid2 another way -- confirm against the real source.
2140  BOOST_REQUIRE_EQUAL(oid2.GetId8(), v2);
2141  if ( v1 == v2 ) {
2142  BOOST_CHECK(id1->Match(*id2));
2143  BOOST_CHECK(id2->Match(*id1));
2146  }
2147  else {
2148  BOOST_CHECK(!id1->Match(*id2));
2149  BOOST_CHECK(!id2->Match(*id1));
2152  }
2153  }
2154  }
2155  }
2156 }
2157 
2158 
// Test-case body; the declaring header line is not part of this listing.
// Each section takes ids that differ only in letter case and checks two
// things: the CSeq_id_Handle values compare equal, and each handle still
// reports its own original spelling through AsString() and through the
// underlying Seq-id. Sections two to six then build a second pair (id3, id4),
// compare it against id1, and re-check that id1/id2 are still spelled as
// first given.
2160 {
2161  LOG_POST("Testing case preservation");
  // Text ids built directly with type e_Other.
  // NOTE(review): the declarations of id1 and id2 (listing lines 2165-2166)
  // are missing from this listing; presumably they are handles obtained from
  // sid1 and sid2 -- confirm against the real source.
2162  {{
2163  CRef<CSeq_id> sid1(new CSeq_id(CSeq_id::e_Other, "abcdef"));
2164  CRef<CSeq_id> sid2(new CSeq_id(CSeq_id::e_Other, "ABCDeF"));
2167  BOOST_CHECK_EQUAL(id1, id2);
2168  BOOST_CHECK_EQUAL(id1.AsString(), "ref|abcdef|");
2169  BOOST_CHECK_EQUAL(id2.AsString(), "ref|ABCDeF|");
2170  BOOST_CHECK_EQUAL(id1.GetSeqId()->GetTextseq_Id()->GetAccession(), "abcdef");
2171  BOOST_CHECK_EQUAL(id2.GetSeqId()->GetTextseq_Id()->GetAccession(), "ABCDeF");
2172  }}
  // Bare accessions, without and with a version.
2173  {{
2174  CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle("NC_000001");
2175  CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle("nc_000001");
2176  BOOST_CHECK_EQUAL(id1, id2);
2177  BOOST_CHECK_EQUAL(id1.GetSeqId()->GetTextseq_Id()->GetAccession(), "NC_000001");
2178  BOOST_CHECK_EQUAL(id2.GetSeqId()->GetTextseq_Id()->GetAccession(), "nc_000001");
2179  BOOST_CHECK_EQUAL(id1.AsString(), "ref|NC_000001|");
2180  BOOST_CHECK_EQUAL(id2.AsString(), "ref|nc_000001|");
2181 
2182  CSeq_id_Handle id3 = CSeq_id_Handle::GetHandle("nC_000001.1");
2183  CSeq_id_Handle id4 = CSeq_id_Handle::GetHandle("Nc_000001.1");
2184  BOOST_CHECK_EQUAL(id3, id4);
2185  BOOST_CHECK_EQUAL(id3.GetSeqId()->GetTextseq_Id()->GetAccession(), "nC_000001");
2186  BOOST_CHECK_EQUAL(id4.GetSeqId()->GetTextseq_Id()->GetAccession(), "Nc_000001");
2187  BOOST_CHECK_EQUAL(id3.AsString(), "ref|nC_000001.1|");
2188  BOOST_CHECK_EQUAL(id4.AsString(), "ref|Nc_000001.1|");
2189 
  // The versioned handle is expected to differ from the unversioned one.
2190  BOOST_CHECK(id3 != id1);
2191  BOOST_CHECK_EQUAL(id1.GetSeqId()->GetTextseq_Id()->GetAccession(), "NC_000001");
2192  BOOST_CHECK_EQUAL(id2.GetSeqId()->GetTextseq_Id()->GetAccession(), "nc_000001");
2193  BOOST_CHECK_EQUAL(id1.AsString(), "ref|NC_000001|");
2194  BOOST_CHECK_EQUAL(id2.AsString(), "ref|nc_000001|");
2195  }}
  // Local string ids: all four case variants are expected to compare equal.
2196  {{
2197  CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle("lcl|Test");
2198  CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle("lcl|test");
2199  BOOST_CHECK_EQUAL(id1, id2);
2200  BOOST_CHECK_EQUAL(id1.GetSeqId()->GetLocal().GetStr(), "Test");
2201  BOOST_CHECK_EQUAL(id2.GetSeqId()->GetLocal().GetStr(), "test");
2202  BOOST_CHECK_EQUAL(id1.AsString(), "lcl|Test");
2203  BOOST_CHECK_EQUAL(id2.AsString(), "lcl|test");
2204 
2205  CSeq_id_Handle id3 = CSeq_id_Handle::GetHandle("lcl|TEST");
2206  CSeq_id_Handle id4 = CSeq_id_Handle::GetHandle("lcl|tEsT");
2207  BOOST_CHECK_EQUAL(id3, id4);
2208  BOOST_CHECK_EQUAL(id3.GetSeqId()->GetLocal().GetStr(), "TEST");
2209  BOOST_CHECK_EQUAL(id4.GetSeqId()->GetLocal().GetStr(), "tEsT");
2210  BOOST_CHECK_EQUAL(id3.AsString(), "lcl|TEST");
2211  BOOST_CHECK_EQUAL(id4.AsString(), "lcl|tEsT");
2212 
2213  BOOST_CHECK_EQUAL(id3, id1);
2214  BOOST_CHECK_EQUAL(id1.GetSeqId()->GetLocal().GetStr(), "Test");
2215  BOOST_CHECK_EQUAL(id2.GetSeqId()->GetLocal().GetStr(), "test");
2216  BOOST_CHECK_EQUAL(id1.AsString(), "lcl|Test");
2217  BOOST_CHECK_EQUAL(id2.AsString(), "lcl|test");
2218  }}
  // General ids with a plain string tag: case differences in both the db and
  // the tag are expected to compare equal.
2219  {{
2220  CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle("gnl|SRA|SRR");
2221  CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle("gnl|SRA|srr");
2222  BOOST_CHECK_EQUAL(id1, id2);
2223  BOOST_CHECK_EQUAL(id1.GetSeqId()->AsFastaString(), "gnl|SRA|SRR");
2224  BOOST_CHECK_EQUAL(id2.GetSeqId()->AsFastaString(), "gnl|SRA|srr");
2225  BOOST_CHECK_EQUAL(id1.AsString(), "gnl|SRA|SRR");
2226  BOOST_CHECK_EQUAL(id2.AsString(), "gnl|SRA|srr");
2227 
2228  CSeq_id_Handle id3 = CSeq_id_Handle::GetHandle("gnl|sra|SRR");
2229  CSeq_id_Handle id4 = CSeq_id_Handle::GetHandle("gnl|Sra|Srr");
2230  BOOST_CHECK_EQUAL(id3, id4);
2231  BOOST_CHECK_EQUAL(id3.GetSeqId()->AsFastaString(), "gnl|sra|SRR");
2232  BOOST_CHECK_EQUAL(id4.GetSeqId()->AsFastaString(), "gnl|Sra|Srr");
2233  BOOST_CHECK_EQUAL(id3.AsString(), "gnl|sra|SRR");
2234  BOOST_CHECK_EQUAL(id4.AsString(), "gnl|Sra|Srr");
2235 
2236  BOOST_CHECK_EQUAL(id3, id1);
2237  BOOST_CHECK_EQUAL(id1.GetSeqId()->AsFastaString(), "gnl|SRA|SRR");
2238  BOOST_CHECK_EQUAL(id2.GetSeqId()->AsFastaString(), "gnl|SRA|srr");
2239  BOOST_CHECK_EQUAL(id1.AsString(), "gnl|SRA|SRR");
2240  BOOST_CHECK_EQUAL(id2.AsString(), "gnl|SRA|srr");
2241  }}
  // General ids whose tag is a prefix, digits and a dotted suffix; the two
  // pairs use different tags, so id3 is expected to differ from id1.
2242  {{
2243  CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle("gnl|SRA|SRR000010.2");
2244  CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle("gnl|SRA|srr000010.2");
2245  BOOST_CHECK_EQUAL(id1, id2);
2246  BOOST_CHECK_EQUAL(id1.GetSeqId()->AsFastaString(), "gnl|SRA|SRR000010.2");
2247  BOOST_CHECK_EQUAL(id2.GetSeqId()->AsFastaString(), "gnl|SRA|srr000010.2");
2248  BOOST_CHECK_EQUAL(id1.AsString(), "gnl|SRA|SRR000010.2");
2249  BOOST_CHECK_EQUAL(id2.AsString(), "gnl|SRA|srr000010.2");
2250 
2251  CSeq_id_Handle id3 = CSeq_id_Handle::GetHandle("gnl|sra|SRR000011.s");
2252  CSeq_id_Handle id4 = CSeq_id_Handle::GetHandle("gnl|Sra|Srr000011.S");
2253  BOOST_CHECK_EQUAL(id3, id4);
2254  BOOST_CHECK_EQUAL(id3.GetSeqId()->AsFastaString(), "gnl|sra|SRR000011.s");
2255  BOOST_CHECK_EQUAL(id4.GetSeqId()->AsFastaString(), "gnl|Sra|Srr000011.S");
2256  BOOST_CHECK_EQUAL(id3.AsString(), "gnl|sra|SRR000011.s");
2257  BOOST_CHECK_EQUAL(id4.AsString(), "gnl|Sra|Srr000011.S");
2258 
2259  BOOST_CHECK(id3 != id1);
2260  BOOST_CHECK_EQUAL(id1.GetSeqId()->AsFastaString(), "gnl|SRA|SRR000010.2");
2261  BOOST_CHECK_EQUAL(id2.GetSeqId()->AsFastaString(), "gnl|SRA|srr000010.2");
2262  BOOST_CHECK_EQUAL(id1.AsString(), "gnl|SRA|SRR000010.2");
2263  BOOST_CHECK_EQUAL(id2.AsString(), "gnl|SRA|srr000010.2");
2264  }}
  // General ids with a purely numeric tag; only the db spelling varies within
  // a pair, and the two pairs use different numbers.
2265  {{
2266  CSeq_id_Handle id1 = CSeq_id_Handle::GetHandle("gnl|SRA|1");
2267  CSeq_id_Handle id2 = CSeq_id_Handle::GetHandle("gnl|Sra|1");
2268  BOOST_CHECK_EQUAL(id1, id2);
2269  BOOST_CHECK_EQUAL(id1.GetSeqId()->AsFastaString(), "gnl|SRA|1");
2270  BOOST_CHECK_EQUAL(id2.GetSeqId()->AsFastaString(), "gnl|Sra|1");
2271  BOOST_CHECK_EQUAL(id1.AsString(), "gnl|SRA|1");
2272  BOOST_CHECK_EQUAL(id2.AsString(), "gnl|Sra|1");
2273 
2274  CSeq_id_Handle id3 = CSeq_id_Handle::GetHandle("gnl|sra|2");
2275  CSeq_id_Handle id4 = CSeq_id_Handle::GetHandle("gnl|Sra|2");
2276  BOOST_CHECK_EQUAL(id3, id4);
2277  BOOST_CHECK_EQUAL(id3.GetSeqId()->AsFastaString(), "gnl|sra|2");
2278  BOOST_CHECK_EQUAL(id4.GetSeqId()->AsFastaString(), "gnl|Sra|2");
2279  BOOST_CHECK_EQUAL(id3.AsString(), "gnl|sra|2");
2280  BOOST_CHECK_EQUAL(id4.AsString(), "gnl|Sra|2");
2281 
2282  BOOST_CHECK(id3 != id1);
2283  BOOST_CHECK_EQUAL(id1.GetSeqId()->AsFastaString(), "gnl|SRA|1");
2284  BOOST_CHECK_EQUAL(id2.GetSeqId()->AsFastaString(), "gnl|Sra|1");
2285  BOOST_CHECK_EQUAL(id1.AsString(), "gnl|SRA|1");
2286  BOOST_CHECK_EQUAL(id2.AsString(), "gnl|Sra|1");
2287  }}
2288 }
2289 
// Initializer entries of a lookup table; the line that declares the table
// (its type and name) is missing from this listing. Each entry pairs a
// FASTA-style id string with a list of one or two strings.
// NOTE(review): the test body that follows in the file reads an entry's
// `second[0]` as the expected primary id and, when the list has exactly two
// elements, `second[1]` as the expected first secondary id of
// CSeq_id::ComposeOSLT(). Presumably that body iterates this table -- the
// loop header is missing from the listing, so confirm against the real source.
2292  { "lcl|123", { "" } },
2293  { "bbs|123", { "123", "123" } },
2294  { "bbm|123", { "123", "123" } },
2295  { "gim|123", { "123", "123" } },
2296  { "gb|U12345.1|amu12345", { "U12345", "AMU12345" } },
2297  { "emb|AL123456|MtBh37RV", { "AL123456", "MTBH37RV" } },
2298  { "pir||S16356", { "S16356" } },
2299  { "sp|Q7CQJ0|RS22_saltY", { "Q7CQJ0", "RS22_SALTY" } },
2300  { "tr|Q90RT2|Q90RT2_9hiv1", { "Q90RT2", "Q90RT2_9HIV1" } },
2301  { "sp|Q7CQJ0.1|", { "Q7CQJ0" } },
2302  { "pat|US|re33188|1", { "", "US|RE33188|1" } },
2303  // { "pgp|ep|0238993|7", { "", "EP|0238993|7" } },
2304  { "ref|NM_000170.1|", { "NM_000170" } },
2305  { "gnl|EcoSeq|EcoAce", { "", "ECOSEQ|ECOACE" } },
2306  { "gnl|Celera|cdm:10213987", { "", "CELERA|CDM:10213987" } },
2307  { "gnl|WGS:AAAB|CRA_x9P1GAV4nra", { "", "WGS:AAAB|CRA_X9P1GAV4NRA" } },
2308  { "gnl|WGS:ABCD|cont1", { "", "WGS:ABCD|CONT1" } },
2309  { "gi|1234", { "", "1234" } },
2310  { "dbj|N00068|", { "N00068" } },
2311  { "prf||0806162C", { "0806162C" } },
  // In the pdb entries the chain part keeps its letter case in the expected
  // string while the molecule part is upper-case.
2312  { "pdb|1GAV|", { "1GAV" } },
2313  { "pdb|1GAV|X", { "1GAV|X" } },
2314  { "pdb|1GAV|x", { "1GAV|x" } },
2315  { "pdb|1gav|x", { "1GAV|x" } },
2316  { "pdb|1GAV|xyZ", { "1GAV|xyZ" } },
2317  { "tpg|BK003456|", { "BK003456" } },
2318  { "tpe|BN000123|", { "BN000123" } },
2319  { "tpd|FAA00017|", { "FAA00017" } },
2320  { "gpp|GPC_123456789.1|", { "GPC_123456789", "GPC_123456789.1" } },
2321  { "gpp|GPC_123456789|" , { "GPC_123456789", "GPC_123456789.1" } },
2322  { "nat|AT_123456789.1|", { "AT_123456789" } },
2323  { "gnl|REF_WGS:ACJF|NECHADRAFT_MRNA79537", { "", "REF_WGS:ACJF|NECHADRAFT_MRNA79537" } },
2324  { "gnl|TESTDB|", { "" } }
2325 };
2326 
// Test-case body; the declaring header line is not part of this listing.
// For each table entry `it` (first = id string, second = expected strings)
// it builds a CSeq_id from the string, calls ComposeOSLT() and compares the
// returned primary id and the first secondary id with the expected values.
// NOTE(review): two lines are missing from this listing -- the loop header
// that introduces `it` (listing line 2336) and the remaining argument(s) of
// the ComposeOSLT() call (listing line 2344).
2328 {
2329  string primary_id_ref;
  // NOTE(review): this outer primary_id is shadowed by the declaration inside
  // the loop body below and is not otherwise used in the visible code.
2330  string primary_id;
2331  string secondary_id_ref;
2332  bool has_secondary_ids;
2333 
2334  LOG_POST("Testing generation of OSLT strings");
2335 
2337  CSeq_id id(it->first);
2338  primary_id_ref = it->second[0];
  // A secondary id is expected only when the entry lists exactly two strings.
2339  has_secondary_ids = (it->second.size() == 2);
2340  if (has_secondary_ids)
2341  secondary_id_ref = it->second[1];
2342  list<string> secondary_ids;
2343  string primary_id = id.ComposeOSLT(&secondary_ids,
2345  BOOST_CHECK_EQUAL(primary_id, primary_id_ref);
2346  size_t secondary_ids_size = secondary_ids.size();
2347  BOOST_CHECK_EQUAL((secondary_ids_size > 0), has_secondary_ids);
  // Only the first returned secondary id is compared.
2348  if (secondary_ids_size && has_secondary_ids)
2349  BOOST_CHECK_EQUAL(secondary_ids.front(), secondary_id_ref);
2350  }
2351 }
2352 
// Checks accession strings that are placed in a CSeq_id of an explicitly
// chosen type: construction must not throw, and the instance's
// IdentifyAccession() result is compared with an expected value that can
// differ from what the static CSeq_id::IdentifyAccession(acc) reports for the
// bare string.
// NOTE(review): the second argument of two BOOST_CHECK_EQUAL calls (listing
// lines 2362 and 2368) is missing from this listing.
2353 BOOST_AUTO_TEST_CASE(s_TestTypeMismatch)
2354 {
2355  string acc;
2356  CRef<CSeq_id> id;
2357 
2358  acc = "ZZ123456";
2359  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Genbank, acc)));
2360  BOOST_CHECK_EQUAL(id->IdentifyAccession(), CSeq_id::eAcc_gb_other_nuc);
2361  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession(acc),
2363 
  // The same string is tried first as e_Tpe and then as e_Genbank.
2364  acc = "CABIVQ012345678";
2365  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Tpe, acc)));
2366  BOOST_CHECK_EQUAL(id->IdentifyAccession(), CSeq_id::eAcc_embl_tpa_wgs_nuc);
2367  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession(acc),
2369  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Genbank, acc)));
2370  BOOST_CHECK_EQUAL(id->IdentifyAccession(), CSeq_id::eAcc_gb_other);
2371 
  // Here the instance and the bare string are expected to classify differently.
2372  acc = "F12345";
2373  BOOST_CHECK_NO_THROW(id.Reset(new CSeq_id(CSeq_id::e_Tpe, acc)));
2374  BOOST_CHECK_EQUAL(id->IdentifyAccession(), CSeq_id::eAcc_embl_tpa_other);
2375  BOOST_CHECK_EQUAL(CSeq_id::IdentifyAccession(acc), CSeq_id::eAcc_embl_est);
2376 }
2377 
2378 static CRef<CSeq_id> s_ParsePDB(const char* str)
2379 {
2380  CRef<CSeq_id> ret(new CSeq_id);
2381  istringstream in(string("Seq-id::=pdb ")+str);
2382  in >> MSerial_AsnText >> *ret;
2383  return ret;
2384 }
2385 
2386 template<class C>
2387 static string s_ToASN(const C& obj)
2388 {
2389  ostringstream out;
2390  out << MSerial_AsnText << obj;
2391  return out.str();
2392 }
2393 
2395 {
2396  LOG_POST("Testing PDB equivalence");
2397  {{
2398  const char* str_eq[] = {
2399  "{mol\"4XNU\",chain 65,chain-id \"A\"}",
2400  "{mol\"4XNU\",chain-id \"A\"}",
2401  "{mol\"4XNU\",chain 65}",
2402  "{mol\"4XNU\",chain 65,rel std {year 2017},chain-id \"A\"}",
2403  "{mol\"4XNU\",chain 65,rel std {year 2017,month 6,day 2},chain-id \"A\"}",
2404  "{mol\"4XNU\",chain 65,rel std {year 2017,month 6,day 2,hour 12,minute 22,second 33},chain-id \"A\"}",
2405  };
2406  const char* str_ne[] = {
2407  "{mol\"4XNU\"}",
2408  "{mol\"4XNU\",chain 66,chain-id \"A\"}",
2409  "{mol\"4XNU\",chain 65,chain-id \"B\"}",
2410  "{mol\"4XNV\",chain 65,chain-id \"A\"}",
2411  "{mol\"4XNU\",chain 65,rel std {year 2017,month 6,day 2,season\"summer\"},chain-id \"A\"}",
2412  "{mol\"4XNU\",chain 65,rel std {year 0,month 6,day 2,season\"summer\"},chain-id \"A\"}",
2413  "{mol\"4XNU\",chain 65,rel str \"02/06/2017\",chain-id \"A\"}",
2414  };
2415  vector<CRef<CSeq_id>> id_eq;
2416  for ( auto str : str_eq ) {
2417  id_eq.push_back(s_ParsePDB(str));
2418  }
2419  vector<CRef<CSeq_id>> id_ne;
2420  for ( auto str : str_ne ) {
2421  id_ne.push_back(s_ParsePDB(str));
2422  }
2423  vector<CSeq_id_Handle> idh_eq;
2424  for ( auto id : id_eq ) {
2425  idh_eq.push_back(CSeq_id_Handle::GetHandle(*id));
2426  }
2427  vector<CSeq_id_Handle> idh_ne;
2428  for ( auto id : id_ne ) {
2429  idh_ne.push_back(CSeq_id_Handle::GetHandle(*id));
2430  }
2431  for ( auto& id1 : idh_eq ) {
2432  auto seq_id1 = id1.GetSeqId();
2433  for ( auto& id2 : idh_eq ) {
2434  BOOST_CHECK_EQUAL(id1, id2);
2435  auto seq_id2 = id2.GetSeqId();
2436  if ( &id1 == &id2 ) {
2437  BOOST_CHECK_EQUAL(s_ToASN(*seq_id1), s_ToASN(*seq_id2));
2438  }
2439  else {
2440  BOOST_CHECK_NE(s_ToASN(*seq_id1), s_ToASN(*seq_id2));
2441  }
2442  }
2443  for ( auto& id2 : idh_ne ) {
2444  BOOST_CHECK_NE(id1, id2);
2445  auto seq_id2 = id2.GetSeqId();
2446  BOOST_CHECK_NE(s_ToASN(*seq_id1), s_ToASN(*seq_id2));
2447  }
2448  }
2449  for ( auto& id1 : idh_ne ) {
2450  auto seq_id1 = id1.GetSeqId();
2451  for ( auto& id2 : idh_ne ) {
2452  auto seq_id2 = id2.GetSeqId();
2453  if ( &id1 == &id2 ) {
2454  BOOST_CHECK_EQUAL(s_ToASN(*seq_id1), s_ToASN(*seq_id2));
2455  }
2456  else {
2457  BOOST_CHECK_NE(id1, id2);
2458  BOOST_CHECK_NE(s_ToASN(*seq_id1), s_ToASN(*seq_id2));
2459  }
2460  }
2461  }
2462  for ( size_t i = 0; i < size(str_eq); ++i ) {
2463  BOOST_CHECK_NE(idh_eq[i].GetSeqId(), id_eq[i]);
2464  BOOST_CHECK_EQUAL(s_ToASN(*idh_eq[i].GetSeqId()), s_ToASN(*id_eq[i]));
2465  }
2466  for ( size_t i = 0; i < size(str_ne); ++i ) {
2467  BOOST_CHECK_NE(idh_ne[i].GetSeqId(), id_ne[i]);
2468  BOOST_CHECK_EQUAL(s_ToASN(*idh_ne[i].GetSeqId()), s_ToASN(*id_ne[i]));
2469  }
2470  set<CSeq_id_Handle> idh_set_eq(begin(idh_eq), end(idh_eq));
2471  BOOST_CHECK_EQUAL(idh_set_eq.size(), 1u);
2472  BOOST_CHECK_EQUAL(s_ToASN(idh_set_eq.begin()->GetSeqId()), s_ToASN(*id_eq[0]));
2473  set<CSeq_id_Handle> idh_set_ne(begin(idh_ne), end(idh_ne));
2474  BOOST_CHECK_EQUAL(idh_set_ne.size(), size(idh_ne));
2475  }}
2476 }
Definition: Dbtag.hpp:53
Thrown on an attempt to access wrong choice variant.
Definition: exception.hpp:102
CMTTestThread(int tid)
CSeq_id_Handle GetRandomId(bool other, bool with_version)
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
deque< CSeq_id_Handle > m_Ids
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
void SetId8(TId8 value)
Definition: Object_id.cpp:175
TId8 GetId8(void) const
Definition: Object_id.cpp:164
CRandom::
Definition: random_gen.hpp:66
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
Definition: map.hpp:338
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
size_type size() const
Definition: set.hpp:132
string GetSeqIdString(const CSeq_id &id)
Definition: compartp.cpp:100
#define C(s)
Definition: common.h:231
std::ofstream out("events_result.xml")
main entry point for tests
#define true
Definition: bool.h:35
static const char * str(char *buf, int n)
Definition: stats.c:84
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
constexpr size_t ArraySize(const Element(&)[Size])
Definition: ncbimisc.hpp:1532
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NULL
Definition: ncbistd.hpp:225
#define _VERIFY(expr)
Definition: ncbidbg.hpp:161
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
const CVect2< U > & v2
Definition: globals.hpp:440
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
Definition: serialbase.hpp:347
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
static CSeq_id_Handle GetGiHandle(TGi gi)
Faster way to create a handle for a gi.
int CompareOrdered(const CSeq_id &sid2) const
Definition: Seq_id.cpp:486
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
Definition: Seq_id.cpp:2603
bool MatchesTo(const CSeq_id_Handle &h) const
True if *this matches to h.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
void GetReverseMatchingHandles(TMatches &matches) const
static string GetStringDescr(const CBioseq &bioseq, EStringFormat fmt)
Definition: Seq_id.cpp:2292
int CompareOrdered(const CSeq_id_Handle &id) const
Compare ids in a defined order (see CSeq_id::CompareOrdered())
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
CSeq_id::E_Choice Which(void) const
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ fLabel_UpperCase
Upper case label, with special encoding for PDB chain-ids.
Definition: Seq_id.hpp:620
@ fLabel_Default
default options - always show the version
Definition: Seq_id.hpp:623
@ eAcc_gb_other_nuc
Definition: Seq_id.hpp:346
@ eAcc_wgs
Definition: Seq_id.hpp:290
@ eAcc_gb_optical_map
Definition: Seq_id.hpp:379
@ eAcc_ddbj_wgs_prot
Definition: Seq_id.hpp:470
@ eAcc_ddbj_wgs_nuc
Definition: Seq_id.hpp:469
@ eAcc_embl_wgs_nuc
Definition: Seq_id.hpp:400
@ eAcc_gb_wgsm_nuc
Definition: Seq_id.hpp:370
@ eAcc_embl_tsa_prot
Definition: Seq_id.hpp:389
@ eAcc_gb_tpa_wgsm_nuc
Definition: Seq_id.hpp:493
@ eAcc_gb_wgs_prot
Definition: Seq_id.hpp:369
@ eAcc_general
Definition: Seq_id.hpp:444
@ eAcc_gb_tpa_wgs_nuc
Definition: Seq_id.hpp:491
@ eAcc_embl_tpa_wgs_nuc
Definition: Seq_id.hpp:512
@ eAcc_pdb
Definition: Seq_id.hpp:484
@ eAcc_unreserved_nuc
Definition: Seq_id.hpp:334
@ eAcc_general_nuc
Definition: Seq_id.hpp:445
@ eAcc_gb_prot
Definition: Seq_id.hpp:345
@ eAcc_gb_wgs_nuc
Definition: Seq_id.hpp:368
@ eAcc_unknown
Definition: Seq_id.hpp:322
@ eAcc_embl_est
Definition: Seq_id.hpp:385
@ eAcc_embl_tpa_other
Definition: Seq_id.hpp:501
@ eAcc_division_mask
Definition: Seq_id.hpp:299
@ eAcc_gb_other
Definition: Seq_id.hpp:344
@ e_YES
SeqIds compared, but are different.
Definition: Seq_id.hpp:583
@ fGpipeAddSecondary
Add "ACC.VER(=1)" for a 2ndary id.
Definition: Seq_id.hpp:821
@ fParse_RawText
Try to ID raw non-numeric accessions.
Definition: Seq_id.hpp:81
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
Definition: Seq_id.hpp:87
@ eAllowWeakMatch
@ eFormat_BestWithoutVersion
Definition: Seq_id.hpp:664
@ eFormat_FastA
Definition: Seq_id.hpp:662
@ eFormat_BestWithVersion
Definition: Seq_id.hpp:665
@ eFormat_ForceGI
Definition: Seq_id.hpp:663
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
@ eDefault
default is to show type + content
Definition: Seq_id.hpp:611
@ eBoth
Type and content, delimited by a vertical bar.
Definition: Seq_id.hpp:606
@ eType
FASTA-style type, or database in GeneralDbIsContent mode.
Definition: Seq_id.hpp:604
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
void SetEmpty(TEmpty &v)
Definition: Seq_loc.hpp:981
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void SetNull(void)
Override all setters to incorporate cache invalidation.
Definition: Seq_loc.hpp:960
@ fCompare_Default
Definition: Seq_loc.hpp:245
@ fCompare_Strand
Definition: Seq_loc.hpp:246
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
#define kMin_Int
Definition: ncbi_limits.h:183
#define kMax_Int
Definition: ncbi_limits.h:184
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCout
Definition: ncbistre.hpp:543
#define NcbiCerr
Definition: ncbistre.hpp:544
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
Definition: ncbistr.cpp:457
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
#define NPOS
Definition: ncbistr.hpp:133
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
CTime GetFastLocalTime(void)
Quick and dirty getter of local time.
Definition: ncbitime.cpp:4166
static const char label[]
bool IsNumber(void) const
Check if variant Number is selected.
Definition: Id_pat_.hpp:426
const TId & GetId(void) const
Get the Id member data.
Definition: Id_pat_.hpp:525
const TNumber & GetNumber(void) const
Get the variant data.
Definition: Id_pat_.hpp:432
const TCountry & GetCountry(void) const
Get the Country member data.
Definition: Id_pat_.hpp:478
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
void SetTag(TTag &value)
Assign a value to Tag data member.
Definition: Dbtag_.cpp:66
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetDb(const TDb &value)
Assign a value to Db data member.
Definition: Dbtag_.hpp:229
TId & SetId(void)
Select the variant.
Definition: Object_id_.hpp:277
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
const TRegion & GetRegion(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
bool IsPatent(void) const
Check if variant Patent is selected.
Definition: Seq_id_.hpp:865
bool IsGenbank(void) const
Check if variant Genbank is selected.
Definition: Seq_id_.hpp:841
TGeneral & SetGeneral(void)
Select the variant.
Definition: Seq_id_.cpp:375
TChain GetChain(void) const
Get the Chain member data.
const TEmbl & GetEmbl(void) const
Get the variant data.
Definition: Seq_id_.cpp:259
const TPdb & GetPdb(void) const
Get the variant data.
Definition: Seq_id_.cpp:435
TGibbsq GetGibbsq(void) const
Get the variant data.
Definition: Seq_id_.hpp:787
TId GetId(void) const
Get the Id member data.
bool IsSetChain_id(void) const
Chain identifier; length-independent generalization of 'chain'. Check if a value has been assigned to Chain_id data member.
bool IsSetChain(void) const
Deprecated: 'chain' can't support multiple character PDB chain identifiers (introduced in 2015).
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsTpg(void) const
Check if variant Tpg is selected.
Definition: Seq_id_.hpp:928
const TGenbank & GetGenbank(void) const
Get the variant data.
Definition: Seq_id_.cpp:237
const TName & GetName(void) const
Get the Name member data.
bool IsEmpty(void) const
Check if variant Empty is selected.
Definition: Seq_loc_.hpp:516
bool IsTpd(void) const
Check if variant Tpd is selected.
Definition: Seq_id_.hpp:940
bool IsGibbmt(void) const
Check if variant Gibbmt is selected.
Definition: Seq_id_.hpp:808
bool IsOther(void) const
Check if variant Other is selected.
Definition: Seq_id_.hpp:871
const TWhole & GetWhole(void) const
Get the variant data.
Definition: Seq_loc_.cpp:172
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
bool IsPrf(void) const
Check if variant Prf is selected.
Definition: Seq_id_.hpp:916
bool IsEmbl(void) const
Check if variant Embl is selected.
Definition: Seq_id_.hpp:847
bool IsPdb(void) const
Check if variant Pdb is selected.
Definition: Seq_id_.hpp:922
bool IsSwissprot(void) const
Check if variant Swissprot is selected.
Definition: Seq_id_.hpp:859
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
TVersion GetVersion(void) const
Get the Version member data.
bool IsSetRelease(void) const
Check if a value has been assigned to Release data member.
const TRelease & GetRelease(void) const
Get the Release member data.
const TMol & GetMol(void) const
Get the Mol member data.
TSeqid GetSeqid(void) const
Get the Seqid member data.
const TOther & GetOther(void) const
Get the variant data.
Definition: Seq_id_.cpp:347
const TGiim & GetGiim(void) const
Get the variant data.
Definition: Seq_id_.cpp:215
const TSwissprot & GetSwissprot(void) const
Get the variant data.
Definition: Seq_id_.cpp:303
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
const TDdbj & GetDdbj(void) const
Get the variant data.
Definition: Seq_id_.cpp:391
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
const TEmpty & GetEmpty(void) const
Get the variant data.
Definition: Seq_loc_.cpp:150
bool IsGiim(void) const
Check if variant Giim is selected.
Definition: Seq_id_.hpp:835
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
const TPir & GetPir(void) const
Get the variant data.
Definition: Seq_id_.cpp:281
const TChain_id & GetChain_id(void) const
Get the Chain_id member data.
bool IsGpipe(void) const
Check if variant Gpipe is selected.
Definition: Seq_id_.hpp:946
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
const TPatent & GetPatent(void) const
Get the variant data.
Definition: Seq_id_.cpp:325
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
const TPrf & GetPrf(void) const
Get the variant data.
Definition: Seq_id_.cpp:413
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsTpe(void) const
Check if variant Tpe is selected.
Definition: Seq_id_.hpp:934
bool IsNamed_annot_track(void) const
Check if variant Named_annot_track is selected.
Definition: Seq_id_.hpp:952
const TCit & GetCit(void) const
Get the Cit member data.
bool IsPir(void) const
Check if variant Pir is selected.
Definition: Seq_id_.hpp:853
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
bool IsGibbsq(void) const
Check if variant Gibbsq is selected.
Definition: Seq_id_.hpp:781
void Select(E_Choice index, EResetVariant reset=eDoResetVariant)
Select the requested variant if needed.
const TAccession & GetAccession(void) const
Get the Accession member data.
bool IsDdbj(void) const
Check if variant Ddbj is selected.
Definition: Seq_id_.hpp:910
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Gibbmt
Geninfo backbone moltype.
Definition: Seq_id_.hpp:97
@ e_Giim
Geninfo import id.
Definition: Seq_id_.hpp:98
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Gibbsq
Geninfo backbone seqid.
Definition: Seq_id_.hpp:96
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
@ e_Local
local use
Definition: Seq_id_.hpp:95
@ e_Pdb
PDB sequence.
Definition: Seq_id_.hpp:109
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
@ eRepr_virtual
no seq data
Definition: Seq_inst_.hpp:93
int i
static void hex(unsigned char c)
Definition: mdb_dump.c:56
static void text(MDB_val *v)
Definition: mdb_dump.c:62
static int version
Definition: mdb_load.c:29
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
static size_t rnd(size_t minimal, size_t maximal)
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
const char * tag
#define NCBI_CONST_INT8(v)
64-bit integers
Definition: ncbi_std.h:195
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
std::istream & in(std::istream &in_, double &x_)
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
USING_SCOPE(objects)
static const TFastaOSLTMap kTestFastaOSLTMap
static void s_TestFastaRoundTrip(const char *s)
static const char *const sc_Ids[]
BEGIN_LOCAL_NAMESPACE
static CRef< CSeq_id > s_ParsePDB(const char *str)
static CSeq_id * s_NewDbtagId(const string &db, const string &tag, bool set_as_general=false)
END_LOCAL_NAMESPACE
static const char * kTestFastaStrings[]
static const size_t kNumFastaStrings
BOOST_AUTO_TEST_CASE(s_MTTest)
ostream & operator<<(ostream &out, const CSeq_id_Handle::TMatches &ids)
#define NCBI_CHECK_THROW_SEQID(s)
map< string, vector< string > > TFastaOSLTMap
void s_CheckMatches(const CSeq_id_Handle &id, const vector< CSeq_id_Handle > &ids, const CSeq_id_Handle::TMatches &matches, const CSeq_id_Handle::TMatches &exp_matches, bool strict, const char *type)
NCBITEST_AUTO_INIT()
BOOST_AUTO_PARAM_TEST_CASE(s_TestFastaRoundTrip, kTestFastaStrings+0, kTestFastaStrings+kNumFastaStrings)
void s_Match_id(size_t num_ids, const char *const fasta_ids[], const char *const match_to_ids[], const char *const weak_match_to_ids[], bool strict=true)
static string s_ToASN(const C &obj)
USING_NCBI_SCOPE
CRef< CSeq_loc > GetRandomSegment(CRandom &rnd)
Predicate for sorting CSeq_id_Handles in a defined order.
Compare objects pointed to by (smart) pointer.
Definition: ncbiutil.hpp:67
bool operator()(const CSeq_loc &a, const CSeq_loc &b) const
bool operator()(const CRef< CSeq_loc > &a, const CRef< CSeq_loc > &b) const
bool operator()(const CSeq_loc &a, const CSeq_loc &b) const
bool operator()(const CRef< CSeq_loc > &a, const CRef< CSeq_loc > &b) const
Definition: type.c:6
Utilities for more convenient use of the Boost.Test library.
void g_IgnoreDataFile(const string &pattern, bool do_ignore=true)
Ignore (or stop ignoring, depending on do_ignore) NCBI application data files matching the given pattern.
Definition: util_misc.cpp:182
Modified on Wed Apr 24 14:16:22 2024 by modify_doxy.py rev. 669887