NCBI C++ ToolKit
id_mapper.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: id_mapper.cpp 96325 2022-03-15 15:02:09Z boukn $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Nathan Bouk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
39 #include <objects/seq/Seq_ext.hpp>
45 #include <objmgr/util/sequence.hpp>
46 #include <objmgr/seq_map.hpp>
47 #include <objmgr/seq_map_ci.hpp>
48 //#include <objmgr/seq_loc_mapper.hpp>
52 #include <serial/iterator.hpp>
53 #include <serial/objistr.hpp>
57 
58 
61 
62 
// Timer registry shared with another translation unit (declared extern here).
64 extern TTimerMap TimerMap;
// START_TIMER / STOP_TIMER start or stop the TimerMap entry keyed by the
// stringized macro argument.
65 #define START_TIMER(X) TimerMap[#X].Start()
66 #define STOP_TIMER(X) TimerMap[#X].Stop()
// PRINT_TIMERS posts one ERR_POST message per TimerMap entry, streaming X first.
67 #define PRINT_TIMERS(X) ITERATE (TTimerMap, iter, TimerMap) { \
68  ERR_POST(X << "Timer: " << iter->first << ": " \
69  << iter->second.AsString() << "s"); \
70  }
71 
// Debug aid: writes the current file name and line number to cerr.
72 #define MARKLINE cerr << __FILE__ << ":" << __LINE__ << endl;
73 
// Placeholder inside SIdSpec::Pattern; it is substituted with an ID string
// via NStr::Replace when pattern-based IDs are built in this file.
74 const string DELIM = "%s";
// Tag stored in the 'external' field of the chromosome-name synonyms that
// this file adds to replicon sequences.
75 const string CHROMO_EXT = "<CHROMOSOME_EXTERNAL>";
76 
77 
// Ordering predicate for a longest-first sort: A precedes B only when A is
// strictly longer than B. Strings of equal length are never ordered.
bool
s_RevStrLenSort(const string& A, const string& B)
{
    const size_t LenA = A.length();
    const size_t LenB = B.length();
    return LenA > LenB;
}
83 
// Returns true when every character of A is a decimal digit.
// An empty string yields true, since it contains no non-digit character.
bool
s_IsNumericString(const string& A)
{
    for (string::const_iterator CharIter = A.begin(); CharIter != A.end(); ++CharIter) {
        // isdigit() is undefined for negative arguments other than EOF, and
        // plain char may be signed; convert through unsigned char first.
        if (!isdigit(static_cast<unsigned char>(*CharIter))) {
            return false;
        }
    }
    return true;
}
93 
// Counts how many characters of A are decimal digits.
size_t
s_CountNumeric(const string& A)
{
    size_t Result = 0;
    for (string::const_iterator CharIter = A.begin(); CharIter != A.end(); ++CharIter) {
        // isdigit() is undefined for negative arguments other than EOF, and
        // plain char may be signed; convert through unsigned char first.
        if (isdigit(static_cast<unsigned char>(*CharIter))) {
            ++Result;
        }
    }
    return Result;
}
104 
105 bool
106 s_HasMoreDigits(const string& Name, const string& Chromo)
107 {
108  return (s_CountNumeric(Name) > s_CountNumeric(Chromo));
109 }
110 
111 
// Constructor body; the signature line is not visible in this listing.
// When SourceAsm is null nothing is set up. Otherwise the assembly is
// copied into m_Assembly, PostRead() is run on the copy, its accession is
// remembered in m_SourceAsm, and x_Init() is called.
113 {
114  //m_IdToSeqMap.reserve(300007);
115  //m_AccToVerMap.reserve(300007);
116  //m_ChildToParentMap.reserve(300007);
117 
118  if (SourceAsm.IsNull()) {
119  return;
120  }
// (A line is missing from the listing here; presumably it allocates
// m_Assembly before Assign() is called on it -- TODO confirm.)
122  m_Assembly->Assign(*SourceAsm);
123  m_Assembly->PostRead();
124  m_SourceAsm = m_Assembly->GetAccession();
125  x_Init();
126 }
127 
// Fills Spec with a description of the kind of ID that Loc uses.
// Returns false when Loc has no single ID, when no assembly is loaded, or
// when the ID (or a chromosome name contained in it) is not present in
// m_IdToSeqMap; otherwise returns the result of x_NCBI34_Guess /
// x_MakeSpecForSeq.
128 bool
129 CGencollIdMapper::Guess(const objects::CSeq_loc& Loc, SIdSpec& Spec) const
130 {
131 //#warning FIXME: If it returns null, deeply examine the Loc
132  if (Loc.GetId() == NULL) {
133  return false;
134  }
135  if (m_Assembly.IsNull()) {
// NOTE(review): a null CRef<CSeq_loc> is returned from a bool function;
// presumably this converts to false -- `return false;` would state the intent.
136  return CRef<CSeq_loc>();
137  }
138 
139  CConstRef<CSeq_id> Id(Loc.GetId());
140  Id = x_FixImperfectId(Id, Spec); // But not apply Pattern. This derives Pattern
// Special-case handling for the NCBI34 assembly takes precedence.
141  if (x_NCBI34_Guess(*Id, Spec)) {
142  return true;
143  }
144 
// (The declarations of Idh and Found are not visible in this listing.)
// Fallback: if the ID itself is unknown, look for the first chromosome name
// that occurs as a substring of the ID string and look that name up as a
// local string ID instead.
147  if (Found == m_IdToSeqMap.end()) {
148  const string IdStr = Id->GetSeqIdString(true);
149  ITERATE (vector<string>, ChromoIter, m_Chromosomes) {
150  if (NStr::Find(IdStr, *ChromoIter) != NPOS) {
151  CSeq_id Temp;
152  Temp.SetLocal().SetStr() = *ChromoIter;
153  Idh = CSeq_id_Handle::GetHandle(Temp);
154  Found = m_IdToSeqMap.find(Idh);
155  break;
156  }
157  }
158  if (Found == m_IdToSeqMap.end()) {
159  return false; // Unknown ID
160  }
161  }
162 
163  const CGC_Sequence& Seq = *Found->second;
164  return x_MakeSpecForSeq(*Id, Seq, Spec);
165 }
166 
// Maps Loc into the ID space described by Spec.
// (The return-type line is not visible in this listing; every return
// statement yields a CRef<CSeq_loc>.)
// A null CRef is returned when no assembly is loaded or when none of the
// strategies below produces a non-null result. Strategies, in order:
//   1. Locs without a single ID: mixes are mapped piecewise, packed
//      ints/points are converted to a mix and mapped again.
//   2. Loc already satisfies Spec: a copy of Loc is returned.
//   3. x_CanSeqMeetSpec == e_Yes:  x_Map_OneToOne.
//   4. x_CanSeqMeetSpec == e_Down: x_Map_Down (via the primary-ID form).
//   5. x_CanSeqMeetSpec == e_Up:   x_Map_Up   (via the primary-ID form).
//   6. Chromosome-name lookup, then a recursive Map() of that result.
168 CGencollIdMapper::Map(const objects::CSeq_loc& Loc, const SIdSpec& Spec) const
169 {
170  CRef<CSeq_loc_Mapper> Mapper;
171  if (m_Assembly.IsNull()) {
172  return CRef<CSeq_loc>();
173  }
174 
175  // Recurse down Mixes
176  if (Loc.GetId() == NULL) {
177  if (Loc.IsMix()) {
178  CRef<CSeq_loc> Result(new CSeq_loc());
179  CTypeConstIterator<CSeq_loc> LocIter(Loc);
180  for ( ; LocIter; ++LocIter) {
// Skip the entry that equals the mix itself so it is not mapped again.
181  if (LocIter->Equals(Loc)) {
182  continue;
183  }
184  CRef<CSeq_loc> MappedLoc = Map(*LocIter, Spec);
185  if (MappedLoc.NotNull() && !MappedLoc->IsNull()) {
186  Result->SetMix().Set().push_back(MappedLoc);
187  }
188  }
// Result only becomes a mix if at least one part was mapped.
189  if (Result->IsMix()) {
190  return Result;
191  }
192  }
193  else if (Loc.IsPacked_int() || Loc.IsPacked_pnt()) {
// (The declaration of MixLoc is not visible in this listing.)
195  MixLoc.Assign(Loc);
196  MixLoc.ChangeToMix();
197  return Map(MixLoc, Spec);
198  }
199  return CRef<CSeq_loc>();
200  }
201 
202  CConstRef<CSeq_id> Id(Loc.GetId());
203  Id = x_FixImperfectId(Id, Spec);
204  Id = x_ApplyPatternToId(Id, Spec);
205  Id = x_NCBI34_Map_IdFix(Id);
206 
// If the Loc's own spec already satisfies the requested one, return a copy.
207  SIdSpec GuessSpec;
208  Guess(Loc, GuessSpec);
209  if (Spec.IsSpecMet(GuessSpec)) {
210  CRef<CSeq_loc> Result(new CSeq_loc());
211  Result->Assign(Loc);
212  return Result;
213  }
214 
// (The declaration of Seq, and of Found in each scoped block below, is not
// visible in this listing.)
// Strategy: the sequence itself can supply the requested ID.
216  {{
217  const CSeq_id_Handle Idh = CSeq_id_Handle::GetHandle(*Id);
219  if (Found != m_IdToSeqMap.end()) {
220  Seq = Found->second;
221  if (Seq.NotNull()) {
222  if (x_CanSeqMeetSpec(*Seq, Spec) == e_Yes) {
223  CRef<CSeq_loc> Result = x_Map_OneToOne(Loc, *Seq, Spec);
224  if (Result.NotNull() && !Result->IsNull()) {
225  return Result;
226  }
227  }
228  }
229  }
230  }}
231 
// Strategy: the spec can be met by mapping down.
232  {{
233  const CSeq_id_Handle Idh = CSeq_id_Handle::GetHandle(*Id);
235  if (Found != m_IdToSeqMap.end()) {
236  Seq = Found->second;
237  if (Seq.NotNull()) {
238  if (x_CanSeqMeetSpec(*Seq, Spec) == e_Down) {
239  SIdSpec PrimarySpec;
240  PrimarySpec.Primary = true;
241  CRef<CSeq_loc> PrimaryLoc;
242  // The up-mapper only works with Locs that have the same ID-type
243  // as the Structure is built from, so this step maps the given-loc
244  // sideways to that needed ID. The Up-mapped initial result will be
245  // in the same space, but then it will also be side-mapped to the
246  // requested spec.
247  PrimaryLoc = Map(Loc, PrimarySpec);
248  CRef<CSeq_loc> Result;
// Prefer the primary-ID form of the Loc; fall back to the Loc as given.
249  if(PrimaryLoc.NotNull() && !PrimaryLoc->IsNull())
250  Result = x_Map_Down(*PrimaryLoc, *Seq, Spec);
251  else
252  Result = x_Map_Down(Loc, *Seq, Spec);
253  if (Result.NotNull() && !Result->IsNull()) {
254  return Result;
255  }
// Down-mapping failed: return the primary-ID Loc if there is one.
256  if(PrimaryLoc.NotNull() && !PrimaryLoc->IsNull())
257  return PrimaryLoc;
258  }
259  }
260  }
261  }}
262 
// Strategy: the spec can be met by mapping up to the parent.
263  {{
264  const CSeq_id_Handle Idh = CSeq_id_Handle::GetHandle(*Id);
266  if (Found != m_IdToSeqMap.end()) {
267  Seq = Found->second;
268  if (Seq.NotNull()) {
269  if (x_CanSeqMeetSpec(*Seq, Spec) == e_Up) {
270  SIdSpec PrimarySpec;
271  PrimarySpec.Primary = true;
272  CRef<CSeq_loc> PrimaryLoc;
273  // The up-mapper only works with Locs that have the same ID-type
274  // as the Structure is built from, so this step maps the given-loc
275  // sideways to that needed ID. The Up-mapped initial result will be
276  // in the same space, but then it will also be side-mapped to the
277  // requested spec.
278  PrimaryLoc = Map(Loc, PrimarySpec);
279  if(PrimaryLoc.NotNull() && !PrimaryLoc->IsNull()) {
280  CRef<CSeq_loc> Result = x_Map_Up(*PrimaryLoc, *Seq->GetParent(), Spec);
281  if (Result.NotNull() && !Result->IsNull()) {
282  return Result;
283  }
284  }
285  }
286  }
287  }
288  }}
289 
// Last resort: resolve the ID as a chromosome name, map one-to-one onto that
// sequence, then run the result through Map() again.
290  {{
291  Seq = x_FindChromosomeSequence(*Id, Spec);
292  if (Seq.NotNull()) {
293  CRef<CSeq_loc> Result = x_Map_OneToOne(Loc, *Seq, Spec);
294  if (Result.NotNull() && !Result->IsNull()) {
295  return Map(*Result, Spec);
296  }
297  }
298  }}
299 
300  return CRef<CSeq_loc>();
301 }
302 
// Reports whether Loc's ID can be expressed in the form requested by Spec.
// Returns false when Loc has no single ID or when none of the three
// lookups below yields a sequence whose x_CanSeqMeetSpec result differs
// from e_No.
303 bool
304 CGencollIdMapper::CanMeetSpec(const objects::CSeq_loc& Loc, const SIdSpec& Spec) const
305 {
306 //#warning FIXME: If it returns null, deeply examine the Loc
307  if (Loc.GetId() == NULL) {
308  return false;
309  }
310 
311  CConstRef<CSeq_id> Id(Loc.GetId());
312  Id = x_FixImperfectId(Id, Spec);
313  Id = x_ApplyPatternToId(Id, Spec);
314  Id = x_NCBI34_Map_IdFix(Id);
315 
// Direct lookup of the ID. (The declaration of Found is not visible in this
// listing.)
316  {{
317  const CSeq_id_Handle Idh = CSeq_id_Handle::GetHandle(*Id);
319  if (Found != m_IdToSeqMap.end()) {
320  CConstRef<CGC_Sequence> Seq = Found->second;
// NOTE(review): x_CanSeqMeetSpec returns int, but the value is narrowed to
// bool before being compared with e_No. That is only equivalent if e_No is
// 0 -- confirm against the enum definition.
321  const bool Result = x_CanSeqMeetSpec(*Seq, Spec);
322  if (Result != e_No) {
323  return true;
324  }
325  }
326  }}
327 
328  {{
329  // Look for Parent seq
// (The line that obtains Seq is not visible in this listing.)
331  if (Seq.NotNull()) {
332  const bool Result = x_CanSeqMeetSpec(*Seq, Spec);
333  if (Result != e_No) {
334  return true;
335  }
336  }
337  }}
338 
// Third lookup; the line that obtains Seq is not visible in this listing.
339  {{
341  if (Seq.NotNull()) {
342  const bool Result = x_CanSeqMeetSpec(*Seq, Spec);
343  if (Result != e_No) {
344  return true;
345  }
346  }
347  }}
348 
349  return false;
350 }
351 
352 
// One-time preparation of the loaded assembly (signature line not visible in
// this listing; presumably x_Init, which the constructor calls).
// Normalizes every sequence, records accession versions in m_AccToVerMap,
// collects chromosome names in m_Chromosomes and finally re-runs PostRead().
353 void
355 {
356  bool HideRefSeqAcc = false;
357 
358 
// RefSeq accessions are hidden only for RefSeq-type releases whose status
// is 'gpipe'.
359  if (m_Assembly->GetDesc().CanGetRelease_type() &&
360  m_Assembly->GetDesc().CanGetRelease_status() &&
361  m_Assembly->GetDesc().GetRelease_type() == CGC_AssemblyDesc::eRelease_type_refseq &&
362  m_Assembly->GetDesc().GetRelease_status() == CGC_AssemblyDesc::eRelease_status_gpipe) {
363  HideRefSeqAcc = true;
364  }
365 
366 
// (The declaration of SeqIter is not visible in this listing.)
368  for ( ; SeqIter; ++SeqIter) {
369  x_StripPseudoSeq(*SeqIter);
370  x_RecursiveSeqFix(*SeqIter);
371  x_FillGpipeTopRole(*SeqIter);
372 
373  if(HideRefSeqAcc)
374  x_RemoveHiddenAccessions(*SeqIter);
375 
// (The head of the loop over the child tagged-sequence lists is partly
// missing from this listing.)
377  ChildIter, SeqIter->SetSequences()) {
378  CGC_TaggedSequences& Tagged = **ChildIter;
379  CTypeIterator<CGC_Sequence> InnerIter(Tagged);
380  for ( ; InnerIter; ++InnerIter) {
381  x_StripPseudoSeq(*InnerIter);
382  x_RecursiveSeqFix(*InnerIter);
383  x_FillGpipeTopRole(*InnerIter);
// NOTE(review): the other three calls in this loop use *InnerIter, but this
// one passes the outer *SeqIter -- looks like a copy/paste slip; confirm.
384  if(HideRefSeqAcc)
385  x_RemoveHiddenAccessions(*SeqIter);
386  }
387  }
388  }
389 
391  x_PrioritizeIds();
392 
393 //cout << MSerial_AsnText << *m_Assembly;
394 
395  m_MaxSequenceDepth = 0;
397 
// Record one version per text accession found by IdIter (declaration not
// visible in this listing); IDs without a version are recorded as version 1.
// A later ID with the same accession overwrites the earlier entry.
399  for ( ; IdIter; ++IdIter) {
400  const CTextseq_id* textseqid = IdIter->GetTextseq_Id();
401  if (textseqid != 0) {
402  const string& Acc = textseqid->GetAccession();
403  const int Ver(
404  textseqid->CanGetVersion() ? textseqid->GetVersion() : 1
405  );
406  m_AccToVerMap[Acc] = Ver;
407  }
408  }
409 
// Collect chromosome names: the replicon name when present and non-empty,
// otherwise the submitter name of a single-sequence replicon. Names ending
// in "_random" are excluded.
411  for ( ; ReplIter; ++ReplIter) {
412  if (ReplIter->CanGetName() && !ReplIter->GetName().empty() ) {
413  if(!NStr::EndsWith(ReplIter->GetName(), "_random")) {
414  m_Chromosomes.push_back(ReplIter->GetName());
415  }
416  } else if(ReplIter->IsSetSequence() && ReplIter->GetSequence().IsSingle()) {
417  const CGC_Sequence& SingleSeq = ReplIter->GetSequence().GetSingle();
418  CConstRef<CSeq_id> SubNameId = SingleSeq.GetSubmitterName();
419  if(SubNameId.NotNull()) {
420  const string SubName = SingleSeq.GetSubmitterName()->GetSeqIdString();
421  if (!SubName.empty() &&
422  !NStr::EndsWith(SubName, "_random" )) {
423  m_Chromosomes.push_back(SubName);
424  }
425  }
426  }
427  }
429 
430  // x_Init_SeqLocMappers was changed to only run on demand
431 
432  m_Assembly->PostRead();
433 }
434 
435 
// Sets up three selectors with different resolve counts (signature line not
// visible in this listing; presumably x_Init_SeqLocMappers). The statements
// that consume each selector are missing from the listing, so what is built
// from them cannot be confirmed here.
436 void
438 {
439  /* SetResolveCount(0) limits downmapping to just its immediate record,
440  * so it never down-maps more than one step at a time.
441  * But zero breaks up-mapping. It only returns null.
442  * Leaving Upmapping to SetResolveCount(1) causes it to up-map to top-only,
443  * But the later calls to Map() will down-map from top if we wanted a middle-level.
444  */
// Resolve count 1.
445  {{
446  SSeqMapSelector Sel;
447  Sel.SetResolveCount(1);
449  }}
450 
// Resolve count 0.
451  {{
452  SSeqMapSelector Sel;
453  Sel.SetResolveCount(0);
455  }}
// Third selector; its configuration lines are not visible in this listing.
456  {{
457  SSeqMapSelector Sel;
460  }}
461 }
462 
463 
// Special-case spec guessing for the human (tax id 9606) assembly named
// "NCBI34" (signature line not visible in this listing; presumably
// x_NCBI34_Guess(Id, Spec)).
// Returns true only for accessions NC_000002 / NC_000002.8 and
// NC_000009 / NC_000009.8, after filling in Spec; false in every other case.
464 bool
466 {
467  if (!(m_Assembly->GetTaxId() == TAX_ID_CONST(9606) &&
468  NStr::Equal(m_Assembly->GetName(), "NCBI34")
469  )
470  ) {
471  return false;
472  }
473  const string seqidstr = Id.GetSeqIdString(true);
474  if (NStr::Equal(seqidstr, "NC_000002") || NStr::Equal(seqidstr, "NC_000002.8")) {
// (Two assignments to Spec are missing from this listing here.)
477  Spec.External = kEmptyStr;
478  Spec.Pattern = kEmptyStr;
479  return true;
480  }
481  if (NStr::Equal(seqidstr, "NC_000009") || NStr::Equal(seqidstr, "NC_000009.8")) {
// (Two assignments to Spec are missing from this listing here.)
484  Spec.External = kEmptyStr;
485  Spec.Pattern = kEmptyStr;
486  return true;
487  }
488  return false;
489 }
490 
491 
// ID substitution for the human (tax id 9606) assembly named "NCBI34"
// (signature lines not visible in this listing; presumably
// x_NCBI34_Map_IdFix(SourceId)).
// NC_000002 / NC_000002.8 is replaced with local string ID "2" and
// NC_000009 / NC_000009.8 with local string ID "9"; any other ID, or any
// other assembly, returns SourceId unchanged.
494 {
495  if (!(m_Assembly->GetTaxId() == TAX_ID_CONST(9606) &&
496  NStr::Equal(m_Assembly->GetName(), "NCBI34")
497  )
498  ) {
499  return SourceId;
500  }
501  const string seqidstr = SourceId->GetSeqIdString(true);
502  if (NStr::Equal(seqidstr, "NC_000002") || NStr::Equal(seqidstr, "NC_000002.8")) {
503  CRef<CSeq_id> NewId(new CSeq_id());
504  NewId->SetLocal().SetStr("2");
505  return NewId;
506  }
507  if (NStr::Equal(seqidstr, "NC_000009") || NStr::Equal(seqidstr, "NC_000009.8")) {
508  CRef<CSeq_id> NewId(new CSeq_id());
509  NewId->SetLocal().SetStr("9");
510  return NewId;
511  }
512  return SourceId;
513 }
514 
515 
// Rewrites the primary ID of Seq (signature line not visible in this
// listing; presumably x_StripPseudoSeq(Seq)).
// Unless the guard below returns early (its condition is not visible), the
// primary Seq-id is replaced by the first non-GI ID found among the
// synonyms, and Genbank/Refseq synonym entries are then erased.
516 void
518 {
521  return;
522  }
523 
524  // Get the 'random' ID we want it to have.
525  CSeq_id TopSyn;
526  if (Seq.CanGetSeq_id_synonyms()) {
// (The head of the loop over the synonyms, declaring SynIter, is not visible
// in this listing.)
528  CTypeConstIterator<CSeq_id> IdIter(**SynIter);
529  for ( ; IdIter; ++IdIter) {
530  if (IdIter->IsGi()) {
531  continue;
532  }
533  TopSyn.Assign(*IdIter);
534  break;
535  }
// Stop at the first synonym that supplied a non-GI ID.
536  if(TopSyn.Which() != CSeq_id::e_not_set)
537  break;
538  }
539  }
540 
// NOTE(review): if no non-GI synonym was found, TopSyn is still unset when
// it is assigned here -- confirm that this is intended.
541  Seq.ResetSeq_id();
542  Seq.SetSeq_id().Assign(TopSyn);
543 
544  // If this pseudo has a refseq/genbank syn, erase it
// (The loop head declaring SynIter is not visible in this listing.)
546  if( (*SynIter)->IsGenbank() ||
547  (*SynIter)->IsRefseq() ) {
548  Seq.SetSeq_id_synonyms().erase(SynIter);
549  }
550  }
551 }
552 
553 
// Repairs sequences whose primary ID is unsuitable (signature line not
// visible in this listing; presumably x_RecursiveSeqFix(Seq)).
// The primary Seq-id is replaced by the first non-GI synonym ID when
//   - the primary ID also occurs inside the sequence's own structure,
//   - the primary ID also occurs inside its sub-sequences, or
//   - the primary ID is a GI (plus a condition not visible here) and some
//     synonym ID ends in "_random".
554 void
556 {
557  // Hopefully not the ID that recurses
558  CSeq_id TopSyn;
559  if (Seq.CanGetSeq_id_synonyms()) {
// (The head of the loop over the synonyms, declaring SynIter, is not visible
// in this listing.)
561  CTypeConstIterator<CSeq_id> IdIter(**SynIter);
562  for ( ; IdIter; ++IdIter) {
563  if (IdIter->IsGi()) {
564  continue;
565  }
566  TopSyn.Assign(*IdIter);
567  break;
568  }
// Stop at the first synonym that supplied a non-GI ID.
569  if(TopSyn.Which() != CSeq_id::e_not_set)
570  break;
571  }
572  }
573 
574  // Check if the Seq's Structure recurse
575  if (Seq.CanGetStructure()) {
576  const CSeq_id& TopId = Seq.GetSeq_id();
577  bool DoesRecurse = false;
578  CTypeConstIterator<CSeq_id> StructIdIter(Seq.GetStructure());
579  for ( ; StructIdIter; ++StructIdIter) {
580  if (StructIdIter->Equals(TopId)) {
581  DoesRecurse = true;
582  break;
583  }
584  }
585  if (DoesRecurse) {
586  Seq.ResetSeq_id();
587  Seq.SetSeq_id().Assign(TopSyn);
588  }
589  }
590 
591  // Check if the Seq's sub-sequences recurse
592  if (Seq.CanGetSequences()) {
593  const CSeq_id& TopId = Seq.GetSeq_id();
594  bool DoesRecurse = false;
595  ITERATE (CGC_Sequence::TSequences, TagIter, Seq.GetSequences()) {
596  CTypeConstIterator<CSeq_id> SubSeqIdIter(**TagIter);
597  for ( ; SubSeqIdIter; ++SubSeqIdIter) {
598  if (SubSeqIdIter->Equals(TopId)) {
599  DoesRecurse = true;
600  break;
601  }
602  }
// NOTE(review): the replacement happens inside the TagIter loop, and
// DoesRecurse is never cleared, so it repeats for every later tagged list.
603  if (DoesRecurse) {
604  Seq.ResetSeq_id();
605  Seq.SetSeq_id().Assign(TopSyn);
606  }
607  }
608  }
609 
610  // Bad Random GIs
// (The second half of this condition is not visible in this listing.)
611  if (Seq.GetSeq_id().IsGi() &&
613  //CTypeConstIterator<CSeq_id> IdIter(Seq);
614  bool IsRandom = false;
615  if (Seq.CanGetSeq_id_synonyms()) {
// (The loop head declaring SynIter is not visible in this listing.)
617  CTypeConstIterator<CSeq_id> IdIter(**SynIter);
618  for ( ; IdIter; ++IdIter) {
619  if (NStr::EndsWith(IdIter->GetSeqIdString(), "_random")) {
620  IsRandom = true;
621  break;
622  }
623  }
624  }
625  }
626  if (IsRandom) {
627  Seq.ResetSeq_id();
628  Seq.SetSeq_id().Assign(TopSyn);
629  }
630  }
631 }
632 
// Evaluates whether Seq, rather than its parent, qualifies for an update
// (signature line not visible in this listing; presumably
// x_FillGpipeTopRole(Seq)).
// Seq qualifies when it has the top-level role and a Genbank or Refseq GI.
// The parent qualifies under the analogous test (one of its conditions is
// not visible). The action taken when Seq qualifies and the parent does not
// is missing from this listing.
633 void
635 {
// (The initializer expressions of GenGi and RefGi are not visible here;
// presumably the same GetSynonymSeq_id calls as used for Parent below.)
636  CConstRef<CSeq_id> GenGi(
638  );
639  CConstRef<CSeq_id> RefGi(
641  );
642  const bool SeqHasGi = bool(GenGi) || bool(RefGi);
643 
644  bool SeqQualifies = false;
645  bool ParentQualifies = false;
646  if (Seq.HasRole(eGC_SequenceRole_top_level) && SeqHasGi) {
647  SeqQualifies = true;
648  }
649 
650  CConstRef<CGC_Sequence> Parent = Seq.GetParent();
651  if (Parent.NotNull()) {
// GenGi / RefGi are reused here to hold the parent's GIs.
652  GenGi = Parent->GetSynonymSeq_id(CGC_TypedSeqId::e_Genbank, CGC_SeqIdAlias::e_Gi);
653  RefGi = Parent->GetSynonymSeq_id(CGC_TypedSeqId::e_Refseq, CGC_SeqIdAlias::e_Gi);
654  const bool ParentHasGi = bool(GenGi) || bool(RefGi);
655  if (Parent->HasRole(eGC_SequenceRole_top_level) &&
657  ParentHasGi
658  ) {
659  ParentQualifies = true;
660  }
661  }
662  if (SeqQualifies &&
663  !ParentQualifies &&
665  ) {
667  }
668 }
669 
670 
// Overwrites the public ID of every Refseq synonym of Seq with the public ID
// of its Genbank synonym, and resets the Refseq gpipe ID (signature line not
// visible in this listing; presumably x_RemoveHiddenAccessions(Seq)).
671 void
673 {
674  CSeq_id GenbankAcc;
// First pass: remember the Genbank public ID (the last one wins if several
// exist). The loop-macro line is not visible in this listing.
676  SynIter, Seq.SetSeq_id_synonyms()) {
677  CGC_TypedSeqId& Typed = **SynIter;
678  if(Typed.IsGenbank()) {
679  GenbankAcc.Assign(Typed.SetGenbank().GetPublic());
680  }
681  }
682 
// Second pass: apply it to the Refseq synonyms.
// NOTE(review): when no Genbank synonym exists, GenbankAcc is still unset
// when it is assigned here -- confirm that this is intended.
684  SynIter, Seq.SetSeq_id_synonyms()) {
685  CGC_TypedSeqId& Typed = **SynIter;
686  if(Typed.IsRefseq()) {
687  //Typed.SetRefseq().ResetPublic();
688  Typed.SetRefseq().SetPublic().Assign(GenbankAcc);
689  Typed.SetRefseq().ResetGpipe();
690  }
691  }
692 }
693 
694 
// Adds a chromosome-name synonym to single-sequence replicons (signature
// line not visible in this listing; presumably x_FillChromosomeIds()).
// For each replicon that has a name, a single sequence, synonyms and a
// structure, an External synonym tagged CHROMO_EXT is appended whose ID is a
// local string equal to the replicon name, unless NameFound is set.
695 void
697 {
698  // For animals like Cow, whom's private ID is 'Chr1', create an extra
699  // private ID that is identical to the chromosome name.
// (The declaration of ReplIter is not visible in this listing.)
701  for ( ; ReplIter; ++ReplIter) {
702  if (ReplIter->CanGetName() && ReplIter->CanGetSequence() &&
703  ReplIter->GetSequence().IsSingle() &&
704  ReplIter->GetSequence().GetSingle().CanGetSeq_id_synonyms() &&
705  ReplIter->GetSequence().GetSingle().CanGetStructure() ) {
706  CGC_Sequence& Seq = ReplIter->SetSequence().SetSingle();
707  bool NameFound = false;
// (The head of the loop over the synonyms, declaring `it`, is not visible.)
// NOTE(review): NameFound is overwritten for every Private synonym, so only
// the last Private entry decides the result -- confirm that this is intended.
709  if ((*it)->Which() == CGC_TypedSeqId::e_Private) {
710  NameFound = NStr::Equal((*it)->GetPrivate().GetSeqIdString(), ReplIter->GetName());
711  }
712  }
713  if (!NameFound) {
714  CRef<CGC_TypedSeqId> ChromoId(new CGC_TypedSeqId());
715  ChromoId->SetExternal().SetExternal() = CHROMO_EXT;
716  ChromoId->SetExternal().SetId().SetLocal().SetStr() = ReplIter->GetName();
717  //ChromoId->SetPrivate().SetLocal().SetStr() = ReplIter->GetName();
718  Seq.SetSeq_id_synonyms().push_back(ChromoId);
719  }
720  }
721  }
722 }
723 
// Applies the per-sequence x_PrioritizeIds overload to every sequence
// visited by SeqIter and to every sequence nested in its tagged child lists
// (signature line and SeqIter declaration are not visible in this listing).
724 void
726 {
728  while (SeqIter) {
729  x_PrioritizeIds(*SeqIter);
// (The loop-macro line for the child lists is not visible in this listing.)
731  ChildIter,
732  SeqIter->SetSequences()
733  ) {
734  CGC_TaggedSequences& Tagged = **ChildIter;
// This inner SeqIter shadows the outer iterator of the same name.
735  CTypeIterator<CGC_Sequence> SeqIter(Tagged);
736  while (SeqIter) {
737  x_PrioritizeIds(*SeqIter);
738  ++SeqIter;
739  }
740  }
741  ++SeqIter;
742  }
743 }
744 
745 
// Moves every External synonym whose external tag equals "UCSC" to the
// front of Sequence's synonym list (signature line not visible in this
// listing; the parameter is named Sequence).
746 void
748 {
749  // The only thing we have right now is making UCSC IDs first,
750  // so that they are above 'privite' dupes of UCSC
751  //
752  CGC_Sequence::TSeq_id_synonyms::iterator IdIter;
753  for (IdIter = Sequence.SetSeq_id_synonyms().begin();
754  IdIter != Sequence.SetSeq_id_synonyms().end(); ) {
755 
756  if ((*IdIter)->IsExternal() &&
757  (*IdIter)->GetExternal().IsSetExternal() &&
758  NStr::Equal((*IdIter)->GetExternal().GetExternal(), "UCSC")
759  ) {
// Keep a reference, erase the entry (erase() supplies the iterator used to
// continue), then re-insert it at the front.
760  CRef<CGC_TypedSeqId> CopyId = *IdIter;
761  IdIter = Sequence.SetSeq_id_synonyms().erase(IdIter);
762  Sequence.SetSeq_id_synonyms().push_front(CopyId);
763  }
764  else {
765  ++IdIter;
766  }
767  }
768 }
769 
770 
// Returns true when the lookup result Found is a valid entry of m_IdToSeqMap
// (signature line and the lookup itself are not visible in this listing;
// presumably x_IsExactIdInAssembly(Id)).
771 bool
773 {
776  return (Found != m_IdToSeqMap.end());
777 }
778 
// Reports whether Id can be resolved in this assembly, trying progressively
// looser matches (signature line not visible in this listing):
//   1. the ID exactly as given,
//   2. the ID after x_FixImperfectId,
//   3. the ID after x_NCBI34_Map_IdFix,
//   4. a chromosome-name lookup via x_FindChromosomeSequence.
779 bool
781 {
782  if(x_IsExactIdInAssembly(Id))
783  return true;
784 
// Ignore is a default-constructed spec passed to the helpers below.
785  SIdSpec Ignore;
786  CConstRef<CSeq_id> FuzzyId(&Id);
787 
788  FuzzyId = x_FixImperfectId(FuzzyId, Ignore);
789  if(x_IsExactIdInAssembly(*FuzzyId))
790  return true;
791 
792  FuzzyId = x_NCBI34_Map_IdFix(FuzzyId);
793  if(x_IsExactIdInAssembly(*FuzzyId))
794  return true;
795 
// (The declaration of Seq is not visible in this listing.)
797  Seq = x_FindChromosomeSequence(*FuzzyId, Ignore);
798  if(Seq.NotNull())
799  return true;
800 
801  return false;
802 }
803 
// Normalizes an imperfectly formed ID into a form stored in the assembly
// (the first signature lines are not visible in this listing; presumably
// x_FixImperfectId(Id, Spec)). Spec is not referenced in the visible body.
// Returns Id unchanged when it is already present in the assembly;
// otherwise the fixes below are attempted in order and the possibly
// replaced Id is returned.
806  const SIdSpec& Spec
807  ) const
808 {
809  // Fix up the ID if its not as well formed as it could be.
810  // Because GenColl only stores perfectly formed IDs.
811 
812  // nothing to fix here
813  if(x_IsExactIdInAssembly(*Id))
814  return Id;
815 
816  // Any GI might be a goofy numeric string id, check for it
817  if (Id->IsGi() /*&& Id->GetGi() < GI_CONST(50)*/ ) {
818  CRef<CSeq_id> NewId(new CSeq_id());
819  NewId->SetLocal().SetStr() = NStr::NumericToString(Id->GetGi());
820  if(x_IsExactIdInAssembly(*NewId))
821  Id = NewId;
822  }
823 
824 
825  // Fix PDB-looking ids. There are no PDBs in Gencoll. Any PDB that gets in here
826  // was a mis-identified local string.
827  if(Id->IsPdb()) {
828  // local str "2LHet"
829  //Seq-id ::= pdb {
830  // mol "2LHe",
831  // chain 116 (t)
832  //}
833  string LocalStr;
834 
835  if(Id->GetPdb().CanGetMol())
836  LocalStr = Id->GetPdb().GetMol();
837 
838  if(Id->GetPdb().CanGetChain()) {
839  LocalStr += ((char)(Id->GetPdb().GetChain()));
840  }
841 
// Only a 5-character reconstruction (mol + chain) is tried as a local ID.
842  if(LocalStr.size() == 5) {
843  CRef<CSeq_id> NewId(new CSeq_id);
844  NewId->SetLocal().SetStr(LocalStr);
845  if(x_IsExactIdInAssembly(*NewId))
846  Id = NewId;
847  }
848  }
849 
850  // First make Acc-as-locals into some form of Acc.
851  if (Id->IsLocal() && Id->GetLocal().IsStr()) {
// (A line opening an inner block is not visible in this listing.)
// The local string is re-parsed as a Seq-id; the parsed form is kept only
// when it is neither a GI nor still a local ID.
853  CRef<CSeq_id> TryAcc;
854  TryAcc.Reset(new CSeq_id(Id->GetLocal().GetStr()));
855  if (!TryAcc->IsGi() && !TryAcc->IsLocal()) {
856  Id = TryAcc.GetPointer();
857  }
858  }
859  /*CRef<CSeq_id> TryAcc;
860  try {
861  TryAcc.Reset(new CSeq_id(Id->GetLocal().GetStr()));
862  if (!TryAcc->IsGi() && !TryAcc->IsLocal()) {
863  Id = TryAcc.GetPointer();
864  }
865  } catch(...) {
866  ;
867  }*/
868  }
869 
870  // Second, if the Acc is versionless, see if we can find a version for it
871  // in this assembly.
872  const CTextseq_id* textseqid = Id->GetTextseq_Id();
873  if (textseqid != 0 &&
874  textseqid->IsSetAccession() &&
875  !textseqid->IsSetVersion()
876  ) {
877  if(m_AccToVerMap.find(textseqid->GetAccession()) != m_AccToVerMap.end()) {
878  const int Ver = m_AccToVerMap.find(textseqid->GetAccession())->second;
879  CRef<CSeq_id> NewId(new CSeq_id());
880  NewId->Set(Id->Which(), textseqid->GetAccession(), kEmptyStr, Ver);
// The versioned ID is adopted only if the assembly actually contains it.
881  if(x_IsExactIdInAssembly(*NewId)) {
882  Id = NewId;
883  }
884  }
885  }
886 
887  return Id;
888 }
889 
// Strips the prefix part of Spec.Pattern from a non-text, non-GI ID (the
// first signature lines are not visible in this listing; presumably
// x_ApplyPatternToId(Id, Spec)).
// The prefix is the text of Spec.Pattern before DELIM. Every occurrence of
// it in the ID string is removed, and the result is returned as a new
// local string ID. IDs that have a text-seq-id or are GIs, and empty
// patterns, are returned unchanged.
892  const SIdSpec& Spec
893  ) const
894 {
895  if (Id->GetTextseq_Id() == 0 && !Id->IsGi() && !Spec.Pattern.empty()) {
896  // && Id->GetLocal().GetStr().find(Spec.Pattern) != NPOS
897  CRef<CSeq_id> NewId(new CSeq_id());
898  NewId->SetLocal().SetStr() = Id->GetSeqIdString();
899  string Pre, Post;
900  const size_t DelimPos = Spec.Pattern.find(DELIM);
901  Pre.assign(Spec.Pattern.data(), 0, DelimPos);
902  //Post.assign(Spec.Pattern.data(), DelimPos+DELIM.length(),
903  // Spec.Pattern.length()-DelimPos-DELIM.length());
// Post is never assigned (its code is commented out), so only Pre decides
// this test.
904  if (!Pre.empty() || !Post.empty()) {
905  NStr::ReplaceInPlace(NewId->SetLocal().SetStr(), Pre, kEmptyStr);
906  //NStr::ReplaceInPlace(NewId->SetLocal().SetStr(), Post, kEmptyStr);
907  Id = NewId;
908  }
909  }
910  return Id;
911 }
912 
913 int
914 CGencollIdMapper::x_GetRole(const objects::CGC_Sequence& Seq) const
915 {
916  int SeqRole = SIdSpec::e_Role_NotSet;
917  if (Seq.CanGetRoles()) {
918  ITERATE (CGC_Sequence::TRoles, RoleIter, Seq.GetRoles()) {
919  //if ((*RoleIter) >= eGC_SequenceRole_top_level) {
920  if ((*RoleIter) >= eGC_SequenceRole_submitter_pseudo_scaffold) {
921  continue;
922  }
923  SeqRole = min(SeqRole, *RoleIter);
924  }
925  }
926  return SeqRole;
927 }
928 
// Registers Seq in m_IdToSeqMap under Handle, and records its parent in
// m_ChildToParentMap (signature lines, and the declaration of Handle, are
// not visible in this listing; presumably x_AddSeqToMap(Id, Seq)).
// Sequences whose x_GetRole is e_Role_NotSet are never registered. An
// existing entry is replaced only when the role comparison below does not
// favour keeping it.
929 void
932  )
933 {
934  if (x_GetRole(*Seq) == SIdSpec::e_Role_NotSet) {
935  return;
936  }
938 
939  TIdToSeqMap::iterator Found;
940  Found = m_IdToSeqMap.find(Handle);
941  if (Found != m_IdToSeqMap.end()) {
942  const int OldRole = x_GetRole(*Found->second);
943  const int NewRole = x_GetRole(*Seq);
// Keep the existing entry when its role is set and not greater than the new
// one. (Part of this condition, involving pseudo_scaffold, is not visible in
// this listing.)
944  if (NewRole == SIdSpec::e_Role_NotSet ||
945  (OldRole != SIdSpec::e_Role_NotSet && OldRole <= NewRole &&
946  (OldRole != eGC_SequenceRole_pseudo_scaffold &&
948  ) {
949  return;
950  }
951  //if(Seq->GetSeq_id_synonyms().size() <=
952  // Found->second->GetSeq_id_synonyms().size())
953  // return;
954  m_IdToSeqMap.erase(Found);
955  }
956  m_IdToSeqMap.insert(make_pair(Handle, Seq));
957 
// Record the child -> parent relation. (The second half of the condition,
// presumably testing ParentState, is not visible in this listing.)
958  {{
959  CConstRef<CGC_Sequence> ParentSeq = Seq->GetParent();
960  CGC_TaggedSequences_Base::TState ParentState = Seq->GetParentRelation();
961  if (ParentSeq &&
963  ) {
// ParentIdH is computed but not used in the visible code.
964  const CSeq_id_Handle ParentIdH(
965  CSeq_id_Handle::GetHandle(ParentSeq->GetSeq_id())
966  );
967  m_ChildToParentMap.insert(make_pair(Handle, ParentSeq));
968  }
969  }}
970 }
971 
// Returns the Genbank or Refseq alias set of the typed ID `tsid`, or a null
// CConstRef<CGC_SeqIdAlias> for any other typed-ID choice (signature lines
// not visible in this listing).
974 {
975  const CGC_TypedSeqId::E_Choice syn_type = tsid->Which();
976  const bool is_gb = (syn_type == CGC_TypedSeqId::e_Genbank);
977  if (is_gb || syn_type == CGC_TypedSeqId::e_Refseq) {
978  return ConstRef(is_gb ? &tsid->GetGenbank() : &tsid->GetRefseq());
979  }
980  return CConstRef<CGC_SeqIdAlias>();
981 }
982 
// Registers Seq and, recursively, all of its child sequences in the ID map.
// Seq is registered under its primary ID (subject to the IdCount check
// below) and under each synonym ID: Genbank/Refseq public, gpipe and GI
// forms, External IDs and Private IDs. Depth is passed on, incremented, to
// the recursive calls.
983 void
984 CGencollIdMapper::x_BuildSeqMap(const CGC_Sequence& Seq, int Depth)
985 {
// (A line is missing from this listing here; presumably it uses Depth --
// TODO confirm.)
987 
988  if (Seq.CanGetSeq_id()) {
// Count how often the primary ID occurs anywhere inside Seq; the primary ID
// is registered only when it occurs at most twice.
989  int IdCount = 0;
990  CTypeConstIterator<CSeq_id> IdIter(Seq);
991  for ( ; IdIter; ++IdIter) {
992  if (IdIter->Equals(Seq.GetSeq_id())) {
993  IdCount++;
994  }
995  }
996  if (IdCount <= 2) {
997  x_AddSeqToMap(Seq.GetSeq_id(), ConstRef(&Seq));
998  }
999  }
1000 
1001  if (Seq.CanGetSeq_id_synonyms()) {
1002  CConstRef<CGC_Sequence> SeqRef(&Seq);
// (The head of the loop over the synonyms, declaring `it`, and the line that
// obtains seq_id_alias are not visible in this listing.)
1004  const CGC_TypedSeqId::E_Choice syn_type = (*it)->Which();
1006  if (seq_id_alias.NotNull()) {
1007  if (seq_id_alias->IsSetPublic()) {
1008  x_AddSeqToMap(seq_id_alias->GetPublic(), SeqRef);
1009  }
1010  if (seq_id_alias->IsSetGpipe()) {
1011  x_AddSeqToMap(seq_id_alias->GetGpipe(), SeqRef);
1012  }
1013  if (seq_id_alias->IsSetGi()) {
1014  x_AddSeqToMap(seq_id_alias->GetGi(), SeqRef);
1015  }
1016  }
1017  else if (syn_type == CGC_TypedSeqId::e_External) {
1018  x_AddSeqToMap((*it)->GetExternal().GetId(), SeqRef);
1019  }
1020  else if (syn_type == CGC_TypedSeqId::e_Private) {
1021  x_AddSeqToMap((*it)->GetPrivate(), SeqRef);
1022  }
1023  }
1024  }
1025 
1026  // child sequences
1027  if (Seq.CanGetSequences()) {
1028  ITERATE (CGC_Sequence::TSequences, TagIter, Seq.GetSequences()) {
1029  if (!(*TagIter)->CanGetSeqs()) {
1030  continue;
1031  }
1032  ITERATE (CGC_TaggedSequences::TSeqs, SeqIter, (*TagIter)->GetSeqs()) {
1033  x_BuildSeqMap(**SeqIter, Depth+1);
1034  }
1035  }
1036  }
1037 }
1038 
// Registers every sequence of the assembly unit `assm`: the sequence(s) of
// each replicon in its Mols list, then the sequences of a second list of
// tagged sequences whose loop head is not visible in this listing
// (signature line also not visible).
1039 void
1041 {
1042  if (assm.IsSetMols()) {
1043  ITERATE (CGC_AssemblyUnit::TMols, iter, assm.GetMols()) {
1044  const CGC_Replicon::TSequence& s = (*iter)->GetSequence();
1045  if (s.IsSingle()) {
1046  x_BuildSeqMap(s.GetSingle());
1047  }
1048  else {
// (The loop head over the replicon's sequence set is not visible here.)
1050  x_BuildSeqMap(**it);
1051  }
1052  }
1053  }
1054  }
// (The guard and loop head declaring TagIter are not visible in this
// listing.)
1056  //if( (*TagIter)->GetState() != CGC_TaggedSequences::eState_placed ||
1057  // !(*TagIter)->CanGetSeqs())
1058  // continue;
1059  if (!(*TagIter)->CanGetSeqs()) {
1060  continue;
1061  }
1062  ITERATE (CGC_TaggedSequences::TSeqs, SeqIter, (*TagIter)->GetSeqs()) {
1063  x_BuildSeqMap(**SeqIter);
1064  }
1065  }
1066  /*
1067  if (assm.IsSetUnplaced()) {
1068  ITERATE (CGC_AssemblyUnit::TUnplaced, it, assm.GetUnplaced()) {
1069  x_BuildSeqMap(**it);
1070  }
1071  }*/
1072 }
1073 
// Registers every sequence of the assembly `assm` (signature line not
// visible in this listing). A unit assembly is handed to the unit overload.
// For an assembly set, the "more assemblies" entries are processed
// recursively; a line before that, presumably handling the primary
// assembly, is not visible.
1074 void
1076 {
1077  if (assm.IsUnit()) {
1078  x_BuildSeqMap(assm.GetUnit());
1079  }
1080  else if (assm.IsAssembly_set()) {
1082  if (assm.GetAssembly_set().IsSetMore_assemblies()) {
// (The loop macro and the container expression are not visible here.)
1084  iter,
1086  ) {
1087  x_BuildSeqMap(**iter);
1088  }
1089  }
1090  }
1091 }
1092 
1093 
1094 bool
1095 CGencollIdMapper::x_DoesSeqContainSyn(const objects::CGC_Sequence& Seq, const objects::CSeq_id& Id) const
1096 {
1097  ITERATE (CGC_Sequence::TSeq_id_synonyms, it, Seq.GetSeq_id_synonyms()) {
1098  CTypeConstIterator<CSeq_id> syn_id_iter(**it);
1099  for( ; syn_id_iter; ++syn_id_iter) {
1100  if ( syn_id_iter->Equals(Id)) {
1101  return true;
1102  }
1103  }
1104  }
1105  return false;
1106 }
1107 
// Picks from Seq the ID that matches Spec (the first signature lines are not
// visible in this listing; presumably x_GetIdFromSeqAndSpec(Seq, Spec)).
//   - Spec.Primary: the primary Seq-id, when available.
//   - Otherwise the first synonym whose typed choice equals Spec.TypedChoice:
//       Genbank/Refseq -> the public, gpipe or GI form chosen by Spec.Alias;
//       External       -> the ID whose external tag equals Spec.External;
//       Private        -> the private ID.
//     For External and Private IDs with a non-empty Spec.Pattern, a new
//     local string ID is built by substituting the ID string for DELIM in
//     the pattern.
// Returns a null CConstRef when nothing matches.
1110  const SIdSpec& Spec
1111  ) const
1112 {
1113  if (Spec.Primary && Seq.CanGetSeq_id()) {
1114  return ConstRef(&Seq.GetSeq_id());
1115  }
1116  if (Seq.CanGetSeq_id_synonyms()) {
// (The head of the loop over the synonyms, declaring `it`, is not visible in
// this listing.)
1118  const CGC_TypedSeqId::E_Choice syn_type = (*it)->Which();
1119  if (syn_type != Spec.TypedChoice) {
1120  continue;
1121  }
// (The line that obtains seq_id_alias is not visible in this listing.)
1123  if (seq_id_alias.NotNull()) {
1124  if (seq_id_alias->IsSetPublic() && Spec.Alias == CGC_SeqIdAlias::e_Public) {
1125  return ConstRef(&seq_id_alias->GetPublic());
1126  }
1127  if (seq_id_alias->IsSetGpipe() && Spec.Alias == CGC_SeqIdAlias::e_Gpipe) {
1128  return ConstRef(&seq_id_alias->GetGpipe());
1129  }
1130  if (seq_id_alias->IsSetGi() && Spec.Alias == CGC_SeqIdAlias::e_Gi) {
1131  return ConstRef(&seq_id_alias->GetGi());
1132  }
1133  }
1134  else if (syn_type == CGC_TypedSeqId::e_External) {
1135  const CGC_External_Seqid& ExternalId = (*it)->GetExternal();
1136  if (ExternalId.GetExternal() == Spec.External) {
1137  if (Spec.Pattern.empty()) {
1138  return ConstRef(&ExternalId.GetId());
1139  }
1140  CRef<CSeq_id> NewId(new CSeq_id());
1141  NewId->SetLocal().SetStr(
1142  NStr::Replace(Spec.Pattern,
1143  DELIM,
1144  ExternalId.GetId().GetSeqIdString()
1145  )
1146  );
1147  //NewId->SetLocal().SetStr() = Spec.Pattern + ExternalId.GetId().GetSeqIdString();
1148  return NewId;
1149  }
1150  }
1151  else if (syn_type == CGC_TypedSeqId::e_Private) {
1152  const CSeq_id& Private = (*it)->GetPrivate();
1153  if (Spec.Pattern.empty()) {
1154  return ConstRef(&Private);
1155  }
1156  CRef<CSeq_id> NewId(new CSeq_id());
1157  NewId->SetLocal().SetStr(NStr::Replace(Spec.Pattern, DELIM, Private.GetSeqIdString()));
1158  //NewId->SetLocal().SetStr() = Spec.Pattern + Private.GetSeqIdString();
1159  return NewId;
1160  }
1161  }
1162  }
1163 
1164 
1165  return CConstRef<CSeq_id>();
1166 }
1167 
// Decides how Seq can satisfy Spec (the first signature line is not visible
// in this listing; presumably x_CanSeqMeetSpec(Seq, Spec, Level)).
// Returns:
//   e_Yes  - Seq itself has a matching ID and matches the Top / Role demand;
//   e_Up   - the parent chain reports e_Yes or e_Up;
//   e_Down - a sequence in a 'placed' child list reports e_Yes or e_Down;
//   e_No   - none of the above, or Level equals m_MaxSequenceDepth.
1168 int
1170  const SIdSpec& Spec,
1171  const int Level
1172  ) const
1173 {
// Stops the recursion once Level equals m_MaxSequenceDepth.
1174  if (Level == m_MaxSequenceDepth) {
1175  return e_No;
1176  }
1177  //int SeqRole = x_GetRole(Seq);
1178  bool HasId = false;
1179 
1180  if (Spec.Primary && Seq.CanGetSeq_id()) {
1181  HasId = true;
1182  }
1183  if (!Spec.Primary && Seq.CanGetSeq_id_synonyms()) {
// (The head of the loop over the synonyms, declaring `it`, is not visible in
// this listing.)
1185  const CGC_TypedSeqId::E_Choice syn_type = (*it)->Which();
1186  if (syn_type != Spec.TypedChoice) {
1187  continue;
1188  }
1189 
1190  const bool alias_is_public = (Spec.Alias == CGC_SeqIdAlias::e_Public);
1191  const bool alias_is_gpipe = (Spec.Alias == CGC_SeqIdAlias::e_Gpipe);
1192  const bool alias_is_gi = (Spec.Alias == CGC_SeqIdAlias::e_Gi);
1193 
// (The line that obtains seq_id_alias is not visible in this listing.)
1195  if (seq_id_alias.NotNull()) {
1196  if ((seq_id_alias->CanGetPublic() && alias_is_public) ||
1197  (seq_id_alias->CanGetGpipe() && alias_is_gpipe) ||
1198  (seq_id_alias->CanGetGi() && alias_is_gi)
1199  ) {
1200  HasId = true;
1201  }
1202  }
1203  else if (syn_type == CGC_TypedSeqId::e_External) {
1204  if ((*it)->GetExternal().GetExternal() == Spec.External) {
1205  HasId = true;
1206  }
1207  }
1208  else if (syn_type == CGC_TypedSeqId::e_Private) {
1209  HasId = true;
1210  }
1211  }
1212  }
1213  if (HasId) {
1214  if(Spec.Top) {
1215  if(Spec.Top == Seq.HasRole(eGC_SequenceRole_top_level))
1216  return e_Yes;
1217  } else {
1218  if (Spec.Role == SIdSpec::e_Role_NotSet || Seq.HasRole(Spec.Role)) {
1219  // Has the ID match, and the Role either doesn't matter, or is matched
1220  return e_Yes;
1221  }
1222  }
1223  }
// (The lines opening the enclosing block for the parent check are not
// visible in this listing.)
// The parent is consulted only when a role or Top is requested and the
// requested role is at least top_level or not greater than the parent's role.
1226  CConstRef<CGC_Sequence> ParentSeq = Seq.GetParent();
1227  if (ParentSeq.NotNull()) {
1228  if (((Spec.Role != SIdSpec::e_Role_NotSet || Spec.Top) &&
1229  (Spec.Role >= eGC_SequenceRole_top_level || Spec.Role <= x_GetRole(*ParentSeq)))
1230  ) {
1231  const int Parent = x_CanSeqMeetSpec(*ParentSeq, Spec, Level + 1);
1232  if ((Parent == e_Yes || Parent == e_Up)) {
1233  return e_Up;
1234  }
1235  }
1236  }
1237  }
// Only child lists in state 'placed' are searched for a downward match.
1238  if (Seq.CanGetSequences() ) {
1239  ITERATE (CGC_Sequence::TSequences, TagIter, Seq.GetSequences()) {
1240  if ((*TagIter)->GetState() != CGC_TaggedSequences::eState_placed) {
1241  continue;
1242  }
1243  ITERATE (CGC_TaggedSequences::TSeqs, SeqIter, (*TagIter)->GetSeqs()) {
1244  const int Child = x_CanSeqMeetSpec(**SeqIter, Spec, Level + 1);
1245  if (Child == e_Yes || Child == e_Down) {
1246  return e_Down;
1247  }
1248  }
1249  }
1250  }
1251  return e_No;
1252 }
1253 
// CGencollIdMapper::x_MakeSpecForSeq(Id, Seq, Spec) -- name and the Id parameter
// are taken from this page's cross-reference index; the line that spells them
// is not part of this listing.
//
// Fills Spec with a description of which of Seq's identifiers matches Id.
// Returns true as soon as a match has been recorded in Spec, false when neither
// the exact pass nor the pattern pass finds anything.
//
// NOTE(review): the embedded listing numbers are not contiguous. The loop
// headers that declare `it`, the declaration of `seq_id_alias`, some Spec
// assignments and the condition that opens the block closed at 1319 are on
// omitted lines; comments below describe only what is visible.
1254 bool
1256  const CGC_Sequence& Seq,
1257  SIdSpec& Spec
1258  ) const
1259 {
      // Reset the output fields (visible ones: Primary, Top, External, Pattern).
1260  Spec.Primary = false;
1264  Spec.Top = false;
1265  Spec.External.clear();
1266  Spec.Pattern.clear();
1267 
      // Id is the sequence's own Seq_id.
1268  if(Id.Equals(Seq.GetSeq_id()))
1269  Spec.Primary = true;
1270 
1271  if (Seq.CanGetRoles()) {
1272  Spec.Role = x_GetRole(Seq);
1274  }
1275  // Loop over the IDs, find which matches the given ID
1276  if (Seq.CanGetSeq_id_synonyms()) {
1278  const CGC_TypedSeqId::E_Choice syn_type = (*it)->Which();
      // Alias-bearing synonym: compare Id against the public, gpipe and gi
      // aliases in that order and return on the first one that is equal.
1280  if (seq_id_alias.NotNull()) {
1281  if (seq_id_alias->IsSetPublic() && seq_id_alias->GetPublic().Equals(Id)) {
1282  Spec.TypedChoice = syn_type;
1284  return true;
1285  }
1286  if (seq_id_alias->IsSetGpipe() && seq_id_alias->GetGpipe().Equals(Id)) {
1287  Spec.TypedChoice = syn_type;
1289  return true;
1290  }
1291  if (seq_id_alias->IsSetGi() && seq_id_alias->GetGi().Equals(Id)) {
1292  Spec.TypedChoice = syn_type;
1293  Spec.Alias = CGC_SeqIdAlias::e_Gi;
1294  return true;
1295  }
1296  }
1297  else if (syn_type == CGC_TypedSeqId::e_External) {
1298  const CGC_External_Seqid& ExternalId = (*it)->GetExternal();
1299  if (ExternalId.GetId().Equals(Id)) {
1301  Spec.External = ExternalId.GetExternal();
1302  return true;
1303  }
1304  }
1305  else if (syn_type == CGC_TypedSeqId::e_Private &&
1306  (*it)->GetPrivate().Equals(Id)
1307  ) {
      // Deliberately no return here; see the explanation below.
1309  //return true;
1310  //Externals are better than Private.
1311  //But private is in the list first.
1312  //So continue the list for Privates and let External overwrite it, if it matches
1313  }
1314  }
1315  }
1317  // was found as a skipped-over Private
1318  return true;
1319  }
1320 
1321  // If we didn't find it normally, try again, looking for Pattern matches
1322  if (Seq.CanGetSeq_id_synonyms()) {
1324  const CGC_TypedSeqId::E_Choice syn_type = (*it)->Which();
1325  switch (syn_type) {
1327  {
1328  const CGC_External_Seqid& ExternalId = (*it)->GetExternal();
      // Only externals whose tag equals CHROMO_EXT take part in pattern matching.
1329  if (!NStr::Equal(ExternalId.GetExternal(), CHROMO_EXT)) {
1330  continue;
1331  }
      // NOTE(review): the call that produces Start is on an omitted line;
      // presumably it searches Id's string for the external id string -- confirm.
1332  const size_t Start(
1334  ExternalId.GetId().GetSeqIdString()
1335  )
1336  );
1337  if (Start != NPOS) {
1338  Spec.TypedChoice = syn_type;
1339  Spec.External = ExternalId.GetExternal();
1340  //Spec.Pattern = NStr::Replace(Id.GetSeqIdString(), Private.GetSeqIdString(), DELIM);
      // Pattern = the part of Id's string before position Start, followed by DELIM.
1341  Spec.Pattern = Id.GetSeqIdString().substr(0, Start) + DELIM;
1342  return true;
1343  }
1344  break;
1345  }
1347  {
1348  const CSeq_id& Private = (*it)->GetPrivate();
1349  const size_t Start(
1351  Private.GetSeqIdString()
1352  )
1353  );
1354  if (Start != NPOS) {
1355  Spec.TypedChoice = syn_type;
1356  //Spec.Pattern = NStr::Replace(Id.GetSeqIdString(), Private.GetSeqIdString(), DELIM);
      // Same prefix-plus-DELIM construction as the external case above.
1357  Spec.Pattern = Id.GetSeqIdString().substr(0, Start) + DELIM;
1358  return true;
1359  }
1360  break;
1361  }
1362  default:
1363  break;
1364  }
1365  }
1366  }
1367  return false;
1368 }
1369 
// Finds the sequence recorded or detected as the parent of Id within Assembly.
//
// Id        identifier of the child sequence.
// Assembly  assembly (set or unit) to search.
// Depth     current recursion depth; callers start at the default declared in
//           the header.
// Returns the parent sequence, or a null CConstRef when none is found or the
// depth limit is exceeded.
//
// NOTE(review): the listing omits the line that declares `Found`; the
// comparison with m_ChildToParentMap.end() suggests a lookup of IdH in
// m_ChildToParentMap -- confirm.
1371 CGencollIdMapper::x_FindParentSequence(const objects::CSeq_id& Id,
1372  const objects::CGC_Assembly& Assembly,
1373  const int Depth
1374  ) const
1375 {
1376  CConstRef<CGC_Sequence> Result;
1377  const CSeq_id_Handle IdH = CSeq_id_Handle::GetHandle(Id);
      // A hit in the child-to-parent map answers the query directly.
1379  if (Found != m_ChildToParentMap.end()) {
1380  return Found->second;
1381  }
      // Stop descending once Depth exceeds 5; log a warning and return null.
1382  if (Depth > 5) {
1383  LOG_POST(Warning << "x_FindParentSequence: Depth Bounce " << Id.AsFastaString());
1384  return Result;
1385  }
1386 
      // Assembly set: try the primary assembly first, then each additional
      // assembly, returning the first non-null result.
1387  if (Assembly.IsAssembly_set()) {
1388  const CGC_AssemblySet& AssemblySet = Assembly.GetAssembly_set();
1389  if (AssemblySet.CanGetPrimary_assembly()) {
1390  Result = x_FindParentSequence(Id, AssemblySet.GetPrimary_assembly(), Depth + 1);
1391  if (Result.NotNull()) {
1392  return Result;
1393  }
1394  }
1395  if (AssemblySet.CanGetMore_assemblies()) {
1396  ITERATE (CGC_AssemblySet::TMore_assemblies, AssemIter, AssemblySet.GetMore_assemblies()) {
1397  Result = x_FindParentSequence(Id, **AssemIter, Depth + 1);
1398  if (Result.NotNull()) {
1399  return Result;
1400  }
1401  }
1402  }
1403  }
      // Assembly unit: test every molecule whose sequence is a single
      // CGC_Sequence with x_IsParentSequence.
1404  else if (Assembly.IsUnit()) {
1405  const CGC_AssemblyUnit& AsmUnit = Assembly.GetUnit();
1406  if (AsmUnit.CanGetMols()) {
1407  ITERATE (CGC_AssemblyUnit::TMols, MolIter, AsmUnit.GetMols()) {
1408  if ((*MolIter)->GetSequence().IsSingle()) {
1409  const CGC_Sequence& Parent = (*MolIter)->GetSequence().GetSingle();
1410  if (x_IsParentSequence(Id, Parent)) {
1411  return ConstRef(&Parent);
1412  } // end IsParent if
1413  } // end Seq Single
1414  } // end Mols Loop
1415  } // end Mols
1416  } // end AsmUnit
1417  return Result;
1418 }
1419 
// CGencollIdMapper::x_IsParentSequence(Id, Parent) -- the line carrying the
// function name and the Id parameter is omitted from this listing; both are
// taken from the page's cross-reference index.
//
// Returns true when one of Parent's directly nested, placed sequences is
// identified by Id, either through its Seq_id or through any CSeq_id found
// inside one of its synonyms. Only one level of nesting is inspected.
1420 bool
1422  const CGC_Sequence& Parent
1423  ) const
1424 {
1425  if (!Parent.CanGetSequences()) {
1426  return false;
1427  }
1428  ITERATE (CGC_Sequence::TSequences, ChildIter, Parent.GetSequences()) {
      // Skip tagged groups that are not in the "placed" state or carry no seqs.
1429  if ((*ChildIter)->GetState() != CGC_TaggedSequences::eState_placed ||
1430  !(*ChildIter)->CanGetSeqs()
1431  ) {
1432  continue;
1433  }
1434  ITERATE (CGC_TaggedSequences::TSeqs, SeqIter, (*ChildIter)->GetSeqs()) {
1435  if ((*SeqIter)->GetSeq_id().Equals(Id)) {
1436  return true;
1437  }
      // NOTE(review): GetSeq_id_synonyms() is called without a preceding
      // CanGetSeq_id_synonyms() check, unlike other code in this file.
1438  ITERATE (CGC_Sequence::TSeq_id_synonyms, SynIter, (*SeqIter)->GetSeq_id_synonyms()) {
1439  const CGC_TypedSeqId& TypedId = **SynIter;
      // Visit every CSeq_id contained in the typed id, whatever its variant.
1440  CTypeConstIterator<CSeq_id> IdIter(TypedId);
1441  while (IdIter) {
1442  if (IdIter->Equals(Id)) {
1443  return true;
1444  }
1445  ++IdIter;
1446  }
1447  //if(TypedId.IsRefseq() && TypedId.GetRefseq().CanGetPublic()) {
1448  // if (TypedId.GetRefseq().GetPublic().Equals(Id)) {
1449  // return true;
1450  // }
1451  //}
1452  }
1453  }
1454  }
1455  return false;
1456 }
1457 
// Body of CGencollIdMapper::x_FindChromosomeSequence(Id, Spec); the signature
// lines are omitted from this listing and the name comes from the page's
// cross-reference index.
//
// Tries to recognise a chromosome name from m_Chromosomes inside Id's string
// and returns the sequence registered in m_IdToSeqMap for that name, or a null
// CConstRef when nothing acceptable is found.
//
// NOTE(review): the condition guarding the second early return and the
// declarations of `Found` and `Idh` are on omitted lines. `Idh` is presumably
// the handle of Temp -- confirm.
1460 {
      // GI ids above 1000 are never treated as chromosome names.
1461  if (Id.IsGi() && Id.GetGi() > GI_CONST(1000)) {
1462  return CConstRef<CGC_Sequence>();
1463  }
1465  return CConstRef<CGC_Sequence>();
1466  }
1467 
      // Id string including version; ids mentioning "random" or "decoy" are rejected.
1468  const string IdStr = Id.GetSeqIdString(true);
1469  if(IdStr.find("random") != NPOS)
1470  return CConstRef<CGC_Sequence>();
1471  if(IdStr.find("decoy") != NPOS)
1472  return CConstRef<CGC_Sequence>();
1473 
1474 
1476  ITERATE (vector<string>, ChromoIter, m_Chromosomes) {
1477  bool IsNumeric = s_IsNumericString(*ChromoIter);
1478 
1479  if (NStr::Find(IdStr, *ChromoIter) != NPOS) {
      // Start/End are the inclusive bounds of the first occurrence in IdStr.
1480  size_t Start = NStr::Find(IdStr, *ChromoIter);
1481  size_t End = Start + ChromoIter->length()-1;
1482  if(IsNumeric &&
1483  ((Start > 0 && isdigit(IdStr[Start-1])) ||
1484  (End+1 < IdStr.length() && isdigit(IdStr[End+1])) )) {
1485  // Matching region is in a run of digits, and not
1486  // the entire run of digits. Does not count.
1487  continue;
1488  }
1489 
1490  if(IsNumeric && s_HasMoreDigits(IdStr, *ChromoIter)) {
1491  continue;
1492  }
1493 
      // Build a local Seq-id whose string is the chromosome name.
1494  CRef<CSeq_id> Temp(new CSeq_id());
1495  Temp->SetLocal().SetStr() = *ChromoIter;
1496  // If we have a pattern, double check it.
1497  /*if (!Spec.Pattern.empty() &&
1498  Id.Equals(*x_ApplyPatternToId(Temp, Spec))) {
1499  CSeq_id_Handle Idh = CSeq_id_Handle::GetHandle(*Temp);
1500  Found = m_IdToSeqMap.find(Idh);
1501  break;
1502  }
1503  // If we have no pattern, just trust the string.find()
1504  else if (Spec.Pattern.empty())*/ {
      // The first accepted chromosome name ends the search, whether or not
      // the lookup succeeds.
1506  Found = m_IdToSeqMap.find(Idh);
1507  break;
1508  }
1509  }
1510  }
1511  if (Found != m_IdToSeqMap.end()) {
1512  return Found->second;
1513  }
1514  return CConstRef<CGC_Sequence>();
1515 }
1516 
// Tail of the signature and body of CGencollIdMapper::x_Map_OneToOne(SourceLoc,
// Seq, Spec); the lines with the return type, name and SourceLoc parameter are
// omitted from this listing (taken from the page's cross-reference index).
//
// Returns a copy of SourceLoc in which every CSeq_id has been overwritten with
// the id that x_GetIdFromSeqAndSpec selects for Seq under Spec. Returns a null
// CRef when no such id exists.
1519  const CGC_Sequence& Seq,
1520  const SIdSpec& Spec
1521  ) const
1522 {
1523  //if(!x_CanSeqMeetSpec(Seq, Spec))
1524  // return CRef<CSeq_loc>();
1525  CConstRef<CSeq_id> DestId = x_GetIdFromSeqAndSpec(Seq, Spec);
1526  if (DestId.IsNull()) {
1527  return CRef<CSeq_loc>();
1528  }
1529 
1530  CRef<CSeq_loc> Result(new CSeq_loc());
1531  Result->Assign(SourceLoc);
1532 
      // Walk all CSeq_id objects inside the copy and replace each in place.
1533  CTypeIterator<CSeq_id> IdIter(*Result);
1534  for ( ; IdIter; ++IdIter) {
1535  IdIter->Assign(*DestId);
1536  }
1537  return Result;
1538 }
1539 
// NOTE(review): the line carrying this function's name and its `Bioseq`
// parameter is omitted from this listing, and the function does not appear in
// the page's cross-reference index.
//
// Returns true when any of Bioseq's own ids is equal to any CSeq_id found
// inside Bioseq.GetInst(); false otherwise.
1540 bool
1542 {
1543  ITERATE (CBioseq::TId, IdIter, Bioseq.GetId()) {
      // A fresh iterator over the ids inside the Inst is created for each own id.
1544  CTypeConstIterator<CSeq_id> PartIter(Bioseq.GetInst());
1545  for ( ; PartIter; ++PartIter) {
1546  if ((*IdIter)->Equals(*PartIter)) {
1547  return true;
1548  }
1549  }
1550  }
1551  return false;
1552 }
1553 
// Tail of the signature and body of CGencollIdMapper::x_Map_Up(SourceLoc, Seq,
// Spec); the lines with the return type, name and SourceLoc parameter are
// omitted from this listing (taken from the page's cross-reference index).
//
// Maps SourceLoc with m_UpMapper and, when that changes the location, passes
// the result through Map() again with the same Spec.
//
// NOTE(review): the statement executed when m_UpMapper is null is on an
// omitted line; presumably it initialises the mappers -- confirm.
1556  const CGC_Sequence& Seq,
1557  const SIdSpec& Spec
1558  ) const
1559 {
1560  if(m_UpMapper.IsNull()) {
1562  }
1563 
      // Seq already carries SourceLoc's id as a synonym: return a plain copy.
      // NOTE(review): SourceLoc.GetId() is dereferenced unchecked; the tooltip
      // for CSeq_loc::GetId on this page says it returns NULL for locations
      // with several ids or none.
1564  if(x_DoesSeqContainSyn(Seq, *(SourceLoc.GetId()) )) {
1565  CRef<CSeq_loc> Result(new CSeq_loc);
1566  Result->Assign(SourceLoc);
1567  return Result;
1568  }
1569 
1570  CRef<CSeq_loc> Result;
1571  Result = m_UpMapper->Map(SourceLoc);
1572  if(!Result.IsNull() && !Result->IsNull()) {
      // An unchanged location is returned as is, avoiding another Map() call
      // on the same input.
1573  if(Result->Equals(SourceLoc))
1574  return Result;
1575  Result = Map(*Result, Spec);
1576  }
1577  return Result;
1578 }
1579 
// Tail of the signature and body of CGencollIdMapper::x_Map_Down(SourceLoc, Seq,
// Spec); the lines with the return type, name and SourceLoc parameter, and the
// opening statements of the body, are omitted from this listing (name taken
// from the page's cross-reference index).
//
// Maps SourceLoc with the shallow down-mapper, falling back to the deep one,
// and passes a changed result through Map() again with the same Spec.
1582  const CGC_Sequence& Seq,
1583  const SIdSpec& Spec
1584  ) const
1585 {
1588  }
1589 
1590  CRef<CSeq_loc> Result;
      // First attempt: shallow mapper; if it yields nothing, use the deep mapper.
1591  Result = m_DownMapper_Shallow->Map(SourceLoc);
1592  if(Result.IsNull() || Result->IsNull()) {
1593  Result = m_DownMapper_Deep->Map(SourceLoc);
1594  }
1595  if(!Result.IsNull() && !Result->IsNull()) {
      // The mapping left the location unchanged: retry with the deep mapper.
1596  if(Result->Equals(SourceLoc)) {
1597  Result = m_DownMapper_Deep->Map(SourceLoc);
1598  if(!Result.IsNull() && !Result->IsNull()) {
      // Still unchanged after the deep mapping: report failure with a null CRef.
1599  if(Result->Equals(SourceLoc)) {
1600  return CRef<CSeq_loc>();
1601  }
1602  Result = Map(*Result, Spec);
1603  }
1604  return Result;
1605  }
1606  Result = Map(*Result, Spec);
1607  }
1608  return Result;
1609 }
1610 
// Body of CGencollIdMapper::IsLocInAGap(Loc); the signature lines are omitted
// from this listing (name taken from the page's cross-reference index).
//
// Classifies Loc against gaps by delegating to x_IsLoc_Int_InAGap. Mix and
// packed-int locations are evaluated piece by piece and combined with
// x_Merge_E_Gaps. Location kinds other than mix, packed-int, int and pnt
// yield e_None.
1613 {
1614  if (Loc.IsMix()) {
1615  E_Gap Result = e_None;
1616  ITERATE (CSeq_loc_mix::Tdata, LocIter, Loc.GetMix().Get()) {
      // Recurse so that nested locations of any supported kind are handled.
1617  E_Gap Curr = IsLocInAGap(**LocIter);
1618  Result = x_Merge_E_Gaps(Result, Curr);
1619  }
1620  return Result;
1621  }
1622  if (Loc.IsPacked_int()) {
1623  E_Gap Result = e_None;
1624  ITERATE (CPacked_seqint::Tdata, IntIter, Loc.GetPacked_int().Get()) {
1625  E_Gap Curr = x_IsLoc_Int_InAGap(**IntIter);
1626  Result = x_Merge_E_Gaps(Result, Curr);
1627  }
1628  return Result;
1629  }
1630  if (Loc.IsInt()) {
1631  return x_IsLoc_Int_InAGap(Loc.GetInt());
1632  }
1633  if (Loc.IsPnt()) {
      // Treat the point as an interval whose From and To are both the point.
1634  CSeq_interval Int;
1635  Int.SetId().Assign(Loc.GetPnt().GetId());
1636  Int.SetFrom(Loc.GetPnt().GetPoint());
1637  Int.SetTo(Loc.GetPnt().GetPoint());
1638  return x_IsLoc_Int_InAGap(Int);
1639  }
1640  return e_None;
1641 }
1642 
// Body of CGencollIdMapper::GetInternalGencoll(); the signature lines are
// omitted from this listing (name taken from the page's cross-reference index).
// Returns the mapper's m_Assembly member.
1645 {
1646  return m_Assembly;
1647 }
1648 
// Body of CGencollIdMapper::x_IsLoc_Int_InAGap(Int); the signature lines are
// omitted from this listing (name taken from the page's cross-reference index).
//
// Compares the interval [Int.GetFrom(), Int.GetTo()] with the literal entries
// of the sequence's delta structure and reports the relation for the first
// literal that intersects it:
//   e_Spans      the intersection equals the literal's range
//   e_Contained  the intersection equals the interval
//   e_Overlaps   any other non-empty intersection
// Positions at or beyond the summed length of the structure are handled like
// one trailing literal. Returns e_None when the sequence is not found, has no
// structure, or nothing intersects.
//
// NOTE(review): the declaration of `Found` is on omitted lines; presumably a
// lookup of Int's id in m_IdToSeqMap -- confirm.
1651 {
1652  CRange<TSeqPos> LocRange;
1653  LocRange.SetFrom(Int.GetFrom());
1654  LocRange.SetTo(Int.GetTo());
1655 
1658 
1659  if (Found != m_IdToSeqMap.end()) {
1660  CConstRef<CGC_Sequence> Seq = Found->second;
1661  if (!Seq->CanGetStructure()) {
1662  return e_None;
1663  }
1664 
1665  const CDelta_ext& DeltaExt = Seq->GetStructure();
      // Start is the running offset of the current delta entry.
1666  TSeqPos Start = 0;
1667  ITERATE (CDelta_ext::Tdata, SeqIter, DeltaExt.Get()) {
1668  const CDelta_seq& DeltaSeq = **SeqIter;
1669  if (DeltaSeq.IsLoc()) {
      // NOTE(review): assumes every Loc entry is an interval (GetInt()).
1670  Start += DeltaSeq.GetLoc().GetInt().GetLength();
1671  continue;
1672  }
1673  if (DeltaSeq.IsLiteral()) {
      // NOTE(review): a literal of length 0 would make the upper bound
      // Start - 1, which wraps for unsigned TSeqPos when Start is 0.
1674  CRange<TSeqPos> GapRange(
1675  Start,
1676  Start + DeltaSeq.GetLiteral().GetLength() - 1
1677  );
1678  Start += DeltaSeq.GetLiteral().GetLength();
1679  CRange<TSeqPos> Intersect = LocRange.IntersectionWith(GapRange);
1680  if (Intersect.Empty()) {
1681  continue;
1682  }
      // The e_Spans test comes first, so an interval exactly equal to the
      // literal's range is reported as e_Spans.
1683  if (Intersect == GapRange) {
1684  return e_Spans;
1685  }
1686  if (Intersect == LocRange) {
1687  return e_Contained;
1688  }
1689  return e_Overlaps;
1690  }
1691  }
      // Everything from the end of the structure up to the largest TSeqPos.
1692  CRange<TSeqPos> ExtendRange(Start, numeric_limits<TSeqPos>::max());
1693  CRange<TSeqPos> Intersect = LocRange.IntersectionWith(ExtendRange);
1694  if (Intersect == LocRange) {
1695  return e_Contained;
1696  }
1697  if (!Intersect.Empty()) {
1698  return e_Overlaps;
1699  }
1700  }
1701 
1702  return e_None;
1703 }
1704 
// Combines two gap classifications into one (the return-type line is omitted
// from this listing; the page's cross-reference index gives E_Gap).
//
//   First == e_None   -> Second
//   First != Second   -> e_Complicated
//   First == Second   -> First
//
// NOTE(review): the function is not symmetric in e_None. (e_None, X) yields X,
// but (X, e_None) yields e_Complicated for any X other than e_None -- confirm
// that this is intended.
1706 CGencollIdMapper::x_Merge_E_Gaps(const E_Gap First, const E_Gap Second) const
1707 {
1708  if (First == e_None) {
1709  return Second;
1710  }
1711  if (First != Second) {
1712  return e_Complicated;
1713  }
1714  // First == e_Complicated || First == Second || any other case
1715  return First;
1716 }
1717 
1718 
// CGencollIdMapper::GetSynonyms(BaseId, Synonyms, NcbiOnly) -- the line carrying
// the function name and the BaseId parameter is omitted from this listing;
// both are taken from the page's cross-reference index.
//
// Appends to Synonyms the ids recorded as synonyms of the sequence identified
// by BaseId, then removes duplicates from the whole list.
//
// BaseId    id to look up, after passing through x_NCBI34_Map_IdFix.
// Synonyms  output list; existing entries are kept and take part in de-duping.
// NcbiOnly  when true, external and private synonyms are left out.
//
// Does nothing when m_Assembly is null.
//
// NOTE(review): the declarations of `Seq`, `Found` and `seq_id_alias` are on
// omitted lines.
1719 void
1721  list< CConstRef<CSeq_id> >& Synonyms,
1722  bool NcbiOnly) const
1723 {
1724  if (m_Assembly.IsNull()) {
1725  return;
1726  }
1727 
1728  CConstRef<CSeq_id> Id(&BaseId);
1729  Id = x_NCBI34_Map_IdFix(Id);
1730 
1731 
1733  const CSeq_id_Handle Idh = CSeq_id_Handle::GetHandle(*Id);
1735  if (Found != m_IdToSeqMap.end()) {
1736  Seq = Found->second;
1737  if (Seq.NotNull()) {
1738  ITERATE (CGC_Sequence::TSeq_id_synonyms, it, Seq->GetSeq_id_synonyms()) {
1739  const CGC_TypedSeqId::E_Choice syn_type = (*it)->Which();
1740 
      // Alias-bearing synonym: collect whichever of public / gpipe / gi are set.
1742  if (seq_id_alias.NotNull()) {
1743  if (seq_id_alias->IsSetPublic()) {
1744  Synonyms.push_back(ConstRef(&seq_id_alias->GetPublic()));
1745  }
1746  if (seq_id_alias->IsSetGpipe()) {
1747  Synonyms.push_back(ConstRef(&seq_id_alias->GetGpipe()));
1748  }
1749  if (seq_id_alias->IsSetGi()) {
1750  Synonyms.push_back(ConstRef(&seq_id_alias->GetGi()));
1751  }
1752  }
1753  else if (syn_type == CGC_TypedSeqId::e_External && !NcbiOnly) {
1754  const CGC_External_Seqid& ExternalId = (*it)->GetExternal();
1755  Synonyms.push_back(ConstRef(&ExternalId.GetId()));
1756  }
1757  else if (syn_type == CGC_TypedSeqId::e_Private && !NcbiOnly) {
1758  const CSeq_id& Private = (*it)->GetPrivate();
1759  Synonyms.push_back(ConstRef(&Private));
1760  }
1761  }
1762  }
1763  }
1764 
1765  // de-dupe
      // Pairwise comparison: for each entry, erase every later entry equal to
      // it, so the first occurrence is the one that survives.
1766  NON_CONST_ITERATE(list< CConstRef<CSeq_id> >, Outer, Synonyms) {
1767  list< CConstRef<CSeq_id> >::iterator Inner = Outer;
1768  ++Inner;
1769  for( ; Inner != Synonyms.end(); ) {
1770  if( (*Outer)->Equals(**Inner) ) {
1771  Inner = Synonyms.erase(Inner);
1772  } else {
1773  ++Inner;
1774  }
1775  }
1776  }
1777 
1778 }
1779 
1780 
// Member-initialiser list and empty body of a constructor whose head line is
// omitted from this listing; the members match those of SIdSpec, so this is
// presumably SIdSpec's default constructor -- confirm.
// Every field starts in its "unset" state: flags false, choices not set,
// strings empty, Role e_Role_NotSet.
1782  : Primary(false),
1783  TypedChoice(objects::CGC_TypedSeqId::e_not_set),
1784  Alias(objects::CGC_SeqIdAlias::e_None),
1785  External(kEmptyStr),
1786  Pattern(kEmptyStr),
1787  Role(e_Role_NotSet),
1788  Top(false)
1789 {
1790 }
1791 
// Conversion to string: yields exactly what ToString() returns.
1792 CGencollIdMapper::SIdSpec::operator string() const
1793 {
1794  return ToString();
1795 }
1796 
// Comparison against `Other` based on TypedChoice alone. The line naming the
// function is omitted from this listing; presumably SIdSpec::operator< --
// confirm.
//
// NOTE(review): the result is true whenever TypedChoice >= Other.TypedChoice,
// including when both are equal. If this is operator<, it is not a strict
// ordering (a < a would be true); check how callers rely on it before using it
// with ordered containers or sorting.
1797 bool
1799 {
1800  return !(TypedChoice < Other.TypedChoice);
1801 }
1802 
// Field-by-field equality against `Other`. The line naming the function is
// omitted from this listing; presumably SIdSpec::operator== -- confirm.
// Returns true only when Primary, TypedChoice, Alias, External, Pattern, Role
// and Top all compare equal.
1803 bool
1805 {
1806  if (!(Primary == Other.Primary &&
1807  TypedChoice == Other.TypedChoice &&
1808  Alias == Other.Alias &&
1809  External == Other.External &&
1810  Pattern == Other.Pattern &&
1811  Role == Other.Role &&
1812  Top == Other.Top
1813  )
1814  ) {
1815  return false;
1816  }
1817  return true;
1818 }
1819 
// Renders the spec as seven ':'-separated fields, in this order:
//   Prim|NotPrim : typed choice : alias : External : Pattern : role : TOP|NOTOP
// The line naming the function is omitted from this listing; presumably
// SIdSpec::ToString, which the string conversion operator calls -- confirm.
//
// NOTE(review): the case labels of the Role switch are on omitted lines, so
// which role value produces which tag cannot be read from here.
1820 string
1822 {
1823  string Result;
1824  Result.reserve(64);
1825 
1826  if(Primary)
1827  Result += "Prim";
1828  else
1829  Result += "NotPrim";
1830  Result += ":";
1831 
      // Numeric case labels stand for TypedChoice values; a value outside 0-4
      // contributes an empty field.
1832  switch (TypedChoice) {
1833  case 0:
1834  Result += "NotSet";
1835  break;
1836  case 1:
1837  Result += "GenBank";
1838  break;
1839  case 2:
1840  Result += "RefSeq";
1841  break;
1842  case 3:
1843  Result += "Private";
1844  break;
1845  case 4:
1846  Result += "External";
1847  break;
1848  }
1849  Result += ":";
1850 
      // Same scheme for Alias; a value outside 0-3 contributes an empty field.
1851  switch (Alias) {
1852  case 0:
1853  Result += "NotSet";
1854  break;
1855  case 1:
1856  Result += "Public";
1857  break;
1858  case 2:
1859  Result += "Gpipe";
1860  break;
1861  case 3:
1862  Result += "Gi";
1863  break;
1864  }
1865  Result += ":";
1866 
1867  Result += External + ":" + Pattern;
1868  Result += ":";
1869 
      // e_Role_NotSet leaves the field empty; roles without a tag are written
      // as their integer value.
1870  switch (Role) {
1872  Result += "CHRO";
1873  break;
1875  Result += "SCAF";
1876  break;
1878  Result += "COMP";
1879  break;
1881  Result += "TOP";
1882  break;
1883  case e_Role_NotSet:
1884  break;
1885  default:
1886  Result += NStr::IntToString(Role);
1887  }
1888 
1889  Result += ":";
1890  if(Top)
1891  Result += "TOP";
1892  else
1893  Result += "NOTOP";
1894 
1895  return Result;
1896 }
1897 
// Decides whether this spec agrees with the spec `Guessed`. The line naming
// the function and declaring `Guessed` is omitted from this listing.
//
// Role/Top agreement (RoleTop):
//   Guessed.Top set     -> Top must equal Guessed.Top
//   Guessed.Top not set -> Role must equal Guessed.Role
// Then:
//   this->Primary set   -> Guessed must be Primary too, and RoleTop must hold
//   otherwise           -> TypedChoice, Alias, External and Pattern must all
//                          be equal, and RoleTop must hold
1898 bool
1900 {
1901  bool RoleTop = false;
1902  if(Guessed.Top) {
1903  RoleTop = (Top == Guessed.Top);
1904  } else {
1905  RoleTop = (Role == Guessed.Role);
1906  }
1907 
1908  if(Primary) {
1909  return (Guessed.Primary && RoleTop );
1910  } else {
1911  return (TypedChoice == Guessed.TypedChoice &&
1912  Alias == Guessed.Alias &&
1913  External == Guessed.External &&
1914  Pattern == Guessed.Pattern &&
1915  RoleTop );
1916  }
1917 
1918 
1919 }
1920 
1921 
1923 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define false
Definition: bool.h:36
#define bool
Definition: bool.h:34
CDelta_seq –.
Definition: Delta_seq.hpp:66
CGC_External_Seqid –.
the sequence(s) representing this molecule in the case of 2L and 2R - the molecule is represented by ...
CConstRef< CSeq_id > GetSubmitterName() const
CConstRef< CSeq_id > GetSynonymSeq_id(CGC_TypedSeqId::E_Choice type, CGC_SeqIdAlias::E_AliasTypes) const
bool HasRole(int Role) const
CGC_TaggedSequences::TState GetParentRelation() const
Access the relationship to the parent.
Definition: GC_Sequence.cpp:95
CConstRef< CGC_Sequence > GetParent() const
Access the parent sequence of this sequence.
Definition: GC_Sequence.cpp:89
CGC_TaggedSequences –.
CGC_TypedSeqId –.
bool Guess(const objects::CSeq_loc &Loc, SIdSpec &Spec) const
Definition: id_mapper.cpp:129
TSeqLocMapperRef m_UpMapper
Definition: id_mapper.hpp:250
CGencollIdMapper(CConstRef< objects::CGC_Assembly > SourceAsm)
Definition: id_mapper.cpp:112
CConstRef< objects::CSeq_id > x_ApplyPatternToId(CConstRef< objects::CSeq_id > Id, const SIdSpec &Spec) const
Definition: id_mapper.cpp:891
void x_StripPseudoSeq(objects::CGC_Sequence &Seq)
Definition: id_mapper.cpp:517
CRef< objects::CSeq_loc > x_Map_Up(const objects::CSeq_loc &SourceLoc, const objects::CGC_Sequence &Seq, const SIdSpec &Spec) const
Definition: id_mapper.cpp:1555
E_Gap x_Merge_E_Gaps(E_Gap First, E_Gap Second) const
Definition: id_mapper.cpp:1706
CConstRef< objects::CSeq_id > x_GetIdFromSeqAndSpec(const objects::CGC_Sequence &Seq, const SIdSpec &Spec) const
Definition: id_mapper.cpp:1109
int x_GetRole(const objects::CGC_Sequence &Seq) const
Definition: id_mapper.cpp:914
CConstRef< objects::CSeq_id > x_FixImperfectId(CConstRef< objects::CSeq_id > Id, const SIdSpec &Spec) const
Definition: id_mapper.cpp:805
void x_PrioritizeIds(void)
Definition: id_mapper.cpp:725
CConstRef< objects::CGC_Sequence > x_FindParentSequence(const objects::CSeq_id &Id, const objects::CGC_Assembly &Assembly, int Depth=0) const
Definition: id_mapper.cpp:1371
bool CanMeetSpec(const objects::CSeq_loc &Loc, const SIdSpec &Spec) const
Definition: id_mapper.cpp:304
bool x_DoesSeqContainSyn(const objects::CGC_Sequence &Seq, const objects::CSeq_id &Id) const
Definition: id_mapper.cpp:1095
void x_AddSeqToMap(const objects::CSeq_id &Id, CConstRef< objects::CGC_Sequence > Seq)
Definition: id_mapper.cpp:930
TSeqLocMapperRef m_DownMapper_Shallow
Definition: id_mapper.hpp:250
TAccToVerMap m_AccToVerMap
Definition: id_mapper.hpp:240
CRef< objects::CSeq_loc > x_Map_OneToOne(const objects::CSeq_loc &SourceLoc, const objects::CGC_Sequence &Seq, const SIdSpec &Spec) const
Definition: id_mapper.cpp:1518
void x_BuildSeqMap(const objects::CGC_Assembly &assm)
vector< string > m_Chromosomes
Definition: id_mapper.hpp:242
void x_RecursiveSeqFix(objects::CGC_Sequence &Seq)
Definition: id_mapper.cpp:555
bool x_NCBI34_Guess(const objects::CSeq_id &Id, SIdSpec &Spec) const
Definition: id_mapper.cpp:465
bool x_IsFuzzyIdInAssembly(const objects::CSeq_id &Id) const
Definition: id_mapper.cpp:780
CRef< objects::CSeq_loc > x_Map_Down(const objects::CSeq_loc &SourceLoc, const objects::CGC_Sequence &Seq, const SIdSpec &Spec) const
Definition: id_mapper.cpp:1581
void x_RemoveHiddenAccessions(objects::CGC_Sequence &Seq)
Definition: id_mapper.cpp:672
bool x_IsExactIdInAssembly(const objects::CSeq_id &Id) const
Definition: id_mapper.cpp:772
TSeqLocMapperRef m_DownMapper_Deep
Definition: id_mapper.hpp:250
void x_FillGpipeTopRole(objects::CGC_Sequence &Seq)
Definition: id_mapper.cpp:634
TChildToParentMap m_ChildToParentMap
Definition: id_mapper.hpp:247
bool x_IsParentSequence(const objects::CSeq_id &Id, const objects::CGC_Sequence &Parent) const
Definition: id_mapper.cpp:1421
CConstRef< objects::CSeq_id > x_NCBI34_Map_IdFix(CConstRef< objects::CSeq_id > SourceId) const
Definition: id_mapper.cpp:493
TIdToSeqMap m_IdToSeqMap
Definition: id_mapper.hpp:237
CRef< objects::CSeq_loc > Map(const objects::CSeq_loc &Loc, const SIdSpec &Spec) const
Definition: id_mapper.cpp:168
void x_Init(void)
Definition: id_mapper.cpp:354
CConstRef< objects::CGC_Sequence > x_FindChromosomeSequence(const objects::CSeq_id &Id, const SIdSpec &Spec) const
Definition: id_mapper.cpp:1459
void x_FillChromosomeIds(void)
Definition: id_mapper.cpp:696
CConstRef< objects::CGC_Assembly > GetInternalGencoll(void) const
Definition: id_mapper.cpp:1644
void x_Init_SeqLocMappers(void) const
Definition: id_mapper.cpp:437
CRef< objects::CGC_Assembly > m_Assembly
Definition: id_mapper.hpp:232
E_Gap x_IsLoc_Int_InAGap(const objects::CSeq_interval &Int) const
Definition: id_mapper.cpp:1650
void GetSynonyms(const objects::CSeq_id &BaseId, list< CConstRef< objects::CSeq_id > > &Synonyms, bool NcbiOnly=true) const
Definition: id_mapper.cpp:1720
E_Gap IsLocInAGap(const objects::CSeq_loc &Loc) const
Definition: id_mapper.cpp:1612
bool x_MakeSpecForSeq(const objects::CSeq_id &Id, const objects::CGC_Sequence &Seq, SIdSpec &Spec) const
Definition: id_mapper.cpp:1255
int x_CanSeqMeetSpec(const objects::CGC_Sequence &Seq, const SIdSpec &Spec, int Level=0) const
Definition: id_mapper.cpp:1169
TSeqPos GetLength(void) const
CSeq_loc_Mapper –.
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
Definition: svg.hpp:185
void erase(iterator pos)
Definition: map.hpp:167
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
#define A(i)
Definition: ecp_curves.c:948
#define TAX_ID_CONST(id)
Definition: ncbimisc.hpp:1112
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
Definition: ncbimisc.hpp:843
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define Handle
Definition: ncbistd.hpp:119
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2144
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
Definition: Seq_id.cpp:2456
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
@ eAcc_type_mask
Definition: Seq_id.hpp:222
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void Assign(const CTypeIteratorBase &it)
Definition: iterator.hpp:551
@ eSeqMap_Down
map from a segmented bioseq to segments
@ eSeqMap_Up
map from segments to the top level bioseq
SSeqMapSelector & SetResolveCount(size_t res_cnt)
Set max depth of resolving seq-map.
Definition: seq_map_ci.hpp:151
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
Definition: ncbiobj.hpp:744
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
Definition: ncbiobj.hpp:2024
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
Definition: ncbiobj.hpp:1410
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
bool Empty(void) const
Definition: range.hpp:148
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2887
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3310
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5383
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3401
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
bool CanGetSequences(void) const
Check if it is safe to call GetSequences method.
bool CanGetSeq_id(void) const
Check if it is safe to call GetSeq_id method.
E_Choice
Choice variants.
bool CanGetPrimary_assembly(void) const
Check if it is safe to call GetPrimary_assembly method.
bool IsGenbank(void) const
Check if variant Genbank is selected.
list< CRef< CGC_Sequence > > TSeqs
const TId & GetId(void) const
Get the Id member data.
TGenbank & SetGenbank(void)
Select the variant.
const TUnit & GetUnit(void) const
Get the variant data.
const TStructure & GetStructure(void) const
Get the Structure member data.
bool IsSingle(void) const
Check if variant Single is selected.
bool CanGetSeq_id_synonyms(void) const
Check if it is safe to call GetSeq_id_synonyms method.
TRefseq & SetRefseq(void)
Select the variant.
void SetSeq_id(TSeq_id &value)
Assign a value to Seq_id data member.
bool IsSetMols(void) const
collections of molecules for this assembly Check if a value has been assigned to Mols data member.
TSeq_id_synonyms & SetSeq_id_synonyms(void)
Assign a value to Seq_id_synonyms data member.
list< CRef< CGC_TypedSeqId > > TSeq_id_synonyms
bool IsAssembly_set(void) const
Check if variant Assembly_set is selected.
TRoles & SetRoles(void)
Assign a value to Roles data member.
bool IsSetMore_assemblies(void) const
Check if a value has been assigned to More_assemblies data member.
const TExternal & GetExternal(void) const
Get the External member data.
void ResetGpipe(void)
Reset Gpipe data member.
const TOther_sequences & GetOther_sequences(void) const
Get the Other_sequences member data.
const TPrimary_assembly & GetPrimary_assembly(void) const
Get the Primary_assembly member data.
const TSeq_id_synonyms & GetSeq_id_synonyms(void) const
Get the Seq_id_synonyms member data.
bool CanGetRoles(void) const
Check if it is safe to call GetRoles method.
list< CRef< CGC_TaggedSequences > > TSequences
const TAssembly_set & GetAssembly_set(void) const
Get the variant data.
const TMols & GetMols(void) const
Get the Mols member data.
bool IsRefseq(void) const
Check if variant Refseq is selected.
list< CRef< CGC_Replicon > > TMols
bool CanGetMols(void) const
Check if it is safe to call GetMols method.
void ResetSeq_id(void)
Reset Seq_id data member.
void SetPublic(TPublic &value)
Assign a value to Public data member.
const TSequences & GetSequences(void) const
Get the Sequences member data.
bool IsUnit(void) const
Check if variant Unit is selected.
bool CanGetStructure(void) const
Check if it is safe to call GetStructure method.
list< CRef< CGC_Assembly > > TMore_assemblies
list< CRef< CGC_Sequence > > TSet
const TSingle & GetSingle(void) const
Get the variant data.
const TPublic & GetPublic(void) const
Get the Public member data.
const TSet & GetSet(void) const
Get the variant data.
bool CanGetMore_assemblies(void) const
Check if it is safe to call GetMore_assemblies method.
const TMore_assemblies & GetMore_assemblies(void) const
Get the More_assemblies member data.
list< int > TRoles
const TSeq_id & GetSeq_id(void) const
Get the Seq_id member data.
@ e_not_set
No variant selected.
@ eGC_SequenceRole_top_level
@ eGC_SequenceRole_component
@ eGC_SequenceRole_scaffold
@ eGC_SequenceRole_pseudo_scaffold
@ eGC_SequenceRole_chromosome
@ eGC_SequenceRole_submitter_pseudo_scaffold
@ eState_placed
exist only within a replicon. placed sequences on higher sequence
void SetTo(TTo value)
Assign a value to To data member.
TChain GetChain(void) const
Get the Chain member data.
const TPdb & GetPdb(void) const
Get the variant data.
Definition: Seq_id_.cpp:435
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsMix(void) const
Check if variant Mix is selected.
Definition: Seq_loc_.hpp:552
list< CRef< CSeq_interval > > Tdata
const Tdata & Get(void) const
Get the member data.
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
list< CRef< CSeq_loc > > Tdata
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
bool CanGetMol(void) const
Check if it is safe to call GetMol method.
bool IsPdb(void) const
Check if variant Pdb is selected.
Definition: Seq_id_.hpp:922
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
const Tdata & Get(void) const
Get the member data.
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetVersion(void) const
Check if it is safe to call GetVersion method.
const TMol & GetMol(void) const
Get the Mol member data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Definition: Seq_loc_.hpp:534
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsNull(void) const
Check if variant Null is selected.
Definition: Seq_loc_.hpp:504
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
const TMix & GetMix(void) const
Get the variant data.
Definition: Seq_loc_.cpp:282
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
bool CanGetChain(void) const
Check if it is safe to call GetChain method.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
const TLiteral & GetLiteral(void) const
Get the variant data.
Definition: Delta_seq_.cpp:124
bool IsLoc(void) const
Check if variant Loc is selected.
Definition: Delta_seq_.hpp:257
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
const TLoc & GetLoc(void) const
Get the variant data.
Definition: Delta_seq_.cpp:102
const Tdata & Get(void) const
Get the member data.
Definition: Delta_ext_.hpp:164
bool IsLiteral(void) const
Check if variant Literal is selected.
Definition: Delta_seq_.hpp:263
list< CRef< CDelta_seq > > Tdata
Definition: Delta_ext_.hpp:89
USING_SCOPE(objects)
CConstRef< CGC_SeqIdAlias > s_GetSeqIdAlias_GenBankRefSeq(CConstRef< CGC_TypedSeqId > tsid)
Definition: id_mapper.cpp:973
bool s_RevStrLenSort(const string &A, const string &B)
Definition: id_mapper.cpp:79
map< string, CStopWatch > TTimerMap
Definition: id_mapper.cpp:63
bool s_DoesBioseqRecurse(const CBioseq &Bioseq)
Definition: id_mapper.cpp:1541
bool s_HasMoreDigits(const string &Name, const string &Chromo)
Definition: id_mapper.cpp:106
bool s_IsNumericString(const string &A)
Definition: id_mapper.cpp:85
const string CHROMO_EXT
Definition: id_mapper.cpp:75
const string DELIM
Definition: id_mapper.cpp:74
TTimerMap TimerMap
size_t s_CountNumeric(const string &A)
Definition: id_mapper.cpp:95
map< string, CStopWatch > TTimerMap
@ e_not_set
constexpr auto sort(_Init &&init)
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
T min(T x_, T y_)
bool operator==(const SIdSpec &Other) const
Definition: id_mapper.cpp:1804
bool IsSpecMet(const SIdSpec &Guessed) const
Definition: id_mapper.cpp:1899
bool operator<(const SIdSpec &Other) const
Definition: id_mapper.cpp:1798
string ToString(void) const
Definition: id_mapper.cpp:1821
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
string ToString(const wxRect &rc)
Definition: wx_utils.cpp:773
Modified on Fri Dec 08 08:19:24 2023 by modify_doxy.py rev. 669887