NCBI C++ ToolKit
GC_Assembly.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: GC_Assembly.cpp 99604 2023-04-24 15:24:30Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using the following specifications:
34  * 'genome_collection.asn'.
35  */
36 
37 // standard includes
38 #include <ncbi_pch.hpp>
39 
40 // generated includes
48 
50 #include <objects/seq/Seqdesc.hpp>
55 
56 #include <serial/serial.hpp>
57 #include <serial/iterator.hpp>
58 
59 // generated classes
60 
62 
63 BEGIN_objects_SCOPE // namespace ncbi::objects::
64 
65 // constructor
67 : m_TargetSet(NULL)
68 {
69 }
70 
71 
72 // destructor
74 {
75 }
76 
77 const list<CRef<CDbtag>>& CGC_Assembly::x_GetId() const
78 {
79  if (IsAssembly_set()) return GetAssembly_set().GetId();
80  if (IsUnit()) return GetUnit().GetId();
81  NCBI_THROW(CException, eUnknown, "unhandled GC-Assembly choice");
82 }
83 
85 {
    /// Walk the id list returned by x_GetId() and return the integer tag of
    /// the first Dbtag whose db is "GenColl" and whose tag is an integer id.
    /// Returns 0 when no such Dbtag is present.
86  int release_id = 0;
87  typedef list<CRef<CDbtag>> TId;
88  ITERATE (TId, id_it, x_GetId()) {
89  if ((*id_it)->GetDb() == "GenColl" &&
90  (*id_it)->GetTag().IsId()) {
91  release_id = (*id_it)->GetTag().GetId();
    // only the first matching Dbtag is used
92  break;
93  }
94  }
95  return release_id;
96 }
97 
98 
100 {
    /// Walk the id list returned by x_GetId() and return the string tag of
    /// the first Dbtag whose db is "GenColl" and whose tag is a string.
    /// Returns an empty string when no such Dbtag is present.
101  string accession;
102  typedef list<CRef<CDbtag>> TId;
103  ITERATE (TId, id_it, x_GetId()) {
104  if ((*id_it)->GetDb() == "GenColl" &&
105  (*id_it)->GetTag().IsStr()) {
106  accession = (*id_it)->GetTag().GetStr();
    // only the first matching Dbtag is used
107  break;
108  }
109  }
110  return accession;
111 }
112 
114 {
    /// Prefer the value of GetAccession(); when that is empty, fall back to
    /// whatever x_GetSubmitterId() returns (which may itself be empty).
115  const string acc = GetAccession();
116  return !acc.empty() ? acc : x_GetSubmitterId();
117 }
118 
120 {
    /// Walk the id list returned by x_GetId() and return the string tag of
    /// the first Dbtag whose db is "submitter" and whose tag is a string.
    /// Returns an empty string when no such Dbtag is present.
121  string submitter_id;
122  typedef list<CRef<CDbtag>> TId;
123  ITERATE (TId, id_it, x_GetId()) {
124  if ((*id_it)->GetDb() == "submitter" &&
125  (*id_it)->GetTag().IsStr()) {
126  submitter_id = (*id_it)->GetTag().GetStr();
    // only the first matching Dbtag is used
127  break;
128  }
129  }
130  return submitter_id;
131 }
132 
134 {
136  if (IsAssembly_set()) {
137  return GetAssembly_set().GetDesc();
138  } else if (IsUnit()) {
139  return GetUnit().GetDesc();
140  } else {
142  "assembly is neither unit not set");
143  }
144 }
145 
146 
147 string CGC_Assembly::GetName() const
148 {
150  if (IsAssembly_set()) {
151  desc.Reset(&GetAssembly_set().GetDesc());
152  } else if (IsUnit()) {
153  desc.Reset(&GetUnit().GetDesc());
154  }
155 
156  if (desc && desc->CanGetName()) {
157  return desc->GetName();
158  }
159 
160  return kEmptyStr;
161 }
162 
163 
165 {
166  if (IsAssembly_set()) {
167  return GetName();
168  } else if (IsUnit()) {
169  return GetUnit().GetDisplayName();
170  }
171 
172  return kEmptyStr;
173 }
174 
175 
177 {
178  return GetDesc().IsSetFilesafe_name()
180  : NStr::Replace(GetName(), " ", "_");
181 }
182 
183 
185 {
186  if (IsAssembly_set()) {
187  return GetFileSafeName();
188  } else if (IsUnit()) {
189  return GetUnit().GetFileSafeDisplayName();
190  }
191 
192  return kEmptyStr;
193 }
194 
195 
197 {
199  if (IsAssembly_set()) {
200  desc.Reset(&GetAssembly_set().GetDesc());
201  } else if (IsUnit()) {
202  desc.Reset(&GetUnit().GetDesc());
203  }
204 
205  TTaxId tax_id = ZERO_TAX_ID;
206  if (desc && desc->IsSetDescr()) {
207  ITERATE (CGC_AssemblyDesc::TDescr::Tdata, it, desc->GetDescr().Get()) {
208  if ((*it)->IsSource()) {
209  tax_id = (*it)->GetSource().GetOrg().GetTaxId();
210  break;
211  }
212  }
213  }
214  return tax_id;
215 }
216 
217 
219 {
221  if (IsAssembly_set()) {
222  desc.Reset(&GetAssembly_set().GetDesc());
223  } else if (IsUnit()) {
224  desc.Reset(&GetUnit().GetDesc());
225  }
226 
227  if (desc && desc->IsSetRelease_type()) {
228  return (desc->GetRelease_type() == CGC_AssemblyDesc::eRelease_type_refseq);
229  }
230  return false;
231 }
232 
233 
235 {
237  if (IsAssembly_set()) {
238  desc.Reset(&GetAssembly_set().GetDesc());
239  } else if (IsUnit()) {
240  desc.Reset(&GetUnit().GetDesc());
241  }
242 
243  if (desc && desc->IsSetRelease_type()) {
244  return (desc->GetRelease_type() == CGC_AssemblyDesc::eRelease_type_genbank);
245  }
246  return false;
247 }
248 
250 {
    /// True only when GetName() is exactly the string "non-nuclear"
    /// (case-sensitive comparison); no other field is consulted.
251  return GetName() == "non-nuclear";
252 }
253 
255 {
256  if (IsUnit()) {
257  return GetUnit().GetClass();
258  } else {
260  }
261 }
262 
263 /////////////////////////////////////////////////////////////////////////////
264 
265 /// Retrieve a list of all assembly units contained in this assembly
267 {
268  TAssemblyUnits units;
269  if (IsUnit()) {
270  units.push_back(CConstRef<CGC_AssemblyUnit>(&GetUnit()));
271  } else {
274  units.insert(units.end(), tmp.begin(), tmp.end());
275  if (GetAssembly_set().IsSetMore_assemblies()) {
277  GetAssembly_set().GetMore_assemblies()) {
278  tmp = (**it).GetAssemblyUnits();
279  units.insert(units.end(), tmp.begin(), tmp.end());
280  }
281  }
282  }
283 
284  return units;
285 }
286 
287 
288 /////////////////////////////////////////////////////////////////////////////
289 
291 {
292  TFullAssemblies assms;
293 
294  if (IsAssembly_set()) {
296  switch (set.GetSet_type()) {
298  /// each sub-assembly is its own entity and acts as its own root
299  assms.push_back
300  (CConstRef<CGC_Assembly>(&set.GetPrimary_assembly()));
301  if (set.IsSetMore_assemblies()) {
303  set.GetMore_assemblies()) {
304  assms.push_back(*it);
305  }
306  }
307  break;
308 
310  assms.push_back
311  (CConstRef<CGC_Assembly>(this));
312  break;
313 
314  default:
315  break;
316  }
317  } else {
320  ITERATE (TAssemblyUnits, it, units) {
321  CConstRef<CGC_Assembly> assm = (*it)->GetFullAssembly();
322  if (tmp.insert(assm).second) {
323  assms.push_back(assm);
324  }
325  }
326  }
327 
328  return assms;
329 }
330 
331 
332 /////////////////////////////////////////////////////////////////////////////
333 
336  const CConstRef<CGC_Sequence> &seq2) const
337  {
338  /// Prefer sequence from reference full assembly
339  if (seq1->GetFullAssembly()->IsTargetSetReference() &&
340  !seq2->GetFullAssembly()->IsTargetSetReference())
341  {
342  return true;
343  }
344  if (seq2->GetFullAssembly()->IsTargetSetReference() &&
345  !seq1->GetFullAssembly()->IsTargetSetReference())
346  {
347  return false;
348  }
349 
350  /// Prefer sequence from primary unit
351  if (seq1->GetAssemblyUnit()->IsPrimaryUnit() &&
352  !seq2->GetAssemblyUnit()->IsPrimaryUnit())
353  {
354  return true;
355  }
356  if (seq2->GetAssemblyUnit()->IsPrimaryUnit() &&
357  !seq1->GetAssemblyUnit()->IsPrimaryUnit())
358  {
359  return false;
360  }
361 
362  /// Prefer top-level sequence
363  if (seq1->HasRole(eGC_SequenceRole_top_level) &&
364  !seq2->HasRole(eGC_SequenceRole_top_level))
365  {
366  return true;
367  }
368  if (seq2->HasRole(eGC_SequenceRole_top_level) &&
369  !seq1->HasRole(eGC_SequenceRole_top_level))
370  {
371  return false;
372  }
373 
374  /// Prefer scaffold
375  if (seq1->HasRole(eGC_SequenceRole_scaffold) &&
376  !seq2->HasRole(eGC_SequenceRole_scaffold))
377  {
378  return true;
379  }
380  return false;
381  }
382 };
383 
385  EFindSeqOption find_option) const
386 {
387  if (m_SequenceMap.empty()) {
388  const_cast<CGC_Assembly&>(*this).CreateIndex();
389  }
391  if (it == m_SequenceMap.end() || it->second.size() == 0) {
392  return CConstRef<CGC_Sequence>();
393  }
394  if (it->second.size() > 1) {
395  switch (find_option) {
396  case eEnforceSingle:
398  "multiple sequences found in assembly: " +
399  id.GetSeqId()->AsFastaString());
400 
401  case eChooseBest:
402  return *min_element(it->second.begin(), it->second.end(), SBestSequence());
403 
404  default:
405  /// Arbitrarily take first one on list
406  break;
407  }
408  }
409  return it->second.front();
410 }
411 
413  TSequenceList& sequences) const
414 {
415  if (m_SequenceMap.empty()) {
416  const_cast<CGC_Assembly&>(*this).CreateIndex();
417  }
418  sequences.clear();
420  if (it != m_SequenceMap.end()) {
421  sequences = it->second;
422  }
423 }
424 
425 void CGC_Assembly::GetRepliconTypeLocRole(const CSeq_id_Handle& id, string& type, string& location, set<int>& role) const
426 {
428  Find(id, seqs);
429 
431  {
432  if(type.empty() || location.empty())
433  {
434  CConstRef<CGC_Replicon> repl((*its)->GetReplicon());
435  if(repl)
436  {
437  type = repl->GetMoleculeType();
438  location = repl->GetMoleculeLocation();
439  }
440  }
441 
443  if((*its)->HasRole(eGC_SequenceRole_scaffold )) role.insert(eGC_SequenceRole_scaffold);
445  }
446 }
447 
448 /////////////////////////////////////////////////////////////////////////////
449 
451 {
452 }
453 
455 {
456  CreateHierarchy();
457 }
458 
459 
461 {
462  //LOG_POST(Error << "CGC_Assembly::CreateHierarchy()");
463 
464  ///
465  /// generate the up-links as needed
466  ///
467  if (target_set == NULL) {
468  target_set = this;
469  }
470  m_TargetSet = target_set;
471  if (IsUnit()) {
472  x_Index(*this);
473  }
474  else if (IsAssembly_set()) {
476  switch (set.GetSet_type()) {
478  /// each sub-assembly is its own entity and acts as its own root
479  set.SetPrimary_assembly().CreateHierarchy(target_set);
480  if (set.IsSetMore_assemblies()) {
482  set.SetMore_assemblies()) {
483  (*it)->CreateHierarchy(target_set);
484  }
485  }
486  break;
487 
489  /// we are the root
490  set.SetPrimary_assembly().m_TargetSet = target_set;
491  set.SetPrimary_assembly().x_Index(*this);
492  if (set.IsSetMore_assemblies()) {
494  set.SetMore_assemblies()) {
495  (*it)->m_TargetSet = target_set;
496  (*it)->x_Index(*this);
497  }
498  }
499  break;
500 
501  default:
503  "unknown assembly set type");
504  }
505  }
506 }
507 
508 
509 //////////////////////////////////////////////////////////////////////////////
510 
512 {
513  if (m_SequenceMap.empty()) {
514  CMutexGuard LOCK(m_Mutex);
515  if (m_SequenceMap.empty()) {
516  CTypeConstIterator<CGC_Sequence> seq_it(*this);
517  for ( ; seq_it; ++seq_it) {
518  const CGC_Sequence& this_seq = *seq_it;
519  CConstRef<CGC_Replicon> repl = this_seq.GetReplicon();
520 
521  /// bizarre pattern: the sequence is a single placed sequence
522  /// with itself as the only scaffold. if this is the case,
523  /// don't index the scaffold
524  if (repl &&
525  repl->GetSequence().IsSingle() &&
526  &repl->GetSequence().GetSingle() != &this_seq) {
527  const CGC_Sequence& repl_seq =
528  repl->GetSequence().GetSingle();
529  if (repl_seq.IsSetSequences() &&
530  repl_seq.GetSequences().size() == 1 &&
531  repl_seq.GetSequences().front()->GetState() == CGC_TaggedSequences::eState_placed &&
532  repl_seq.GetSequences().front()->GetSeqs().size() == 1 &&
533  repl_seq.GetSequences().front()->GetSeqs().front() == &this_seq &&
534  repl->GetSequence().GetSingle().GetSeq_id()
535  .Match(this_seq.GetSeq_id())) {
536  continue;
537  }
538  }
539 
540  m_SequenceMap[CSeq_id_Handle::GetHandle(seq_it->GetSeq_id())]
541  .push_back(CConstRef<CGC_Sequence>(&*seq_it));
542 
543  // don't forget to index aliases
544  if (this_seq.IsSetSeq_id_synonyms()) {
545  set<CSeq_id_Handle> these_ids;
546  these_ids.insert(CSeq_id_Handle::GetHandle(seq_it->GetSeq_id()));
547 
549  this_seq.GetSeq_id_synonyms()) {
550  for (CTypeConstIterator<CSeq_id> id_it(**syn_it);
551  id_it; ++id_it) {
552  CSeq_id_Handle idh =
554  if (these_ids.insert(idh).second) {
556  .push_back(CConstRef<CGC_Sequence>(&*seq_it));
557  }
558  }
559  }
560  }
561  }
562  }
563  }
564 }
565 
566 
568 {
569  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_Assembly& root)");
570  if (IsUnit()) {
571  SetUnit().m_Assembly = &root;
572  if (GetUnit().IsSetMols()) {
574  SetUnit().SetMols()) {
575  x_Index(root, **it);
576  x_Index(SetUnit(), **it);
577  }
578  }
579 
580  if (GetUnit().IsSetOther_sequences()) {
582  SetUnit().SetOther_sequences()) {
584  (*it)->SetSeqs()) {
585  x_Index(root, **i);
586  x_Index(SetUnit(), **i);
587  x_Index(**i, (*it)->GetState());
588  }
589  }
590  }
591  }
592  else if (IsAssembly_set()) {
594  set.SetPrimary_assembly().x_Index(root);
595  if (set.IsSetMore_assemblies()) {
597  set.SetMore_assemblies()) {
598  (*it)->x_Index(root);
599  }
600  }
601  }
602 }
603 
604 
606 {
607  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_Assembly& assm, CGC_Replicon& replicon)");
608  replicon.m_Assembly = &assm;
609 
610  if (replicon.GetSequence().IsSingle()) {
611  CGC_Sequence& seq = replicon.SetSequence().SetSingle();
612  x_Index(assm, seq);
613  } else {
615  replicon.SetSequence().SetSet()) {
616  CGC_Sequence& seq = **it;
617  x_Index(assm, seq);
618  }
619  }
620 }
621 
622 
624 {
625  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_Assembly& assm, CGC_Sequence& seq)");
626  seq.m_Assembly = &assm;
627  if (seq.IsSetSequences()) {
630  (*it)->SetSeqs()) {
631  x_Index(assm, **i);
632  }
633  }
634  }
635 }
636 
637 
639 {
640  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_AssemblyUnit& unit, CGC_Replicon& replicon)");
641  replicon.m_AssemblyUnit = &unit;
642 
643  if (replicon.GetSequence().IsSingle()) {
644  CGC_Sequence& seq = replicon.SetSequence().SetSingle();
646 
647  x_Index(unit, seq);
648  x_Index(replicon, seq);
649  } else {
651  replicon.SetSequence().SetSet()) {
652  CGC_Sequence& seq = **it;
654 
655  x_Index(unit, seq);
656  x_Index(replicon, seq);
657  }
658  }
659 }
660 
661 
663 {
664  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_AssemblyUnit& unit, CGC_Sequence& seq)");
665  seq.m_AssemblyUnit = &unit;
666  if (seq.IsSetSequences()) {
669  (*it)->SetSeqs()) {
670  x_Index(unit, **i);
671  x_Index(seq, **i, (*it)->GetState());
672  }
673  }
674  }
675 }
676 
677 
679 {
680  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_Replicon& replicon, CGC_Sequence& seq)");
681  seq.m_Replicon = &replicon;
682  if (seq.IsSetSequences()) {
685  (*it)->SetSeqs()) {
686  x_Index(replicon, **i);
687  }
688  }
689  }
690 }
691 
692 
695 {
696  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_Sequence& parent, CGC_Sequence& seq, CGC_TaggedSequences::TState relation)");
697  seq.m_ParentSequence = &parent;
698  seq.m_ParentRel = relation;
699  if (seq.IsSetSequences()) {
702  (*it)->SetSeqs()) {
703  x_Index(seq, **i, (*it)->GetState());
704  }
705  }
706  }
707 }
708 
711 {
712  //LOG_POST(Error << "CGC_Assembly::x_Index(CGC_Sequence& seq, CGC_TaggedSequences::TState relation)");
713  seq.m_ParentSequence = NULL;
714  seq.m_ParentRel = relation;
715  if (seq.IsSetSequences()) {
718  (*it)->SetSeqs()) {
719  x_Index(seq, **i, (*it)->GetState());
720  }
721  }
722  }
723 }
724 
725 
726 
727 /////////////////////////////////////////////////////////////////////////////
728 ///
729 /// Molecule Extraction Routines
730 ///
731 
732 static void s_Extract(const CGC_Assembly& assm,
733  list< CConstRef<CGC_Sequence> >& molecules,
734  CGC_Assembly::ESubset subset);
735 
736 static void s_Extract(const CGC_AssemblyUnit& unit,
737  list< CConstRef<CGC_Sequence> >& molecules,
738  CGC_Assembly::ESubset subset);
739 
740 static void s_Extract(const CGC_AssemblySet& set,
741  list< CConstRef<CGC_Sequence> >& molecules,
742  CGC_Assembly::ESubset subset);
743 
744 static bool s_RoleFitsSubset(int role, CGC_Assembly::ESubset subset)
745 {
746  switch (subset) {
748  return role == eGC_SequenceRole_chromosome;
749 
751  return role == eGC_SequenceRole_scaffold;
752 
754  return role == eGC_SequenceRole_component;
755 
757  return role == eGC_SequenceRole_top_level;
758 
761 
762  default:
764  "Unexpected subset in call to CGC_Assembly::GetMolecules()");
765  }
766 }
767 
768 static void s_Extract(const CGC_AssemblyUnit& unit,
769  list< CConstRef<CGC_Sequence> >& molecules,
770  CGC_Assembly::ESubset subset)
771 {
772  bool invalid_data = false;
773  CTypeConstIterator<CGC_Sequence> sequence_it(unit);
774  size_t count = 0;
775  for ( ; sequence_it; ++sequence_it, ++count) {
776  if (sequence_it->GetSeq_id().IsGi() && !sequence_it->IsSetRoles() ) {
777  invalid_data = true;
778  }
779 
780  // Include this sequence if it has the correct role, or if
781  // all sequences are requested
782  bool fits_role = false;
783  if (subset == CGC_Assembly::eAll) {
784  fits_role = true;
785  }
786  else if (sequence_it->IsSetRoles()) {
787  ITERATE (CGC_Sequence::TRoles, it, sequence_it->GetRoles()) {
788  if (s_RoleFitsSubset(*it, subset)) {
789  fits_role = true;
790  break;
791  }
792  }
793  }
794  if (fits_role) {
795  molecules.push_back(CConstRef<CGC_Sequence>(&*sequence_it));
796  }
797  }
798 
799  if (invalid_data) {
801  "GC-Sequence.roles is not set in the current assembly; "
802  "please re-extract GC-Assembly");
803  }
804 }
805 
806 
807 static void s_Extract(const CGC_AssemblySet& set,
808  list< CConstRef<CGC_Sequence> >& molecules,
809  CGC_Assembly::ESubset subset)
810 {
811  s_Extract(set.GetPrimary_assembly(), molecules, subset);
812  if (set.IsSetMore_assemblies()) {
814  set.GetMore_assemblies()) {
815  s_Extract(**it, molecules, subset);
816  }
817  }
818 }
819 
820 
821 static void s_Extract(const CGC_AssemblySet& set,
822  vector< list< CConstRef<CGC_Sequence> > >& molecules,
823  CGC_Assembly::ESubset subset)
824 {
825  molecules.clear();
826  molecules.resize(set.IsSetMore_assemblies()
827  ? set.GetMore_assemblies().size() + 1 : 1);
828  vector< list< CConstRef<CGC_Sequence> > >::iterator unit_it = molecules.begin();
829  s_Extract(set.GetPrimary_assembly(), *unit_it++, subset);
830  if (set.IsSetMore_assemblies()) {
832  set.GetMore_assemblies()) {
833  s_Extract(**it, *unit_it++, subset);
834  }
835  }
836 }
837 
838 
839 static void s_Extract(const CGC_Assembly& assm,
840  list< CConstRef<CGC_Sequence> >& molecules,
841  CGC_Assembly::ESubset subset)
842 {
843  if (assm.IsUnit()) {
844  s_Extract(assm.GetUnit(), molecules, subset);
845  } else {
846  s_Extract(assm.GetAssembly_set(), molecules, subset);
847  }
848 }
849 
850 
851 static void s_Extract(const CGC_Assembly& assm,
852  vector< list< CConstRef<CGC_Sequence> > >& molecules,
853  CGC_Assembly::ESubset subset)
854 {
855  if (assm.IsUnit()) {
856  molecules.resize(1);
857  molecules.front().clear();
858  s_Extract(assm.GetUnit(), molecules.front(), subset);
859  } else {
860  s_Extract(assm.GetAssembly_set(), molecules, subset);
861  }
862 }
863 
864 
866  ESubset subset) const
867 {
868  s_Extract(*this, molecules, subset);
869 }
870 
871 void CGC_Assembly::GetMoleculesByUnit(vector<TSequenceList>& molecules,
872  ESubset subset) const
873 {
874  s_Extract(*this, molecules, subset);
875 }
876 
878 {
880 }
881 
883 {
884  if (IsUnit() && GetUnit().GetFullAssembly().GetPointer() != this) {
885  /// Assembly unit which is part of a multi-unit assembly
887  } else if (IsAssembly_set() && GetAssembly_set().GetSet_type() ==
889  {
891  "IsTargetSetReference() called on target set");
892  } else {
893  /// Full assembly
894  return m_TargetSet == this ||
896  == this;
897  }
898 }
899 
900 
901 END_objects_SCOPE // namespace ncbi::objects::
902 
904 
905 /* Original file checksum: lines: 57, chars: 1758, CRC32: 382c4e0c */
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static bool s_RoleFitsSubset(int role, CGC_Assembly::ESubset subset)
static void s_Extract(const CGC_Assembly &assm, list< CConstRef< CGC_Sequence > > &molecules, CGC_Assembly::ESubset subset)
Molecule Extraction Routines.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CConstRef –.
Definition: ncbiobj.hpp:1266
CGC_AssemblyDesc –.
string GetFileSafeDisplayName() const
Get file-safe version of full label for assembly.
CConstRef< CGC_Assembly > GetFullAssembly() const
Access the most specific full assembly the assembly unit belongs to. This is needed because assemblies...
CGC_Assembly * m_Assembly
string GetDisplayName() const
Get full display name of assembly unit.
list< CConstRef< CGC_Assembly > > TFullAssemblies
Definition: GC_Assembly.hpp:69
string GetAccession() const
Retrieve the accession for this assembly.
Definition: GC_Assembly.cpp:99
bool IsOrganelle() const
Is this a non-nuclear assembly unit?
void CreateHierarchy(CGC_Assembly *target_set=NULL)
Generate the internal up-pointers.
TTaxId GetTaxId() const
Retrieve the tax-id for this assembly.
CGC_AssemblyUnit::TClass GetUnitClass() const
If this is an assembly unit, get unit class.
string x_GetSubmitterId() const
string GetName() const
Retrieve the name of this assembly.
TFullAssemblies GetFullAssemblies() const
Retrieve a list of all full assemblies contained in this assembly. Note that, if the assembly is a ful...
list< CConstRef< CGC_AssemblyUnit > > TAssemblyUnits
Definition: GC_Assembly.hpp:68
string GetBestIdentifier() const
Either accession or submitter-provided id.
string GetFileSafeName() const
Retrieve the file-safe version of the assembly name, if available; otherwise default to the standard name.
TAssemblyUnits GetAssemblyUnits() const
Retrieve a list of all assembly units contained in this assembly.
CGC_Assembly(void)
Definition: GC_Assembly.cpp:66
void PreWrite() const
PreWrite() / PostRead() handle events for indexing of local structures.
bool IsTargetSetReference() const
Is this assembly the reference assembly of the target set, or part of it?
bool IsGenBank() const
Is this assembly a GenBank assembly?
void GetMoleculesByUnit(vector< TSequenceList > &molecules, ESubset subset) const
Retrieve a subset of molecules separately for each unit, in the same order in which the units are ret...
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
void x_Index(CGC_Assembly &assm, CGC_Replicon &replicon)
indexing infrastructure
const CGC_AssemblyDesc & GetDesc() const
Retrieve the full set of assembly descriptors.
const list< CRef< CDbtag > > & x_GetId() const
Definition: GC_Assembly.cpp:77
string GetFileSafeDisplayName() const
Get file-safe version of full label for assembly.
bool IsRefSeq() const
Is this assembly a RefSeq assembly?
~CGC_Assembly(void)
Definition: GC_Assembly.cpp:73
list< CConstRef< CGC_Sequence > > TSequenceList
Definition: GC_Assembly.hpp:67
CGC_Assembly * m_TargetSet
TSequenceIndex m_SequenceMap
void GetMolecules(TSequenceList &molecules, ESubset subset) const
Retrieve a subset of molecules.
void CreateIndex()
Generate the Seq-id index.
void GetRepliconTypeLocRole(const CSeq_id_Handle &id, string &type, string &location, set< int > &role) const
Returns replicon type, location and role.
int GetReleaseId() const
Retrieve the release id for this assembly.
Definition: GC_Assembly.cpp:84
CConstRef< CGC_Assembly > GetTargetSet() const
Access the top-level target set that this assembly belongs to.
string GetDisplayName() const
Get full label for assembly; if this is a unit, full assembly name followed by unit name.
@ eSubmitterPseudoScaffold
CGC_AssemblyUnit * m_AssemblyUnit
Definition: GC_Replicon.hpp:78
CGC_Assembly * m_Assembly
Definition: GC_Replicon.hpp:77
CGC_Replicon * m_Replicon
CGC_Sequence * m_ParentSequence
CConstRef< CGC_Replicon > GetReplicon() const
Access the replicon the sequence belongs to.
Definition: GC_Sequence.cpp:84
CGC_AssemblyUnit * m_AssemblyUnit
CGC_Assembly * m_Assembly
CGC_TaggedSequences::TState m_ParentRel
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
const_iterator end() const
Definition: map.hpp:152
bool empty() const
Definition: map.hpp:149
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
size_type size() const
Definition: set.hpp:132
static char tmp[3200]
Definition: utf8.c:42
static const char location[]
Definition: config.c:97
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
@ eUnknown
Definition: app_popup.hpp:72
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define kEmptyStr
Definition: ncbistr.hpp:123
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
const TId & GetId(void) const
Get the Id member data.
list< CRef< CGC_Sequence > > TSeqs
const TUnit & GetUnit(void) const
Get the variant data.
bool IsSingle(void) const
Check if variant Single is selected.
const TDesc & GetDesc(void) const
Get the Desc member data.
TClass GetClass(void) const
Get the Class member data.
list< CRef< CGC_TypedSeqId > > TSeq_id_synonyms
bool IsAssembly_set(void) const
Check if variant Assembly_set is selected.
const TFilesafe_name & GetFilesafe_name(void) const
Get the Filesafe_name member data.
TSequences & SetSequences(void)
Assign a value to Sequences data member.
bool IsSetFilesafe_name(void) const
Check if a value has been assigned to Filesafe_name data member.
const TDesc & GetDesc(void) const
Get the Desc member data.
const TPrimary_assembly & GetPrimary_assembly(void) const
Get the Primary_assembly member data.
void SetSequence(TSequence &value)
Assign a value to Sequence data member.
const TSeq_id_synonyms & GetSeq_id_synonyms(void) const
Get the Seq_id_synonyms member data.
list< CRef< CGC_TaggedSequences > > TSequences
const TAssembly_set & GetAssembly_set(void) const
Get the variant data.
list< CRef< CGC_Replicon > > TMols
bool IsSetSequences(void) const
placed: populated both on chromosome and scaffold levels unlocalized: populated on chromosome level C...
const TSequence & GetSequence(void) const
Get the Sequence member data.
TUnit & SetUnit(void)
Select the variant.
const TSequences & GetSequences(void) const
Get the Sequences member data.
bool IsUnit(void) const
Check if variant Unit is selected.
list< CRef< CGC_Assembly > > TMore_assemblies
list< CRef< CGC_Sequence > > TSet
TAssembly_set & SetAssembly_set(void)
Select the variant.
bool IsSetSeq_id_synonyms(void) const
Other known identifiers: Local / gpipe-satellite / genbank / refseq Check if a value has been assigne...
list< CRef< CGC_TaggedSequences > > TOther_sequences
const TId & GetId(void) const
Get the Id member data.
list< int > TRoles
const TSeq_id & GetSeq_id(void) const
Get the Seq_id member data.
@ eGC_SequenceRole_top_level
@ eGC_SequenceRole_component
@ eGC_SequenceRole_scaffold
@ eGC_SequenceRole_pseudo_scaffold
@ eGC_SequenceRole_chromosome
@ eGC_SequenceRole_submitter_pseudo_scaffold
@ eState_placed
exist only within a replicon. placed sequences on higher sequence
@ eSet_type_full_assembly
full-assembly: set of asm-units
@ eSet_type_assembly_set
set of full-assemblies stopper
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
int i
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
bool operator()(const CConstRef< CGC_Sequence > &seq1, const CConstRef< CGC_Sequence > &seq2) const
Definition: type.c:6
Modified on Sat Apr 13 11:50:00 2024 by modify_doxy.py rev. 669887