87 typedef list<CRef<CDbtag>> TId;
89 if ((*id_it)->GetDb() ==
"GenColl" &&
90 (*id_it)->GetTag().IsId()) {
91 release_id = (*id_it)->GetTag().GetId();
102 typedef list<CRef<CDbtag>> TId;
104 if ((*id_it)->GetDb() ==
"GenColl" &&
105 (*id_it)->GetTag().IsStr()) {
106 accession = (*id_it)->GetTag().GetStr();
122 typedef list<CRef<CDbtag>> TId;
124 if ((*id_it)->GetDb() ==
"submitter" &&
125 (*id_it)->GetTag().IsStr()) {
126 submitter_id = (*id_it)->GetTag().GetStr();
142 "assembly is neither unit not set");
156 if (desc && desc->CanGetName()) {
157 return desc->GetName();
206 if (desc && desc->IsSetDescr()) {
208 if ((*it)->IsSource()) {
209 tax_id = (*it)->GetSource().GetOrg().GetTaxId();
227 if (desc && desc->IsSetRelease_type()) {
243 if (desc && desc->IsSetRelease_type()) {
251 return GetName() ==
"non-nuclear";
274 units.insert(units.end(),
tmp.begin(),
tmp.end());
278 tmp = (**it).GetAssemblyUnits();
279 units.insert(units.end(),
tmp.begin(),
tmp.end());
296 switch (
set.GetSet_type()) {
301 if (
set.IsSetMore_assemblies()) {
303 set.GetMore_assemblies()) {
304 assms.push_back(*it);
322 if (
tmp.insert(assm).second) {
323 assms.push_back(assm);
339 if (seq1->GetFullAssembly()->IsTargetSetReference() &&
340 !seq2->GetFullAssembly()->IsTargetSetReference())
344 if (seq2->GetFullAssembly()->IsTargetSetReference() &&
345 !seq1->GetFullAssembly()->IsTargetSetReference())
351 if (seq1->GetAssemblyUnit()->IsPrimaryUnit() &&
352 !seq2->GetAssemblyUnit()->IsPrimaryUnit())
356 if (seq2->GetAssemblyUnit()->IsPrimaryUnit() &&
357 !seq1->GetAssemblyUnit()->IsPrimaryUnit())
394 if (it->second.size() > 1) {
395 switch (find_option) {
398 "multiple sequences found in assembly: " +
402 return *min_element(it->second.begin(), it->second.end(),
SBestSequence());
409 return it->second.front();
421 sequences = it->second;
437 type = repl->GetMoleculeType();
438 location = repl->GetMoleculeLocation();
467 if (target_set ==
NULL) {
476 switch (
set.GetSet_type()) {
479 set.SetPrimary_assembly().CreateHierarchy(target_set);
480 if (
set.IsSetMore_assemblies()) {
482 set.SetMore_assemblies()) {
483 (*it)->CreateHierarchy(target_set);
490 set.SetPrimary_assembly().m_TargetSet = target_set;
491 set.SetPrimary_assembly().x_Index(*
this);
492 if (
set.IsSetMore_assemblies()) {
494 set.SetMore_assemblies()) {
495 (*it)->m_TargetSet = target_set;
503 "unknown assembly set type");
517 for ( ; seq_it; ++seq_it) {
525 repl->GetSequence().IsSingle() &&
526 &repl->GetSequence().GetSingle() != &this_seq) {
528 repl->GetSequence().GetSingle();
532 repl_seq.
GetSequences().front()->GetSeqs().size() == 1 &&
533 repl_seq.
GetSequences().front()->GetSeqs().front() == &this_seq &&
534 repl->GetSequence().GetSingle().GetSeq_id()
554 if (these_ids.
insert(idh).second) {
580 if (
GetUnit().IsSetOther_sequences()) {
582 SetUnit().SetOther_sequences()) {
594 set.SetPrimary_assembly().x_Index(root);
595 if (
set.IsSetMore_assemblies()) {
597 set.SetMore_assemblies()) {
598 (*it)->x_Index(root);
671 x_Index(seq, **
i, (*it)->GetState());
703 x_Index(seq, **
i, (*it)->GetState());
719 x_Index(seq, **
i, (*it)->GetState());
764 "Unexpected subset in call to CGC_Assembly::GetMolecules()");
772 bool invalid_data =
false;
775 for ( ; sequence_it; ++sequence_it, ++
count) {
776 if (sequence_it->GetSeq_id().IsGi() && !sequence_it->IsSetRoles() ) {
782 bool fits_role =
false;
786 else if (sequence_it->IsSetRoles()) {
801 "GC-Sequence.roles is not set in the current assembly; "
802 "please re-extract GC-Assembly");
811 s_Extract(
set.GetPrimary_assembly(), molecules, subset);
812 if (
set.IsSetMore_assemblies()) {
814 set.GetMore_assemblies()) {
826 molecules.resize(
set.IsSetMore_assemblies()
827 ?
set.GetMore_assemblies().
size() + 1 : 1);
828 vector< list< CConstRef<CGC_Sequence> > >::iterator unit_it = molecules.begin();
829 s_Extract(
set.GetPrimary_assembly(), *unit_it++, subset);
830 if (
set.IsSetMore_assemblies()) {
832 set.GetMore_assemblies()) {
857 molecules.front().clear();
884 if (
IsUnit() &&
GetUnit().GetFullAssembly().GetPointer() !=
this) {
891 "IsTargetSetReference() called on target set");
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static bool s_RoleFitsSubset(int role, CGC_Assembly::ESubset subset)
static void s_Extract(const CGC_Assembly &assm, list< CConstRef< CGC_Sequence > > &molecules, CGC_Assembly::ESubset subset)
Molecule Extraction Routines.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
string GetFileSafeDisplayName() const
Get file-safe version of full label for assembly.
CConstRef< CGC_Assembly > GetFullAssembly() const
Access the most specific full assembly the assembly unit belongs to. This is needed because assemblies...
CGC_Assembly * m_Assembly
string GetDisplayName() const
Get full display name of assembly unit.
list< CConstRef< CGC_Assembly > > TFullAssemblies
string GetAccession() const
Retrieve the accession for this assembly.
bool IsOrganelle() const
Is this a non-nuclear assembly unit?
void CreateHierarchy(CGC_Assembly *target_set=NULL)
Generate the internal up-pointers.
TTaxId GetTaxId() const
Retrieve the tax-id for this assembly.
CGC_AssemblyUnit::TClass GetUnitClass() const
If this is an assembly unit, get unit class.
string x_GetSubmitterId() const
string GetName() const
Retrieve the name of this assembly.
TFullAssemblies GetFullAssemblies() const
Retrieve a list of all full assemblies contained in this assembly. Note that, if the assembly is a ful...
list< CConstRef< CGC_AssemblyUnit > > TAssemblyUnits
string GetBestIdentifier() const
Either accession or submitter-provided id.
string GetFileSafeName() const
Retrieve the file-safe version of assembly name, if available; otherwise default to standard name.
TAssemblyUnits GetAssemblyUnits() const
Retrieve a list of all assembly units contained in this assembly.
void PreWrite() const
PreWrite() / PostRead() handle events for indexing of local structures.
bool IsTargetSetReference() const
Is this assembly the reference assembly of the target set, or part of it?
bool IsGenBank() const
Is this assembly a GenBank assembly?
void GetMoleculesByUnit(vector< TSequenceList > &molecules, ESubset subset) const
Retrieve a subset of molecules separately for each unit, in the same order in which the units are ret...
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
void x_Index(CGC_Assembly &assm, CGC_Replicon &replicon)
indexing infrastructure
const CGC_AssemblyDesc & GetDesc() const
Retrieve the full set of assembly descriptors.
const list< CRef< CDbtag > > & x_GetId() const
string GetFileSafeDisplayName() const
Get file-safe version of full label for assembly.
bool IsRefSeq() const
Is this assembly a RefSeq assembly?
list< CConstRef< CGC_Sequence > > TSequenceList
CGC_Assembly * m_TargetSet
TSequenceIndex m_SequenceMap
void GetMolecules(TSequenceList &molecules, ESubset subset) const
Retrieve a subset of molecules.
void CreateIndex()
Generate the Seq-id index.
void GetRepliconTypeLocRole(const CSeq_id_Handle &id, string &type, string &location, set< int > &role) const
Returns replicon type, location and role.
int GetReleaseId() const
Retrieve the release id for this assembly.
CConstRef< CGC_Assembly > GetTargetSet() const
Access the top-level target set that this assembly belongs to.
string GetDisplayName() const
Get full label for assembly; if this is a unit, full assembly name followed by unit name.
@ eSubmitterPseudoScaffold
CGC_AssemblyUnit * m_AssemblyUnit
CGC_Assembly * m_Assembly
CGC_Replicon * m_Replicon
CGC_Sequence * m_ParentSequence
CConstRef< CGC_Replicon > GetReplicon() const
Access the replicon the sequence belongs to.
CGC_AssemblyUnit * m_AssemblyUnit
CGC_Assembly * m_Assembly
CGC_TaggedSequences::TState m_ParentRel
Template class for iteration on objects of class C (non-modifiable version)
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
static const char location[]
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
const TId & GetId(void) const
Get the Id member data.
list< CRef< CGC_Sequence > > TSeqs
const TUnit & GetUnit(void) const
Get the variant data.
bool IsSingle(void) const
Check if variant Single is selected.
const TDesc & GetDesc(void) const
Get the Desc member data.
TClass GetClass(void) const
Get the Class member data.
list< CRef< CGC_TypedSeqId > > TSeq_id_synonyms
bool IsAssembly_set(void) const
Check if variant Assembly_set is selected.
const TFilesafe_name & GetFilesafe_name(void) const
Get the Filesafe_name member data.
TSequences & SetSequences(void)
Assign a value to Sequences data member.
bool IsSetFilesafe_name(void) const
Check if a value has been assigned to Filesafe_name data member.
const TDesc & GetDesc(void) const
Get the Desc member data.
const TPrimary_assembly & GetPrimary_assembly(void) const
Get the Primary_assembly member data.
void SetSequence(TSequence &value)
Assign a value to Sequence data member.
const TSeq_id_synonyms & GetSeq_id_synonyms(void) const
Get the Seq_id_synonyms member data.
list< CRef< CGC_TaggedSequences > > TSequences
const TAssembly_set & GetAssembly_set(void) const
Get the variant data.
list< CRef< CGC_Replicon > > TMols
bool IsSetSequences(void) const
placed: populated both on chromosome and scaffold levels unlocalized: populated on chromosome level C...
const TSequence & GetSequence(void) const
Get the Sequence member data.
TUnit & SetUnit(void)
Select the variant.
const TSequences & GetSequences(void) const
Get the Sequences member data.
bool IsUnit(void) const
Check if variant Unit is selected.
list< CRef< CGC_Assembly > > TMore_assemblies
list< CRef< CGC_Sequence > > TSet
TAssembly_set & SetAssembly_set(void)
Select the variant.
bool IsSetSeq_id_synonyms(void) const
Other known identifiers: Local / gpipe-satellite / genbank / refseq Check if a value has been assigne...
list< CRef< CGC_TaggedSequences > > TOther_sequences
const TId & GetId(void) const
Get the Id member data.
const TSeq_id & GetSeq_id(void) const
Get the Seq_id member data.
@ eGC_SequenceRole_top_level
@ eGC_SequenceRole_component
@ eGC_SequenceRole_scaffold
@ eGC_SequenceRole_pseudo_scaffold
@ eGC_SequenceRole_chromosome
@ eGC_SequenceRole_submitter_pseudo_scaffold
@ eState_placed
exist only within a replicon. placed sequences on higher sequence
@ eSet_type_full_assembly
full-assembly: set of asm-units
@ eSet_type_assembly_set
set of full-assemblies stopper
list< CRef< CSeqdesc > > Tdata
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
bool operator()(const CConstRef< CGC_Sequence > &seq1, const CConstRef< CGC_Sequence > &seq2) const