NCBI C++ ToolKit
Bioseq_set.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: Bioseq_set.cpp 75358 2016-11-09 14:19:55Z bollin $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: .......
27  *
28  * File Description:
29  * .......
30  *
31  * Remark:
32  * This code was originally generated by application DATATOOL
33  * using specifications from the ASN data definition file
34  * 'seqset.asn'.
35  *
36  */
37 
38 // standard includes
39 #include <ncbi_pch.hpp>
40 #include <serial/serial.hpp>
41 #include <serial/iterator.hpp>
42 #include <serial/enumvalues.hpp>
43 
44 // generated includes
46 #include <objects/seq/Bioseq.hpp>
47 #include <objects/seq/Seq_annot.hpp> // to make KCC happy
48 #include <objects/seq/Seq_inst.hpp>
53 
54 // generated classes
55 
57 
58 BEGIN_objects_SCOPE // namespace ncbi::objects::
59 
60 // destructor -- the body below is empty, so no explicit cleanup is done here.
// NOTE(review): the signature line (listing line 61) is absent from this
// listing; the cross-reference index names it as ~CBioseq_set(void).
62 {
63 }
64 
65 
66 static bool s_is_na(const CBioseq& seq)
67 {
68  switch (seq.GetInst().GetMol()) {
71  case CSeq_inst::eMol_na:
72  return true;
73  default:
74  return false;
75  }
76 }
77 
78 
79 static bool s_has_gb(const CSeq_id& id)
80 {
81  switch (id.Which()) {
82  case CSeq_id::e_Genbank:
83  case CSeq_id::e_Embl:
84  case CSeq_id::e_Ddbj:
85  case CSeq_id::e_Other:
86  case CSeq_id::e_Tpg:
87  case CSeq_id::e_Tpe:
88  case CSeq_id::e_Tpd:
89  return true;
90  default:
91  return false;
92  }
93 }
94 
95 
96 static bool s_has_accession(const CSeq_id& id)
97 {
98  if (!id.GetTextseq_Id()) {
99  return false;
100  } else if (id.GetTextseq_Id()->IsSetAccession()) {
101  return true;
102  } else {
103  return false;
104  }
105 }
106 
108 {
  // Body of GetLabel(string* label, ELabelType type) const, per the
  // cross-reference index; the signature line itself is absent from this
  // listing.
  //
  // Appends to *label:
  //   - the set's class name, unless the class is unset or type == eContent;
  //   - then, unless type == eType, the id string of the "best" Bioseq found
  //     in this set plus a component count, or "(No Bioseqs)" when no Seq-id
  //     was found.
  // Nothing is written when label is null.
109  // If no label, just return
110  if (!label) {
111  return;
112  }
113 
114  // Get type label
115  if (IsSetClass() && type != eContent) {
116  const CEnumeratedTypeValues* tv =
117  CBioseq_set::GetTypeInfo_enum_EClass();
  // Second argument is FindName's allowBadValue flag.
118  const string& cn = tv->FindName(GetClass(), true);
119  *label += cn;
120 
  // Separator is added only when content will follow the class name.
121  if (type != eType) {
122  *label += ": ";
123  }
124  }
125 
  // A type-only label is complete at this point.
126  if (type == eType) {
127  return;
128  }
129 
130  // Loop through CBioseqs looking for the best one to use for a label
  // The best_* variables describe the currently preferred Bioseq; the
  // loop counter "max" caps the scan at 100 Bioseqs.
131  bool best_is_na = false;
132  const CBioseq* best = 0;
133  const CSeq_id* best_seq_id = 0;
134  const CSeq_id* best_seq_id_with_gb = 0;
135  const CSeq_id* best_seq_id_with_accession = 0;
136  int max = 0;
137  for (CTypeConstIterator<CBioseq> si(ConstBegin(*this)); si && max < 100; ++si, ++max) {
  // is_na is assigned below before it is first read.
138  bool takeit = false, is_na;
139  const CSeq_id* current_seq_id = 0;
140  const CSeq_id* current_seq_id_with_gb = 0;
141  const CSeq_id* current_seq_id_with_accession = 0;
142  is_na = s_is_na(*si);
  // Visit every CSeq_id reachable from this Bioseq. Keep the first id
  // seen, and the LAST id accepted by s_has_gb / s_has_accession
  // (later matches overwrite earlier ones).
143  for (CTypeConstIterator<CSeq_id> ii(ConstBegin(*si)); ii; ++ii) {
144  if (!current_seq_id) {
145  current_seq_id = &(*ii);
146  }
147  if (s_has_gb(*ii)) {
148  current_seq_id_with_gb = &(*ii);
149  }
150  if (s_has_accession(*ii)) {
151  current_seq_id_with_accession = &(*ii);
152  }
153  }
154 
  // Decide whether this Bioseq replaces the current best. The first
  // Bioseq is always taken. After that the criteria are checked in
  // strict priority order -- accession id, then s_has_gb id, then
  // nucleic acid, then length:
  //   * if the best already satisfies a criterion, the candidate wins
  //     only when it satisfies it too AND is longer;
  //   * if only the candidate satisfies it, the candidate wins;
  //   * if neither does, fall through to the next criterion.
  // The order of the else-if chain is what encodes the priority.
155  if (!best) {
156  takeit = true;
157  } else {
158  bool longer = false;
159  if (si->GetInst().GetLength() > best->GetInst().GetLength()) {
160  longer = true;
161  }
162  if (best_seq_id_with_accession) {
163  if (current_seq_id_with_accession) {
164  if(longer) {
165  takeit = true;
166  }
167  }
168  } else if (current_seq_id_with_accession) {
169  takeit = true;
170  } else if (best_seq_id_with_gb) {
171  if (current_seq_id_with_gb) {
172  if (longer) {
173  takeit = true;
174  }
175  }
176  } else if (current_seq_id_with_gb) {
177  takeit = true;
178  } else if (best_is_na) {
179  if (is_na) {
180  if (longer) {
181  takeit = true;
182  }
183  }
184  } else if (is_na) {
185  takeit = true;
186  } else if (longer) {
187  takeit = true;
188  }
189  }
190 
  // Promote the candidate: all best_* fields are replaced together so
  // they always describe the same Bioseq.
191  if (takeit) {
192  best = &(*si);
193  best_seq_id = current_seq_id;
194  best_seq_id_with_gb = current_seq_id_with_gb;
195  best_seq_id_with_accession = current_seq_id_with_accession;
196  best_is_na = is_na;
197  }
198  }
199 
200  // Add content to label.
  // Id preference for the chosen Bioseq: accession id, else s_has_gb id,
  // else the first id seen.
201  if (best_seq_id_with_accession) {
202  best_seq_id = best_seq_id_with_accession;
203  }
204  else if (best_seq_id_with_gb) {
205  best_seq_id = best_seq_id_with_gb;
206  }
  // No id at all: either no Bioseq was visited or the chosen one had
  // no Seq-id.
207  if (!best_seq_id) {
208  *label += "(No Bioseqs)";
209  } else {
210  //CNcbiOstrstream os;
211  //os << best_seq_id->DumpAsFasta();
212  //*label += CNcbiOstrstreamToString(os);
213  *label += best_seq_id->GetSeqIdString();
  // Append the number of direct entries in Seq_set; nothing is appended
  // when the member is unset or the list is empty.
214  if (this->IsSetSeq_set()) {
215  const TSeq_set& sset = this->GetSeq_set();
216  size_t len = sset.size();
217  if (len > 1) {
218  *label += " (" + NStr::SizetToString(sset.size()) + " components)";
219  } else if (len == 1) {
220  *label += " (1 component)";
221  }
222  }
223  }
224 }
225 
226 
228 {
  // Body of GetNucFromNucProtSet(void) const, which returns const CBioseq&
  // per the cross-reference index; the signature line is absent from this
  // listing.
  //
  // Returns the nucleotide Bioseq of a nuc-prot set.
  // NOTE(review): the statements that consume the two message strings
  // below (listing lines 230 and 245) are missing from this listing;
  // presumably NCBI_THROW calls -- confirm against the real file.

  // Precondition: this set must be of class nuc-prot; the message includes
  // the actual class name.
229  if (GetClass() != eClass_nuc_prot) {
231  "CBioseq_set::GetNucFromNucProtSet() : incompatible class (" +
232  ENUM_METHOD_NAME(EClass)()->FindName(GetClass(), true) + ")");
233  }
234 
  // Scan the direct members in order and return from the first match.
235  ITERATE (TSeq_set, it, GetSeq_set()) {
236  const CSeq_entry& se = **it;
  // A member that is itself a nucleotide Bioseq is returned directly.
237  if (se.IsSeq() && se.GetSeq().IsNa()) {
238  return se.GetSeq();
  // NOTE(review): the rest of this condition (listing line 240) is missing
  // from this listing; the branch returns the nested set's segset master.
239  } else if (se.IsSet() &&
241  return se.GetSet().GetMasterFromSegSet();
242  }
243  }
244 
  // Reached only when no member matched. The backslash continues the string
  // literal onto the next line, so nothing may be inserted between them.
246  "CBioseq_set::GetNucFromNucProtSet() : \
247  nuc-prot set doesn't contain the nucleotide bioseq");
248 }
249 
250 
252 {
  // Body of GetGenomicFromGenProdSet(void) const, which returns
  // const CBioseq& per the cross-reference index; the signature line is
  // absent from this listing.
  //
  // Returns the first direct member of a gen-prod set that is a Bioseq
  // whose molecule type is set and equals DNA.
  // NOTE(review): the statements that consume the two message strings
  // below (listing lines 254 and 269) are missing from this listing;
  // presumably NCBI_THROW calls -- confirm against the real file.

  // Precondition: this set must be of class gen-prod-set.
253  if (GetClass() != eClass_gen_prod_set) {
255  "CBioseq_set::GetGenomicFromGenProdSet() : incompatible class (" +
256  ENUM_METHOD_NAME(EClass)()->FindName(GetClass(), true) + ")");
257  }
258 
  // Only direct members that are Bioseqs are considered; nested sets are
  // skipped.
259  ITERATE (TSeq_set, it, GetSeq_set()) {
260  if ((*it)->IsSeq()) {
261  const CBioseq& seq = (*it)->GetSeq();
  // IsSetMol() is checked first so GetMol() is read only when assigned.
262  if (seq.GetInst().IsSetMol() &&
263  seq.GetInst().GetMol() == CSeq_inst::eMol_dna) {
264  return seq;
265  }
266  }
267  }
268 
  // Reached only when no DNA Bioseq was found. The backslash continues the
  // string literal onto the next line, so nothing may be inserted between
  // them.
270  "CBioseq_set::GetGenomicFromGenProdSet() : \
271  gen-prod set doesn't contain the genomic bioseq");
272 }
273 
274 
276 {
  // Body of GetMasterFromSegSet(void) const, which returns const CBioseq&
  // per the cross-reference index; the signature line is absent from this
  // listing.
  //
  // Returns the first direct member of a segset that is a Bioseq; the
  // error text below calls that Bioseq the "master".
  // NOTE(review): the statements that consume the two message strings
  // below (listing lines 278 and 289) are missing from this listing;
  // presumably NCBI_THROW calls -- confirm against the real file.

  // Precondition: this set must be of class segset.
277  if (GetClass() != eClass_segset) {
279  "CBioseq_set::GetMasterFromSegSet() : incompatible class (" +
280  ENUM_METHOD_NAME(EClass)()->FindName(GetClass(), true) + ")");
281  }
282 
  // First member that is a Bioseq (not a nested set) wins.
283  ITERATE (TSeq_set, it, GetSeq_set()) {
284  if ((*it)->IsSeq()) {
285  return (*it)->GetSeq();
286  }
287  }
288 
  // Reached only when the set has no direct Bioseq member. The backslash
  // continues the string literal onto the next line, so nothing may be
  // inserted between them.
290  "CBioseq_set::GetMasterFromSegSet() : \
291  segset set doesn't contain the master bioseq");
292 }
293 
295 {
  // Body of GetParentSet(void) const, which returns CConstRef<CBioseq_set>
  // per the cross-reference index; the signature line is absent from this
  // listing. Presumably this is the CBioseq_set member -- confirm.
  //
  // Looks two levels up: this object's parent entry, then that entry's
  // parent. Returns a reference to the set held by the second entry when
  // it exists and is a set; otherwise returns an empty CConstRef.
296  CSeq_entry* se;
297 
298  se = GetParentEntry();
299  if ( se ) {
  // Step from this object's own entry to the entry above it.
300  se = se->GetParentEntry();
301  if ( se ) {
302  if ( se->IsSet() ) {
303  return CConstRef<CBioseq_set> (&se->GetSet());
304  }
305  }
306  }
307 
  // No parent entry, no grandparent entry, or the grandparent is not a set.
308  return CConstRef<CBioseq_set> ();
309 }
310 
311 // Implemented here to prevent CBioseq dependency on Bioseq_set
// NOTE(review): the signature line (listing line 312) is absent from this
// listing; given the comment above and the cross-reference index this is
// presumably CBioseq's GetParentSet(void) const -- confirm.
//
// Looks two levels up: this object's parent entry, then that entry's
// parent. Returns a reference to the set held by the second entry when it
// exists and is a set; otherwise returns an empty CConstRef.
313 {
314  CSeq_entry* se;
315 
316  se = GetParentEntry();
317  if ( se ) {
  // Step from this object's own entry to the entry above it.
318  se = se->GetParentEntry();
319  if ( se ) {
320  if ( se->IsSet() ) {
321  return CConstRef<CBioseq_set> (&se->GetSet());
322  }
323  }
324  }
325 
  // No parent entry, no grandparent entry, or the grandparent is not a set.
326  return CConstRef<CBioseq_set> ();
327 }
328 
329 
331 {
  // Body of the NeedsDocsumTitle overload that takes a set class in
  // `set_class`; the signature line (listing line 330) is absent from this
  // listing.
  //
  // Returns true exactly when set_class is one of the four study-type
  // classes: pop-set (population study), phy-set (phylogenetic study),
  // eco-set (ecological sample study) or mut-set (set of mutations).
332  bool rval = false;
333  if (set_class == CBioseq_set::eClass_pop_set
334  || set_class == CBioseq_set::eClass_phy_set
335  || set_class == CBioseq_set::eClass_eco_set
336  || set_class == CBioseq_set::eClass_mut_set) {
337  rval = true;
338  }
339  return rval;
340 }
341 
342 
344 {
  // Body of NeedsDocsumTitle() const per the cross-reference index; the
  // signature line is absent from this listing.
  //
  // Returns false when this set has no class assigned; otherwise returns
  // what the class-taking NeedsDocsumTitle overload returns for this set's
  // class.
345  bool rval = false;
346  if (IsSetClass()) {
347  rval = NeedsDocsumTitle(GetClass());
348  }
349  return rval;
350 
351 }
352 
353 
354 END_objects_SCOPE // namespace ncbi::objects::
static bool s_has_gb(const CSeq_id &id)
Definition: Bioseq_set.cpp:79
static bool s_is_na(const CBioseq &seq)
Definition: Bioseq_set.cpp:66
static bool s_has_accession(const CSeq_id &id)
Definition: Bioseq_set.cpp:96
const CBioseq & GetNucFromNucProtSet(void) const
Definition: Bioseq_set.cpp:227
const CBioseq & GetMasterFromSegSet(void) const
Definition: Bioseq_set.cpp:275
CSeq_entry * GetParentEntry(void) const
Definition: Bioseq_set.hpp:122
const CBioseq & GetGenomicFromGenProdSet(void) const
Definition: Bioseq_set.cpp:251
CConstRef< CBioseq_set > GetParentSet(void) const
Definition: Bioseq_set.cpp:294
~CBioseq_set(void)
Definition: Bioseq_set.cpp:61
bool NeedsDocsumTitle() const
Definition: Bioseq_set.cpp:343
void GetLabel(string *label, ELabelType type) const
Definition: Bioseq_set.cpp:107
CSeq_entry * GetParentEntry(void) const
Definition: Bioseq.hpp:174
CConstRef< CBioseq_set > GetParentSet(void) const
Definition: Bioseq_set.cpp:312
bool IsNa(void) const
Definition: Bioseq.cpp:345
Definition: Seq_entry.hpp:56
CSeq_entry * GetParentEntry(void) const
Definition: Seq_entry.hpp:131
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
static const char si[8][64]
Definition: des.c:146
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
Definition: enumerated.cpp:146
#define ENUM_METHOD_NAME(EnumName)
Definition: serialbase.hpp:994
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:2145
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2751
static const char label[]
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_segset
segmented sequence + parts
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
Definition: Seq_inst_.hpp:593
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
int len
T max(T x_, T y_)
Definition: type.c:6
Modified on Wed Jun 19 17:03:19 2024 by modify_doxy.py rev. 669887