NCBI C++ ToolKit
add_flu_comments.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: add_flu_comments.cpp 46192 2021-02-01 17:54:36Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Igor Filippov
27  */
28 
29 
30 #include <ncbi_pch.hpp>
31 
32 
33 #include <objmgr/scope.hpp>
34 #include <objmgr/bioseq_ci.hpp>
35 #include <objmgr/feat_ci.hpp>
36 #include <objmgr/util/sequence.hpp>
40 
41 
44 
// Body of the member that this page's index lists as
//   void Apply(objects::CSeq_entry_Handle tse, ICommandProccessor *cmdProcessor)
// NOTE(review): the signature line itself is absent from this listing - confirm
// it against the real source file.
//
// Groups the labels of all nucleic-acid sequences under `tse` by taxname and,
// for every sequence whose taxname is shared by more than one label, queues a
// command that creates a comment descriptor naming all of those labels.
46 {
47  CRef<CCmdComposite> composite(new CCmdComposite("Add Flu Comments"));
48 
 // Pass 1: build taxname -> set of sequence labels.
49  map<string, TNumStrSet> taxname_to_ids;
50  CBioseq_CI b_iter(tse, CSeq_inst::eMol_na);
51  for (; b_iter; ++b_iter)
52  {
53  string taxname;
54  FindBioSource(b_iter->GetSeq_entry_Handle(), taxname);
 // Nothing found on the sequence's own entry: retry on its parent entry.
55  if (taxname.empty() && b_iter->GetSeq_entry_Handle().HasParentEntry())
56  FindBioSource(b_iter->GetSeq_entry_Handle().GetParentEntry(), taxname);
57 
58  string id_str = GetBestLabel(*b_iter);
 // Sequences without a taxname or without a label are left out of the map.
59  if (!taxname.empty() && !id_str.empty())
60  taxname_to_ids[taxname].insert(id_str);
61  }
62 
 // Pass 2: repeat the same taxname lookup per sequence and queue the comment.
63  CBioseq_CI b_iter2(tse, CSeq_inst::eMol_na);
64  for (; b_iter2; ++b_iter2)
65  {
66  string taxname;
67  FindBioSource(b_iter2->GetSeq_entry_Handle(), taxname);
68 
69  if (taxname.empty() && b_iter2->GetSeq_entry_Handle().HasParentEntry())
70  FindBioSource(b_iter2->GetSeq_entry_Handle().GetParentEntry(), taxname);
71 
 // Only taxnames that collected more than one label get a comment.
 // NOTE(review): the key is looked up three times here (find + two
 // operator[]); a single find() iterator would do the same work.
72  if (!taxname.empty() && taxname_to_ids.find(taxname) != taxname_to_ids.end() && taxname_to_ids[taxname].size() > 1)
73  {
74  CRef<CSeqdesc> new_desc = CreateNewComment(taxname_to_ids[taxname], taxname);
 // The descriptor is created on the entry returned by the bioseq handle's GetParentEntry().
75  composite->AddCommand(*CRef<CCmdCreateDesc>(new CCmdCreateDesc(b_iter2->GetParentEntry(), *new_desc)));
76  }
77  }
 // The composite is executed unconditionally, even if no command was queued.
78  cmdProcessor->Execute(composite.GetPointer());
79 }
80 
81 
83 {
84  vector<string> id_vec;
85  pair<string,string> range;
86  pair<long,long> range_num;
87  for (TNumStrSet::const_iterator id = ids.begin(); id != ids.end(); ++id)
88  {
90  if (num_id == 0)
91  {
92  id_vec.push_back(*id);
93  range_num.first = 0;
94  range_num.second = 0;
95  range.first.clear();
96  range.second.clear();
97  continue;
98  }
99 
100  if (!range.first.empty())
101  {
102  if (num_id == range_num.second+1)
103  {
104  range.second = *id;
105  range_num.second = num_id;
106  continue;
107  }
108 
109  if ( !range.second.empty())
110  {
111  if (range_num.second > range_num.first+1)
112  id_vec.push_back(range.first+"-"+range.second);
113  else
114  {
115  id_vec.push_back(range.first);
116  id_vec.push_back(range.second);
117  }
118 
119  }
120  else
121  {
122  id_vec.push_back(range.first);
123  }
124  }
125  range.first = *id;
126  range.second.clear();
127  range_num.first = num_id;
128  range_num.second = num_id;
129  }
130 
131  if (!range.first.empty())
132  {
133  if ( !range.second.empty())
134  {
135  if (range_num.second > range_num.first+1)
136  id_vec.push_back(range.first+"-"+range.second);
137  else
138  {
139  id_vec.push_back(range.first);
140  id_vec.push_back(range.second);
141  }
142 
143  }
144  else
145  {
146  id_vec.push_back(range.first);
147  }
148  }
149  return NStr::Join(id_vec,", ");
150 }
151 
// Body of the member that this page's index lists as
//   CRef<objects::CSeqdesc> CreateNewComment(const TNumStrSet &ids, const string &taxname)
// NOTE(review): the signature line itself is absent from this listing - confirm.
//
// Returns a new CSeqdesc holding the comment text
//   "GenBank Accession Numbers <list> represent sequences from the <count> segments of <taxname>"
// where <list> is ConcatIds(ids) and <count> is ids.size().
153 {
154  CRef<CSeqdesc> new_desc( new CSeqdesc );
155 
156  string id_size = NStr::NumericToString(ids.size());
157  string id_list = ConcatIds(ids);
158  string new_comment = "GenBank Accession Numbers "+id_list+" represent sequences from the "+id_size+" segments of "+taxname;
159  new_desc->SetComment(new_comment);
160 
161  return new_desc;
162 }
163 
// Body of the member that this page's index lists as
//   string GetBestLabel(const objects::CBioseq_Handle &bsh)
// NOTE(review): the signature line and file lines 167-168 are absent from this
// listing. As shown, `label` is returned exactly as default-constructed.
// The page index references sequence::GetId (eGetId_Best) and
// CSeq_id::GetLabel (eContent, fLabel_GeneralDbIsContent); presumably the
// omitted lines use them to fill `label` - confirm against the real file.
165 {
166  string label;
169  return label;
170 }
171 
172 void CAddFluComments::AddBioSource(const CBioSource& biosource, string &taxname)
173 {
174  if (biosource.IsSetTaxname() && !biosource.GetTaxname().empty())
175  {
176  taxname = biosource.GetTaxname();
177  }
178 }
179 
// Look for source descriptors on `se` and pass each one to AddBioSource, which
// overwrites `taxname` whenever the source carries a non-empty taxname.
// When `se` is a set, its set-level descriptors are scanned the same way and
// GetDesc is called recursively on `**it`.
//
// NOTE(review): file lines 183 and 195 (the opening lines of two loops) are
// absent from this listing. The page index lists FOR_EACH_SEQDESC_ON_SEQENTRY
// and FOR_EACH_SEQENTRY_ON_SEQSET, which are presumably the missing loop
// headers - confirm against the real file.
180 void CAddFluComments::GetDesc(const CSeq_entry& se, string & taxname)
181 {
182 
 // (loop header missing here - see note above)
184  if ((*it)->IsSource()) {
185  AddBioSource ((*it)->GetSource(), taxname);
186  }
187  }
188 
189  if (se.IsSet()) {
 // Descriptors attached to the set itself.
190  FOR_EACH_SEQDESC_ON_SEQSET (it, se) {
191  if ((*it)->IsSource()) {
192  AddBioSource ((*it)->GetSource(), taxname);
193  }
194  }
 // (loop header missing here - see note above)
196  GetDesc (**it, taxname);
197  }
198  }
199 }
200 
201 
// Body of the member that this page's index lists as
//   void FindBioSource(objects::CSeq_entry_Handle tse, string &taxname)
// NOTE(review): the signature line and file line 207 (the declaration of
// `feat`) are absent from this listing. The page index lists CFeat_CI and
// SAnnotSelector, so `feat` is presumably a feature iterator limited to
// biosource features - confirm against the real file.
//
// Fills `taxname` from the descriptors of the complete seq-entry (via GetDesc)
// and then from every feature visited by `feat`; each hit with a non-empty
// taxname overwrites the previous value.
203 {
 // An invalid handle leaves `taxname` unchanged.
204  if (!tse)
205  return;
206  GetDesc (*(tse.GetCompleteSeq_entry()), taxname);
208  while (feat) {
209  AddBioSource (feat->GetData().GetBiosrc(), taxname);
210  ++feat;
211  }
212 }
213 
USING_SCOPE(objects)
string ConcatIds(const TNumStrSet &ids)
void AddBioSource(const objects::CBioSource &biosource, string &taxname)
void FindBioSource(objects::CSeq_entry_Handle tse, string &taxname)
CRef< objects::CSeqdesc > CreateNewComment(const TNumStrSet &ids, const string &taxname)
string GetBestLabel(const objects::CBioseq_Handle &bsh)
void Apply(objects::CSeq_entry_Handle tse, ICommandProccessor *cmdProcessor)
void GetDesc(const objects::CSeq_entry &se, string &taxname)
const string & GetTaxname(void) const
Definition: BioSource.cpp:340
bool IsSetTaxname(void) const
Definition: BioSource.cpp:335
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
void AddCommand(IEditCommand &command)
CFeat_CI –.
Definition: feat_ci.hpp:64
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
Undo/Redo interface for editing operations.
virtual void Execute(IEditCommand *command, wxWindow *window=0)=0
size_type size() const
Definition: map.hpp:148
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
Definition: set.hpp:45
const_iterator begin() const
Definition: set.hpp:135
size_type size() const
Definition: set.hpp:132
const_iterator end() const
Definition: set.hpp:136
parent_type::const_iterator const_iterator
Definition: set.hpp:79
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
@ fLabel_GeneralDbIsContent
For type general, use the database name as the tag and the (text or numeric) key as the content.
Definition: Seq_id.hpp:618
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
Definition: sequence.hpp:101
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const CSeqFeatData & GetData(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
bool HasParentEntry(void) const
Check if current seq-entry has a parent.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static unsigned long StringToULong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned long.
Definition: ncbistr.cpp:665
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ fAllowLeadingSymbols
Ignore leading non-numeric characters.
Definition: ncbistr.hpp:295
static const char label[]
const TBiosrc & GetBiosrc(void) const
Get the variant data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
TComment & SetComment(void)
Select the variant.
Definition: Seqdesc_.hpp:1065
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
range(_Ty, _Ty) -> range< _Ty >
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FOR_EACH_SEQDESC_ON_SEQSET(Itr, Var)
FOR_EACH_SEQDESC_ON_SEQSET EDIT_EACH_SEQDESC_ON_SEQSET.
#define FOR_EACH_SEQDESC_ON_SEQENTRY(Itr, Var)
FOR_EACH_SEQDESC_ON_SEQENTRY EDIT_EACH_SEQDESC_ON_SEQENTRY.
SAnnotSelector –.
Modified on Mon Apr 22 04:04:58 2024 by modify_doxy.py rev. 669887