NCBI C++ ToolKit
autodef_source_desc.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: autodef_source_desc.cpp 93413 2021-04-09 19:12:01Z stakhovv $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin
27 *
28 * File Description:
29 * Generate unique definition lines for a set of sequences using organism
30 * descriptions and feature clauses.
31 */
32 
33 #include <ncbi_pch.hpp>
35 #include <corelib/ncbimisc.hpp>
36 #include <objmgr/seqdesc_ci.hpp>
37 #include <objmgr/bioseq_ci.hpp>
38 #include <objmgr/feat_ci.hpp>
39 #include <objmgr/util/feature.hpp>
40 
42 #include <objects/seq/Seqdesc.hpp>
43 #include <objects/seq/Bioseq.hpp>
44 
45 #include <serial/iterator.hpp>
46 
49 
50 CAutoDefSourceDescription::CAutoDefSourceDescription(const CBioSource& bs, string feature_clauses) : m_BS(bs)
51 {
52  // consider feature clauses when looking for uniqueness
53  m_FeatureClauses = feature_clauses;
54 
55  if (bs.CanGetOrg() && bs.GetOrg().CanGetTaxname()) {
56  m_DescStrings.push_back (bs.GetOrg().GetTaxname());
57  }
58  if (bs.CanGetOrg() && bs.GetOrg().CanGetOrgname() && bs.GetOrg().GetOrgname().CanGetMod()) {
59  ITERATE (COrgName::TMod, modI, bs.GetOrg().GetOrgname().GetMod()) {
60  m_Modifiers.push_back (CAutoDefSourceModifierInfo(true, (*modI)->GetSubtype(), (*modI)->GetSubname()));
61  }
62  }
63  ITERATE (CBioSource::TSubtype, subSrcI, bs.GetSubtype()) {
64  m_Modifiers.push_back (CAutoDefSourceModifierInfo(false, (*subSrcI)->GetSubtype(), (*subSrcI)->GetName()));
65  }
66  std::sort (m_Modifiers.begin(), m_Modifiers.end());
67 }
68 
69 
71 {
72  // copy strings
73  ITERATE (TDescString, string_it, other->GetStrings()) {
74  m_DescStrings.push_back (*string_it);
75  }
76  // copy remaining modifier list
77  ITERATE (TModifierVector, it, other->GetModifiers()) {
78  m_Modifiers.push_back (CAutoDefSourceModifierInfo(*it));
79  }
80  // copy feature clauses
82 }
83 
84 
86 {
87 }
88 
90 {
91  return m_BS;
92 }
93 
94 
95 bool CAutoDefSourceDescription::AddQual (bool isOrgMod, int subtype, bool keepAfterSemicolon)
96 {
97  bool rval = false;
98  TModifierVector::iterator it;
99 
100  it = m_Modifiers.begin();
101  while (it != m_Modifiers.end()) {
102  if (isOrgMod) {
103  if (it->IsOrgMod() && it->GetSubtype() == subtype) {
104  string val = it->GetValue();
105  if (!keepAfterSemicolon) {
106  string::size_type end = NStr::Find(val, ";");
107  if (end != NCBI_NS_STD::string::npos) {
108  val = val.substr(0, end);
109  }
110  }
111  m_DescStrings.push_back (val);
112  it = m_Modifiers.erase(it);
113  rval = true;
114  } else {
115  ++it;
116  }
117  } else {
118  if (!it->IsOrgMod() && it->GetSubtype() == subtype) {
119  string val = it->GetValue();
120  if (!keepAfterSemicolon) {
121  string::size_type end = NStr::Find(val, ";");
122  if (end != NCBI_NS_STD::string::npos) {
123  val = val.substr(0, end);
124  }
125  }
126  m_DescStrings.push_back (val);
127  it = m_Modifiers.erase(it);
128  rval = true;
129  } else {
130  ++it;
131  }
132  }
133  }
134  return rval;
135 }
136 
137 
138 bool CAutoDefSourceDescription::RemoveQual (bool isOrgMod, int subtype)
139 {
140  bool rval = false;
141  TModifierVector::iterator it;
142 
143  it = m_Modifiers.begin();
144  while (it != m_Modifiers.end()) {
145  if (isOrgMod) {
146  if (it->IsOrgMod() && it->GetSubtype() == subtype) {
147  it = m_Modifiers.erase(it);
148  rval = true;
149  } else {
150  ++it;
151  }
152  } else {
153  if (!it->IsOrgMod() && it->GetSubtype() == subtype) {
154  it = m_Modifiers.erase(it);
155  rval = true;
156  } else {
157  ++it;
158  }
159  }
160  }
161  return rval;
162 }
163 
164 
166 {
167  unsigned int k = 0;
168  int rval = 0;
169  TDescString::const_iterator s_it, this_it;
170 
171  s_it = s.GetStrings().begin();
172  this_it = GetStrings().begin();
173  while (s_it != s.GetStrings().end()
174  && this_it != GetStrings().end()
175  && rval == 0) {
176  rval = NStr::Compare (*this_it, *s_it);
177  k++;
178  ++s_it;
179  ++this_it;
180  }
181  if (rval == 0) {
182  if (k < s.GetStrings().size()) {
183  rval = -1;
184  } else if (k < m_DescStrings.size()) {
185  rval = 1;
186  }
187  }
188  if (rval == 0) {
190  }
191  return rval;
192 }
193 
194 
196 {
197  string desc;
198  if (mod_combo) {
199  return mod_combo->GetSourceDescriptionString(m_BS);
200  } else {
201  return m_BS.GetOrg().GetTaxname();
202  }
203 }
204 
205 
207 {
208  unsigned int k;
209 
210  for (k = 0; k < modifier_list.size(); k++) {
211  bool found = false;
212  if (modifier_list[k].IsOrgMod()) {
215  if ((*modI)->GetSubtype() == modifier_list[k].GetOrgModType()) {
216  found = true;
217  modifier_list[k].ValueFound((*modI)->GetSubname() );
218  }
219  }
220  }
221  } else {
222  // get subsource modifiers
223  if (m_BS.CanGetSubtype()) {
225  if ((*subSrcI)->GetSubtype() == modifier_list[k].GetSubSourceType()) {
226  found = true;
227  modifier_list[k].ValueFound((*subSrcI)->GetName());
228  }
229  }
230  }
231  }
232  if (!found) {
233  modifier_list[k].ValueFound("");
234  }
235  }
236 }
237 
238 // tricky HIV records have an isolate and a clone
240 {
241  string tax_name = m_BS.GetOrg().GetTaxname();
242  if (!NStr::Equal(tax_name, "HIV-1") && !NStr::Equal(tax_name, "HIV-2")) {
243  return false;
244  }
245 
246  bool found = false;
247 
248  if (m_BS.CanGetSubtype()) {
250  if ((*subSrcI)->GetSubtype() == CSubSource::eSubtype_clone) {
251  found = true;
252  }
253  }
254  }
255  if (!found) {
256  return false;
257  }
258 
259  found = false;
262  if ((*modI)->GetSubtype() == COrgMod::eSubtype_isolate) {
263  found = true;
264  }
265  }
266  }
267  return found;
268 }
269 
270 
272 {
273  m_IsOrgMod = isOrgMod;
274  m_Subtype = subtype;
275  m_Value = value;
276 }
277 
278 
280 {
281  m_IsOrgMod = other.IsOrgMod();
282  m_Subtype = other.GetSubtype();
283  m_Value = other.GetValue();
284 }
285 
286 
288 {
289 }
290 
291 
293 {
294  if (m_IsOrgMod) {
296  return 3;
297  } else if (m_Subtype == COrgMod::eSubtype_isolate) {
298  return 5;
299  } else if (m_Subtype == COrgMod::eSubtype_cultivar) {
300  return 7;
302  return 8;
303  } else if (m_Subtype == COrgMod::eSubtype_ecotype) {
304  return 9;
305  } else if (m_Subtype == COrgMod::eSubtype_type) {
306  return 10;
307  } else if (m_Subtype == COrgMod::eSubtype_serotype) {
308  return 11;
309  } else if (m_Subtype == COrgMod::eSubtype_authority) {
310  return 12;
311  } else if (m_Subtype == COrgMod::eSubtype_breed) {
312  return 13;
313  }
314  } else {
316  return 0;
318  return 1;
320  return 2;
321  } else if (m_Subtype == CSubSource::eSubtype_clone) {
322  return 4;
324  return 6;
325  }
326  }
327  return 50;
328 }
329 
330 
332 {
333  int rank1, rank2;
334 
335  rank1 = GetRank();
336  rank2 = mod.GetRank();
337 
338  if (rank1 < rank2) {
339  return -1;
340  } else if (rank1 > rank2) {
341  return 1;
342  } else if (IsOrgMod() && !mod.IsOrgMod()) {
343  // prefer subsource to orgmod qualifiers
344  return -1;
345  } else if (!IsOrgMod() && mod.IsOrgMod()) {
346  return 1;
347  } else if (IsOrgMod() && mod.IsOrgMod()) {
348  if (GetSubtype() == mod.GetSubtype()) {
349  return 0;
350  } else {
351  return (GetSubtype() < mod.GetSubtype() ? -1 : 1);
352  }
353  } else {
354  if (GetSubtype() == mod.GetSubtype()) {
355  return 0;
356  } else {
357  return (GetSubtype() < mod.GetSubtype() ? -1 : 1);
358  }
359  }
360 }
361 
362 
void GetAvailableModifiers(TAvailableModifierVector &modifier_list)
int Compare(const CAutoDefSourceDescription &s) const
vector< CAutoDefSourceModifierInfo > TModifierVector
bool RemoveQual(bool isOrgMod, int subtype)
vector< CAutoDefAvailableModifier > TAvailableModifierVector
const TDescString & GetStrings() const
string GetComboDescription(IAutoDefCombo *mod_combo)
const TModifierVector & GetModifiers() const
const string & GetFeatureClauses() const
bool AddQual(bool isOrgMod, int subtype, bool keepAfterSemicolon)
CAutoDefSourceDescription(const CBioSource &bs, string feature_clauses="")
const CBioSource & GetBioSource() const
CAutoDefSourceModifierInfo(bool isOrgMod, int subtype, string value)
int Compare(const CAutoDefSourceModifierInfo &mod) const
virtual string GetSourceDescriptionString(const CBioSource &biosrc)=0
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const CBioSource * GetBioSource(const CBioseq &bioseq)
Retrieve the BioSource object for a given bioseq handle.
Definition: sequence.cpp:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:539
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
Definition: BioSource_.hpp:533
bool CanGetOrg(void) const
Check if it is safe to call GetOrg method.
Definition: BioSource_.hpp:503
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:509
@ eSubtype_endogenous_virus_name
Definition: SubSource_.hpp:109
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:839
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:372
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
Definition: OrgName_.hpp:827
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
bool CanGetOrgname(void) const
Check if it is safe to call GetOrgname method.
Definition: Org_ref_.hpp:535
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:541
@ eSubtype_authority
Definition: OrgMod_.hpp:107
@ eSubtype_cultivar
Definition: OrgMod_.hpp:93
@ eSubtype_strain
Definition: OrgMod_.hpp:85
@ eSubtype_specimen_voucher
Definition: OrgMod_.hpp:106
@ eSubtype_serotype
Definition: OrgMod_.hpp:90
@ eSubtype_ecotype
Definition: OrgMod_.hpp:110
@ eSubtype_isolate
Definition: OrgMod_.hpp:100
constexpr auto sort(_Init &&init)
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Miscellaneous common-use basic types and functionality.
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
#define const
Definition: zconf.h:232
Modified on Tue Apr 23 07:38:52 2024 by modify_doxy.py rev. 669887