NCBI C++ ToolKit
sequpd.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sequpd.cpp 43960 2019-09-27 14:43:29Z asztalos $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrea Asztalos
27  */
28 
29 
30 #include <ncbi_pch.hpp>
31 
34 
39 #include <serial/iterator.hpp>
40 
42 #include <objmgr/seqdesc_ci.hpp>
43 #include <objmgr/bioseq_ci.hpp>
44 #include <objmgr/impl/synonyms.hpp>
45 #include <objmgr/scope.hpp>
46 #include <objmgr/seq_vector.hpp>
47 
54 
58 
61 
// Body of sequpd::GetGoodSeqIdHandle(const CBioseq_Handle& bsh).
// NOTE(review): the signature line and the statement declaring `idh` are not
// part of this listing; `idh` is presumably obtained from `bsh` - confirm.
// Returns `idh` unchanged unless it is a GI, in which case a GenBank or local
// synonym of the bioseq is returned instead when one exists.
63 {
    // Anything other than a GI is already acceptable.
65  if (!idh.IsGi()) {
66  return idh;
67  }
68 
    // Look through the synonyms of the bioseq; if several GenBank (or several
    // local) ids are present, the last one visited is the one that is kept.
69  CSeq_id_Handle gb_idh, lcl_idh;
70  CConstRef<CSynonymsSet> synonyms = bsh.GetSynonyms();
71  if (synonyms && !synonyms->empty()) {
72  for (auto& it : *synonyms) {
73  if (it.Which() == CSeq_id::e_Genbank) {
74  gb_idh = it;
75  }
76  else if (it.Which() == CSeq_id::e_Local) {
77  lcl_idh = it;
78  }
79  }
80  }
81 
    // Replace the GI: a GenBank id is preferred, a local id is the fallback.
    // When neither was found the GI handle itself is returned.
82  if (gb_idh) {
83  idh.Swap(gb_idh);
84  }
85  else if (lcl_idh) {
86  idh.Swap(lcl_idh);
87  }
88 
89  return idh;
90 }
91 
92 bool sequpd::HaveIdenticalResidues(const objects::CBioseq_Handle& bsh1, const objects::CBioseq_Handle& bsh2)
93 {
94  if (!bsh1 && !bsh2) {
95  return true;
96  }
97  else if (!bsh1 || !bsh2) {
98  return false;
99  }
100  else if (bsh1.GetBioseqLength() != bsh2.GetBioseqLength()) {
101  return false;
102  }
103  else if (bsh1.IsNucleotide() && bsh2.IsProtein()) {
104  return false;
105  }
106  else if (bsh1.IsProtein() && bsh2.IsNucleotide()) {
107  return false;
108  }
109 
110  CSeqVector old_seqvec = bsh1.GetSeqVector(CBioseq_Handle::eCoding_Iupac, eNa_strand_plus);
111  CSeqVector upd_seqvec = bsh2.GetSeqVector(CBioseq_Handle::eCoding_Iupac, eNa_strand_plus);
112 
113  string old_seq;
114  old_seqvec.GetSeqData(0, bsh1.GetBioseqLength(), old_seq);
115  string upd_seq;
116  upd_seqvec.GetSeqData(0, bsh2.GetBioseqLength(), upd_seq);
117 
118  return NStr::EqualNocase(old_seq, upd_seq);
119 }
120 
/// Rewrites Seq-ids found under annotations so that they refer to the new ID of `bseq`.
/// Every CSeq_id visited that compares equal (CSeq_id::e_YES) to one of `upd_ids`
/// is overwritten with the first ID of `bseq`.
/// NOTE(review): the loop header that declares `ait` is missing from this listing;
/// it presumably walks `annot` - confirm against the original file.
121 static void s_FixCollidingIDs_Annot(CBioseq& bseq, CSeq_entry::TAnnot& annot, const vector<CRef<CSeq_id>>& upd_ids)
122 {
    // the replacement ID is whatever currently comes first on the bioseq
123  CRef<CSeq_id> new_id = bseq.GetId().front();
125  for (CTypeIterator<CSeq_id> id_iter(**ait); id_iter; ++id_iter) {
126  CSeq_id& id = *id_iter;
127  for (auto& it : upd_ids) {
128  if (id.Compare(it.GetObject()) == CSeq_id::e_YES) {
        // one hit is enough: the id is rewritten in place and the search stops
129  id.Assign(*new_id);
130  break;
131  }
132  }
133  }
134  }
135 }
136 
137 static const char* kUpdateSuffix = "_update";
138 
// Body of sequpd::FixCollidingIDs_Bioseq(CBioseq& bseq, const CBioseq::TId& seq_ids)
// (the signature line is not part of this listing).
// If any ID of `bseq` equals one of `seq_ids`, all IDs of `bseq` are replaced by a
// single local ID built from one of the old labels plus kUpdateSuffix, and the
// annotations on the bioseq (and on an enclosing nuc-prot set) are re-pointed.
// Throws CSeqUpdateException (eInternal) when no label could be derived.
140 {
    // Step 1: is there at least one ID shared with `seq_ids`?
141  bool has_conflict = false;
142  for (auto& upd_id : bseq.GetId()) {
143  for (auto& old_id : seq_ids) {
144  if (upd_id->Compare(*old_id) == CSeq_id::e_YES) {
145  has_conflict = true;
146  break;
147  }
148  }
149  if (has_conflict) {
150  break;
151  }
152  }
153 
    // nothing collides, the bioseq is left untouched
154  if (!has_conflict) {
155  return;
156  }
157 
158  // save the original IDs of the update sequence
    // (deep copies, because the IDs on the bioseq are reset further down)
159  vector<CRef<CSeq_id>> update_ids;
160  for (auto& it : bseq.GetId()) {
161  CRef<CSeq_id> newid(new CSeq_id);
162  newid->Assign(it.GetObject());
163  update_ids.push_back(newid);
164  }
165 
    // Step 2: collect labels. A lone ID always lands in gbID_label, whatever its
    // type; otherwise labels are gathered per ID type (local, GenBank, other).
166  string lclID_label, gbID_label, accID_label;
167  if (bseq.GetId().size() == 1) {
168  bseq.GetId().front()->GetLabel(&gbID_label, CSeq_id::eContent);
169  }
170  else {
171  ITERATE(CBioseq::TId, upd_id, bseq.GetId()) {
172  if ((*upd_id)->IsLocal()) {
173  (*upd_id)->GetLabel(&lclID_label, CSeq_id::eContent);
174  }
175  else if ((*upd_id)->IsGenbank()) {
176  (*upd_id)->GetLabel(&gbID_label, CSeq_id::eContent);
177  }
178  else if ((*upd_id)->IsOther()) {
        // presumably this overload stores the accession in the string and the
        // version number in `version` - TODO confirm; the two are joined as "acc.version"
179  int version;
180  (*upd_id)->GetLabel(&accID_label, &version, CSeq_id::eContent);
181  accID_label.push_back('.');
182  accID_label.append(NStr::NumericToString(version));
183  }
184  }
185  }
186 
    // Step 3: build the replacement local ID; label priority is
    // gbID_label, then lclID_label, then accID_label.
187  CRef<CSeq_id> newUpdate_Id;
188  if (!gbID_label.empty()) {
189  newUpdate_Id.Reset(new CSeq_id(CSeq_id::e_Local, gbID_label + kUpdateSuffix));
190  }
191  else if (!lclID_label.empty()) {
192  newUpdate_Id.Reset(new CSeq_id(CSeq_id::e_Local, lclID_label + kUpdateSuffix));
193  }
194  else if (!accID_label.empty()) {
195  newUpdate_Id.Reset(new CSeq_id(CSeq_id::e_Local, accID_label + kUpdateSuffix));
196  }
197 
198  if (!newUpdate_Id) {
199  NCBI_THROW(CSeqUpdateException, eInternal, "Update IDs could not be fixed.");
200  }
201 
    // Step 4: the new local ID becomes the only ID of the bioseq, and the
    // annotations attached to the bioseq are switched over to it.
202  bseq.ResetId();
203  bseq.SetId().push_back(newUpdate_Id);
204  s_FixCollidingIDs_Annot(bseq, bseq.SetAnnot(), update_ids);
205 
206  CSeq_entry* parent = bseq.GetParentEntry();
207  if (!parent) {
208  return;
209  }
210 
    // annotations stored on an enclosing nuc-prot set are fixed as well
211  CSeq_entry* parent_set = parent->GetParentEntry();
212  if (parent_set && parent_set->IsSet() && parent_set->GetSet().GetClass() == CBioseq_set::eClass_nuc_prot) {
213  s_FixCollidingIDs_Annot(bseq, parent_set->SetAnnot(), update_ids);
214  }
215 }
216 
217 // return the matching old sequence for the update sequence if there is one
218 static CBioseq_Handle s_GetMatchingSequence(CSeq_inst::EMol type, CSeq_entry_Handle& oldSeq, const CBioseq& upd_bseq, bool& collide, const sequpd::TSeqIdHMap& matches);
219 
// Body of sequpd::FindMatches(type, oldSeq, updEntry, matches, unmatched)
// (the signature line is not part of this listing).
// Visits every CBioseq inside `updEntry` and looks for a counterpart of molecule
// class `type` in `oldSeq`. A hit is stored in `matches` as (old id handle ->
// update id handle); sequences without a counterpart have their first id
// appended to `unmatched`. Throws CSeqUpdateException (eReading) when a second
// update sequence maps onto an old sequence that is already in `matches`.
221 {
222  for (CTypeIterator<CBioseq> it(updEntry); it; ++it) {
223  CBioseq& updSeq = *it;
224  if (updSeq.GetInst().IsSetMol()) {
        // skip update sequences whose molecule class (nucleic acid / protein)
        // differs from the requested `type`
225  if (!(CSeq_inst::IsNa(updSeq.GetInst().GetMol()) && CSeq_inst::IsNa(type)) &&
226  !(CSeq_inst::IsAa(updSeq.GetInst().GetMol()) && CSeq_inst::IsAa(type))) {
227  continue;
228  }
229  }
230  else {
231  // the molecule type is not set on the update sequence: it is recorded as unmatched
        // NOTE(review): there is no `continue` here, so the sequence still goes
        // through the matching below and may be appended to `unmatched` a second
        // time, or end up in both `unmatched` and `matches` - confirm this is intended.
232  CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*updSeq.GetId().front());
233  unmatched.push_back(idh);
234  }
235 
236  bool collide = false;
237  CBioseq_Handle match_bsh = s_GetMatchingSequence(type, oldSeq, updSeq, collide, matches);
238  if (match_bsh) {
239  if (collide) {
240  // fix ID conflicts
        // (this rewrites the IDs of updSeq, so its ids must be re-read afterwards)
241  const CBioseq::TId& old_ids = match_bsh.GetCompleteBioseq()->GetId();
242  sequpd::FixCollidingIDs_Bioseq(updSeq, old_ids);
243  }
244 
245  CSeq_id_Handle old_idh = sequpd::GetGoodSeqIdHandle(match_bsh);
246  if (matches.find(old_idh) != matches.end()) {
247  NCBI_THROW(CSeqUpdateException, eReading, "Non-unique sequence IDs in update sequences!");
248  }
249  else {
250  CSeq_id_Handle upd_idh = CSeq_id_Handle::GetHandle(*updSeq.GetId().front());
251  matches.emplace(old_idh, upd_idh);
252  }
253  }
254  else {
255  // no matching sequence has been found for the update sequence
256  CSeq_id_Handle idh = CSeq_id_Handle::GetHandle(*updSeq.GetId().front());
257  unmatched.push_back(idh);
258  }
259  }
260 }
261 
262 static bool s_MatchSeqIds(CScope& scope, const CSeq_id& old_id, const CSeq_id& upd_id, bool& collide);
263 
264 static CBioseq_Handle s_GetMatchingSequence(CSeq_inst::EMol type, CSeq_entry_Handle& oldSeq, const CBioseq& upd_bseq, bool& collide, const sequpd::TSeqIdHMap& matches)
265 {
266  for (CBioseq_CI b_iter(oldSeq, type); b_iter; ++b_iter) {
267  CSeq_id_Handle old_id_handle = sequpd::GetGoodSeqIdHandle(*b_iter);
268  if (matches.find(old_id_handle) != matches.end())
269  continue;
270 
271  CConstRef<CBioseq> oldBseq = b_iter->GetBioseqCore();
272  if (oldBseq->IsSetId() && upd_bseq.IsSetId()) {
273  for (auto& id1 : oldBseq->GetId()) {
274  for (auto& id2 : upd_bseq.GetId()) {
275  if (s_MatchSeqIds(oldSeq.GetScope(), *id1, *id2, collide))
276  return *b_iter;
277  }
278  }
279  }
280  }
281  return CBioseq_Handle();
282 }
283 
285 
286 static bool s_MatchSeqIds(CScope& scope, const CSeq_id& old_id, const CSeq_id& upd_id, bool& collide)
287 {
288  collide = false;
289  CSeq_id::E_SIC cmp_type = old_id.Compare(upd_id);
290  if (cmp_type == CSeq_id::e_YES) {
291  collide = true;
292  return true;
293  }
294  else if (cmp_type == CSeq_id::e_DIFF) { // different types, compare their contents
295  string old_label(kEmptyStr), upd_label(kEmptyStr);
296  old_id.GetLabel(&old_label, CSeq_id::eContent);
297  upd_id.GetLabel(&upd_label, CSeq_id::eContent);
298  SIZE_TYPE old_dot = NStr::Find(old_label, ".");
299  SIZE_TYPE upd_dot = NStr::Find(upd_label, ".");
300  if (old_dot != NPOS) {
301  old_label = old_label.substr(0, old_dot);
302  }
303  if (upd_dot != NPOS) {
304  upd_label = upd_label.substr(0, upd_dot);
305  }
306  if (NStr::EqualCase(old_label, upd_label)) {
307  return true;
308  }
309  }
310  else if (cmp_type == CSeq_id::e_NO && old_id.IsLocal() && upd_id.IsLocal()) {
311  CConstRef<CSeq_id> orig_id = s_GetOriginalId(scope.GetBioseqHandle(old_id));
312  if (orig_id && orig_id->Compare(upd_id) == CSeq_id::e_YES) {
313  return true;
314  }
315  }
316 
317  return false;
318 }
319 
// Body of s_GetOriginalId(const CBioseq_Handle& bsh), returning CConstRef<CSeq_id>
// (the signature line is not part of this listing).
// For a protein the local id of the bioseq (or null) is returned. For other
// sequences the user-object descriptors are searched for a string field labelled
// "LocalId" (case-insensitive), and a local CSeq_id is built from its value.
321 {
322  if (bsh.IsAa()) {
323  return bsh.GetLocalIdOrNull();
324  }
325 
    // stays empty when no suitable field is found
326  CConstRef<CSeq_id> orig_id;
327  for (CSeqdesc_CI desc_it(bsh, CSeqdesc::e_User, 1); desc_it; ++desc_it) {
328  const CUser_object& usr = desc_it->GetUser();
    // NOTE(review): one line is missing from the listing here; judging by the
    // closing braces below it opens a block, presumably an `if` on `usr` - confirm.
330  ITERATE(CUser_object::TData, it, usr.GetData()) {
331  if ((*it)->IsSetLabel() && (*it)->GetLabel().IsStr()
332  && NStr::EqualNocase((*it)->GetLabel().GetStr(), "LocalId")
333  && (*it)->IsSetData()
334  && (*it)->GetData().IsStr()) {
335 
336  string id_str = (*it)->GetData().GetStr();
337  orig_id.Reset(new CSeq_id(CSeq_id::e_Local, id_str));
        // leaves the field loop only; the descriptor loop keeps running, so a
        // later descriptor with a "LocalId" field overrides this value
338  break;
339  }
340  }
341  }
342  }
343 
344  return orig_id;
345 }
346 
347 static vector<CConstRef<CSeq_align> > s_RunBlast2NASeq(const CBioseq_Handle& sh, const CBioseq_Handle& qh, bool accept_atleast_one, ICanceled* canceled);
348 static vector<CConstRef<CSeq_align> > s_RunBlast2NWSeq(const CBioseq_Handle& sh, const CBioseq_Handle& qh);
349 static vector<CConstRef<CSeq_align> > s_RunBlast2AASeq(const CBioseq_Handle& sh, const CBioseq_Handle& qh);
350 
351 vector<CConstRef<CSeq_align> > sequpd::RunBlast2Seq(const CBioseq_Handle& sh, const CBioseq_Handle& qh, bool accept_atleast_one, ICanceled* canceled)
352 {
353  if (!sh || !qh)
354  return vector<CConstRef<CSeq_align> >();
355 
356  if (sh.IsNucleotide() && qh.IsNucleotide()) {
357  return s_RunBlast2NASeq(sh, qh, accept_atleast_one, canceled);
358  }
359  else if (sh.IsProtein() && qh.IsProtein()) {
360  return s_RunBlast2NWSeq(sh, qh);
361  }
362  else {
363  NCBI_THROW(CSeqUpdateException, eAlignment, "Mismatch in sequence type, cannot form alignment.");
364  }
365 
366  return vector<CConstRef<CSeq_align> >();
367 }
368 
// File-local alignment scorer handed to CNgAligner in s_RunBlast2NASeq.
// NOTE(review): several lines of this block (macro loop headers, some
// conditions and AddScore arguments) are missing from this listing, so the
// comments below describe only what is visible.
369 namespace
370 {
    /// Scorer that attaches a set of standard scores to every alignment of a result set.
371  class CGPipeAlignmentScorer : public IAlignmentScorer {
372  public:
    /// Bit flags selecting which scores s_AddStandardAlignmentScores() adds.
373  enum EAlignScoreTypes {
374  /// add BLAST-style 'num_ident' score
375  fScore_Identities = 0x001,
376 
377  /// add a 'mismatch' score with a count of mismatches
378  fScore_Mismatches = 0x002,
379 
380  /// add a 'gap_count' score
381  fScore_GapCount = 0x004,
382 
383  /// add scores for ungapped and gapped percent identity
384  fScore_PercentIdentity = 0x008,
385 
386  /// add a score for percent coverage of query (sequence 0)
387  fScore_PercentCoverage = 0x010,
388 
389  /// default flags: everything
390  fScore_Default = 0xffffffff
391  };
392 
394 
    /// Walks the nested result containers down to the individual alignments and
    /// adds the default score set to each of them.
395  void ScoreAlignments(TAlignResultsRef results, CScope& scope)
396  {
398  result_iter, results->Get()) {
400  assm_iter, result_iter->second->Get()) {
402  query_iter, assm_iter->second) {
403  NON_CONST_ITERATE(CSeq_align_set::Tdata, it, query_iter->second->Set()) {
404  CSeq_align& align = **it;
405  s_AddStandardAlignmentScores(scope, align, fScore_Default);
406 
407  /// additionally, add the gaponly version, used in gbDNA
408  CScoreBuilder sb;
410  }
411  }
412  }
413  }
414  }
415  private:
    /// Adds to `align` the scores selected by `flags` (a combination of EAlignScoreTypes).
416  static void s_AddStandardAlignmentScores(CScope& scope, CSeq_align& align, int flags);
417  };
418 
419  void CGPipeAlignmentScorer::s_AddStandardAlignmentScores(CScope& scope, CSeq_align& align, int flags)
420  {
421  CScoreBuilder sb;
422 
425  /// this automatically adds num_ident and num_mismatch
426  sb.AddScore(scope, align,
428  sb.AddScore(scope, align,
430  }
431  else if (flags & fScore_Identities) {
432  sb.AddScore(scope, align, CSeq_align::eScore_IdentityCount);
433  }
434  else if (flags & fScore_Mismatches) {
435  sb.AddScore(scope, align, CSeq_align::eScore_MismatchCount);
436  }
437  }
438 
439  if (flags & fScore_GapCount) {
440  /// FIXME: add eScore_GapCount to CSeq_align, CScoreBuilder
441  //sb.AddScore(scope, align, CScoreBuilder::eScore_GapCount);
    // until then the gap count is computed here and stored as the named score "gap_count"
442  int gap_count = sb.GetGapCount(align);
443  align.SetNamedScore("gap_count", gap_count);
444  }
445 
448  }
449  }
450 } // namespace
451 
/// Aligns two nucleotide sequences with CNgAligner using BLAST-based aligners.
///
/// @param sh                  subject sequence
/// @param qh                  query sequence; must live in the same scope as `sh`
/// @param accept_atleast_one  when true, one more filter ("align_length > 2") is added
/// @param canceled            optional; when set it is polled through the BLAST
///                            interrupt callback
/// @return the alignments produced by CNgAligner::Align(); empty when the
///         scopes differ or a location could not be created
/// NOTE(review): the lines declaring `query` and `subject`, and a few other
/// statements, are missing from this listing.
452 static vector<CConstRef<CSeq_align> > s_RunBlast2NASeq(const CBioseq_Handle& sh, const CBioseq_Handle& qh, bool accept_atleast_one, ICanceled* canceled)
453 {
454  vector<CConstRef<CSeq_align> > align_vector;
455 
    // different scopes are reported and yield an empty result
456  if (&(sh.GetScope()) != &(qh.GetScope())) {
457  LOG_POST(Error << "Both sequences should be in the same scope");
458  return align_vector;
459  }
460 
461  // both sequences should be in the same scope
462  CNgAligner ng_aligner(sh.GetScope());
463 
    // presumably (0, 0) requests a location covering the whole sequence - TODO confirm
464  CRef<CSeq_loc> query_seqloc = qh.GetRangeSeq_loc(0, 0);
465  CRef<CSeq_loc> subject_seqloc = sh.GetRangeSeq_loc(0, 0);
466  if (query_seqloc.IsNull() || subject_seqloc.IsNull()) {
467  return align_vector;
468  }
469 
471  query->SetLocList().push_back(query_seqloc);
472  ng_aligner.SetQuery(query);
473 
475  subject->SetLocList().push_back(subject_seqloc);
476  ng_aligner.SetSubject(subject);
477 
    // Interrupt callback: reports "not cancelled" when no user data is attached,
    // otherwise asks the ICanceled object stored in user_data.
478  auto cb = [](SBlastProgress* prog) -> Boolean
479  {
480  if (!prog || !prog->user_data)
481  return false;
482  return reinterpret_cast<ICanceled*>(prog->user_data)->IsCanceled();
483  };
484 
    // Subjects longer than 12000 residues get an additional aligner with a
    // large word size (1200), registered before the regular one.
485  TSeqPos seqLength = sh.GetBioseqLength();
486  bool useHiWordAligner = (seqLength > 12000);
487  if (useHiWordAligner) {
488  CRef<blast::CBlastNucleotideOptionsHandle> opts(new blast::CBlastNucleotideOptionsHandle);
489  opts->SetTraditionalBlastnDefaults();
490  blast::CBlastOptions& options = opts->SetOptions();
491 
492  options.SetWordSize(1200);
493  options.SetEvalueThreshold(1e-6);
494  options.SetBestHitOverhang(0.1); // best_hit_overhang
495  options.SetBestHitScoreEdge(0.1); // best_hit_score_edge
496 
497  CRef<CBlastAligner> blastAligner(new CBlastAligner(*opts, 0));
498  if (canceled)
499  blastAligner->SetInterruptCallback(cb, canceled);
500 
501  ng_aligner.AddAligner(blastAligner);
503  }
504 
    // Regular aligner with word size 12. Its second constructor argument is 1
    // when the large-word aligner was added and 0 otherwise (presumably an
    // ordering/threshold index - TODO confirm).
505  CRef<blast::CBlastNucleotideOptionsHandle> opts(new blast::CBlastNucleotideOptionsHandle);
506  opts->SetTraditionalBlastnDefaults();
507  blast::CBlastOptions& options = opts->SetOptions();
508 
509  options.SetWordSize(12);
510  options.SetEvalueThreshold(1e-6);
511  options.SetBestHitOverhang(0.1); // best_hit_overhang
512  options.SetBestHitScoreEdge(0.1); // best_hit_score_edge
513 
514  CRef<CBlastAligner> blastAligner(new CBlastAligner(*opts, useHiWordAligner ? 1 : 0));
515 
516  if (canceled)
517  blastAligner->SetInterruptCallback(cb, canceled);
518 
519  ng_aligner.AddAligner(blastAligner);
521  //ng_aligner.AddAligner(new CInversionMergeAligner(1));
522 
523  // adding scores
525  ng_aligner.AddScorer(new CGPipeAlignmentScorer());
526  ng_aligner.AddScorer(new CCommonComponentScorer());
527 
528  // add filters
    // progressively looser identity/coverage requirements; presumably the first
    // argument is the rank in which the filters are tried - TODO confirm
529  ng_aligner.AddFilter(new CQueryFilter(0, "pct_identity_gapopen_only >= 99.5 AND pct_coverage >= 99"));
530  ng_aligner.AddFilter(new CQueryFilter(1, "pct_identity_gapopen_only >= 95 AND pct_coverage >= 95"));
531  ng_aligner.AddFilter(new CQueryFilter(2, "pct_identity_gapopen_only >= 95 AND pct_coverage >= 50"));
532  ng_aligner.AddFilter(new CQueryFilter(3, "pct_identity_gapopen_only >= 80 AND pct_coverage >= 25"));
533  if (accept_atleast_one) {
534  ng_aligner.AddFilter(new CQueryFilter(4, "align_length > 2"));
535  }
536 
537  CRef<CSeq_align_set> align = ng_aligner.Align();
538 
    // copy the resulting alignments into the returned vector
539  if (align && align->IsSet()) {
540  ITERATE(CSeq_align_set::Tdata, al_it, align->Get()) {
541  align_vector.push_back(CConstRef<CSeq_align>((*al_it)));
542  }
543  }
544 
545  return align_vector;
546 }
547 
548 static vector<CConstRef<CSeq_align> > s_RunBlast2NWSeq(const CBioseq_Handle& sh, const CBioseq_Handle& qh)
549 {
550  vector<CConstRef<CSeq_align> > align_vector;
551 
554 
555  string sSeq, qSeq;
556  sVec.GetSeqData(0, sVec.size(), sSeq);
557  sVec.GetSeqData(0, qVec.size(), qSeq);
558  NStr::ToUpper(sSeq);
559  NStr::ToUpper(qSeq);
560 
561  CRef<CNWAligner> aligner(new CNWAligner(sSeq.c_str(), sSeq.size(), qSeq.c_str(), qSeq.size(), &NCBISM_Blosum62));
562  CNWAligner::TScore score = aligner->Run();
563 
564  CNWFormatter formatter(*aligner);
565  CRef<CSeq_align> align =
566  formatter.AsSeqAlign(0, eNa_strand_plus, 0, eNa_strand_plus);
567 
568  align->SetSegs().SetDenseg().SetIds()[0]->Assign(*sh.GetSeqId());
569  align->SetSegs().SetDenseg().SetIds()[1]->Assign(*qh.GetSeqId());
570 
571  align_vector.push_back(align);
572 
573  return align_vector;
574 }
575 
576 static vector<CConstRef<CSeq_align> > s_RunBlast2AASeq(const CBioseq_Handle& sh, const CBioseq_Handle& qh)
577 {
578  vector<CConstRef<CSeq_align> > align_vector;
579 
580  CRef<CSeq_loc> query_seqloc = qh.GetRangeSeq_loc(0, 0, eNa_strand_plus);
581  CRef<CSeq_loc> subject_seqloc = sh.GetRangeSeq_loc(0, 0, eNa_strand_plus); //is strand correct?
582 
583  blast::SSeqLoc query(query_seqloc.GetPointerOrNull(), &qh.GetScope());
584  blast::SSeqLoc subject(subject_seqloc.GetPointerOrNull(), &sh.GetScope());
585 
586  try {
587  blast::CBlastProteinOptionsHandle prot_opts_handle;
588  prot_opts_handle.SetEvalueThreshold(1e-6);
589  prot_opts_handle.SetWordThreshold(100.0);
590  prot_opts_handle.Validate();
591 
592  blast::CBl2Seq blaster(query, subject, prot_opts_handle);
593  blast::TSeqAlignVector seqaligns = blaster.Run();
594 
595  if (!seqaligns.empty()) {
596  ITERATE(blast::TSeqAlignVector, it, seqaligns) {
597  if ((*it)->IsSet()) {
598  ITERATE(CSeq_align_set::Tdata, al_it, (*it)->Get()) {
599  align_vector.push_back(CConstRef<CSeq_align>((*al_it)));
600  }
601  }
602  }
603  }
604  }
605  catch (const blast::CBlastException& e) {
606  ERR_POST(Error << string(e.what()));
607  NCBI_THROW(CSeqUpdateException, eAlignment, "Options or input parameters were not accepted for Blast");
608  }
609 
610  return align_vector;
611 }
612 
613 bool sequpd::CompareAlignments(const CSeq_align& align_first, const CSeq_align& align_sec)
614 {
615  const auto length_first = align_first.GetAlignLength();
616  const auto length_sec = align_sec.GetAlignLength();
617  return length_first >= length_sec;
618 }
619 
Declares the CBlastProteinOptionsHandle class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CSeq_entry * GetParentEntry(void) const
Definition: Bioseq.hpp:174
CConstRef –.
Definition: ncbiobj.hpp:1266
void ScoreAlignments(TAlignResultsRef results, CScope &scope)
@ eTreeAlignMerger
Use the new (CTreeAlignMerger) merge algorithm.
void SetQuery(ISequenceSet *Set)
Definition: ngalign.cpp:76
void SetSubject(ISequenceSet *Set)
Definition: ngalign.cpp:82
TAlignSetRef Align()
Definition: ngalign.cpp:110
void AddFilter(IAlignmentFilter *Filter)
Definition: ngalign.cpp:88
void AddAligner(IAlignmentFactory *Aligner)
Definition: ngalign.cpp:94
void AddScorer(IAlignmentScorer *Scorer)
Definition: ngalign.cpp:100
CScope –.
Definition: scope.hpp:92
int GetGapCount(const CSeq_align &align)
Compute the number of gaps in the alignment.
void AddScore(CScope &scope, CSeq_align &align, EScoreType score)
deprecated: use CSeq_align::EScoreType directly
Sequence update exception class.
CSeqVector –.
Definition: seq_vector.hpp:65
@ eScore_PercentIdentity_GapOpeningOnly
Definition: Seq_align.hpp:165
@ eScore_PercentIdentity_Gapped
Definition: Seq_align.hpp:163
@ eScore_PercentIdentity_Ungapped
Definition: Seq_align.hpp:164
@ eScore_PercentCoverage
Definition: Seq_align.hpp:168
@ eScore_IdentityCount
Definition: Seq_align.hpp:145
@ eScore_MismatchCount
Definition: Seq_align.hpp:154
void SetNamedScore(const string &id, int score)
Definition: Seq_align.cpp:636
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
Definition: Seq_align.cpp:1993
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
TAnnot & SetAnnot(void)
Definition: Seq_entry.cpp:195
list< CRef< CSeq_annot > > TAnnot
Definition: Seq_entry.hpp:86
CSeq_entry * GetParentEntry(void) const
Definition: Seq_entry.hpp:131
bool IsAa(void) const
Definition: Seq_inst.hpp:113
bool IsNa(void) const
Definition: Seq_inst.hpp:106
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
EObjectType GetObjectType() const
Interface for testing cancellation request in a long lasting operation.
Definition: icanceled.hpp:51
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
static void FindMatches(objects::CSeq_inst::EMol type, objects::CSeq_entry_Handle &oldSeq, objects::CSeq_entry &updEntry, TSeqIdHMap &matches, TSeqIdHVector &unmatched)
Definition: sequpd.cpp:220
static bool CompareAlignments(const objects::CSeq_align &align_first, const objects::CSeq_align &align_sec)
Returns true if the first alignment is 'better' than the second one. Compares the Blast scores and in ...
Definition: sequpd.cpp:613
static vector< CConstRef< objects::CSeq_align > > RunBlast2Seq(const objects::CBioseq_Handle &subject, const objects::CBioseq_Handle &query, bool accept_atleast_one, ICanceled *canceled=nullptr)
Definition: sequpd.cpp:351
static bool HaveIdenticalResidues(const objects::CBioseq_Handle &bsh1, const objects::CBioseq_Handle &bsh2)
Definition: sequpd.cpp:92
vector< objects::CSeq_id_Handle > TSeqIdHVector
Definition: sequpd.hpp:56
static void FixCollidingIDs_Bioseq(objects::CBioseq &bseq, const objects::CBioseq::TId &seq_ids)
Definition: sequpd.cpp:139
static objects::CSeq_id_Handle GetGoodSeqIdHandle(const objects::CBioseq_Handle &bsh)
Definition: sequpd.cpp:62
static uch flags
CRef< objects::CSeq_align > AsSeqAlign(TSeqPos query_start=0, objects::ENa_strand query_strand=objects::eNa_strand_plus, TSeqPos subj_start=0, objects::ENa_strand subj_strand=objects::eNa_strand_plus, int SAFF_flags=eSAFF_None) const
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
E_SIC
Compare return values.
Definition: Seq_id.hpp:579
bool IsGi(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Swap(CSeq_id_Handle &idh)
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
Definition: Seq_id.cpp:411
@ e_NO
SeqIds compared, but are different.
Definition: Seq_id.hpp:582
@ e_DIFF
different SeqId types-can't compare
Definition: Seq_id.hpp:581
@ e_YES
SeqIds compared, are equivalent.
Definition: Seq_id.hpp:583
@ eContent
Untagged human-readable accession or the like.
Definition: Seq_id.hpp:605
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CConstRef< CSeq_id > GetLocalIdOrNull(void) const
bool IsNucleotide(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CSeq_id_Handle GetAccessSeq_id_Handle(void) const
Get any CSeq_id_Handle handle that can be used to access this bioseq Use GetSeq_id_Handle() if it's n...
TSeqPos GetBioseqLength(void) const
bool IsAa(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
bool IsProtein(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CScope & GetScope(void) const
Get scope this handle belongs to.
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
CConstRef< CSynonymsSet > GetSynonyms(void) const
Get the bioseq's synonyms.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TObjectType * GetPointerOrNull(void) THROWS_NONE
Get pointer value.
Definition: ncbiobj.hpp:986
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
Definition: ncbistr.hpp:5325
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
Definition: ncbistr.hpp:5353
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
const TData & GetData(void) const
Get the Data member data.
vector< CRef< CUser_field > > TData
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
bool IsSet(void) const
Check if a value has been assigned to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsLocal(void) const
Check if variant Local is selected.
Definition: Seq_id_.hpp:775
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ e_Local
local use
Definition: Seq_id_.hpp:95
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
@ eClass_nuc_prot
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void ResetId(void)
Reset Id data member.
Definition: Bioseq_.cpp:54
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
Definition: Seq_inst_.hpp:593
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:290
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:612
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
bool IsSetId(void) const
equivalent identifiers Check if a value has been assigned to Id data member.
Definition: Bioseq_.hpp:278
@ e_User
user defined object
Definition: Seqdesc_.hpp:124
static int version
Definition: mdb_load.c:29
static char * prog
Definition: mdb_load.c:33
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
EAlignScoreTypes
Definition: ngalign_job.cpp:78
@ fScore_GapCount
add a 'gap_count' score
Definition: ngalign_job.cpp:86
@ fScore_Identities
add BLAST-style 'num_ident' score
Definition: ngalign_job.cpp:80
@ fScore_Default
default flags: everything
Definition: ngalign_job.cpp:95
@ fScore_Mismatches
add a 'mismatch' score with a count of mismatches
Definition: ngalign_job.cpp:83
@ fScore_PercentCoverage
add a score for percent coverage of query (sequence 0)
Definition: ngalign_job.cpp:92
@ fScore_PercentIdentity
add scores for ungapped and gapped percent identity
Definition: ngalign_job.cpp:89
The Object manager core.
const SNCBIPackedScoreMatrix NCBISM_Blosum62
Definition: sm_blosum62.c:92
USING_SCOPE(objects)
static const char * kUpdateSuffix
Definition: sequpd.cpp:137
static void s_FixCollidingIDs_Annot(CBioseq &bseq, CSeq_entry::TAnnot &annot, const vector< CRef< CSeq_id >> &upd_ids)
Definition: sequpd.cpp:121
static CBioseq_Handle s_GetMatchingSequence(CSeq_inst::EMol type, CSeq_entry_Handle &oldSeq, const CBioseq &upd_bseq, bool &collide, const sequpd::TSeqIdHMap &matches)
Definition: sequpd.cpp:264
static CConstRef< CSeq_id > s_GetOriginalId(const CBioseq_Handle &bsh)
Definition: sequpd.cpp:320
static bool s_MatchSeqIds(CScope &scope, const CSeq_id &old_id, const CSeq_id &upd_id, bool &collide)
Definition: sequpd.cpp:286
static vector< CConstRef< CSeq_align > > s_RunBlast2NASeq(const CBioseq_Handle &sh, const CBioseq_Handle &qh, bool accept_atleast_one, ICanceled *canceled)
Definition: sequpd.cpp:452
static vector< CConstRef< CSeq_align > > s_RunBlast2NWSeq(const CBioseq_Handle &sh, const CBioseq_Handle &qh)
Definition: sequpd.cpp:548
static vector< CConstRef< CSeq_align > > s_RunBlast2AASeq(const CBioseq_Handle &sh, const CBioseq_Handle &qh)
Definition: sequpd.cpp:576
Progress monitoring structure.
Definition: blast_def.h:341
static string subject
static string query
Definition: type.c:6
Modified on Tue Apr 16 20:10:34 2024 by modify_doxy.py rev. 669887