NCBI C++ ToolKit
utils.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: utils.cpp 47780 2024-08-19 02:13:10Z asztalos $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Mike DiCuccio, Liangshou Wu
27 *
28 * File Description:
29 * General utility classes for GUI projects.
30 */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <corelib/ncbiapp.hpp>
35 
37 
38 #include <gui/objutils/utils.hpp>
43 
51 #include <objects/seq/Pubdesc.hpp>
61 
64 
65 #include <objmgr/feat_ci.hpp>
66 #include <objmgr/util/sequence.hpp>
67 #include <objmgr/util/feature.hpp>
69 #include <objmgr/impl/synonyms.hpp>
70 #include <objmgr/seq_vector.hpp>
71 #include <objmgr/bioseq_ci.hpp>
72 #include <objmgr/seq_entry_ci.hpp>
73 #include <objmgr/align_ci.hpp>
74 #include <objmgr/seqdesc_ci.hpp>
75 
76 #include <serial/iterator.hpp>
77 #include <connect/ncbi_types.h>
79 #include <util/line_reader.hpp>
80 #include <algorithm>
81 
84 
87 
88 // cache results of top level seq-id checks
89 // key is seq-id normalized AsFastaString()
93 
95 typedef vector<CConstRef<CSeq_id>> TAssemblySeqIds;
97 
98 //
99 // functor for sorting features based on their length
100 //
102 {
103  bool operator()(const CMappedFeat& feat0,
104  const CMappedFeat& feat1) const
105  {
108  return (r0.GetLength() < r1.GetLength());
109  }
110 };
111 
112 
113 //
114 // functor for sorting features based on the NCBI feature sort order
115 //
117 {
118  bool operator()(const CMappedFeat& feat0,
119  const CMappedFeat& feat1) const
120  {
121  const CSeq_feat& f0 = feat0.GetOriginalFeature();
122  const CSeq_feat& f1 = feat1.GetOriginalFeature();
123  return (f0.Compare(f1, feat0.GetLocation(), feat1.GetLocation()) < 0);
124  }
125 };
126 
127 
128 
130 static const TNameTypeStr s_NameTypeStrs[] = {
131  { "", CSeqUtils::eAnnot_All },
132  { "All", CSeqUtils::eAnnot_All },
133  { "Named", CSeqUtils::eAnnot_Named },
134  { "Unnamed", CSeqUtils::eAnnot_Unnamed },
135 };
136 
139 
140 
142 {
143  TNameTypeMap::const_iterator iter = sm_NameTypeMap.find(type);
144  if (iter != sm_NameTypeMap.end()) {
145  return iter->second;
146  } else {
148  }
149 }
150 
151 
152 const string&
154 {
156  for (iter = sm_NameTypeMap.begin(); iter != sm_NameTypeMap.end(); ++iter) {
157  if (iter->second == type) {
158  return iter->first;
159  }
160  }
161  return kEmptyStr;
162 }
163 
164 
165 //
166 // retrieve an annot selector
167 //
169 {
170  SAnnotSelector sel;
171  sel
172  // consider overlaps by total range...
174  // resolve all segments...
175  .SetResolveAll()
176  ;
177 
179 
180  CRegistryReadView view =
181  reg.GetReadView("GBENCH.Utils.NamedAnnots");
182  CRegistryReadView::TKeys naa_keys;
183  view.GetKeys(naa_keys);
184  ITERATE(CRegistryReadView::TKeys, iter, naa_keys) {
185  sel.IncludeNamedAnnotAccession(view.GetString(iter->key));
186  }
187 
188  view = reg.GetReadView("GBENCH.Utils.AnnotSelector");
189  if ( !(flags & fAnnot_UnsetNamed) ) {
190  if (view.GetBool("ExcludeExternal")) {
191  sel.SetExcludeExternal(true);
192  } else {
193  sel.SetExcludeExternal(false);
194 
195  ///
196  /// known external annotations
197  ///
198 
199  static const char* named_annots[] = {
200  "SNP", /// SNPs = variation features
201  "CDD", /// CDD = conserved domains
202  "STS", /// STS = sequence tagged sites
203  NULL
204  };
205 
206  for (const char** p = named_annots; p && *p; ++p) {
207  bool incl = view.GetBool(*p, true);
208  if ( !incl ) {
209  sel.ExcludeNamedAnnots(*p);
210  }
211  }
212  }
213  }
214 
215  if ( !(flags & fAnnot_UnsetDepth) ) {
216  if (view.GetBool("AdaptiveDepth", true)) {
217  sel.SetAdaptiveDepth(true);
218  sel.SetResolveAll();
219  }
220  }
221  return sel;
222 }
223 
224 
225 //
226 // retrieve an annot selector for our selected annotations
227 //
229  TAnnotFlags flags)
230 {
232  sel
233  // limit by our annotation type
234  .SetAnnotType(c);
235  return sel;
236 }
237 
238 
239 //
240 // retrieve an annot selector for our selected annotations
241 //
243  TAnnotFlags flags)
244 {
246  sel
247  // retrieve feature type and subtype of interest
248  .SetFeatType(feat);
249 
250  return sel;
251 }
252 
253 
255  TAnnotFlags flags)
256 {
258  sel
259  // retrieve feature type and subtype of interest
260  .SetFeatSubtype(sub);
261 
262  return sel;
263 }
264 
265 
266 SAnnotSelector CSeqUtils::GetAnnotSelector(const vector<string>& annots)
267 {
269  sel.SetCollectNames();
270 
271  //CGuiRegistry& reg = CGuiRegistry::GetInstance();
272  //CRegistryReadView view =
273  // reg.GetReadView("GBENCH.Utils.AnnotSelector");
274  //bool include_naas = view.GetBool("IncludeNAAs", false);
275  bool include_naas = false;
276 
277  if (include_naas && annots.empty()) {
278  sel.IncludeNamedAnnotAccession("NA*");
279  }
280 
281  ITERATE (vector<string>, iter, annots) {
282  const string& annot = *iter;
283  switch (CSeqUtils::NameTypeStrToValue(annot)) {
285  sel.AddUnnamedAnnots();
286  break;
288  sel.ExcludeUnnamedAnnots();
289  break;
291  if (include_naas) {
292  sel.IncludeNamedAnnotAccession("NA*");
293  }
294  return sel;
296  default:
297  if (NStr::StartsWith(annot, "NA*")) {
298  sel.IncludeNamedAnnotAccession("NA*");
299  } else {
300  sel.AddNamedAnnots(annot);
301  if(IsNAA(annot) || IsExtendedNAA(annot)) {
302  sel.IncludeNamedAnnotAccession(annot);
303  }
304  }
305  break;
306  }
307  }
308  return sel;
309 }
310 
311 
312 SAnnotSelector CSeqUtils::GetAnnotSelector(const vector<string>& annots,
313  bool adaptive, int depth)
314 {
315  SAnnotSelector sel(GetAnnotSelector(annots));
316  SetResolveDepth(sel, adaptive, depth);
317  return sel;
318 }
319 
320 
321 void CSeqUtils::SetAnnot(objects::SAnnotSelector& sel, const string& annot)
322 {
323  switch (CSeqUtils::NameTypeStrToValue(annot)) {
325  sel.AddUnnamedAnnots();
326  break;
328  sel.ExcludeUnnamedAnnots();
329  break;
331  {{
332  //CGuiRegistry& reg = CGuiRegistry::GetInstance();
333  //CRegistryReadView view =
334  // reg.GetReadView("GBENCH.Utils.AnnotSelector");
335  //bool include_naas = view.GetBool("IncludeNAAs", false);
336  bool include_naas = false;
337  if (include_naas) {
338  sel.IncludeNamedAnnotAccession("NA*");
339  }
340  }}
341  break;
343  default:
344  sel.AddNamedAnnots(annot);
345  if (IsNAA(annot) || IsExtendedNAA(annot)) {
346  sel.IncludeNamedAnnotAccession(annot);
347  }
348  break;
349  }
350 }
351 
352 
353 
355 {
357  return reg.GetReadView("GBENCH.Utils.AnnotSelector");
358 }
359 
361 {
362  return view.GetInt("MaxSearchSegments", 0);
363 }
364 
366 {
367  string max_search_segs_action = view.GetString("MaxSearchSegmentsAction", "silent");
369 
370  if(max_search_segs_action == "throw") {
371  MaxSearchSegmentsAction = SAnnotSelector::eMaxSearchSegmentsThrow;
372  } else if (max_search_segs_action == "log") {
373  MaxSearchSegmentsAction = SAnnotSelector::eMaxSearchSegmentsLog;
374  }
375 
376  return MaxSearchSegmentsAction;
377 }
378 
380 {
381  if (max > 0 && actual > max) {
382  switch(action) {
384  NCBI_THROW(CAnnotSearchLimitException, eSegmentsLimitExceded,
385  "CSeqUtils::CheckMaxSearchSegments: search segments limit exceeded");
386  break;
388  break;
390  default:
391  ERR_POST("CSeqUtils::CheckMaxSearchSegments: search segments limit exceeded");
392  break;
393  }
394  return true;
395  }
396  return false;
397 }
398 
399 
401 {
402  return view.GetInt("MaxSearchTime", 0);
403 }
404 
405 
406 void CSeqUtils::SetResolveDepth(objects::SAnnotSelector& sel,
407  bool adaptive, int depth)
408 {
409  if (adaptive) {
410  sel.SetAdaptiveDepth(true);
411  sel.SetExactDepth(false);
412  // TODO: watch out
413 // There may be a bug inside the selector: we have to call SetResolveAll() even
414 // for cases where we only want to resolve up to a given depth.
415  sel.SetResolveAll();
416 
418  int max_search_segs = GetMaxSearchSegments(view);
419 // LOG_POST(Trace << "MaxSearchSegments: " << max_search_segs);
420  sel.SetMaxSearchSegments(max_search_segs);
421  if(max_search_segs > 0) {
422  sel.SetMaxSearchSegmentsAction(GetMaxSearchSegmentsAction(view));
423  }
424  sel.SetMaxSearchTime((float)GetMaxSearchTime(view));
425 
426  if (depth >=0) {
427  sel.SetResolveDepth(depth);
428  }
429  } else if (depth >= 0) {
430  sel.SetResolveDepth(depth);
431  sel.SetExactDepth(true);
432  sel.SetAdaptiveDepth(false);
433  }
434 }
435 
436 
437 //
438 // LinkFeatures()
439 // This builds explicit links between features, creating a hierarchical tree of
440 // features.
441 //
442 
443 template <class T, class U>
445 {
446  bool operator()(const pair<T,U>& p1, const pair<T,U>& p2) const
447  {
448  return p1.first < p2.first;
449  }
450 };
451 
452 
456 {
457  if (p_cb) {
458  p_cb->SetTaskName("Linking features...");
459  p_cb->SetTaskTotal((int)feats.size());
460  p_cb->SetTaskCompleted(0);
461  }
462 
464 
465  // using CFeatTree
466  feature::CFeatTree tree;
467  tree.SetFeatIdMode(feature::CFeatTree::EFeatIdMode(mode));
470  if (p_cb && p_cb->StopRequested()) {
471  return false;
472  }
473  CRef<CLinkedFeature> curr_feat = *iter;
474  tree.AddFeature(curr_feat->GetMappedFeature());
475  fmap[curr_feat->GetMappedFeature()] = curr_feat;
476  }
478  if (p_cb && p_cb->StopRequested()) {
479  return false;
480  }
481  CRef<CLinkedFeature> curr_feat = *iter;
482  CMappedFeat parent_feat = tree.GetParent(curr_feat->GetMappedFeature());
483  if ( parent_feat ) {
484  fmap[parent_feat]->AddChild(curr_feat);
485  }
486  else {
487  out_feats.push_back(curr_feat);
488  }
489  if (p_cb) p_cb->AddTaskCompleted(1);
490  }
491 
492  out_feats.swap(feats);
493 
494  return true;
495 }
496 
497 
498 // remap a child location to a parent
500  const CSeq_loc& child,
501  CScope* scope)
502 {
503  CSeq_loc dummy_parent;
504  dummy_parent.SetWhole(const_cast<CSeq_id&>(sequence::GetId(parent, 0)));
505  SRelLoc converter(dummy_parent, child, scope);
506  converter.m_ParentLoc = &parent;
507  return converter.Resolve(scope);
508 }
509 
510 bool CSeqUtils::Match(const CSeq_id& id1, const CSeq_id& id2, CScope* scope)
511 {
512  return Match(CSeq_id_Handle::GetHandle(id1),
514  scope);
515 }
516 
517 
518 bool CSeqUtils::Match(const CSeq_id_Handle& id1,
519  const CSeq_id_Handle& id2, CScope* scope)
520 {
521  if (id1.MatchesTo(id2)) {
522  return true;
523  }
524 
525  if (id1.IsGi() && id2.IsGi() )
526  return false;
527 
528  if (scope) {
530 
531  syns = scope->GetSynonyms(id1);
532  if (syns) {
533  ITERATE (CSynonymsSet, iter, *syns) {
534  if (id2.MatchesTo(CSynonymsSet::GetSeq_id_Handle(iter))) {
535 // cerr << "matched to synonym of presented seq-id: " << endl;
536 // cerr << MSerial_AsnText << *CSynonymsSet::GetSeq_id_Handle(iter).GetSeqId();
537  return true;
538  }
539  }
540  }
541 
542  syns = scope->GetSynonyms(id2);
543  if (syns) {
544  ITERATE (CSynonymsSet, iter, *syns) {
545  if (id1.MatchesTo(CSynonymsSet::GetSeq_id_Handle(iter))) {
546 // cerr << "matched to synonym of tls-seq-id: " << endl;
547 // cerr << MSerial_AsnText << *CSynonymsSet::GetSeq_id_Handle(iter).GetSeqId();
548  return true;
549  }
550  }
551  }
552  }
553  return false;
554 }
555 
557  const CRangeCollection<TSeqPos>& ranges)
558 {
559  CRef<CSeq_loc> seq_loc(new CSeq_loc());
560  CSeq_loc::TPacked_int& p_int = seq_loc->SetPacked_int();
561 
562  ITERATE(CRangeCollection<TSeqPos>, it_r, ranges) { // for each range in mark
563  if ( !it_r->Empty() ) {
564  p_int.AddInterval(id, it_r->GetFrom(), it_r->GetTo());
565  }
566  }
567  switch (p_int.Get().size()) {
568  case 0:
569  {{
570  return CRef<CSeq_loc>();
571  }}
572  case 1:
573  {{
574  CRef<CSeq_interval> ival(p_int.Set().front());
575  seq_loc->SetInt(*ival);
576  /// p_int no longer valid!
577  }}
578  break;
579  default:
580  break;
581  }
582  return seq_loc;
583 }
584 
587 {
589 
590  // extract from the given map all segments corresponding to the given id
591  const CHandleRangeMap::TLocMap& loc_map = map.GetMap();
593  if(it != loc_map.end()) {
594  ITERATE(CHandleRange, it_r, it->second) {
595  ranges.CombineWith(it_r->first);
596  }
597  return true;
598  } else return false;
599 }
600 
602  const CSeq_loc& loc)
603 {
604  // Build a Seq-entry for the query Seq-loc
605  // A seq-loc may have multiple seq-ids, use the first successful seq-id.
606  CBioseq_Handle handle;
607  for ( CSeq_loc_CI citer (loc); citer; ++citer) {
608  handle = scope.GetBioseqHandle(citer.GetSeq_id());
609  if ( handle ) {
610  break;
611  }
612  }
613 
614  if ( !handle ) {
615  return CRef<CBioseq>();
616  }
617 
618  /// easy out: if the bioseq is of type whole, just duplicate it
619  if (loc.IsWhole()) {
620  CRef<CBioseq> bioseq(new CBioseq());
621  bioseq->Assign(*handle.GetCompleteBioseq());
622  return bioseq.Release();
623  }
624 
625  CSeqVector vec(loc, scope, CBioseq_Handle::eCoding_Iupac);
626  string seq_string;
627  vec.GetSeqData(0, vec.size(), seq_string);
628 
629  CRef<CBioseq> bioseq(new CBioseq());
630 
631  // curate our inst
632  bioseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
633  bioseq->SetInst().SetLength((int)seq_string.size());
634  if (vec.IsProtein()) {
635  bioseq->SetInst().SetMol(CSeq_inst::eMol_aa);
636  bioseq->SetInst().SetSeq_data().SetIupacaa(*new CIUPACaa(seq_string));
637  } else {
638  bioseq->SetInst().SetMol(CSeq_inst::eMol_na);
639  bioseq->SetInst().SetSeq_data().SetIupacna(*new CIUPACna(seq_string));
640  CSeqportUtil::Pack(&bioseq->SetInst().SetSeq_data());
641  }
642 
643 
644  // add an ID for our sequence
645  CRef<CSeq_id> id(new CSeq_id());
646  id->Assign(*handle.GetSeqId());
647  bioseq->SetId().push_back(id);
648 
649  // a title
650  CRef<CSeqdesc> title(new CSeqdesc);
651  string title_str;
652  id->GetLabel(&title_str);
653  title_str += ": ";
654  loc.GetLabel(&title_str);
655  title->SetTitle(title_str);
656  bioseq->SetDescr().Set().push_back(title);
657 
658  return bioseq.Release();
659 }
660 
661 
663  const CBioseq_Handle& handle)
664 {
665  CRef<CSeq_loc> seq_loc(new CSeq_loc());
666  CSeq_loc::TPacked_int& p_int = seq_loc->SetPacked_int();
667 
668  for (CSeq_loc_CI iter(mix_loc); iter; ++iter) {
669  if (handle.IsSynonym(iter.GetSeq_id())) {
670  CSeq_loc_CI::TRange range = iter.GetRange();
671  if ( !range.Empty() ) {
672  p_int.AddInterval(*handle.GetSeqId(), range.GetFrom(),
673  range.GetTo(), iter.GetStrand());
674  }
675  }
676  }
677  switch (p_int.Get().size()) {
678  case 0:
679  {{
680  return CRef<CSeq_loc>();
681  }}
682  case 1:
683  {{
684  CRef<CSeq_interval> ival(p_int.Set().front());
685  seq_loc->SetInt(*ival);
686  /// p_int no longer valid!
687  }}
688  break;
689  default:
690  break;
691  }
692 
693  return seq_loc;
694 }
695 
696 
697 string CSeqUtils::GetAnnotName(const CSeq_annot_Handle& annot_handle)
698 {
699  string name(GetUnnamedAnnot());
700  CConstRef<CSeq_annot> annot = annot_handle.GetCompleteSeq_annot();
701  if (annot) {
702  name = GetAnnotName(*annot);
703  } else if (annot_handle.IsNamed()) {
704  name = annot_handle.GetName();
705  }
706  return name;
707 }
708 
709 
710 string CSeqUtils::GetAnnotName(const CSeq_annot& annot)
711 {
712  string name(GetUnnamedAnnot());
713  if (annot.IsSetDesc()) {
714  ITERATE (objects::CAnnot_descr::Tdata, descrIter, annot.GetDesc().Get()) {
715  if ((*descrIter)->IsTitle()) {
716  name = (*descrIter)->GetTitle();
717  break;
718  } else if ((*descrIter)->IsName()) {
719  name = (*descrIter)->GetName();
720  }
721  }
722  }
723  return name;
724 }
725 
726 CConstRef<CUser_field> CSeqUtils::GetAnnotUserField(const CSeq_annot& annot, const string& type, const string& label)
727 {
728  if (annot.IsSetDesc()) {
729  ITERATE (objects::CAnnot_descr::Tdata, descrIter, annot.GetDesc().Get()) {
730  if ((*descrIter)->IsUser()) {
731  const auto& user((*descrIter)->GetUser());
732  if(user.GetType().IsStr() && user.GetType().GetStr() == type && user.HasField(label)) {
733  return user.GetFieldRef(label);
734  }
735  }
736  }
737  }
738  return CConstRef<CUser_field>();
739 }
740 
741 
742 string CSeqUtils::GetAnnotComment(const CSeq_annot_Handle& annot_handle)
743 {
744  string comment = kEmptyStr;
745  CConstRef<CSeq_annot> annot = annot_handle.GetCompleteSeq_annot();
746  if (annot) {
747  comment = GetAnnotComment(*annot);
748  }
749  return comment;
750 }
751 
752 
753 string CSeqUtils::GetAnnotComment(const CSeq_annot& annot)
754 {
755  if (annot.IsSetDesc()) {
756  ITERATE (objects::CAnnot_descr::Tdata, descrIter, annot.GetDesc().Get()) {
757  if ((*descrIter)->IsComment()) {
758  return (*descrIter)->GetComment();
759  }
760  }
761  }
762  return kEmptyStr;
763 }
764 
/// Check whether a string has the shape of a named-annotation accession.
///
/// Accepted shapes are:
///  - "NA" followed by exactly nine decimal digits, or
///  - the above, followed by the separator `div` and then only decimal
///    digits (an empty digit run after the separator is accepted as well).
///
/// @param annot string to test
/// @param div   separator between the accession and its numeric suffix
/// @return true if the string matches one of the shapes above
static bool s_IsNAA(const string& annot, char div)
{
    const size_t kAccLen = 11;  // 'N', 'A' and nine digits
    const size_t total = annot.size();

    if (total < kAccLen || annot[0] != 'N' || annot[1] != 'A') {
        return false;
    }

    // the nine characters following "NA" must all be decimal digits
    for (size_t pos = 2; pos < kAccLen; ++pos) {
        if (annot[pos] < '0' || annot[pos] > '9') {
            return false;
        }
    }

    if (total == kAccLen) {
        return true;  // bare accession, no suffix
    }
    if (annot[kAccLen] != div) {
        return false;
    }

    // everything after the separator must be decimal digits
    for (size_t pos = kAccLen + 1; pos < total; ++pos) {
        if (annot[pos] < '0' || annot[pos] > '9') {
            return false;
        }
    }
    return true;
}
796 
797 bool CSeqUtils::IsNAA(const string& annot, bool isStrict)
798 {
799  return isStrict ? s_IsNAA(annot, '.') : IsExtendedNAA(annot);
800 }
801 
802 bool CSeqUtils::IsNAA_Name(const string& annot)
803 {
804  return IsExtendedNAA_Name(annot);
805 }
806 
807 /// create an annotation name for a remote file pipeline, appending sSuffix
808 string CSeqUtils::MakeRmtAnnotName(const string& sSuffix)
809 {
810  return "rmt_pipleine_" + sSuffix;
811 }
812 
813 /// check if a given annotation was created by a remote file pipeline
814 bool CSeqUtils::isRmtAnnotName(const string& sAnnotName)
815 {
816  return NStr::StartsWith(sAnnotName, "rmt_pipleine_");
817 }
818 
819 bool CSeqUtils::isRmtPipelineFileType(const string& sFileType)
820 {
821  return sFileType == "bigBed" || sFileType == "bigWig" || sFileType == "vcfTabix" || sFileType == "vcf";
822 }
823 
824 
825 static bool s_IsExtendedNAA(const string& sAnnotName, char div, bool isStrict)
826 {
827  size_t posHashSign{sAnnotName.find('#')};
828 
829  if(posHashSign == NPOS) {
830  if(isStrict) {
831  return false;
832  } else {
833  return s_IsNAA(sAnnotName, div);
834  }
835  }
836  if(!s_IsNAA(sAnnotName.substr(0, posHashSign), div))
837  {
838  return false;
839  }
840  if(posHashSign == sAnnotName.length() - 1) {
841  return true;
842  }
843  for(size_t i = posHashSign+1; i<sAnnotName.length(); ++i) {
844  if(sAnnotName[i] < '0' || sAnnotName[i] > '9') {
845  return false;
846  }
847  }
848  return true;
849 }
850 
851 bool CSeqUtils::IsExtendedNAA(const string& sAnnotName, bool isStrict)
852 {
853  return s_IsExtendedNAA(sAnnotName, '.', isStrict);
854 }
855 
856 bool CSeqUtils::IsExtendedNAA_Name(const string& sAnnotName)
857 {
858  return s_IsExtendedNAA(sAnnotName, '_', false);
859 }
860 
861 // check if a given annotation is AlignDb (potentially suffixed with batch identication string after a '#')
862 bool CSeqUtils::IsAlignDb(const string& annot)
863 {
864  return NStr::StartsWith(annot, "AlignDb", NStr::eNocase);
865 }
866 
867 // get a batch string from AlignDb annotation suffixed with batch identication string after a '#'
868 string CSeqUtils::GetAlignDbBatch(const string& annot)
869 {
870  vector<string> parts;
871  if(IsAlignDb(annot)) {
872  NStr::Split(annot, "#", parts);
873  }
874  return parts.size() > 1 ? parts[1] : string();
875 }
876 
877  /// get a is_source_assembly_query string fro AlignDb annotation suffixed after a second '#'
878 string CSeqUtils::GetAlignDbIsQuery(const string& annot)
879 {
880  vector<string> parts;
881  if(IsAlignDb(annot)) {
882  NStr::Split(annot, "#", parts);
883  }
884  return parts.size() > 2 ? parts[2] : string("N");
885 }
886 
887 // VDB accessions in scope are in ("SRA", "SRR", "DRR", "ERR")
888 bool CSeqUtils::IsVDBAccession(const string& acc)
889 {
890  if (acc.size() < 3 || acc[1] != 'R')
891  return false;
892  switch (acc[0]) {
893  case 'S':
894  case 'D':
895  case 'E':
896  break;
897  default:
898  return false;
899  }
900  switch (acc[2]) {
901  case 'A':
902  case 'R':
903  break;
904  default:
905  return false;
906  }
907  return true;
908 }
909 
910 
912 {
913  if (feat.IsSetPseudo()) {
914  return feat.GetPseudo();
915  } else {
916  const CSeq_feat::TData& data = feat.GetData();
917  if (data.IsGene() && data.GetGene().IsSetPseudo()) {
918  return data.GetGene().GetPseudo();
919  } else if (data.IsRna() && data.GetRna().IsSetPseudo()) {
920  return data.GetRna().GetPseudo();
921  }
922  }
923  return false;
924 }
925 
926 
928 {
929  if (feat.IsSetPartial() && feat.GetPartial() &&
930  !IsPartialStart(feat.GetLocation()) &&
931  !IsPartialStop(feat.GetLocation())) {
932  return true;
933  }
934  return false;
935 }
936 
937 
939 {
941 }
942 
943 
945 {
947 }
948 
950 {
951  CSeq_loc_CI it(loc);
952  if (it) {
953  auto strand = it.GetStrand();
954  ++it;
955  for (; it; ++it) {
956  if (it.GetStrand() != strand)
957  return false;
958  }
959  }
960  return true;
961 }
962 
964 {
965  if (feat.IsSetExcept()) {
966  return feat.GetExcept();
967  }
968  return false;
969 }
970 
971 
973 {
974  static string base_url = "https://www.ncbi.nlm.nih.gov";
975  return base_url;
976 }
977 
978 
980 {
981  return "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\">";
982 }
983 
984 
986 {
987  return "</table>";
988 }
989 
990 
991 string CSeqUtils::CreateTableRow(const string& tag, const string& value)
992 {
993  return "<tr><td align=\"right\" valign=\"top\" nowrap><b>" + tag + (tag.empty() ? "" : ":") + "&nbsp;" +
994  "</b></td><td valign=\"top\" width=\"200\">" + value + "</td></tr>";
995 }
996 
997 
998 string CSeqUtils::CreateSectionRow(const string& tag)
999 {
1000  return "<tr><td align=\"right\" nowrap>[<i>" + tag + "</i>]&nbsp;&nbsp;</td><td></td></tr>";
1001 }
1002 
1003 
1004 string CSeqUtils::CreateLinkRow(const string& tag,
1005  const string& label,
1006  const string& url)
1007 {
1008  return "<tr><td align=\"right\" valign=\"top\" nowrap><b>" + tag + ":&nbsp;" +
1009  "</b></td><td width=\"200\"><a href=\"" + url + "\">" + label + "</a></td></tr>";
1010 }
1011 
1013 {
1014  CSeqUtils::TLocVec mapped_locs;
1015 
1016  typedef map<TGi, TSeqRange> TRangeMap;
1017  TRangeMap r_map;
1018  CTypeConstIterator<CSeq_align> aln_iter(align_set);
1019  for (; aln_iter; ++aln_iter) {
1020  const CSeq_align& aln = *aln_iter;
1021  if (aln.CheckNumRows() == 2 && aln.GetSegs().IsStd() &&
1022  aln.GetSegs().GetStd().size() == 1) {
1023  int target_row = 0;
1024  if (aln.GetSeq_id(0).IsGi() && aln.GetSeq_id(0).GetGi() == gi) {
1025  target_row = 1;
1026  }
1027  if (aln.GetSeq_id(target_row).IsGi()) {
1028  TGi target_gi = aln.GetSeq_id(target_row).GetGi();
1030  aln.GetSegs().GetStd().front()->GetSeqRange(target_row);
1031  TSeqPos from = (TSeqPos)range.GetFrom();
1032  TSeqPos to = (TSeqPos)range.GetTo();
1033  if (from > to) {
1034  swap(from, to);
1035  }
1036  if (r_map.count(target_gi) == 0) {
1037  r_map[target_gi] = TSeqRange(from, to);
1038  }
1039  else {
1040  r_map[target_gi].CombineWith(TSeqRange(from, to));
1041  }
1042  }
1043  }
1044  }
1045 
1046  ITERATE(TRangeMap, iter, r_map) {
1047  CRef<CSeq_id> id(new CSeq_id);
1048  id->SetGi(iter->first);
1049  CRef<CSeq_loc> loc(new CSeq_loc(*id, iter->second.GetFrom(),
1050  iter->second.GetTo()));
1051  mapped_locs.push_back(loc);
1052  }
1053  return mapped_locs;
1054 }
1055 
1056 static const char* kLinksUrlDefault =
1057 "https://www.ncbi.nlm.nih.gov/sviewer/links.fcgi?link_name=gi_placement&report=asn";
1058 
1059 static const string& GetLinksURL()
1060 {
1061  static string LinksUrl;
1062  if (LinksUrl.empty()) {
1064  LinksUrl = reg.GetString("links", "url", kLinksUrlDefault);
1065  }
1066  return LinksUrl;
1067 }
1068 
1070 {
1071  STimeout timeout;
1072  timeout.sec = time_out_sec;
1073  timeout.usec = 0;
1074 
1075  CConn_HttpStream stream(GetLinksURL() + string("&gi=") + NStr::NumericToString(gi), flags, &timeout);
1076  unique_ptr<CObjectIStream> obj_stream(CObjectIStream::Open(eSerial_AsnText, stream));
1077  CSeq_align_set align_set;
1078  try {
1079  *obj_stream >> align_set;
1080  } catch (const CException& e) {
1081  LOG_POST(Error << "Failed to retrieve gi placements for gi|"
1082  << gi << ", error: " << e.GetMsg());
1083  return TLocVec();
1084  }
1085 
1086  return s_GetAlnMapplingLocs(align_set, gi);
1087 }
1088 
1089 bool CSeqUtils::CanHavePlacements(const objects::CSeq_id& seqid)
1090 {
1091  // GenColl accessions without NC
1092  CSeq_id::EAccessionInfo info = seqid.IdentifyAccession();
1093  return info == CSeq_id::eAcc_refseq_contig //NT
1096  || info == CSeq_id::eAcc_refseq_mrna //NM
1098  || info == CSeq_id::eAcc_refseq_ncrna //NR
1100  || info == CSeq_id::eAcc_refseq_prot //NP
1106  || (info & CSeq_id::eAcc_type_mask) == CSeq_id::e_Genbank // any GenBank
1108 }
1109 
1110 /// check that a given accession is either local or unrecognizable
1111 /// this can be important to avoid unnecessary calls to NCBI services
1112 bool CSeqUtils::isQuasiLocal(const objects::CBioseq_Handle& handle)
1113 {
1114  try {
1115  // filter out local ids
1116 // also, CSeq_id constructor will throw if the accession does not look familiar, that's what we want
1117  CConstRef<CSeq_id> seqid(handle.GetSeqId());
1120  return true;
1121  }
1122  // all others are expected to be known
1123  return false;
1124  } catch(...) {
1125  // if an accession is so bad that it can't be recognized, then it's definitely something specific to the accession user
1126  return true;
1127  }
1128 }
1129 
1131 {
1132  return GetAccessionPlacementsMsec(id, scope, time_out_sec * 1000, flags);
1133 }
1134 
1136 {
1137  STimeout timeout;
1138  NcbiMsToTimeout(&timeout, time_out_msec);
1139 // filter out cases when the given id is something unsuitable, e.g. a local id
1140  // generally what's bad for GenColl should be bad for getting placements
1141  if(!CanHavePlacements(id)) {
1142  return TLocVec();
1143  }
1144 
1145  do {
1146  CConn_HttpStream stream(GetLinksURL() + string("&id=") + id.GetSeqIdString(true), flags, &timeout);
1147  unique_ptr<CObjectIStream> obj_stream(CObjectIStream::Open(eSerial_AsnText, stream));
1148  CSeq_align_set align_set;
1149  *obj_stream >> align_set;
1150 
1152  if (!gi_idh)
1153  break;
1154  TGi gi(gi_idh.GetGi());
1155  return s_GetAlnMapplingLocs(align_set, gi);
1156  } while (false);
1157 
1158  return TLocVec();
1159 }
1160 
1161 CSeqUtils::TLocVec CSeqUtils::GetLocPlacements(const objects::CSeq_loc& loc, int time_out_sec)
1162 {
1163  TLocVec mapped_locs;
1164  STimeout timeout;
1165  timeout.sec = time_out_sec;
1166  timeout.usec = 0;
1167  TSignedSeqPos SourceFrom(-1);
1168  TSignedSeqPos SourceTo(-1);
1169  TGi SourceGi = INVALID_GI;
1170 
1171  if(loc.IsInt()) {
1172  SourceFrom = loc.GetInt().GetFrom();
1173  SourceTo = loc.GetInt().GetTo();
1174  if(loc.GetInt().GetId().IsGi()) {
1175  SourceGi = loc.GetInt().GetId().GetGi();
1176  } else {
1177  return mapped_locs;
1178  }
1179  } else if(loc.IsPnt()) {
1180  SourceFrom = loc.GetPnt().GetPoint();
1181  SourceTo = SourceFrom;
1182  if(loc.GetPnt().GetId().IsGi()) {
1183  SourceGi = loc.GetPnt().GetId().GetGi();
1184  } else {
1185  return mapped_locs;
1186  }
1187  } else {
1188  return mapped_locs;
1189  }
1190  if(SourceTo < SourceFrom) {
1191  swap(SourceTo, SourceFrom);
1192  }
1193  CConn_HttpStream stream(GetLinksURL() + string("&gi=") + NStr::NumericToString(SourceGi) +
1194  "&from=" + NStr::NumericToString(SourceFrom) +
1195  "&to=" + NStr::NumericToString(SourceTo),
1196  fHTTP_AutoReconnect, &timeout);
1197  unique_ptr<CObjectIStream> obj_stream(CObjectIStream::Open(eSerial_AsnText, stream));
1198  CSeq_align_set align_set;
1199  try {
1200  *obj_stream >> align_set;
1201  } catch (const CException& e) {
1202  LOG_POST(Error << "Failed to retrieve location placements for gi|"
1203  << SourceGi << ", error: " << e.GetMsg());
1204  return mapped_locs;
1205  }
1206 
1207  typedef map<TGi, TSignedSeqRange> TRangeMap;
1208  TRangeMap range_map;
1209  CTypeConstIterator<CSeq_align> aln_iter(align_set);
1210  for(; aln_iter; ++aln_iter) {
1211  const CSeq_align& aln = *aln_iter;
1212  if (aln.CheckNumRows() == 2 && aln.GetSegs().IsStd() &&
1213  aln.GetSegs().GetStd().size() == 1) {
1214  int target_row = 0;
1215  int source_row = 1;
1216  if (aln.GetSeq_id(target_row).IsGi() && aln.GetSeq_id(target_row).GetGi() == SourceGi) {
1217  target_row = 1;
1218  source_row = 0;
1219  }
1220  if (aln.GetSeq_id(target_row).IsGi()) {
1221  // check that the source range falls within the source within this alignment
1222  TSignedSeqRange i_source_range =
1223  aln.GetSegs().GetStd().front()->GetSeqRange(source_row);
1224  TSignedSeqPos i_source_from = i_source_range.GetFrom();
1225  TSignedSeqPos i_source_to = i_source_range.GetTo();
1226  if (i_source_from > i_source_to) {
1227  swap(i_source_from, i_source_to);
1228  }
1229  if(i_source_from <= SourceFrom && SourceTo <= i_source_to) {
1230  TGi target_gi = aln.GetSeq_id(target_row).GetGi();
1232  aln.GetSegs().GetStd().front()->GetSeqRange(target_row);
1233  TSeqPos from = (TSeqPos)range.GetFrom();
1234  TSeqPos to = (TSeqPos)range.GetTo();
1235  if (from > to) {
1236  swap(from, to);
1237  }
1238  range_map[target_gi] = TSignedSeqRange(from + (SourceFrom - i_source_from), from + (SourceTo - i_source_from));
1239  }
1240  }
1241  }
1242  }
1243 
1244  ITERATE (TRangeMap, iter, range_map) {
1245  CRef<CSeq_id> id(new CSeq_id);
1246  id->SetGi(iter->first);
1247  CRef<CSeq_loc> loc(new CSeq_loc(*id, iter->second.GetFrom(), iter->second.GetTo()));
1248  mapped_locs.push_back(loc);
1249  }
1250  return mapped_locs;
1251 }
1252 
1253 
1254 bool CSeqUtils::StringToRange(const string& range_str,
1255  long& from, long& to)
1256 {
1257  // Any input range string that follows this pattern will be
1258 // considered as a valid input:
1259  // "^[ \t]*[1-9][0-9,]*[ \t]*[kKmM]?((([ \t]*([-:]|\\.\\.)[ \t]*)|([ \t]+))[1-9][0-9,]*[ \t]*[kKmM]?)?[ \t]*$"
1260  // Some valid range examples:
1261  // - 1000
1262  // - [space]1000 -[tab]2000[tab]
1263  // - [space]1000[space]..[space]2000
1264  // - 10,000:2,000,000
1265  // - 100 k : 1m
1266  // - 1000[space]2000
1267  // - [space]1000[tab]2000
1268 
1269  from = to = 0;
1270  string str = NStr::TruncateSpaces(range_str);
1271  size_t len = str.length();
1272 
1273  if (len == 0) return false;
1274 
1275  // The loop tries to accomplish the followings:
1276  // - remove ','
1277  // - replace 'k' or 'K' with '000'
1278  // - replace 'm' or 'M' with '000000'
1279  // - remove white paces (and tabs) before ',', 'k', 'K', 'm', and 'M'
1280  // - remove white space (and tabs) around any separator (':', '..', and '-')
1281  // - replace any separator with '-'
1282  // - replace spaces between two numbers with '-'
1283  string out_str;
1284  bool space_before_this = false;
1285  bool separator_before_this = false;
1286  for (size_t i = 0; i < len; ++i) {
1287  switch (str[i])
1288  {
1289  case ' ':
1290  case '\t':
1291  // ignore space after a separator
1292  if ( !separator_before_this )
1293  space_before_this = true;
1294  break;
1295  case ',':
1296  if (separator_before_this) return false; // invalid
1297  // ignore space before ','
1298  space_before_this = false;
1299  break;
1300  case 'k':
1301  case 'K':
1302  if (separator_before_this) return false; // invalid
1303  // ignore space before 'k' and 'K'
1304  space_before_this = false;
1305  // replace it with "000'
1306  out_str.append("000");
1307  break;
1308  case 'm':
1309  case 'M':
1310  if (separator_before_this) return false; // invalid
1311  // ignore space before 'm' and 'M'
1312  space_before_this = false;
1313  // replace it with "0000000'
1314  out_str.append("000000");
1315  break;
1316  case '.':
1317  case '-':
1318  case ':':
1319  // ignore space before a separator
1320  space_before_this = false;
1321  separator_before_this = true;
1322  break;
1323  default:
1324  if (separator_before_this) {
1325  out_str.append("-");
1326  separator_before_this = false;
1327  } else if (space_before_this) {
1328  out_str.append("-");
1329  space_before_this = false;
1330  }
1331  out_str.append(1, str[i]);
1332  break;
1333  }
1334  }
1335 
1336  typedef vector<string> TPositions;
1337  TPositions pos;
1338  NStr::Split(out_str, "-", pos);
1339  if (pos.size() < 3) {
1340  try {
1341  bool is_from = true;
1342  NON_CONST_ITERATE (TPositions, iter, pos) {
1343  NStr::TruncateSpaces(*iter);
1344  if (iter->empty()) continue;
1345  if (is_from) {
1346  to = from = NStr::StringToLong(*iter);
1347  is_from = false;
1348  } else {
1349  to = NStr::StringToLong(*iter);
1350  }
1351  }
1352  } catch (const CException&) {
1353  return false;
1354  }
1355  }
1356  return true;
1357 }
1358 
1359 static const string kTaxDb = "taxonomy";
1360 static const string kNucDb = "nucleotide";
1361 static const string kAssmDb = "assembly";
1362 static const int kRetMax = 5000;
1363 
1364 
1366 {
1367  gc_ids.clear();
1368 
1369  TEntrezIds uids_from;
1370 #ifdef NCBI_STRICT_GI
1371  uids_from.push_back(GI_TO(TEntrezId, gi));
1372 #else
1373  uids_from.push_back(gi);
1374 #endif
1375  TEntrezIds uids_to;
1376 
1377  try {
1378  // prepare eLink request that will get entrez-id (not exactly the same as assembly ids)
1379  // chromosome is indicated by score of "2"
1380  // (magic string indicated in e-mail communication from Avi Kimchi on 04/01/2013)
1381  ELinkQuery(kNucDb, kAssmDb, uids_from, uids_to, "neighbor_score", "//Link[Score = \"2\"]/Id/text()");
1382  }
1383  catch (const CException& e) {
1384  LOG_POST(Error << "Failed to get assembly entrez ids for gi: " << gi << ". Error: " << e.GetMsg());
1385  }
1386 
1387  if (uids_to.empty())
1388  return;
1389 
1390  CGuiEutilsClient ecli;
1391  ecli.SetMaxReturn(kRetMax);
1392  xml::document docsums;
1393 
1394  try {
1395  // from Entrez ids, get true assembly ids
1396  ecli.Summary(kAssmDb, uids_to, docsums);
1397  } catch (const CException& e) {
1398  LOG_POST(Error << "Failed to get assembly ids from entrez ids: " << CreateIdStr(uids_to) << ". Error: " << e.GetMsg());
1399  }
1400 
1402  xml::node_set nodes ( docsums.get_root_node().run_xpath_query("//RsUid/text() | //GbUid/text()") );
1403  for (itNode = nodes.begin(); itNode != nodes.end(); ++itNode) {
1404  string id(itNode->get_content());
1405  if (id.empty())
1406  continue;
1407  gc_ids.push_back(NStr::StringToNumeric<TEntrezId>(id));
1408  }
1409 }
1410 
1412 {
1413  TEntrezIds gc_ids;
1414  GetAssmIds_GIChr(gc_ids, gi);
1417 
1418  ITERATE(TEntrezIds, iGCId, gc_ids) {
1419  try {
1420  CRef<CGC_Assembly> assm(gcs->GetAssembly(ENTREZ_ID_TO(int,*iGCId), "Gbench_chrs"));
1421 
1422  CGC_Assembly::TSequenceList sequences;
1423  assm->Find(idh, sequences);
1424 
1425  ITERATE(CGC_Assembly::TSequenceList, iSequences, sequences) {
1426  CConstRef<CGC_Replicon> replicon((*iSequences)->GetReplicon());
1427  if(replicon->IsSetName()) {
1428  return replicon->GetName();
1429  }
1430  }
1431  } catch(...) {
1432  LOG_POST(Error << "Call to GenColl timed out when getting assembly: " << *iGCId);
1433  }
1434  }
1435  return "";
1436 }
1437 
1438 
1439 // retrieve or cache list of ids for assembly accessions
1440 static const TAssemblySeqIds& s_GetAssemblySeqIds(const string& assm_acc)
1441 {
1442  {{
1444  auto it = s_AssemblySeqIdCache.find(assm_acc);
1445  if (it != s_AssemblySeqIdCache.end()) {
1446  return it->second;
1447  }
1448  }}
1449 
1450  // Warning: this is potentially a very slow call
1451  // no caching because we are caching molecule list instead as it's much smaller
1452  // this call is only used in CSeqUtils::isTopLevel() which is only used in SViewer so it should never
1453  // use seqconfig service to obtain assemblies
1454  CRef<CGC_Assembly> assm = CGencollSvc::GetInstance()->GetGCAssembly(assm_acc, false, "Gbench");
1455  if(assm.IsNull())
1456  NCBI_THROW(CException, eUnknown, "Failed to retrieve Assembly for '" + assm_acc + "'");
1457  CGC_Assembly::TSequenceList top_level_seqs;
1458  assm->GetMolecules(top_level_seqs, CGC_Assembly::eTopLevel);
1459  TAssemblySeqIds ids;
1460  for (auto it : top_level_seqs) {
1461  const CSeq_id& tls_seq_id = it->GetSeq_id();
1462  ids.emplace_back(&tls_seq_id);
1463  }
1465  {{
1466  auto it = s_AssemblySeqIdCache.find(assm_acc);
1467  if (it != s_AssemblySeqIdCache.end())
1468  return it->second;
1469  }}
1470  s_AssemblySeqIdCache.emplace(assm_acc, ids);
1471  if (assm_acc != assm->GetAccession())
1472  s_AssemblySeqIdCache.emplace(assm->GetAccession(), ids);
1473  return s_AssemblySeqIdCache[assm_acc];
1474 }
1475 
1476 bool CSeqUtils::isTopLevel(const CSeq_id& seq_id, const string& assm_acc, CScope* scope)
1477 {
1478  CBioseq_Handle handle(scope->GetBioseqHandle(seq_id));
1479  // filter out cases when the given id something unsuitable e.g. like a local id
1480  if(assm_acc.empty() || !CGencollSvc::isGenCollSequence(handle)) {
1481  return false;
1482  }
1483  {
1485  // NCs are top level
1487  return true;
1488  }
1489 
1490  string sNormalizedSeqId(seq_id.AsFastaString() + ":" + assm_acc);
1491  {{
1493  if(m_TopLevels.find(sNormalizedSeqId) != m_TopLevels.end()) {
1494  // cerr << "found " << sNormalizedSeqId << " in top level cache with value: " << m_TopLevels[sNormalizedSeqId] << endl;
1495  return m_TopLevels[sNormalizedSeqId];
1496  }
1497  }}
1498  try {
1499  auto assembly_ids = s_GetAssemblySeqIds(assm_acc);
1500  if (scope && !assembly_ids.empty() && assembly_ids.front()->IsGi()) {
1501  auto gi = sequence::GetGiForId(seq_id, *scope);
1502  if (gi > ZERO_GI) {
1503  bool all_checked = true;
1504  for (const auto& id : assembly_ids) {
1505  if (!id->IsGi()) {
1506  // not expected to happen
1507  // but we'll make sure that we check all the cases
1508  all_checked = false;
1509  continue;
1510  }
1511  if (id->GetGi() == gi) {
1513  m_TopLevels[sNormalizedSeqId] = true;
1514  return true;
1515  }
1516  }
1517  if (all_checked) {
1518  // all molecules are gi and they don't match our gi
1520  m_TopLevels[sNormalizedSeqId] = false;
1521  return false;
1522  }
1523  }
1524  }
1525  // Here if our id or some of the assembly seqeunces are gi-less
1526  for (const auto& id : assembly_ids) {
1527  if(Match(seq_id, *id, scope)) {
1529  m_TopLevels[sNormalizedSeqId] = true;
1530  return true;
1531  }
1532  }
1533  // nothing found
1535  m_TopLevels[sNormalizedSeqId] = false;
1536 
1537  } catch (exception& e) {
1538  LOG_POST(Error << "Call to GenColl timed out when getting assembly: " << assm_acc << ", " << e.what());
1539  }
1540  return false;
1541 }
1542 
1543 
1544 string CSeqUtils::GetChrId(const string& id_str, objects::CScope& scope)
1545 {
1546  TGi gi(ZERO_GI);
1547  if(!id_str.empty() ) {
1548  CRef<objects::CSeq_id> seq_id(new objects::CSeq_id);
1549  seq_id->Set(id_str);
1550  objects::CBioseq_Handle bsh = scope.GetBioseqHandle(*seq_id);
1551  objects::CSeq_id_Handle shdl = bsh.GetAccessSeq_id_Handle();
1552  shdl = sequence::GetId(shdl, scope, sequence::eGetId_ForceGi);
1553  if (shdl) {
1554  gi = shdl.GetGi();
1555  }
1556  }
1557  return CSeqUtils::GetChrGI(gi);
1558 }
1559 
1560 
1562 {
1563  accs.clear();
1564  TEntrezIds gc_ids;
1565  GetAssmIds_GI(gc_ids, gi);
1566 
1567  if(gc_ids.empty())
1568  return;
1569 
1570  xml::document docsums;
1571  CGuiEutilsClient ecli;
1572  ecli.SetMaxReturn(kRetMax);
1573 
1574  try {
1575  ecli.Summary(kAssmDb, gc_ids, docsums);
1576  }
1577  catch (const CException& e) {
1578  LOG_POST(Error << "Failed to get summary for the following assemblies: " << CreateIdStr(gc_ids) << ". Error: " << e.GetMsg());
1579  return;
1580  }
1581 
1582  // Using "gcassembly" Entrez, you will not get the the GB assemblies
1583  // for some cases (those paired to RS assemblies) as separate entries
1584  // from an Entrez search. But if you are looking at the Entrez Docsums,
1585  // there is a field "GbUid" that tells you the release id of the
1586  // corresponding GB, and fields <Synonym>/<Genbank> and <Synonym>/<RefSeq>
1587  // will tell you the accessions of both of them. (JIRA: GCOL-1493)
1588  xml::node_set doc_sums ( docsums.get_root_node().run_xpath_query("//DocumentSummary[contains(AssemblyAccession/text(),'GCF_') and GbUid/text()!=\"\"]/Synonym/Genbank/text()") );
1590  for (itAcc = doc_sums.begin(); itAcc != doc_sums.end(); ++itAcc) {
1591  string acc(itAcc->get_content());
1592  if (acc.empty())
1593  continue;
1594  accs.insert(acc);
1595  }
1596 }
1597 
1598 template<class T1, class T2>
1599 static void s_ELinkQuery(const string &db_from, const string &db_to, const vector<T1> &uids_from, vector<T2> &uids_to, const string &cmd, const string &xpath)
1600 {
1601  xml::document xmldoc;
1602  CSeqUtils::ELinkQuery(db_from, db_to, uids_from, xmldoc, cmd);
1603 
1604  xml::node_set links ( xmldoc.get_root_node().run_xpath_query(xpath.c_str()) );
1606  for (itLink = links.begin(); itLink != links.end(); ++itLink) {
1607  string id(itLink->get_content());
1608  if (id.empty())
1609  continue;
1610  uids_to.push_back(NStr::StringToNumeric<T2>(id));
1611  }
1612 }
1613 
/// ELink query taking Entrez ids as input.
/// Forwards to the file-local s_ELinkQuery template, which appends to
/// uids_to the numeric value of every non-empty node selected by xpath
/// in the link response.
1614 void CSeqUtils::ELinkQuery(const string& db_from, const string& db_to, const TEntrezIds& uids_from, TEntrezIds& uids_to, const string& cmd, const string& xpath)
1615 {
1616  s_ELinkQuery(db_from, db_to, uids_from, uids_to, cmd, xpath);
1617 }
1618 
/// ELink query taking seq-id handles as input.
/// Forwards to the file-local s_ELinkQuery template, which appends to
/// uids_to the numeric value of every non-empty node selected by xpath
/// in the link response.
1619 void CSeqUtils::ELinkQuery(const string &db_from, const string &db_to, const TSeqIdHandles &uids_from, TEntrezIds &uids_to, const string &cmd, const string &xpath)
1620 {
1621  s_ELinkQuery(db_from, db_to, uids_from, uids_to, cmd, xpath);
1622 }
1623 
1624 
1625 template<class T>
1626 static void s_ELinkQuery(const string &db_from, const string &db_to, const vector<T> &uids_from, xml::document& linkset, const string &cmd)
1627 {
1628  CGuiEutilsClient ecli;
1629  ecli.SetMaxReturn(kRetMax);
1631 
1632  ecli.Link(db_from, db_to, uids_from, xml, cmd);
1633 
1634  stringbuf sb;
1635  xml >> &sb;
1636  string docstr(sb.str());
1637  xml::document doc(docstr.data(), docstr.size(), NULL);
1638  linkset.swap(doc);
1639 }
1640 
/// ELink query (Entrez ids as input) returning the raw response.
/// Forwards to the file-local s_ELinkQuery template, which parses the
/// response text into an XML document and swaps it into linkset.
1641 void CSeqUtils::ELinkQuery(const string &db_from, const string &db_to, const TEntrezIds &uids_from, xml::document& linkset, const string &cmd)
1642 {
1643  s_ELinkQuery(db_from, db_to, uids_from, linkset, cmd);
1644 }
1645 
/// ELink query (seq-id handles as input) returning the raw response.
/// Forwards to the file-local s_ELinkQuery template, which parses the
/// response text into an XML document and swaps it into linkset.
1646 void CSeqUtils::ELinkQuery(const string &db_from, const string &db_to, const TSeqIdHandles &uids_from, xml::document& linkset, const string &cmd)
1647 {
1648  s_ELinkQuery(db_from, db_to, uids_from, linkset, cmd);
1649 }
1650 
1651 
1652 template<class T>
1653 static void s_ESearchQuery(const string &db, const string &term, vector<T> &uids, size_t &count, const int ret_max, const string &xpath)
1654 {
1655  CGuiEutilsClient ecli;
1656  ecli.SetMaxReturn(ret_max);
1658 
1659  ecli.Search(db, term, xml);
1660 
1661  stringbuf sb;
1662  xml >> &sb;
1663  string docstr(sb.str());
1664 
1665  xml::document xmldoc(docstr.data(), docstr.size(), NULL);
1666  xml::node_set links(xmldoc.get_root_node().run_xpath_query(xpath.c_str()));
1668  for (itLink = links.begin(); itLink != links.end(); ++itLink) {
1669  string id(itLink->get_content());
1670  if (id.empty())
1671  continue;
1672  uids.push_back(NStr::StringToNumeric<T>(id));
1673  }
1674  string countStr = CSeqUtils::GetXmlChildNodeValue(xmldoc.get_root_node(), "Count");
1675  if (!countStr.empty())
1676  count = NStr::StringToSizet(countStr);
1677  else
1678  count = uids.size();
1679 }
1680 
/// ESearch query returning Entrez ids.
/// Forwards to the file-local s_ESearchQuery template, which appends the ids
/// selected by xpath to uids and sets count from the response's "Count"
/// child node, or to uids.size() when that node has no value.
1681 void CSeqUtils::ESearchQuery(const string& db, const string& term, TEntrezIds& uids, size_t& count, const int ret_max, const string& xpath)
1682 {
1683  s_ESearchQuery(db, term, uids, count, ret_max, xpath);
1684 }
1685 
1686 template<class T>
1687 static void s_ESearchQuery(const string &db, const string &term, const string &web_env, const string &query_key, vector<T> &uids, size_t &count, int retstart, const string &xpath)
1688 {
1689  xml::document xmldoc;
1690  CSeqUtils::ESearchQuery(db, term, web_env, query_key, xmldoc, retstart);
1691 
1692  xml::node_set links ( xmldoc.get_root_node().run_xpath_query(xpath.c_str()) );
1694  for (itLink = links.begin(); itLink != links.end(); ++itLink) {
1695  string id(itLink->get_content());
1696  if (id.empty())
1697  continue;
1698  uids.push_back(NStr::StringToNumeric<T>(id));
1699  }
1700  string countStr = CSeqUtils::GetXmlChildNodeValue(xmldoc.get_root_node(), "Count");
1701  if (!countStr.empty())
1702  count = NStr::StringToSizet(countStr);
1703  else
1704  count = uids.size();
1705 }
1706 
/// ESearch history query returning Entrez ids.
/// Forwards to the file-local s_ESearchQuery template, which appends the ids
/// selected by xpath to uids and sets count from the response's "Count"
/// child node, or to uids.size() when that node has no value.
1707 void CSeqUtils::ESearchQuery(const string &db, const string &term, const string &web_env, const string &query_key, TEntrezIds &uids, size_t &count, int retstart, const string &xpath)
1708 {
1709  s_ESearchQuery(db, term, web_env, query_key, uids, count, retstart, xpath);
1710 }
1711 
1712 void CSeqUtils::ESearchQuery(const string &db, const string &term, const string &web_env, const string &query_key, xml::document &searchset, int retstart)
1713 {
1714  CGuiEutilsClient ecli;
1715  ecli.SetMaxReturn(kRetMax);
1717 
1718  ecli.SearchHistory(db, term, web_env, NStr::StringToNumeric<Int8>(query_key), retstart, xml);
1719 
1720  stringbuf sb;
1721  xml >> &sb;
1722  string docstr(sb.str());
1723 
1724  xml::document doc(docstr.data(), docstr.size(), NULL);
1725  searchset.swap(doc);
1726 }
1727 
1729 {
1730  xml::node::const_iterator itNode = parent.find(name.c_str());
1731  do {
1732  if (parent.end() == itNode)
1733  break;
1734 
1735  const char* value = itNode->get_content();
1736 
1737  if (!value)
1738  break;
1739 
1740  return string(value);
1741  }
1742  while(false);
1743  return string();
1744 }
1745 
1747 {
1748  gc_ids.clear();
1749 
1750  TEntrezIds uids_from;
1751 #ifdef NCBI_STRICT_GI
1752  uids_from.push_back(GI_TO(TEntrezId, gi));
1753 #else
1754  uids_from.push_back(gi);
1755 #endif
1756 
1757  CGuiEutilsClient ecli;
1758  ecli.SetMaxReturn(kRetMax);
1759 
1760  try {
1761  ELinkQuery(kNucDb, kAssmDb, uids_from, gc_ids);
1762  }
1763  catch (const CException& e) {
1764  LOG_POST(Error << "Failed to get assembly ids for gi: " << gi << ". Error: " << e.GetMsg());
1765  }
1766 }
1767 
1768 /// For CDS and RNA feature mapping information
1769 typedef vector< CRef<CSeq_loc> > TMappedLocs;
1770 
1772  const CSeq_loc& feat_loc,
1773  const CMappedFeat& feat,
1774  const CBioseq_Handle& handle)
1775 {
1776  TMappedLocs locs;
1777  CConstRef<CSeq_align> align_ref;
1778  CScope& scope = handle.GetScope();
1779  TSeqRange range = feat_loc.GetTotalRange();
1780 
1781  if (!feat.IsSetProduct()) return locs;
1782  const CSeq_id& product_id = *feat.GetProduct().GetId();
1783 
1784  // check if there is any alignment associated with the
1785  // product sequence
1786  SAnnotSelector sel;
1787  sel.SetAdaptiveDepth(true);
1788  sel.SetExactDepth(false);
1789  sel.SetResolveAll();
1790  sel.SetResolveDepth(1);
1791  sel.ExcludeNamedAnnots("SNP");
1792  sel.ExcludeNamedAnnots("STS");
1793  sel.ExcludeNamedAnnots("CDD");
1794  CSeq_annot_Handle annot = feat.GetAnnot();
1795  if (annot && annot.IsNamed()) {
1796  const string& annot_str = annot.GetName();
1797  sel.AddNamedAnnots(annot_str);
1798  if (NStr::StartsWith(annot_str, "NA0")) {
1799  sel.IncludeNamedAnnotAccession(annot_str);
1800  }
1801  }
1802 
1803  CConstRef<CSeq_loc> aln_loc(
1804  handle.GetRangeSeq_loc(range.GetFrom(), range.GetTo()) );
1805  CAlign_CI align_iter(scope, *aln_loc, sel);
1806 
1807  while (align_iter && !align_ref) {
1808  // find the first seq-align that matches the product sequence
1809  const CSeq_align& align = *align_iter;
1810  CSeq_align::TDim num_row = align.CheckNumRows();
1811  if (num_row != 2) continue;
1812  for (CSeq_align::TDim row = 0; row < num_row; ++row) {
1813  if (product_id.Match(align.GetSeq_id(row))) {
1814  if (align.GetSegs().IsSpliced()) {
1815  // CSeq_loc_Mapper doesn't map location correctly for spliced-seg.
1816  // This is a work-around to convert spliced-seg to denseg.
1817  align_ref = ConvertSeq_align(align, CSeq_align::TSegs::e_Denseg);
1818  } else {
1819  align_ref.Reset(&align);
1820  }
1821  break;
1822  }
1823  }
1824  ++align_iter;
1825  }
1826 
1827  CRef<CSeq_loc_Mapper> mapper;
1828  CRef<CSeq_loc_Mapper> back_mapper;
1829 
1830  // No alignment found
1831  if (align_ref) {
1832  mapper.Reset(new CSeq_loc_Mapper(*align_ref, product_id, &scope));
1833  back_mapper.Reset(new CSeq_loc_Mapper(*align_ref, *feat_loc.GetId(), &scope));
1834  } else {
1835  const CSeq_feat& mapped_feat = feat.GetMappedFeature();
1836  mapper.Reset(new CSeq_loc_Mapper(mapped_feat, CSeq_loc_Mapper::eLocationToProduct, &scope));
1837  back_mapper.Reset(new CSeq_loc_Mapper(mapped_feat, CSeq_loc_Mapper::eProductToLocation, &scope));
1838  }
1839 
1840  CRef<CSeq_loc> prod_loc = mapper->Map(feat_loc);
1841  CRef<CSeq_loc> gen_loc = back_mapper->Map(*prod_loc);
1842  locs.push_back(prod_loc);
1843  locs.push_back(gen_loc);
1844 
1845  return locs;
1846 }
1847 
1848 #undef MAP_INFO_DEBUG
1849 
1850 /// Helper function to convert two mapped locations into
1851 /// an interval-to-interval mapping structure
1853  const CSeq_loc& prod_loc,
1854  const CSeq_loc& gen_loc,
1856 {
1857  // We assume both contains exactly the same number of intervals, and
1858  // each has the same length
1859  CSeq_loc_CI it1(prod_loc);
1860  CSeq_loc_CI it2(gen_loc);
1861  CRef<CSeq_id> gen_id(new CSeq_id);
1862  gen_id->Assign(*gen_loc.GetId());
1863  CRef<CSeq_id> prod_id(new CSeq_id);
1864  prod_id->Assign(*prod_loc.GetId());
1865  while (it1 && it2) {
1866  TSeqRange r1 = it1.GetRange();
1867  TSeqRange r2 = it2.GetRange();
1868  if (it1.IsEmpty() || it2.IsEmpty() ||
1869  r1.GetLength() != r2.GetLength()) {
1870  info.clear();
1871  return;
1872  }
1873 
1874  CRef<CSeq_interval> int1;
1875  CRef<CSeq_interval> int2;
1876  int1.Reset(new CSeq_interval);
1877  int1->SetFrom(r1.GetFrom());
1878  int1->SetTo(r1.GetTo());
1879  int1->SetId(*prod_id);
1880  if (it1.IsSetStrand()) {
1881  int1->SetStrand(it1.GetStrand());
1882  }
1883  int2.Reset(new CSeq_interval);
1884  int2->SetFrom(r2.GetFrom());
1885  int2->SetTo(r2.GetTo());
1886  int2->SetId(*gen_id);
1887  if (it2.IsSetStrand()) {
1888  int2->SetStrand(it2.GetStrand());
1889  }
1890  info.emplace_back(int1, int2);
1891  ++it1; ++it2;
1892  }
1893 
1894  // Two locations don't match
1895  if (it1 || it2) {
1896  info.clear();
1897  }
1898 }
1899 
1900 
1902  const objects::CSeq_loc& feat_loc,
1903  const CMappedFeat& feat,
1904  const objects::CBioseq_Handle& handle)
1905 {
1907  TMappedLocs locs = s_GetRnaMappingLocs(feat_loc, feat, handle);
1908  if (locs.size() == 2) {
1909  s_CreateMappingInfo(*locs[0], *locs[1], info);
1910  }
1911  return info;
1912 }
1913 
1914 
/// Adjust a CDS mapping (built from mRNA segments) to the annotated CDS.
///
/// Each entry of cds_map_info is accessed as a pair: 'first' is the product
/// interval and 'second' is the genomic interval. Entries are modified in
/// place, entries whose genomic interval is shorter than 3 are erased, and
/// new entries are inserted when a genomic interval comes back split after
/// mapping it to the product and back.
///
/// @param mapped_cds_feat  CDS feature used to build both mappers
/// @param scope            scope handed to the mappers
/// @param cds_map_info     mapping to adjust (in/out)
1915 static
1916 void s_AdjustToAnnotatedCDS(const CMappedFeat& mapped_cds_feat, CScope& scope, CSeqUtils::TMappingInfo& cds_map_info)
1917 {
1918  // At this point the mapping is generated via mRNA segments
1919  // This does not account for CDS ribosomal shifts
1920  // If now we remap genomic location via CDS feature
1921  // to product and back to location
1922  // the mapped genomic location will be split in two if there is a ribosomal slippage
1923  // in this case we split the mapping in two as well
1924 
1925 
1926  CSeq_loc_Mapper map2prod(mapped_cds_feat.GetMappedFeature(), CSeq_loc_Mapper::eLocationToProduct, &scope);
1927  CSeq_loc_Mapper map2loc(mapped_cds_feat.GetMappedFeature(), CSeq_loc_Mapper::eProductToLocation, &scope);
1928 
1929 #ifdef MAP_INFO_DEBUG
1930  cout << "\n===== Original Map ==========\n";
1931  for (const auto& it : cds_map_info) {
1932  auto gen_int = get<1>(it);
1933  auto prod_int = get<0>(it);
1934  cout << gen_int->GetFrom() << ".." << gen_int->GetTo() << "->" << prod_int->GetFrom() << ".." << prod_int->GetTo() << endl;
1935  }
1936  cout << "\n========================\n";
1937 #endif
1938 
1939  // we have to keep product coordinates consecutive
1940  // so we use curr_prod_pos to keep track of the current prod coordinate
1941  // but product might be annotated with gaps
1942  // last_prod_pos keeps track of the last annotated pos to account for these gaps
1943  int curr_prod_pos = -1;
1944  int last_prod_pos = -1;
1945 
1946  auto cds_map_it = cds_map_info.begin();
1947  while (cds_map_it != cds_map_info.end()) {
1948  auto gen_int = cds_map_it->second;
1949  // remove odd mappings with length < 3
1950  if ((gen_int->GetTo() - gen_int->GetFrom()) + 1 < 3) {
1951  cds_map_it = cds_map_info.erase(cds_map_it);
1952  continue;
1953  }
1954 
// NOTE(review): despite its name, prod_loc is built from the genomic interval;
// it is mapped to the product and then back to the genomic sequence.
1955  auto prod_loc = Ref(new CSeq_loc(gen_int->SetId(), gen_int->GetFrom(), gen_int->GetTo(), gen_int->GetStrand()));
1956  auto mapped_prod_loc = map2prod.Map(*prod_loc);
1957  auto mapped_gen_loc = map2loc.Map(*mapped_prod_loc);
1958 
// NOTE(review): this debug block uses prod_int, loc, mapped_loc and mapped_loc2,
// none of which is declared at this point in the function; it would need
// updating before MAP_INFO_DEBUG could be enabled.
1959 #ifdef MAP_INFO_DEBUG
1960 
1961  cout << MSerial_AsnText << *prod_int << endl;
1962  cout << MSerial_AsnText << *loc << "\nMapped to\n";
1963  cout << MSerial_AsnText << *mapped_loc << "\n";
1964  cout << "Remapped to\n";
1965  cout << MSerial_AsnText << *mapped_loc2 << endl;
1966 #endif
1967  CSeq_loc_CI lit(*mapped_gen_loc);
1968  auto sz = lit.GetSize();
1969  list<TSeqRange> rngs;
1970  for (size_t i = 0; i < sz; ++i, ++lit) {
1971  const auto& r = lit.GetRange();
1972  // remove the first and last mapped segments of length < 3
1973  // as these are codon carryovers from prev and next segments
1974  if (i == 0 && r.GetLength() < 3) {
1975  continue;
1976  }
1977  if (i == sz - 1 && r.GetLength() < 3)
1978  break;
1979  rngs.push_back(r);
1980  }
1981  if (!rngs.empty()) {
1982  bool reverse = gen_int->CanGetStrand() && gen_int->GetStrand() == eNa_strand_minus;
1983 
1984  auto prod_int = cds_map_it->first;
1985  auto r_it = rngs.begin();
1986 
// Clip the current genomic interval to the first remapped range: only the
// 'from' end can change on the minus strand and only the 'to' end otherwise;
// the other end is re-assigned its own value.
1987  if (reverse) {
1988  gen_int->SetFrom(max<int>(r_it->GetFrom(), gen_int->GetFrom()));
1989  gen_int->SetTo(gen_int->GetTo());
1990  }
1991  else {
1992  gen_int->SetFrom(gen_int->GetFrom());
1993  gen_int->SetTo(min<int>(r_it->GetTo(), gen_int->GetTo()));
1994  }
// The first processed entry seeds curr_prod_pos; for later entries, a positive
// distance between this entry's product start and last_prod_pos is added to
// curr_prod_pos so that the gap is carried over.
1995  if (curr_prod_pos == -1) {
1996  curr_prod_pos = prod_int->SetFrom();
1997  }
1998  else if (last_prod_pos != -1) {
1999  if (prod_int->SetFrom() - last_prod_pos > 0) {
2000  curr_prod_pos += (prod_int->SetFrom() - last_prod_pos);
2001  }
2002  }
2003  last_prod_pos = prod_int->GetTo() + 1;
// Rewrite the product interval so that it starts at curr_prod_pos and has the
// same length as the (clipped) genomic interval.
2004  prod_int->SetFrom(curr_prod_pos);
2005  curr_prod_pos += (gen_int->GetTo() - gen_int->GetFrom());
2006  prod_int->SetTo(curr_prod_pos);
2007  ++curr_prod_pos;
2008  ++r_it;
2009  while (r_it != rngs.end()) {
2010  // the original genomic location was split in two after mapping
2011  // the previous segment's to is already adjusted
2012  // so we insert a new mapping here
2013  auto from = curr_prod_pos;
2014  curr_prod_pos += r_it->GetLength() - 1;
2015  auto int1 = Ref(new CSeq_interval(prod_int->SetId(), from, curr_prod_pos, eNa_strand_plus));
2016  auto int2 = Ref(new CSeq_interval(gen_int->SetId(), r_it->GetFrom(), r_it->GetTo(), reverse ? eNa_strand_minus : eNa_strand_plus));
// The new entry goes right after the current one, and cds_map_it is moved onto
// it so that the outer loop continues behind the inserted entry.
2017  cds_map_it = cds_map_info.emplace(++cds_map_it, int1, int2);
2018  ++curr_prod_pos;
2019  ++r_it;
2020  }
2021  }
2022  ++cds_map_it;
2023  }
2024 
2025 #ifdef MAP_INFO_DEBUG
2026 
2027  cout << "\n=====Final Map ==========\n";
2028  for (const auto& it : cds_map_info) {
2029  auto& gen_int = get<1>(it);
2030  auto& prod_int = get<0>(it);
2031  cout << gen_int->GetFrom() << ".." << gen_int->GetTo() << "->" << prod_int->GetFrom() << ".." << prod_int->GetTo() << endl;
2032  }
2033  cout << "\n========================\n";
2034 #endif
2035 }
2036 
2038  const TMappingInfo& mapping_info,
2039  const CSeq_feat& rna_feat,
2040  const CMappedFeat& mapped_cds_feat,
2041  const CSeq_loc& feat_loc,
2042  CScope& scope,
2043  const int feat_offset)
2044 {
2045  const CSeq_id& product_id = *mapped_cds_feat.GetProduct().GetId();
2046  const CSeq_id& rna_product_id = *rna_feat.GetProduct().GetId();
2047  /// The start offset between CDS product sequence
2048  /// and its parent product sequence
2049  int cds_offset = -1;
2050  {
2051  // try if the parent RNA product sequence contain a
2052  // CDS feature with the same product sequence as 'product_id'
2053  CBioseq_Handle rna_bsh = scope.GetBioseqHandle(rna_product_id);
2054  if (rna_bsh) {
2055  SAnnotSelector sel;
2056  sel.SetAdaptiveDepth(true);
2057  sel.SetExactDepth(false);
2058  sel.SetResolveAll();
2059  sel.SetResolveDepth(1);
2060  sel.ExcludeNamedAnnots("SNP");
2061  sel.ExcludeNamedAnnots("STS");
2063  CFeat_CI feat_iter(rna_bsh, TSeqRange::GetWhole(), sel);
2064  for (; feat_iter; ++feat_iter) {
2065  const CSeq_feat* cds_feat = &feat_iter->GetMappedFeature();
2066  if (cds_feat->CanGetProduct() &&
2067  product_id.Match(*cds_feat->GetProduct().GetId())) {
2068  cds_offset = (int)cds_feat->GetLocation().GetTotalRange().GetFrom();
2069  break;
2070  }
2071  }
2072  }
2073  }
2074 
2075  CRef<CSeq_id> gen_id(new CSeq_id);
2076  gen_id->Assign(*feat_loc.GetId());
2077  CRef<CSeq_id> prod_id(new CSeq_id);
2078  prod_id->Assign(product_id);
2079 
2080  /// CDS biological range on the genomic sequence
2081  TSeqPos bio_start = feat_loc.GetStart(eExtreme_Biological);
2082  TSeqPos bio_stop = feat_loc.GetStop(eExtreme_Biological);
2083 
2084  /// truncate the rna mapping info using the CDS
2085  /// feature biological start and stop, and create
2086  /// the mapping info for the CDS feature by applying
2087  /// the cds-to-rna shift.
2088  TMappingInfo cds_map_info;
2089  TMappingInfo::const_iterator iter = mapping_info.begin();
2090  bool done = false;
2091  while (iter != mapping_info.end() && !done) {
2092  auto& gen_int = iter->second;
2093  auto& prod_int = iter->first;
2094 
2095  TSeqPos gen_from = gen_int->GetFrom();
2096  TSeqPos gen_to = gen_int->GetTo();
2097  TSeqPos prod_from = prod_int->GetFrom();
2098  TSeqPos prod_to = prod_int->GetTo();
2099  bool reverse = gen_int->CanGetStrand() && gen_int->GetStrand() == eNa_strand_minus;
2100 
2101  TSeqRange r1, r2;
2102  if (cds_map_info.empty()) {
2103  if (bio_start >= gen_from && bio_start <= gen_to) {
2104  // find the first interval intersecting with CDS location
2105  TSeqPos off1 = reverse ? gen_to - bio_start : bio_start - gen_from;
2106  r1.Set(prod_from + off1, prod_to);
2107  if (reverse) {
2108  r2.Set(gen_from, gen_to - off1);
2109  } else {
2110  r2.Set(gen_from + off1, gen_to);
2111  }
2112  if (cds_offset < 0 || (TSeqPos)cds_offset > r1.GetFrom()) {
2113  cds_offset = int(r1.GetFrom());
2114  }
2115  } // else, it is outside of the CDS range, skip it
2116  } else {
2117  r1.Set(prod_from, prod_to);
2118  r2.Set(gen_from, gen_to);
2119  }
2120 
2121  // check if it intersects with the biological stop position
2122  if (!r1.Empty() && !r2.Empty()) {
2123  if (bio_stop >= gen_from && bio_stop <= gen_to) {
2124  // find the last interval intersection with CDS location
2125  TSeqPos off2 = reverse ? bio_stop - gen_from : gen_to - bio_stop;
2126  r1.SetTo(r1.GetTo() - off2);
2127  if (reverse) r2.SetFrom(r2.GetFrom() + off2);
2128  else r2.SetTo(r2.GetTo() - off2);
2129 
2130  // set termination flag
2131  done = true;
2132  }
2133 
2135  int1->SetFrom(r1.GetFrom() - cds_offset);
2136  int1->SetTo(r1.GetTo() - cds_offset);
2137  int1->SetId(*prod_id);
2138  int1->SetStrand(eNa_strand_plus);
2139 
2141  int2->SetFrom(r2.GetFrom());
2142  int2->SetTo(r2.GetTo());
2143  int2->SetId(*gen_id);
2144  int2->SetStrand(reverse ? eNa_strand_minus : eNa_strand_plus);
2145 
2146  // create intervals and push them to the info map
2147  cds_map_info.emplace_back(int1, int2);
2148  }
2149 
2150  ++iter;
2151  }
2152 
2153  // Final step to determine if the mapping info is necessary for
2154  // the given cds feature.
2155  if ( !cds_map_info.empty() ) {
2156  // Create the mapped seq-loc on genomic and product sequence
2157  CRef<CSeq_loc> gen_loc(new CSeq_loc);
2158  CRef<CSeq_loc> prod_loc(new CSeq_loc);
2159  ITERATE (TMappingInfo, iter, cds_map_info) {
2160  prod_loc->SetPacked_int().Set().push_back(iter->first);
2161  gen_loc->SetPacked_int().Set().push_back(iter->second);
2162  }
2164  if (prod_loc->GetStart(eExtreme_Biological) == 0 &&
2165  prod_loc->GetPacked_int().Get().size() == 1 &&
2166  gen_loc->CompareSubLoc(feat_loc, eNa_strand_plus) == 0) {
2167  // All of above confidtions are met. the mapping info
2168  // is not necessary
2169  cds_map_info.clear();
2170  }
2171  }
2172 
2173  if (mapped_cds_feat.GetMappedFeature().IsSetExcept()) {
2174  // At this point the mapping is generated via mRNA segments
2175  // This does not account for CDS ribosomal shifts
2176  // If now we remap genomic location via CDS feature
2177  // to product and back to location
2178  // the mapped genomic location will be split in two if there is a ribosomal slippage
2179  // in this case we split the mapping in two as well
2180  s_AdjustToAnnotatedCDS(mapped_cds_feat, scope, cds_map_info);
2181  }
2182  if (!cds_map_info.empty() && (0 != feat_offset)) {
2183  if (feat_offset > 0) {
2184  auto& prod_int = cds_map_info.begin()->first;
2185  prod_int->SetFrom(prod_int->GetFrom() + feat_offset);
2186  auto& gen_int = cds_map_info.begin()->second;
2187  gen_int->SetFrom(gen_int->GetFrom() + feat_offset);
2188  }
2189  else {
2190  auto& prod_int = cds_map_info.begin()->first;
2191  prod_int->SetFrom(prod_int->GetFrom() - feat_offset);
2192  auto& gen_int = cds_map_info.begin()->second;
2193  gen_int->SetTo(gen_int->GetTo() + feat_offset);
2194  }
2195  }
2196  return cds_map_info;
2197 }
2198 
2199 
2201  const objects::CSeq_loc &feat_loc,
2202  const objects::CSeq_id &product_id,
2203  const int feat_offset
2204  )
2205 {
// Walks feat_loc step by step and appends one (product interval, genomic
// interval) pair per step to `info`, then returns `info`.
//  - Product intervals are laid end to end on product_id starting at 0 and
//    are always created on the plus strand.
//  - Genomic intervals copy each step's range; the strand is the step's
//    strand when set, otherwise plus.
// Both ids are copied first, so the intervals are built from the copies
// rather than from the caller's objects.
// NOTE(review): feat_loc.GetId() is dereferenced below without a null check;
// presumably callers only pass single-id locations - confirm.
2207  CRef<CSeq_id> gen_id(new CSeq_id);
2208  gen_id->Assign(*feat_loc.GetId());
2209  CRef<CSeq_id> prod_id(new CSeq_id);
2210  prod_id->Assign(product_id);
// Running product coordinate: the start of the next product interval.
2211  TSeqPos start = 0;
2212  CSeq_loc_CI it(feat_loc);
2213  while (it) {
2214  TSeqRange r = it.GetRange();
2215  auto int1 = Ref(new CSeq_interval(*prod_id, start, start + r.GetLength() - 1, eNa_strand_plus));
2216  start += r.GetLength();
2217  auto int2 = Ref(new CSeq_interval(*gen_id, r.GetFrom(), r.GetTo(), it.IsSetStrand() ?
2218  it.GetStrand() : eNa_strand_plus));
2219  info.emplace_back(int1, int2);
2220  ++it;
2221  }
// A non-zero feat_offset trims the FIRST pair only.
2222  if (!info.empty() && (0 != feat_offset)) {
2223  if (feat_offset > 0) {
// Positive offset: product start and genomic start both move forward
// by feat_offset.
2224  auto& prod_int = info.begin()->first;
2225  prod_int->SetFrom(prod_int->GetFrom() + feat_offset);
2226  auto& gen_int = info.begin()->second;
2227  gen_int->SetFrom(gen_int->GetFrom() + feat_offset);
2228  }
2229  else {
// Negative offset: product start moves forward by |feat_offset| while
// the genomic END moves back by |feat_offset|.
2230  auto& prod_int = info.begin()->first;
2231  prod_int->SetFrom(prod_int->GetFrom() - feat_offset);
2232  auto& gen_int = info.begin()->second;
2233  gen_int->SetTo(gen_int->GetTo() + feat_offset);
2234  }
2235  }
2236  return info;
2237 }
2238 
2239 
2241 {
// Picks the location object to use for `feat`.
// When the feature's location has no single id (GetId() is null), the
// result of CSeqUtils::MixLocToLoc(loc_obj, bsh) is tried first. If that
// call was skipped or produced a null reference, the feature's own location
// object is returned instead, so the returned reference is never null.
2242  const CSeq_feat_Base::TLocation& loc_obj = feat.GetLocation();
2243  CConstRef<CSeq_loc> loc;
2244  /// guard against the cases where the feature contains location
2245  /// with multiple seq-ids
2246  if ( !loc_obj.GetId() ) {
2247  loc = CSeqUtils::MixLocToLoc(loc_obj, bsh);
2248  }
// Fallback: reference the location stored inside the feature.
2249  if ( !loc ) {
2250  loc.Reset(&loc_obj);
2251  }
2252  return loc;
2253 }
2254 
/// Look up the mRNA feature for a CDS via feature::GetBestMrnaForCds().
///
/// @param cds_feat   The CDS feature to find an mRNA for.
/// @param named_acc  When non-empty, the lookup is run with a custom
///                   SAnnotSelector that includes this named annotation
///                   accession; when empty, GetBestMrnaForCds() is called
///                   with its defaults.
/// @return The feature returned by GetBestMrnaForCds(), or a
///         default-constructed CMappedFeat if a CException was thrown
///         (the exception is swallowed).
2255 CMappedFeat CSeqUtils::GetMrnaForCds(const CMappedFeat &cds_feat, const string &named_acc)
2256 {
2257  try {
2258  if (!named_acc.empty()) {
// Selector setup: adaptive, non-exact depth, resolve depth 1; the "SNP" and
// "STS" named annotations and all unnamed annotations are excluded, and
// named_acc is included.
2259  SAnnotSelector sel;
2260  sel.SetAdaptiveDepth(true);
2261  sel.SetExactDepth(false);
2262  sel.SetResolveAll();
2263  sel.SetResolveDepth(1);
2264  sel.ExcludeNamedAnnots("SNP");
2265  sel.ExcludeNamedAnnots("STS");
2267  sel.ExcludeUnnamedAnnots();
2268  sel.IncludeNamedAnnotAccession(named_acc);
2269  return feature::GetBestMrnaForCds(cds_feat, nullptr, &sel);
2270  }
2271  else {
2272  return feature::GetBestMrnaForCds(cds_feat);
2273  }
2274  }
2275  catch(const CException&)
2276  {
// Deliberately ignored: a failed lookup is reported as an empty feature.
2277  }
2278 
2279  return CMappedFeat();
2280 }
2281 
/// Fill `info` with product-to-genomic mapping pairs for a feature.
///
/// Features with a product are handled by feature kind (RNA vs. CDS);
/// features without a product get a synthetic local product id
/// ("lcl|pseudo<N>") when they are a CDS, a V-segment or an RNA.
///
/// @param mapped_feat  Feature to build mapping info for.
/// @param bsh          Bioseq handle; also supplies the scope used here.
/// @param info         Output mapping info.
/// @param annot        Passed to GetMrnaForCds() as the named annotation
///                     accession when looking up the CDS's mRNA.
2282 void CSeqUtils::GetMappingInfo(const CMappedFeat &mapped_feat, const CBioseq_Handle& bsh, CSeqUtils::TMappingInfo &info, const string &annot)
2283 {
2284  const CSeq_feat& feat = mapped_feat.GetMappedFeature();
2286  CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
2288 
// Signed start offset derived from the CDS frame:
//  - 0 for non-CDS features, or when the frame is unset or <= 1;
//  - otherwise (frame - 1), negated when the strand of the mapped
//    feature's location is minus.
2289  auto GetCDSFrame = [&]() {
2290  if (feat.GetData().Which() != CSeqFeatData::e_Cdregion)
2291  return 0;
2292  if (feat.GetData().GetCdregion().IsSetFrame() && feat.GetData().GetCdregion().GetFrame() > 1) {
2293  int offset = feat.GetData().GetCdregion().GetFrame() - 1;
2294  const CSeq_loc& orig_loc = mapped_feat.GetLocation();
2295  ENa_strand q_strand = sequence::GetStrand(orig_loc, &bsh.GetScope());
2296  return (q_strand == eNa_strand_minus) ? -offset : offset;
2297  }
2298  return 0;
2299  };
2300 
2301  if (feat.IsSetProduct()) {
2302  if (feat.GetData().IsRna()) {
2303  info = CSeqUtils::GetRnaMappingInfo(*loc, mapped_feat, bsh);
2304  }
2305  else if (type == CSeqFeatData::e_Cdregion) {
2306  int offset = GetCDSFrame();
// Single-pass do/while(false): `break` is used to bail out as soon as a
// prerequisite (mRNA with a product, non-empty mRNA mapping) is missing.
2307  do {
2308  const CSeq_loc& product = feat.GetProduct();
2309  CMappedFeat mapped_mrna = CSeqUtils::GetMrnaForCds(mapped_feat, annot);
2310  if (!mapped_mrna || !mapped_mrna.GetOriginalFeature().IsSetProduct()) {
2312  break;
2313  }
2314  CConstRef<CSeq_loc> mrna_loc = CSeqUtils::GetFeatLocation(mapped_mrna.GetMappedFeature(), bsh);
2315  CSeqUtils::TMappingInfo mrna_info = CSeqUtils::GetRnaMappingInfo(*mrna_loc, mapped_mrna, bsh);
2316  if (mrna_info.empty())
2317  break;
2319  mapped_mrna.GetMappedFeature(), mapped_feat, *loc, bsh.GetScope(), offset);
2320  } while (false);
2321  }
2322  }
2323  else {
2324  if ((type == CSeqFeatData::e_Cdregion) || (subtype == CSeqFeatData::eSubtype_V_segment) || feat.GetData().IsRna()) {
2325  // We assume both contains exactly the same number of intervals, and
2326  // each has the same length
// The counter makes every synthetic product id unique within the process.
// NOTE(review): the static counter is incremented without any
// synchronization visible here - confirm this is only reached from one thread.
2327  static unsigned id_num{ 0 };
2328  int offset = GetCDSFrame();
2329  CRef<CSeq_id> prod_id(new CSeq_id("lcl|pseudo" + NStr::IntToString(++id_num)));
2331  }
2332  }
2333 }
2334 
2335 
2336 bool CSeqUtils::GetGIString(const string& sid, string* gi_str)
2337 {
2338  bool match = false;
2339 
2340  if (!gi_str) return match;
2341  gi_str->clear();
2342 
2343  SIZE_TYPE pos = NStr::FindNoCase(sid, "gi|");
2344  if (pos != NPOS) {
2345  match = true;
2346  *gi_str = "gi|";
2347  for (size_t i = pos + 3; i < sid.length(); ++i) {
2348  char c = sid.at(i);
2349  if (isdigit(c)) {
2350  gi_str->append(1, c);
2351  } else {
2352  break;
2353  }
2354  }
2355  }
2356  return match;
2357 }
2358 
2359 int CSeqUtils::GetGenCode(const objects::CBioseq_Handle& handle)
2360 {
2361  // get an appropriate translation table. For the full list of tables,
2362  // please refer to https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
2363  int gencode = 1;
2364  try {
2365  CSeqdesc_CI desc_it(handle, CSeqdesc::e_Source);
2366  if (desc_it) {
2367  const CBioSource& src = desc_it->GetSource();
2368  gencode = src.GetGenCode();
2369  }
2370  } catch (CException&) {
2371  // ignore it, will try other approach
2372  }
2373  return gencode;
2374 }
2375 
2376 // encode "shown" flag inside an annot
2377 void CSeqUtils::SetAnnotShown(objects::CSeq_annot& annot, bool isShown)
2378 {
2379  CRef<CUser_object> shown_desc(new CUser_object);
2380  shown_desc->SetType().SetStr("x-sv-track-settings");
2381  shown_desc->AddField("show-track", isShown);
2382  annot.AddUserObject(*shown_desc);
2383 }
2384 
2385 // get "shown" flag from an annot
2386 bool CSeqUtils::GetAnnotShown(const objects::CSeq_annot& annot)
2387 {
2388  CConstRef<CUser_field> show_field(GetAnnotUserField(annot, "x-sv-track-settings", "show-track"));
2389  if(!show_field.IsNull() && show_field->GetData().IsBool() && show_field->GetBool() == false) {
2390  return false;
2391  }
2392  return true;
2393 }
2394 
2395 CRef<objects::CSeq_id> CSeqUtils::MapStringId(const string& str, objects::IIdMapper *mapper)
2396 {
2397  CRef<CSeq_id> id;
2398  try {
2399  id = new CSeq_id(str);
2400  }
2401  catch (const CException&) {
2402  }
2403  if (!id && str.find('|') != NPOS) {
2404  try {
2405  CBioseq::TId ids;
2406  CSeq_id::ParseIDs(ids, str);
2407  if (!ids.empty()) {
2408  id = *ids.begin();
2409  }
2410  }
2411  catch (const CException&) {
2412  }
2413  }
2414  if (!id || (id->IsGi() && id->GetGi() < GI_CONST(1000))) {
2415  id = new CSeq_id(CSeq_id::e_Local, str);
2416  }
2417  if (mapper) {
2418  try {
2419  mapper->MapObject(*id);
2420  }
2421  catch (const CException&) {
2422  }
2423  }
2424  return id;
2425 }
2426 
/// Check a location for intervals flagged as crossing the origin.
///
/// Every interval of loc is inspected; the function returns true as soon as
/// the "from" fuzz or the "to" fuzz of an interval is a limit-type fuzz
/// equal to CInt_fuzz::eLim_circle.
///
/// @param loc  Location to inspect.
/// @return true when such a fuzz is found on any interval.
2427 bool CSeqUtils::IsCrossOrigin(const CSeq_loc& loc)
2428 {
2429  for (CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
// GetFuzzFrom()/GetFuzzTo() may return null, hence the pointer checks.
2430  const CInt_fuzz* fuzz;
2431 
2432  fuzz = loc_it.GetFuzzFrom();
2433  if (fuzz &&
2434  fuzz->IsLim() &&
2435  fuzz->GetLim() == CInt_fuzz::eLim_circle) {
2436  return true;
2437  }
2438 
2439  fuzz = loc_it.GetFuzzTo();
2440  if (fuzz &&
2441  fuzz->IsLim() &&
2442  fuzz->GetLim() == CInt_fuzz::eLim_circle) {
2443  return true;
2444  }
2445  }
2446 
2448 }
2449 
2450 bool CSeqUtils::IsCrossOrigin(const CSeq_align& align)
2451 {
2452  if (!(align.IsSetSegs() && align.GetSegs().IsSpliced() && align.GetSegs().GetSpliced().IsSetExons())) {
2453  return false;
2454  }
2455 
2456  if (align.GetSegs().GetSpliced().GetExons().size() < 2) {
2457  return false;
2458  }
2459 
2460  ENa_strand strand = align.GetSeqStrand(1);
2461  TSeqPos prev;
2462  bool first = true;
2463  for (const auto& exon : align.GetSegs().GetSpliced().GetExons()) {
2464  TSeqPos pos = exon->GetGenomic_start();
2465  if (first) {
2466  prev = pos;
2467  first = false;
2468  continue;
2469  }
2470 
2471  if ((strand == eNa_strand_plus && pos < prev) ||
2472  (strand == eNa_strand_minus && pos > prev)) {
2473  return true;
2474  }
2475 
2476  }
2477  return false;
2478 }
2479 
2481 {
// Finds the bioseq that directly carries the descriptor object seq_desc.
// Matching is by object address: a descriptor with equal content stored in
// a different object does not match. Returns a null reference when scope is
// null or no bioseq in the scope carries that descriptor object.
2482  CConstRef<CBioseq> bioseq;
2483 
2484  if (!scope) {
2485  return bioseq;
2486  }
2487 
// Scan every bioseq of every TSE known to the scope.
2488  CScope::TTSE_Handles tses;
2489  scope->GetAllTSEs(tses, CScope::eAllTSEs);
2490  ITERATE (CScope::TTSE_Handles, handle, tses) {
2491  for (CBioseq_CI bioseq_it(*handle); bioseq_it; ++bioseq_it) {
2492  // Is Seqdesc on this Bioseq?
2493  if (bioseq_it->IsSetDescr()) {
2494  CConstRef<CBioseq> r_bioseq = bioseq_it->GetCompleteBioseq();
2495  ITERATE (CBioseq::TDescr::Tdata, dit, r_bioseq->GetDescr().Get()) {
2496  if (dit->GetPointer() == &seq_desc) {
2497  return r_bioseq;
2498  }
2499  }
2500  }
2501  }
2502  }
2503  return bioseq;
2504 }
2505 
2506 
2508 {
// Finds the seq-entry that holds the publication object pubdesc, either as
// a pub descriptor on the entry or, for sequence entries, as the data of a
// pub feature found through the sequence. Matching is by object address.
// Returns an empty handle when scope is null or nothing matches.
2509  CSeq_entry_Handle seh;
2510 
2511  if (!scope) {
2512  return seh;
2513  }
2514 
2515  CScope::TTSE_Handles tses;
2516  scope->GetAllTSEs(tses, CScope::eAllTSEs);
2517  ITERATE (CScope::TTSE_Handles, handle, tses) {
// Visit the TSE itself and all nested entries.
2518  for (CSeq_entry_CI entry_ci(*handle, CSeq_entry_CI::fRecursive | CSeq_entry_CI::fIncludeGivenEntry); entry_ci; ++entry_ci) {
// 1) pub descriptors attached to this entry
2519  if (entry_ci->IsSetDescr()) {
2520  ITERATE (CBioseq::TDescr::Tdata, dit, entry_ci->GetDescr().Get()) {
2521  if ((*dit)->IsPub()) {
2522  const CPubdesc& desc_pub = (*dit)->GetPub();
2523  if (&desc_pub == &pubdesc) {
2524  return *entry_ci;
2525  }
2526  }
2527  }
2528  }
// 2) pub features reachable from this sequence
2529  if (entry_ci->IsSeq()) {
2530  for (CFeat_CI fi(entry_ci->GetSeq(), SAnnotSelector(CSeqFeatData::e_Pub)); fi; ++fi) {
2531  if (&(fi->GetData().GetPub()) == &pubdesc) {
2532  return *entry_ci;
2533  }
2534  }
2535  }
2536  }
2537  }
2538  return seh;
2539 }
2540 
2541 
2542 
2544 {
// Returns the top-level entry of the first TSE reported by
// scope.GetAllTSEs(), or an empty handle when the scope reports none.
2545  CSeq_entry_Handle seh;
2546 
2547  CScope::TTSE_Handles handles;
2548  scope.GetAllTSEs(handles);
2549  if (handles.size() > 0) {
2550  seh = handles.front().GetTopLevelEntry();
2551  }
2552  return seh;
2553 }
2554 
2555 
2557 {
// Linear search over all features of all TSEs in the scope for the one
// whose underlying Seq-feat object is `feat` itself (address comparison).
// Returns a default-constructed handle when no such feature is found.
2558  CScope::TTSE_Handles tse_list;
2559  scope.GetAllTSEs(tse_list);
2560  ITERATE(CScope::TTSE_Handles, tse, tse_list) {
2561  CFeat_CI f(*tse);
2562  while (f) {
2563  if (f->GetSeq_feat() == &feat) {
2564  return *f;
2565  }
2566  ++f;
2567  }
2568  }
2569  CSeq_feat_Handle fh;
2570  return fh;
2571 }
2572 
2573 
2575 {
// Resolves the bioseq a feature belongs to.
// First attempt: take the seq-id of the first sub-location of the feature's
// location (if the location is set and that sub-location is not empty) and
// ask the scope for its bioseq handle.
2576  CBioseq_Handle bsh;
2577  if (f.IsSetLocation()) {
2578  CSeq_loc_CI subloc(f.GetLocation());
2579  if (subloc && !subloc.IsEmpty()) {
2580  bsh = scope.GetBioseqHandle(subloc.GetSeq_id());
2581  }
2582  }
// Fallback when the first attempt gave no handle: use the sequence of the
// seq-entry `seh`, provided the handle `fh` is valid and `seh` is a sequence.
2583  if (!bsh) {
2585  if (fh) {
2587  if (seh.IsSeq()) {
2588  bsh = seh.GetSeq();
2589  }
2590  }
2591  }
2592  return bsh;
2593 }
2594 
2595 
2597 {
// Maps an arbitrary scoped object to a seq-entry handle by dispatching on
// the dynamic type of obj.object. Returns an empty handle when obj.scope is
// null, the object is of none of the types tested below, or the lookup for
// its type fails.
2598  const CObject* ptr = obj.object.GetPointer();
2599 
// Probe every supported type up front; the if/else chain below uses the
// first non-null result.
2600  /// CSeq_entry
2601  const objects::CSeq_entry* seqEntry = dynamic_cast<const objects::CSeq_entry*>(ptr);
2602  const objects::CBioseq* bioseq = dynamic_cast<const objects::CBioseq*>(ptr);
2603  const objects::CBioseq_set* bioseq_set = dynamic_cast<const objects::CBioseq_set*>(ptr);
2604  const objects::CSeq_annot* seqannot = dynamic_cast<const objects::CSeq_annot*>(ptr);
2605  const objects::CSeq_feat* seqfeat = dynamic_cast<const objects::CSeq_feat*>(ptr);
2606  const objects::CSeqdesc* seqdesc = dynamic_cast<const objects::CSeqdesc*>(ptr);
2607  const objects::CSeq_submit* seqsubmit = dynamic_cast<const objects::CSeq_submit*>(ptr);
2608  const objects::CPubdesc* pubdesc = dynamic_cast<const objects::CPubdesc*>(ptr);
2609  const objects::CSeq_loc* loc = dynamic_cast<const objects::CSeq_loc*>(ptr);
2610  const objects::CSeq_id* seq_id = dynamic_cast<const objects::CSeq_id*>(ptr);
2611 
2612  objects::CSeq_entry_Handle seh;
2613  if (!obj.scope)
2614  return seh;
2615 
// Entries, bioseqs, bioseq-sets and annots: object handle from the scope
// (null on a miss), then climb to the top-level entry.
2616  if (seqEntry) {
2617  seh = obj.scope->GetObjectHandle (*seqEntry, CScope::eMissing_Null);
2618  if (seh)
2619  seh = seh.GetTopLevelEntry();
2620  } else if(bioseq) {
2621  CBioseq_Handle bsh = obj.scope->GetObjectHandle (*bioseq, CScope::eMissing_Null);
2622  if (bsh)
2623  seh = bsh.GetTopLevelEntry();
2624  } else if(bioseq_set) {
2625  CBioseq_set_Handle bssh = obj.scope->GetObjectHandle(*bioseq_set, CScope::eMissing_Null);
2626  if (bssh) {
2627  seh = bssh.GetTopLevelEntry();
2628  }
2629  } else if(seqannot) {
2630  auto sah = obj.scope->GetObjectHandle (*seqannot, CScope::eMissing_Null);
2631  if (sah)
2632  seh = sah.GetTopLevelEntry();
2633  } else if(seqfeat) {
2634  CBioseq_Handle bsh = GetBioseqForSeqFeat(*seqfeat, *(obj.scope));
2635  if (bsh) {
2636  seh = bsh.GetTopLevelEntry();
2637  }
// Descriptors and pubdescs: when the owning entry cannot be found, fall
// back to the scope's default top-level entry instead of an empty handle.
2638  } else if (seqdesc) {
2639  seh = edit::GetSeqEntryForSeqdesc(obj.scope, *seqdesc);
2640  if (seh) {
2641  seh = seh.GetTopLevelEntry();
2642  } else {
2643  seh = GetDefaultTopLevelSeqEntry(*obj.scope);
2644  }
2645  } else if (pubdesc) {
2646  seh = GetSeqEntryForPubdesc(obj.scope, *pubdesc);
2647  if (seh) {
2648  seh = seh.GetTopLevelEntry(); // GB-3727
2649  } else {
2650  seh = GetDefaultTopLevelSeqEntry(*obj.scope);
2651  }
// Seq-submit: handle of its first entry, returned as is (no
// GetTopLevelEntry() call in this branch).
// NOTE(review): front() is called without checking that the entry list is
// non-empty - confirm an "entrys" submit always has at least one entry.
2652  } else if (seqsubmit) {
2653  if (seqsubmit->IsEntrys() && seqsubmit->GetData().GetEntrys().front()) {
2654  seh = obj.scope->GetSeq_entryHandle(*(seqsubmit->GetData().GetEntrys().front()), CScope::eMissing_Null);
2655  }
// Locations and ids: resolve to a bioseq first, then climb to the top.
2656  } else if (loc) {
2657  CBioseq_Handle bsh = obj.scope->GetBioseqHandle(*loc);
2658  if (bsh) {
2659  seh = bsh.GetTopLevelEntry();
2660  }
2661  } else if (seq_id) {
2662  CBioseq_Handle bsh = obj.scope->GetBioseqHandle(*seq_id);
2663  if (bsh) {
2664  seh = bsh.GetTopLevelEntry();
2665  }
2666  }
2667  return seh;
2668 }
2669 
2670 
2672  const CFeatListItem * p1,
2673  const CFeatListItem * p2
2674 )
2675 
2676 {
// "Less than" predicate on the items' description strings; it is passed to
// sort() in GetSortedFeatList(). Returns true when p1 must come before p2.
// Items are grouped by the first character of the description, and the
// groups are ordered:
//   anything else  <  leading '-'  <  leading '~'  <  leading digit
// Inside a group the order is case-insensitive string comparison.
// An empty description yields '\0' as first character and therefore lands
// in the "anything else" group.
2677  string str1 = p1->GetDescription();
2678  string str2 = p2->GetDescription();
2679 
2680  char ch1 = str1.c_str()[0];
2681  char ch2 = str2.c_str()[0];
2682  // starts with a number -> goes at the end of the list
// NOTE(review): isdigit() is given a plain char; its behavior is undefined
// for negative values, so a cast to unsigned char would be safer if
// descriptions can start with non-ASCII bytes.
2683  bool num1 = isdigit(ch1);
2684  bool num2 = isdigit(ch2);
2685  if (num1 && num2) {
2686  return NStr::Compare(str1, str2, NStr::eNocase) < 0;
2687  } else if (num1) {
2688  return false;
2689  } else if (num2) {
2690  return true;
2691  }
2692 
2693  // starts with a tilde or dash - sort with other tildes,
2694  // put before numbers after alphas
2695  if (ch1 == '~' && ch2 == '~') {
2696  return NStr::Compare(str1, str2, NStr::eNocase) < 0;
2697  } else if (ch1 == '~') {
2698  return false;
2699  } else if (ch2 == '~') {
2700  return true;
2701  }
// Dashes are tested after tildes, so a dash sorts before a tilde.
2702  if (ch1 == '-' && ch2 == '-') {
2703  return NStr::Compare(str1, str2, NStr::eNocase) < 0;
2704  } else if (ch1 == '-') {
2705  return false;
2706  } else if (ch2 == '-') {
2707  return true;
2708  }
2709 
2710  return NStr::Compare(p1->GetDescription(), p2->GetDescription(), NStr::eNocase) < 0;
2711 }
2712 
2713 
/// Build the list of known feature types, ordered for presentation.
///
/// The feature list from CSeqFeatData::GetFeatList() is split into buckets
/// and concatenated in this order:
///   1. the "all" item (subtype "any" with type 0), if the list has one;
///   2. popular   - sorted with s_CompareDescriptions;
///   3. used      - sorted with s_CompareDescriptions;
///   4. least_liked - kept in feature-list order (not sorted);
///   5. import_feats - sorted with s_CompareDescriptions;
///   6. unused    - subtypes not seen in seh, sorted with s_CompareDescriptions.
/// Which bucket a subtype present in seh goes to is decided by the switch
/// on the subtype. Items whose description was already emitted are skipped,
/// as are the "bad", "any", "Imp_CDS", "source" and "org" subtypes.
///
/// @param seh  Entry whose features decide which subtypes count as present;
///             an invalid handle means no subtype is present.
/// @param max  Scanning of seh stops once more than max features were seen.
/// @return Pointers into the static feature list, in the order above.
2714 vector<const CFeatListItem * > GetSortedFeatList(CSeq_entry_Handle seh, size_t max)
2715 {
2716  vector<const CFeatListItem * > r_list;
2717 
// present[subtype] is true when at least one scanned feature has that subtype.
2718  vector<bool> present(CSeqFeatData::eSubtype_max, false);
2719  size_t count = 0;
2720  if (seh) {
2721  CFeat_CI fi(seh);
2722  while (fi) {
2723  present[fi->GetData().GetSubtype()] = true;
2724  ++fi;
2725  ++count;
2726  if (count > max)
2727  break;
2728  }
2729  }
2730 
// Descriptions already placed in a bucket; used to drop duplicates.
2731  set<string> existing;
2732 
2733  vector<const CFeatListItem * > used;
2734  vector<const CFeatListItem * > popular;
2735  vector<const CFeatListItem * > import_feats;
2736  vector<const CFeatListItem * > least_liked;
2737  vector<const CFeatListItem * > unused;
2738  const CFeatListItem * all = NULL;
2739 
2740  const CFeatList* feat_list = CSeqFeatData::GetFeatList();
2741  ITERATE(CFeatList, ft_it, *feat_list) {
2742  const CFeatListItem * f = &(*ft_it);
2743  int subtype = f->GetSubtype();
2744  if (subtype == CSeqFeatData::eSubtype_any && f->GetType() == 0) {
2745  all = f;
2746  } else if (subtype != CSeqFeatData::eSubtype_bad
2747  && subtype != CSeqFeatData::eSubtype_any
2748  && subtype != CSeqFeatData::eSubtype_Imp_CDS
2749  && subtype != CSeqFeatData::eSubtype_source
2750  && subtype != CSeqFeatData::eSubtype_org) {
2751  string desc = f->GetDescription();
2752  if (existing.find(desc) == existing.end())
2753  {
2754  existing.insert(desc);
2755 
2756  if (!present[subtype]) {
2757  unused.push_back(f);
2758  } else {
2759  switch (subtype) {
2765  import_feats.push_back(f);
2766  break;
2768  popular.push_back(f);
2769  break;
2777  popular.push_back(f);
2778  break;
2780  least_liked.push_back(f);
2781  break;
2782  default:
2783  used.push_back(f);
2784  break;
2785  }
2786  }
2787  }
2788  }
2789  }
2790 
// least_liked is intentionally absent from the sort calls below.
2791  sort(popular.begin(), popular.end(),s_CompareDescriptions);
2792  sort(used.begin(), used.end(), s_CompareDescriptions);
2793  sort(import_feats.begin(), import_feats.end(), s_CompareDescriptions);
2794  sort(unused.begin(), unused.end(), s_CompareDescriptions);
2795 
2796  r_list.insert(r_list.begin(), popular.begin(), popular.end());
2797  r_list.insert(r_list.end(), used.begin(), used.end());
2798  r_list.insert(r_list.end(), least_liked.begin(), least_liked.end());
2799  r_list.insert(r_list.end(), import_feats.begin(), import_feats.end());
2800  r_list.insert(r_list.end(), unused.begin(), unused.end());
2801 
// The "all" item always goes first.
2802  if (all) {
2803  r_list.insert(r_list.begin(), all);
2804  }
2805 
2806  return r_list;
2807 }
2808 
2809 void CSeqUtils::ParseRanges(const string& r_str, CSeqUtils::TRanges& ranges)
2810 {
2811  vector<string> range_pairs;
2812 
2813  NStr::Split(r_str, ",", range_pairs);
2814  ITERATE (vector<string>, iter, range_pairs) {
2815  vector<string> pos;
2816  NStr::Split(*iter, "-", pos);
2817  if (pos.size() != 2) continue;
2818  string f_str = NStr::TruncateSpaces(pos[0]);
2819  string t_str = NStr::TruncateSpaces(pos[1]);
2820  if ( !f_str.empty() && !t_str.empty() ) {
2821  try {
2822  TSeqPos from = NStr::StringToUInt(f_str);
2823  TSeqPos to = NStr::StringToUInt(t_str);
2824 
2825  //!! verify end of range inclusion!
2826  ranges.push_back(TSeqRange(from, to));
2827  } catch (CException&) {
2828  LOG_POST(Error << "Invalid range: " << *iter);
2829  }
2830  }
2831  }
2832 }
2833 
2834 
2835 /// --------------------------------
2836 /// CGencollIdMapperAdapter
2837 /// --------------------------------
2838 
2840  : m_Mapper(mapper)
2841  , m_Spec(spec) {
// A null mapper is rejected at construction time, so the Map() methods can
// dereference m_Mapper without checking it.
2842  if (!m_Mapper)
2843  NCBI_THROW(CException, eInvalid, "IIdMapperAdapter: Invalid initialization");
2844 }
2845 
2846 objects::CSeq_id_Handle CGencollIdMapperAdapter::Map(const objects::CSeq_id_Handle& idh) {
2847  auto id = Ref(new CSeq_id);
2848  id->Assign(*idh.GetSeqId());
2849 
2850  auto loc = Ref(new CSeq_loc);
2851  loc->SetWhole(*id);
2852  auto mapped_loc = Map(*loc);
2853 
2854  CSeq_id_Handle out_sih;
2855  if (mapped_loc && !mapped_loc->IsNull() && !mapped_loc->IsEmpty() && mapped_loc->GetId())
2856  out_sih = CSeq_id_Handle::GetHandle(*mapped_loc->GetId());
2857  return out_sih;
2858 }
2859 
2861  return m_Mapper->Map(loc, m_Spec);
2862 }
2863 
// Rewrites, in place, every CSeq_id found inside obj: each id is wrapped in
// a "whole" location, mapped with Map(), and overwritten with the id of the
// mapped location. Ids whose mapping yields no location, or a location
// without a single id, are left unchanged.
// `ids` remembers the id objects already processed so that an object
// visited more than once by the iterator is mapped only once.
2865  set< CRef<CSeq_id> > ids;
2866  CTypeIterator<CSeq_id> idit(obj);
2867  for (; idit; ++idit) {
2868  CSeq_id& id = *idit;
2869  if (ids.emplace(&id).second == false)
2870  continue;
2871  auto loc = Ref(new CSeq_loc);
2872  loc->SetWhole(id);
2873  auto mapped_loc = Map(*loc);
2874  if (!mapped_loc || !mapped_loc->GetId())
2875  continue;
2876  id.Assign(*mapped_loc->GetId());
2877  }
2878 }
2879 
2881 {
// Builds an adapter around a new CGencollIdMapper for the assembly `assm`.
// The mapping spec asks for the public alias and the top-level sequence
// role; its TypedChoice depends on whether the assembly is a RefSeq one.
// The adapter is allocated with `new` and returned to the caller.
2882  CGencollIdMapper::SIdSpec MapSpec;
2883  MapSpec.TypedChoice = assm->IsRefSeq() ?
2885  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
2886  MapSpec.Role = eGC_SequenceRole_top_level;
2887  CRef<CGencollIdMapper> mapper(new CGencollIdMapper(assm));
2888  return new CGencollIdMapperAdapter(mapper, MapSpec);
2889 }
2890 
2891 
2892 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< CSeq_align > ConvertSeq_align(const CSeq_align &src, CSeq_align::TSegs::E_Choice dst_choice, CSeq_align::TDim anchor_row=-1, CScope *scope=NULL)
Convert source alignment to a new type.
USING_SCOPE(objects)
CSeq_entry_Handle GetSeqEntryForSeqdesc(CRef< CScope > scope, const CSeqdesc &seq_desc)
CAlign_CI –.
Definition: align_ci.hpp:63
int GetGenCode(int def=1) const
Definition: BioSource.cpp:73
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CBioseq_set_Handle –.
This stream exchanges data with an HTTP server located at the URL: http[s]://host[:port]/path[?...
CConstRef –.
Definition: ncbiobj.hpp:1266
void SetMaxReturn(int ret_max)
void SearchHistory(const string &db, const string &term, const string &web_env, Int8 query_key, int retstart, CNcbiOstream &ostr)
Uint8 Search(const string &db, const string &term, vector< objects::CSeq_id_Handle > &uids, const string &xml_path=kEmptyStr)
void Summary(const string &db, const vector< objects::CSeq_id_Handle > &uids, xml::document &docsums, const string &version="")
void Link(const string &db_from, const string &db_to, const vector< objects::CSeq_id_Handle > &uids_from, vector< objects::CSeq_id_Handle > &uids_to, const string &xml_path=kEmptyStr, const string &command="neighbor")
CFeatListItem - basic configuration data for one "feature" type.
string GetDescription() const
CConfigurableItems - a static list of items that can be configured.
CFeat_CI –.
Definition: feat_ci.hpp:64
string GetAccession() const
Retrieve the accession for this assembly.
Definition: GC_Assembly.cpp:99
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
bool IsRefSeq() const
Is this assembly a RefSeq assembly?
list< CConstRef< CGC_Sequence > > TSequenceList
Definition: GC_Assembly.hpp:67
void GetMolecules(TSequenceList &molecules, ESubset subset) const
Retrieve a subset of molecules.
CRef< objects::CSeq_loc > Map(const objects::CSeq_loc &Loc, const SIdSpec &Spec) const
Definition: id_mapper.cpp:168
static bool isGenCollSequence(const objects::CBioseq_Handle &handle)
static CRef< CGencollSvc > GetInstance(void)
Definition: gencoll_svc.cpp:54
static CRef< objects::CGenomicCollectionsService > GetGenCollService(int timeout_sec=-1)
static CGuiRegistry & GetInstance()
access the application-wide singleton
Definition: registry.cpp:400
CRegistryReadView GetReadView(const string &section) const
get a read-only view at a particular level.
Definition: registry.cpp:428
CIUPACaa –.
Definition: IUPACaa.hpp:66
CIUPACna –.
Definition: IUPACna.hpp:66
CMappedFeat –.
Definition: mapped_feat.hpp:59
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiRegistry –.
Definition: ncbireg.hpp:913
CObject –.
Definition: ncbiobj.hpp:180
void AddInterval(const CSeq_interval &ival)
for convenience
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
CRWLock –.
Definition: ncbimtx.hpp:953
TThisType & CombineWith(const TRange &r)
Definition: range_coll.hpp:195
CRef –.
Definition: ncbiobj.hpp:618
class CRegistryReadView provides a nested hierarchical view at a particular key.
Definition: reg_view.hpp:58
int GetInt(const string &key, int default_val=0) const
access a named key at this level, with no recursion
Definition: reg_view.cpp:230
bool GetBool(const string &key, bool default_val=false) const
Definition: reg_view.cpp:241
list< SKeyInfo > TKeys
retrieve information about all keys in the registry
Definition: reg_view.hpp:68
string GetString(const string &key, const string &default_val=kEmptyStr) const
Definition: reg_view.cpp:246
void GetKeys(TKeys &keys) const
Retrieve information about all keys in this view.
Definition: reg_view.cpp:284
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
static const CFeatList * GetFeatList()
@ eSubtype_transit_peptide
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
CSeqVector –.
Definition: seq_vector.hpp:65
TDim CheckNumRows(void) const
Validators.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Seq_align.cpp:294
CSeq_annot_Handle –.
CSeq_entry_CI –.
CSeq_entry_Handle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
int Compare(const CSeq_feat &f2) const
Compare relative order of this feature and feature f2, ordering first by features' coordinates,...
Definition: Seq_feat.hpp:242
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
Base class for all serializable objects.
Definition: serialbase.hpp:150
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
TBase::const_iterator const_iterator
Definition: static_map.hpp:179
static CSeq_id_Handle GetSeq_id_Handle(const const_iterator &iter)
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
bool GetBool(void) const
Definition: User_field.hpp:341
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
General IdMapper interface.
Definition: iidmapper.hpp:48
Task clients implement this callback interface.
Definition: utils.hpp:107
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
void clear()
Definition: set.hpp:153
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
The xml::document class is used to hold the XML tree and various bits of information about it.
Definition: document.hpp:80
void swap(document &other)
Swap one xml::document object for another.
Definition: document.cpp:530
const node & get_root_node(void) const
Get a reference to the root node of this document.
Definition: document.cpp:539
The xml::node::const_iterator provides a way to access children nodes similar to a standard C++ conta...
Definition: node.hpp:746
The xml::node_set::const_iterator class is used to iterate over nodes in a node set.
Definition: node_set.hpp:226
The xml::node_set class is used to store xpath query result set.
Definition: node_set.hpp:68
iterator begin()
Get an iterator that points to the beginning of the xpath query result node set.
Definition: node_set.cpp:173
iterator end()
Get an iterator that points one past the last node in the xpath query result node set.
Definition: node_set.cpp:185
The xml::node class is used to hold information about one XML node.
Definition: node.hpp:106
iterator end(void)
Get an iterator that points one past the last child for this node.
Definition: node.hpp:835
iterator find(const char *name, const ns *nspace=NULL)
Find the first child node that has the given name and namespace.
Definition: node.cpp:1258
node_set run_xpath_query(const xpath_expression &expr)
Run the given XPath query.
Definition: node.cpp:1292
const char * get_content(void) const
Get the content for this text node.
Definition: node.cpp:797
string GetSeqIdString(const CSeq_id &id)
Definition: compartp.cpp:100
static uch flags
static unsigned char depth[2 *(256+1+29)+1]
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static const char * str(char *buf, int n)
Definition: stats.c:84
int offset
Definition: replacements.h:160
static FILE * f
Definition: readconf.c:23
char data[12]
Definition: iconv.c:80
#define INVALID_GI
Definition: ncbimisc.hpp:1089
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ENTREZ_ID_TO(T, entrez_id)
Definition: ncbimisc.hpp:1097
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define GI_TO(T, gi)
Definition: ncbimisc.hpp:1085
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
unsigned int THTTP_Flags
Bitwise OR of EHTTP_Flag.
@ fHTTP_AutoReconnect
See HTTP_CreateConnectorEx()
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
static CConstRef< objects::CSeq_loc > GetFeatLocation(const objects::CSeq_feat &feat, const objects::CBioseq_Handle &bsh)
Definition: utils.cpp:2240
static bool StringToRange(const string &range_str, long &from, long &to)
Convert a range string to a range.
Definition: utils.cpp:1254
virtual void SetTaskName(const string &name)=0
virtual void SetTaskCompleted(int completed)=0
set total finished task number.
static int GetMaxSearchTime(const CRegistryReadView &view)
Definition: utils.cpp:400
static string CreateLinkRow(const string &tag, const string &label, const string &url)
Definition: utils.cpp:1004
static TMappingInfo GetMappingInfoFromLocation(const objects::CSeq_loc &feat_loc, const objects::CSeq_id &product_id, const int feat_offset=0)
Definition: utils.cpp:2200
static TMappingInfo GetRnaMappingInfo(const objects::CSeq_loc &feat_loc, const objects::CMappedFeat &feat, const objects::CBioseq_Handle &handle)
Helper function to generate mapping info between the specified product sequence and genomic sequence ...
Definition: utils.cpp:1901
static void ELinkQuery(const string &db_from, const string &db_to, const TEntrezIds &uids_from, TEntrezIds &uids_to, const string &cmd="neighbor", const string &xpath="//Link/Id/text()")
Queries elink.fcgi with a vector of uids/seq-ids (seq-ids preferred for future compatibility) and ret...
Definition: utils.cpp:1614
int TFeatLinkingMode
Definition: utils.hpp:219
static string GetAlignDbIsQuery(const string &annot)
get a is_source_assembly_query string from AlignDb annotation suffixed after a second '#'
Definition: utils.cpp:878
static bool LinkFeatures(CLinkedFeature::TLinkedFeats &feats, TFeatLinkingMode mode=1, ISeqTaskProgressCallback *p_cb=NULL)
Link features into a hierarchical list.
Definition: utils.cpp:453
static bool IsNAA(const string &annot, bool isStrict=false)
check if a given annotation is a named annotation accession[.version][number] when isStrict == false...
Definition: utils.cpp:797
static bool IsException(const objects::CSeq_feat &feat)
Definition: utils.cpp:963
static objects::CBioseq * SeqLocToBioseq(objects::CScope &scope, const objects::CSeq_loc &loc)
Construct a bioseq to fit a given location.
Definition: utils.cpp:601
static bool isRmtPipelineFileType(const string &sFileType)
check if a file type is one of remote file types
Definition: utils.cpp:819
static TLocVec GetAccessionPlacements(const objects::CSeq_id &id, objects::CScope &scope, int time_out_sec=1, THTTP_Flags flags=fHTTP_AutoReconnect)
Retrieve mapped-up sequences.
Definition: utils.cpp:1130
static bool isTopLevel(const objects::CSeq_id &seq_id, const string &assm_acc, objects::CScope *scope=NULL)
Definition: utils.cpp:1476
static bool IsPartialStart(const objects::CSeq_loc &loc)
Definition: utils.cpp:938
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:168
list< CRef< CLinkedFeature > > TLinkedFeats
Definition: utils.hpp:80
static bool IsPartialFeature(const objects::CSeq_feat &feat)
Definition: utils.cpp:927
static TLocVec GetGiPlacements(TGi gi, int time_out_sec=5, THTTP_Flags flags=fHTTP_AutoReconnect)
Retrieve mapped-up sequences.
Definition: utils.cpp:1069
static void ESearchQuery(const string &db, const string &term, TEntrezIds &uids, size_t &count, const int ret_max, const string &xpath="//IdList/Id/text()")
Queries esearch.fcgi and returns a vector of uids/seq-ids (seq-ids preferred for future compatibility...
Definition: utils.cpp:1681
vector< CRef< objects::CSeq_loc > > TLocVec
Definition: utils.hpp:123
static TMappingInfo GetCdsMappingInfoFromRna(const TMappingInfo &rna_mapping_info, const objects::CSeq_feat &rna_feat, const objects::CMappedFeat &mapped_cds_feat, const objects::CSeq_loc &feat_loc, objects::CScope &scope, const int feat_offset=0)
Derive the CDS feature mapping information based on its parent RNA feature mapping info.
Definition: utils.cpp:2037
static string GetNcbiBaseUrl()
Definition: utils.cpp:972
static int GetMaxSearchSegments(const CRegistryReadView &view)
Definition: utils.cpp:360
static bool IsCrossOrigin(const objects::CSeq_loc &loc)
static bool IsAlignDb(const string &annot)
check if a given annotation is AlignDb (potentially suffixed with batch identification string after a '...
Definition: utils.cpp:862
static const string & NameTypeValueToStr(TAnnotNameType type)
Definition: utils.cpp:153
vector< TEntrezId > TEntrezIds
Definition: utils.hpp:125
static void GetAssmAccs_Gi(TAccs &accs, TGi gi)
get all assembly accessions corresponding to a GI
Definition: utils.cpp:1561
static objects::SAnnotSelector::EMaxSearchSegmentsAction GetMaxSearchSegmentsAction(const CRegistryReadView &view)
Definition: utils.cpp:365
virtual objects::CSeq_id_Handle Map(const objects::CSeq_id_Handle &idh) override
Definition: utils.cpp:2846
static bool IsPseudoFeature(const objects::CSeq_feat &feat)
Definition: utils.cpp:911
vector< objects::CSeq_id_Handle > TSeqIdHandles
Definition: utils.hpp:128
static string MakeRmtAnnotName(const string &sSuffix)
create an annotation name for a remote file pipeline, appending sSuffix
Definition: utils.cpp:808
static string GetAnnotName(const objects::CSeq_annot_Handle &annot_handle)
static bool IsPartialStop(const objects::CSeq_loc &loc)
Definition: utils.cpp:944
CGencollIdMapper::SIdSpec m_Spec
Definition: utils.hpp:522
CSeq_entry_Handle GetTopSeqEntryFromScopedObject(SConstScopedObject &obj)
Definition: utils.cpp:2596
static bool GetRangeCollection(const objects::CSeq_id &id, const objects::CHandleRangeMap &map, CRangeCollection< TSeqPos > &ranges)
Definition: utils.cpp:585
static CRef< objects::CSeq_loc > MixLocToLoc(const objects::CSeq_loc &mix_loc, const objects::CBioseq_Handle &handle)
Create a new seq-loc with a unique seq-id from a "mixed" loc.
Definition: utils.cpp:662
static void GetAssmIds_GIChr(TEntrezIds &gc_ids, TGi gi)
get all assembly ids associated with a gi where this gi is a chromosome
Definition: utils.cpp:1365
static CConstRef< objects::CUser_field > GetAnnotUserField(const objects::CSeq_annot &annot, const string &type, const string &label)
Definition: utils.cpp:726
static bool CanHavePlacements(const objects::CSeq_id &seqid)
check that a given seq-id can potentially have placements (to weed out cases like local ids)
Definition: utils.cpp:1089
static const string & GetUnnamedAnnot()
Get the commonly used symbol representing an unnamed annotation.
Definition: utils.hpp:534
static void GetMappingInfo(const objects::CMappedFeat &mapped_feat, const objects::CBioseq_Handle &bsh, TMappingInfo &info, const string &annot=string())
Definition: utils.cpp:2282
virtual void SetTaskTotal(int total)=0
static string CreateIdStr(const vector< T > &uids)
Convert a list of ids into a comma-delimited string.
Definition: utils.hpp:555
static TLocVec GetLocPlacements(const objects::CSeq_loc &loc, int time_out_sec=1)
Retrieve locations on mapped-up sequences Help method for retrieving upper level sequences for a give...
Definition: utils.cpp:1161
static string GetAnnotComment(const objects::CSeq_annot_Handle &annot_handle)
static string CreateSectionRow(const string &tag)
Definition: utils.cpp:998
virtual void MapObject(CSerialObject &obj) override
Definition: utils.cpp:2864
static bool isQuasiLocal(const objects::CBioseq_Handle &handle)
check that a given accession is either local or unrecognizable this can be important to avoid unneces...
Definition: utils.cpp:1112
static bool IsVDBAccession(const string &acc)
Check if string starts with ("SRA", "SRR", "DRR", "ERR")
Definition: utils.cpp:888
static bool IsSameStrands(const objects::CSeq_loc &loc)
Definition: utils.cpp:949
static bool GetAnnotShown(const objects::CSeq_annot &annot)
Definition: utils.cpp:2386
static CRef< objects::CSeq_loc > CreateSeq_loc(const objects::CSeq_id &id, const CRangeCollection< TSeqPos > &ranges)
Definition: utils.cpp:556
vector< TSeqRange > TRanges
Definition: utils.hpp:479
CRef< CGencollIdMapper > m_Mapper
Definition: utils.hpp:521
static bool IsNAA_Name(const string &annot)
check if a given annotation is a named annotation name with '.' replaced with '_'.
Definition: utils.cpp:802
static bool GetGIString(const string &sid, string *gi_str)
ad-hoc GI extraction (for malformed seq-id strings like: "gi|55823257|ref|YP_141698....
Definition: utils.cpp:2336
static IIdMapper * GetIdMapper(CRef< objects::CGC_Assembly > assm)
Definition: utils.cpp:2880
static TAnnotNameType NameTypeStrToValue(const string &type)
Definition: utils.cpp:141
static bool IsExtendedNAA(const string &annot, bool isStrict=false)
check if a given annotation is an extended NAA (named accession[.version][number],...
Definition: utils.cpp:851
static int GetGenCode(const objects::CBioseq_Handle &handle)
Returns Bioseq's Genetic Code.
Definition: utils.cpp:2359
static string GetChrGI(TGi gi)
try to get a chromosome from a GI
Definition: utils.cpp:1411
static bool Match(const objects::CSeq_id &id1, const objects::CSeq_id &id2, objects::CScope *scope=NULL)
check to see if two seq-ids are identical.
static void SetAnnot(objects::SAnnotSelector &sel, const string &annot)
help function for setting up an annotation.
Definition: utils.cpp:321
static string CreateTableRow(const string &tag="", const string &value="")
Definition: utils.cpp:991
static CRef< objects::CSeq_loc > RemapChildToParent(const objects::CSeq_loc &parent, const objects::CSeq_loc &child, objects::CScope *scope=NULL)
remap a location to a parent location.
Definition: utils.cpp:499
static string CreateTableStart()
help methods for creating HTML text
Definition: utils.cpp:979
static TLocVec GetAccessionPlacementsMsec(const objects::CSeq_id &id, objects::CScope &scope, unsigned long time_out_msec=1000, THTTP_Flags flags=fHTTP_AutoReconnect)
same, with a timeout in milliseconds
Definition: utils.cpp:1135
static CRegistryReadView GetSelectorRegistry()
helper functions to read selector-related tune-up info (mostly segment limits) from registry:
Definition: utils.cpp:354
CGencollIdMapperAdapter(CRef< CGencollIdMapper > mapper, const CGencollIdMapper::SIdSpec &spec)
-------------------------------- CGencollIdMapperAdapter ——————————–
Definition: utils.cpp:2839
virtual bool StopRequested() const =0
static bool isRmtAnnotName(const string &sAnnotname)
check if a given annotation was created by a remote file pipeline
Definition: utils.cpp:814
EAnnotationNameType
flags for classifying annotation names.
Definition: utils.hpp:132
static CRef< objects::CSeq_id > MapStringId(const string &str, objects::IIdMapper *mapper)
Definition: utils.cpp:2395
static objects::CMappedFeat GetMrnaForCds(const objects::CMappedFeat &cds_feat, const string &named_acc=string())
Definition: utils.cpp:2255
static std::string GetXmlChildNodeValue(const xml::node &parent, const std::string &name)
Returns the contents of the specified child node.
Definition: utils.cpp:1728
static string GetChrId(const string &id_str, objects::CScope &scope)
same as GetChrGI(), but takes a string with id (that must correspond to some GI)
Definition: utils.cpp:1544
static void GetAssmIds_GI(TEntrezIds &gc_ids, TGi gi)
get all assembly ids associated with a gi.
Definition: utils.cpp:1746
static bool IsExtendedNAA_Name(const string &annot)
check if a given annotation is an extended NAA name with '.' replaced with '_'.
Definition: utils.cpp:856
static void ParseRanges(const string &sRanges, TRanges &ranges)
Definition: utils.cpp:2809
vector< TMappedInt > TMappingInfo
Definition: utils.hpp:165
static string CreateTableEnd()
Definition: utils.cpp:985
static void SetResolveDepth(objects::SAnnotSelector &sel, bool adaptive, int depth=-1)
help function for setting selector resolve depth.
Definition: utils.cpp:406
static void SetAnnotShown(objects::CSeq_annot &annot, bool isShown)
Definition: utils.cpp:2377
static bool CheckMaxSearchSegments(int actual, int max, objects::SAnnotSelector::EMaxSearchSegmentsAction action)
check actual number of segments against max and perform the action if the actual number is more than ...
Definition: utils.cpp:379
virtual void AddTaskCompleted(int delta)=0
set to add newly finished task number.
int TAnnotFlags
Definition: utils.hpp:160
static string GetAlignDbBatch(const string &annot)
get a batch string from AlignDb annotation suffixed with batch identification string after a '#'
Definition: utils.cpp:868
@ eAnnot_Unnamed
unnamed annotation
Definition: utils.hpp:133
@ eAnnot_All
all annotations
Definition: utils.hpp:135
@ eAnnot_Named
all named annotations
Definition: utils.hpp:134
@ eAnnot_Other
any given named annots
Definition: utils.hpp:136
@ fAnnot_UnsetNamed
Definition: utils.hpp:155
@ fAnnot_UnsetDepth
Definition: utils.hpp:158
CRef< objects::CScope > scope
Definition: objects.hpp:53
CConstRef< CObject > object
Definition: objects.hpp:52
@ eUnknown
Definition: app_popup.hpp:72
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2613
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
bool IsGi(void) const
bool MatchesTo(const CSeq_id_Handle &h) const
True if *this matches to h.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TGi GetGi(void) const
@ eAcc_type_mask
Definition: Seq_id.hpp:247
@ eAcc_refseq_contig
Definition: Seq_id.hpp:420
@ eAcc_refseq_mrna_predicted
Definition: Seq_id.hpp:439
@ eAcc_refseq_unreserved
Definition: Seq_id.hpp:418
@ eAcc_refseq_mrna
Definition: Seq_id.hpp:415
@ eAcc_refseq_prot
Definition: Seq_id.hpp:414
@ eAcc_refseq_wgs_nuc
Definition: Seq_id.hpp:421
@ eAcc_refseq_wgs_prot
Definition: Seq_id.hpp:422
@ eAcc_refseq_ncrna
Definition: Seq_id.hpp:416
@ eAcc_refseq_chromosome
Definition: Seq_id.hpp:429
@ eAcc_refseq_genomic
Definition: Seq_id.hpp:430
@ eAcc_refseq_prot_predicted
Definition: Seq_id.hpp:438
@ eAcc_refseq_ncrna_predicted
Definition: Seq_id.hpp:440
@ eAcc_refseq_wgs_intermed
Definition: Seq_id.hpp:431
@ eAcc_refseq_genome
Definition: Seq_id.hpp:419
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
Definition: Seq_loc.cpp:5037
bool IsSetStrand(void) const
Get strand.
Definition: Seq_loc.hpp:1049
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
int CompareSubLoc(const CSeq_loc &loc, ENa_strand strand, const ISubLocFilter *filter=NULL) const
Compare first-level sub-locations sequentially to order them by biological "complexity".
Definition: Seq_loc.cpp:805
bool IsEmpty(void) const
True if the current location is empty.
Definition: Seq_loc.hpp:1084
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
size_t GetSize(void) const
Get number of ranges.
Definition: Seq_loc.cpp:2636
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1056
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
Definition: Seq_loc.hpp:1028
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ fMerge_AbuttingOnly
Definition: Seq_loc.hpp:327
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3341
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
CConstRef< CSeq_loc > m_ParentLoc
Definition: sequence.hpp:1148
CRef< CSeq_loc > Resolve(CScope *scope=0, TFlags flags=0) const
Definition: sequence.hpp:1143
TGi GetGiForId(const objects::CSeq_id &id, CScope &scope, EGetIdType flags=0)
Given a Seq-id retrieve the corresponding GI.
Definition: sequence.cpp:668
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void GetAllTSEs(TTSE_Handles &tses, enum ETSEKind kind=eManualTSEs)
Definition: scope.cpp:295
CConstRef< CSynonymsSet > GetSynonyms(const CSeq_id &id)
Get bioseq synonyms, resolving to the bioseq in this scope.
Definition: scope.cpp:486
vector< CSeq_entry_Handle > TTSE_Handles
Definition: scope.hpp:645
@ eAllTSEs
Definition: scope.hpp:643
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
@ eMissing_Null
Definition: scope.hpp:157
bool IsNamed(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
const CSeqFeatData & GetData(void) const
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
bool IsSetProduct(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
TSeq GetSeq(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
const string & GetName(void) const
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
CSeq_entry_Handle GetTopLevelEntry(void) const
Return a handle for the top-level seq-entry.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
bool IsSeq(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetFeatType(TFeatType type)
Set feature type (also set annotation type to feat)
SAnnotSelector & SetExactDepth(bool value=true)
SetExactDepth() specifies that annotations will be searched on the segment level specified by SetReso...
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
SAnnotSelector & SetOverlapTotalRange(void)
Check overlapping only of total ranges.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
SAnnotSelector & IncludeNamedAnnotAccession(const string &acc, int zoom_level=0)
SAnnotSelector & SetExcludeExternal(bool exclude=true)
External annotations for the Object Manager are annotations located in top level Seq-entry different f...
SAnnotSelector & SetCollectNames(bool value=true)
Collect available annot names rather than annots.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
SAnnotSelector & ExcludeUnnamedAnnots(void)
Add unnamed annots to set of annots names to exclude.
SAnnotSelector & AddUnnamedAnnots(void)
Add unnamed annots to set of annots names to look for.
@ fIncludeGivenEntry
Include the top (given) entry.
@ fRecursive
Iterate recursively.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
bool IsProtein(void) const
Definition: seq_vector.hpp:350
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1684
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TObjectType * Release(void)
Release a reference to the object and return a pointer to the object.
Definition: ncbiobj.hpp:846
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
static TThisType GetWhole(void)
Definition: range.hpp:272
virtual string GetString(const string &section, const string &name, const string &default_value, TFlags flags=0) const
Get the parameter string value.
Definition: ncbireg.cpp:321
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3452
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2984
#define NPOS
Definition: ncbistr.hpp:133
static size_t StringToSizet(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to size_t.
Definition: ncbistr.cpp:1760
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5291
static long StringToLong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to long.
Definition: ncbistr.cpp:653
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
unsigned int usec
microseconds (modulo 1,000,000)
Definition: ncbi_types.h:78
STimeout * NcbiMsToTimeout(STimeout *timeout, unsigned long ms)
Definition: ncbi_types.c:48
unsigned int sec
seconds
Definition: ncbi_types.h:77
static const char label[]
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
const TData & GetData(void) const
Get the Data member data.
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
void SetType(TType &value)
Assign a value to Type data member.
bool IsBool(void) const
Check if variant Bool is selected.
@ eLim_circle
artificial break at origin of circle
Definition: Int_fuzz_.hpp:215
@ eGC_SequenceRole_top_level
bool IsSetSegs(void) const
Check if a value has been assigned to Segs data member.
Definition: Seq_align_.hpp:909
const TSpliced & GetSpliced(void) const
Get the variant data.
Definition: Seq_align_.cpp:219
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:752
const TExons & GetExons(void) const
Get the Exons member data.
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
bool IsSetExons(void) const
set of segments involved each segment corresponds to one exon exons are always in biological order Ch...
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
Definition: Seq_feat_.hpp:990
const TCdregion & GetCdregion(void) const
Get the variant data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
TExcept GetExcept(void) const
Get the Except member data.
Definition: Seq_feat_.hpp:1009
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
Definition: Seq_feat_.hpp:1090
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
@ e_Pub
publication applies to this seq
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
Tdata & Set(void)
Assign a value to data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
void SetStrand(TStrand value)
Assign a value to Strand data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Local
local use
Definition: Seq_id_.hpp:95
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const Tdata & Get(void) const
Get the member data.
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Seq_annot_.hpp:852
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
bool IsSetDesc(void) const
used only for stand alone Seq-annots Check if a value has been assigned to Desc data member.
Definition: Seq_annot_.hpp:840
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
const TPub & GetPub(void) const
Get the Pub member data.
Definition: Pubdesc_.hpp:605
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
SStaticPair< const char *, CSeqUtils::TAnnotNameType > TNameTypeStr
Definition: utils.cpp:129
static bool s_CompareDescriptions(const CFeatListItem *p1, const CFeatListItem *p2)
Definition: utils.cpp:2671
static void s_CreateMappingInfo(const CSeq_loc &prod_loc, const CSeq_loc &gen_loc, CSeqUtils::TMappingInfo &info)
Helper function to convert two mapped locations into an interval-to-interval mapping structure.
Definition: utils.cpp:1852
static TMappedLocs s_GetRnaMappingLocs(const CSeq_loc &feat_loc, const CMappedFeat &feat, const CBioseq_Handle &handle)
Definition: utils.cpp:1771
static const int kRetMax
Definition: utils.cpp:1362
static bool s_IsNAA(const string &annot, char div)
Definition: utils.cpp:765
static void s_AdjustToAnnotatedCDS(const CMappedFeat &mapped_cds_feat, CScope &scope, CSeqUtils::TMappingInfo &cds_map_info)
Definition: utils.cpp:1916
map< string, bool > TTopLevels
Definition: utils.cpp:90
DEFINE_STATIC_ARRAY_MAP(TNameTypeMap, sm_NameTypeMap, s_NameTypeStrs)
CBioseq_Handle GetBioseqForSeqFeat(const CSeq_feat &f, CScope &scope)
Definition: utils.cpp:2574
static void s_ESearchQuery(const string &db, const string &term, vector< T > &uids, size_t &count, const int ret_max, const string &xpath)
Definition: utils.cpp:1653
static void s_ELinkQuery(const string &db_from, const string &db_to, const vector< T1 > &uids_from, vector< T2 > &uids_to, const string &cmd, const string &xpath)
Definition: utils.cpp:1599
static const TAssemblySeqIds & s_GetAssemblySeqIds(const string &assm_acc)
Definition: utils.cpp:1440
CConstRef< CBioseq > GetBioseqForSeqdesc(CRef< CScope > scope, const CSeqdesc &seq_desc)
Definition: utils.cpp:2480
static CSeqUtils::TLocVec s_GetAlnMapplingLocs(const CSeq_align_set &align_set, TGi gi)
Definition: utils.cpp:1012
static CRWLock m_AssemblySeqIdLock
Definition: utils.cpp:94
CSeq_feat_Handle GetSeqFeatHandleForBadLocFeature(const CSeq_feat &feat, CScope &scope)
Definition: utils.cpp:2556
CStaticArrayMap< string, CSeqUtils::TAnnotNameType > TNameTypeMap
Definition: utils.cpp:137
static const string kTaxDb
Definition: utils.cpp:1359
static bool s_IsExtendedNAA(const string &sAnnotName, char div, bool isStrict)
Definition: utils.cpp:825
static map< string, TAssemblySeqIds > s_AssemblySeqIdCache
Definition: utils.cpp:96
static const string kAssmDb
Definition: utils.cpp:1361
static CRWLock m_TopLevelsLock
Definition: utils.cpp:92
static const string & GetLinksURL()
Definition: utils.cpp:1059
CSeq_entry_Handle GetDefaultTopLevelSeqEntry(CScope &scope)
Definition: utils.cpp:2543
static const string kNucDb
Definition: utils.cpp:1360
vector< const CFeatListItem * > GetSortedFeatList(CSeq_entry_Handle seh, size_t max)
Definition: utils.cpp:2714
CSeq_entry_Handle GetSeqEntryForPubdesc(CRef< CScope > scope, const CPubdesc &pubdesc)
Definition: utils.cpp:2507
static TTopLevels m_TopLevels
Definition: utils.cpp:91
vector< CRef< CSeq_loc > > TMappedLocs
For CDS and RNA feature mapping information.
Definition: utils.cpp:1769
static const char * kLinksUrlDefault
Definition: utils.cpp:1056
vector< CConstRef< CSeq_id > > TAssemblySeqIds
Definition: utils.cpp:95
static const TNameTypeStr s_NameTypeStrs[]
Definition: utils.cpp:130
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
int i
int len
Lightweight interface for getting lines of data with minimal memory copying.
static const CS_INT unused
Definition: long_binary.c:20
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
mdb_mode_t mode
Definition: lmdb++.h:38
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
XML library namespace.
Definition: attributes.hpp:57
static const BitmapCharRec ch1
Definition: ncbi_10x20.c:1827
static const BitmapCharRec ch2
Definition: ncbi_10x20.c:1819
const char * tag
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
Definition: pcre2_match.c:594
#define count
static const sljit_gpr r1
static const sljit_gpr r0
static const sljit_gpr r2
#define row(bind, expected)
Definition: string_bind.c:73
SAnnotSelector –.
CSeq_annot::C_Data::E_Choice TAnnotType
bool operator()(const CMappedFeat &feat0, const CMappedFeat &feat1) const
Definition: utils.cpp:103
bool operator()(const CMappedFeat &feat0, const CMappedFeat &feat1) const
Definition: utils.cpp:118
bool operator()(const pair< T, U > &p1, const pair< T, U > &p2) const
Definition: utils.cpp:446
Location relative to a base Seq-loc: one (usually) or more ranges of offsets.
Definition: sequence.hpp:1124
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
Timeout structure.
Definition: ncbi_types.h:76
Definition: type.c:6
done
Definition: token1.c:1
Modified on Fri Sep 20 14:57:39 2024 by modify_doxy.py rev. 669887