NCBI C++ ToolKit
utils.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: utils.cpp 47462 2023-04-18 20:13:13Z evgeniev $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Mike DiCuccio, Liangshou Wu
27 *
28 * File Description:
29 * General utility classes for GUI projects.
30 */
31 
32 #include <ncbi_pch.hpp>
33 
34 #include <corelib/ncbiapp.hpp>
35 
37 
38 #include <gui/objutils/utils.hpp>
43 
51 #include <objects/seq/Pubdesc.hpp>
60 
63 
64 #include <objmgr/feat_ci.hpp>
65 #include <objmgr/util/sequence.hpp>
66 #include <objmgr/util/feature.hpp>
68 #include <objmgr/impl/synonyms.hpp>
69 #include <objmgr/seq_vector.hpp>
70 #include <objmgr/bioseq_ci.hpp>
71 #include <objmgr/seq_entry_ci.hpp>
72 #include <objmgr/align_ci.hpp>
73 #include <objmgr/seqdesc_ci.hpp>
74 
75 #include <serial/iterator.hpp>
76 #include <connect/ncbi_types.h>
78 #include <util/line_reader.hpp>
79 #include <algorithm>
80 
83 
86 
87 // cache results of top level seq-id checks
88 // key is seq-id normalized AsFastaString()
92 
94 typedef vector<CConstRef<CSeq_id>> TAssemblySeqIds;
96 
97 //
98 // functor for sorting features based on their length
99 //
101 {
102  bool operator()(const CMappedFeat& feat0,
103  const CMappedFeat& feat1) const
104  {
105  TSeqRange r0 = feat0.GetLocation().GetTotalRange();
106  TSeqRange r1 = feat1.GetLocation().GetTotalRange();
107  return (r0.GetLength() < r1.GetLength());
108  }
109 };
110 
111 
112 //
113 // functor for sorting features based on the NCBI feature sort order
114 //
116 {
117  bool operator()(const CMappedFeat& feat0,
118  const CMappedFeat& feat1) const
119  {
120  const CSeq_feat& f0 = feat0.GetOriginalFeature();
121  const CSeq_feat& f1 = feat1.GetOriginalFeature();
122  return (f0.Compare(f1, feat0.GetLocation(), feat1.GetLocation()) < 0);
123  }
124 };
125 
126 
127 
129 static const TNameTypeStr s_NameTypeStrs[] = {
130  { "", CSeqUtils::eAnnot_All },
131  { "All", CSeqUtils::eAnnot_All },
132  { "Named", CSeqUtils::eAnnot_Named },
133  { "Unnamed", CSeqUtils::eAnnot_Unnamed },
134 };
135 
138 
139 
141 {
142  TNameTypeMap::const_iterator iter = sm_NameTypeMap.find(type);
143  if (iter != sm_NameTypeMap.end()) {
144  return iter->second;
145  } else {
147  }
148 }
149 
150 
151 const string&
153 {
155  for (iter = sm_NameTypeMap.begin(); iter != sm_NameTypeMap.end(); ++iter) {
156  if (iter->second == type) {
157  return iter->first;
158  }
159  }
160  return kEmptyStr;
161 }
162 
163 
164 //
165 // retrieve an annot selector
166 //
168 {
169  SAnnotSelector sel;
170  sel
171  // consider overlaps by total range...
173  // resolve all segments...
174  .SetResolveAll()
175  ;
176 
178 
179  CRegistryReadView view =
180  reg.GetReadView("GBENCH.Utils.NamedAnnots");
181  CRegistryReadView::TKeys naa_keys;
182  view.GetKeys(naa_keys);
183  ITERATE(CRegistryReadView::TKeys, iter, naa_keys) {
184  sel.IncludeNamedAnnotAccession(view.GetString(iter->key));
185  }
186 
187  view = reg.GetReadView("GBENCH.Utils.AnnotSelector");
188  if ( !(flags & fAnnot_UnsetNamed) ) {
189  if (view.GetBool("ExcludeExternal")) {
190  sel.SetExcludeExternal(true);
191  } else {
192  sel.SetExcludeExternal(false);
193 
194  ///
195  /// known external annotations
196  ///
197 
198  static const char* named_annots[] = {
199  "SNP", /// SNPs = variation features
200  "CDD", /// CDD = conserved domains
201  "STS", /// STS = sequence tagged sites
202  NULL
203  };
204 
205  for (const char** p = named_annots; p && *p; ++p) {
206  bool incl = view.GetBool(*p, true);
207  if ( !incl ) {
208  sel.ExcludeNamedAnnots(*p);
209  }
210  }
211  }
212  }
213 
214  if ( !(flags & fAnnot_UnsetDepth) ) {
215  if (view.GetBool("AdaptiveDepth", true)) {
216  sel.SetAdaptiveDepth(true);
217  sel.SetResolveAll();
218  }
219  }
220  return sel;
221 }
222 
223 
224 //
225 // retrieve an annot selector for our selected annotations
226 //
228  TAnnotFlags flags)
229 {
231  sel
232  // limit by our annotation type
233  .SetAnnotType(c);
234  return sel;
235 }
236 
237 
238 //
239 // retrieve an annot selector for our selected annotations
240 //
242  TAnnotFlags flags)
243 {
245  sel
246  // retrieve feature type and subtype of interest
247  .SetFeatType(feat);
248 
249  return sel;
250 }
251 
252 
254  TAnnotFlags flags)
255 {
257  sel
258  // retrieve feature type and subtype of interest
259  .SetFeatSubtype(sub);
260 
261  return sel;
262 }
263 
264 
265 SAnnotSelector CSeqUtils::GetAnnotSelector(const vector<string>& annots)
266 {
268  sel.SetCollectNames();
269 
270  //CGuiRegistry& reg = CGuiRegistry::GetInstance();
271  //CRegistryReadView view =
272  // reg.GetReadView("GBENCH.Utils.AnnotSelector");
273  //bool include_naas = view.GetBool("IncludeNAAs", false);
274  bool include_naas = false;
275 
276  if (include_naas && annots.empty()) {
277  sel.IncludeNamedAnnotAccession("NA*");
278  }
279 
280  ITERATE (vector<string>, iter, annots) {
281  const string& annot = *iter;
282  switch (CSeqUtils::NameTypeStrToValue(annot)) {
284  sel.AddUnnamedAnnots();
285  break;
287  sel.ExcludeUnnamedAnnots();
288  break;
290  if (include_naas) {
291  sel.IncludeNamedAnnotAccession("NA*");
292  }
293  return sel;
295  default:
296  if (NStr::StartsWith(annot, "NA*")) {
297  sel.IncludeNamedAnnotAccession("NA*");
298  } else {
299  sel.AddNamedAnnots(annot);
300  if(IsNAA(annot) || IsExtendedNAA(annot)) {
301  sel.IncludeNamedAnnotAccession(annot);
302  }
303  }
304  break;
305  }
306  }
307  return sel;
308 }
309 
310 
311 SAnnotSelector CSeqUtils::GetAnnotSelector(const vector<string>& annots,
312  bool adaptive, int depth)
313 {
314  SAnnotSelector sel(GetAnnotSelector(annots));
315  SetResolveDepth(sel, adaptive, depth);
316  return sel;
317 }
318 
319 
320 void CSeqUtils::SetAnnot(objects::SAnnotSelector& sel, const string& annot)
321 {
322  switch (CSeqUtils::NameTypeStrToValue(annot)) {
324  sel.AddUnnamedAnnots();
325  break;
327  sel.ExcludeUnnamedAnnots();
328  break;
330  {{
331  //CGuiRegistry& reg = CGuiRegistry::GetInstance();
332  //CRegistryReadView view =
333  // reg.GetReadView("GBENCH.Utils.AnnotSelector");
334  //bool include_naas = view.GetBool("IncludeNAAs", false);
335  bool include_naas = false;
336  if (include_naas) {
337  sel.IncludeNamedAnnotAccession("NA*");
338  }
339  }}
340  break;
342  default:
343  sel.AddNamedAnnots(annot);
344  if (IsNAA(annot) || IsExtendedNAA(annot)) {
345  sel.IncludeNamedAnnotAccession(annot);
346  }
347  break;
348  }
349 }
350 
351 
352 
354 {
356  return reg.GetReadView("GBENCH.Utils.AnnotSelector");
357 }
358 
360 {
361  return view.GetInt("MaxSearchSegments", 0);
362 }
363 
365 {
366  string max_search_segs_action = view.GetString("MaxSearchSegmentsAction", "silent");
368 
369  if(max_search_segs_action == "throw") {
370  MaxSearchSegmentsAction = SAnnotSelector::eMaxSearchSegmentsThrow;
371  } else if (max_search_segs_action == "log") {
372  MaxSearchSegmentsAction = SAnnotSelector::eMaxSearchSegmentsLog;
373  }
374 
375  return MaxSearchSegmentsAction;
376 }
377 
379 {
380  if (max > 0 && actual > max) {
381  switch(action) {
383  NCBI_THROW(CAnnotSearchLimitException, eSegmentsLimitExceded,
384  "CSeqUtils::CheckMaxSearchSegments: search segments limit exceeded");
385  break;
387  break;
389  default:
390  ERR_POST("CSeqUtils::CheckMaxSearchSegments: search segments limit exceeded");
391  break;
392  }
393  return true;
394  }
395  return false;
396 }
397 
398 
400 {
401  return view.GetInt("MaxSearchTime", 0);
402 }
403 
404 
405 void CSeqUtils::SetResolveDepth(objects::SAnnotSelector& sel,
406  bool adaptive, int depth)
407 {
408  if (adaptive) {
409  sel.SetAdaptiveDepth(true);
410  sel.SetExactDepth(false);
411  // TODO: watch out
412  // Maybe there is bug inside selector, we have call SetResolveAll() even
413  // for cases where we only want to resolve up to a given depth.
414  sel.SetResolveAll();
415 
417  int max_search_segs = GetMaxSearchSegments(view);
418 // LOG_POST(Trace << "MaxSearchSegments: " << max_search_segs);
419  sel.SetMaxSearchSegments(max_search_segs);
420  if(max_search_segs > 0) {
421  sel.SetMaxSearchSegmentsAction(GetMaxSearchSegmentsAction(view));
422  }
423  sel.SetMaxSearchTime((float)GetMaxSearchTime(view));
424 
425  if (depth >=0) {
426  sel.SetResolveDepth(depth);
427  }
428  } else if (depth >= 0) {
429  sel.SetResolveDepth(depth);
430  sel.SetExactDepth(true);
431  sel.SetAdaptiveDepth(false);
432  }
433 }
434 
435 
436 //
437 // LinkFeatures()
438 // This builds explicit links between features, creating a hierarchical tree of
439 // features.
440 //
441 
442 template <class T, class U>
444 {
445  bool operator()(const pair<T,U>& p1, const pair<T,U>& p2) const
446  {
447  return p1.first < p2.first;
448  }
449 };
450 
451 
455 {
456  if (p_cb) {
457  p_cb->SetTaskName("Linking features...");
458  p_cb->SetTaskTotal((int)feats.size());
459  p_cb->SetTaskCompleted(0);
460  }
461 
463 
464  // using CFeatTree
465  feature::CFeatTree tree;
466  tree.SetFeatIdMode(feature::CFeatTree::EFeatIdMode(mode));
469  if (p_cb && p_cb->StopRequested()) {
470  return false;
471  }
472  CRef<CLinkedFeature> curr_feat = *iter;
473  tree.AddFeature(curr_feat->GetMappedFeature());
474  fmap[curr_feat->GetMappedFeature()] = curr_feat;
475  }
477  if (p_cb && p_cb->StopRequested()) {
478  return false;
479  }
480  CRef<CLinkedFeature> curr_feat = *iter;
481  CMappedFeat parent_feat = tree.GetParent(curr_feat->GetMappedFeature());
482  if ( parent_feat ) {
483  fmap[parent_feat]->AddChild(curr_feat);
484  }
485  else {
486  out_feats.push_back(curr_feat);
487  }
488  if (p_cb) p_cb->AddTaskCompleted(1);
489  }
490 
491  out_feats.swap(feats);
492 
493  return true;
494 }
495 
496 
497 // remap a child location to a parent
499  const CSeq_loc& child,
500  CScope* scope)
501 {
502  CSeq_loc dummy_parent;
503  dummy_parent.SetWhole(const_cast<CSeq_id&>(sequence::GetId(parent, 0)));
504  SRelLoc converter(dummy_parent, child, scope);
505  converter.m_ParentLoc = &parent;
506  return converter.Resolve(scope);
507 }
508 
509 bool CSeqUtils::Match(const CSeq_id& id1, const CSeq_id& id2, CScope* scope)
510 {
511  return Match(CSeq_id_Handle::GetHandle(id1),
513  scope);
514 }
515 
516 
517 bool CSeqUtils::Match(const CSeq_id_Handle& id1,
518  const CSeq_id_Handle& id2, CScope* scope)
519 {
520  if (id1.MatchesTo(id2)) {
521  return true;
522  }
523 
524  if (id1.IsGi() && id2.IsGi() )
525  return false;
526 
527  if (scope) {
529 
530  syns = scope->GetSynonyms(id1);
531  if (syns) {
532  ITERATE (CSynonymsSet, iter, *syns) {
533  if (id2.MatchesTo(CSynonymsSet::GetSeq_id_Handle(iter))) {
534 // cerr << "matched to synonym of presented seq-id: " << endl;
535 // cerr << MSerial_AsnText << *CSynonymsSet::GetSeq_id_Handle(iter).GetSeqId();
536  return true;
537  }
538  }
539  }
540 
541  syns = scope->GetSynonyms(id2);
542  if (syns) {
543  ITERATE (CSynonymsSet, iter, *syns) {
544  if (id1.MatchesTo(CSynonymsSet::GetSeq_id_Handle(iter))) {
545 // cerr << "matched to synonym of tls-seq-id: " << endl;
546 // cerr << MSerial_AsnText << *CSynonymsSet::GetSeq_id_Handle(iter).GetSeqId();
547  return true;
548  }
549  }
550  }
551  }
552  return false;
553 }
554 
556  const CRangeCollection<TSeqPos>& ranges)
557 {
558  CRef<CSeq_loc> seq_loc(new CSeq_loc());
559  CSeq_loc::TPacked_int& p_int = seq_loc->SetPacked_int();
560 
561  ITERATE(CRangeCollection<TSeqPos>, it_r, ranges) { // for each range in mark
562  if ( !it_r->Empty() ) {
563  p_int.AddInterval(id, it_r->GetFrom(), it_r->GetTo());
564  }
565  }
566  switch (p_int.Get().size()) {
567  case 0:
568  {{
569  return CRef<CSeq_loc>();
570  }}
571  case 1:
572  {{
573  CRef<CSeq_interval> ival(p_int.Set().front());
574  seq_loc->SetInt(*ival);
575  /// p_int no longer valid!
576  }}
577  break;
578  default:
579  break;
580  }
581  return seq_loc;
582 }
583 
586 {
588 
589  // extract from the given map all segments corresponding to the given id
590  const CHandleRangeMap::TLocMap& loc_map = map.GetMap();
592  if(it != loc_map.end()) {
593  ITERATE(CHandleRange, it_r, it->second) {
594  ranges.CombineWith(it_r->first);
595  }
596  return true;
597  } else return false;
598 }
599 
601  const CSeq_loc& loc)
602 {
603  // Build a Seq-entry for the query Seq-loc
604  // A seq-loc may have multiple seq-ids, use the first successful seq-id.
605  CBioseq_Handle handle;
606  for ( CSeq_loc_CI citer (loc); citer; ++citer) {
607  handle = scope.GetBioseqHandle(citer.GetSeq_id());
608  if ( handle ) {
609  break;
610  }
611  }
612 
613  if ( !handle ) {
614  return CRef<CBioseq>();
615  }
616 
617  /// easy out: if the bioseq is of type whole, just duplicate it
618  if (loc.IsWhole()) {
619  CRef<CBioseq> bioseq(new CBioseq());
620  bioseq->Assign(*handle.GetCompleteBioseq());
621  return bioseq.Release();
622  }
623 
624  CSeqVector vec(loc, scope, CBioseq_Handle::eCoding_Iupac);
625  string seq_string;
626  vec.GetSeqData(0, vec.size(), seq_string);
627 
628  CRef<CBioseq> bioseq(new CBioseq());
629 
630  // curate our inst
631  bioseq->SetInst().SetRepr(CSeq_inst::eRepr_raw);
632  bioseq->SetInst().SetLength((int)seq_string.size());
633  if (vec.IsProtein()) {
634  bioseq->SetInst().SetMol(CSeq_inst::eMol_aa);
635  bioseq->SetInst().SetSeq_data().SetIupacaa(*new CIUPACaa(seq_string));
636  } else {
637  bioseq->SetInst().SetMol(CSeq_inst::eMol_na);
638  bioseq->SetInst().SetSeq_data().SetIupacna(*new CIUPACna(seq_string));
639  CSeqportUtil::Pack(&bioseq->SetInst().SetSeq_data());
640  }
641 
642 
643  // add an ID for our sequence
644  CRef<CSeq_id> id(new CSeq_id());
645  id->Assign(*handle.GetSeqId());
646  bioseq->SetId().push_back(id);
647 
648  // a title
649  CRef<CSeqdesc> title(new CSeqdesc);
650  string title_str;
651  id->GetLabel(&title_str);
652  title_str += ": ";
653  loc.GetLabel(&title_str);
654  title->SetTitle(title_str);
655  bioseq->SetDescr().Set().push_back(title);
656 
657  return bioseq.Release();
658 }
659 
660 
662  const CBioseq_Handle& handle)
663 {
664  CRef<CSeq_loc> seq_loc(new CSeq_loc());
665  CSeq_loc::TPacked_int& p_int = seq_loc->SetPacked_int();
666 
667  for (CSeq_loc_CI iter(mix_loc); iter; ++iter) {
668  if (handle.IsSynonym(iter.GetSeq_id())) {
669  CSeq_loc_CI::TRange range = iter.GetRange();
670  if ( !range.Empty() ) {
671  p_int.AddInterval(*handle.GetSeqId(), range.GetFrom(),
672  range.GetTo(), iter.GetStrand());
673  }
674  }
675  }
676  switch (p_int.Get().size()) {
677  case 0:
678  {{
679  return CRef<CSeq_loc>();
680  }}
681  case 1:
682  {{
683  CRef<CSeq_interval> ival(p_int.Set().front());
684  seq_loc->SetInt(*ival);
685  /// p_int no longer valid!
686  }}
687  break;
688  default:
689  break;
690  }
691 
692  return seq_loc;
693 }
694 
695 
696 string CSeqUtils::GetAnnotName(const CSeq_annot_Handle& annot_handle)
697 {
698  string name(GetUnnamedAnnot());
699  CConstRef<CSeq_annot> annot = annot_handle.GetCompleteSeq_annot();
700  if (annot) {
701  name = GetAnnotName(*annot);
702  } else if (annot_handle.IsNamed()) {
703  name = annot_handle.GetName();
704  }
705  return name;
706 }
707 
708 
709 string CSeqUtils::GetAnnotName(const CSeq_annot& annot)
710 {
711  string name(GetUnnamedAnnot());
712  if (annot.IsSetDesc()) {
713  ITERATE (objects::CAnnot_descr::Tdata, descrIter, annot.GetDesc().Get()) {
714  if ((*descrIter)->IsTitle()) {
715  name = (*descrIter)->GetTitle();
716  break;
717  } else if ((*descrIter)->IsName()) {
718  name = (*descrIter)->GetName();
719  }
720  }
721  }
722  return name;
723 }
724 
725 CConstRef<CUser_field> CSeqUtils::GetAnnotUserField(const CSeq_annot& annot, const string& type, const string& label)
726 {
727  if (annot.IsSetDesc()) {
728  ITERATE (objects::CAnnot_descr::Tdata, descrIter, annot.GetDesc().Get()) {
729  if ((*descrIter)->IsUser()) {
730  const auto& user((*descrIter)->GetUser());
731  if(user.GetType().IsStr() && user.GetType().GetStr() == type && user.HasField(label)) {
732  return user.GetFieldRef(label);
733  }
734  }
735  }
736  }
737  return CConstRef<CUser_field>();
738 }
739 
740 
741 string CSeqUtils::GetAnnotComment(const CSeq_annot_Handle& annot_handle)
742 {
743  string comment = kEmptyStr;
744  CConstRef<CSeq_annot> annot = annot_handle.GetCompleteSeq_annot();
745  if (annot) {
746  comment = GetAnnotComment(*annot);
747  }
748  return comment;
749 }
750 
751 
752 string CSeqUtils::GetAnnotComment(const CSeq_annot& annot)
753 {
754  if (annot.IsSetDesc()) {
755  ITERATE (objects::CAnnot_descr::Tdata, descrIter, annot.GetDesc().Get()) {
756  if ((*descrIter)->IsComment()) {
757  return (*descrIter)->GetComment();
758  }
759  }
760  }
761  return kEmptyStr;
762 }
763 
/// Check whether a string looks like a named annotation accession (NAA).
///
/// Accepted form: the letters "NA", exactly nine decimal digits and,
/// optionally, the divider character @a div followed by decimal digits
/// only, up to the end of the string. Note that the digits after the
/// divider may be absent: a string ending right after the divider is
/// accepted as well.
///
/// @param annot candidate string
/// @param div   character separating the accession from its version
/// @return true if @a annot matches the form described above
static bool s_IsNAA(const string& annot, char div)
{
    // "NA" + 9 digits
    const size_t kAccessionLen = 11;
    const size_t total_len = annot.size();

    if (total_len < kAccessionLen || annot[0] != 'N' || annot[1] != 'A') {
        return false;
    }

    // the accession body must be all digits
    for (size_t pos = 2; pos < kAccessionLen; ++pos) {
        if (annot[pos] < '0' || annot[pos] > '9') {
            return false;
        }
    }

    // bare accession without a version
    if (total_len == kAccessionLen) {
        return true;
    }

    // anything that follows must be the divider and then digits only
    if (annot[kAccessionLen] != div) {
        return false;
    }
    for (size_t pos = kAccessionLen + 1; pos < total_len; ++pos) {
        if (annot[pos] < '0' || annot[pos] > '9') {
            return false;
        }
    }
    return true;
}
795 
796 bool CSeqUtils::IsNAA(const string& annot, bool isStrict)
797 {
798  return isStrict ? s_IsNAA(annot, '.') : IsExtendedNAA(annot);
799 }
800 
801 bool CSeqUtils::IsNAA_Name(const string& annot)
802 {
803  return IsExtendedNAA_Name(annot);
804 }
805 
806 /// create an annotation name for a remote file pipeline, appending sSuffix
807 string CSeqUtils::MakeRmtAnnotName(const string& sSuffix)
808 {
809  return "rmt_pipleine_" + sSuffix;
810 }
811 
812 /// check if a given annotation was created by a remote file pipeline
813 bool CSeqUtils::isRmtAnnotName(const string& sAnnotName)
814 {
815  return NStr::StartsWith(sAnnotName, "rmt_pipleine_");
816 }
817 
818 bool CSeqUtils::isRmtPipelineFileType(const string& sFileType)
819 {
820  return sFileType == "bigBed" || sFileType == "bigWig" || sFileType == "vcfTabix" || sFileType == "vcf";
821 }
822 
823 
824 static bool s_IsExtendedNAA(const string& sAnnotName, char div, bool isStrict)
825 {
826  size_t posHashSign{sAnnotName.find('#')};
827 
828  if(posHashSign == NPOS) {
829  if(isStrict) {
830  return false;
831  } else {
832  return s_IsNAA(sAnnotName, div);
833  }
834  }
835  if(!s_IsNAA(sAnnotName.substr(0, posHashSign), div))
836  {
837  return false;
838  }
839  if(posHashSign == sAnnotName.length() - 1) {
840  return true;
841  }
842  for(size_t i = posHashSign+1; i<sAnnotName.length(); ++i) {
843  if(sAnnotName[i] < '0' || sAnnotName[i] > '9') {
844  return false;
845  }
846  }
847  return true;
848 }
849 
850 bool CSeqUtils::IsExtendedNAA(const string& sAnnotName, bool isStrict)
851 {
852  return s_IsExtendedNAA(sAnnotName, '.', isStrict);
853 }
854 
855 bool CSeqUtils::IsExtendedNAA_Name(const string& sAnnotName)
856 {
857  return s_IsExtendedNAA(sAnnotName, '_', false);
858 }
859 
860 // check if a given annotation is AlignDb (potentially suffixed with batch identication string after a '#')
861 bool CSeqUtils::IsAlignDb(const string& annot)
862 {
863  return NStr::StartsWith(annot, "AlignDb", NStr::eNocase);
864 }
865 
866 // get a batch string from AlignDb annotation suffixed with batch identication string after a '#'
867 string CSeqUtils::GetAlignDbBatch(const string& annot)
868 {
869  vector<string> parts;
870  if(IsAlignDb(annot)) {
871  NStr::Split(annot, "#", parts);
872  }
873  return parts.size() > 1 ? parts[1] : string();
874 }
875 
876  /// get a is_source_assembly_query string fro AlignDb annotation suffixed after a second '#'
877 string CSeqUtils::GetAlignDbIsQuery(const string& annot)
878 {
879  vector<string> parts;
880  if(IsAlignDb(annot)) {
881  NStr::Split(annot, "#", parts);
882  }
883  return parts.size() > 2 ? parts[2] : string("N");
884 }
885 
886 // VDB accessions in scope are in ("SRA", "SRR", "DRR", "ERR")
887 bool CSeqUtils::IsVDBAccession(const string& acc)
888 {
889  if (acc.size() < 3 || acc[1] != 'R')
890  return false;
891  switch (acc[0]) {
892  case 'S':
893  case 'D':
894  case 'E':
895  break;
896  default:
897  return false;
898  }
899  switch (acc[2]) {
900  case 'A':
901  case 'R':
902  break;
903  default:
904  return false;
905  }
906  return true;
907 }
908 
909 
911 {
912  if (feat.IsSetPseudo()) {
913  return feat.GetPseudo();
914  } else {
915  const CSeq_feat::TData& data = feat.GetData();
916  if (data.IsGene() && data.GetGene().IsSetPseudo()) {
917  return data.GetGene().GetPseudo();
918  } else if (data.IsRna() && data.GetRna().IsSetPseudo()) {
919  return data.GetRna().GetPseudo();
920  }
921  }
922  return false;
923 }
924 
925 
927 {
928  if (feat.IsSetPartial() && feat.GetPartial() &&
929  !IsPartialStart(feat.GetLocation()) &&
930  !IsPartialStop(feat.GetLocation())) {
931  return true;
932  }
933  return false;
934 }
935 
936 
938 {
940 }
941 
942 
944 {
946 }
947 
949 {
950  CSeq_loc_CI it(loc);
951  if (it) {
952  auto strand = it.GetStrand();
953  ++it;
954  for (; it; ++it) {
955  if (it.GetStrand() != strand)
956  return false;
957  }
958  }
959  return true;
960 }
961 
963 {
964  if (feat.IsSetExcept()) {
965  return feat.GetExcept();
966  }
967  return false;
968 }
969 
970 
972 {
973  static string base_url = "https://www.ncbi.nlm.nih.gov";
974  return base_url;
975 }
976 
977 
979 {
980  return "<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\">";
981 }
982 
983 
985 {
986  return "</table>";
987 }
988 
989 
990 string CSeqUtils::CreateTableRow(const string& tag, const string& value)
991 {
992  return "<tr><td align=\"right\" valign=\"top\" nowrap><b>" + tag + (tag.empty() ? "" : ":") + "&nbsp;" +
993  "</b></td><td valign=\"top\" width=\"200\">" + value + "</td></tr>";
994 }
995 
996 
997 string CSeqUtils::CreateSectionRow(const string& tag)
998 {
999  return "<tr><td align=\"right\" nowrap>[<i>" + tag + "</i>]&nbsp;&nbsp;</td><td></td></tr>";
1000 }
1001 
1002 
1003 string CSeqUtils::CreateLinkRow(const string& tag,
1004  const string& label,
1005  const string& url)
1006 {
1007  return "<tr><td align=\"right\" valign=\"top\" nowrap><b>" + tag + ":&nbsp;" +
1008  "</b></td><td width=\"200\"><a href=\"" + url + "\">" + label + "</a></td></tr>";
1009 }
1010 
1012 {
1013  CSeqUtils::TLocVec mapped_locs;
1014 
1015  typedef map<TGi, TSeqRange> TRangeMap;
1016  TRangeMap r_map;
1017  CTypeConstIterator<CSeq_align> aln_iter(align_set);
1018  for (; aln_iter; ++aln_iter) {
1019  const CSeq_align& aln = *aln_iter;
1020  if (aln.CheckNumRows() == 2 && aln.GetSegs().IsStd() &&
1021  aln.GetSegs().GetStd().size() == 1) {
1022  int target_row = 0;
1023  if (aln.GetSeq_id(0).IsGi() && aln.GetSeq_id(0).GetGi() == gi) {
1024  target_row = 1;
1025  }
1026  if (aln.GetSeq_id(target_row).IsGi()) {
1027  TGi target_gi = aln.GetSeq_id(target_row).GetGi();
1029  aln.GetSegs().GetStd().front()->GetSeqRange(target_row);
1030  TSeqPos from = (TSeqPos)range.GetFrom();
1031  TSeqPos to = (TSeqPos)range.GetTo();
1032  if (from > to) {
1033  swap(from, to);
1034  }
1035  if (r_map.count(target_gi) == 0) {
1036  r_map[target_gi] = TSeqRange(from, to);
1037  }
1038  else {
1039  r_map[target_gi].CombineWith(TSeqRange(from, to));
1040  }
1041  }
1042  }
1043  }
1044 
1045  ITERATE(TRangeMap, iter, r_map) {
1046  CRef<CSeq_id> id(new CSeq_id);
1047  id->SetGi(iter->first);
1048  CRef<CSeq_loc> loc(new CSeq_loc(*id, iter->second.GetFrom(),
1049  iter->second.GetTo()));
1050  mapped_locs.push_back(loc);
1051  }
1052  return mapped_locs;
1053 }
1054 
1055 static const char* kLinksUrlDefault =
1056 "https://www.ncbi.nlm.nih.gov/sviewer/links.fcgi?link_name=gi_placement&report=asn";
1057 
1058 static const string& GetLinksURL()
1059 {
1060  static string LinksUrl;
1061  if (LinksUrl.empty()) {
1063  LinksUrl = reg.GetString("links", "url", kLinksUrlDefault);
1064  }
1065  return LinksUrl;
1066 }
1067 
1069 {
1070  STimeout timeout;
1071  timeout.sec = time_out_sec;
1072  timeout.usec = 0;
1073 
1074  CConn_HttpStream stream(GetLinksURL() + string("&gi=") + NStr::NumericToString(gi), flags, &timeout);
1075  unique_ptr<CObjectIStream> obj_stream(CObjectIStream::Open(eSerial_AsnText, stream));
1076  CSeq_align_set align_set;
1077  try {
1078  *obj_stream >> align_set;
1079  } catch (const CException& e) {
1080  LOG_POST(Error << "Failed to retrieve gi placements for gi|"
1081  << gi << ", error: " << e.GetMsg());
1082  return TLocVec();
1083  }
1084 
1085  return s_GetAlnMapplingLocs(align_set, gi);
1086 }
1087 
1088 bool CSeqUtils::CanHavePlacements(const objects::CSeq_id& seqid)
1089 {
1090  // GenColl accessions without NC
1091  CSeq_id::EAccessionInfo info = seqid.IdentifyAccession();
1092  return info == CSeq_id::eAcc_refseq_contig //NT
1095  || info == CSeq_id::eAcc_refseq_mrna //NM
1097  || info == CSeq_id::eAcc_refseq_ncrna //NR
1099  || info == CSeq_id::eAcc_refseq_prot //NP
1105  || (info & CSeq_id::eAcc_type_mask) == CSeq_id::e_Genbank // any GenBank
1107 }
1108 
1109 /// check that a given accession is either local or unrecognizable
1110 /// this can be important to avoid unnecessary calls to NCBI services
1111 bool CSeqUtils::isQuasiLocal(const objects::CBioseq_Handle& handle)
1112 {
1113  try {
1114  // filter out local ids
1115  // also, CSeq_id constructor will throw if accession does not look familiar, that's what we want
1116  CConstRef<CSeq_id> seqid(handle.GetSeqId());
1119  return true;
1120  }
1121  // all others are expected to be known
1122  return false;
1123  } catch(...) {
1124  // if an accession is so bad that it can't be recognized, then it's definitely something specific to the accession user
1125  return true;
1126  }
1127 }
1128 
1130 {
1131  return GetAccessionPlacementsMsec(id, scope, time_out_sec * 1000, flags);
1132 }
1133 
1135 {
1136  STimeout timeout;
1137  NcbiMsToTimeout(&timeout, time_out_msec);
1138  // filter out cases when the given id something unsuitable e.g. like a local id
1139  // generally what's bad for GenColl should be bad for getting placements
1140  if(!CanHavePlacements(id)) {
1141  return TLocVec();
1142  }
1143 
1144  do {
1145  CConn_HttpStream stream(GetLinksURL() + string("&id=") + id.GetSeqIdString(true), flags, &timeout);
1146  unique_ptr<CObjectIStream> obj_stream(CObjectIStream::Open(eSerial_AsnText, stream));
1147  CSeq_align_set align_set;
1148  *obj_stream >> align_set;
1149 
1151  if (!gi_idh)
1152  break;
1153  TGi gi(gi_idh.GetGi());
1154  return s_GetAlnMapplingLocs(align_set, gi);
1155  } while (false);
1156 
1157  return TLocVec();
1158 }
1159 
1160 CSeqUtils::TLocVec CSeqUtils::GetLocPlacements(const objects::CSeq_loc& loc, int time_out_sec)
1161 {
1162  TLocVec mapped_locs;
1163  STimeout timeout;
1164  timeout.sec = time_out_sec;
1165  timeout.usec = 0;
1166  TSignedSeqPos SourceFrom(-1);
1167  TSignedSeqPos SourceTo(-1);
1168  TGi SourceGi = INVALID_GI;
1169 
1170  if(loc.IsInt()) {
1171  SourceFrom = loc.GetInt().GetFrom();
1172  SourceTo = loc.GetInt().GetTo();
1173  if(loc.GetInt().GetId().IsGi()) {
1174  SourceGi = loc.GetInt().GetId().GetGi();
1175  } else {
1176  return mapped_locs;
1177  }
1178  } else if(loc.IsPnt()) {
1179  SourceFrom = loc.GetPnt().GetPoint();
1180  SourceTo = SourceFrom;
1181  if(loc.GetPnt().GetId().IsGi()) {
1182  SourceGi = loc.GetPnt().GetId().GetGi();
1183  } else {
1184  return mapped_locs;
1185  }
1186  } else {
1187  return mapped_locs;
1188  }
1189  if(SourceTo < SourceFrom) {
1190  swap(SourceTo, SourceFrom);
1191  }
1192  CConn_HttpStream stream(GetLinksURL() + string("&gi=") + NStr::NumericToString(SourceGi) +
1193  "&from=" + NStr::NumericToString(SourceFrom) +
1194  "&to=" + NStr::NumericToString(SourceTo),
1195  fHTTP_AutoReconnect, &timeout);
1196  unique_ptr<CObjectIStream> obj_stream(CObjectIStream::Open(eSerial_AsnText, stream));
1197  CSeq_align_set align_set;
1198  try {
1199  *obj_stream >> align_set;
1200  } catch (const CException& e) {
1201  LOG_POST(Error << "Failed to retrieve location placements for gi|"
1202  << SourceGi << ", error: " << e.GetMsg());
1203  return mapped_locs;
1204  }
1205 
1206  typedef map<TGi, TSignedSeqRange> TRangeMap;
1207  TRangeMap range_map;
1208  CTypeConstIterator<CSeq_align> aln_iter(align_set);
1209  for(; aln_iter; ++aln_iter) {
1210  const CSeq_align& aln = *aln_iter;
1211  if (aln.CheckNumRows() == 2 && aln.GetSegs().IsStd() &&
1212  aln.GetSegs().GetStd().size() == 1) {
1213  int target_row = 0;
1214  int source_row = 1;
1215  if (aln.GetSeq_id(target_row).IsGi() && aln.GetSeq_id(target_row).GetGi() == SourceGi) {
1216  target_row = 1;
1217  source_row = 0;
1218  }
1219  if (aln.GetSeq_id(target_row).IsGi()) {
1220  // check that the source range falls within the source within this alignment
1221  TSignedSeqRange i_source_range =
1222  aln.GetSegs().GetStd().front()->GetSeqRange(source_row);
1223  TSignedSeqPos i_source_from = i_source_range.GetFrom();
1224  TSignedSeqPos i_source_to = i_source_range.GetTo();
1225  if (i_source_from > i_source_to) {
1226  swap(i_source_from, i_source_to);
1227  }
1228  if(i_source_from <= SourceFrom && SourceTo <= i_source_to) {
1229  TGi target_gi = aln.GetSeq_id(target_row).GetGi();
1231  aln.GetSegs().GetStd().front()->GetSeqRange(target_row);
1232  TSeqPos from = (TSeqPos)range.GetFrom();
1233  TSeqPos to = (TSeqPos)range.GetTo();
1234  if (from > to) {
1235  swap(from, to);
1236  }
1237  range_map[target_gi] = TSignedSeqRange(from + (SourceFrom - i_source_from), from + (SourceTo - i_source_from));
1238  }
1239  }
1240  }
1241  }
1242 
1243  ITERATE (TRangeMap, iter, range_map) {
1244  CRef<CSeq_id> id(new CSeq_id);
1245  id->SetGi(iter->first);
1246  CRef<CSeq_loc> loc(new CSeq_loc(*id, iter->second.GetFrom(), iter->second.GetTo()));
1247  mapped_locs.push_back(loc);
1248  }
1249  return mapped_locs;
1250 }
1251 
1252 
1253 bool CSeqUtils::StringToRange(const string& range_str,
1254  long& from, long& to)
1255 {
1256  // Any input range string that follows this pattern will be
1257  // considered as a valid input:
1258  // "^[ \t]*[1-9][0-9,]*[ \t]*[kKmM]?((([ \t]*([-:]|\\.\\.)[ \t]*)|([ \t]+))[1-9][0-9,]*[ \t]*[kKmM]?)?[ \t]*$"
1259  // Some valid range examples:
1260  // - 1000
1261  // - [space]1000 -[tab]2000[tab]
1262  // - [space]1000[space]..[space]2000
1263  // - 10,000:2,000,000
1264  // - 100 k : 1m
1265  // - 1000[space]2000
1266  // - [space]1000[tab]2000
1267 
1268  from = to = 0;
1269  string str = NStr::TruncateSpaces(range_str);
1270  size_t len = str.length();
1271 
1272  if (len == 0) return false;
1273 
1274  // The loop tries to accomplish the followings:
1275  // - remove ','
1276  // - replace 'k' or 'K' with '000'
1277  // - replace 'm' or 'M' with '000000'
1278  // - remove white paces (and tabs) before ',', 'k', 'K', 'm', and 'M'
1279  // - remove white space (and tabs) around any separator (':', '..', and '-')
1280  // - replace any separator with '-'
1281  // - replace spaces between two numbers with '-'
1282  string out_str;
1283  bool space_before_this = false;
1284  bool separator_before_this = false;
1285  for (size_t i = 0; i < len; ++i) {
1286  switch (str[i])
1287  {
1288  case ' ':
1289  case '\t':
1290  // ignore space after a separator
1291  if ( !separator_before_this )
1292  space_before_this = true;
1293  break;
1294  case ',':
1295  if (separator_before_this) return false; // invalid
1296  // ignore space before ','
1297  space_before_this = false;
1298  break;
1299  case 'k':
1300  case 'K':
1301  if (separator_before_this) return false; // invalid
1302  // ignore space before 'k' and 'K'
1303  space_before_this = false;
1304  // replace it with "000'
1305  out_str.append("000");
1306  break;
1307  case 'm':
1308  case 'M':
1309  if (separator_before_this) return false; // invalid
1310  // ignore space before 'm' and 'M'
1311  space_before_this = false;
1312  // replace it with "0000000'
1313  out_str.append("000000");
1314  break;
1315  case '.':
1316  case '-':
1317  case ':':
1318  // ignore space before a separator
1319  space_before_this = false;
1320  separator_before_this = true;
1321  break;
1322  default:
1323  if (separator_before_this) {
1324  out_str.append("-");
1325  separator_before_this = false;
1326  } else if (space_before_this) {
1327  out_str.append("-");
1328  space_before_this = false;
1329  }
1330  out_str.append(1, str[i]);
1331  break;
1332  }
1333  }
1334 
1335  typedef vector<string> TPositions;
1336  TPositions pos;
1337  NStr::Split(out_str, "-", pos);
1338  if (pos.size() < 3) {
1339  try {
1340  bool is_from = true;
1341  NON_CONST_ITERATE (TPositions, iter, pos) {
1342  NStr::TruncateSpaces(*iter);
1343  if (iter->empty()) continue;
1344  if (is_from) {
1345  to = from = NStr::StringToLong(*iter);
1346  is_from = false;
1347  } else {
1348  to = NStr::StringToLong(*iter);
1349  }
1350  }
1351  } catch (const CException&) {
1352  return false;
1353  }
1354  }
1355  return true;
1356 }
1357 
// Database names passed to the E-utilities calls in this file
// (kTaxDb is not referenced in the code visible around these definitions).
1358 static const string kTaxDb = "taxonomy";
1359 static const string kNucDb = "nucleotide";
1360 static const string kAssmDb = "assembly";
// Value handed to CGuiEutilsClient::SetMaxReturn() by the helpers below.
1361 static const int kRetMax = 5000;
1362 
1363 
1365 {
    // Fills gc_ids with the RsUid/GbUid values found in the assembly docsums
    // of the Entrez ids linked to gi with a neighbor_score link of Score "2".
    // Exceptions from the E-utilities calls are logged and not propagated.
1366  gc_ids.clear();
1367 
1368  TEntrezIds uids_from;
1369 #ifdef NCBI_STRICT_GI
1370  uids_from.push_back(GI_TO(TEntrezId, gi));
1371 #else
1372  uids_from.push_back(gi);
1373 #endif
1374  TEntrezIds uids_to;
1375 
1376  try {
1377  // prepare eLink request that will get entrez-id (not exactly the same as assembly ids)
1378  // chromosome is indicated by score of "2"
1379  // (magic string indicated in e-mail communication from Avi Kimchi on 04/01/2013)
1380  ELinkQuery(kNucDb, kAssmDb, uids_from, uids_to, "neighbor_score", "//Link[Score = \"2\"]/Id/text()");
1381  }
1382  catch (const CException& e) {
1383  LOG_POST(Error << "Failed to get assembly entrez ids for gi: " << gi << ". Error: " << e.GetMsg());
1384  }
1385 
    // nothing linked (or the link query failed): leave gc_ids empty
1386  if (uids_to.empty())
1387  return;
1388 
1389  CGuiEutilsClient ecli;
1390  ecli.SetMaxReturn(kRetMax);
1391  xml::document docsums;
1392 
1393  try {
1394  // from Entrez ids, get true assembly ids
1395  ecli.Summary(kAssmDb, uids_to, docsums);
1396  } catch (const CException& e) {
1397  LOG_POST(Error << "Failed to get assembly ids from entrez ids: " << CreateIdStr(uids_to) << ". Error: " << e.GetMsg());
    // NOTE(review): there is no return here, so the XPath query below also
    // runs on whatever docsums holds after a failed Summary call - confirm
    // that this is intended.
1398  }
1399 
    // every non-empty text node becomes one numeric id
1401  xml::node_set nodes ( docsums.get_root_node().run_xpath_query("//RsUid/text() | //GbUid/text()") );
1402  for (itNode = nodes.begin(); itNode != nodes.end(); ++itNode) {
1403  string id(itNode->get_content());
1404  if (id.empty())
1405  continue;
1406  gc_ids.push_back(NStr::StringToNumeric<TEntrezId>(id));
1407  }
1408 }
1409 
1411 {
    // Returns the name of the first replicon that has a name set among the
    // sequences found for idh in the assemblies returned by
    // GetAssmIds_GIChr(); returns an empty string when none is found.
1412  TEntrezIds gc_ids;
1413  GetAssmIds_GIChr(gc_ids, gi);
1416 
1417  ITERATE(TEntrezIds, iGCId, gc_ids) {
1418  try {
1419  CRef<CGC_Assembly> assm(gcs->GetAssembly(ENTREZ_ID_TO(int,*iGCId), "Gbench_chrs"));
1420 
1421  CGC_Assembly::TSequenceList sequences;
1422  assm->Find(idh, sequences);
1423 
1424  ITERATE(CGC_Assembly::TSequenceList, iSequences, sequences) {
1425  CConstRef<CGC_Replicon> replicon((*iSequences)->GetReplicon());
1426  if(replicon->IsSetName()) {
1427  return replicon->GetName();
1428  }
1429  }
1430  } catch(...) {
    // NOTE(review): every exception is reported as a timeout here, whatever
    // its actual cause; the loop then continues with the next assembly id.
1431  LOG_POST(Error << "Call to GenColl timed out when getting assembly: " << *iGCId);
1432  }
1433  }
1434  return "";
1435 }
1436 
1437 
1438 // retrieve or cache list of ids for assembly accessions
// Returns a reference to the cached list of top-level molecule ids for
// assm_acc, loading it through CGencollSvc on a cache miss. Throws CException
// when the assembly cannot be retrieved.
1439 static const TAssemblySeqIds& s_GetAssemblySeqIds(const string& assm_acc)
1440 {
    // fast path: already cached
1441  {{
1443  auto it = s_AssemblySeqIdCache.find(assm_acc);
1444  if (it != s_AssemblySeqIdCache.end()) {
1445  return it->second;
1446  }
1447  }}
1448 
1449  // Warning: this is potentially a very slow call
1450  // no caching because we are caching molecule list instead as it's much smaller
1451  // this call is only used in CSeqUtils::isTopLevel() which is only used in SViewer so it should never
1452  // use seqconfig service to obtain assemblies
1453  CRef<CGC_Assembly> assm = CGencollSvc::GetInstance()->GetGCAssembly(assm_acc, false, "Gbench");
1454  if(assm.IsNull())
1455  NCBI_THROW(CException, eUnknown, "Failed to retrieve Assembly for '" + assm_acc + "'");
1456  CGC_Assembly::TSequenceList top_level_seqs;
1457  assm->GetMolecules(top_level_seqs, CGC_Assembly::eTopLevel);
    // keep only the seq-ids of the top-level molecules
1458  TAssemblySeqIds ids;
1459  for (auto it : top_level_seqs) {
1460  const CSeq_id& tls_seq_id = it->GetSeq_id();
1461  ids.emplace_back(&tls_seq_id);
1462  }
    // look the accession up again before inserting: the cache may have been
    // filled for it while the assembly was being fetched
1464  {{
1465  auto it = s_AssemblySeqIdCache.find(assm_acc);
1466  if (it != s_AssemblySeqIdCache.end())
1467  return it->second;
1468  }}
    // store under the requested accession and, when it differs, also under
    // the accession reported by the assembly itself
1469  s_AssemblySeqIdCache.emplace(assm_acc, ids);
1470  if (assm_acc != assm->GetAccession())
1471  s_AssemblySeqIdCache.emplace(assm->GetAccession(), ids);
1472  return s_AssemblySeqIdCache[assm_acc];
1473 }
1474 
// Tells whether seq_id is one of the top-level molecules of assembly assm_acc.
// Results are remembered in m_TopLevels under the key "<fasta id>:<assm_acc>".
// Returns false when assm_acc is empty, when the sequence is not a GenColl
// sequence, and when the assembly lookup throws (the error is logged).
1475 bool CSeqUtils::isTopLevel(const CSeq_id& seq_id, const string& assm_acc, CScope* scope)
1476 {
    // NOTE(review): scope is dereferenced here unconditionally, although it is
    // checked for null further down - confirm that callers never pass null.
1477  CBioseq_Handle handle(scope->GetBioseqHandle(seq_id));
1478  // filter out cases when the given id is something unsuitable, e.g. a local id
1479  if(assm_acc.empty() || !CGencollSvc::isGenCollSequence(handle)) {
1480  return false;
1481  }
1482  {
    // NOTE(review): the condition guarding this return is not among the lines
    // visible here.
1484  // NCs are top level
1486  return true;
1487  }
1488 
1489  string sNormalizedSeqId(seq_id.AsFastaString() + ":" + assm_acc);
    // answer from the cache when this id/assembly pair was seen before
1490  {{
1492  if(m_TopLevels.find(sNormalizedSeqId) != m_TopLevels.end()) {
1493  // cerr << "found " << sNormalizedSeqId << " in top level cache with value: " << m_TopLevels[sNormalizedSeqId] << endl;
1494  return m_TopLevels[sNormalizedSeqId];
1495  }
1496  }}
1497  try {
    // NOTE(review): plain auto copies the id list returned by reference
1498  auto assembly_ids = s_GetAssemblySeqIds(assm_acc);
    // when the assembly ids are gis, compare gis directly
1499  if (scope && !assembly_ids.empty() && assembly_ids.front()->IsGi()) {
1500  auto gi = sequence::GetGiForId(seq_id, *scope);
1501  if (gi > ZERO_GI) {
1502  bool all_checked = true;
1503  for (const auto& id : assembly_ids) {
1504  if (!id->IsGi()) {
1505  // not expected to happen
1506  // but we'll make sure that we check all the cases
1507  all_checked = false;
1508  continue;
1509  }
1510  if (id->GetGi() == gi) {
1512  m_TopLevels[sNormalizedSeqId] = true;
1513  return true;
1514  }
1515  }
1516  if (all_checked) {
1517  // all molecules are gi and they don't match our gi
1519  m_TopLevels[sNormalizedSeqId] = false;
1520  return false;
1521  }
1522  }
1523  }
1524  // Here if our id or some of the assembly sequences are gi-less
1525  for (const auto& id : assembly_ids) {
1526  if(Match(seq_id, *id, scope)) {
1528  m_TopLevels[sNormalizedSeqId] = true;
1529  return true;
1530  }
1531  }
1532  // nothing found
1534  m_TopLevels[sNormalizedSeqId] = false;
1535 
1536  } catch (exception& e) {
    // a failed lookup is logged and not cached
1537  LOG_POST(Error << "Call to GenColl timed out when getting assembly: " << assm_acc << ", " << e.what());
1538  }
1539  return false;
1540 }
1541 
1542 
1543 string CSeqUtils::GetChrId(const string& id_str, objects::CScope& scope)
1544 {
1545  TGi gi(ZERO_GI);
1546  if(!id_str.empty() ) {
1547  CRef<objects::CSeq_id> seq_id(new objects::CSeq_id);
1548  seq_id->Set(id_str);
1549  objects::CBioseq_Handle bsh = scope.GetBioseqHandle(*seq_id);
1550  objects::CSeq_id_Handle shdl = bsh.GetAccessSeq_id_Handle();
1551  shdl = sequence::GetId(shdl, scope, sequence::eGetId_ForceGi);
1552  if (shdl) {
1553  gi = shdl.GetGi();
1554  }
1555  }
1556  return CSeqUtils::GetChrGI(gi);
1557 }
1558 
1559 
1561 {
    // Fills accs with the GenBank synonym accessions taken from the docsums of
    // the assemblies returned by GetAssmIds_GI() for gi. Leaves accs empty when
    // no assembly is linked or when the summary request throws.
1562  accs.clear();
1563  TEntrezIds gc_ids;
1564  GetAssmIds_GI(gc_ids, gi);
1565 
1566  if(gc_ids.empty())
1567  return;
1568 
1569  xml::document docsums;
1570  CGuiEutilsClient ecli;
1571  ecli.SetMaxReturn(kRetMax);
1572 
1573  try {
1574  ecli.Summary(kAssmDb, gc_ids, docsums);
1575  }
1576  catch (const CException& e) {
1577  LOG_POST(Error << "Failed to get summary for the following assemblies: " << CreateIdStr(gc_ids) << ". Error: " << e.GetMsg());
1578  return;
1579  }
1580 
1581  // Using "gcassembly" Entrez, you will not get the GB assemblies
1582  // for some cases (those paired to RS assemblies) as separate entries
1583  // from an Entrez search. But if you are looking at the Entrez Docsums,
1584  // there is a field "GbUid" that tells you the release id of the
1585  // corresponding GB, and fields <Synonym>/<Genbank> and <Synonym>/<RefSeq>
1586  // will tell you the accessions of both of them. (JIRA: GCOL-1493)
1587  xml::node_set doc_sums ( docsums.get_root_node().run_xpath_query("//DocumentSummary[contains(AssemblyAccession/text(),'GCF_') and GbUid/text()!=\"\"]/Synonym/Genbank/text()") );
    // collect every non-empty accession text
1589  for (itAcc = doc_sums.begin(); itAcc != doc_sums.end(); ++itAcc) {
1590  string acc(itAcc->get_content());
1591  if (acc.empty())
1592  continue;
1593  accs.insert(acc);
1594  }
1595 }
1596 
// Runs an eLink query through CSeqUtils::ELinkQuery() into an XML document,
// evaluates xpath on its root node and appends the content of every non-empty
// result node, converted with NStr::StringToNumeric<T2>, to uids_to.
// uids_to is appended to, not cleared.
1597 template<class T1, class T2>
1598 static void s_ELinkQuery(const string &db_from, const string &db_to, const vector<T1> &uids_from, vector<T2> &uids_to, const string &cmd, const string &xpath)
1599 {
1600  xml::document xmldoc;
1601  CSeqUtils::ELinkQuery(db_from, db_to, uids_from, xmldoc, cmd);
1602 
1603  xml::node_set links ( xmldoc.get_root_node().run_xpath_query(xpath.c_str()) );
1605  for (itLink = links.begin(); itLink != links.end(); ++itLink) {
1606  string id(itLink->get_content());
    // skip nodes without text
1607  if (id.empty())
1608  continue;
1609  uids_to.push_back(NStr::StringToNumeric<T2>(id));
1610  }
1611 }
1612 
// Overload taking TEntrezIds as source ids; forwards all arguments to the
// file-local s_ELinkQuery() template, which appends the linked ids to uids_to.
1613 void CSeqUtils::ELinkQuery(const string& db_from, const string& db_to, const TEntrezIds& uids_from, TEntrezIds& uids_to, const string& cmd, const string& xpath)
1614 {
1615  s_ELinkQuery(db_from, db_to, uids_from, uids_to, cmd, xpath);
1616 }
1617 
// Overload taking TSeqIdHandles as source ids; forwards all arguments to the
// file-local s_ELinkQuery() template, which appends the linked ids to uids_to.
1618 void CSeqUtils::ELinkQuery(const string &db_from, const string &db_to, const TSeqIdHandles &uids_from, TEntrezIds &uids_to, const string &cmd, const string &xpath)
1619 {
1620  s_ELinkQuery(db_from, db_to, uids_from, uids_to, cmd, xpath);
1621 }
1622 
1623 
// Sends a Link request through CGuiEutilsClient (max return kRetMax), reads
// the whole response stream into a string, parses it as an XML document and
// swaps the result into linkset.
1624 template<class T>
1625 static void s_ELinkQuery(const string &db_from, const string &db_to, const vector<T> &uids_from, xml::document& linkset, const string &cmd)
1626 {
1627  CGuiEutilsClient ecli;
1628  ecli.SetMaxReturn(kRetMax);
1630 
1631  ecli.Link(db_from, db_to, uids_from, xml, cmd);
1632 
    // drain the response stream into a buffer so it can be parsed from memory
1633  stringbuf sb;
1634  xml >> &sb;
1635  string docstr(sb.str());
1636  xml::document doc(docstr.data(), docstr.size(), NULL);
1637  linkset.swap(doc);
1638 }
1639 
// Overload taking TEntrezIds as source ids and returning the raw link set as
// an XML document; forwards to the file-local s_ELinkQuery() template.
1640 void CSeqUtils::ELinkQuery(const string &db_from, const string &db_to, const TEntrezIds &uids_from, xml::document& linkset, const string &cmd)
1641 {
1642  s_ELinkQuery(db_from, db_to, uids_from, linkset, cmd);
1643 }
1644 
// Overload taking TSeqIdHandles as source ids and returning the raw link set
// as an XML document; forwards to the file-local s_ELinkQuery() template.
1645 void CSeqUtils::ELinkQuery(const string &db_from, const string &db_to, const TSeqIdHandles &uids_from, xml::document& linkset, const string &cmd)
1646 {
1647  s_ELinkQuery(db_from, db_to, uids_from, linkset, cmd);
1648 }
1649 
1650 
// Sends a Search request through CGuiEutilsClient (max return ret_max), parses
// the response as XML, evaluates xpath on the root node and appends every
// non-empty node content, converted with NStr::StringToNumeric<T>, to uids.
// count is set from the root node's "Count" child when that is non-empty,
// otherwise from uids.size().
1651 template<class T>
1652 static void s_ESearchQuery(const string &db, const string &term, vector<T> &uids, size_t &count, const int ret_max, const string &xpath)
1653 {
1654  CGuiEutilsClient ecli;
1655  ecli.SetMaxReturn(ret_max);
1657 
1658  ecli.Search(db, term, xml);
1659 
    // drain the response stream into a buffer so it can be parsed from memory
1660  stringbuf sb;
1661  xml >> &sb;
1662  string docstr(sb.str());
1663 
1664  xml::document xmldoc(docstr.data(), docstr.size(), NULL);
1665  xml::node_set links(xmldoc.get_root_node().run_xpath_query(xpath.c_str()));
1667  for (itLink = links.begin(); itLink != links.end(); ++itLink) {
1668  string id(itLink->get_content());
1669  if (id.empty())
1670  continue;
1671  uids.push_back(NStr::StringToNumeric<T>(id));
1672  }
1673  string countStr = CSeqUtils::GetXmlChildNodeValue(xmldoc.get_root_node(), "Count");
1674  if (!countStr.empty())
1675  count = NStr::StringToSizet(countStr);
1676  else
1677  count = uids.size();
1678 }
1679 
// Public entry point for a term search returning TEntrezIds; forwards all
// arguments to the file-local s_ESearchQuery() template.
1680 void CSeqUtils::ESearchQuery(const string& db, const string& term, TEntrezIds& uids, size_t& count, const int ret_max, const string& xpath)
1681 {
1682  s_ESearchQuery(db, term, uids, count, ret_max, xpath);
1683 }
1684 
// Variant that searches with a web_env/query_key pair: obtains the XML result
// through CSeqUtils::ESearchQuery(), evaluates xpath on its root node and
// appends every non-empty node content, converted with
// NStr::StringToNumeric<T>, to uids. count is set from the root node's "Count"
// child when that is non-empty, otherwise from uids.size().
1685 template<class T>
1686 static void s_ESearchQuery(const string &db, const string &term, const string &web_env, const string &query_key, vector<T> &uids, size_t &count, int retstart, const string &xpath)
1687 {
1688  xml::document xmldoc;
1689  CSeqUtils::ESearchQuery(db, term, web_env, query_key, xmldoc, retstart);
1690 
1691  xml::node_set links ( xmldoc.get_root_node().run_xpath_query(xpath.c_str()) );
1693  for (itLink = links.begin(); itLink != links.end(); ++itLink) {
1694  string id(itLink->get_content());
1695  if (id.empty())
1696  continue;
1697  uids.push_back(NStr::StringToNumeric<T>(id));
1698  }
1699  string countStr = CSeqUtils::GetXmlChildNodeValue(xmldoc.get_root_node(), "Count");
1700  if (!countStr.empty())
1701  count = NStr::StringToSizet(countStr);
1702  else
1703  count = uids.size();
1704 }
1705 
// Public entry point for a web_env/query_key search returning TEntrezIds;
// forwards all arguments to the file-local s_ESearchQuery() template.
1706 void CSeqUtils::ESearchQuery(const string &db, const string &term, const string &web_env, const string &query_key, TEntrezIds &uids, size_t &count, int retstart, const string &xpath)
1707 {
1708  s_ESearchQuery(db, term, web_env, query_key, uids, count, retstart, xpath);
1709 }
1710 
// Sends a SearchHistory request through CGuiEutilsClient (max return kRetMax)
// and swaps the parsed XML response into searchset. query_key is converted to
// Int8 with NStr::StringToNumeric before it is passed on.
1711 void CSeqUtils::ESearchQuery(const string &db, const string &term, const string &web_env, const string &query_key, xml::document &searchset, int retstart)
1712 {
1713  CGuiEutilsClient ecli;
1714  ecli.SetMaxReturn(kRetMax);
1716 
1717  ecli.SearchHistory(db, term, web_env, NStr::StringToNumeric<Int8>(query_key), retstart, xml);
1718 
    // drain the response stream into a buffer so it can be parsed from memory
1719  stringbuf sb;
1720  xml >> &sb;
1721  string docstr(sb.str());
1722 
1723  xml::document doc(docstr.data(), docstr.size(), NULL);
1724  searchset.swap(doc);
1725 }
1726 
1728 {
    // Returns the content of the child of parent found by name, or an empty
    // string when there is no such child or its content pointer is null.
1729  xml::node::const_iterator itNode = parent.find(name.c_str());
    // do/while(false) is used only as a scope to break out of on failure
1730  do {
1731  if (parent.end() == itNode)
1732  break;
1733 
1734  const char* value = itNode->get_content();
1735 
1736  if (!value)
1737  break;
1738 
1739  return string(value);
1740  }
1741  while(false);
1742  return string();
1743 }
1744 
1746 {
    // Fills gc_ids with the assembly-database ids linked to gi through an
    // eLink query from kNucDb to kAssmDb. A CException from the query is
    // logged and not propagated.
1747  gc_ids.clear();
1748 
1749  TEntrezIds uids_from;
1750 #ifdef NCBI_STRICT_GI
1751  uids_from.push_back(GI_TO(TEntrezId, gi));
1752 #else
1753  uids_from.push_back(gi);
1754 #endif
1755 
    // NOTE(review): ecli is configured but not used in the code visible here;
    // ELinkQuery() is called without it.
1756  CGuiEutilsClient ecli;
1757  ecli.SetMaxReturn(kRetMax);
1758 
1759  try {
1760  ELinkQuery(kNucDb, kAssmDb, uids_from, gc_ids);
1761  }
1762  catch (const CException& e) {
1763  LOG_POST(Error << "Failed to get assembly ids for gi: " << gi << ". Error: " << e.GetMsg());
1764  }
1765 }
1766 
1767 /// For CDS and RNA feature mapping information
1768 typedef vector< CRef<CSeq_loc> > TMappedLocs;
1769 
1771  const CSeq_loc& feat_loc,
1772  const CMappedFeat& feat,
1773  const CBioseq_Handle& handle)
1774 {
1775  TMappedLocs locs;
1776  CConstRef<CSeq_align> align_ref;
1777  CScope& scope = handle.GetScope();
1778  TSeqRange range = feat_loc.GetTotalRange();
1779 
1780  if (!feat.IsSetProduct()) return locs;
1781  const CSeq_id& product_id = *feat.GetProduct().GetId();
1782 
1783  // check if there is any alignment associated with the
1784  // product sequence
1785  SAnnotSelector sel;
1786  sel.SetAdaptiveDepth(true);
1787  sel.SetExactDepth(false);
1788  sel.SetResolveAll();
1789  sel.SetResolveDepth(1);
1790  sel.ExcludeNamedAnnots("SNP");
1791  sel.ExcludeNamedAnnots("STS");
1792  sel.ExcludeNamedAnnots("CDD");
1793  CSeq_annot_Handle annot = feat.GetAnnot();
1794  if (annot && annot.IsNamed()) {
1795  const string& annot_str = annot.GetName();
1796  sel.AddNamedAnnots(annot_str);
1797  if (NStr::StartsWith(annot_str, "NA0")) {
1798  sel.IncludeNamedAnnotAccession(annot_str);
1799  }
1800  }
1801 
1802  CConstRef<CSeq_loc> aln_loc(
1803  handle.GetRangeSeq_loc(range.GetFrom(), range.GetTo()) );
1804  CAlign_CI align_iter(scope, *aln_loc, sel);
1805 
1806  while (align_iter && !align_ref) {
1807  // find the first seq-align that matches the product sequence
1808  const CSeq_align& align = *align_iter;
1809  CSeq_align::TDim num_row = align.CheckNumRows();
1810  if (num_row != 2) continue;
1811  for (CSeq_align::TDim row = 0; row < num_row; ++row) {
1812  if (product_id.Match(align.GetSeq_id(row))) {
1813  if (align.GetSegs().IsSpliced()) {
1814  // CSeq_loc_Mapper doesn't map location correctly for spliced-seg.
1815  // This is a work-around to convert spliced-seg to denseg.
1816  align_ref = ConvertSeq_align(align, CSeq_align::TSegs::e_Denseg);
1817  } else {
1818  align_ref.Reset(&align);
1819  }
1820  break;
1821  }
1822  }
1823  ++align_iter;
1824  }
1825 
1826  CRef<CSeq_loc_Mapper> mapper;
1827  CRef<CSeq_loc_Mapper> back_mapper;
1828 
1829  // No alignment found
1830  if (align_ref) {
1831  mapper.Reset(new CSeq_loc_Mapper(*align_ref, product_id, &scope));
1832  back_mapper.Reset(new CSeq_loc_Mapper(*align_ref, *feat_loc.GetId(), &scope));
1833  } else {
1834  const CSeq_feat& mapped_feat = feat.GetMappedFeature();
1835  mapper.Reset(new CSeq_loc_Mapper(mapped_feat, CSeq_loc_Mapper::eLocationToProduct, &scope));
1836  back_mapper.Reset(new CSeq_loc_Mapper(mapped_feat, CSeq_loc_Mapper::eProductToLocation, &scope));
1837  }
1838 
1839  CRef<CSeq_loc> prod_loc = mapper->Map(feat_loc);
1840  CRef<CSeq_loc> gen_loc = back_mapper->Map(*prod_loc);
1841  locs.push_back(prod_loc);
1842  locs.push_back(gen_loc);
1843 
1844  return locs;
1845 }
1846 
1847 #undef MAP_INFO_DEBUG
1848 
1849 /// Helper function to convert two mapped locations into
1850 /// an interval-to-interval mapping structure
/// Intervals of prod_loc and gen_loc are paired in iteration order. info is
/// cleared when a pair has an empty interval, when the two ranges of a pair
/// differ in length, or when the locations have different numbers of
/// intervals.
1852  const CSeq_loc& prod_loc,
1853  const CSeq_loc& gen_loc,
1855 {
1856  // We assume both contain exactly the same number of intervals, and
1857  // each has the same length
1858  CSeq_loc_CI it1(prod_loc);
1859  CSeq_loc_CI it2(gen_loc);
    // private copies of the ids, shared by all intervals created below
1860  CRef<CSeq_id> gen_id(new CSeq_id);
1861  gen_id->Assign(*gen_loc.GetId());
1862  CRef<CSeq_id> prod_id(new CSeq_id);
1863  prod_id->Assign(*prod_loc.GetId());
1864  while (it1 && it2) {
1865  TSeqRange r1 = it1.GetRange();
1866  TSeqRange r2 = it2.GetRange();
    // any mismatch invalidates the whole mapping
1867  if (it1.IsEmpty() || it2.IsEmpty() ||
1868  r1.GetLength() != r2.GetLength()) {
1869  info.clear();
1870  return;
1871  }
1872 
1873  CRef<CSeq_interval> int1;
1874  CRef<CSeq_interval> int2;
    // product-side interval; the strand is copied only when it is set
1875  int1.Reset(new CSeq_interval);
1876  int1->SetFrom(r1.GetFrom());
1877  int1->SetTo(r1.GetTo());
1878  int1->SetId(*prod_id);
1879  if (it1.IsSetStrand()) {
1880  int1->SetStrand(it1.GetStrand());
1881  }
    // genomic-side interval; the strand is copied only when it is set
1882  int2.Reset(new CSeq_interval);
1883  int2->SetFrom(r2.GetFrom());
1884  int2->SetTo(r2.GetTo());
1885  int2->SetId(*gen_id);
1886  if (it2.IsSetStrand()) {
1887  int2->SetStrand(it2.GetStrand());
1888  }
1889  info.emplace_back(int1, int2);
1890  ++it1; ++it2;
1891  }
1892 
1893  // Two locations don't match
1894  if (it1 || it2) {
1895  info.clear();
1896  }
1897 }
1898 
1899 
1901  const objects::CSeq_loc& feat_loc,
1902  const CMappedFeat& feat,
1903  const objects::CBioseq_Handle& handle)
1904 {
    // Builds the interval-to-interval mapping from the two locations returned
    // by s_GetRnaMappingLocs(); info is returned untouched when that function
    // does not return exactly two locations.
1906  TMappedLocs locs = s_GetRnaMappingLocs(feat_loc, feat, handle);
1907  if (locs.size() == 2) {
1908  s_CreateMappingInfo(*locs[0], *locs[1], info);
1909  }
1910  return info;
1911 }
1912 
1913 
1914 static
1915 void s_AdjustToAnnotatedCDS(const CMappedFeat& mapped_cds_feat, CScope& scope, CSeqUtils::TMappingInfo& cds_map_info)
1916 {
1917  // At this point the mapping is generated via mRNA segments
1918  // This does not account for CDS ribosomal shifts
1919  // If now we remap genomic location via CDS feature
1920  // to product and back to location
1921  // the mapped genomic location will be split in two if there is a ribosomal slippage
1922  // in this case we split the mapping in two as well
1923 
1924 
1925  CSeq_loc_Mapper map2prod(mapped_cds_feat.GetMappedFeature(), CSeq_loc_Mapper::eLocationToProduct, &scope);
1926  CSeq_loc_Mapper map2loc(mapped_cds_feat.GetMappedFeature(), CSeq_loc_Mapper::eProductToLocation, &scope);
1927 
1928 #ifdef MAP_INFO_DEBUG
1929  cout << "\n===== Original Map ==========\n";
1930  for (const auto& it : cds_map_info) {
1931  auto& gen_int = get<1>(it);
1932  auto& prod_int = get<0>(it);
1933  cout << gen_int->GetFrom() << ".." << gen_int->GetTo() << "->" << prod_int->GetFrom() << ".." << prod_int->GetTo() << endl;
1934  }
1935  cout << "\n========================\n";
1936 #endif
1937 
1938  // we have to keep product coordinaites consecuitive
1939  // so we use curr_prod_pos to keep track of the current prod coordinate
1940  // but product might be annotated with gaps
1941  // last_prod_pos keeps track of teh last annotated pos to account for these gaps
1942  int curr_prod_pos = -1;
1943  int last_prod_pos = -1;
1944 
1945  auto cds_map_it = cds_map_info.begin();
1946  while (cds_map_it != cds_map_info.end()) {
1947  auto& gen_int = cds_map_it->second;
1948  // remove odd mappings with length < 3
1949  if ((gen_int->GetTo() - gen_int->GetFrom()) + 1 < 3) {
1950  cds_map_it = cds_map_info.erase(cds_map_it);
1951  continue;
1952  }
1953 
1954  auto prod_loc = Ref(new CSeq_loc(gen_int->SetId(), gen_int->GetFrom(), gen_int->GetTo(), gen_int->GetStrand()));
1955  auto mapped_prod_loc = map2prod.Map(*prod_loc);
1956  auto mapped_gen_loc = map2loc.Map(*mapped_prod_loc);
1957 
1958 #ifdef MAP_INFO_DEBUG
1959 
1960  cout << MSerial_AsnText << *prod_int << endl;
1961  cout << MSerial_AsnText << *loc << "\nMapped to\n";
1962  cout << MSerial_AsnText << *mapped_loc << "\n";
1963  cout << "Remapped to\n";
1964  cout << MSerial_AsnText << *mapped_loc2 << endl;
1965 #endif
1966  CSeq_loc_CI lit(*mapped_gen_loc);
1967  auto sz = lit.GetSize();
1968  list<TSeqRange> rngs;
1969  for (size_t i = 0; i < sz; ++i, ++lit) {
1970  const auto& r = lit.GetRange();
1971  // remove the first and last mapped segments of length < 3
1972  // as these are codon carryovers from prev and next segments
1973  if (i == 0 && r.GetLength() < 3) {
1974  continue;
1975  }
1976  if (i == sz - 1 && r.GetLength() < 3)
1977  break;
1978  rngs.push_back(r);
1979  }
1980  if (!rngs.empty()) {
1981  bool reverse = gen_int->CanGetStrand() && gen_int->GetStrand() == eNa_strand_minus;
1982 
1983  auto& prod_int = cds_map_it->first;
1984  auto r_it = rngs.begin();
1985 
1986  if (reverse) {
1987  gen_int->SetFrom(max<int>(r_it->GetFrom(), gen_int->GetFrom()));
1988  gen_int->SetTo(gen_int->GetTo());
1989  }
1990  else {
1991  gen_int->SetFrom(gen_int->GetFrom());
1992  gen_int->SetTo(min<int>(r_it->GetTo(), gen_int->GetTo()));
1993  }
1994  if (curr_prod_pos == -1) {
1995  curr_prod_pos = prod_int->SetFrom();
1996  }
1997  else if (last_prod_pos != -1) {
1998  if (prod_int->SetFrom() - last_prod_pos > 0) {
1999  curr_prod_pos += (prod_int->SetFrom() - last_prod_pos);
2000  }
2001  }
2002  last_prod_pos = prod_int->GetTo() + 1;
2003  prod_int->SetFrom(curr_prod_pos);
2004  curr_prod_pos += (gen_int->GetTo() - gen_int->GetFrom());
2005  prod_int->SetTo(curr_prod_pos);
2006  ++curr_prod_pos;
2007  ++r_it;
2008  while (r_it != rngs.end()) {
2009  // the original genomic location was split in two after mapping
2010  // the previos segment's to is already adjusted
2011  // so we insert a new mapiing here
2012  auto from = curr_prod_pos;
2013  curr_prod_pos += r_it->GetLength() - 1;
2014  auto int1 = Ref(new CSeq_interval(prod_int->SetId(), from, curr_prod_pos, eNa_strand_plus));
2015  auto int2 = Ref(new CSeq_interval(gen_int->SetId(), r_it->GetFrom(), r_it->GetTo(), reverse ? eNa_strand_minus : eNa_strand_plus));
2016  cds_map_it = cds_map_info.emplace(++cds_map_it, int1, int2);
2017  ++curr_prod_pos;
2018  ++r_it;
2019  }
2020  }
2021  ++cds_map_it;
2022  }
2023 
2024 #ifdef MAP_INFO_DEBUG
2025 
2026  cout << "\n=====Final Map ==========\n";
2027  for (const auto& it : cds_map_info) {
2028  auto& gen_int = get<1>(it);
2029  auto& prod_int = get<0>(it);
2030  cout << gen_int->GetFrom() << ".." << gen_int->GetTo() << "->" << prod_int->GetFrom() << ".." << prod_int->GetTo() << endl;
2031  }
2032  cout << "\n========================\n";
2033 #endif
2034 }
2035 
2037  const TMappingInfo& mapping_info,
2038  const CSeq_feat& rna_feat,
2039  const CMappedFeat& mapped_cds_feat,
2040  const CSeq_loc& feat_loc,
2041  CScope& scope,
2042  const int feat_offset)
2043 {
    // Derives the CDS mapping from the mapping of its parent RNA
    // (mapping_info): the RNA intervals are truncated to the biological start
    // and stop of feat_loc and the product coordinates are shifted by the
    // offset of the CDS on the RNA product. Returns an empty mapping when
    // nothing overlaps or when the mapping is found to be unnecessary.
2044  const CSeq_id& product_id = *mapped_cds_feat.GetProduct().GetId();
2045  const CSeq_id& rna_product_id = *rna_feat.GetProduct().GetId();
2046  /// The start offset between CDS product sequence
2047  /// and its parent product sequence
2048  int cds_offset = -1;
2049  {
2050  // check whether the parent RNA product sequence contains a
2051  // CDS feature with the same product sequence as 'product_id'
2052  CBioseq_Handle rna_bsh = scope.GetBioseqHandle(rna_product_id);
2053  if (rna_bsh) {
2054  SAnnotSelector sel;
2055  sel.SetAdaptiveDepth(true);
2056  sel.SetExactDepth(false);
2057  sel.SetResolveAll();
2058  sel.SetResolveDepth(1);
2059  sel.ExcludeNamedAnnots("SNP");
2060  sel.ExcludeNamedAnnots("STS");
2062  CFeat_CI feat_iter(rna_bsh, TSeqRange::GetWhole(), sel);
2063  for (; feat_iter; ++feat_iter) {
2064  const CSeq_feat* cds_feat = &feat_iter->GetMappedFeature();
2065  if (cds_feat->CanGetProduct() &&
2066  product_id.Match(*cds_feat->GetProduct().GetId())) {
    // the offset is where that feature starts on the RNA product
2067  cds_offset = (int)cds_feat->GetLocation().GetTotalRange().GetFrom();
2068  break;
2069  }
2070  }
2071  }
2072  }
2073 
    // private copies of the ids, shared by all intervals created below
2074  CRef<CSeq_id> gen_id(new CSeq_id);
2075  gen_id->Assign(*feat_loc.GetId());
2076  CRef<CSeq_id> prod_id(new CSeq_id);
2077  prod_id->Assign(product_id);
2078 
2079  /// CDS biological range on the genomic sequence
2080  TSeqPos bio_start = feat_loc.GetStart(eExtreme_Biological);
2081  TSeqPos bio_stop = feat_loc.GetStop(eExtreme_Biological);
2082 
2083  /// truncate the rna mapping info using the CDS
2084  /// feature biological start and stop, and create
2085  /// the mapping info for the CDS feature by applying
2086  /// the cds-to-rna shift.
2087  TMappingInfo cds_map_info;
2088  TMappingInfo::const_iterator iter = mapping_info.begin();
2089  bool done = false;
2090  while (iter != mapping_info.end() && !done) {
2091  auto& gen_int = iter->second;
2092  auto& prod_int = iter->first;
2093 
2094  TSeqPos gen_from = gen_int->GetFrom();
2095  TSeqPos gen_to = gen_int->GetTo();
2096  TSeqPos prod_from = prod_int->GetFrom();
2097  TSeqPos prod_to = prod_int->GetTo();
2098  bool reverse = gen_int->CanGetStrand() && gen_int->GetStrand() == eNa_strand_minus;
2099 
    // r1: range on the product, r2: range on the genomic sequence
2100  TSeqRange r1, r2;
2101  if (cds_map_info.empty()) {
2102  if (bio_start >= gen_from && bio_start <= gen_to) {
2103  // find the first interval intersecting with CDS location
2104  TSeqPos off1 = reverse ? gen_to - bio_start : bio_start - gen_from;
2105  r1.Set(prod_from + off1, prod_to);
2106  if (reverse) {
2107  r2.Set(gen_from, gen_to - off1);
2108  } else {
2109  r2.Set(gen_from + off1, gen_to);
2110  }
    // fall back to (or cap at) the product start of this first interval
2111  if (cds_offset < 0 || (TSeqPos)cds_offset > r1.GetFrom()) {
2112  cds_offset = int(r1.GetFrom());
2113  }
2114  } // else, it is outside of the CDS range, skip it
2115  } else {
    // intervals after the first one are taken over unchanged
2116  r1.Set(prod_from, prod_to);
2117  r2.Set(gen_from, gen_to);
2118  }
2119 
2120  // check if it intersects with the biological stop position
2121  if (!r1.Empty() && !r2.Empty()) {
2122  if (bio_stop >= gen_from && bio_stop <= gen_to) {
2123  // find the last interval intersection with CDS location
2124  TSeqPos off2 = reverse ? bio_stop - gen_from : gen_to - bio_stop;
2125  r1.SetTo(r1.GetTo() - off2);
2126  if (reverse) r2.SetFrom(r2.GetFrom() + off2);
2127  else r2.SetTo(r2.GetTo() - off2);
2128 
2129  // set termination flag
2130  done = true;
2131  }
2132 
    // product interval, shifted into CDS product coordinates
2134  int1->SetFrom(r1.GetFrom() - cds_offset);
2135  int1->SetTo(r1.GetTo() - cds_offset);
2136  int1->SetId(*prod_id);
2137  int1->SetStrand(eNa_strand_plus);
2138 
    // genomic interval
2140  int2->SetFrom(r2.GetFrom());
2141  int2->SetTo(r2.GetTo());
2142  int2->SetId(*gen_id);
2143  int2->SetStrand(reverse ? eNa_strand_minus : eNa_strand_plus);
2144 
2145  // create intervals and push them to the info map
2146  cds_map_info.emplace_back(int1, int2);
2147  }
2148 
2149  ++iter;
2150  }
2151 
2152  // Final step to determine if the mapping info is necessary for
2153  // the given cds feature.
2154  if ( !cds_map_info.empty() ) {
2155  // Create the mapped seq-loc on genomic and product sequence
2156  CRef<CSeq_loc> gen_loc(new CSeq_loc);
2157  CRef<CSeq_loc> prod_loc(new CSeq_loc);
2158  ITERATE (TMappingInfo, iter, cds_map_info) {
2159  prod_loc->SetPacked_int().Set().push_back(iter->first);
2160  gen_loc->SetPacked_int().Set().push_back(iter->second);
2161  }
2163  if (prod_loc->GetStart(eExtreme_Biological) == 0 &&
2164  prod_loc->GetPacked_int().Get().size() == 1 &&
2165  gen_loc->CompareSubLoc(feat_loc, eNa_strand_plus) == 0) {
2166  // All of the above conditions are met, so the mapping info
2167  // is not necessary
2168  cds_map_info.clear();
2169  }
2170  }
2171 
2172  if (mapped_cds_feat.GetMappedFeature().IsSetExcept()) {
2173  // At this point the mapping is generated via mRNA segments
2174  // This does not account for CDS ribosomal shifts
2175  // If now we remap genomic location via CDS feature
2176  // to product and back to location
2177  // the mapped genomic location will be split in two if there is a ribosomal slippage
2178  // in this case we split the mapping in two as well
2179  s_AdjustToAnnotatedCDS(mapped_cds_feat, scope, cds_map_info);
2180  }
    // apply feat_offset to the first mapping entry only
2181  if (!cds_map_info.empty() && (0 != feat_offset)) {
2182  if (feat_offset > 0) {
2183  auto& prod_int = cds_map_info.begin()->first;
2184  prod_int->SetFrom(prod_int->GetFrom() + feat_offset);
2185  auto& gen_int = cds_map_info.begin()->second;
2186  gen_int->SetFrom(gen_int->GetFrom() + feat_offset);
2187  }
2188  else {
    // negative offset: product start moves forward by |feat_offset| and the
    // genomic 'to' moves back by the same amount
2189  auto& prod_int = cds_map_info.begin()->first;
2190  prod_int->SetFrom(prod_int->GetFrom() - feat_offset);
2191  auto& gen_int = cds_map_info.begin()->second;
2192  gen_int->SetTo(gen_int->GetTo() + feat_offset);
2193  }
2194  }
2195  return cds_map_info;
2196 }
2197 
2198 
2200  const objects::CSeq_loc &feat_loc,
2201  const objects::CSeq_id &product_id,
2202  const int feat_offset
2203  )
2204 {
    // Builds a mapping straight from the intervals of feat_loc: each interval
    // is paired with a plus-strand product interval of the same length, and
    // the product intervals follow one another starting at position 0.
2206  CRef<CSeq_id> gen_id(new CSeq_id);
2207  gen_id->Assign(*feat_loc.GetId());
2208  CRef<CSeq_id> prod_id(new CSeq_id);
2209  prod_id->Assign(product_id);
    // next free position on the product
2210  TSeqPos start = 0;
2211  CSeq_loc_CI it(feat_loc);
2212  while (it) {
2213  TSeqRange r = it.GetRange();
2214  auto int1 = Ref(new CSeq_interval(*prod_id, start, start + r.GetLength() - 1, eNa_strand_plus));
2215  start += r.GetLength();
    // the genomic strand defaults to plus when the interval has none set
2216  auto int2 = Ref(new CSeq_interval(*gen_id, r.GetFrom(), r.GetTo(), it.IsSetStrand() ?
2217  it.GetStrand() : eNa_strand_plus));
2218  info.emplace_back(int1, int2);
2219  ++it;
2220  }
    // apply feat_offset to the first mapping entry only
2221  if (!info.empty() && (0 != feat_offset)) {
2222  if (feat_offset > 0) {
2223  auto& prod_int = info.begin()->first;
2224  prod_int->SetFrom(prod_int->GetFrom() + feat_offset);
2225  auto& gen_int = info.begin()->second;
2226  gen_int->SetFrom(gen_int->GetFrom() + feat_offset);
2227  }
2228  else {
    // negative offset: product start moves forward by |feat_offset| and the
    // genomic 'to' moves back by the same amount
2229  auto& prod_int = info.begin()->first;
2230  prod_int->SetFrom(prod_int->GetFrom() - feat_offset);
2231  auto& gen_int = info.begin()->second;
2232  gen_int->SetTo(gen_int->GetTo() + feat_offset);
2233  }
2234  }
2235  return info;
2236 }
2237 
2238 
2240 {
2241  const CSeq_feat_Base::TLocation& loc_obj = feat.GetLocation();
2242  CConstRef<CSeq_loc> loc;
2243  /// guard against the cases where the feature contains location
2244  /// with multiple seq-ids
2245  if ( !loc_obj.GetId() ) {
2246  loc = CSeqUtils::MixLocToLoc(loc_obj, bsh);
2247  }
2248  if ( !loc ) {
2249  loc.Reset(&loc_obj);
2250  }
2251  return loc;
2252 }
2253 
2254 CMappedFeat CSeqUtils::GetMrnaForCds(const CMappedFeat &cds_feat, const string &named_acc)
2255 {
2256  try {
2257  if (!named_acc.empty()) {
2258  SAnnotSelector sel;
2259  sel.SetAdaptiveDepth(true);
2260  sel.SetExactDepth(false);
2261  sel.SetResolveAll();
2262  sel.SetResolveDepth(1);
2263  sel.ExcludeNamedAnnots("SNP");
2264  sel.ExcludeNamedAnnots("STS");
2266  sel.ExcludeUnnamedAnnots();
2267  sel.IncludeNamedAnnotAccession(named_acc);
2268  return feature::GetBestMrnaForCds(cds_feat, nullptr, &sel);
2269  }
2270  else {
2271  return feature::GetBestMrnaForCds(cds_feat);
2272  }
2273  }
2274  catch(const CException&)
2275  {
2276  }
2277 
2278  return CMappedFeat();
2279 }
2280 
2281 void CSeqUtils::GetMappingInfo(const CMappedFeat &mapped_feat, const CBioseq_Handle& bsh, CSeqUtils::TMappingInfo &info, const string &annot)
2282 {
2283  const CSeq_feat& feat = mapped_feat.GetMappedFeature();
2285  CSeqFeatData::ESubtype subtype = feat.GetData().GetSubtype();
2287 
2288  auto GetCDSFrame = [&]() {
2289  if (feat.GetData().Which() != CSeqFeatData::e_Cdregion)
2290  return 0;
2291  if (feat.GetData().GetCdregion().IsSetFrame() && feat.GetData().GetCdregion().GetFrame() > 1) {
2292  int offset = feat.GetData().GetCdregion().GetFrame() - 1;
2293  const CSeq_loc& orig_loc = mapped_feat.GetLocation();
2294  ENa_strand q_strand = sequence::GetStrand(orig_loc, &bsh.GetScope());
2295  return (q_strand == eNa_strand_minus) ? -offset : offset;
2296  }
2297  return 0;
2298  };
2299 
2300  if (feat.IsSetProduct()) {
2301  if (feat.GetData().IsRna()) {
2302  info = CSeqUtils::GetRnaMappingInfo(*loc, mapped_feat, bsh);
2303  }
2304  else if (type == CSeqFeatData::e_Cdregion) {
2305  int offset = GetCDSFrame();
2306  do {
2307  const CSeq_loc& product = feat.GetProduct();
2308  CMappedFeat mapped_mrna = CSeqUtils::GetMrnaForCds(mapped_feat, annot);
2309  if (!mapped_mrna || !mapped_mrna.GetOriginalFeature().IsSetProduct()) {
2311  break;
2312  }
2313  CConstRef<CSeq_loc> mrna_loc = CSeqUtils::GetFeatLocation(mapped_mrna.GetMappedFeature(), bsh);
2314  CSeqUtils::TMappingInfo mrna_info = CSeqUtils::GetRnaMappingInfo(*mrna_loc, mapped_mrna, bsh);
2315  if (mrna_info.empty())
2316  break;
2318  mapped_mrna.GetMappedFeature(), mapped_feat, *loc, bsh.GetScope(), offset);
2319  } while (false);
2320  }
2321  }
2322  else {
2323  if ((type == CSeqFeatData::e_Cdregion) || (subtype == CSeqFeatData::eSubtype_V_segment) || feat.GetData().IsRna()) {
2324  // We assume both contains exactly the same number of intervals, and
2325  // each has the same length
2326  static unsigned id_num{ 0 };
2327  int offset = GetCDSFrame();
2328  CRef<CSeq_id> prod_id(new CSeq_id("lcl|pseudo" + NStr::IntToString(++id_num)));
2330  }
2331  }
2332 }
2333 
2334 
2335 bool CSeqUtils::GetGIString(const string& sid, string* gi_str)
2336 {
2337  bool match = false;
2338 
2339  if (!gi_str) return match;
2340  gi_str->clear();
2341 
2342  SIZE_TYPE pos = NStr::FindNoCase(sid, "gi|");
2343  if (pos != NPOS) {
2344  match = true;
2345  *gi_str = "gi|";
2346  for (size_t i = pos + 3; i < sid.length(); ++i) {
2347  char c = sid.at(i);
2348  if (isdigit(c)) {
2349  gi_str->append(1, c);
2350  } else {
2351  break;
2352  }
2353  }
2354  }
2355  return match;
2356 }
2357 
2358 int CSeqUtils::GetGenCode(const objects::CBioseq_Handle& handle)
2359 {
2360  // get an appropriate translation table. For the full list of tables,
2361  // please refer to https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi
2362  int gencode = 1;
2363  try {
2364  CSeqdesc_CI desc_it(handle, CSeqdesc::e_Source);
2365  if (desc_it) {
2366  const CBioSource& src = desc_it->GetSource();
2367  gencode = src.GetGenCode();
2368  }
2369  } catch (CException&) {
2370  // ignore it, will try other approach
2371  }
2372  return gencode;
2373 }
2374 
2375 // encode "shown" flag inside an annot
2376 void CSeqUtils::SetAnnotShown(objects::CSeq_annot& annot, bool isShown)
2377 {
2378  CRef<CUser_object> shown_desc(new CUser_object);
2379  shown_desc->SetType().SetStr("x-sv-track-settings");
2380  shown_desc->AddField("show-track", isShown);
2381  annot.AddUserObject(*shown_desc);
2382 }
2383 
2384 // get "shown" flag from an annot
2385 bool CSeqUtils::GetAnnotShown(const objects::CSeq_annot& annot)
2386 {
2387  CConstRef<CUser_field> show_field(GetAnnotUserField(annot, "x-sv-track-settings", "show-track"));
2388  if(!show_field.IsNull() && show_field->GetData().IsBool() && show_field->GetBool() == false) {
2389  return false;
2390  }
2391  return true;
2392 }
2393 
2394 CRef<objects::CSeq_id> CSeqUtils::MapStringId(const string& str, objects::IIdMapper *mapper)
2395 {
2396  CRef<CSeq_id> id;
2397  try {
2398  id = new CSeq_id(str);
2399  }
2400  catch (const CException&) {
2401  }
2402  if (!id && str.find('|') != NPOS) {
2403  try {
2404  CBioseq::TId ids;
2405  CSeq_id::ParseIDs(ids, str);
2406  if (!ids.empty()) {
2407  id = *ids.begin();
2408  }
2409  }
2410  catch (const CException&) {
2411  }
2412  }
2413  if (!id || (id->IsGi() && id->GetGi() < GI_CONST(1000))) {
2414  id = new CSeq_id(CSeq_id::e_Local, str);
2415  }
2416  if (mapper) {
2417  try {
2418  mapper->MapObject(*id);
2419  }
2420  catch (const CException&) {
2421  }
2422  }
2423  return id;
2424 }
2425 
2427 {
2428  CConstRef<CBioseq> bioseq;
2429 
2430  if (!scope) {
2431  return bioseq;
2432  }
2433 
2434  CScope::TTSE_Handles tses;
2435  scope->GetAllTSEs(tses, CScope::eAllTSEs);
2436  ITERATE (CScope::TTSE_Handles, handle, tses) {
2437  for (CBioseq_CI bioseq_it(*handle); bioseq_it; ++bioseq_it) {
2438  // Is Seqdesc on this Bioseq?
2439  if (bioseq_it->IsSetDescr()) {
2440  CConstRef<CBioseq> r_bioseq = bioseq_it->GetCompleteBioseq();
2441  ITERATE (CBioseq::TDescr::Tdata, dit, r_bioseq->GetDescr().Get()) {
2442  if (dit->GetPointer() == &seq_desc) {
2443  return r_bioseq;
2444  }
2445  }
2446  }
2447  }
2448  }
2449  return bioseq;
2450 }
2451 
2452 
2454 {
2455  CSeq_entry_Handle seh;
2456 
2457  if (!scope) {
2458  return seh;
2459  }
2460 
2461  CScope::TTSE_Handles tses;
2462  scope->GetAllTSEs(tses, CScope::eAllTSEs);
2463  ITERATE (CScope::TTSE_Handles, handle, tses) {
2464  for (CSeq_entry_CI entry_ci(*handle, CSeq_entry_CI::fRecursive | CSeq_entry_CI::fIncludeGivenEntry); entry_ci; ++entry_ci) {
2465  if (entry_ci->IsSetDescr()) {
2466  ITERATE (CBioseq::TDescr::Tdata, dit, entry_ci->GetDescr().Get()) {
2467  if ((*dit)->IsPub()) {
2468  const CPubdesc& desc_pub = (*dit)->GetPub();
2469  if (&desc_pub == &pubdesc) {
2470  return *entry_ci;
2471  }
2472  }
2473  }
2474  }
2475  if (entry_ci->IsSeq()) {
2476  for (CFeat_CI fi(entry_ci->GetSeq(), SAnnotSelector(CSeqFeatData::e_Pub)); fi; ++fi) {
2477  if (&(fi->GetData().GetPub()) == &pubdesc) {
2478  return *entry_ci;
2479  }
2480  }
2481  }
2482  }
2483  }
2484  return seh;
2485 }
2486 
2487 
2488 
2490 {
2491  CSeq_entry_Handle seh;
2492 
2493  CScope::TTSE_Handles handles;
2494  scope.GetAllTSEs(handles);
2495  if (handles.size() > 0) {
2496  seh = handles.front().GetTopLevelEntry();
2497  }
2498  return seh;
2499 }
2500 
2501 
2503 {
2504  CScope::TTSE_Handles tse_list;
2505  scope.GetAllTSEs(tse_list);
2506  ITERATE(CScope::TTSE_Handles, tse, tse_list) {
2507  CFeat_CI f(*tse);
2508  while (f) {
2509  if (f->GetSeq_feat() == &feat) {
2510  return *f;
2511  }
2512  ++f;
2513  }
2514  }
2515  CSeq_feat_Handle fh;
2516  return fh;
2517 }
2518 
2519 
2521 {
2522  CBioseq_Handle bsh;
2523  if (f.IsSetLocation()) {
2524  CSeq_loc_CI subloc(f.GetLocation());
2525  if (subloc && !subloc.IsEmpty()) {
2526  bsh = scope.GetBioseqHandle(subloc.GetSeq_id());
2527  }
2528  }
2529  if (!bsh) {
2531  if (fh) {
2533  if (seh.IsSeq()) {
2534  bsh = seh.GetSeq();
2535  }
2536  }
2537  }
2538  return bsh;
2539 }
2540 
2541 
2543 {
2544  const CObject* ptr = obj.object.GetPointer();
2545 
2546  /// CSeq_entry
2547  const objects::CSeq_entry* seqEntry = dynamic_cast<const objects::CSeq_entry*>(ptr);
2548  const objects::CBioseq* bioseq = dynamic_cast<const objects::CBioseq*>(ptr);
2549  const objects::CBioseq_set* bioseq_set = dynamic_cast<const objects::CBioseq_set*>(ptr);
2550  const objects::CSeq_annot* seqannot = dynamic_cast<const objects::CSeq_annot*>(ptr);
2551  const objects::CSeq_feat* seqfeat = dynamic_cast<const objects::CSeq_feat*>(ptr);
2552  const objects::CSeqdesc* seqdesc = dynamic_cast<const objects::CSeqdesc*>(ptr);
2553  const objects::CSeq_submit* seqsubmit = dynamic_cast<const objects::CSeq_submit*>(ptr);
2554  const objects::CPubdesc* pubdesc = dynamic_cast<const objects::CPubdesc*>(ptr);
2555  const objects::CSeq_loc* loc = dynamic_cast<const objects::CSeq_loc*>(ptr);
2556  const objects::CSeq_id* seq_id = dynamic_cast<const objects::CSeq_id*>(ptr);
2557 
2558  objects::CSeq_entry_Handle seh;
2559  if (!obj.scope)
2560  return seh;
2561 
2562  if (seqEntry) {
2563  seh = obj.scope->GetObjectHandle (*seqEntry, CScope::eMissing_Null);
2564  if (seh)
2565  seh = seh.GetTopLevelEntry();
2566  } else if(bioseq) {
2567  CBioseq_Handle bsh = obj.scope->GetObjectHandle (*bioseq, CScope::eMissing_Null);
2568  if (bsh)
2569  seh = bsh.GetTopLevelEntry();
2570  } else if(bioseq_set) {
2571  CBioseq_set_Handle bssh = obj.scope->GetObjectHandle(*bioseq_set, CScope::eMissing_Null);
2572  if (bssh) {
2573  seh = bssh.GetTopLevelEntry();
2574  }
2575  } else if(seqannot) {
2576  auto sah = obj.scope->GetObjectHandle (*seqannot, CScope::eMissing_Null);
2577  if (sah)
2578  seh = sah.GetTopLevelEntry();
2579  } else if(seqfeat) {
2580  CBioseq_Handle bsh = GetBioseqForSeqFeat(*seqfeat, *(obj.scope));
2581  if (bsh) {
2582  seh = bsh.GetTopLevelEntry();
2583  }
2584  } else if (seqdesc) {
2585  seh = edit::GetSeqEntryForSeqdesc(obj.scope, *seqdesc);
2586  if (seh) {
2587  seh = seh.GetTopLevelEntry();
2588  } else {
2589  seh = GetDefaultTopLevelSeqEntry(*obj.scope);
2590  }
2591  } else if (pubdesc) {
2592  seh = GetSeqEntryForPubdesc(obj.scope, *pubdesc);
2593  if (seh) {
2594  seh = seh.GetTopLevelEntry(); // GB-3727
2595  } else {
2596  seh = GetDefaultTopLevelSeqEntry(*obj.scope);
2597  }
2598  } else if (seqsubmit) {
2599  if (seqsubmit->IsEntrys() && seqsubmit->GetData().GetEntrys().front()) {
2600  seh = obj.scope->GetSeq_entryHandle(*(seqsubmit->GetData().GetEntrys().front()), CScope::eMissing_Null);
2601  }
2602  } else if (loc) {
2603  CBioseq_Handle bsh = obj.scope->GetBioseqHandle(*loc);
2604  if (bsh) {
2605  seh = bsh.GetTopLevelEntry();
2606  }
2607  } else if (seq_id) {
2608  CBioseq_Handle bsh = obj.scope->GetBioseqHandle(*seq_id);
2609  if (bsh) {
2610  seh = bsh.GetTopLevelEntry();
2611  }
2612  }
2613  return seh;
2614 }
2615 
2616 
2618  const CFeatListItem * p1,
2619  const CFeatListItem * p2
2620 )
2621 
2622 {
2623  string str1 = p1->GetDescription();
2624  string str2 = p2->GetDescription();
2625 
2626  char ch1 = str1.c_str()[0];
2627  char ch2 = str2.c_str()[0];
2628  // starts with a number -> goes at the end of the list
2629  bool num1 = isdigit(ch1);
2630  bool num2 = isdigit(ch2);
2631  if (num1 && num2) {
2632  return NStr::Compare(str1, str2, NStr::eNocase) < 0;
2633  } else if (num1) {
2634  return false;
2635  } else if (num2) {
2636  return true;
2637  }
2638 
2639  // starts with a tilde or dash - sort with other tildes,
2640  // put before numbers after alphas
2641  if (ch1 == '~' && ch2 == '~') {
2642  return NStr::Compare(str1, str2, NStr::eNocase) < 0;
2643  } else if (ch1 == '~') {
2644  return false;
2645  } else if (ch2 == '~') {
2646  return true;
2647  }
2648  if (ch1 == '-' && ch2 == '-') {
2649  return NStr::Compare(str1, str2, NStr::eNocase) < 0;
2650  } else if (ch1 == '-') {
2651  return false;
2652  } else if (ch2 == '-') {
2653  return true;
2654  }
2655 
2656  return NStr::Compare(p1->GetDescription(), p2->GetDescription(), NStr::eNocase) < 0;
2657 }
2658 
2659 
2660 vector<const CFeatListItem * > GetSortedFeatList(CSeq_entry_Handle seh, size_t max)
2661 {
2662  vector<const CFeatListItem * > r_list;
2663 
2664  vector<bool> present(CSeqFeatData::eSubtype_max, false);
2665  size_t count = 0;
2666  if (seh) {
2667  CFeat_CI fi(seh);
2668  while (fi) {
2669  present[fi->GetData().GetSubtype()] = true;
2670  ++fi;
2671  ++count;
2672  if (count > max)
2673  break;
2674  }
2675  }
2676 
2677  set<string> existing;
2678 
2679  vector<const CFeatListItem * > used;
2680  vector<const CFeatListItem * > popular;
2681  vector<const CFeatListItem * > import_feats;
2682  vector<const CFeatListItem * > least_liked;
2683  vector<const CFeatListItem * > unused;
2684  const CFeatListItem * all = NULL;
2685 
2686  const CFeatList* feat_list = CSeqFeatData::GetFeatList();
2687  ITERATE(CFeatList, ft_it, *feat_list) {
2688  const CFeatListItem * f = &(*ft_it);
2689  int subtype = f->GetSubtype();
2690  if (subtype == CSeqFeatData::eSubtype_any && f->GetType() == 0) {
2691  all = f;
2692  } else if (subtype != CSeqFeatData::eSubtype_bad
2693  && subtype != CSeqFeatData::eSubtype_any
2694  && subtype != CSeqFeatData::eSubtype_Imp_CDS
2695  && subtype != CSeqFeatData::eSubtype_source
2696  && subtype != CSeqFeatData::eSubtype_org) {
2697  string desc = f->GetDescription();
2698  if (existing.find(desc) == existing.end())
2699  {
2700  existing.insert(desc);
2701 
2702  if (!present[subtype]) {
2703  unused.push_back(f);
2704  } else {
2705  switch (subtype) {
2711  import_feats.push_back(f);
2712  break;
2714  popular.push_back(f);
2715  break;
2723  popular.push_back(f);
2724  break;
2726  least_liked.push_back(f);
2727  break;
2728  default:
2729  used.push_back(f);
2730  break;
2731  }
2732  }
2733  }
2734  }
2735  }
2736 
2737  sort(popular.begin(), popular.end(),s_CompareDescriptions);
2738  sort(used.begin(), used.end(), s_CompareDescriptions);
2739  sort(import_feats.begin(), import_feats.end(), s_CompareDescriptions);
2740  sort(unused.begin(), unused.end(), s_CompareDescriptions);
2741 
2742  r_list.insert(r_list.begin(), popular.begin(), popular.end());
2743  r_list.insert(r_list.end(), used.begin(), used.end());
2744  r_list.insert(r_list.end(), least_liked.begin(), least_liked.end());
2745  r_list.insert(r_list.end(), import_feats.begin(), import_feats.end());
2746  r_list.insert(r_list.end(), unused.begin(), unused.end());
2747 
2748  if (all) {
2749  r_list.insert(r_list.begin(), all);
2750  }
2751 
2752  return r_list;
2753 }
2754 
2755 void CSeqUtils::ParseRanges(const string& r_str, CSeqUtils::TRanges& ranges)
2756 {
2757  vector<string> range_pairs;
2758 
2759  NStr::Split(r_str, ",", range_pairs);
2760  ITERATE (vector<string>, iter, range_pairs) {
2761  vector<string> pos;
2762  NStr::Split(*iter, "-", pos);
2763  if (pos.size() != 2) continue;
2764  string f_str = NStr::TruncateSpaces(pos[0]);
2765  string t_str = NStr::TruncateSpaces(pos[1]);
2766  if ( !f_str.empty() && !t_str.empty() ) {
2767  try {
2768  TSeqPos from = NStr::StringToUInt(f_str);
2769  TSeqPos to = NStr::StringToUInt(t_str);
2770 
2771  //!! verify end of range inclusion!
2772  ranges.push_back(TSeqRange(from, to));
2773  } catch (CException&) {
2774  LOG_POST(Error << "Invalid range: " << *iter);
2775  }
2776  }
2777  }
2778 }
2779 
2780 
2781 /// --------------------------------
2782 /// CGencollIdMapperAdapter
2783 /// --------------------------------
2784 
2786  : m_Mapper(mapper)
2787  , m_Spec(spec) {
2788  if (!m_Mapper)
2789  NCBI_THROW(CException, eInvalid, "IIdMapperAdapter: Invalid initialization");
2790 }
2791 
2792 objects::CSeq_id_Handle CGencollIdMapperAdapter::Map(const objects::CSeq_id_Handle& idh) {
2793  auto id = Ref(new CSeq_id);
2794  id->Assign(*idh.GetSeqId());
2795 
2796  auto loc = Ref(new CSeq_loc);
2797  loc->SetWhole(*id);
2798  auto mapped_loc = Map(*loc);
2799 
2800  CSeq_id_Handle out_sih;
2801  if (mapped_loc && !mapped_loc->IsNull() && !mapped_loc->IsEmpty() && mapped_loc->GetId())
2802  out_sih = CSeq_id_Handle::GetHandle(*mapped_loc->GetId());
2803  return out_sih;
2804 }
2805 
2807  return m_Mapper->Map(loc, m_Spec);
2808 }
2809 
2811  set< CRef<CSeq_id> > ids;
2812  CTypeIterator<CSeq_id> idit(obj);
2813  for (; idit; ++idit) {
2814  CSeq_id& id = *idit;
2815  if (ids.emplace(&id).second == false)
2816  continue;
2817  auto loc = Ref(new CSeq_loc);
2818  loc->SetWhole(id);
2819  auto mapped_loc = Map(*loc);
2820  if (!mapped_loc || !mapped_loc->GetId())
2821  continue;
2822  id.Assign(*mapped_loc->GetId());
2823  }
2824 }
2825 
2827 {
2828  CGencollIdMapper::SIdSpec MapSpec;
2829  MapSpec.TypedChoice = assm->IsRefSeq() ?
2831  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
2832  MapSpec.Role = eGC_SequenceRole_top_level;
2833  CRef<CGencollIdMapper> mapper(new CGencollIdMapper(assm));
2834  return new CGencollIdMapperAdapter(mapper, MapSpec);
2835 }
2836 
2837 
2838 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< CSeq_align > ConvertSeq_align(const CSeq_align &src, CSeq_align::TSegs::E_Choice dst_choice, CSeq_align::TDim anchor_row=-1, CScope *scope=NULL)
Convert source alignment to a new type.
USING_SCOPE(objects)
CSeq_entry_Handle GetSeqEntryForSeqdesc(CRef< CScope > scope, const CSeqdesc &seq_desc)
CAlign_CI –.
Definition: align_ci.hpp:63
int GetGenCode(int def=1) const
Definition: BioSource.cpp:73
CBioseq_CI –.
Definition: bioseq_ci.hpp:69
CBioseq_Handle –.
CBioseq_set_Handle –.
This stream exchanges data with an HTTP server located at the URL: http[s]://host[:port]/path[?...
CConstRef –.
Definition: ncbiobj.hpp:1266
void SetMaxReturn(int ret_max)
void SearchHistory(const string &db, const string &term, const string &web_env, Int8 query_key, int retstart, CNcbiOstream &ostr)
Uint8 Search(const string &db, const string &term, vector< objects::CSeq_id_Handle > &uids, const string &xml_path=kEmptyStr)
void Summary(const string &db, const vector< objects::CSeq_id_Handle > &uids, xml::document &docsums, const string &version="")
void Link(const string &db_from, const string &db_to, const vector< objects::CSeq_id_Handle > &uids_from, vector< objects::CSeq_id_Handle > &uids_to, const string &xml_path=kEmptyStr, const string &command="neighbor")
CFeatListItem - basic configuration data for one "feature" type.
string GetDescription() const
CConfigurableItems - a static list of items that can be configured.
CFeat_CI –.
Definition: feat_ci.hpp:64
string GetAccession() const
Retrieve the accession for this assembly.
Definition: GC_Assembly.cpp:99
void Find(const CSeq_id_Handle &id, TSequenceList &sequences) const
Find all references to a given sequence within an assembly.
bool IsRefSeq() const
Is this assembly a RefSeq assembly?
list< CConstRef< CGC_Sequence > > TSequenceList
Definition: GC_Assembly.hpp:67
void GetMolecules(TSequenceList &molecules, ESubset subset) const
Retrieve a subset of molecules.
CRef< objects::CSeq_loc > Map(const objects::CSeq_loc &Loc, const SIdSpec &Spec) const
Definition: id_mapper.cpp:168
static bool isGenCollSequence(const objects::CBioseq_Handle &handle)
static CRef< CGencollSvc > GetInstance(void)
Definition: gencoll_svc.cpp:54
static CRef< objects::CGenomicCollectionsService > GetGenCollService(int timeout_sec=-1)
static CGuiRegistry & GetInstance()
access the application-wide singleton
Definition: registry.cpp:400
CRegistryReadView GetReadView(const string &section) const
get a read-only view at a particular level.
Definition: registry.cpp:428
CIUPACaa –.
Definition: IUPACaa.hpp:66
CIUPACna –.
Definition: IUPACna.hpp:66
CMappedFeat –.
Definition: mapped_feat.hpp:59
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiRegistry –.
Definition: ncbireg.hpp:913
CObject –.
Definition: ncbiobj.hpp:180
void AddInterval(const CSeq_interval &ival)
for convenience
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
CRWLock –.
Definition: ncbimtx.hpp:953
TThisType & CombineWith(const TRange &r)
Definition: range_coll.hpp:195
CRef –.
Definition: ncbiobj.hpp:618
class CRegistryReadView provides a nested hierarchical view at a particular key.
Definition: reg_view.hpp:58
int GetInt(const string &key, int default_val=0) const
access a named key at this level, with no recursion
Definition: reg_view.cpp:230
bool GetBool(const string &key, bool default_val=false) const
Definition: reg_view.cpp:241
list< SKeyInfo > TKeys
retrieve information about all keys in the registry
Definition: reg_view.hpp:68
string GetString(const string &key, const string &default_val=kEmptyStr) const
Definition: reg_view.cpp:246
void GetKeys(TKeys &keys) const
Retrieve information about all keys in this view.
Definition: reg_view.cpp:284
CScope –.
Definition: scope.hpp:92
ESubtype GetSubtype(void) const
static const CFeatList * GetFeatList()
@ eSubtype_transit_peptide
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
CSeqVector –.
Definition: seq_vector.hpp:65
TDim CheckNumRows(void) const
Validators.
Definition: Seq_align.cpp:73
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
CSeq_annot_Handle –.
CSeq_entry_CI –.
CSeq_entry_Handle –.
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
int Compare(const CSeq_feat &f2) const
Compare relative order of this feature and feature f2, ordering first by features' coordinates,...
Definition: Seq_feat.hpp:242
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:453
CSeq_loc_Mapper –.
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
Base class for all serializable objects.
Definition: serialbase.hpp:150
class CStaticArrayMap<> provides access to a static array in much the same way as CStaticArraySet<>,...
Definition: static_map.hpp:175
TBase::const_iterator const_iterator
Definition: static_map.hpp:179
static CSeq_id_Handle GetSeq_id_Handle(const const_iterator &iter)
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
bool GetBool(void) const
Definition: User_field.hpp:341
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
General IdMapper interface.
Definition: iidmapper.hpp:48
Task clients implement this callback interface.
Definition: utils.hpp:107
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
void clear()
Definition: set.hpp:153
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
The xml::document class is used to hold the XML tree and various bits of information about it.
Definition: document.hpp:80
void swap(document &other)
Swap one xml::document object for another.
Definition: document.cpp:530
const node & get_root_node(void) const
Get a reference to the root node of this document.
Definition: document.cpp:539
The xml::node::const_iterator provides a way to access children nodes similar to a standard C++ conta...
Definition: node.hpp:746
The xml::node_set::const_iterator class is used to iterate over nodes in a node set.
Definition: node_set.hpp:226
The xml::node_set class is used to store xpath query result set.
Definition: node_set.hpp:68
iterator begin()
Get an iterator that points to the beginning of the xpath query result node set.
Definition: node_set.cpp:173
iterator end()
Get an iterator that points one past the last node in the xpath query result node set.
Definition: node_set.cpp:185
The xml::node class is used to hold information about one XML node.
Definition: node.hpp:106
iterator end(void)
Get an iterator that points one past the last child for this node.
Definition: node.hpp:835
iterator find(const char *name, const ns *nspace=NULL)
Find the first child node that has the given name and namespace.
Definition: node.cpp:1258
node_set run_xpath_query(const xpath_expression &expr)
Run the given XPath query.
Definition: node.cpp:1292
const char * get_content(void) const
Get the content for this text node.
Definition: node.cpp:797
string GetSeqIdString(const CSeq_id &id)
Definition: compartp.cpp:100
static uch flags
static unsigned char depth[2 *(256+1+29)+1]
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
static const char * str(char *buf, int n)
Definition: stats.c:84
int offset
Definition: replacements.h:160
char data[12]
Definition: iconv.c:80
#define INVALID_GI
Definition: ncbimisc.hpp:1089
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ENTREZ_ID_TO(T, entrez_id)
Definition: ncbimisc.hpp:1097
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define GI_TO(T, gi)
Definition: ncbimisc.hpp:1085
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
unsigned int THTTP_Flags
Bitwise OR of EHTTP_Flag.
@ fHTTP_AutoReconnect
See HTTP_CreateConnectorEx()
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:461
static CConstRef< objects::CSeq_loc > GetFeatLocation(const objects::CSeq_feat &feat, const objects::CBioseq_Handle &bsh)
Definition: utils.cpp:2239
static bool StringToRange(const string &range_str, long &from, long &to)
Convert a range string to a range.
Definition: utils.cpp:1253
virtual void SetTaskName(const string &name)=0
virtual void SetTaskCompleted(int completed)=0
set total finished task number.
static int GetMaxSearchTime(const CRegistryReadView &view)
Definition: utils.cpp:399
static string CreateLinkRow(const string &tag, const string &label, const string &url)
Definition: utils.cpp:1003
static TMappingInfo GetMappingInfoFromLocation(const objects::CSeq_loc &feat_loc, const objects::CSeq_id &product_id, const int feat_offset=0)
Definition: utils.cpp:2199
static TMappingInfo GetRnaMappingInfo(const objects::CSeq_loc &feat_loc, const objects::CMappedFeat &feat, const objects::CBioseq_Handle &handle)
Helper function to generate mapping info between the specified product sequence and genomic sequence ...
Definition: utils.cpp:1900
static void ELinkQuery(const string &db_from, const string &db_to, const TEntrezIds &uids_from, TEntrezIds &uids_to, const string &cmd="neighbor", const string &xpath="//Link/Id/text()")
Queries elink.fcgi with a vector of uids/seq-ids (seq-ids preferred for future compatibility) and ret...
Definition: utils.cpp:1613
int TFeatLinkingMode
Definition: utils.hpp:219
static string GetAlignDbIsQuery(const string &annot)
get a is_source_assembly_query string from AlignDb annotation suffixed after a second '#'
Definition: utils.cpp:877
static bool LinkFeatures(CLinkedFeature::TLinkedFeats &feats, TFeatLinkingMode mode=1, ISeqTaskProgressCallback *p_cb=NULL)
Link features into a hierarchical list.
Definition: utils.cpp:452
static bool IsNAA(const string &annot, bool isStrict=false)
check if a given annotation is a named annotation accession[.version][number] when isStrict == false...
Definition: utils.cpp:796
static bool IsException(const objects::CSeq_feat &feat)
Definition: utils.cpp:962
static objects::CBioseq * SeqLocToBioseq(objects::CScope &scope, const objects::CSeq_loc &loc)
Construct a bioseq to fit a given location.
Definition: utils.cpp:600
static bool isRmtPipelineFileType(const string &sFileType)
check if a file type is one of remote file types
Definition: utils.cpp:818
static TLocVec GetAccessionPlacements(const objects::CSeq_id &id, objects::CScope &scope, int time_out_sec=1, THTTP_Flags flags=fHTTP_AutoReconnect)
Retrieve mapped-up sequences.
Definition: utils.cpp:1129
static bool isTopLevel(const objects::CSeq_id &seq_id, const string &assm_acc, objects::CScope *scope=NULL)
Definition: utils.cpp:1475
static bool IsPartialStart(const objects::CSeq_loc &loc)
Definition: utils.cpp:937
static objects::SAnnotSelector GetAnnotSelector(TAnnotFlags flags=0)
request an annotation selector for a given type
Definition: utils.cpp:167
list< CRef< CLinkedFeature > > TLinkedFeats
Definition: utils.hpp:80
static bool IsPartialFeature(const objects::CSeq_feat &feat)
Definition: utils.cpp:926
static TLocVec GetGiPlacements(TGi gi, int time_out_sec=5, THTTP_Flags flags=fHTTP_AutoReconnect)
Retrieve mapped-up sequences.
Definition: utils.cpp:1068
static void ESearchQuery(const string &db, const string &term, TEntrezIds &uids, size_t &count, const int ret_max, const string &xpath="//IdList/Id/text()")
Queries esearch.fcgi and returns a vector of uids/seq-ids (seq-ids preferred for future compatibility...
Definition: utils.cpp:1680
vector< CRef< objects::CSeq_loc > > TLocVec
Definition: utils.hpp:123
static TMappingInfo GetCdsMappingInfoFromRna(const TMappingInfo &rna_mapping_info, const objects::CSeq_feat &rna_feat, const objects::CMappedFeat &mapped_cds_feat, const objects::CSeq_loc &feat_loc, objects::CScope &scope, const int feat_offset=0)
Derive the CDS feature mapping information based on its parent RNA feature mapping info.
Definition: utils.cpp:2036
static string GetNcbiBaseUrl()
Definition: utils.cpp:971
static int GetMaxSearchSegments(const CRegistryReadView &view)
Definition: utils.cpp:359
static bool IsAlignDb(const string &annot)
check if a given annotation is AlignDb (potentially suffixed with batch identification string after a '...
Definition: utils.cpp:861
static const string & NameTypeValueToStr(TAnnotNameType type)
Definition: utils.cpp:152
vector< TEntrezId > TEntrezIds
Definition: utils.hpp:125
static void GetAssmAccs_Gi(TAccs &accs, TGi gi)
get all assembly accessions corresponding to a GI
Definition: utils.cpp:1560
static objects::SAnnotSelector::EMaxSearchSegmentsAction GetMaxSearchSegmentsAction(const CRegistryReadView &view)
Definition: utils.cpp:364
virtual objects::CSeq_id_Handle Map(const objects::CSeq_id_Handle &idh) override
Definition: utils.cpp:2792
static bool IsPseudoFeature(const objects::CSeq_feat &feat)
Definition: utils.cpp:910
vector< objects::CSeq_id_Handle > TSeqIdHandles
Definition: utils.hpp:128
static string MakeRmtAnnotName(const string &sSuffix)
create an annotation name for a remote file pipeline, appending sSuffix
Definition: utils.cpp:807
static string GetAnnotName(const objects::CSeq_annot_Handle &annot_handle)
static bool IsPartialStop(const objects::CSeq_loc &loc)
Definition: utils.cpp:943
CGencollIdMapper::SIdSpec m_Spec
Definition: utils.hpp:519
CSeq_entry_Handle GetTopSeqEntryFromScopedObject(SConstScopedObject &obj)
Definition: utils.cpp:2542
static bool GetRangeCollection(const objects::CSeq_id &id, const objects::CHandleRangeMap &map, CRangeCollection< TSeqPos > &ranges)
Definition: utils.cpp:584
static CRef< objects::CSeq_loc > MixLocToLoc(const objects::CSeq_loc &mix_loc, const objects::CBioseq_Handle &handle)
Create a new seq-loc with a unique seq-id from a "mixed" loc.
Definition: utils.cpp:661
static void GetAssmIds_GIChr(TEntrezIds &gc_ids, TGi gi)
get all assembly ids associated with a gi where this gi is a chromosome
Definition: utils.cpp:1364
static CConstRef< objects::CUser_field > GetAnnotUserField(const objects::CSeq_annot &annot, const string &type, const string &label)
Definition: utils.cpp:725
static bool CanHavePlacements(const objects::CSeq_id &seqid)
check that a given seq-id can potentially have placements (to weed out cases like local ids)
Definition: utils.cpp:1088
static const string & GetUnnamedAnnot()
Get the commonly used symbol representing an unnamed annotation.
Definition: utils.hpp:531
static void GetMappingInfo(const objects::CMappedFeat &mapped_feat, const objects::CBioseq_Handle &bsh, TMappingInfo &info, const string &annot=string())
Definition: utils.cpp:2281
virtual void SetTaskTotal(int total)=0
static string CreateIdStr(const vector< T > &uids)
Convert a list of ids into a comma-delimited string.
Definition: utils.hpp:552
static TLocVec GetLocPlacements(const objects::CSeq_loc &loc, int time_out_sec=1)
Retrieve locations on mapped-up sequences. Helper method for retrieving upper level sequences for a give...
Definition: utils.cpp:1160
static string GetAnnotComment(const objects::CSeq_annot_Handle &annot_handle)
static string CreateSectionRow(const string &tag)
Definition: utils.cpp:997
virtual void MapObject(CSerialObject &obj) override
Definition: utils.cpp:2810
static bool isQuasiLocal(const objects::CBioseq_Handle &handle)
check that a given accession is either local or unrecognizable; this can be important to avoid unneces...
Definition: utils.cpp:1111
static bool IsVDBAccession(const string &acc)
Check if string starts with ("SRA", "SRR", "DRR", "ERR")
Definition: utils.cpp:887
static bool IsSameStrands(const objects::CSeq_loc &loc)
Definition: utils.cpp:948
static bool GetAnnotShown(const objects::CSeq_annot &annot)
Definition: utils.cpp:2385
static CRef< objects::CSeq_loc > CreateSeq_loc(const objects::CSeq_id &id, const CRangeCollection< TSeqPos > &ranges)
Definition: utils.cpp:555
vector< TSeqRange > TRanges
Definition: utils.hpp:479
CRef< CGencollIdMapper > m_Mapper
Definition: utils.hpp:518
static bool IsNAA_Name(const string &annot)
check if a given annotation is a named annotation name with '.' replaced with '_'.
Definition: utils.cpp:801
static bool GetGIString(const string &sid, string *gi_str)
ad-hoc GI extraction (for malformed seq-id strings like: "gi|55823257|ref|YP_141698....
Definition: utils.cpp:2335
static IIdMapper * GetIdMapper(CRef< objects::CGC_Assembly > assm)
Definition: utils.cpp:2826
static TAnnotNameType NameTypeStrToValue(const string &type)
Definition: utils.cpp:140
static bool IsExtendedNAA(const string &annot, bool isStrict=false)
check if a given annotation is an extended NAA (named accession[.version][number],...
Definition: utils.cpp:850
static int GetGenCode(const objects::CBioseq_Handle &handle)
Returns Bioseq's Genetic Code.
Definition: utils.cpp:2358
static string GetChrGI(TGi gi)
try to get a chromosome from a GI
Definition: utils.cpp:1410
static bool Match(const objects::CSeq_id &id1, const objects::CSeq_id &id2, objects::CScope *scope=NULL)
check to see if two seq-ids are identical.
static void SetAnnot(objects::SAnnotSelector &sel, const string &annot)
help function for setting up an annotation.
Definition: utils.cpp:320
static string CreateTableRow(const string &tag="", const string &value="")
Definition: utils.cpp:990
static CRef< objects::CSeq_loc > RemapChildToParent(const objects::CSeq_loc &parent, const objects::CSeq_loc &child, objects::CScope *scope=NULL)
remap a location to a parent location.
Definition: utils.cpp:498
static string CreateTableStart()
help methods for creating HTML text
Definition: utils.cpp:978
static TLocVec GetAccessionPlacementsMsec(const objects::CSeq_id &id, objects::CScope &scope, unsigned long time_out_msec=1000, THTTP_Flags flags=fHTTP_AutoReconnect)
same, with a timeout in milliseconds
Definition: utils.cpp:1134
static CRegistryReadView GetSelectorRegistry()
helper functions to read selector-related tune-up info (mostly segment limits) from registry:
Definition: utils.cpp:353
CGencollIdMapperAdapter(CRef< CGencollIdMapper > mapper, const CGencollIdMapper::SIdSpec &spec)
-------------------------------- CGencollIdMapperAdapter --------------------------------
Definition: utils.cpp:2785
virtual bool StopRequested() const =0
static bool isRmtAnnotName(const string &sAnnotname)
check if a given annotation was created by a remote file pipeline
Definition: utils.cpp:813
EAnnotationNameType
flags for classifying annotation names.
Definition: utils.hpp:132
static CRef< objects::CSeq_id > MapStringId(const string &str, objects::IIdMapper *mapper)
Definition: utils.cpp:2394
static objects::CMappedFeat GetMrnaForCds(const objects::CMappedFeat &cds_feat, const string &named_acc=string())
Definition: utils.cpp:2254
static std::string GetXmlChildNodeValue(const xml::node &parent, const std::string &name)
Returns the contents of the specified child node.
Definition: utils.cpp:1727
static string GetChrId(const string &id_str, objects::CScope &scope)
same as GetChrGI(), but takes a string with id (that must correspond to some GI)
Definition: utils.cpp:1543
static void GetAssmIds_GI(TEntrezIds &gc_ids, TGi gi)
get all assembly ids associated with a gi.
Definition: utils.cpp:1745
static bool IsExtendedNAA_Name(const string &annot)
check if a given annotation is an extended NAA name with '.' replaced with '_'.
Definition: utils.cpp:855
static void ParseRanges(const string &sRanges, TRanges &ranges)
Definition: utils.cpp:2755
vector< TMappedInt > TMappingInfo
Definition: utils.hpp:165
static string CreateTableEnd()
Definition: utils.cpp:984
static void SetResolveDepth(objects::SAnnotSelector &sel, bool adaptive, int depth=-1)
help function for setting selector resolve depth.
Definition: utils.cpp:405
static void SetAnnotShown(objects::CSeq_annot &annot, bool isShown)
Definition: utils.cpp:2376
static bool CheckMaxSearchSegments(int actual, int max, objects::SAnnotSelector::EMaxSearchSegmentsAction action)
check actual number of segments against max and perform the action if the actual number is more than ...
Definition: utils.cpp:378
virtual void AddTaskCompleted(int delta)=0
set to add newly finished task number.
int TAnnotFlags
Definition: utils.hpp:160
static string GetAlignDbBatch(const string &annot)
get a batch string from AlignDb annotation suffixed with batch identification string after a '#'
Definition: utils.cpp:867
@ eAnnot_Unnamed
unnamed annotation
Definition: utils.hpp:133
@ eAnnot_All
all annotations
Definition: utils.hpp:135
@ eAnnot_Named
all named annotations
Definition: utils.hpp:134
@ eAnnot_Other
any given named annots
Definition: utils.hpp:136
@ fAnnot_UnsetNamed
Definition: utils.hpp:155
@ fAnnot_UnsetDepth
Definition: utils.hpp:158
CRef< objects::CScope > scope
Definition: objects.hpp:53
CConstRef< CObject > object
Definition: objects.hpp:52
@ eUnknown
Definition: app_popup.hpp:72
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators.
Definition: serialbase.hpp:696
@ eSerial_AsnText
ASN.1 text.
Definition: serialdef.hpp:73
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:318
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
Definition: Seq_id.cpp:2613
EAccessionInfo
For IdentifyAccession (below)
Definition: Seq_id.hpp:220
bool IsGi(void) const
bool MatchesTo(const CSeq_id_Handle &h) const
True if *this matches to h.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TGi GetGi(void) const
@ eAcc_type_mask
Definition: Seq_id.hpp:247
@ eAcc_refseq_contig
Definition: Seq_id.hpp:420
@ eAcc_refseq_mrna_predicted
Definition: Seq_id.hpp:439
@ eAcc_refseq_unreserved
Definition: Seq_id.hpp:418
@ eAcc_refseq_mrna
Definition: Seq_id.hpp:415
@ eAcc_refseq_prot
Definition: Seq_id.hpp:414
@ eAcc_refseq_wgs_nuc
Definition: Seq_id.hpp:421
@ eAcc_refseq_wgs_prot
Definition: Seq_id.hpp:422
@ eAcc_refseq_ncrna
Definition: Seq_id.hpp:416
@ eAcc_refseq_chromosome
Definition: Seq_id.hpp:429
@ eAcc_refseq_genomic
Definition: Seq_id.hpp:430
@ eAcc_refseq_prot_predicted
Definition: Seq_id.hpp:438
@ eAcc_refseq_ncrna_predicted
Definition: Seq_id.hpp:440
@ eAcc_refseq_wgs_intermed
Definition: Seq_id.hpp:431
@ eAcc_refseq_genome
Definition: Seq_id.hpp:419
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:984
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3222
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:915
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
Definition: Seq_loc.cpp:5037
bool IsSetStrand(void) const
Get strand.
Definition: Seq_loc.hpp:1049
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
int CompareSubLoc(const CSeq_loc &loc, ENa_strand strand, const ISubLocFilter *filter=NULL) const
Compare first-level sub-locations sequentially to order them by biological "complexity".
Definition: Seq_loc.cpp:805
bool IsEmpty(void) const
True if the current location is empty.
Definition: Seq_loc.hpp:1084
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
size_t GetSize(void) const
Get number of ranges.
Definition: Seq_loc.cpp:2636
TRange GetRange(void) const
Get the range.
Definition: Seq_loc.hpp:1042
ENa_strand GetStrand(void) const
Definition: Seq_loc.hpp:1056
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
Definition: Seq_loc.hpp:1028
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
Definition: Seq_loc.cpp:3467
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3251
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:963
@ fMerge_AbuttingOnly
Definition: Seq_loc.hpp:327
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
Definition: feature.cpp:3341
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
CConstRef< CSeq_loc > m_ParentLoc
Definition: sequence.hpp:1148
CRef< CSeq_loc > Resolve(CScope *scope=0, TFlags flags=0) const
Definition: sequence.hpp:1143
TGi GetGiForId(const objects::CSeq_id &id, CScope &scope, EGetIdType flags=0)
Given a Seq-id retrieve the corresponding GI.
Definition: sequence.cpp:668
@ eGetId_ForceGi
return only a gi-based seq-id
Definition: sequence.hpp:99
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void GetAllTSEs(TTSE_Handles &tses, enum ETSEKind kind=eManualTSEs)
Definition: scope.cpp:295
CConstRef< CSynonymsSet > GetSynonyms(const CSeq_id &id)
Get bioseq synonyms, resolving to the bioseq in this scope.
Definition: scope.cpp:486
vector< CSeq_entry_Handle > TTSE_Handles
Definition: scope.hpp:645
@ eAllTSEs
Definition: scope.hpp:643
@ eProductToLocation
Map from the feature's product to location.
@ eLocationToProduct
Map from the feature's location to product.
@ eMissing_Null
Definition: scope.hpp:157
bool IsNamed(void) const
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
bool IsSetProduct(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle. Throws an exception if none is available.
TSeq GetSeq(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
const string & GetName(void) const
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq. If start == 0,...
CSeq_entry_Handle GetTopLevelEntry(void) const
Return a handle for the top-level seq-entry.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
bool IsSynonym(const CSeq_id &id) const
Check if this id can be used to obtain this bioseq handle.
bool IsSeq(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetFeatType(TFeatType type)
Set feature type (also set annotation type to feat)
SAnnotSelector & SetExactDepth(bool value=true)
SetExactDepth() specifies that annotations will be searched on the segment level specified by SetReso...
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
SAnnotSelector & SetOverlapTotalRange(void)
Check overlapping only of total ranges.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
SAnnotSelector & SetResolveDepth(int depth)
SetResolveDepth sets the limit of subsegment resolution in searching annotations.
SAnnotSelector & IncludeNamedAnnotAccession(const string &acc, int zoom_level=0)
SAnnotSelector & SetExcludeExternal(bool exclude=true)
External annotations for the Object Manager are annotations located in top level Seq-entry different f...
SAnnotSelector & SetCollectNames(bool value=true)
Collect available annot names rather than annots.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetAnnotType(TAnnotType type)
Set annotation type (feat, align, graph)
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
SAnnotSelector & ExcludeNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to exclude.
SAnnotSelector & ExcludeUnnamedAnnots(void)
Add unnamed annots to set of annots names to exclude.
SAnnotSelector & AddUnnamedAnnots(void)
Add unnamed annots to set of annots names to look for.
@ fIncludeGivenEntry
Include the top (given) entry.
@ fRecursive
Iterate recursively.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
TSeqPos size(void) const
Definition: seq_vector.hpp:291
bool IsProtein(void) const
Definition: seq_vector.hpp:350
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1684
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:1401
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
TObjectType * Release(void)
Release a reference to the object and return a pointer to the object.
Definition: ncbiobj.hpp:846
position_type GetLength(void) const
Definition: range.hpp:158
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
bool Empty(void) const
Definition: range.hpp:148
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
static TThisType GetWhole(void)
Definition: range.hpp:272
virtual string GetString(const string &section, const string &name, const string &default_value, TFlags flags=0) const
Get the parameter string value.
Definition: ncbireg.cpp:321
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define kEmptyStr
Definition: ncbistr.hpp:123
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
#define NPOS
Definition: ncbistr.hpp:133
static size_t StringToSizet(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to size_t.
Definition: ncbistr.cpp:1769
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
Definition: ncbistr.hpp:5297
static long StringToLong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to long.
Definition: ncbistr.cpp:653
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
unsigned int usec
microseconds (modulo 1,000,000)
Definition: ncbi_types.h:78
STimeout * NcbiMsToTimeout(STimeout *timeout, unsigned long ms)
Definition: ncbi_types.c:48
unsigned int sec
seconds
Definition: ncbi_types.h:77
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
const TData & GetData(void) const
Get the Data member data.
void SetType(TType &value)
Assign a value to Type data member.
bool IsBool(void) const
Check if variant Bool is selected.
@ eGC_SequenceRole_top_level
const TStd & GetStd(void) const
Get the variant data.
Definition: Seq_align_.hpp:752
bool IsStd(void) const
Check if variant Std is selected.
Definition: Seq_align_.hpp:746
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
Definition: Seq_feat_.hpp:943
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
E_Choice
Choice variants.
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
Definition: Seq_feat_.hpp:990
const TCdregion & GetCdregion(void) const
Get the variant data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1365
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1096
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1346
TPartial GetPartial(void) const
Get the Partial member data.
Definition: Seq_feat_.hpp:962
TExcept GetExcept(void) const
Get the Except member data.
Definition: Seq_feat_.hpp:1009
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
Definition: Seq_feat_.hpp:1090
bool IsSetProduct(void) const
product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1084
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:509
@ e_Pub
publication applies to this seq
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
const Tdata & Get(void) const
Get the member data.
Tdata & Set(void)
Assign a value to data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
void SetStrand(TStrand value)
Assign a value to Strand data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
Definition: Seq_loc_.cpp:216
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Local
local use
Definition: Seq_id_.hpp:95
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
const Tdata & Get(void) const
Get the member data.
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Seq_annot_.hpp:852
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
bool IsSetDesc(void) const
used only for stand-alone Seq-annots. Check if a value has been assigned to Desc data member.
Definition: Seq_annot_.hpp:840
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
const TPub & GetPub(void) const
Get the Pub member data.
Definition: Pubdesc_.hpp:605
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Source
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
SStaticPair< const char *, CSeqUtils::TAnnotNameType > TNameTypeStr
Definition: utils.cpp:128
static bool s_CompareDescriptions(const CFeatListItem *p1, const CFeatListItem *p2)
Definition: utils.cpp:2617
static void s_CreateMappingInfo(const CSeq_loc &prod_loc, const CSeq_loc &gen_loc, CSeqUtils::TMappingInfo &info)
Helper function to convert two mapped locations into an interval-to-interval mapping structure.
Definition: utils.cpp:1851
static TMappedLocs s_GetRnaMappingLocs(const CSeq_loc &feat_loc, const CMappedFeat &feat, const CBioseq_Handle &handle)
Definition: utils.cpp:1770
static const int kRetMax
Definition: utils.cpp:1361
static bool s_IsNAA(const string &annot, char div)
Definition: utils.cpp:764
static void s_AdjustToAnnotatedCDS(const CMappedFeat &mapped_cds_feat, CScope &scope, CSeqUtils::TMappingInfo &cds_map_info)
Definition: utils.cpp:1915
map< string, bool > TTopLevels
Definition: utils.cpp:89
DEFINE_STATIC_ARRAY_MAP(TNameTypeMap, sm_NameTypeMap, s_NameTypeStrs)
CBioseq_Handle GetBioseqForSeqFeat(const CSeq_feat &f, CScope &scope)
Definition: utils.cpp:2520
static void s_ESearchQuery(const string &db, const string &term, vector< T > &uids, size_t &count, const int ret_max, const string &xpath)
Definition: utils.cpp:1652
static void s_ELinkQuery(const string &db_from, const string &db_to, const vector< T1 > &uids_from, vector< T2 > &uids_to, const string &cmd, const string &xpath)
Definition: utils.cpp:1598
static const TAssemblySeqIds & s_GetAssemblySeqIds(const string &assm_acc)
Definition: utils.cpp:1439
CConstRef< CBioseq > GetBioseqForSeqdesc(CRef< CScope > scope, const CSeqdesc &seq_desc)
Definition: utils.cpp:2426
static CSeqUtils::TLocVec s_GetAlnMapplingLocs(const CSeq_align_set &align_set, TGi gi)
Definition: utils.cpp:1011
static CRWLock m_AssemblySeqIdLock
Definition: utils.cpp:93
CSeq_feat_Handle GetSeqFeatHandleForBadLocFeature(const CSeq_feat &feat, CScope &scope)
Definition: utils.cpp:2502
CStaticArrayMap< string, CSeqUtils::TAnnotNameType > TNameTypeMap
Definition: utils.cpp:136
static const string kTaxDb
Definition: utils.cpp:1358
static bool s_IsExtendedNAA(const string &sAnnotName, char div, bool isStrict)
Definition: utils.cpp:824
static map< string, TAssemblySeqIds > s_AssemblySeqIdCache
Definition: utils.cpp:95
static const string kAssmDb
Definition: utils.cpp:1360
static CRWLock m_TopLevelsLock
Definition: utils.cpp:91
static const string & GetLinksURL()
Definition: utils.cpp:1058
CSeq_entry_Handle GetDefaultTopLevelSeqEntry(CScope &scope)
Definition: utils.cpp:2489
static const string kNucDb
Definition: utils.cpp:1359
vector< const CFeatListItem * > GetSortedFeatList(CSeq_entry_Handle seh, size_t max)
Definition: utils.cpp:2660
CSeq_entry_Handle GetSeqEntryForPubdesc(CRef< CScope > scope, const CPubdesc &pubdesc)
Definition: utils.cpp:2453
static TTopLevels m_TopLevels
Definition: utils.cpp:90
vector< CRef< CSeq_loc > > TMappedLocs
For CDS and RNA feature mapping information.
Definition: utils.cpp:1768
static const char * kLinksUrlDefault
Definition: utils.cpp:1055
vector< CConstRef< CSeq_id > > TAssemblySeqIds
Definition: utils.cpp:94
static const TNameTypeStr s_NameTypeStrs[]
Definition: utils.cpp:129
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
int i
int len
Lightweight interface for getting lines of data with minimal memory copying.
static const CS_INT unused
Definition: long_binary.c:20
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
mdb_mode_t mode
Definition: lmdb++.h:38
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
XML library namespace.
Definition: attributes.hpp:57
static const BitmapCharRec ch1
Definition: ncbi_10x20.c:1827
static const BitmapCharRec ch2
Definition: ncbi_10x20.c:1819
const char * tag
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T max(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
#define fi
#define row(bind, expected)
Definition: string_bind.c:73
SAnnotSelector.
CSeq_annot::C_Data::E_Choice TAnnotType
bool operator()(const CMappedFeat &feat0, const CMappedFeat &feat1) const
Definition: utils.cpp:102
bool operator()(const CMappedFeat &feat0, const CMappedFeat &feat1) const
Definition: utils.cpp:117
bool operator()(const pair< T, U > &p1, const pair< T, U > &p2) const
Definition: utils.cpp:445
Location relative to a base Seq-loc: one (usually) or more ranges of offsets.
Definition: sequence.hpp:1124
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason of introducing this...
Definition: static_set.hpp:60
Timeout structure.
Definition: ncbi_types.h:76
Definition: type.c:6
done
Definition: token1.c:1
Modified on Tue Apr 23 07:37:07 2024 by modify_doxy.py rev. 669887